Modified project in general

- Extracted shared functionality into library modules under lib/ (cutils, fsutils, logutils, netutils)
6 years ago · f79a0de8c1
parent a209475dea
commit f79a0de8c1
10 changed files with 261 additions and 75 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 *.zip
 .idea
-.cache
+.cache
+__pycache__.*
+__pycache__
--- a/conf/logging.config
+++ b/conf/logging.config
@@ -0,0 +1,27 @@
+[loggers]
+keys=root
+
+[handlers]
+keys=stream_handler, file_handler
+
+[formatters]
+keys=formatter
+
+[logger_root]
+level=DEBUG
+handlers=stream_handler, file_handler
+
+[handler_stream_handler]
+class=StreamHandler
+level=FATAL
+formatter=formatter
+args=(sys.stderr,)
+
+[handler_file_handler]
+class=handlers.TimedRotatingFileHandler
+level=DEBUG
+formatter=formatter
+args=('./logs/utility.log','midnight',1,5,'utf-8',False,True,)
+
+[formatter_formatter]
+format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s
--- a/lib/cutils.py
+++ b/lib/cutils.py
@@ -0,0 +1,31 @@
+
+class ProgressBar:
+    def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'):
+        self.prefix = prefix
+        self.suffix = suffix
+        self.fill = fill
+        self.length = length
+        self.total = total
+        self.progress = 0
+
+    def tick(self):
+        self.progress += 1
+        self._print_progress()
+
+    def setprogress(self, progress):
+        self.progress = progress
+        self._print_progress()
+
+    def _print_progress(self):
+        iteration = self.progress
+        total = self.total
+        prefix = self.prefix
+        suffix = self.suffix
+
+        percent = ("{0:." + str(1) + "f}").format(100 * (iteration / float(total)))
+        filled_length = int(self.length * iteration // total)
+        bar = self.fill * filled_length + '-' * (self.length - filled_length)
+        print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
+        # Print New Line on Complete
+        if iteration == total:
+            print()
--- a/lib/fsutils.py
+++ b/lib/fsutils.py
@@ -0,0 +1,70 @@
+import os
+import shutil
+
+
+def dir_exist_guarantee(dirpath):
+    if not os.path.exists(dirpath):
+        os.mkdir(dirpath)
+
+
+def get_extension(fname: str):
+    return fname.split('.')[-1]
+
+
+class FileInfo:
+    """ A simple wrapper around the os path functions that returns basic file info
+     and lets you perform basic file tasks."""
+    def __init__(self, fname: str):
+        self._init_info(fname)
+
+    def _init_info(self, fname):
+        """ Sets all the required variables for performing file tasks and to
+         access when working with the file object. """
+        # stringvars
+        self._path = os.path.normpath(fname.replace('\\', '/')).encode('utf-8')
+        if not os.path.isfile(self._path):
+            raise Exception("Not a File")
+        self._extless, self.extension = os.path.splitext(self._path)
+        self.dirname, self.basename = os.path.split(self._path)
+        self.fullname = os.path.join(self.dirname, self.basename)
+        # boolvars
+        self.exist = os.path.exists(self.fullname)
+        self.ismount = self.islink = False
+        if self.exist:
+            self.ismount = os.path.ismount(self.fullname)
+            self.islink = os.path.islink(self.fullname)
+
+    def delete(self):
+        """ Deletes the file if it exists.
+         Does nothing, if it does not exist."""
+        if self.exist:
+            os.remove(self.fullname)
+
+    def create(self):
+        """ Creates the file if it doesn't exist.
+         Does nothing, if it does."""
+        if not self.exist:
+            with open(self.fullname, 'w') as f:
+                f.write('');
+
+    def reset(self):
+        """ Opens the file and writes nothing into it. """
+        with open(self.fullname, 'w') as f:
+            f.write('')
+
+    def open(self, mode: str):
+        """ Returns the file opened with the open method. """
+        self.create()
+        return open(self.fullname, mode)
+
+    def copy(self, dest: str):
+        if self.exist:
+            shutil.copyfile(self.fullname, dest)
+            return FileInfo(dest)
+
+    def move(self, dest: str):
+        if self.exist:
+            shutil.move(self.fullname, dest)
+            self._init_info(dest)
+        else:
+            self._init_info(dest)
--- a/lib/logs/utility.log
+++ b/lib/logs/utility.log
--- a/lib/logutils.py
+++ b/lib/logutils.py
@@ -0,0 +1,13 @@
+import logging
+from logging.config import fileConfig
+
+from lib import fsutils
+
+
+def get_logger(name=None):
+    fsutils.dir_exist_guarantee('logs')
+    fileConfig('./conf/logging.config')
+    if name:
+        return logging.getLogger(name)
+    else:
+        return logging.getLogger()
--- a/lib/netutils.py
+++ b/lib/netutils.py
@@ -0,0 +1,37 @@
+import urllib.request as urlreq
+import time
+
+from bs4 import BeautifulSoup
+
+from lib import logutils
+
+logger = logutils.get_logger('netutils')
+
+
+def get_soup4url(url: str, retrys: int =2, headers: dict=urlreq.noheaders(), timeout: int =30) -> BeautifulSoup:
+    """ Returns a soup for the url """
+    req = urlreq.Request(url, headers=headers)
+    html = None
+    for _ in range(0, retrys+1):
+        try:
+            html = urlreq.urlopen(req, timeout=timeout).read()
+            break
+        except Exception as e:
+            logger.exception(e)
+            time.sleep(1)  # to avoid request flooding
+    if html:
+        soup = BeautifulSoup(html, "lxml")
+        return soup
+    return False
+
+
+def download_file(url: str, dest: str, headers: dict=urlreq.noheaders()):
+    f = open(dest, "wb")
+    req = urlreq.Request(url, headers=headers)
+    try:
+        image = urlreq.urlopen(req)
+    except ConnectionError:
+        print('\n [-] Connection Error')
+        return
+    f.write(image.read())
+    f.close()
--- a/logs/utility.log
+++ b/logs/utility.log
@@ -0,0 +1,61 @@
+2018-11-20 11:15:43,247 netutils     ERROR    <urlopen error _ssl.c:830: The handshake operation timed out>
+Traceback (most recent call last):
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 1318, in do_open
+    encode_chunked=req.has_header('Transfer-encoding'))
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1239, in request
+    self._send_request(method, url, body, headers, encode_chunked)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1285, in _send_request
+    self.endheaders(body, encode_chunked=encode_chunked)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1234, in endheaders
+    self._send_output(message_body, encode_chunked=encode_chunked)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1026, in _send_output
+    self.send(msg)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 964, in send
+    self.connect()
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1400, in connect
+    server_hostname=server_hostname)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 407, in wrap_socket
+    _context=self, _session=session)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 817, in __init__
+    self.do_handshake()
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 1077, in do_handshake
+    self._sslobj.do_handshake()
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 689, in do_handshake
+    self._sslobj.do_handshake()
+socket.timeout: _ssl.c:830: The handshake operation timed out
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "C:\Users\dev\Documents\Projekte\python-utility-scripts\lib\netutils.py", line 15, in get_soup4url
+    html = urlreq.urlopen(req, timeout=timeout).read()
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 223, in urlopen
+    return opener.open(url, data, timeout)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 526, in open
+    response = self._open(req, data)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 544, in _open
+    '_open', req)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 504, in _call_chain
+    result = func(*args)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 1361, in https_open
+    context=self._context, check_hostname=self._check_hostname)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 1320, in do_open
+    raise URLError(err)
+urllib.error.URLError: <urlopen error _ssl.c:830: The handshake operation timed out>
+2018-11-20 14:11:39,064 netutils     ERROR    The read operation timed out
+Traceback (most recent call last):
+  File "C:\Users\dev\Documents\Projekte\python-utility-scripts\lib\netutils.py", line 15, in get_soup4url
+    html = urlreq.urlopen(req, timeout=timeout).read()
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 462, in read
+    s = self._safe_read(self.length)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 612, in _safe_read
+    chunk = self.fp.read(min(amt, MAXAMOUNT))
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\socket.py", line 586, in readinto
+    return self._sock.recv_into(b)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 1012, in recv_into
+    return self.read(nbytes, buffer)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 874, in read
+    return self._sslobj.read(len, buffer)
+  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 631, in read
+    v = self._sslobj.read(len, buffer)
+socket.timeout: The read operation timed out
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
 beautifulsoup4==4.6.3
-bs4==0.0.1
 lxml==4.2.5
 typing==3.6.4
--- a/riddle2.py
+++ b/riddle2.py
@@ -1,7 +1,3 @@
-# encoding=utf-8
-import urllib.request as urlreq
-from bs4 import BeautifulSoup
-
 import zipfile
 import time
 import os
@@ -9,6 +5,8 @@ import sys
 import optparse
 import shutil

+from lib import cutils, netutils, fsutils
+
 blacklist = ['b.thumbs.redditmedia.com', 'reddit.com']
 dl_dir = './.cache/'
 img_ext = ['jpg', 'jpeg', 'png']    # define the urls we are searching for
@@ -21,66 +19,25 @@ hdr = {                             # request header
 errors = {}


-def print_progress(iteration, total, prefix='', suffix='', decimals=1, length=50, fill='█'):
-    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
-    filled_length = int(length * iteration // total)
-    bar = fill * filled_length + '-' * (length - filled_length)
-    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
-    sys.stdout.flush()
-    # Print New Line on Complete
-    if iteration == total:
-        print()
-
-
-def spinning_cursor():
-    while True:
-        for cursor in '|/-\\':
-            yield cursor
-
-
-def get_extension(fstring):
-    return fstring.split('.')[-1].lower()
-
-
-def get_soup4url(url):
-    """ Returns a soup for the url with 10 retrys """
-    req = urlreq.Request(url, headers=hdr)
-    html = None
-    for x in range(0, 10):
-        try:
-            html = urlreq.urlopen(req).read()
-            break
-        except Exception as e:
-            if errors[e]:
-                errors[e] += 1
-            else:
-                errors[e] = 1
-            time.sleep(1)  # to avoid request flooding
-    if html:
-        soup = BeautifulSoup(html, "lxml")
-        return soup
-    return False
-
-
-def has_source(tag):
+def has_source(tag: netutils.BeautifulSoup) -> bool:
    if tag.has_attr('src'):
        try:
-            return get_extension(tag['src']) in img_ext
+            return fsutils.get_extension(tag['src']) in img_ext
        except IndexError or KeyError:
            return False
    elif tag.has_attr('data-url'):
        try:
            tag['src'] = tag['data-url']
-            return get_extension(tag['src']) in img_ext
+            return fsutils.get_extension(tag['src']) in img_ext
        except IndexError or KeyError:
            return False
    else:
        return False


-def get_next_url(baseurl, url):
+def get_next_url(baseurl: str, url: str):
    ids = []
-    soup = get_soup4url(url)
+    soup = netutils.get_soup4url(url, headers=hdr)
    if not soup:
        return False
    for t in soup.find_all(has_source):
@@ -92,16 +49,16 @@ def get_next_url(baseurl, url):
                pass
    ids = [_id for _id in ids if _id]
    if len(ids) == 0:
-        return False
+        return []
    _id = ids[-1]
    next_url = '{}/?after={}'.format(baseurl, _id)
    return next_url


-def get_img4site(url):
-    soup = get_soup4url(url)
+def get_img4site(url: str) -> list:
+    soup = netutils.get_soup4url(url, headers=hdr)
    if not soup:
-        return False
+        return []
    ret = []
    sys.stdout.write('.')
    sys.stdout.flush()
@@ -122,7 +79,7 @@ def get_img4site(url):
    return ret


-def get_img4sub(url, length=-1):
+def get_img4sub(url: str, length: int =-1) -> list:
    baseurl = url
    imgs = []
    print('[~] 1/2 Getting images...')
@@ -153,41 +110,30 @@ def get_img4sub(url, length=-1):
    return imgs


-def download_images(imgs, zfile):
-    count = 1
+def download_images(imgs: list, zfile: zipfile.ZipFile):
    imgcount = len(imgs)
    fnames = [zinfo.filename for zinfo in zfile.infolist()]
    print('[~] Downloading %s images' % imgcount)
-    if not os.path.isdir(dl_dir):
-        os.mkdir(dl_dir)
+    pb = cutils.ProgressBar(total=imgcount, prefix="[~] 2/2 Downloadinng", suffix="Complete")
+    fsutils.dir_exist_guarantee(dl_dir)
    for img in imgs:
-        print_progress(count, imgcount, prefix="2/2 Downloading: ", suffix="Complete")
+        pb.tick()
        imgname = img.split('/')[-1]
-        name = dl_dir + imgname
+        name = os.path.join(dl_dir, imgname)
        if os.path.isfile(name) or imgname in fnames:
-            count += 1
            continue
-        f = open(name, "wb")
-        req = urlreq.Request(img, headers=hdr)
-        try:
-            image = urlreq.urlopen(req)
-        except ConnectionError:
-            print('\n [-] Connection Error')
-            return
-        f.write(image.read())
-        f.close()
+        netutils.download_file(img, name, headers=hdr)
        zfile.write(name, imgname, zipfile.ZIP_DEFLATED)
        try:
            os.remove(name)
        except FileNotFoundError or PermissionError:
            pass
        time.sleep(0.1)  # no don't penetrate
-        count += 1
    added = len(zfile.infolist()) - len(fnames)
    print('[+] Added %s files to the zipfile' % added)


-def download_subreddit(sub, count=-1, out=None):
+def download_subreddit(sub: str, count: int =-1, out: str =None):
    mode = 'w'
    zname = sub + '.zip'
    if out: