Modified project in general

- Extracted some functionality into libraries
6 years ago · f79a0de8c1
parent a209475dea
commit f79a0de8c1
10 changed files with 261 additions and 75 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
 *.zip
 .idea
-.cache
+.cache
 __pycache__.*
 __pycache__
--- a/conf/logging.config
+++ b/conf/logging.config
@ -0,0 +1,27 @@
 [loggers]
 keys=root
 [handlers]
 keys=stream_handler, file_handler
 [formatters]
 keys=formatter
 [logger_root]
 level=DEBUG
 handlers=stream_handler, file_handler
 [handler_stream_handler]
 class=StreamHandler
 level=FATAL
 formatter=formatter
 args=(sys.stderr,)
 [handler_file_handler]
 class=handlers.TimedRotatingFileHandler
 level=DEBUG
 formatter=formatter
 args=('./logs/utility.log','midnight',1,5,'utf-8',False,True,)
 [formatter_formatter]
 format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s
--- a/lib/cutils.py
+++ b/lib/cutils.py
@ -0,0 +1,31 @@
 class ProgressBar:
    """Console progress bar that is redrawn in place on one terminal line.

    The bar is rewritten using carriage returns; a newline is emitted only
    once ``progress`` equals ``total``.

    Args:
        total: Number of steps that corresponds to 100 %.
        prefix: Text printed in front of the bar.
        suffix: Text printed after the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar.
    """
    def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'):
        self.prefix = prefix
        self.suffix = suffix
        self.fill = fill
        self.length = length
        self.total = total
        self.progress = 0
    def tick(self):
        """Advance the progress by one step and redraw the bar."""
        self.progress += 1
        self._print_progress()
    def setprogress(self, progress):
        """Set the progress to the absolute value *progress* and redraw."""
        self.progress = progress
        self._print_progress()
    def _print_progress(self):
        """Write the bar for the current progress to stdout."""
        total = self.total
        if total > 0:
            # Clamp so that overshooting (or negative) progress never draws a
            # bar longer than ``length`` or reports more than 100 %.
            done = min(max(self.progress, 0), total)
            ratio = done / float(total)
            filled_length = int(self.length * done // total)
        else:
            # Nothing to do counts as finished; this also avoids dividing by
            # zero for an empty workload.
            ratio = 1.0
            filled_length = self.length
        percent = "{0:.1f}".format(100 * ratio)
        bar = self.fill * filled_length + '-' * (self.length - filled_length)
        print('\r%s |%s| %s%% %s' % (self.prefix, bar, percent, self.suffix), end='\r')
        # Print New Line on Complete
        if self.progress == total:
            print()
--- a/lib/fsutils.py
+++ b/lib/fsutils.py
@ -0,0 +1,70 @@
 import os
 import shutil
 def dir_exist_guarantee(dirpath):
    """Make sure that *dirpath* exists, creating it when it is missing.

    Missing parent directories are created as well. Nothing happens when
    the path already exists.

    Args:
        dirpath: Path of the directory that has to exist afterwards.
    """
    if not os.path.exists(dirpath):
        # makedirs also creates missing parents; exist_ok covers the race in
        # which the directory appears between the check and the creation.
        os.makedirs(dirpath, exist_ok=True)
 def get_extension(fname: str):
    """Return the lower-cased text after the last ``.`` in *fname*.

    The result is lower-cased so that it can be compared against lower-case
    extension lists regardless of how the name is spelled. A name without
    any ``.`` is returned as a whole (lower-cased).

    Args:
        fname: File name, path or URL.

    Returns:
        The extension without the leading dot.
    """
    return fname.split('.')[-1].lower()
 class FileInfo:
    """A simple wrapper around the os.path functions that exposes basic file
    info and lets you perform basic file tasks.

    The path attributes (``dirname``, ``basename``, ``extension`` and
    ``fullname``) are stored as UTF-8 encoded ``bytes``.
    """
    def __init__(self, fname: str):
        """Collect the information for *fname*.

        Raises:
            Exception: If *fname* does not point to an existing file.
        """
        self._init_info(fname)
    def _init_info(self, fname):
        """ Sets all the variables that are required for performing file
        tasks and for accessing information about the file. """
        # stringvars
        self._path = os.path.normpath(fname.replace('\\', '/')).encode('utf-8')
        if not os.path.isfile(self._path):
            raise Exception("Not a File")
        self._extless, self.extension = os.path.splitext(self._path)
        self.dirname, self.basename = os.path.split(self._path)
        self.fullname = os.path.join(self.dirname, self.basename)
        # boolvars
        self.exist = os.path.exists(self.fullname)
        self.ismount = self.islink = False
        if self.exist:
            self.ismount = os.path.ismount(self.fullname)
            self.islink = os.path.islink(self.fullname)
    def _refresh_exist(self):
        """ Re-checks the file system so that ``exist`` never goes stale and
        returns the fresh value. """
        self.exist = os.path.exists(self.fullname)
        return self.exist
    def delete(self):
        """ Deletes the file if it exists.
        Does nothing, if it does not exist."""
        if self._refresh_exist():
            os.remove(self.fullname)
            self.exist = False
    def create(self):
        """ Creates the file if it doesn't exist.
        Does nothing, if it does."""
        if not self._refresh_exist():
            # Opening for writing is enough to create an empty file.
            with open(self.fullname, 'w'):
                pass
            self.exist = True
    def reset(self):
        """ Truncates the file to zero length (creating it if needed). """
        with open(self.fullname, 'w'):
            pass
        self.exist = True
    def open(self, mode: str):
        """ Returns the file opened with the builtin open in *mode*.
        The file is created first if it does not exist. """
        self.create()
        return open(self.fullname, mode)
    def copy(self, dest: str):
        """ Copies the file to *dest* and returns a FileInfo for the copy.
        Returns None if the file does not exist. """
        if self._refresh_exist():
            shutil.copyfile(self.fullname, dest)
            return FileInfo(dest)
        return None
    def move(self, dest: str):
        """ Moves the file to *dest* (if it exists) and re-initialises this
        object for *dest*. Raises an Exception if *dest* is not a file
        afterwards. """
        if self._refresh_exist():
            shutil.move(self.fullname, dest)
        self._init_info(dest)
--- a/lib/logs/utility.log
+++ b/lib/logs/utility.log
--- a/lib/logutils.py
+++ b/lib/logutils.py
@ -0,0 +1,13 @@
 import logging
 from logging.config import fileConfig
 from lib import fsutils
 _LOGGING_CONFIGURED = False
 def get_logger(name=None):
    """Return a logger, loading the logging configuration on first use.

    The configuration is read from ``./conf/logging.config`` and the
    ``logs`` directory is created beforehand; both paths are relative to
    the current working directory.

    Args:
        name: Name of the logger; a falsy value returns the root logger.

    Returns:
        The requested ``logging.Logger``.
    """
    global _LOGGING_CONFIGURED
    if not _LOGGING_CONFIGURED:
        fsutils.dir_exist_guarantee('logs')
        # fileConfig disables all existing loggers by default, which would
        # silence loggers handed out earlier; keep them enabled and only
        # load the configuration once.
        fileConfig('./conf/logging.config', disable_existing_loggers=False)
        _LOGGING_CONFIGURED = True
    return logging.getLogger(name if name else None)
--- a/lib/netutils.py
+++ b/lib/netutils.py
@ -0,0 +1,37 @@
 import urllib.request as urlreq
 import time
 from bs4 import BeautifulSoup
 from lib import logutils
 logger = logutils.get_logger('netutils')
 def get_soup4url(url: str, retrys: int = 2, headers: dict = None, timeout: int = 30) -> BeautifulSoup:
    """ Returns a soup for the url.

    Args:
        url: Address of the page to fetch.
        retrys: Number of additional attempts after the first one failed.
        headers: Request headers; no extra headers are sent when omitted.
        timeout: Timeout in seconds for each attempt.

    Returns:
        The page parsed with the "lxml" parser, or False if every attempt
        failed or the response body was empty.
    """
    req = urlreq.Request(url, headers=headers if headers is not None else {})
    html = None
    attempts = retrys + 1
    for attempt in range(attempts):
        try:
            # The context manager closes the connection even if read() fails.
            with urlreq.urlopen(req, timeout=timeout) as response:
                html = response.read()
            break
        except Exception as e:
            logger.exception(e)
            if attempt < attempts - 1:
                time.sleep(1)  # to avoid request flooding
    if html:
        return BeautifulSoup(html, "lxml")
    return False
 def download_file(url: str, dest: str, headers: dict = None):
    """ Downloads the resource at url and writes it to the file dest.

    dest is created (or truncated) before the request is sent, so an empty
    file is left behind when the connection fails.

    Args:
        url: Address of the resource to download.
        dest: Path of the file the payload is written to.
        headers: Request headers; no extra headers are sent when omitted.

    Returns:
        True if the payload was written, False on a ConnectionError.
    """
    # The with-blocks close the file and the response on every exit path.
    with open(dest, "wb") as f:
        req = urlreq.Request(url, headers=headers if headers is not None else {})
        try:
            response = urlreq.urlopen(req)
        except ConnectionError:
            print('\n [-] Connection Error')
            return False
        with response:
            f.write(response.read())
    return True
--- a/logs/utility.log
+++ b/logs/utility.log
@ -0,0 +1,61 @@
 2018-11-20 11:15:43,247 netutils     ERROR    <urlopen error _ssl.c:830: The handshake operation timed out>
 Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 1318, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1239, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1285, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1234, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1026, in _send_output
    self.send(msg)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 964, in send
    self.connect()
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 1400, in connect
    server_hostname=server_hostname)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 407, in wrap_socket
    _context=self, _session=session)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 817, in __init__
    self.do_handshake()
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 1077, in do_handshake
    self._sslobj.do_handshake()
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 689, in do_handshake
    self._sslobj.do_handshake()
 socket.timeout: _ssl.c:830: The handshake operation timed out
 During handling of the above exception, another exception occurred:
 Traceback (most recent call last):
  File "C:\Users\dev\Documents\Projekte\python-utility-scripts\lib\netutils.py", line 15, in get_soup4url
    html = urlreq.urlopen(req, timeout=timeout).read()
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 526, in open
    response = self._open(req, data)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 544, in _open
    '_open', req)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 504, in _call_chain
    result = func(*args)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 1361, in https_open
    context=self._context, check_hostname=self._check_hostname)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\urllib\request.py", line 1320, in do_open
    raise URLError(err)
 urllib.error.URLError: <urlopen error _ssl.c:830: The handshake operation timed out>
 2018-11-20 14:11:39,064 netutils     ERROR    The read operation timed out
 Traceback (most recent call last):
  File "C:\Users\dev\Documents\Projekte\python-utility-scripts\lib\netutils.py", line 15, in get_soup4url
    html = urlreq.urlopen(req, timeout=timeout).read()
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 462, in read
    s = self._safe_read(self.length)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\http\client.py", line 612, in _safe_read
    chunk = self.fp.read(min(amt, MAXAMOUNT))
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\socket.py", line 586, in readinto
    return self._sock.recv_into(b)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 1012, in recv_into
    return self.read(nbytes, buffer)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 874, in read
    return self._sslobj.read(len, buffer)
  File "C:\ProgramData\Anaconda3\envs\python-utility-scripts\lib\ssl.py", line 631, in read
    v = self._sslobj.read(len, buffer)
 socket.timeout: The read operation timed out
--- a/requirements.txt
+++ b/requirements.txt
@ -1,4 +1,3 @@
 beautifulsoup4==4.6.3
 bs4==0.0.1
 lxml==4.2.5
 typing==3.6.4
--- a/riddle2.py
+++ b/riddle2.py
@ -1,7 +1,3 @@
 # encoding=utf-8
 import urllib.request as urlreq
 from bs4 import BeautifulSoup
 import zipfile
 import time
 import os
@ -9,6 +5,8 @@ import sys
 import optparse
 import shutil
 from lib import cutils, netutils, fsutils
 blacklist = ['b.thumbs.redditmedia.com', 'reddit.com']
 dl_dir = './.cache/'
 img_ext = ['jpg', 'jpeg', 'png']    # define the urls we are searching for
@ -21,66 +19,25 @@ hdr = {                             # request header
 errors = {}
-def print_progress(iteration, total, prefix='', suffix='', decimals=1, length=50, fill='█'):
+def has_source(tag: netutils.BeautifulSoup) -> bool:
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filled_length = int(length * iteration // total)
    bar = fill * filled_length + '-' * (length - filled_length)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
    sys.stdout.flush()
    # Print New Line on Complete
    if iteration == total:
        print()
 def spinning_cursor():
    while True:
        for cursor in '|/-\\':
            yield cursor
 def get_extension(fstring):
    return fstring.split('.')[-1].lower()
 def get_soup4url(url):
    """ Returns a soup for the url with 10 retrys """
    req = urlreq.Request(url, headers=hdr)
    html = None
    for x in range(0, 10):
        try:
            html = urlreq.urlopen(req).read()
            break
        except Exception as e:
            if errors[e]:
                errors[e] += 1
            else:
                errors[e] = 1
            time.sleep(1)  # to avoid request flooding
    if html:
        soup = BeautifulSoup(html, "lxml")
        return soup
    return False
 def has_source(tag):
    if tag.has_attr('src'):
        try:
-            return get_extension(tag['src']) in img_ext
+            return fsutils.get_extension(tag['src']) in img_ext
        except IndexError or KeyError:
            return False
    elif tag.has_attr('data-url'):
        try:
            tag['src'] = tag['data-url']
-            return get_extension(tag['src']) in img_ext
+            return fsutils.get_extension(tag['src']) in img_ext
        except IndexError or KeyError:
            return False
    else:
        return False
-def get_next_url(baseurl, url):
+def get_next_url(baseurl: str, url: str):
    ids = []
-    soup = get_soup4url(url)
+    soup = netutils.get_soup4url(url, headers=hdr)
    if not soup:
        return False
    for t in soup.find_all(has_source):
@ -92,16 +49,16 @@ def get_next_url(baseurl, url):
                pass
    ids = [_id for _id in ids if _id]
    if len(ids) == 0:
-        return False
+        return []
    _id = ids[-1]
    next_url = '{}/?after={}'.format(baseurl, _id)
    return next_url
-def get_img4site(url):
+def get_img4site(url: str) -> list:
-    soup = get_soup4url(url)
+    soup = netutils.get_soup4url(url, headers=hdr)
    if not soup:
-        return False
+        return []
    ret = []
    sys.stdout.write('.')
    sys.stdout.flush()
@ -122,7 +79,7 @@ def get_img4site(url):
    return ret
-def get_img4sub(url, length=-1):
+def get_img4sub(url: str, length: int =-1) -> list:
    baseurl = url
    imgs = []
    print('[~] 1/2 Getting images...')
@ -153,41 +110,30 @@ def get_img4sub(url, length=-1):
    return imgs
-def download_images(imgs, zfile):
+def download_images(imgs: list, zfile: zipfile.ZipFile):
    count = 1
    imgcount = len(imgs)
    fnames = [zinfo.filename for zinfo in zfile.infolist()]
    print('[~] Downloading %s images' % imgcount)
-    if not os.path.isdir(dl_dir):
+    pb = cutils.ProgressBar(total=imgcount, prefix="[~] 2/2 Downloadinng", suffix="Complete")
-        os.mkdir(dl_dir)
+    fsutils.dir_exist_guarantee(dl_dir)
    for img in imgs:
-        print_progress(count, imgcount, prefix="2/2 Downloading: ", suffix="Complete")
+        pb.tick()
        imgname = img.split('/')[-1]
-        name = dl_dir + imgname
+        name = os.path.join(dl_dir, imgname)
        if os.path.isfile(name) or imgname in fnames:
            count += 1
            continue
-        f = open(name, "wb")
+        netutils.download_file(img, name, headers=hdr)
        req = urlreq.Request(img, headers=hdr)
        try:
            image = urlreq.urlopen(req)
        except ConnectionError:
            print('\n [-] Connection Error')
            return
        f.write(image.read())
        f.close()
        zfile.write(name, imgname, zipfile.ZIP_DEFLATED)
        try:
            os.remove(name)
        except FileNotFoundError or PermissionError:
            pass
        time.sleep(0.1)  # no don't penetrate
        count += 1
    added = len(zfile.infolist()) - len(fnames)
    print('[+] Added %s files to the zipfile' % added)
-def download_subreddit(sub, count=-1, out=None):
+def download_subreddit(sub: str, count: int =-1, out: str =None):
    mode = 'w'
    zname = sub + '.zip'
    if out: