Merge pull request #6 from Trivernis/develop

Develop
5 years ago · 3a7d676766
parent afe858ac5f c5c4742db9
commit 3a7d676766
7 changed files with 242 additions and 43 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -65,4 +65,5 @@ config.yaml

 # data
 .ignore
-.cache
+.cache
+*.zip
--- a/Pipfile
+++ b/Pipfile
@@ -0,0 +1,13 @@
+[[source]]
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+PyYAML = "*"
+praw = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.7"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -0,0 +1,111 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "e030a28963c27bc726b49ad8bc68cf9648c19fde4e1a5a76d1fc8a5955b06cd1"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.7"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.python.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "certifi": {
+            "hashes": [
+                "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50",
+                "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef"
+            ],
+            "version": "==2019.9.11"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+            ],
+            "version": "==3.0.4"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
+                "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
+            ],
+            "version": "==2.8"
+        },
+        "praw": {
+            "hashes": [
+                "sha256:2e5c98e49fe60e5308255ed147b670d350f98281f84f582df30f87de727b6de2",
+                "sha256:cb8f85541ad4c6b10214ef9639acccfb5fed7ffee977be169b85357d2d2ea6d9"
+            ],
+            "index": "pypi",
+            "version": "==6.4.0"
+        },
+        "prawcore": {
+            "hashes": [
+                "sha256:25dd14bf121bc0ad2ffc78e2322d9a01a516017105a5596cc21bb1e9a928b40c",
+                "sha256:ab5558efb438aa73fc66c4178bfc809194dea3ce2addf4dec873de7e2fd2824e"
+            ],
+            "version": "==1.0.1"
+        },
+        "pyyaml": {
+            "hashes": [
+                "sha256:0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9",
+                "sha256:01adf0b6c6f61bd11af6e10ca52b7d4057dd0be0343eb9283c878cf3af56aee4",
+                "sha256:5124373960b0b3f4aa7df1707e63e9f109b5263eca5976c66e08b1c552d4eaf8",
+                "sha256:5ca4f10adbddae56d824b2c09668e91219bb178a1eee1faa56af6f99f11bf696",
+                "sha256:7907be34ffa3c5a32b60b95f4d95ea25361c951383a894fec31be7252b2b6f34",
+                "sha256:7ec9b2a4ed5cad025c2278a1e6a19c011c80a3caaac804fd2d329e9cc2c287c9",
+                "sha256:87ae4c829bb25b9fe99cf71fbb2140c448f534e24c998cc60f39ae4f94396a73",
+                "sha256:9de9919becc9cc2ff03637872a440195ac4241c80536632fffeb6a1e25a74299",
+                "sha256:a5a85b10e450c66b49f98846937e8cfca1db3127a9d5d1e31ca45c3d0bef4c5b",
+                "sha256:b0997827b4f6a7c286c01c5f60384d218dca4ed7d9efa945c3e1aa623d5709ae",
+                "sha256:b631ef96d3222e62861443cc89d6563ba3eeb816eeb96b2629345ab795e53681",
+                "sha256:bf47c0607522fdbca6c9e817a6e81b08491de50f3766a7a0e6a5be7905961b41",
+                "sha256:f81025eddd0327c7d4cfe9b62cf33190e1e736cc6e97502b3ec425f574b3e7a8"
+            ],
+            "index": "pypi",
+            "version": "==5.1.2"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4",
+                "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"
+            ],
+            "version": "==2.22.0"
+        },
+        "six": {
+            "hashes": [
+                "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
+                "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
+            ],
+            "version": "==1.12.0"
+        },
+        "update-checker": {
+            "hashes": [
+                "sha256:59cfad7f9a0ee99f95f1dfc60f55bf184937bcab46a7270341c2c33695572453",
+                "sha256:70e39446fccf77b21192cf7a8214051fa93a636dc3b5c8b602b589d100a168b8"
+            ],
+            "version": "==0.16"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398",
+                "sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86"
+            ],
+            "version": "==1.25.6"
+        },
+        "websocket-client": {
+            "hashes": [
+                "sha256:1151d5fb3a62dc129164292e1227655e4bbc5dd5340a5165dfae61128ec50aa9",
+                "sha256:1fd5520878b68b84b5748bb30e592b10d0a91529d5383f74f4964e72b297fd3a"
+            ],
+            "version": "==0.56.0"
+        }
+    },
+    "develop": {}
+}
--- a/README.md
+++ b/README.md
@@ -8,7 +8,11 @@ This script requires at least Python 3.6.
 After cloning this repository you need to install the requirements via 

 ```sh
-pip install -r requirements.txt
+pipenv install
+```
+or
+```sh
+pip3 install -r requirements.txt
 ```

 ## Configuration
@@ -23,21 +27,23 @@ You can copy the `default-config.yaml` file to the `config.yaml` file and change
 ```yaml
 # user app credentials
 credentials:
-  client_id: your app-client id           # change this
-  client_secret: your app-client secret   # and change this
+  client_id: your app-client id  # change this
+  client_secret: your app-client secret  # change this

 # required extension of the file to be downloaded
 image-extensions:
  - png
  - jpg
  - jpeg
+
+min-size: 5 # minimum size in kilobytes
 ```

 ## Running

 ### Help output

-```sh
+```
 Usage: riddle.py [options] [subreddits]

 Options:
@@ -49,7 +55,8 @@ Options:
                        The name of the output folder. If none is specified,
                        it's the subreddits name.
  -z, --zip             Stores the images in a zip file if true
-  -n, --nsfw            If set nsfw-content is also downloaded.
+  --nsfw                If set nsfw-content is also downloaded.
+  --lzma                If set the lzma-compression module is used.
 ```

 ### Example
--- a/default-config.yaml
+++ b/default-config.yaml
@@ -1,10 +1,12 @@
-# user app credentials
-credentials:
-  client_id: your app-client id
-  client_secret: your app-client secret
-
-# required extension of the file to be downloaded
-image-extensions:
-  - png
-  - jpg
-  - jpeg
+# user app credentials
+credentials:
+  client_id: your app-client id
+  client_secret: your app-client secret
+
+# required extension of the file to be downloaded
+image-extensions:
+  - png
+  - jpg
+  - jpeg
+
+min-size: 5 # minimum size in kilobytes
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
-PyYaml
-praw
-zipfile
+PyYaml
+praw
--- a/riddle.py
+++ b/riddle.py
@@ -1,3 +1,7 @@
+#!/usr/bin/env python3
+
+# coding: utf-8
+# author: u/Trivernis
 import os
 import shutil
 import yaml
@@ -8,6 +12,7 @@ import urllib.request as urlreq

 user_agent = 'python:riddle:3.0 (by u/Trivernis)'  # the reddit api user-agent
 img_ext = ['jpg', 'jpeg', 'png']  # default used extensions to filter for images
+min_size = 5  # minimum size in kilobytes. changeable in settings


 def assert_dir_exist(dirpath):
@@ -20,32 +25,56 @@ def assert_dir_exist(dirpath):
        os.mkdir(dirpath)


-def download_file(url: str, dest: str):
+def download_file(url: str, dest: str, progressbar = None):
    """
    Downloads a url to a file
    :param url: download url
    :param dest: download destination
+    :param progressbar: The progressbar instance to clear it before writing an error message
    :return: Success?
    """
    f = open(dest, "wb")
    req = urlreq.Request(url)
+    success = False
    try:
        image = urlreq.urlopen(req)
        f.write(image.read())
-        f.close()
-        return True
+        success = True
    except ConnectionError:
-        print('\r[-] Connection Error \r')
-        return False
+        if progressbar:
+            progressbar.clear()
+        print('\r[-] Connection Error')
    except urlreq.HTTPError as err:
-        print('\r[-] HTTPError for %s: %s \r' % (url, err))
-        return False
+        if progressbar:
+            progressbar.clear()
+        print('\r[-] HTTPError for %s: %s' % (url, err))
+    except urlreq.URLError as err:
+        if progressbar:
+            progressbar.clear()
+        print('\r[-] URLError for %s: %s' % (url, err))
+    f.close()
+    try:
+        file_size = round(os.path.getsize(dest) / 1000)
+        if not success:
+            os.remove(dest)
+        elif file_size < min_size:
+            os.remove(dest)
+            success = False
+            if progressbar:
+                progressbar.clear()
+            print('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size))
+    except IOError as err:
+        if progressbar:
+            progressbar.clear()
+        print('\r[-] Error when removing file %s: %s' % (dest, err))
+    return success


 class ProgressBar:
    """
    A simple progressbar.
    """
+
    def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'):
        self.prefix = prefix
        self.suffix = suffix
@@ -53,12 +82,22 @@ class ProgressBar:
        self.length = length
        self.total = total
        self.progress = 0
+        self.textlength = 0

    def tick(self):
+        """
+        Next step of the progressbar. The stepwidth is always 1.
+        :return:
+        """
        self.progress += 1
        self._print_progress()

-    def setprogress(self, progress):
+    def setprogress(self, progress: float):
+        """
+        Set the progress of the bar.
+        :param progress: progress in percent
+        :return: None
+        """
        self.progress = progress
        self._print_progress()

@@ -71,11 +110,20 @@ class ProgressBar:
        percent = ("{0:." + str(1) + "f}").format(100 * (iteration / float(total)))
        filled_length = int(self.length * iteration // total)
        bar = self.fill * filled_length + '-' * (self.length - filled_length)
-        print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
-        # Print New Line on Complete
+        textout = '\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix)
+        print(textout, end='\r')
+        self.textlength = len(textout)
+        # Print new line on complete
        if iteration == total:
            print()

+    def clear(self):
+        """
+        clear last progress output
+        :return:
+        """
+        print(' '*self.textlength, end='\r')
+

 def parser_init():
    """
@@ -89,13 +137,17 @@ def parser_init():
                      If not set it is the maximum fetchable number.""")
    parser.add_option('-o', '--output', dest='output',
                      type='str', default=None,
-                      help='The name of the output folder. If none is specified, it\'s the subreddits name.')
+                      help="""The name of the output folder.
+                      If none is specified, it\'s the subreddits name.""")
    parser.add_option('-z', '--zip', dest='zip',
                      action='store_true', default=False,
                      help='Stores the images in a zip file if true')
-    parser.add_option('-n', '--nsfw', dest='nsfw',
+    parser.add_option('--nsfw', dest='nsfw',
                      action='store_true', default=False,
                      help='If set nsfw-content is also downloaded.')
+    parser.add_option('--lzma', dest='lzma',
+                      action='store_true', default=False,
+                      help='If set the lzma-compression module is used.')
    return parser.parse_args()


@@ -110,7 +162,7 @@ def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: boo
    """
    print('[~] Fetching images for r/%s...' % subreddit)
    urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)
-            if not submission.over_18 or nsfw]  # fetches hot images and filters by nsfw if nsfw not set to true
+            if not submission.over_18 or nsfw]  # fetches hot images and filters nsfw if set to false
    return [url for url in urls if url.split('.')[-1] in img_ext]


@@ -128,16 +180,16 @@ def download_images(images: list, dl_dir: str):
    assert_dir_exist(dl_dir)

    for img in images:  # download each image if it doesn't exist
-        pb.tick()
        success = False
        imgname = img.split('/')[-1]
        name = os.path.join(dl_dir, imgname)
        if not os.path.isfile(name):
-            success = download_file(img, name)
+            success = download_file(img, name, pb)
        else:
            preexist += 1
        if success:
            realcount += 1
+        pb.tick()
    print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' %
          (realcount, imgcount, dl_dir, preexist))

@@ -158,11 +210,12 @@ def filter_zip_files(images: list, zip_fname: str):
        return images


-def compress_folder(folder: str, zip_fname: str):
+def compress_folder(folder: str, zip_fname: str, compression: int):
    """
    Zips the contents of a folder to the destination zipfile name.
    :param folder: the folder to zip
    :param zip_fname: the name of the destination zipfile
+    :param compression: The compression method (constant from zipfile module)
    :return: None
    """
    print('[~] Compressing folder...')
@@ -171,7 +224,7 @@ def compress_folder(folder: str, zip_fname: str):
    if os.path.isfile(zip_fname):  # append to the zipfile if it already exists
        mode = 'a'

-    zfile = zipfile.ZipFile(zip_fname, mode)
+    zfile = zipfile.ZipFile(zip_fname, mode, compression=compression)

    for _, _, files in os.walk(folder):  # add all files of the folder to the zipfile
        for file in files:
@@ -181,16 +234,24 @@ def compress_folder(folder: str, zip_fname: str):


 def main():
+    """
+    Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched.
+    If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed.
+    """
    options, subreddits = parser_init()
-    with open('config.yaml', 'r') as file:  # loads the config.yaml file
+    config_fname = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yaml')
+    with open(config_fname, 'r') as file:  # loads the config.yaml file
        try:
            settings = yaml.safe_load(file)
        except yaml.YAMLError as err:
            print(err)
    if settings:
-        if 'image-extensions' in settings:  # uses image extensions specified in config.yaml fallback to default
+        if 'image-extensions' in settings:
            global img_ext
            img_ext = settings['image-extensions']
+        if 'min-size' in settings:
+            global min_size
+            min_size = int(settings['min-size'])
        credentials = settings['credentials']
        client = praw.Reddit(
            client_id=credentials['client_id'],
@@ -200,13 +261,18 @@ def main():
        for subreddit in subreddits:
            dldest = subreddit
            if options.output:
-                dldest = options.output  # uses the -o output destination instead of a folder with the subreddit name
-            images = get_images(client, subreddit, limit=options.count, nsfw=options.nsfw)
+                dldest = options.output  # uses the -o output destination
+            images = get_images(client, subreddit, limit=options.count,
+                                nsfw=options.nsfw)
            if options.zip:  # downloads to a cache-folder first before compressing it to zip
+                comp_mode = zipfile.ZIP_STORED
+                if options.lzma:
+                    comp_mode = zipfile.ZIP_LZMA
+                cachedir = '.cache-' + dldest.split('/')[-1]
                images = filter_zip_files(images, dldest+'.zip')
-                download_images(images, '.cache')
-                compress_folder('.cache', dldest+'.zip')
-                shutil.rmtree('.cache')
+                download_images(images, cachedir)
+                compress_folder(cachedir, dldest+'.zip', compression=comp_mode)
+                shutil.rmtree(cachedir)
            else:
                download_images(images, dldest)
        print('[+] All downloads finished')