diff --git a/.gitignore b/.gitignore index 6c61521..f1a60f4 100644 --- a/.gitignore +++ b/.gitignore @@ -65,4 +65,5 @@ config.yaml # data .ignore -.cache \ No newline at end of file +.cache +*.zip \ No newline at end of file diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..a41cab4 --- /dev/null +++ b/Pipfile @@ -0,0 +1,13 @@ +[[source]] +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +PyYAML = "*" +praw = "*" + +[dev-packages] + +[requires] +python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..3c20aa9 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,111 @@ +{ + "_meta": { + "hash": { + "sha256": "e030a28963c27bc726b49ad8bc68cf9648c19fde4e1a5a76d1fc8a5955b06cd1" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "hashes": [ + "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", + "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + ], + "version": "==2019.9.11" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "idna": { + "hashes": [ + "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", + "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + ], + "version": "==2.8" + }, + "praw": { + "hashes": [ + "sha256:2e5c98e49fe60e5308255ed147b670d350f98281f84f582df30f87de727b6de2", + "sha256:cb8f85541ad4c6b10214ef9639acccfb5fed7ffee977be169b85357d2d2ea6d9" + ], + "index": "pypi", + "version": "==6.4.0" + }, + "prawcore": { + "hashes": [ + "sha256:25dd14bf121bc0ad2ffc78e2322d9a01a516017105a5596cc21bb1e9a928b40c", + "sha256:ab5558efb438aa73fc66c4178bfc809194dea3ce2addf4dec873de7e2fd2824e" + ], + "version": "==1.0.1" + }, + "pyyaml": { + "hashes": [ + "sha256:0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9", + "sha256:01adf0b6c6f61bd11af6e10ca52b7d4057dd0be0343eb9283c878cf3af56aee4", + "sha256:5124373960b0b3f4aa7df1707e63e9f109b5263eca5976c66e08b1c552d4eaf8", + "sha256:5ca4f10adbddae56d824b2c09668e91219bb178a1eee1faa56af6f99f11bf696", + "sha256:7907be34ffa3c5a32b60b95f4d95ea25361c951383a894fec31be7252b2b6f34", + "sha256:7ec9b2a4ed5cad025c2278a1e6a19c011c80a3caaac804fd2d329e9cc2c287c9", + "sha256:87ae4c829bb25b9fe99cf71fbb2140c448f534e24c998cc60f39ae4f94396a73", + "sha256:9de9919becc9cc2ff03637872a440195ac4241c80536632fffeb6a1e25a74299", + "sha256:a5a85b10e450c66b49f98846937e8cfca1db3127a9d5d1e31ca45c3d0bef4c5b", + "sha256:b0997827b4f6a7c286c01c5f60384d218dca4ed7d9efa945c3e1aa623d5709ae", + "sha256:b631ef96d3222e62861443cc89d6563ba3eeb816eeb96b2629345ab795e53681", + "sha256:bf47c0607522fdbca6c9e817a6e81b08491de50f3766a7a0e6a5be7905961b41", + "sha256:f81025eddd0327c7d4cfe9b62cf33190e1e736cc6e97502b3ec425f574b3e7a8" + ], + "index": "pypi", + "version": "==5.1.2" + }, + "requests": { + "hashes": [ + "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", + "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" + ], + "version": "==2.22.0" + }, + "six": { + "hashes": [ + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + ], + "version": "==1.12.0" + }, + "update-checker": { + "hashes": [ + "sha256:59cfad7f9a0ee99f95f1dfc60f55bf184937bcab46a7270341c2c33695572453", + "sha256:70e39446fccf77b21192cf7a8214051fa93a636dc3b5c8b602b589d100a168b8" + ], + "version": "==0.16" + }, + "urllib3": { + "hashes": [ + "sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398", + "sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86" + ], + "version": "==1.25.6" + }, + "websocket-client": { + "hashes": [ + "sha256:1151d5fb3a62dc129164292e1227655e4bbc5dd5340a5165dfae61128ec50aa9", + "sha256:1fd5520878b68b84b5748bb30e592b10d0a91529d5383f74f4964e72b297fd3a" + ], + "version": "==0.56.0" + } + }, + "develop": {} +} diff --git a/README.md b/README.md index 2514062..849552e 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,11 @@ This script requires at least Python 3.6. After cloning this repository you need to install the requirements via ```sh -pip install -r requirements.txt +pipenv install +``` +or +```sh +pip3 install -r requirements.txt ``` ## Configuration @@ -23,21 +27,23 @@ You can copy the `default-config.yaml` file to the `config.yaml` file and change ```yaml # user app credentials credentials: - client_id: your app-client id # change this - client_secret: your app-client secret # and change this + client_id: your app-client id # change this + client_secret: your app-client secret # change this # required extension of the file to be downloaded image-extensions: - png - jpg - jpeg + +min-size: 5 # minimum size in kilobytes ``` ## Running ### Help output -```sh +``` Usage: riddle.py [options] [subreddits] Options: @@ -49,7 +55,8 @@ Options: The name of the output folder. If none is specified, it's the subreddits name. -z, --zip Stores the images in a zip file if true - -n, --nsfw If set nsfw-content is also downloaded. + --nsfw If set nsfw-content is also downloaded. + --lzma If set the lzma-compression module is used. ``` ### Example diff --git a/default-config.yaml b/default-config.yaml index be2f786..859d07f 100644 --- a/default-config.yaml +++ b/default-config.yaml @@ -1,10 +1,12 @@ -# user app credentials -credentials: - client_id: your app-client id - client_secret: your app-client secret - -# required extension of the file to be downloaded -image-extensions: - - png - - jpg - - jpeg \ No newline at end of file +# user app credentials +credentials: + client_id: your app-client id + client_secret: your app-client secret + +# required extension of the file to be downloaded +image-extensions: + - png + - jpg + - jpeg + +min-size: 5 # minimum size in kilobytes \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ee07df1..190b46b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -PyYaml -praw -zipfile \ No newline at end of file +PyYaml +praw \ No newline at end of file diff --git a/riddle.py b/riddle.py index 01d9397..900ae9d 100644 --- a/riddle.py +++ b/riddle.py @@ -1,3 +1,7 @@ +#!/usr/bin/env python3 + +# coding: utf-8 +# author: u/Trivernis import os import shutil import yaml @@ -8,6 +12,7 @@ import urllib.request as urlreq user_agent = 'python:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images +min_size = 5 # minimum size in kilobytes. changeable in settings def assert_dir_exist(dirpath): @@ -20,32 +25,56 @@ def assert_dir_exist(dirpath): os.mkdir(dirpath) -def download_file(url: str, dest: str): +def download_file(url: str, dest: str, progressbar = None): """ Downloads a url to a file :param url: download url :param dest: download destination + :param progressbar: The progressbar instance to clear it before writing an error message :return: Success? """ f = open(dest, "wb") req = urlreq.Request(url) + success = False try: image = urlreq.urlopen(req) f.write(image.read()) - f.close() - return True + success = True except ConnectionError: - print('\r[-] Connection Error \r') - return False + if progressbar: + progressbar.clear() + print('\r[-] Connection Error') except urlreq.HTTPError as err: - print('\r[-] HTTPError for %s: %s \r' % (url, err)) - return False + if progressbar: + progressbar.clear() + print('\r[-] HTTPError for %s: %s' % (url, err)) + except urlreq.URLError as err: + if progressbar: + progressbar.clear() + print('\r[-] URLError for %s: %s' % (url, err)) + f.close() + try: + file_size = round(os.path.getsize(dest) / 1000) + if not success: + os.remove(dest) + elif file_size < min_size: + os.remove(dest) + success = False + if progressbar: + progressbar.clear() + print('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size)) + except IOError as err: + if progressbar: + progressbar.clear() + print('\r[-] Error when removing file %s: %s' % (dest, err)) + return success class ProgressBar: """ A simple progressbar. """ + def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'): self.prefix = prefix self.suffix = suffix @@ -53,12 +82,22 @@ class ProgressBar: self.length = length self.total = total self.progress = 0 + self.textlength = 0 def tick(self): + """ + Next step of the progressbar. The stepwidth is always 1. + :return: + """ self.progress += 1 self._print_progress() - def setprogress(self, progress): + def setprogress(self, progress: float): + """ + Set the progress of the bar. + :param progress: progress in percent + :return: None + """ self.progress = progress self._print_progress() @@ -71,11 +110,20 @@ class ProgressBar: percent = ("{0:." + str(1) + "f}").format(100 * (iteration / float(total))) filled_length = int(self.length * iteration // total) bar = self.fill * filled_length + '-' * (self.length - filled_length) - print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r') - # Print New Line on Complete + textout = '\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix) + print(textout, end='\r') + self.textlength = len(textout) + # Print new line on complete if iteration == total: print() + def clear(self): + """ + clear last progress output + :return: + """ + print(' '*self.textlength, end='\r') + def parser_init(): """ @@ -89,13 +137,17 @@ def parser_init(): If not set it is the maximum fetchable number.""") parser.add_option('-o', '--output', dest='output', type='str', default=None, - help='The name of the output folder. If none is specified, it\'s the subreddits name.') + help="""The name of the output folder. + If none is specified, it\'s the subreddits name.""") parser.add_option('-z', '--zip', dest='zip', action='store_true', default=False, help='Stores the images in a zip file if true') - parser.add_option('-n', '--nsfw', dest='nsfw', + parser.add_option('--nsfw', dest='nsfw', action='store_true', default=False, help='If set nsfw-content is also downloaded.') + parser.add_option('--lzma', dest='lzma', + action='store_true', default=False, + help='If set the lzma-compression module is used.') return parser.parse_args() @@ -110,7 +162,7 @@ def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: boo """ print('[~] Fetching images for r/%s...' % subreddit) urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit) - if not submission.over_18 or nsfw] # fetches hot images and filters by nsfw if nsfw not set to true + if not submission.over_18 or nsfw] # fetches hot images and filters nsfw if set to false return [url for url in urls if url.split('.')[-1] in img_ext] @@ -128,16 +180,16 @@ def download_images(images: list, dl_dir: str): assert_dir_exist(dl_dir) for img in images: # download each image if it doesn't exist - pb.tick() success = False imgname = img.split('/')[-1] name = os.path.join(dl_dir, imgname) if not os.path.isfile(name): - success = download_file(img, name) + success = download_file(img, name, pb) else: preexist += 1 if success: realcount += 1 + pb.tick() print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' % (realcount, imgcount, dl_dir, preexist)) @@ -158,11 +210,12 @@ def filter_zip_files(images: list, zip_fname: str): return images -def compress_folder(folder: str, zip_fname: str): +def compress_folder(folder: str, zip_fname: str, compression: int): """ Zips the contents of a folder to the destination zipfile name. :param folder: the folder to zip :param zip_fname: the name of the destination zipfile + :param compression: The compression method (constant from zipfile module) :return: None """ print('[~] Compressing folder...') @@ -171,7 +224,7 @@ def compress_folder(folder: str, zip_fname: str): if os.path.isfile(zip_fname): # append to the zipfile if it already exists mode = 'a' - zfile = zipfile.ZipFile(zip_fname, mode) + zfile = zipfile.ZipFile(zip_fname, mode, compression=compression) for _, _, files in os.walk(folder): # add all files of the folder to the zipfile for file in files: @@ -181,16 +234,24 @@ def compress_folder(folder: str, zip_fname: str): def main(): + """ + Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched. + If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed. + """ options, subreddits = parser_init() - with open('config.yaml', 'r') as file: # loads the config.yaml file + config_fname = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yaml') + with open(config_fname, 'r') as file: # loads the config.yaml file try: settings = yaml.safe_load(file) except yaml.YAMLError as err: print(err) if settings: - if 'image-extensions' in settings: # uses image extensions specified in config.yaml fallback to default + if 'image-extensions' in settings: global img_ext img_ext = settings['image-extensions'] + if 'min-size' in settings: + global min_size + min_size = int(settings['min-size']) credentials = settings['credentials'] client = praw.Reddit( client_id=credentials['client_id'], @@ -200,13 +261,18 @@ def main(): for subreddit in subreddits: dldest = subreddit if options.output: - dldest = options.output # uses the -o output destination instead of a folder with the subreddit name - images = get_images(client, subreddit, limit=options.count, nsfw=options.nsfw) + dldest = options.output # uses the -o output destination + images = get_images(client, subreddit, limit=options.count, + nsfw=options.nsfw) if options.zip: # downloads to a cache-folder first before compressing it to zip + comp_mode = zipfile.ZIP_STORED + if options.lzma: + comp_mode = zipfile.ZIP_LZMA + cachedir = '.cache-' + dldest.split('/')[-1] images = filter_zip_files(images, dldest+'.zip') - download_images(images, '.cache') - compress_folder('.cache', dldest+'.zip') - shutil.rmtree('.cache') + download_images(images, cachedir) + compress_folder(cachedir, dldest+'.zip', compression=comp_mode) + shutil.rmtree(cachedir) else: download_images(images, dldest) print('[+] All downloads finished')