From 9fcfe625d4adb948a0447341dd0f6d35e85ee5c6 Mon Sep 17 00:00:00 2001 From: Trivernis Date: Mon, 28 Oct 2019 20:40:26 +0100 Subject: [PATCH] Added pipfile and shebag line - added pipfile for pipenv - added shebag line to execute the file directly - (converted to unix line endings) --- Pipfile | 13 + Pipfile.lock | 111 +++++++++ default-config.yaml | 22 +- requirements.txt | 2 +- riddle.py | 563 ++++++++++++++++++++++---------------------- 5 files changed, 418 insertions(+), 293 deletions(-) create mode 100644 Pipfile create mode 100644 Pipfile.lock diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..a41cab4 --- /dev/null +++ b/Pipfile @@ -0,0 +1,13 @@ +[[source]] +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +PyYAML = "*" +praw = "*" + +[dev-packages] + +[requires] +python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..3c20aa9 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,111 @@ +{ + "_meta": { + "hash": { + "sha256": "e030a28963c27bc726b49ad8bc68cf9648c19fde4e1a5a76d1fc8a5955b06cd1" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "hashes": [ + "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", + "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + ], + "version": "==2019.9.11" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "idna": { + "hashes": [ + "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", + "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + ], + "version": "==2.8" + }, + "praw": { + "hashes": [ + "sha256:2e5c98e49fe60e5308255ed147b670d350f98281f84f582df30f87de727b6de2", + "sha256:cb8f85541ad4c6b10214ef9639acccfb5fed7ffee977be169b85357d2d2ea6d9" + ], + "index": "pypi", + "version": "==6.4.0" + }, + "prawcore": { + "hashes": [ + "sha256:25dd14bf121bc0ad2ffc78e2322d9a01a516017105a5596cc21bb1e9a928b40c", + "sha256:ab5558efb438aa73fc66c4178bfc809194dea3ce2addf4dec873de7e2fd2824e" + ], + "version": "==1.0.1" + }, + "pyyaml": { + "hashes": [ + "sha256:0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9", + "sha256:01adf0b6c6f61bd11af6e10ca52b7d4057dd0be0343eb9283c878cf3af56aee4", + "sha256:5124373960b0b3f4aa7df1707e63e9f109b5263eca5976c66e08b1c552d4eaf8", + "sha256:5ca4f10adbddae56d824b2c09668e91219bb178a1eee1faa56af6f99f11bf696", + "sha256:7907be34ffa3c5a32b60b95f4d95ea25361c951383a894fec31be7252b2b6f34", + "sha256:7ec9b2a4ed5cad025c2278a1e6a19c011c80a3caaac804fd2d329e9cc2c287c9", + "sha256:87ae4c829bb25b9fe99cf71fbb2140c448f534e24c998cc60f39ae4f94396a73", + "sha256:9de9919becc9cc2ff03637872a440195ac4241c80536632fffeb6a1e25a74299", + "sha256:a5a85b10e450c66b49f98846937e8cfca1db3127a9d5d1e31ca45c3d0bef4c5b", + "sha256:b0997827b4f6a7c286c01c5f60384d218dca4ed7d9efa945c3e1aa623d5709ae", + "sha256:b631ef96d3222e62861443cc89d6563ba3eeb816eeb96b2629345ab795e53681", + "sha256:bf47c0607522fdbca6c9e817a6e81b08491de50f3766a7a0e6a5be7905961b41", + "sha256:f81025eddd0327c7d4cfe9b62cf33190e1e736cc6e97502b3ec425f574b3e7a8" + ], + "index": "pypi", + "version": "==5.1.2" + }, + "requests": { + "hashes": [ + "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", + "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" + ], + "version": "==2.22.0" + }, + "six": { + "hashes": [ + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + ], + "version": "==1.12.0" + }, + "update-checker": { + "hashes": [ + "sha256:59cfad7f9a0ee99f95f1dfc60f55bf184937bcab46a7270341c2c33695572453", + "sha256:70e39446fccf77b21192cf7a8214051fa93a636dc3b5c8b602b589d100a168b8" + ], + "version": "==0.16" + }, + "urllib3": { + "hashes": [ + "sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398", + "sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86" + ], + "version": "==1.25.6" + }, + "websocket-client": { + "hashes": [ + "sha256:1151d5fb3a62dc129164292e1227655e4bbc5dd5340a5165dfae61128ec50aa9", + "sha256:1fd5520878b68b84b5748bb30e592b10d0a91529d5383f74f4964e72b297fd3a" + ], + "version": "==0.56.0" + } + }, + "develop": {} +} diff --git a/default-config.yaml b/default-config.yaml index 1fe8827..859d07f 100644 --- a/default-config.yaml +++ b/default-config.yaml @@ -1,12 +1,12 @@ -# user app credentials -credentials: - client_id: your app-client id - client_secret: your app-client secret - -# required extension of the file to be downloaded -image-extensions: - - png - - jpg - - jpeg - +# user app credentials +credentials: + client_id: your app-client id + client_secret: your app-client secret + +# required extension of the file to be downloaded +image-extensions: + - png + - jpg + - jpeg + min-size: 5 # minimum size in kilobytes \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 954fea7..190b46b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -PyYaml +PyYaml praw \ No newline at end of file diff --git a/riddle.py b/riddle.py index f9de30c..0513659 100644 --- a/riddle.py +++ b/riddle.py @@ -1,281 +1,282 @@ - -# coding: utf-8 -# author: u/Trivernis -import os -import shutil -import yaml -import praw -import optparse -import zipfile -import urllib.request as urlreq - -user_agent = 'python:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent -img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images -min_size = 5 # minimum size in kilobytes. changeable in settings - - -def assert_dir_exist(dirpath): - """ - Creates the directory if it doesn't exist - :param dirpath: path to the directory - :return: None - """ - if not os.path.exists(dirpath): - os.mkdir(dirpath) - - -def download_file(url: str, dest: str, progressbar = None): - """ - Downloads a url to a file - :param url: download url - :param dest: download destination - :param progressbar: The progressbar instance to clear it before writing an error message - :return: Success? - """ - f = open(dest, "wb") - req = urlreq.Request(url) - success = False - try: - image = urlreq.urlopen(req) - f.write(image.read()) - success = True - except ConnectionError: - if progressbar: - progressbar.clear() - print('\r[-] Connection Error') - except urlreq.HTTPError as err: - if progressbar: - progressbar.clear() - print('\r[-] HTTPError for %s: %s' % (url, err)) - except urlreq.URLError as err: - if progressbar: - progressbar.clear() - print('\r[-] URLError for %s: %s' % (url, err)) - f.close() - try: - file_size = round(os.path.getsize(dest) / 1000) - if not success: - os.remove(dest) - elif file_size < min_size: - os.remove(dest) - success = False - if progressbar: - progressbar.clear() - print('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size)) - except IOError as err: - if progressbar: - progressbar.clear() - print('\r[-] Error when removing file %s: %s' % (dest, err)) - return success - - -class ProgressBar: - """ - A simple progressbar. - """ - - def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'): - self.prefix = prefix - self.suffix = suffix - self.fill = fill - self.length = length - self.total = total - self.progress = 0 - self.textlength = 0 - - def tick(self): - """ - Next step of the progressbar. The stepwidth is always 1. - :return: - """ - self.progress += 1 - self._print_progress() - - def setprogress(self, progress: float): - """ - Set the progress of the bar. - :param progress: progress in percent - :return: None - """ - self.progress = progress - self._print_progress() - - def _print_progress(self): - iteration = self.progress - total = self.total - prefix = self.prefix - suffix = self.suffix - - percent = ("{0:." + str(1) + "f}").format(100 * (iteration / float(total))) - filled_length = int(self.length * iteration // total) - bar = self.fill * filled_length + '-' * (self.length - filled_length) - textout = '\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix) - print(textout, end='\r') - self.textlength = len(textout) - # Print new line on complete - if iteration == total: - print() - - def clear(self): - """ - clear last progress output - :return: - """ - print(' '*self.textlength, end='\r') - - -def parser_init(): - """ - Initializes and parses command line arguments - :return: dict, list - """ - parser = optparse.OptionParser(usage="usage: %prog [options] [subreddits]") - parser.add_option('-c', '--count', dest='count', - type='int', default=None, - help="""The number of images to download for each subreddit. - If not set it is the maximum fetchable number.""") - parser.add_option('-o', '--output', dest='output', - type='str', default=None, - help="""The name of the output folder. - If none is specified, it\'s the subreddits name.""") - parser.add_option('-z', '--zip', dest='zip', - action='store_true', default=False, - help='Stores the images in a zip file if true') - parser.add_option('--nsfw', dest='nsfw', - action='store_true', default=False, - help='If set nsfw-content is also downloaded.') - parser.add_option('--lzma', dest='lzma', - action='store_true', default=False, - help='If set the lzma-compression module is used.') - return parser.parse_args() - - -def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False): - """ - Uses the reddit api to fetch all image posts - :param reddit_client: instance of the reddit client - :param subreddit: reddit subreddit name - :param limit: max images to download. if set to None the maximum fetchable amout is used. - :param nsfw: if set to true, nsfw-images won't be filtered - :return: list of images - """ - print('[~] Fetching images for r/%s...' % subreddit) - urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit) - if not submission.over_18 or nsfw] # fetches hot images and filters nsfw if set to false - return [url for url in urls if url.split('.')[-1] in img_ext] - - -def download_images(images: list, dl_dir: str): - """ - Downloads a list of image urls to a folder - :param images: list of image urls - :param dl_dir: destination directory - :return: None - """ - imgcount = len(images) - realcount = preexist = 0 - print('[~] Downloading %s images to %s' % (imgcount, dl_dir)) - pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete') - assert_dir_exist(dl_dir) - - for img in images: # download each image if it doesn't exist - success = False - imgname = img.split('/')[-1] - name = os.path.join(dl_dir, imgname) - if not os.path.isfile(name): - success = download_file(img, name, pb) - else: - preexist += 1 - if success: - realcount += 1 - pb.tick() - print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' % - (realcount, imgcount, dl_dir, preexist)) - - -def filter_zip_files(images: list, zip_fname: str): - """ - Removes the images that already exist in the zip-file - :param images: - :param zip_fname: - :return: - """ - if os.path.isfile(zip_fname): - zfile = zipfile.ZipFile(zip_fname, 'r') - zfnames = [f.filename for f in zfile.infolist()] - print('[~] Removing entries already in zip-file') - return [img for img in images if img.split('/')[-1] not in zfnames] - else: - return images - - -def compress_folder(folder: str, zip_fname: str, compression: int): - """ - Zips the contents of a folder to the destination zipfile name. - :param folder: the folder to zip - :param zip_fname: the name of the destination zipfile - :param compression: The compression method (constant from zipfile module) - :return: None - """ - print('[~] Compressing folder...') - mode = 'w' - - if os.path.isfile(zip_fname): # append to the zipfile if it already exists - mode = 'a' - - zfile = zipfile.ZipFile(zip_fname, mode, compression=compression) - - for _, _, files in os.walk(folder): # add all files of the folder to the zipfile - for file in files: - zfile.write(os.path.join(folder, file), file) - zfile.close() - print('[+] Folder %s compressed to %s.' % (folder, zip_fname)) - - -def main(): - """ - Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched. - If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed. - """ - options, subreddits = parser_init() - with open('config.yaml', 'r') as file: # loads the config.yaml file - try: - settings = yaml.safe_load(file) - except yaml.YAMLError as err: - print(err) - if settings: - if 'image-extensions' in settings: - global img_ext - img_ext = settings['image-extensions'] - if 'min-size' in settings: - global min_size - min_size = int(settings['min-size']) - credentials = settings['credentials'] - client = praw.Reddit( - client_id=credentials['client_id'], - client_secret=credentials['client_secret'], - user_agent=user_agent - ) - for subreddit in subreddits: - dldest = subreddit - if options.output: - dldest = options.output # uses the -o output destination - images = get_images(client, subreddit, limit=options.count, - nsfw=options.nsfw) - if options.zip: # downloads to a cache-folder first before compressing it to zip - comp_mode = zipfile.ZIP_STORED - if options.lzma: - comp_mode = zipfile.ZIP_LZMA - cachedir = '.cache-' + dldest.split('/')[-1] - images = filter_zip_files(images, dldest+'.zip') - download_images(images, cachedir) - compress_folder(cachedir, dldest+'.zip', compression=comp_mode) - shutil.rmtree(cachedir) - else: - download_images(images, dldest) - print('[+] All downloads finished') - - -if __name__ == '__main__': - print('\n--- riddle.py reddit downloader by u/Trivernis ---\n') - main() +#!/usr/bin/env python3 + +# coding: utf-8 +# author: u/Trivernis +import os +import shutil +import yaml +import praw +import optparse +import zipfile +import urllib.request as urlreq + +user_agent = 'python:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent +img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images +min_size = 5 # minimum size in kilobytes. changeable in settings + + +def assert_dir_exist(dirpath): + """ + Creates the directory if it doesn't exist + :param dirpath: path to the directory + :return: None + """ + if not os.path.exists(dirpath): + os.mkdir(dirpath) + + +def download_file(url: str, dest: str, progressbar = None): + """ + Downloads a url to a file + :param url: download url + :param dest: download destination + :param progressbar: The progressbar instance to clear it before writing an error message + :return: Success? + """ + f = open(dest, "wb") + req = urlreq.Request(url) + success = False + try: + image = urlreq.urlopen(req) + f.write(image.read()) + success = True + except ConnectionError: + if progressbar: + progressbar.clear() + print('\r[-] Connection Error') + except urlreq.HTTPError as err: + if progressbar: + progressbar.clear() + print('\r[-] HTTPError for %s: %s' % (url, err)) + except urlreq.URLError as err: + if progressbar: + progressbar.clear() + print('\r[-] URLError for %s: %s' % (url, err)) + f.close() + try: + file_size = round(os.path.getsize(dest) / 1000) + if not success: + os.remove(dest) + elif file_size < min_size: + os.remove(dest) + success = False + if progressbar: + progressbar.clear() + print('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size)) + except IOError as err: + if progressbar: + progressbar.clear() + print('\r[-] Error when removing file %s: %s' % (dest, err)) + return success + + +class ProgressBar: + """ + A simple progressbar. + """ + + def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'): + self.prefix = prefix + self.suffix = suffix + self.fill = fill + self.length = length + self.total = total + self.progress = 0 + self.textlength = 0 + + def tick(self): + """ + Next step of the progressbar. The stepwidth is always 1. + :return: + """ + self.progress += 1 + self._print_progress() + + def setprogress(self, progress: float): + """ + Set the progress of the bar. + :param progress: progress in percent + :return: None + """ + self.progress = progress + self._print_progress() + + def _print_progress(self): + iteration = self.progress + total = self.total + prefix = self.prefix + suffix = self.suffix + + percent = ("{0:." + str(1) + "f}").format(100 * (iteration / float(total))) + filled_length = int(self.length * iteration // total) + bar = self.fill * filled_length + '-' * (self.length - filled_length) + textout = '\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix) + print(textout, end='\r') + self.textlength = len(textout) + # Print new line on complete + if iteration == total: + print() + + def clear(self): + """ + clear last progress output + :return: + """ + print(' '*self.textlength, end='\r') + + +def parser_init(): + """ + Initializes and parses command line arguments + :return: dict, list + """ + parser = optparse.OptionParser(usage="usage: %prog [options] [subreddits]") + parser.add_option('-c', '--count', dest='count', + type='int', default=None, + help="""The number of images to download for each subreddit. + If not set it is the maximum fetchable number.""") + parser.add_option('-o', '--output', dest='output', + type='str', default=None, + help="""The name of the output folder. + If none is specified, it\'s the subreddits name.""") + parser.add_option('-z', '--zip', dest='zip', + action='store_true', default=False, + help='Stores the images in a zip file if true') + parser.add_option('--nsfw', dest='nsfw', + action='store_true', default=False, + help='If set nsfw-content is also downloaded.') + parser.add_option('--lzma', dest='lzma', + action='store_true', default=False, + help='If set the lzma-compression module is used.') + return parser.parse_args() + + +def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False): + """ + Uses the reddit api to fetch all image posts + :param reddit_client: instance of the reddit client + :param subreddit: reddit subreddit name + :param limit: max images to download. if set to None the maximum fetchable amout is used. + :param nsfw: if set to true, nsfw-images won't be filtered + :return: list of images + """ + print('[~] Fetching images for r/%s...' % subreddit) + urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit) + if not submission.over_18 or nsfw] # fetches hot images and filters nsfw if set to false + return [url for url in urls if url.split('.')[-1] in img_ext] + + +def download_images(images: list, dl_dir: str): + """ + Downloads a list of image urls to a folder + :param images: list of image urls + :param dl_dir: destination directory + :return: None + """ + imgcount = len(images) + realcount = preexist = 0 + print('[~] Downloading %s images to %s' % (imgcount, dl_dir)) + pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete') + assert_dir_exist(dl_dir) + + for img in images: # download each image if it doesn't exist + success = False + imgname = img.split('/')[-1] + name = os.path.join(dl_dir, imgname) + if not os.path.isfile(name): + success = download_file(img, name, pb) + else: + preexist += 1 + if success: + realcount += 1 + pb.tick() + print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' % + (realcount, imgcount, dl_dir, preexist)) + + +def filter_zip_files(images: list, zip_fname: str): + """ + Removes the images that already exist in the zip-file + :param images: + :param zip_fname: + :return: + """ + if os.path.isfile(zip_fname): + zfile = zipfile.ZipFile(zip_fname, 'r') + zfnames = [f.filename for f in zfile.infolist()] + print('[~] Removing entries already in zip-file') + return [img for img in images if img.split('/')[-1] not in zfnames] + else: + return images + + +def compress_folder(folder: str, zip_fname: str, compression: int): + """ + Zips the contents of a folder to the destination zipfile name. + :param folder: the folder to zip + :param zip_fname: the name of the destination zipfile + :param compression: The compression method (constant from zipfile module) + :return: None + """ + print('[~] Compressing folder...') + mode = 'w' + + if os.path.isfile(zip_fname): # append to the zipfile if it already exists + mode = 'a' + + zfile = zipfile.ZipFile(zip_fname, mode, compression=compression) + + for _, _, files in os.walk(folder): # add all files of the folder to the zipfile + for file in files: + zfile.write(os.path.join(folder, file), file) + zfile.close() + print('[+] Folder %s compressed to %s.' % (folder, zip_fname)) + + +def main(): + """ + Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched. + If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed. + """ + options, subreddits = parser_init() + with open('config.yaml', 'r') as file: # loads the config.yaml file + try: + settings = yaml.safe_load(file) + except yaml.YAMLError as err: + print(err) + if settings: + if 'image-extensions' in settings: + global img_ext + img_ext = settings['image-extensions'] + if 'min-size' in settings: + global min_size + min_size = int(settings['min-size']) + credentials = settings['credentials'] + client = praw.Reddit( + client_id=credentials['client_id'], + client_secret=credentials['client_secret'], + user_agent=user_agent + ) + for subreddit in subreddits: + dldest = subreddit + if options.output: + dldest = options.output # uses the -o output destination + images = get_images(client, subreddit, limit=options.count, + nsfw=options.nsfw) + if options.zip: # downloads to a cache-folder first before compressing it to zip + comp_mode = zipfile.ZIP_STORED + if options.lzma: + comp_mode = zipfile.ZIP_LZMA + cachedir = '.cache-' + dldest.split('/')[-1] + images = filter_zip_files(images, dldest+'.zip') + download_images(images, cachedir) + compress_folder(cachedir, dldest+'.zip', compression=comp_mode) + shutil.rmtree(cachedir) + else: + download_images(images, dldest) + print('[+] All downloads finished') + + +if __name__ == '__main__': + print('\n--- riddle.py reddit downloader by u/Trivernis ---\n') + main()