From 03cd4ccd997dd964f63bc94b7b34339385329562 Mon Sep 17 00:00:00 2001 From: Trivernis Date: Mon, 29 Apr 2019 13:16:31 +0200 Subject: [PATCH 1/4] Cleanup - improved readme - added comments to riddle.py --- README.md | 6 +++--- riddle.py | 29 ++++++++++++++++++----------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index f1369e6..f62c1bb 100644 --- a/README.md +++ b/README.md @@ -53,19 +53,19 @@ Options: ### Example -Download all images from r/EarthPorn: +Download all images from [r/EarthPorn](https://EarthPorn.reddit.com): ```sh python3 riddle.py EarthPorn ``` -Download all images from r/astrophotography to a zip-file: +Download all images from [r/astrophotography](https://astrophotography.reddit.com) to a zip-file: ```sh python3 riddle.py -z astrophotography ``` -Download a maximum of 200 images from r/astrophotography or r/EarthPorn to one zip-file named coolpics.zip: +Download a maximum of 200 images from [r/astrophotography](https://astrophotography.reddit.com) and [r/EarthPorn](https://EarthPorn.reddit.com) to one zip-file named coolpics.zip: ```sh python3 riddle.py -z -c 100 -o coolpics astrophotography EarthPorn diff --git a/riddle.py b/riddle.py index 9912e07..2009571 100644 --- a/riddle.py +++ b/riddle.py @@ -6,8 +6,8 @@ import optparse import zipfile import urllib.request as urlreq -user_agent = 'python:riddle:3.0 (by u/Trivernis)' -img_ext = ['jpg', 'jpeg', 'png', 'svg', 'gif'] +user_agent = 'python:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent +img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images def assert_dir_exist(dirpath): @@ -40,6 +40,9 @@ def download_file(url: str, dest: str): class ProgressBar: + """ + A simple progressbar. + """ def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'): self.prefix = prefix self.suffix = suffix @@ -99,7 +102,7 @@ def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int): :return: list of images """ print('[~] Fetching images for %s...' % subreddit) - urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)] + urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)] # fetches hot images return [url for url in urls if url.split('.')[-1] in img_ext] @@ -114,8 +117,8 @@ def download_images(images: list, dl_dir: str): print('[~] Downloading %s images to %s' % (imgcount, dl_dir)) pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete') assert_dir_exist(dl_dir) - - for img in images: + + for img in images: # download each image if it doesn't exist pb.tick() imgname = img.split('/')[-1] name = os.path.join(dl_dir, imgname) @@ -132,10 +135,13 @@ def compress_folder(folder: str, zip_fname: str): """ print('[~] Compressing folder...') mode = 'w' - if os.path.isfile(zip_fname): + + if os.path.isfile(zip_fname): # append to the zipfile if it already exists mode = 'a' + zfile = zipfile.ZipFile(zip_fname, mode) - for _, _, files in os.walk(folder): + + for _, _, files in os.walk(folder): # add all files of the folder to the zipfile for file in files: zfile.write(os.path.join(folder, file), file) zfile.close() @@ -144,13 +150,13 @@ def compress_folder(folder: str, zip_fname: str): def main(): options, subreddits = parser_init() - with open('config.yaml', 'r') as file: + with open('config.yaml', 'r') as file: # loads the config.yaml file try: settings = yaml.safe_load(file) except yaml.YAMLError as err: print(err) if settings: - if 'image-extensions' in settings: + if 'image-extensions' in settings: # uses image extensions specified in config.yaml fallback to default global img_ext img_ext = settings['image-extensions'] credentials = settings['credentials'] @@ -162,9 +168,9 @@ def main(): for subreddit in subreddits: dldest = subreddit if options.output: - dldest = options.output + dldest = options.output # uses the -o output destination instead of a folder with the subreddit name images = get_images(client, subreddit, limit=options.count) - if options.zip: + if options.zip: # downloads to a cache-folder first before compressing it to zip download_images(images, '.cache') compress_folder('.cache', dldest+'.zip') shutil.rmtree('.cache') @@ -173,4 +179,5 @@ def main(): if __name__ == '__main__': + print('\n--- riddle.py reddit-downloader by u/Trivernis ---\n') main() From c0366f6c090684bb756324fc840bed2c352931b9 Mon Sep 17 00:00:00 2001 From: Trivernis Date: Mon, 29 Apr 2019 13:28:08 +0200 Subject: [PATCH 2/4] Improved output - prints image count after download - prints finish of all downloads --- riddle.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/riddle.py b/riddle.py index 2009571..3445a1b 100644 --- a/riddle.py +++ b/riddle.py @@ -25,7 +25,7 @@ def download_file(url: str, dest: str): Downloads a url to a file :param url: download url :param dest: download destination - :return: None + :return: Success? """ f = open(dest, "wb") req = urlreq.Request(url) @@ -33,10 +33,13 @@ def download_file(url: str, dest: str): image = urlreq.urlopen(req) f.write(image.read()) f.close() + return True except ConnectionError: print('\r[-] Connection Error \r') + return False except urlreq.HTTPError as err: print('\r[-] HTTPError for %s: %s \r' % (url, err)) + return False class ProgressBar: @@ -101,7 +104,7 @@ def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int): :param limit: max images to download. if set to None the maximum fetchable amout is used. :return: list of images """ - print('[~] Fetching images for %s...' % subreddit) + print('[~] Fetching images for r/%s...' % subreddit) urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)] # fetches hot images return [url for url in urls if url.split('.')[-1] in img_ext] @@ -114,16 +117,24 @@ def download_images(images: list, dl_dir: str): :return: None """ imgcount = len(images) + realcount = preexist = 0 print('[~] Downloading %s images to %s' % (imgcount, dl_dir)) pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete') assert_dir_exist(dl_dir) for img in images: # download each image if it doesn't exist pb.tick() + success = False imgname = img.split('/')[-1] name = os.path.join(dl_dir, imgname) if not os.path.isfile(name): - download_file(img, name) + success = download_file(img, name) + else: + preexist += 1 + if success: + realcount += 1 + print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' % + (realcount, imgcount, dl_dir, preexist)) def compress_folder(folder: str, zip_fname: str): @@ -176,6 +187,7 @@ def main(): shutil.rmtree('.cache') else: download_images(images, dldest) + print('[+] All downloads finished') if __name__ == '__main__': From 60a660be2f0a4199b8aee9acb9415926907c06e9 Mon Sep 17 00:00:00 2001 From: Trivernis Date: Mon, 29 Apr 2019 13:39:26 +0200 Subject: [PATCH 3/4] Fancy changes - removed - between reddit and downloader - added badges to README --- README.md | 2 +- riddle.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f62c1bb..6f1d80d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# reddit-riddle +# riddle.py [![CodeFactor](https://www.codefactor.io/repository/github/trivernis/reddit-riddle/badge)](https://www.codefactor.io/repository/github/trivernis/reddit-riddle) [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg?style=flat-square)](https://www.gnu.org/licenses/gpl-3.0) This is a script for downloading images (or other media) from reddit subreddits. diff --git a/riddle.py b/riddle.py index 3445a1b..dd94f17 100644 --- a/riddle.py +++ b/riddle.py @@ -191,5 +191,5 @@ def main(): if __name__ == '__main__': - print('\n--- riddle.py reddit-downloader by u/Trivernis ---\n') + print('\n--- riddle.py reddit downloader by u/Trivernis ---\n') main() From fe8858c3ed4318c316fdf14620ed5f9001498971 Mon Sep 17 00:00:00 2001 From: Trivernis Date: Mon, 29 Apr 2019 14:04:38 +0200 Subject: [PATCH 4/4] Added nsfw filter - defaults to exclude nsfw results - added cli-option --nsfw to include nsfw results --- README.md | 1 + riddle.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6f1d80d..2514062 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ Options: The name of the output folder. If none is specified, it's the subreddits name. -z, --zip Stores the images in a zip file if true + -n, --nsfw If set nsfw-content is also downloaded. ``` ### Example diff --git a/riddle.py b/riddle.py index dd94f17..f967132 100644 --- a/riddle.py +++ b/riddle.py @@ -93,19 +93,24 @@ def parser_init(): parser.add_option('-z', '--zip', dest='zip', action='store_true', default=False, help='Stores the images in a zip file if true') + parser.add_option('-n', '--nsfw', dest='nsfw', + action='store_true', default=False, + help='If set nsfw-content is also downloaded.') return parser.parse_args() -def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int): +def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False): """ Uses the reddit api to fetch all image posts :param reddit_client: instance of the reddit client :param subreddit: reddit subreddit name :param limit: max images to download. if set to None the maximum fetchable amout is used. + :param nsfw: if set to true, nsfw-images won't be filtered :return: list of images """ print('[~] Fetching images for r/%s...' % subreddit) - urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)] # fetches hot images + urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit) + if not submission.over_18 or nsfw] # fetches hot images and filters by nsfw if nsfw not set to true return [url for url in urls if url.split('.')[-1] in img_ext] @@ -180,7 +185,7 @@ def main(): dldest = subreddit if options.output: dldest = options.output # uses the -o output destination instead of a folder with the subreddit name - images = get_images(client, subreddit, limit=options.count) + images = get_images(client, subreddit, limit=options.count, nsfw=options.nsfw) if options.zip: # downloads to a cache-folder first before compressing it to zip download_images(images, '.cache') compress_folder('.cache', dldest+'.zip')