From 03cd4ccd997dd964f63bc94b7b34339385329562 Mon Sep 17 00:00:00 2001
From: Trivernis <trivernis@hotmail.de>
Date: Mon, 29 Apr 2019 13:16:31 +0200
Subject: [PATCH 1/4] Cleanup

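- improved readme (subreddit names in the examples now link to the subreddits)
- added comments to riddle.py
- removed svg and gif from the default image extensions
- prints a banner on startup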
---
 README.md |  6 +++---
 riddle.py | 29 ++++++++++++++++++-----------
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index f1369e6..f62c1bb 100644
--- a/README.md
+++ b/README.md
@@ -53,19 +53,19 @@ Options:
 
 ### Example
 
-Download all images from r/EarthPorn:
+Download all images from [r/EarthPorn](https://EarthPorn.reddit.com):
 
 ```sh
 python3 riddle.py EarthPorn
 ```
 
-Download all images from r/astrophotography to a zip-file:
+Download all images from [r/astrophotography](https://astrophotography.reddit.com) to a zip-file:
 
 ```sh
 python3 riddle.py -z astrophotography
 ```
 
-Download a maximum of 200 images from r/astrophotography or r/EarthPorn to one zip-file named coolpics.zip:
+Download a maximum of 200 images from [r/astrophotography](https://astrophotography.reddit.com) and [r/EarthPorn](https://EarthPorn.reddit.com) to one zip-file named coolpics.zip:
 
 ```sh
 python3 riddle.py -z -c 100 -o coolpics astrophotography EarthPorn
diff --git a/riddle.py b/riddle.py
index 9912e07..2009571 100644
--- a/riddle.py
+++ b/riddle.py
@@ -6,8 +6,8 @@ import optparse
 import zipfile
 import urllib.request as urlreq
 
-user_agent = 'python:riddle:3.0 (by u/Trivernis)'
-img_ext = ['jpg', 'jpeg', 'png', 'svg', 'gif']
+user_agent = 'python:riddle:3.0 (by u/Trivernis)'  # the reddit api user-agent
+img_ext = ['jpg', 'jpeg', 'png']  # default used extensions to filter for images
 
 
 def assert_dir_exist(dirpath):
@@ -40,6 +40,9 @@ def download_file(url: str, dest: str):
 
 
 class ProgressBar:
+    """
+    A simple progressbar.
+    """
     def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'):
         self.prefix = prefix
         self.suffix = suffix
@@ -99,7 +102,7 @@ def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int):
     :return: list of images
     """
     print('[~] Fetching images for %s...' % subreddit)
-    urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)]
+    urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)] # fetches hot images
     return [url for url in urls if url.split('.')[-1] in img_ext]
 
 
@@ -114,8 +117,8 @@ def download_images(images: list, dl_dir: str):
     print('[~] Downloading %s images to %s' % (imgcount, dl_dir))
     pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete')
     assert_dir_exist(dl_dir)
-    
-    for img in images:
+
+    for img in images:  # download each image if it doesn't exist
         pb.tick()
         imgname = img.split('/')[-1]
         name = os.path.join(dl_dir, imgname)
@@ -132,10 +135,13 @@ def compress_folder(folder: str, zip_fname: str):
     """
     print('[~] Compressing folder...')
     mode = 'w'
-    if os.path.isfile(zip_fname):
+
+    if os.path.isfile(zip_fname):  # append to the zipfile if it already exists
         mode = 'a'
+
     zfile = zipfile.ZipFile(zip_fname, mode)
-    for _, _, files in os.walk(folder):
+
+    for _, _, files in os.walk(folder):  # add all files of the folder to the zipfile
         for file in files:
             zfile.write(os.path.join(folder, file), file)
     zfile.close()
@@ -144,13 +150,13 @@ def compress_folder(folder: str, zip_fname: str):
 
 def main():
     options, subreddits = parser_init()
-    with open('config.yaml', 'r') as file:
+    with open('config.yaml', 'r') as file:  # loads the config.yaml file
         try:
             settings = yaml.safe_load(file)
         except yaml.YAMLError as err:
             print(err)
     if settings:
-        if 'image-extensions' in settings:
+        if 'image-extensions' in settings:  # uses image extensions specified in config.yaml fallback to default
             global img_ext
             img_ext = settings['image-extensions']
         credentials = settings['credentials']
@@ -162,9 +168,9 @@ def main():
         for subreddit in subreddits:
             dldest = subreddit
             if options.output:
-                dldest = options.output
+                dldest = options.output  # uses the -o output destination instead of a folder with the subreddit name
             images = get_images(client, subreddit, limit=options.count)
-            if options.zip:
+            if options.zip:  # downloads to a cache-folder first before compressing it to zip
                 download_images(images, '.cache')
                 compress_folder('.cache', dldest+'.zip')
                 shutil.rmtree('.cache')
@@ -173,4 +179,5 @@ def main():
 
 
 if __name__ == '__main__':
+    print('\n--- riddle.py reddit-downloader by u/Trivernis ---\n')
     main()

From c0366f6c090684bb756324fc840bed2c352931b9 Mon Sep 17 00:00:00 2001
From: Trivernis <trivernis@hotmail.de>
Date: Mon, 29 Apr 2019 13:28:08 +0200
Subject: [PATCH 2/4] Improved output

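- download_file now returns whether the download succeeded
- prints how many images were downloaded and how many already existed
- prints a message once all downloads have finished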
---
 riddle.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/riddle.py b/riddle.py
index 2009571..3445a1b 100644
--- a/riddle.py
+++ b/riddle.py
@@ -25,7 +25,7 @@ def download_file(url: str, dest: str):
     Downloads a url to a file
     :param url: download url
     :param dest: download destination
-    :return: None
+    :return: True if the file was downloaded, False on a connection or HTTP error
     """
     f = open(dest, "wb")
     req = urlreq.Request(url)
@@ -33,10 +33,13 @@ def download_file(url: str, dest: str):
         image = urlreq.urlopen(req)
         f.write(image.read())
         f.close()
+        return True
     except ConnectionError:
         print('\r[-] Connection Error \r')
+        return False
     except urlreq.HTTPError as err:
         print('\r[-] HTTPError for %s: %s \r' % (url, err))
+        return False
 
 
 class ProgressBar:
@@ -101,7 +104,7 @@ def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int):
     :param limit: max images to download. if set to None the maximum fetchable amout is used.
     :return: list of images
     """
-    print('[~] Fetching images for %s...' % subreddit)
+    print('[~] Fetching images for r/%s...' % subreddit)
     urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)] # fetches hot images
     return [url for url in urls if url.split('.')[-1] in img_ext]
 
@@ -114,16 +117,24 @@ def download_images(images: list, dl_dir: str):
     :return: None
     """
     imgcount = len(images)
+    realcount = preexist = 0
     print('[~] Downloading %s images to %s' % (imgcount, dl_dir))
     pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete')
     assert_dir_exist(dl_dir)
 
     for img in images:  # download each image if it doesn't exist
         pb.tick()
+        success = False
         imgname = img.split('/')[-1]
         name = os.path.join(dl_dir, imgname)
         if not os.path.isfile(name):
-            download_file(img, name)
+            success = download_file(img, name)
+        else:
+            preexist += 1
+        if success:
+            realcount += 1
+    print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' %
+          (realcount, imgcount, dl_dir, preexist))
 
 
 def compress_folder(folder: str, zip_fname: str):
@@ -176,6 +187,7 @@ def main():
                 shutil.rmtree('.cache')
             else:
                 download_images(images, dldest)
+        print('[+] All downloads finished')
 
 
 if __name__ == '__main__':

From 60a660be2f0a4199b8aee9acb9415926907c06e9 Mon Sep 17 00:00:00 2001
From: Trivernis <trivernis@hotmail.de>
Date: Mon, 29 Apr 2019 13:39:26 +0200
Subject: [PATCH 3/4] Fancy changes

- removed the hyphen between "reddit" and "downloader" in the startup banner
- renamed the README title to riddle.py and added badges
---
 README.md | 2 +-
 riddle.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f62c1bb..6f1d80d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# reddit-riddle
+# riddle.py [![CodeFactor](https://www.codefactor.io/repository/github/trivernis/reddit-riddle/badge)](https://www.codefactor.io/repository/github/trivernis/reddit-riddle) [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg?style=flat-square)](https://www.gnu.org/licenses/gpl-3.0) 
 
 This is a script for downloading images (or other media) from reddit subreddits.
 
diff --git a/riddle.py b/riddle.py
index 3445a1b..dd94f17 100644
--- a/riddle.py
+++ b/riddle.py
@@ -191,5 +191,5 @@ def main():
 
 
 if __name__ == '__main__':
-    print('\n--- riddle.py reddit-downloader by u/Trivernis ---\n')
+    print('\n--- riddle.py reddit downloader by u/Trivernis ---\n')
     main()

From fe8858c3ed4318c316fdf14620ed5f9001498971 Mon Sep 17 00:00:00 2001
From: Trivernis <trivernis@hotmail.de>
Date: Mon, 29 Apr 2019 14:04:38 +0200
Subject: [PATCH 4/4] Added nsfw filter

- defaults to exclude nsfw results
- added cli-option --nsfw to include nsfw results
---
 README.md |  1 +
 riddle.py | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 6f1d80d..2514062 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,7 @@ Options:
                         The name of the output folder. If none is specified,
                         it's the subreddits name.
   -z, --zip             Stores the images in a zip file if true
+  -n, --nsfw            If set nsfw-content is also downloaded.
 ```
 
 ### Example
diff --git a/riddle.py b/riddle.py
index dd94f17..f967132 100644
--- a/riddle.py
+++ b/riddle.py
@@ -93,19 +93,24 @@ def parser_init():
     parser.add_option('-z', '--zip', dest='zip',
                       action='store_true', default=False,
                       help='Stores the images in a zip file if true')
+    parser.add_option('-n', '--nsfw', dest='nsfw',
+                      action='store_true', default=False,
+                      help='If set nsfw-content is also downloaded.')
     return parser.parse_args()
 
 
-def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int):
+def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False):
     """
     Uses the reddit api to fetch all image posts
     :param reddit_client: instance of the reddit client
     :param subreddit: reddit subreddit name
     :param limit: max images to download. if set to None the maximum fetchable amout is used.
+    :param nsfw: if set to true, nsfw-images won't be filtered
     :return: list of images
     """
     print('[~] Fetching images for r/%s...' % subreddit)
-    urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)] # fetches hot images
+    urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)
+            if not submission.over_18 or nsfw]  # fetches hot images and filters by nsfw if nsfw not set to true
     return [url for url in urls if url.split('.')[-1] in img_ext]
 
 
@@ -180,7 +185,7 @@ def main():
             dldest = subreddit
             if options.output:
                 dldest = options.output  # uses the -o output destination instead of a folder with the subreddit name
-            images = get_images(client, subreddit, limit=options.count)
+            images = get_images(client, subreddit, limit=options.count, nsfw=options.nsfw)
             if options.zip:  # downloads to a cache-folder first before compressing it to zip
                 download_images(images, '.cache')
                 compress_folder('.cache', dldest+'.zip')