From 9cb07ea4a1b5ec2c680811cff3f3ace04d15535b Mon Sep 17 00:00:00 2001 From: Trivernis Date: Mon, 29 Apr 2019 12:54:41 +0200 Subject: [PATCH] First working commit - added working riddle script using the reddit api --- .gitignore | 10 +++ README.md | 73 +++++++++++++++++- default-config.yaml | 10 +++ requirements.txt | 4 + riddle.py | 176 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 default-config.yaml create mode 100644 requirements.txt create mode 100644 riddle.py diff --git a/.gitignore b/.gitignore index a65d046..6c61521 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,13 @@ docs/_build/ # PyBuilder target/ + +# pycharm +.idea + +# sensitive information +config.yaml + +# data +.ignore +.cache \ No newline at end of file diff --git a/README.md b/README.md index cb6611c..f1369e6 100644 --- a/README.md +++ b/README.md @@ -1 +1,72 @@ -reddit-riddle +# reddit-riddle + +This is a script for downloading images (or other media) from reddit subreddits. + +## Install + +This script requires at least Python 3.6. +After cloning this repository you need to install the requirements via + +```sh +pip install -r requirements.txt +``` + +## Configuration + +Before running you need to provide credentials for the reddit API. +To do so you must create an app in your reddit [account preferences](https://www.reddit.com/prefs/apps). +The application must be of type 'script'. +The app's credentials must then be provided via a `config.yaml` file in the script's directory. +You can copy the `default-config.yaml` file to the `config.yaml` file and change the keys +`client_id` and `client_secret` under `credentials`.
+ +```yaml +# user app credentials +credentials: + client_id: your app-client id # change this + client_secret: your app-client secret # and change this + +# required extension of the file to be downloaded +image-extensions: + - png + - jpg + - jpeg +``` + +## Running + +### Help output + +```sh +Usage: riddle.py [options] [subreddits] + +Options: + -h, --help show this help message and exit + -c COUNT, --count=COUNT + The number of images to download for each subreddit. + If not set it is the maximum fetchable number. + -o OUTPUT, --output=OUTPUT + The name of the output folder. If none is specified, + it's the subreddits name. + -z, --zip Stores the images in a zip file if true +``` + +### Example + +Download all images from r/EarthPorn: + +```sh +python3 riddle.py EarthPorn +``` + +Download all images from r/astrophotography to a zip-file: + +```sh +python3 riddle.py -z astrophotography +``` + +Download a maximum of 100 images each from r/astrophotography and r/EarthPorn (up to 200 in total) to one zip-file named coolpics.zip: + +```sh +python3 riddle.py -z -c 100 -o coolpics astrophotography EarthPorn +``` diff --git a/default-config.yaml b/default-config.yaml new file mode 100644 index 0000000..be2f786 --- /dev/null +++ b/default-config.yaml @@ -0,0 +1,10 @@ +# user app credentials +credentials: + client_id: your app-client id + client_secret: your app-client secret + +# required extension of the file to be downloaded +image-extensions: + - png + - jpg + - jpeg \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..38c2083 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +yaml +praw +optparse +zipfile \ No newline at end of file diff --git a/riddle.py b/riddle.py new file mode 100644 index 0000000..9912e07 --- /dev/null +++ b/riddle.py @@ -0,0 +1,176 @@ +import os +import shutil +import yaml +import praw +import optparse +import zipfile +import urllib.request as urlreq + +user_agent = 'python:riddle:3.0 (by u/Trivernis)'
# File extensions (without the leading dot) that count as an image.
# main() replaces this list with the `image-extensions` entry of config.yaml.
img_ext = ['jpg', 'jpeg', 'png', 'svg', 'gif']


def assert_dir_exist(dirpath):
    """
    Creates the directory (and any missing parent directories) if it doesn't exist
    :param dirpath: path to the directory
    :return: None
    """
    if not os.path.exists(dirpath):
        # makedirs also creates missing parents; exist_ok covers the case where
        # the directory appears between the check and the creation.
        os.makedirs(dirpath, exist_ok=True)


def download_file(url: str, dest: str):
    """
    Downloads a url to a file.
    The destination is only created once the download succeeded, so a failed
    download never leaves an empty file behind (download_images skips files
    that already exist and would otherwise never retry them).
    :param url: download url
    :param dest: download destination
    :return: None
    """
    req = urlreq.Request(url)
    try:
        with urlreq.urlopen(req) as response:
            data = response.read()
    except urlreq.HTTPError as err:
        # HTTPError is a subclass of URLError and must be handled first
        print('\r[-] HTTPError for %s: %s \r' % (url, err))
        return
    except urlreq.URLError as err:
        # urlopen wraps most low-level failures (DNS, refused connection, ...)
        print('\r[-] URLError for %s: %s \r' % (url, err))
        return
    except ConnectionError:
        print('\r[-] Connection Error \r')
        return
    with open(dest, "wb") as f:
        f.write(data)


class ProgressBar:
    """
    Simple single-line console progress bar.
    :param total: number of steps that make up 100%
    :param prefix: text printed in front of the bar
    :param suffix: text printed behind the percentage
    :param length: width of the bar in characters
    :param fill: character used for the completed part of the bar
    """

    def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'):
        self.prefix = prefix
        self.suffix = suffix
        self.fill = fill
        self.length = length
        self.total = total
        self.progress = 0

    def tick(self):
        """
        Advances the progress by one step and redraws the bar
        :return: None
        """
        self.progress += 1
        self._print_progress()

    def setprogress(self, progress):
        """
        Sets the progress to an absolute value and redraws the bar
        :param progress: number of completed steps
        :return: None
        """
        self.progress = progress
        self._print_progress()

    def _print_progress(self):
        """
        Draws the bar for the current progress
        :return: None
        """
        iteration = self.progress
        total = self.total

        if total:
            percent = '{0:.1f}'.format(100 * (iteration / float(total)))
            filled_length = int(self.length * iteration // total)
        else:
            # nothing to do counts as done; avoids a division by zero
            percent = '{0:.1f}'.format(100.0)
            filled_length = self.length
        bar = self.fill * filled_length + '-' * (self.length - filled_length)
        print('\r%s |%s| %s%% %s' % (self.prefix, bar, percent, self.suffix), end='\r')
        # Print New Line on Complete
        if iteration == total:
            print()


def parser_init():
    """
    Initializes and parses command line arguments
    :return: the parsed options and the list of positional arguments (subreddits)
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] [subreddits]")
    parser.add_option('-c', '--count', dest='count',
                      type='int', default=None,
                      help='The number of images to download for each subreddit. '
                           'If not set it is the maximum fetchable number.')
    parser.add_option('-o', '--output', dest='output',
                      type='str', default=None,
                      help='The name of the output folder. If none is specified, it\'s the subreddits name.')
    parser.add_option('-z', '--zip', dest='zip',
                      action='store_true', default=False,
                      help='Stores the images in a zip file if true')
    return parser.parse_args()


def _url_filename(url: str):
    """
    Returns the last path segment of a url, without query string or fragment
    :param url: the url to inspect
    :return: the file name part of the url (may be empty)
    """
    from urllib.parse import urlsplit
    return urlsplit(url).path.rsplit('/', 1)[-1]


def _url_extension(url: str):
    """
    Returns the lower-cased file extension of a url without the leading dot
    :param url: the url to inspect
    :return: the extension or an empty string if the url has none
    """
    return os.path.splitext(_url_filename(url))[1].lstrip('.').lower()


def get_images(reddit_client: 'praw.Reddit', subreddit: str, limit: int):
    """
    Uses the reddit api to fetch the image posts of the subreddits hot listing.
    A post counts as image when the extension of its url path is listed in
    img_ext; the comparison ignores case, query strings and fragments.
    :param reddit_client: instance of the reddit client
    :param subreddit: reddit subreddit name
    :param limit: max images to download. if set to None the maximum fetchable amount is used.
    :return: list of image urls
    """
    print('[~] Fetching images for %s...' % subreddit)
    # normalise the configured extensions once instead of once per url
    allowed = {str(ext).lstrip('.').lower() for ext in img_ext}
    allowed.discard('')
    urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)]
    return [url for url in urls if _url_extension(url) in allowed]


def download_images(images: list, dl_dir: str):
    """
    Downloads a list of image urls to a folder.
    Files that already exist in the folder are not downloaded again.
    :param images: list of image urls
    :param dl_dir: destination directory
    :return: None
    """
    imgcount = len(images)
    print('[~] Downloading %s images to %s' % (imgcount, dl_dir))
    pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete')
    assert_dir_exist(dl_dir)

    for img in images:
        # use the url path only, so query strings don't end up in the file name
        imgname = _url_filename(img)
        name = os.path.join(dl_dir, imgname)
        if imgname and not os.path.isfile(name):
            download_file(img, name)
        # tick after the work is done so 100% really means finished
        pb.tick()


def compress_folder(folder: str, zip_fname: str):
    """
    Zips the contents of a folder to the destination zipfile name.
    If the zipfile already exists the files are appended to it; names that are
    already stored in the archive are skipped.
    :param folder: the folder to zip
    :param zip_fname: the name of the destination zipfile
    :return: None
    """
    print('[~] Compressing folder...')
    mode = 'a' if os.path.isfile(zip_fname) else 'w'
    with zipfile.ZipFile(zip_fname, mode) as zfile:
        stored = set(zfile.namelist())
        for root, _, files in os.walk(folder):
            for file in files:
                path = os.path.join(root, file)
                # archive names are relative to the zipped folder
                arcname = os.path.relpath(path, folder)
                key = arcname.replace(os.sep, '/')
                if key in stored:
                    continue
                zfile.write(path, arcname)
                stored.add(key)
    print('[+] Folder %s compressed to %s.' % (folder, zip_fname))


def main():
    """
    Reads config.yaml, then downloads (and optionally zips) the images of
    every subreddit given on the command line.
    :return: None
    """
    global img_ext
    options, subreddits = parser_init()
    try:
        with open('config.yaml', 'r') as file:
            settings = yaml.safe_load(file)
    except FileNotFoundError:
        print('[-] config.yaml not found. Copy default-config.yaml to config.yaml and set the credentials.')
        return
    except yaml.YAMLError as err:
        print(err)
        return
    if not settings:
        return

    if settings.get('image-extensions'):
        img_ext = settings['image-extensions']
    credentials = settings.get('credentials') or {}
    if 'client_id' not in credentials or 'client_secret' not in credentials:
        print('[-] config.yaml must contain client_id and client_secret under credentials.')
        return
    client = praw.Reddit(
        client_id=credentials['client_id'],
        client_secret=credentials['client_secret'],
        user_agent=user_agent
    )
    for subreddit in subreddits:
        dldest = options.output if options.output else subreddit
        images = get_images(client, subreddit, limit=options.count)
        if options.zip:
            try:
                download_images(images, '.cache')
                compress_folder('.cache', dldest + '.zip')
            finally:
                # never leave the temporary download folder behind
                shutil.rmtree('.cache', ignore_errors=True)
        else:
            download_images(images, dldest)


if __name__ == '__main__':
    main()