Merge pull request #1 from Trivernis/develop

Develop
master
Trivernis 6 years ago committed by GitHub
commit 22624bc625
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,4 +1,4 @@
# reddit-riddle # riddle.py [![CodeFactor](https://www.codefactor.io/repository/github/trivernis/reddit-riddle/badge)](https://www.codefactor.io/repository/github/trivernis/reddit-riddle) [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg?style=flat-square)](https://www.gnu.org/licenses/gpl-3.0)
This is a script for downloading images (or other media) from reddit subreddits. This is a script for downloading images (or other media) from reddit subreddits.
@ -49,23 +49,24 @@ Options:
The name of the output folder. If none is specified, The name of the output folder. If none is specified,
it's the subreddit's name. it's the subreddit's name.
-z, --zip Stores the images in a zip file if true -z, --zip Stores the images in a zip file if true
-n, --nsfw If set nsfw-content is also downloaded.
``` ```
### Example ### Example
Download all images from r/EarthPorn: Download all images from [r/EarthPorn](https://EarthPorn.reddit.com):
```sh ```sh
python3 riddle.py EarthPorn python3 riddle.py EarthPorn
``` ```
Download all images from r/astrophotography to a zip-file: Download all images from [r/astrophotography](https://astrophotography.reddit.com) to a zip-file:
```sh ```sh
python3 riddle.py -z astrophotography python3 riddle.py -z astrophotography
``` ```
Download a maximum of 200 images from r/astrophotography or r/EarthPorn to one zip-file named coolpics.zip: Download a maximum of 200 images from [r/astrophotography](https://astrophotography.reddit.com) and [r/EarthPorn](https://EarthPorn.reddit.com) to one zip-file named coolpics.zip:
```sh ```sh
python3 riddle.py -z -c 100 -o coolpics astrophotography EarthPorn python3 riddle.py -z -c 100 -o coolpics astrophotography EarthPorn

@ -6,8 +6,8 @@ import optparse
import zipfile import zipfile
import urllib.request as urlreq import urllib.request as urlreq
user_agent = 'python:riddle:3.0 (by u/Trivernis)' user_agent = 'python:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent
img_ext = ['jpg', 'jpeg', 'png', 'svg', 'gif'] img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images
def assert_dir_exist(dirpath): def assert_dir_exist(dirpath):
@ -25,7 +25,7 @@ def download_file(url: str, dest: str):
Downloads a url to a file Downloads a url to a file
:param url: download url :param url: download url
:param dest: download destination :param dest: download destination
:return: None :return: Success?
""" """
f = open(dest, "wb") f = open(dest, "wb")
req = urlreq.Request(url) req = urlreq.Request(url)
@ -33,13 +33,19 @@ def download_file(url: str, dest: str):
image = urlreq.urlopen(req) image = urlreq.urlopen(req)
f.write(image.read()) f.write(image.read())
f.close() f.close()
return True
except ConnectionError: except ConnectionError:
print('\r[-] Connection Error \r') print('\r[-] Connection Error \r')
return False
except urlreq.HTTPError as err: except urlreq.HTTPError as err:
print('\r[-] HTTPError for %s: %s \r' % (url, err)) print('\r[-] HTTPError for %s: %s \r' % (url, err))
return False
class ProgressBar: class ProgressBar:
"""
A simple progressbar.
"""
def __init__(self, total=100, prefix='', suffix='', length=50, fill=''): def __init__(self, total=100, prefix='', suffix='', length=50, fill=''):
self.prefix = prefix self.prefix = prefix
self.suffix = suffix self.suffix = suffix
@ -87,19 +93,24 @@ def parser_init():
parser.add_option('-z', '--zip', dest='zip', parser.add_option('-z', '--zip', dest='zip',
action='store_true', default=False, action='store_true', default=False,
help='Stores the images in a zip file if true') help='Stores the images in a zip file if true')
parser.add_option('-n', '--nsfw', dest='nsfw',
action='store_true', default=False,
help='If set nsfw-content is also downloaded.')
return parser.parse_args() return parser.parse_args()
def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int): def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False):
""" """
Uses the reddit api to fetch all image posts Uses the reddit api to fetch all image posts
:param reddit_client: instance of the reddit client :param reddit_client: instance of the reddit client
:param subreddit: reddit subreddit name :param subreddit: reddit subreddit name
:param limit: max images to download. if set to None the maximum fetchable amount is used. :param limit: max images to download. if set to None the maximum fetchable amount is used.
:param nsfw: if set to true, nsfw-images won't be filtered
:return: list of images :return: list of images
""" """
print('[~] Fetching images for %s...' % subreddit) print('[~] Fetching images for r/%s...' % subreddit)
urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)] urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)
if not submission.over_18 or nsfw] # fetches hot images and filters by nsfw if nsfw not set to true
return [url for url in urls if url.split('.')[-1] in img_ext] return [url for url in urls if url.split('.')[-1] in img_ext]
@ -111,16 +122,24 @@ def download_images(images: list, dl_dir: str):
:return: None :return: None
""" """
imgcount = len(images) imgcount = len(images)
realcount = preexist = 0
print('[~] Downloading %s images to %s' % (imgcount, dl_dir)) print('[~] Downloading %s images to %s' % (imgcount, dl_dir))
pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete') pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete')
assert_dir_exist(dl_dir) assert_dir_exist(dl_dir)
for img in images: for img in images: # download each image if it doesn't exist
pb.tick() pb.tick()
success = False
imgname = img.split('/')[-1] imgname = img.split('/')[-1]
name = os.path.join(dl_dir, imgname) name = os.path.join(dl_dir, imgname)
if not os.path.isfile(name): if not os.path.isfile(name):
download_file(img, name) success = download_file(img, name)
else:
preexist += 1
if success:
realcount += 1
print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' %
(realcount, imgcount, dl_dir, preexist))
def compress_folder(folder: str, zip_fname: str): def compress_folder(folder: str, zip_fname: str):
@ -132,10 +151,13 @@ def compress_folder(folder: str, zip_fname: str):
""" """
print('[~] Compressing folder...') print('[~] Compressing folder...')
mode = 'w' mode = 'w'
if os.path.isfile(zip_fname):
if os.path.isfile(zip_fname): # append to the zipfile if it already exists
mode = 'a' mode = 'a'
zfile = zipfile.ZipFile(zip_fname, mode) zfile = zipfile.ZipFile(zip_fname, mode)
for _, _, files in os.walk(folder):
for _, _, files in os.walk(folder): # add all files of the folder to the zipfile
for file in files: for file in files:
zfile.write(os.path.join(folder, file), file) zfile.write(os.path.join(folder, file), file)
zfile.close() zfile.close()
@ -144,13 +166,13 @@ def compress_folder(folder: str, zip_fname: str):
def main(): def main():
options, subreddits = parser_init() options, subreddits = parser_init()
with open('config.yaml', 'r') as file: with open('config.yaml', 'r') as file: # loads the config.yaml file
try: try:
settings = yaml.safe_load(file) settings = yaml.safe_load(file)
except yaml.YAMLError as err: except yaml.YAMLError as err:
print(err) print(err)
if settings: if settings:
if 'image-extensions' in settings: if 'image-extensions' in settings: # uses image extensions specified in config.yaml fallback to default
global img_ext global img_ext
img_ext = settings['image-extensions'] img_ext = settings['image-extensions']
credentials = settings['credentials'] credentials = settings['credentials']
@ -162,15 +184,17 @@ def main():
for subreddit in subreddits: for subreddit in subreddits:
dldest = subreddit dldest = subreddit
if options.output: if options.output:
dldest = options.output dldest = options.output # uses the -o output destination instead of a folder with the subreddit name
images = get_images(client, subreddit, limit=options.count) images = get_images(client, subreddit, limit=options.count, nsfw=options.nsfw)
if options.zip: if options.zip: # downloads to a cache-folder first before compressing it to zip
download_images(images, '.cache') download_images(images, '.cache')
compress_folder('.cache', dldest+'.zip') compress_folder('.cache', dldest+'.zip')
shutil.rmtree('.cache') shutil.rmtree('.cache')
else: else:
download_images(images, dldest) download_images(images, dldest)
print('[+] All downloads finished')
if __name__ == '__main__': if __name__ == '__main__':
print('\n--- riddle.py reddit downloader by u/Trivernis ---\n')
main() main()

Loading…
Cancel
Save