You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
reddit-riddle/riddle.py

298 lines
10 KiB
Python

#!/usr/bin/env python3
# coding: utf-8
# author: u/Trivernis
import os
import shutil
import yaml
import praw
import optparse
import zipfile
import urllib.request as urlreq
from PIL import Image
# Module-level defaults. main() overrides img_ext, min_size and min_mp with the
# 'image-extensions', 'min-size' and 'min-mp' keys of config.yaml when present.
user_agent = 'linux:riddle:3.0 (by u/Trivernis)' # the user-agent string passed to praw.Reddit
img_ext = ['jpg', 'jpeg', 'png'] # url extensions (text after the last '.') accepted as images
min_size = 5 # minimum file size in kilobytes (bytes / 1000); smaller downloads are removed
min_mp = 0.5 # minimum megapixels (width * height / 1000000); smaller downloads are removed
class ProgressBar:
    """
    A simple progressbar that is redrawn in place on one console line.
    """

    def __init__(self, total=100, prefix='', suffix='', length=50, fill='\u2588'):
        """
        :param total: number of steps that corresponds to 100%
        :param prefix: text printed in front of the bar
        :param suffix: text printed behind the percentage
        :param length: width of the bar in characters
        :param fill: character used for the completed part of the bar.
            An empty string would make the bar shrink instead of fill,
            so the default is a full block character.
        """
        self.prefix = prefix
        self.suffix = suffix
        self.fill = fill
        self.length = length
        self.total = total
        self.progress = 0
        self.textlength = 0  # length of the last printed line, used by clear()

    def tick(self):
        """
        Next step of the progressbar. The stepwidth is always 1.
        :return: None
        """
        self.progress += 1
        self._print_progress()

    def setprogress(self, progress: float):
        """
        Set the progress of the bar.
        :param progress: absolute progress in steps (same unit as total)
        :return: None
        """
        self.progress = progress
        self._print_progress()

    def _print_progress(self):
        """
        Redraws the bar for the current progress.
        :return: None
        """
        iteration = self.progress
        total = self.total
        if total:
            fraction = iteration / float(total)
            filled_length = int(self.length * iteration // total)
        else:
            # a bar without any steps has nothing left to do: show it as
            # complete instead of raising ZeroDivisionError
            fraction = 1.0
            filled_length = self.length
        percent = '{0:.1f}'.format(100 * fraction)
        bar = self.fill * filled_length + '-' * (self.length - filled_length)
        textout = '\r%s |%s| %s%% %s' % (self.prefix, bar, percent, self.suffix)
        print(textout, end='\r')
        self.textlength = len(textout)
        # Print new line on complete
        if iteration == total:
            print()

    def clear(self):
        """
        Overwrites the last progress output with spaces so that another
        message can be printed on the same line.
        :return: None
        """
        print(' ' * self.textlength, end='\r')
def parser_init():
    """
    Builds the option parser and evaluates the command line.
    :return: the parsed option values and the list of positional
        arguments (the subreddit names)
    """
    # (flags, keyword arguments) for every supported command line option
    option_table = (
        (('-c', '--count'), {
            'dest': 'count', 'type': 'int', 'default': None,
            'help': 'The number of images to download for each subreddit.\n'
                    'If not set it is the maximum fetchable number.',
        }),
        (('-o', '--output'), {
            'dest': 'output', 'type': 'str', 'default': None,
            'help': 'The name of the output folder.\n'
                    "If none is specified, it's the subreddits name.",
        }),
        (('-z', '--zip'), {
            'dest': 'zip', 'action': 'store_true', 'default': False,
            'help': 'Stores the images in a zip file if true',
        }),
        (('--nsfw',), {
            'dest': 'nsfw', 'action': 'store_true', 'default': False,
            'help': 'If set nsfw-content is also downloaded.',
        }),
        (('--lzma',), {
            'dest': 'lzma', 'action': 'store_true', 'default': False,
            'help': 'If set the lzma-compression module is used.',
        }),
    )
    cli = optparse.OptionParser(usage="usage: %prog [options] [subreddits]")
    for flags, spec in option_table:
        cli.add_option(*flags, **spec)
    return cli.parse_args()
def assert_dir_exist(dirpath):
    """
    Creates the directory (including missing parent directories) if it
    doesn't exist
    :param dirpath: path to the directory
    :return: None
    """
    if not os.path.exists(dirpath):
        # makedirs also handles nested output paths like 'a/b'; exist_ok
        # covers the directory appearing between the check and the creation
        os.makedirs(dirpath, exist_ok=True)
def download_file(url: str, dest: str, progressbar = None):
    """
    Downloads a url to a file and keeps the file only if it is a readable
    image that reaches the module-level min_size (kilobytes) and min_mp
    (megapixels) thresholds.
    :param url: download url
    :param dest: download destination
    :param progressbar: The progressbar instance to clear it before writing an error message
    :return: True if the file was downloaded and kept, False otherwise
    """
    def report(message):
        # wipe the progress bar line first so the message isn't mixed with it
        if progressbar:
            progressbar.clear()
        print(message)

    def discard():
        try:
            os.remove(dest)
        except OSError as err:
            report('\r[-] Error when removing file %s: %s' % (dest, err))

    # Fetch everything before touching the destination, so that a failed
    # download never leaves an empty file behind (which would be counted as
    # "already existing" on the next run).
    try:
        with urlreq.urlopen(urlreq.Request(url)) as response:
            data = response.read()
    except ConnectionError:
        report('\r[-] Connection Error')
        return False
    except urlreq.HTTPError as err:
        report('\r[-] HTTPError for %s: %s' % (url, err))
        return False
    except urlreq.URLError as err:
        report('\r[-] URLError for %s: %s' % (url, err))
        return False

    with open(dest, "wb") as f:
        f.write(data)

    # Close the image handle before a possible os.remove: an open handle
    # blocks the deletion on Windows.
    try:
        with Image.open(dest) as image:
            width, height = image.size
    except OSError as err:
        discard()
        report('\r[-] Removed %s: Not a readable image (%s)' % (dest, err))
        return False

    mp = (width * height) / 1000000
    file_size = round(os.path.getsize(dest) / 1000)
    if file_size < min_size or mp < min_mp:
        discard()
        report('\r[-] Removed %s: Too small (%s kb, %s MP)' % (dest, file_size, mp))
        return False
    return True
def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False):
    """
    Uses the reddit api to fetch the urls of the image posts in the hot
    listing of a subreddit
    :param reddit_client: instance of the reddit client
    :param subreddit: reddit subreddit name
    :param limit: max number of submissions to fetch. if set to None the maximum fetchable amount is used.
    :param nsfw: if set to true, nsfw-images won't be filtered
    :return: list of image urls (urls whose extension is listed in img_ext)
    """
    print('[~] Fetching images for r/%s...' % subreddit)
    # compare extensions case-insensitively so that e.g. '.JPG' is accepted
    allowed = {str(ext).lower() for ext in img_ext}
    urls = []
    try:
        for submission in reddit_client.subreddit(subreddit).hot(limit=limit):
            if submission.over_18 and not nsfw:
                continue
            url = submission.url
            if url.rsplit('.', 1)[-1].lower() in allowed:
                urls.append(url)
                # the counter only reflects urls that are actually returned
                print('\r[~] %s images' % len(urls), end='\r')
    except Exception as e:
        # best effort: report the api error and continue with what was
        # fetched so far instead of aborting the whole run
        print(e)
    return urls
def download_images(images: list, dl_dir: str):
    """
    Fetches every image url of the list into the destination folder.
    Urls whose file name is already present in the folder are skipped
    and counted as pre-existing.
    :param images: list of image urls
    :param dl_dir: destination directory
    :return: None
    """
    total = len(images)
    downloaded = 0
    already_there = 0
    print('[~] Downloading %s images to %s' % (total, dl_dir))
    bar = ProgressBar(total=total, prefix='[~] Downloading', suffix='Complete')
    assert_dir_exist(dl_dir)
    for url in images:
        # the last path segment of the url is used as the local file name
        target = os.path.join(dl_dir, url.rsplit('/', 1)[-1])
        if os.path.isfile(target):
            already_there += 1
        elif download_file(url, target, bar):
            downloaded += 1
        bar.tick()
    summary = (downloaded, total, dl_dir, already_there)
    print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' % summary)
def filter_zip_files(images: list, zip_fname: str):
    """
    Removes the images that already exist in the zip-file
    :param images: list of image urls
    :param zip_fname: path of the zip-file to compare against
    :return: the urls whose file name (last path segment) is not an entry
        of the zip-file; the unchanged list if the zip-file doesn't exist
    """
    if not os.path.isfile(zip_fname):
        return images
    # the context manager closes the archive again; a set makes the
    # membership test independent of the archive size
    with zipfile.ZipFile(zip_fname, 'r') as zfile:
        archived = set(zfile.namelist())
    print('[~] Removing entries already in zip-file')
    return [img for img in images if img.split('/')[-1] not in archived]
def compress_folder(folder: str, zip_fname: str, compression: int):
    """
    Zips the contents of a folder to the destination zipfile name.
    Files are stored under their path relative to the folder. If the
    zipfile already exists the files are appended; names that are already
    present in the archive are skipped.
    :param folder: the folder to zip
    :param zip_fname: the name of the destination zipfile
    :param compression: The compression method (constant from zipfile module)
    :return: None
    """
    print('[~] Compressing folder...')
    # append to the zipfile if it already exists
    mode = 'a' if os.path.isfile(zip_fname) else 'w'
    with zipfile.ZipFile(zip_fname, mode, compression=compression) as zfile:
        archived = set(zfile.namelist())
        for root, _, files in os.walk(folder):
            for file in files:
                # join with the walked directory, not the top folder, so
                # that files in subfolders are found as well
                path = os.path.join(root, file)
                arcname = os.path.relpath(path, folder).replace(os.sep, '/')
                if arcname in archived:
                    continue  # avoid duplicate entries when appending
                zfile.write(path, arcname)
                archived.add(arcname)
    print('[+] Folder %s compressed to %s.' % (folder, zip_fname))
def main():
    """
    Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched.
    If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed.
    :raises SystemExit: if config.yaml can't be parsed or has no credentials section
    """
    global img_ext, min_size, min_mp
    options, subreddits = parser_init()
    config_fname = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yaml')
    with open(config_fname, 'r') as file: # loads the config.yaml file
        try:
            settings = yaml.safe_load(file)
        except yaml.YAMLError as err:
            print(err)
            settings = None
    # the reddit client can't be created without credentials, so stop with a
    # clear message instead of failing later with an unrelated error
    if not isinstance(settings, dict) or not isinstance(settings.get('credentials'), dict):
        raise SystemExit('[-] %s must be a valid yaml file with a credentials section' % config_fname)
    if 'image-extensions' in settings:
        img_ext = settings['image-extensions']
    if 'min-size' in settings:
        min_size = int(settings['min-size'])
    if 'min-mp' in settings:
        # megapixels are fractional (the default is 0.5); int() would
        # truncate values like 0.5 to 0
        min_mp = float(settings['min-mp'])
    credentials = settings['credentials']
    client = praw.Reddit(
        client_id=credentials['client_id'],
        client_secret=credentials['client_secret'],
        user_agent=user_agent
    )
    for subreddit in subreddits:
        # uses the -o output destination if given, the subreddit name otherwise
        dldest = options.output if options.output else subreddit
        images = get_images(client, subreddit, limit=options.count,
                            nsfw=options.nsfw)
        if options.zip: # downloads to a cache-folder first before compressing it to zip
            comp_mode = zipfile.ZIP_LZMA if options.lzma else zipfile.ZIP_STORED
            cachedir = '.cache-' + dldest.split('/')[-1]
            images = filter_zip_files(images, dldest+'.zip')
            download_images(images, cachedir)
            compress_folder(cachedir, dldest+'.zip', compression=comp_mode)
            shutil.rmtree(cachedir)
        else:
            download_images(images, dldest)
    print('[+] All downloads finished')
# Script entry point: show the banner, then run the downloader.
if __name__ == '__main__':
    banner = '\n--- riddle.py reddit downloader by u/Trivernis ---\n'
    print(banner)
    main()