Merge remote-tracking branch 'origin/develop' into develop

pull/6/head
Trivernis 5 years ago
commit 660b90d919

@ -1,282 +1,283 @@
#!/usr/bin/env python3
# coding: utf-8
# author: u/Trivernis
import optparse
import os
import shutil
import urllib.request as urlreq
import zipfile

import praw
import yaml
user_agent = 'python:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent handed to the praw client
img_ext = ['jpg', 'jpeg', 'png'] # default extensions an url must end with to count as image; replaced by the 'image-extensions' setting
min_size = 5 # minimum size in kilobytes, smaller downloads are removed; replaced by the 'min-size' setting
def assert_dir_exist(dirpath):
    """
    Creates the directory if it doesn't exist.
    Missing parent directories are created as well, and a directory that shows up
    between the check and the creation is not treated as an error.
    :param dirpath: path to the directory
    :return: None
    """
    if not os.path.exists(dirpath):
        # makedirs instead of mkdir so nested output paths like "a/b" work
        os.makedirs(dirpath, exist_ok=True)
def download_file(url: str, dest: str, progressbar = None):
    """
    Downloads a url to a file.
    A failed download, or one that is smaller than the module level min_size
    (in kilobytes), is removed again and reported as unsuccessful.
    :param url: download url
    :param dest: download destination
    :param progressbar: The progressbar instance to clear it before writing an error message
    :return: Success?
    """
    def report(message):
        # wipe the progressbar line first so the message is not mixed into it
        if progressbar:
            progressbar.clear()
        print(message)

    success = False
    # the with-blocks close the file and the response even on unexpected errors
    with open(dest, "wb") as f:
        try:
            with urlreq.urlopen(urlreq.Request(url)) as image:
                f.write(image.read())
            success = True
        except ConnectionError:
            report('\r[-] Connection Error')
        except urlreq.HTTPError as err:  # subclass of URLError, has to be handled first
            report('\r[-] HTTPError for %s: %s' % (url, err))
        except urlreq.URLError as err:
            report('\r[-] URLError for %s: %s' % (url, err))
    try:
        if not success:
            os.remove(dest)  # drop the empty or partial file
        else:
            file_size = round(os.path.getsize(dest) / 1000)
            if file_size < min_size:
                os.remove(dest)
                success = False
                report('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size))
    except IOError as err:
        report('\r[-] Error when removing file %s: %s' % (dest, err))
    return success
class ProgressBar:
    """
    A simple progressbar that is printed to stdout and redrawn in place.
    """

    def __init__(self, total=100, prefix='', suffix='', length=50, fill=''):
        """
        :param total: the progress value that corresponds to 100%
        :param prefix: text printed in front of the bar
        :param suffix: text printed behind the percentage
        :param length: width of the bar in characters
        :param fill: string used for the completed part of the bar
        """
        # NOTE(review): the default fill is an empty string here, which leaves the
        # completed part of the bar invisible - the glyph may have been lost; confirm.
        self.prefix = prefix
        self.suffix = suffix
        self.fill = fill
        self.length = length
        self.total = total
        self.progress = 0
        self.textlength = 0  # length of the last printed line, used by clear()

    def tick(self):
        """
        Next step of the progressbar. The stepwidth is always 1.
        :return: None
        """
        self.progress += 1
        self._print_progress()

    def setprogress(self, progress: float):
        """
        Set the progress of the bar.
        :param progress: absolute progress in the same unit as total
                         (equals percent for the default total of 100)
        :return: None
        """
        self.progress = progress
        self._print_progress()

    def _print_progress(self):
        """
        Redraws the bar for the current progress.
        :return: None
        """
        iteration = self.progress
        total = self.total
        if total:
            fraction = iteration / float(total)
            filled_length = int(self.length * iteration // total)
        else:
            # a bar without any steps counts as complete instead of dividing by zero
            fraction = 1.0
            filled_length = self.length
        # keep the bar inside its frame even if the progress overshoots the total
        filled_length = max(0, min(self.length, filled_length))
        percent = "{0:.1f}".format(100 * fraction)
        bar = self.fill * filled_length + '-' * (self.length - filled_length)
        textout = '\r%s |%s| %s%% %s' % (self.prefix, bar, percent, self.suffix)
        print(textout, end='\r')
        self.textlength = len(textout)
        # Print new line on complete
        if iteration == total:
            print()

    def clear(self):
        """
        clear last progress output
        :return: None
        """
        print(' ' * self.textlength, end='\r')
def parser_init():
    """
    Initializes and parses command line arguments
    :return: the parsed options and the list of positional arguments (the subreddits)
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] [subreddits]")
    # the help texts are joined from single-line literals so that no newline or
    # source indentation ends up in the --help output
    parser.add_option('-c', '--count', dest='count',
                      type='int', default=None,
                      help='The number of images to download for each subreddit. '
                           'If not set it is the maximum fetchable number.')
    parser.add_option('-o', '--output', dest='output',
                      type='str', default=None,
                      help='The name of the output folder. '
                           'If none is specified, it\'s the subreddits name.')
    parser.add_option('-z', '--zip', dest='zip',
                      action='store_true', default=False,
                      help='Stores the images in a zip file if true')
    parser.add_option('--nsfw', dest='nsfw',
                      action='store_true', default=False,
                      help='If set nsfw-content is also downloaded.')
    parser.add_option('--lzma', dest='lzma',
                      action='store_true', default=False,
                      help='If set the lzma-compression module is used.')
    return parser.parse_args()
def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False):
    """
    Uses the reddit api to fetch the urls of all image posts in the hot listing
    :param reddit_client: instance of the reddit client
    :param subreddit: reddit subreddit name
    :param limit: max images to download. if set to None the maximum fetchable amount is used.
    :param nsfw: if set to true, nsfw-images won't be filtered
    :return: list of image urls
    """
    print('[~] Fetching images for r/%s...' % subreddit)
    # compare extensions case-insensitively so that e.g. ".JPG" urls are kept
    extensions = {str(ext).lower() for ext in img_ext}
    submissions = reddit_client.subreddit(subreddit).hot(limit=limit)
    return [submission.url for submission in submissions
            if (nsfw or not submission.over_18)  # filters nsfw if set to false
            and submission.url.rsplit('.', 1)[-1].lower() in extensions]
def download_images(images: list, dl_dir: str):
    """
    Downloads a list of image urls to a folder.
    Files that already exist in the folder are skipped and counted separately.
    :param images: list of image urls
    :param dl_dir: destination directory
    :return: None
    """
    imgcount = len(images)
    realcount = preexist = 0
    print('[~] Downloading %s images to %s' % (imgcount, dl_dir))
    pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete')
    assert_dir_exist(dl_dir)

    for img in images:  # download each image if it doesn't exist
        # the last url segment is used as the local file name
        name = os.path.join(dl_dir, img.split('/')[-1])
        if os.path.isfile(name):
            preexist += 1
        elif download_file(img, name, pb):
            realcount += 1
        pb.tick()
    print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' %
          (realcount, imgcount, dl_dir, preexist))
def filter_zip_files(images: list, zip_fname: str):
    """
    Removes the images that already exist in the zip-file
    :param images: list of image urls
    :param zip_fname: path of the zip-file; if it doesn't exist nothing is filtered
    :return: the image urls whose file name (last url segment) is not in the zip-file
    """
    if not os.path.isfile(zip_fname):
        return images
    # the with-block closes the archive again, a set keeps the lookups cheap
    with zipfile.ZipFile(zip_fname, 'r') as zfile:
        zfnames = set(zfile.namelist())
    print('[~] Removing entries already in zip-file')
    return [img for img in images if img.split('/')[-1] not in zfnames]
def compress_folder(folder: str, zip_fname: str, compression: int):
    """
    Zips the contents of a folder to the destination zipfile name.
    An already existing zipfile is appended to instead of being overwritten.
    :param folder: the folder to zip
    :param zip_fname: the name of the destination zipfile
    :param compression: The compression method (constant from zipfile module)
    :return: None
    """
    print('[~] Compressing folder...')
    mode = 'a' if os.path.isfile(zip_fname) else 'w'  # append to the zipfile if it already exists
    with zipfile.ZipFile(zip_fname, mode, compression=compression) as zfile:
        for root, _, files in os.walk(folder):  # add all files of the folder to the zipfile
            for file in files:
                # join with the walked directory so files in sub-folders are found;
                # top-level files keep their plain file name inside the archive
                path = os.path.join(root, file)
                zfile.write(path, os.path.relpath(path, folder))
    print('[+] Folder %s compressed to %s.' % (folder, zip_fname))
def main():
    """
    Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched.
    If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed.
    Exits with an error message if the config file holds no usable settings.
    """
    global img_ext, min_size
    options, subreddits = parser_init()
    # the config is looked up next to this script, not in the current working directory
    config_fname = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yaml')
    settings = None  # stays None if the file is empty or can't be parsed
    with open(config_fname, 'r') as file:  # loads the config.yaml file
        try:
            settings = yaml.safe_load(file)
        except yaml.YAMLError as err:
            print(err)
    if not isinstance(settings, dict) or 'credentials' not in settings:
        # the reddit client can't be created without credentials
        raise SystemExit('[-] No credentials found in %s' % config_fname)
    if 'image-extensions' in settings:
        img_ext = settings['image-extensions']
    if 'min-size' in settings:
        min_size = int(settings['min-size'])
    credentials = settings['credentials']
    client = praw.Reddit(
        client_id=credentials['client_id'],
        client_secret=credentials['client_secret'],
        user_agent=user_agent
    )
    for subreddit in subreddits:
        dldest = options.output or subreddit  # uses the -o output destination if given
        images = get_images(client, subreddit, limit=options.count,
                            nsfw=options.nsfw)
        if options.zip:  # downloads to a cache-folder first before compressing it to zip
            comp_mode = zipfile.ZIP_LZMA if options.lzma else zipfile.ZIP_STORED
            cachedir = '.cache-' + dldest.split('/')[-1]
            images = filter_zip_files(images, dldest+'.zip')
            try:
                download_images(images, cachedir)
                compress_folder(cachedir, dldest+'.zip', compression=comp_mode)
            finally:
                # don't leave the cache folder behind if a step above fails
                if os.path.isdir(cachedir):
                    shutil.rmtree(cachedir)
        else:
            download_images(images, dldest)
    print('[+] All downloads finished')
if __name__ == '__main__':
    # only runs when executed as a script, not when the module is imported
    print('\n--- riddle.py reddit downloader by u/Trivernis ---\n')
    main()

Loading…
Cancel
Save