Merge remote-tracking branch 'origin/develop' into develop
commit
660b90d919
@ -1,282 +1,283 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
# author: u/Trivernis
|
# author: u/Trivernis
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import yaml
|
import yaml
|
||||||
import praw
|
import praw
|
||||||
import optparse
|
import optparse
|
||||||
import zipfile
|
import zipfile
|
||||||
import urllib.request as urlreq
|
import urllib.request as urlreq
|
||||||
|
|
||||||
# User-agent string handed to the reddit api client.
user_agent = 'python:riddle:3.0 (by u/Trivernis)'
# Default file extensions a url has to end with to be treated as an image.
img_ext = ['jpg', 'jpeg', 'png']
# Minimum size of a downloaded file in kilobytes. Changeable in the settings.
min_size = 5
|
||||||
|
|
||||||
|
|
||||||
def assert_dir_exist(dirpath):
    """
    Creates the directory if it doesn't exist.
    Missing parent directories are created as well, an already existing
    directory is left untouched.
    :param dirpath: path to the directory
    :return: None
    :raises FileExistsError: if dirpath exists but is not a directory
    """
    # makedirs(exist_ok=True) replaces the exists()+mkdir() pair: there is no
    # window between check and creation, and nested paths like "out/pics" work.
    os.makedirs(dirpath, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def download_file(url: str, dest: str, progressbar = None):
    """
    Downloads a url to a file.
    The file is deleted again if the download fails or if the result is
    smaller than the module-level min_size (in kilobytes).
    :param url: download url
    :param dest: download destination
    :param progressbar: The progressbar instance to clear it before writing an error message
    :return: Success?
    """
    def report(message):
        # wipe the progressbar line first so the message does not end up inside it
        if progressbar:
            progressbar.clear()
        print(message)

    success = False
    try:
        # The context managers close the response and the file even when
        # reading fails. The request is opened first so that a failing url
        # does not leave an empty destination file behind.
        with urlreq.urlopen(urlreq.Request(url)) as response, open(dest, "wb") as f:
            f.write(response.read())
        success = True
    except ConnectionError:
        report('\r[-] Connection Error')
    except urlreq.HTTPError as err:  # must come before URLError, its base class
        report('\r[-] HTTPError for %s: %s' % (url, err))
    except urlreq.URLError as err:
        report('\r[-] URLError for %s: %s' % (url, err))

    try:
        if not success:
            # an aborted transfer may have left a partial file behind
            if os.path.exists(dest):
                os.remove(dest)
        else:
            file_size = round(os.path.getsize(dest) / 1000)
            if file_size < min_size:
                os.remove(dest)
                success = False
                report('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size))
    except IOError as err:
        report('\r[-] Error when removing file %s: %s' % (dest, err))
    return success
|
||||||
|
|
||||||
|
|
||||||
class ProgressBar:
    """
    A simple progressbar that redraws itself on a single terminal line.
    """

    def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'):
        """
        :param total: number of steps that equals 100 percent
        :param prefix: text printed in front of the bar
        :param suffix: text printed behind the percentage
        :param length: width of the bar in characters
        :param fill: character used for the completed part of the bar
        """
        self.prefix = prefix
        self.suffix = suffix
        self.fill = fill
        self.length = length
        self.total = total
        self.progress = 0
        self.textlength = 0  # length of the last printed text, used by clear()

    def tick(self):
        """
        Next step of the progressbar. The stepwidth is always 1.
        :return: None
        """
        self.progress += 1
        self._print_progress()

    def setprogress(self, progress: float):
        """
        Set the progress of the bar.
        :param progress: absolute progress in steps (measured against total, not in percent)
        :return: None
        """
        self.progress = progress
        self._print_progress()

    def _print_progress(self):
        """
        Draws the bar for the current progress and remembers the text length.
        :return: None
        """
        iteration = self.progress
        total = self.total

        if total:
            percent = "{0:.1f}".format(100 * (iteration / float(total)))
            filled_length = int(self.length * iteration // total)
        else:
            # nothing to do at all: show a full bar instead of dividing by zero
            percent = "{0:.1f}".format(100.0)
            filled_length = self.length
        bar = self.fill * filled_length + '-' * (self.length - filled_length)
        textout = '\r%s |%s| %s%% %s' % (self.prefix, bar, percent, self.suffix)
        # ending with \r keeps the cursor on the same line for the next redraw
        print(textout, end='\r')
        self.textlength = len(textout)
        # Print new line on complete
        if iteration == total:
            print()

    def clear(self):
        """
        clear last progress output by overwriting it with spaces
        :return: None
        """
        print(' '*self.textlength, end='\r')
|
||||||
|
|
||||||
|
|
||||||
def parser_init():
    """
    Builds the option parser and evaluates the command line arguments
    :return: the parsed options and the list of positional arguments (the subreddits)
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] [subreddits]")

    # options that carry a value
    parser.add_option(
        '-c', '--count', dest='count', type='int', default=None,
        help="The number of images to download for each subreddit.\n"
             "If not set it is the maximum fetchable number.")
    parser.add_option(
        '-o', '--output', dest='output', type='str', default=None,
        help="The name of the output folder.\n"
             "If none is specified, it's the subreddits name.")

    # plain on/off switches, all disabled unless given
    switches = (
        (('-z', '--zip'), 'zip', 'Stores the images in a zip file if true'),
        (('--nsfw',), 'nsfw', 'If set nsfw-content is also downloaded.'),
        (('--lzma',), 'lzma', 'If set the lzma-compression module is used.'),
    )
    for names, target, description in switches:
        parser.add_option(*names, dest=target, action='store_true',
                          default=False, help=description)

    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False):
    """
    Collects the urls of the hot submissions of a subreddit that point to an image
    :param reddit_client: instance of the reddit client
    :param subreddit: reddit subreddit name
    :param limit: max number of submissions to fetch. if set to None the maximum fetchable amount is used.
    :param nsfw: if set to true, nsfw-images won't be filtered
    :return: list of image urls
    """
    print('[~] Fetching images for r/%s...' % subreddit)
    image_urls = []
    for post in reddit_client.subreddit(subreddit).hot(limit=limit):
        # skip submissions marked over_18 unless nsfw content was requested
        if post.over_18 and not nsfw:
            continue
        link = post.url
        # the text behind the last dot has to be one of the configured extensions
        if link.rsplit('.', 1)[-1] in img_ext:
            image_urls.append(link)
    return image_urls
|
||||||
|
|
||||||
|
|
||||||
def download_images(images: list, dl_dir: str):
    """
    Fetches every image url of the list into a folder, skipping files that are already there
    :param images: list of image urls
    :param dl_dir: destination directory
    :return: None
    """
    total = len(images)
    downloaded = 0
    skipped = 0
    print('[~] Downloading %s images to %s' % (total, dl_dir))
    bar = ProgressBar(total=total, prefix='[~] Downloading', suffix='Complete')
    assert_dir_exist(dl_dir)

    for url in images:
        # the last url segment becomes the file name
        target = os.path.join(dl_dir, url.split('/')[-1])
        if os.path.isfile(target):
            skipped += 1
        elif download_file(url, target, bar):
            downloaded += 1
        bar.tick()  # every url counts as one step, downloaded or not
    print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' %
          (downloaded, total, dl_dir, skipped))
|
||||||
|
|
||||||
|
|
||||||
def filter_zip_files(images: list, zip_fname: str):
    """
    Removes the images that already exist in the zip-file
    :param images: list of image urls
    :param zip_fname: path of the zip-file to compare against
    :return: the urls whose file name (last url segment) is not an entry of the
             zip-file; the unchanged list if the zip-file doesn't exist
    """
    if not os.path.isfile(zip_fname):
        return images
    # the with-block closes the archive again; a set keeps the lookups cheap
    with zipfile.ZipFile(zip_fname, 'r') as zfile:
        existing = set(zfile.namelist())
    print('[~] Removing entries already in zip-file')
    return [img for img in images if img.split('/')[-1] not in existing]
|
||||||
|
|
||||||
|
|
||||||
def compress_folder(folder: str, zip_fname: str, compression: int):
    """
    Zips the contents of a folder to the destination zipfile name.
    An already existing zipfile is extended instead of overwritten.
    :param folder: the folder to zip
    :param zip_fname: the name of the destination zipfile
    :param compression: The compression method (constant from zipfile module)
    :return: None
    """
    print('[~] Compressing folder...')
    # append to the zipfile if it already exists
    mode = 'a' if os.path.isfile(zip_fname) else 'w'

    # the with-block closes the archive even if adding a file fails
    with zipfile.ZipFile(zip_fname, mode, compression=compression) as zfile:
        for root, _, files in os.walk(folder):
            for file in files:
                # join with the directory currently walked, not the top folder,
                # otherwise files in sub-folders cannot be found
                path = os.path.join(root, file)
                # entries are named relative to the folder; for files directly
                # inside it this is just the file name
                zfile.write(path, os.path.relpath(path, folder))
    print('[+] Folder %s compressed to %s.' % (folder, zip_fname))
|
||||||
|
|
||||||
|
|
||||||
def main():
    """
    Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched.
    If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed.
    :return: None
    """
    # both module defaults may be overridden by the config file
    global img_ext, min_size

    options, subreddits = parser_init()
    # the config is looked up next to this script, independent of the working directory
    config_fname = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yaml')
    settings = None  # stays None if the yaml file cannot be parsed
    with open(config_fname, 'r') as file:  # loads the config.yaml file
        try:
            settings = yaml.safe_load(file)
        except yaml.YAMLError as err:
            print(err)
    if not settings:
        # without settings there are no credentials to create the client with
        print('[-] No usable settings found in %s' % config_fname)
        return

    if 'image-extensions' in settings:
        img_ext = settings['image-extensions']
    if 'min-size' in settings:
        min_size = int(settings['min-size'])
    credentials = settings['credentials']
    client = praw.Reddit(
        client_id=credentials['client_id'],
        client_secret=credentials['client_secret'],
        user_agent=user_agent
    )
    for subreddit in subreddits:
        # uses the -o output destination if given, the subreddit name otherwise
        dldest = options.output if options.output else subreddit
        images = get_images(client, subreddit, limit=options.count,
                            nsfw=options.nsfw)
        if options.zip:  # downloads to a cache-folder first before compressing it to zip
            comp_mode = zipfile.ZIP_LZMA if options.lzma else zipfile.ZIP_STORED
            cachedir = '.cache-' + dldest.split('/')[-1]
            zip_fname = dldest + '.zip'
            # urls whose file is already part of the zip are not downloaded again
            images = filter_zip_files(images, zip_fname)
            download_images(images, cachedir)
            compress_folder(cachedir, zip_fname, compression=comp_mode)
            shutil.rmtree(cachedir)
        else:
            download_images(images, dldest)
    print('[+] All downloads finished')
|
||||||
|
|
||||||
if __name__ == '__main__':
    # only runs when executed as a script: banner first, then the program itself
    print('\n--- riddle.py reddit downloader by u/Trivernis ---\n')
    main()
|
||||||
|
Loading…
Reference in New Issue