Added pipfile and shebang line
- added pipfile for pipenv - added shebang line to execute the file directly - (converted to unix line endings)
parent
77468223e2
commit
9fcfe625d4
@ -0,0 +1,13 @@
|
||||
[[source]]
|
||||
url = "https://pypi.python.org/simple"
|
||||
verify_ssl = true
|
||||
name = "pypi"
|
||||
|
||||
[packages]
|
||||
PyYAML = "*"
|
||||
praw = "*"
|
||||
|
||||
[dev-packages]
|
||||
|
||||
[requires]
|
||||
python_version = "3.7"
|
@ -0,0 +1,111 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "e030a28963c27bc726b49ad8bc68cf9648c19fde4e1a5a76d1fc8a5955b06cd1"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
"python_version": "3.7"
|
||||
},
|
||||
"sources": [
|
||||
{
|
||||
"name": "pypi",
|
||||
"url": "https://pypi.python.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"default": {
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50",
|
||||
"sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef"
|
||||
],
|
||||
"version": "==2019.9.11"
|
||||
},
|
||||
"chardet": {
|
||||
"hashes": [
|
||||
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
|
||||
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
|
||||
],
|
||||
"version": "==3.0.4"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
|
||||
"sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
|
||||
],
|
||||
"version": "==2.8"
|
||||
},
|
||||
"praw": {
|
||||
"hashes": [
|
||||
"sha256:2e5c98e49fe60e5308255ed147b670d350f98281f84f582df30f87de727b6de2",
|
||||
"sha256:cb8f85541ad4c6b10214ef9639acccfb5fed7ffee977be169b85357d2d2ea6d9"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==6.4.0"
|
||||
},
|
||||
"prawcore": {
|
||||
"hashes": [
|
||||
"sha256:25dd14bf121bc0ad2ffc78e2322d9a01a516017105a5596cc21bb1e9a928b40c",
|
||||
"sha256:ab5558efb438aa73fc66c4178bfc809194dea3ce2addf4dec873de7e2fd2824e"
|
||||
],
|
||||
"version": "==1.0.1"
|
||||
},
|
||||
"pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9",
|
||||
"sha256:01adf0b6c6f61bd11af6e10ca52b7d4057dd0be0343eb9283c878cf3af56aee4",
|
||||
"sha256:5124373960b0b3f4aa7df1707e63e9f109b5263eca5976c66e08b1c552d4eaf8",
|
||||
"sha256:5ca4f10adbddae56d824b2c09668e91219bb178a1eee1faa56af6f99f11bf696",
|
||||
"sha256:7907be34ffa3c5a32b60b95f4d95ea25361c951383a894fec31be7252b2b6f34",
|
||||
"sha256:7ec9b2a4ed5cad025c2278a1e6a19c011c80a3caaac804fd2d329e9cc2c287c9",
|
||||
"sha256:87ae4c829bb25b9fe99cf71fbb2140c448f534e24c998cc60f39ae4f94396a73",
|
||||
"sha256:9de9919becc9cc2ff03637872a440195ac4241c80536632fffeb6a1e25a74299",
|
||||
"sha256:a5a85b10e450c66b49f98846937e8cfca1db3127a9d5d1e31ca45c3d0bef4c5b",
|
||||
"sha256:b0997827b4f6a7c286c01c5f60384d218dca4ed7d9efa945c3e1aa623d5709ae",
|
||||
"sha256:b631ef96d3222e62861443cc89d6563ba3eeb816eeb96b2629345ab795e53681",
|
||||
"sha256:bf47c0607522fdbca6c9e817a6e81b08491de50f3766a7a0e6a5be7905961b41",
|
||||
"sha256:f81025eddd0327c7d4cfe9b62cf33190e1e736cc6e97502b3ec425f574b3e7a8"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.1.2"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
"sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4",
|
||||
"sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"
|
||||
],
|
||||
"version": "==2.22.0"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
|
||||
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
|
||||
],
|
||||
"version": "==1.12.0"
|
||||
},
|
||||
"update-checker": {
|
||||
"hashes": [
|
||||
"sha256:59cfad7f9a0ee99f95f1dfc60f55bf184937bcab46a7270341c2c33695572453",
|
||||
"sha256:70e39446fccf77b21192cf7a8214051fa93a636dc3b5c8b602b589d100a168b8"
|
||||
],
|
||||
"version": "==0.16"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398",
|
||||
"sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86"
|
||||
],
|
||||
"version": "==1.25.6"
|
||||
},
|
||||
"websocket-client": {
|
||||
"hashes": [
|
||||
"sha256:1151d5fb3a62dc129164292e1227655e4bbc5dd5340a5165dfae61128ec50aa9",
|
||||
"sha256:1fd5520878b68b84b5748bb30e592b10d0a91529d5383f74f4964e72b297fd3a"
|
||||
],
|
||||
"version": "==0.56.0"
|
||||
}
|
||||
},
|
||||
"develop": {}
|
||||
}
|
@ -1,12 +1,12 @@
|
||||
# user app credentials
|
||||
credentials:
|
||||
client_id: your app-client id
|
||||
client_secret: your app-client secret
|
||||
|
||||
# required extension of the file to be downloaded
|
||||
image-extensions:
|
||||
- png
|
||||
- jpg
|
||||
- jpeg
|
||||
|
||||
# user app credentials
|
||||
credentials:
|
||||
client_id: your app-client id
|
||||
client_secret: your app-client secret
|
||||
|
||||
# required extension of the file to be downloaded
|
||||
image-extensions:
|
||||
- png
|
||||
- jpg
|
||||
- jpeg
|
||||
|
||||
min-size: 5 # minimum size in kilobytes
|
@ -1,2 +1,2 @@
|
||||
PyYaml
|
||||
PyYaml
|
||||
praw
|
@ -1,281 +1,282 @@
|
||||
|
||||
# coding: utf-8
|
||||
# author: u/Trivernis
|
||||
import os
|
||||
import shutil
|
||||
import yaml
|
||||
import praw
|
||||
import optparse
|
||||
import zipfile
|
||||
import urllib.request as urlreq
|
||||
|
||||
user_agent = 'python:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent
|
||||
img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images
|
||||
min_size = 5 # minimum size in kilobytes. changeable in settings
|
||||
|
||||
|
||||
def assert_dir_exist(dirpath):
|
||||
"""
|
||||
Creates the directory if it doesn't exist
|
||||
:param dirpath: path to the directory
|
||||
:return: None
|
||||
"""
|
||||
if not os.path.exists(dirpath):
|
||||
os.mkdir(dirpath)
|
||||
|
||||
|
||||
def download_file(url: str, dest: str, progressbar = None):
|
||||
"""
|
||||
Downloads a url to a file
|
||||
:param url: download url
|
||||
:param dest: download destination
|
||||
:param progressbar: The progressbar instance to clear it before writing an error message
|
||||
:return: Success?
|
||||
"""
|
||||
f = open(dest, "wb")
|
||||
req = urlreq.Request(url)
|
||||
success = False
|
||||
try:
|
||||
image = urlreq.urlopen(req)
|
||||
f.write(image.read())
|
||||
success = True
|
||||
except ConnectionError:
|
||||
if progressbar:
|
||||
progressbar.clear()
|
||||
print('\r[-] Connection Error')
|
||||
except urlreq.HTTPError as err:
|
||||
if progressbar:
|
||||
progressbar.clear()
|
||||
print('\r[-] HTTPError for %s: %s' % (url, err))
|
||||
except urlreq.URLError as err:
|
||||
if progressbar:
|
||||
progressbar.clear()
|
||||
print('\r[-] URLError for %s: %s' % (url, err))
|
||||
f.close()
|
||||
try:
|
||||
file_size = round(os.path.getsize(dest) / 1000)
|
||||
if not success:
|
||||
os.remove(dest)
|
||||
elif file_size < min_size:
|
||||
os.remove(dest)
|
||||
success = False
|
||||
if progressbar:
|
||||
progressbar.clear()
|
||||
print('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size))
|
||||
except IOError as err:
|
||||
if progressbar:
|
||||
progressbar.clear()
|
||||
print('\r[-] Error when removing file %s: %s' % (dest, err))
|
||||
return success
|
||||
|
||||
|
||||
class ProgressBar:
|
||||
"""
|
||||
A simple progressbar.
|
||||
"""
|
||||
|
||||
def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'):
|
||||
self.prefix = prefix
|
||||
self.suffix = suffix
|
||||
self.fill = fill
|
||||
self.length = length
|
||||
self.total = total
|
||||
self.progress = 0
|
||||
self.textlength = 0
|
||||
|
||||
def tick(self):
|
||||
"""
|
||||
Next step of the progressbar. The stepwidth is always 1.
|
||||
:return:
|
||||
"""
|
||||
self.progress += 1
|
||||
self._print_progress()
|
||||
|
||||
def setprogress(self, progress: float):
|
||||
"""
|
||||
Set the progress of the bar.
|
||||
:param progress: progress in percent
|
||||
:return: None
|
||||
"""
|
||||
self.progress = progress
|
||||
self._print_progress()
|
||||
|
||||
def _print_progress(self):
|
||||
iteration = self.progress
|
||||
total = self.total
|
||||
prefix = self.prefix
|
||||
suffix = self.suffix
|
||||
|
||||
percent = ("{0:." + str(1) + "f}").format(100 * (iteration / float(total)))
|
||||
filled_length = int(self.length * iteration // total)
|
||||
bar = self.fill * filled_length + '-' * (self.length - filled_length)
|
||||
textout = '\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix)
|
||||
print(textout, end='\r')
|
||||
self.textlength = len(textout)
|
||||
# Print new line on complete
|
||||
if iteration == total:
|
||||
print()
|
||||
|
||||
def clear(self):
|
||||
"""
|
||||
clear last progress output
|
||||
:return:
|
||||
"""
|
||||
print(' '*self.textlength, end='\r')
|
||||
|
||||
|
||||
def parser_init():
|
||||
"""
|
||||
Initializes and parses command line arguments
|
||||
:return: dict, list
|
||||
"""
|
||||
parser = optparse.OptionParser(usage="usage: %prog [options] [subreddits]")
|
||||
parser.add_option('-c', '--count', dest='count',
|
||||
type='int', default=None,
|
||||
help="""The number of images to download for each subreddit.
|
||||
If not set it is the maximum fetchable number.""")
|
||||
parser.add_option('-o', '--output', dest='output',
|
||||
type='str', default=None,
|
||||
help="""The name of the output folder.
|
||||
If none is specified, it\'s the subreddits name.""")
|
||||
parser.add_option('-z', '--zip', dest='zip',
|
||||
action='store_true', default=False,
|
||||
help='Stores the images in a zip file if true')
|
||||
parser.add_option('--nsfw', dest='nsfw',
|
||||
action='store_true', default=False,
|
||||
help='If set nsfw-content is also downloaded.')
|
||||
parser.add_option('--lzma', dest='lzma',
|
||||
action='store_true', default=False,
|
||||
help='If set the lzma-compression module is used.')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False):
|
||||
"""
|
||||
Uses the reddit api to fetch all image posts
|
||||
:param reddit_client: instance of the reddit client
|
||||
:param subreddit: reddit subreddit name
|
||||
:param limit: max images to download. if set to None the maximum fetchable amout is used.
|
||||
:param nsfw: if set to true, nsfw-images won't be filtered
|
||||
:return: list of images
|
||||
"""
|
||||
print('[~] Fetching images for r/%s...' % subreddit)
|
||||
urls = [submission.url for submission in reddit_client.subreddit(subreddit).hot(limit=limit)
|
||||
if not submission.over_18 or nsfw] # fetches hot images and filters nsfw if set to false
|
||||
return [url for url in urls if url.split('.')[-1] in img_ext]
|
||||
|
||||
|
||||
def download_images(images: list, dl_dir: str):
|
||||
"""
|
||||
Downloads a list of image urls to a folder
|
||||
:param images: list of image urls
|
||||
:param dl_dir: destination directory
|
||||
:return: None
|
||||
"""
|
||||
imgcount = len(images)
|
||||
realcount = preexist = 0
|
||||
print('[~] Downloading %s images to %s' % (imgcount, dl_dir))
|
||||
pb = ProgressBar(total=imgcount, prefix='[~] Downloading', suffix='Complete')
|
||||
assert_dir_exist(dl_dir)
|
||||
|
||||
for img in images: # download each image if it doesn't exist
|
||||
success = False
|
||||
imgname = img.split('/')[-1]
|
||||
name = os.path.join(dl_dir, imgname)
|
||||
if not os.path.isfile(name):
|
||||
success = download_file(img, name, pb)
|
||||
else:
|
||||
preexist += 1
|
||||
if success:
|
||||
realcount += 1
|
||||
pb.tick()
|
||||
print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' %
|
||||
(realcount, imgcount, dl_dir, preexist))
|
||||
|
||||
|
||||
def filter_zip_files(images: list, zip_fname: str):
|
||||
"""
|
||||
Removes the images that already exist in the zip-file
|
||||
:param images:
|
||||
:param zip_fname:
|
||||
:return:
|
||||
"""
|
||||
if os.path.isfile(zip_fname):
|
||||
zfile = zipfile.ZipFile(zip_fname, 'r')
|
||||
zfnames = [f.filename for f in zfile.infolist()]
|
||||
print('[~] Removing entries already in zip-file')
|
||||
return [img for img in images if img.split('/')[-1] not in zfnames]
|
||||
else:
|
||||
return images
|
||||
|
||||
|
||||
def compress_folder(folder: str, zip_fname: str, compression: int):
|
||||
"""
|
||||
Zips the contents of a folder to the destination zipfile name.
|
||||
:param folder: the folder to zip
|
||||
:param zip_fname: the name of the destination zipfile
|
||||
:param compression: The compression method (constant from zipfile module)
|
||||
:return: None
|
||||
"""
|
||||
print('[~] Compressing folder...')
|
||||
mode = 'w'
|
||||
|
||||
if os.path.isfile(zip_fname): # append to the zipfile if it already exists
|
||||
mode = 'a'
|
||||
|
||||
zfile = zipfile.ZipFile(zip_fname, mode, compression=compression)
|
||||
|
||||
for _, _, files in os.walk(folder): # add all files of the folder to the zipfile
|
||||
for file in files:
|
||||
zfile.write(os.path.join(folder, file), file)
|
||||
zfile.close()
|
||||
print('[+] Folder %s compressed to %s.' % (folder, zip_fname))
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched.
|
||||
If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed.
|
||||
"""
|
||||
options, subreddits = parser_init()
|
||||
with open('config.yaml', 'r') as file: # loads the config.yaml file
|
||||
try:
|
||||
settings = yaml.safe_load(file)
|
||||
except yaml.YAMLError as err:
|
||||
print(err)
|
||||
if settings:
|
||||
if 'image-extensions' in settings:
|
||||
global img_ext
|
||||
img_ext = settings['image-extensions']
|
||||
if 'min-size' in settings:
|
||||
global min_size
|
||||
min_size = int(settings['min-size'])
|
||||
credentials = settings['credentials']
|
||||
client = praw.Reddit(
|
||||
client_id=credentials['client_id'],
|
||||
client_secret=credentials['client_secret'],
|
||||
user_agent=user_agent
|
||||
)
|
||||
for subreddit in subreddits:
|
||||
dldest = subreddit
|
||||
if options.output:
|
||||
dldest = options.output # uses the -o output destination
|
||||
images = get_images(client, subreddit, limit=options.count,
|
||||
nsfw=options.nsfw)
|
||||
if options.zip: # downloads to a cache-folder first before compressing it to zip
|
||||
comp_mode = zipfile.ZIP_STORED
|
||||
if options.lzma:
|
||||
comp_mode = zipfile.ZIP_LZMA
|
||||
cachedir = '.cache-' + dldest.split('/')[-1]
|
||||
images = filter_zip_files(images, dldest+'.zip')
|
||||
download_images(images, cachedir)
|
||||
compress_folder(cachedir, dldest+'.zip', compression=comp_mode)
|
||||
shutil.rmtree(cachedir)
|
||||
else:
|
||||
download_images(images, dldest)
|
||||
print('[+] All downloads finished')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('\n--- riddle.py reddit downloader by u/Trivernis ---\n')
|
||||
main()
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# coding: utf-8
|
||||
# author: u/Trivernis
|
||||
import os
|
||||
import shutil
|
||||
import yaml
|
||||
import praw
|
||||
import optparse
|
||||
import zipfile
|
||||
import urllib.request as urlreq
|
||||
|
||||
user_agent = 'python:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent
|
||||
img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images
|
||||
min_size = 5 # minimum size in kilobytes. changeable in settings
|
||||
|
||||
|
||||
def assert_dir_exist(dirpath):
    """
    Ensure that the given directory exists, creating it when absent.

    :param dirpath: path to the directory
    :return: None
    """
    if os.path.exists(dirpath):
        return
    os.mkdir(dirpath)
|
||||
|
||||
|
||||
def download_file(url: str, dest: str, progressbar=None):
    """
    Downloads a url to a file.

    On a handled network error the partially written file is removed.
    A downloaded file smaller than the module-level ``min_size``
    (kilobytes) is also removed and counted as a failure.

    :param url: download url
    :param dest: download destination
    :param progressbar: The progressbar instance to clear it before writing an error message
    :return: Success?
    """
    req = urlreq.Request(url)
    success = False
    # 'with' guarantees the file handle is closed even when an unexpected
    # exception propagates (the original open()/close() pair leaked it then)
    with open(dest, "wb") as f:
        try:
            image = urlreq.urlopen(req)
            f.write(image.read())
            success = True
        except ConnectionError:
            if progressbar:
                progressbar.clear()
            print('\r[-] Connection Error')
        except urlreq.HTTPError as err:  # must precede URLError (its subclass)
            if progressbar:
                progressbar.clear()
            print('\r[-] HTTPError for %s: %s' % (url, err))
        except urlreq.URLError as err:
            if progressbar:
                progressbar.clear()
            print('\r[-] URLError for %s: %s' % (url, err))
    try:
        # size in kilobytes (decimal, /1000 as in the original)
        file_size = round(os.path.getsize(dest) / 1000)
        if not success:
            os.remove(dest)
        elif file_size < min_size:
            os.remove(dest)
            success = False
            if progressbar:
                progressbar.clear()
            print('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size))
    except IOError as err:
        if progressbar:
            progressbar.clear()
        print('\r[-] Error when removing file %s: %s' % (dest, err))
    return success
|
||||
|
||||
|
||||
class ProgressBar:
    """
    Minimal text progress bar redrawn in place on stdout.
    """

    def __init__(self, total=100, prefix='', suffix='', length=50, fill='█'):
        self.prefix = prefix
        self.suffix = suffix
        self.fill = fill
        self.length = length
        self.total = total
        self.progress = 0
        self.textlength = 0

    def tick(self):
        """
        Advance the bar by exactly one step and redraw it.
        :return:
        """
        self.progress += 1
        self._print_progress()

    def setprogress(self, progress: float):
        """
        Jump to an absolute progress value and redraw the bar.
        :param progress: progress in percent
        :return: None
        """
        self.progress = progress
        self._print_progress()

    def _print_progress(self):
        done = self.progress
        # percentage with one decimal place
        percent = '{0:.1f}'.format(100 * (done / float(self.total)))
        filled = int(self.length * done // self.total)
        bar = '{}{}'.format(self.fill * filled, '-' * (self.length - filled))
        textout = '\r%s |%s| %s%% %s' % (self.prefix, bar, percent, self.suffix)
        print(textout, end='\r')
        self.textlength = len(textout)
        if done == self.total:
            # move to a fresh line once the bar is complete
            print()

    def clear(self):
        """
        Overwrite the last drawn bar with blanks.
        :return:
        """
        print(' ' * self.textlength, end='\r')
|
||||
|
||||
|
||||
def parser_init():
    """
    Define the command line interface and evaluate the arguments.
    :return: dict, list
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] [subreddits]")
    parser.add_option('-c', '--count', dest='count', default=None, type='int',
                      help="""The number of images to download for each subreddit.
                      If not set it is the maximum fetchable number.""")
    parser.add_option('-o', '--output', dest='output', default=None, type='str',
                      help="""The name of the output folder.
                      If none is specified, it's the subreddits name.""")
    # boolean flags: all default to False and flip to True when passed
    for flags, dest, helptext in (
            (('-z', '--zip'), 'zip', 'Stores the images in a zip file if true'),
            (('--nsfw',), 'nsfw', 'If set nsfw-content is also downloaded.'),
            (('--lzma',), 'lzma', 'If set the lzma-compression module is used.')):
        parser.add_option(*flags, dest=dest, action='store_true',
                          default=False, help=helptext)
    return parser.parse_args()
|
||||
|
||||
|
||||
def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False):
    """
    Collect the image urls of the hot posts of a subreddit.
    :param reddit_client: instance of the reddit client
    :param subreddit: reddit subreddit name
    :param limit: max images to download. if set to None the maximum fetchable amout is used.
    :param nsfw: if set to true, nsfw-images won't be filtered
    :return: list of images
    """
    print('[~] Fetching images for r/%s...' % subreddit)
    urls = []
    for submission in reddit_client.subreddit(subreddit).hot(limit=limit):
        # keep sfw posts always; keep nsfw posts only when requested
        if nsfw or not submission.over_18:
            urls.append(submission.url)
    # keep only urls whose extension is in the configured image extensions
    return [url for url in urls if url.split('.')[-1] in img_ext]
|
||||
|
||||
|
||||
def download_images(images: list, dl_dir: str):
    """
    Download every url in *images* into the folder *dl_dir*.
    :param images: list of image urls
    :param dl_dir: destination directory
    :return: None
    """
    total = len(images)
    downloaded = skipped = 0
    print('[~] Downloading %s images to %s' % (total, dl_dir))
    progress = ProgressBar(total=total, prefix='[~] Downloading', suffix='Complete')
    assert_dir_exist(dl_dir)

    # download each image unless a file of the same name already exists
    for url in images:
        target = os.path.join(dl_dir, url.split('/')[-1])
        if os.path.isfile(target):
            skipped += 1
        elif download_file(url, target, progress):
            downloaded += 1
        progress.tick()
    print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' %
          (downloaded, total, dl_dir, skipped))
|
||||
|
||||
|
||||
def filter_zip_files(images: list, zip_fname: str):
    """
    Removes the images that already exist in the zip-file.

    :param images: image urls to filter
    :param zip_fname: path of the zip-file to check against
    :return: the urls whose file names are not yet in the archive;
             *images* unchanged when the archive does not exist
    """
    if not os.path.isfile(zip_fname):
        return images
    # context manager closes the archive (the original leaked the handle)
    with zipfile.ZipFile(zip_fname, 'r') as zfile:
        zfnames = [f.filename for f in zfile.infolist()]
    print('[~] Removing entries already in zip-file')
    return [img for img in images if img.split('/')[-1] not in zfnames]
|
||||
|
||||
|
||||
def compress_folder(folder: str, zip_fname: str, compression: int):
    """
    Zips the contents of a folder to the destination zipfile name.
    Appends when the archive already exists, otherwise creates it.

    :param folder: the folder to zip
    :param zip_fname: the name of the destination zipfile
    :param compression: The compression method (constant from zipfile module)
    :return: None
    """
    print('[~] Compressing folder...')
    # append to the zipfile if it already exists
    mode = 'a' if os.path.isfile(zip_fname) else 'w'
    # the context manager closes the archive even if a write fails
    # (the original only closed it on the success path)
    with zipfile.ZipFile(zip_fname, mode, compression=compression) as zfile:
        for _, _, files in os.walk(folder):  # add all files of the folder
            for file in files:
                # stored flat under the bare file name
                # NOTE(review): files in nested subfolders would resolve to a
                # wrong source path here — presumably the folder is flat; verify
                zfile.write(os.path.join(folder, file), file)
    print('[+] Folder %s compressed to %s.' % (folder, zip_fname))
|
||||
|
||||
|
||||
def main():
    """
    Main entry method. Loads the settings and iterates through subreddits and downloads all images it fetched.
    If the --zip flag is set, the images will be downloaded in a .cache directory and then compressed.
    """
    options, subreddits = parser_init()
    # initialise so a YAML parse error can't leave 'settings' unbound
    # (the original raised NameError on the 'if settings:' check in that case)
    settings = None
    with open('config.yaml', 'r') as file:  # loads the config.yaml file
        try:
            settings = yaml.safe_load(file)
        except yaml.YAMLError as err:
            print(err)
    if settings:
        if 'image-extensions' in settings:
            global img_ext
            img_ext = settings['image-extensions']
        if 'min-size' in settings:
            global min_size
            min_size = int(settings['min-size'])
        credentials = settings['credentials']
        client = praw.Reddit(
            client_id=credentials['client_id'],
            client_secret=credentials['client_secret'],
            user_agent=user_agent
        )
        for subreddit in subreddits:
            dldest = subreddit
            if options.output:
                dldest = options.output  # uses the -o output destination
            images = get_images(client, subreddit, limit=options.count,
                                nsfw=options.nsfw)
            if options.zip:  # downloads to a cache-folder first before compressing it to zip
                comp_mode = zipfile.ZIP_STORED
                if options.lzma:
                    comp_mode = zipfile.ZIP_LZMA
                cachedir = '.cache-' + dldest.split('/')[-1]
                images = filter_zip_files(images, dldest + '.zip')
                download_images(images, cachedir)
                compress_folder(cachedir, dldest + '.zip', compression=comp_mode)
                shutil.rmtree(cachedir)  # remove the temporary cache folder
            else:
                download_images(images, dldest)
        print('[+] All downloads finished')
|
||||
|
||||
|
||||
# script entry point: print the banner, then run the downloader
if __name__ == '__main__':
    print('\n--- riddle.py reddit downloader by u/Trivernis ---\n')
    main()
|
||||
|
Loading…
Reference in New Issue