Improvements and README edit

- files are stored in a cache directory that is deleted afterwards
- the script exits in no-chaos mode
- added a help description
pull/1/head
Trivernis 6 years ago
parent e7fa17bb00
commit 58f372048f

@@ -1 +1,23 @@
-utilities
+Python Utilities
+=========
+Here are some Python scripts that do all kinds of things.
+- - -
+### setup.py
+Installs the dependencies.
+Use `setup.py --help` for more info. The setup.py help is
+provided by setuptools.
+### riddle.py
+Downloads all images from one or more given subreddits
+```commandline
+Usage: riddle.py [options] [subreddits]
+
+Options:
+  -h, --help   show this help message and exit
+  -c, --chaos  Doesn't wait for previous downloads to
+               finish and doesn't exit when no more
+               images can be found. Only activate this if you want to
+               download a lot of images from multiple
+               subreddits at the same time.
+```
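For example, downloading from two subreddits at once in chaos mode (the invocation and subreddit names here are illustrative, not from the commit):

```commandline
python riddle.py -c pics wallpapers
```

Judging by the `zfiles[sect]` mapping in the script below, each subreddit's images are collected into their own zip archive.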

@@ -6,11 +6,12 @@ import os
 import zipfile
 import optparse
 import asyncio
+import shutil
 
 redditurl: str = 'https://old.reddit.com/r/%s'
 dl_dir: str = './.cache/'  # Format must be ./
 img_ext: List[str] = ['jpg', 'png', 'bmp']
-blacklist: List[str] = ['b.thumbs.redditmedia.com']
+blacklist: List[str] = ['b.thumbs.redditmedia.com', 'reddit.com']
 hdr: Dict[str, str] = {
     'User-Agent': """Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko)
                   Chrome/23.0.1271.64 Safari/537.11""",
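For context, the `hdr` dict is a spoofed browser User-Agent. Given the `urlreq.urlopen(req)` call in a later hunk, it is presumably attached via `urllib.request.Request`; a minimal sketch of that pattern (the exact request construction is an assumption, not shown in this diff):

```python
import urllib.request as urlreq

# Spoofed User-Agent, as in the script's hdr dict (shortened here).
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64)'}

# Attach the custom headers so old.reddit.com serves the page
# instead of rejecting the default urllib User-Agent.
req = urlreq.Request('https://old.reddit.com/r/pics', headers=hdr)
with urlreq.urlopen(req) as resp:
    html = resp.read()
```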
@@ -59,7 +60,7 @@ async def get_next(url):
             ids.append(fname)
         except KeyError:
             pass
-    return [id for id in ids if id][-1]
+    return [_id for _id in ids if _id][-1]
 
 
 def has_source(tag):
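The `id` → `_id` rename avoids shadowing Python's built-in `id()` inside the comprehension (a common linter warning); behavior is identical:

```python
ids = ['abc', '', 'def']

# `_id` keeps the builtin id() untouched; the filter drops falsy
# (empty) entries and [-1] takes the newest remaining one.
last = [_id for _id in ids if _id][-1]
print(last)  # 'def'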
@@ -90,7 +91,8 @@ async def download_async(url, zfile=None):
         if img.strip('http://').strip('https://').split('/')[0] in blacklist:
             img = None
             continue
-        name = dl_dir + img.split('/')[-1]
+        imgname = img.split('/')[-1]
+        name = dl_dir + imgname
         if os.path.isfile(name):
             continue
         f = open(name, "wb")
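One caveat on the unchanged blacklist check: `str.strip('http://')` removes any of the characters `h`, `t`, `p`, `:`, `/` from both ends of the string rather than the literal prefix, so hostnames that start or end with those characters get mangled. A more robust alternative using the standard library (not what the script does, just a sketch):

```python
from urllib.parse import urlparse

blacklist = ['b.thumbs.redditmedia.com', 'reddit.com']

def is_blacklisted(url: str) -> bool:
    # urlparse extracts the host reliably, whatever the scheme.
    return urlparse(url).netloc in blacklist

print(is_blacklisted('https://b.thumbs.redditmedia.com/x.jpg'))  # True
```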
@@ -98,7 +100,7 @@ async def download_async(url, zfile=None):
         image = urlreq.urlopen(req)
         f.write(image.read())
         f.close()
-        zfile.write(name, compress_type=zipfile.ZIP_DEFLATED)
+        zfile.write(name, imgname, zipfile.ZIP_DEFLATED)
         try:
             os.remove(name)
         except FileNotFoundError or PermissionError:
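The second positional argument to `ZipFile.write` is the arcname: passing `imgname` stores the entry under its bare filename instead of the full `./.cache/...` path, which is what makes the cache directory safe to delete later. A minimal sketch of the difference (paths are illustrative):

```python
import zipfile

with zipfile.ZipFile('pics.zip', 'w') as zf:
    # Without an arcname the entry would be stored as '.cache/img.jpg';
    # with one it is stored as just 'img.jpg'.
    zf.write('./.cache/img.jpg', 'img.jpg', zipfile.ZIP_DEFLATED)
    print(zf.namelist())  # ['img.jpg']
```

Worth noting in passing: the context line `except FileNotFoundError or PermissionError:` only catches `FileNotFoundError` (the `or` is evaluated first and yields the left operand); catching both requires a tuple, `except (FileNotFoundError, PermissionError):`.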
@@ -106,7 +108,7 @@ async def download_async(url, zfile=None):
         print('[+] Saved Image %s from %s' % (img, url))
         await asyncio.sleep(0.25)
     except Exception as error:
-        print('[-] Failed with %s' % img)
+        print('[-] Failed with %s %s' % (img, error))
     print('[+] Finished %s' % url)
@@ -147,24 +149,30 @@ def main(sections, chaos=False):
     try:
         for sect in sections:
             if chaos:
-                loop.call_soon(loop.create_task(
-                    dl_loop(sect, zfiles[sect], loop, chaos=True)))
+                loop.create_task(
+                    dl_loop(sect, zfiles[sect], loop, chaos=True))
             else:
-                loop.call_soon(loop.create_task(
-                    dl_loop(sect, zfiles[sect], loop)))
-        loop.run_forever()
+                loop.run_until_complete(loop.create_task(
+                    dl_loop(sect, zfiles[sect], loop)))
+        if chaos:
+            loop.run_forever()
     except KeyboardInterrupt:
+        loop.stop()
         for sect in sections:
             try:
                 zfiles[sect].close()
             except Exception as error:
                 print(error)
+    finally:
+        shutil.rmtree(dl_dir)
 
 
 if __name__ == '__main__':
-    parser = optparse.OptionParser()
+    parser = optparse.OptionParser(usage="usage: %prog [options] [subreddits]")
     parser.add_option('-c', '--chaos', dest='chaos',
-                      action='store_true', default=False)
+                      action='store_true', default=False,
+                      help="""Doesn't wait for previous downloads to finish and doesn't
+                      exit when no more images can be found. Only activate this if you
+                      want to download a lot of images from multiple subreddits at the
+                      same time. The only way to exit is CTRL + C.""")
     options, sects = parser.parse_args()
+    print('[~] Received subreddits %s' % ', '.join(sects))
     main(sects, chaos=options.chaos)
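This control-flow change is what makes the script exit in no-chaos mode: `run_until_complete` returns once each download loop finishes, while the chaos path only schedules tasks and then blocks in `run_forever` until CTRL+C; the new `finally` block removes the cache directory on either path. A stripped-down sketch of the same pattern (the task body and section names are stand-ins, and `ignore_errors=True` is added so the sketch runs even if the cache directory was never created):

```python
import asyncio
import shutil

async def dl_loop(name):
    # Stand-in for the script's per-subreddit download loop.
    await asyncio.sleep(0.1)
    print('[+] Finished', name)

def main(sections, chaos=False):
    loop = asyncio.get_event_loop()
    try:
        for sect in sections:
            if chaos:
                loop.create_task(dl_loop(sect))          # fire and forget
            else:
                loop.run_until_complete(dl_loop(sect))   # one at a time, then exit
        if chaos:
            loop.run_forever()  # runs until CTRL+C raises KeyboardInterrupt
    except KeyboardInterrupt:
        loop.stop()
    finally:
        # Cleanup mirrors the commit: the cache directory is deleted afterwards.
        shutil.rmtree('./.cache/', ignore_errors=True)

main(['pics', 'wallpapers'])
```

The diff's old code called `loop.call_soon(loop.create_task(...))` in both branches, which schedules work but never gives the loop a chance to run it before `run_forever`; splitting the branches into `create_task` versus `run_until_complete` is what ties the program's lifetime to the chaos flag.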
