diff --git a/.gitignore b/.gitignore index 6c61521..f1a60f4 100644 --- a/.gitignore +++ b/.gitignore @@ -65,4 +65,5 @@ config.yaml # data .ignore -.cache \ No newline at end of file +.cache +*.zip \ No newline at end of file diff --git a/README.md b/README.md index 0cd45c5..a748186 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This script requires at least Python 3.6. After cloning this repository you need to install the requirements via ```sh -pip install -r requirements.txt +pip3 install -r requirements.txt ``` ## Configuration @@ -51,7 +51,8 @@ Options: The name of the output folder. If none is specified, it's the subreddits name. -z, --zip Stores the images in a zip file if true - -n, --nsfw If set nsfw-content is also downloaded. + --nsfw If set nsfw-content is also downloaded. + --lzma If set the lzma-compression module is used. ``` ### Example diff --git a/requirements.txt b/requirements.txt index ee07df1..954fea7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ PyYaml -praw -zipfile \ No newline at end of file +praw \ No newline at end of file diff --git a/riddle.py b/riddle.py index df67e5f..fac9883 100644 --- a/riddle.py +++ b/riddle.py @@ -24,11 +24,12 @@ def assert_dir_exist(dirpath): os.mkdir(dirpath) -def download_file(url: str, dest: str): +def download_file(url: str, dest: str, progressbar = None): """ Downloads a url to a file :param url: download url :param dest: download destination + :param progressbar: The progressbar instance to clear it before writing an error message :return: Success? """ f = open(dest, "wb") @@ -39,11 +40,17 @@ def download_file(url: str, dest: str): f.write(image.read()) success = True except ConnectionError: - print('\r[-] Connection Error \r') + if progressbar: + progressbar.clear() + print('\r[-] Connection Error') except urlreq.HTTPError as err: - print('\r[-] HTTPError for %s: %s \r' % (url, err)) + if progressbar: + progressbar.clear() + print('\r[-] HTTPError for %s: %s' % (url, err)) except urlreq.URLError as err: - print('\r[-] URLError for %s: %s \r' % (url, err)) + if progressbar: + progressbar.clear() + print('\r[-] URLError for %s: %s' % (url, err)) f.close() try: file_size = round(os.path.getsize(dest) / 1000) @@ -52,8 +59,12 @@ def download_file(url: str, dest: str): elif file_size < min_size: os.remove(dest) success = False - print('\r[-] Removed %s: Too small (%s kb)\r' % (dest, file_size)) + if progressbar: + progressbar.clear() + print('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size)) except IOError as err: + if progressbar: + progressbar.clear() print('\r[-] Error when removing file %s: %s' % (dest, err)) return success @@ -70,6 +81,7 @@ class ProgressBar: self.length = length self.total = total self.progress = 0 + self.textlength = 0 def tick(self): """ @@ -97,11 +109,20 @@ class ProgressBar: percent = ("{0:." + str(1) + "f}").format(100 * (iteration / float(total))) filled_length = int(self.length * iteration // total) bar = self.fill * filled_length + '-' * (self.length - filled_length) - print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r') + textout = '\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix) + print(textout, end='\r') + self.textlength = len(textout) # Print new line on complete if iteration == total: print() + def clear(self): + """ + clear last progress output + :return: + """ + print(' '*self.textlength, end='\r') + def parser_init(): """ @@ -120,9 +141,12 @@ def parser_init(): parser.add_option('-z', '--zip', dest='zip', action='store_true', default=False, help='Stores the images in a zip file if true') - parser.add_option('-n', '--nsfw', dest='nsfw', + parser.add_option('--nsfw', dest='nsfw', action='store_true', default=False, help='If set nsfw-content is also downloaded.') + parser.add_option('--lzma', dest='lzma', + action='store_true', default=False, + help='If set the lzma-compression module is used.') return parser.parse_args() @@ -155,16 +179,16 @@ def download_images(images: list, dl_dir: str): assert_dir_exist(dl_dir) for img in images: # download each image if it doesn't exist - pb.tick() success = False imgname = img.split('/')[-1] name = os.path.join(dl_dir, imgname) if not os.path.isfile(name): - success = download_file(img, name) + success = download_file(img, name, pb) else: preexist += 1 if success: realcount += 1 + pb.tick() print('[+] Successfully downloaded %s out of %s images to %s (%s already existed)' % (realcount, imgcount, dl_dir, preexist)) @@ -185,11 +209,12 @@ def filter_zip_files(images: list, zip_fname: str): return images -def compress_folder(folder: str, zip_fname: str): +def compress_folder(folder: str, zip_fname: str, compression: int): """ Zips the contents of a folder to the destination zipfile name. :param folder: the folder to zip :param zip_fname: the name of the destination zipfile + :param compression: The compression method (constant from zipfile module) :return: None """ print('[~] Compressing folder...') @@ -198,7 +223,7 @@ def compress_folder(folder: str, zip_fname: str): if os.path.isfile(zip_fname): # append to the zipfile if it already exists mode = 'a' - zfile = zipfile.ZipFile(zip_fname, mode) + zfile = zipfile.ZipFile(zip_fname, mode, compression=compression) for _, _, files in os.walk(folder): # add all files of the folder to the zipfile for file in files: @@ -238,10 +263,13 @@ def main(): images = get_images(client, subreddit, limit=options.count, nsfw=options.nsfw) if options.zip: # downloads to a cache-folder first before compressing it to zip + comp_mode = zipfile.ZIP_STORED + if options.lzma: + comp_mode = zipfile.ZIP_LZMA images = filter_zip_files(images, dldest+'.zip') - download_images(images, '.cache') - compress_folder('.cache', dldest+'.zip') - shutil.rmtree('.cache') + download_images(images, '.cache-'+dldest) + compress_folder('.cache-'+dldest, dldest+'.zip', compression=comp_mode) + shutil.rmtree('.cache-'+dldest) else: download_images(images, dldest) print('[+] All downloads finished') diff --git a/test.zip b/test.zip deleted file mode 100644 index 3458a09..0000000 Binary files a/test.zip and /dev/null differ