From 0850cd72b7094625bddcb12fdc6f43b21847fe8f Mon Sep 17 00:00:00 2001 From: trivernis Date: Mon, 24 Feb 2020 16:44:30 +0100 Subject: [PATCH] Fix compression and add status file and randomized pauses --- lib/client.py | 3 +++ lib/io.py | 33 ++++++++++++++++++++++++++------- miner.py | 22 ++++++++++++++++++---- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/lib/client.py b/lib/client.py index 380138b..610fb92 100644 --- a/lib/client.py +++ b/lib/client.py @@ -2,9 +2,12 @@ import requests import stem import time import random +import warnings from fake_useragent import UserAgent from stem.control import Controller +warnings.filterwarnings('once', module='urllib3') + class Client: diff --git a/lib/io.py b/lib/io.py index 10d3dd7..994db13 100644 --- a/lib/io.py +++ b/lib/io.py @@ -2,6 +2,9 @@ import os from os import path import zipfile import tempfile +import warnings + +warnings.filterwarnings('ignore', module='zipfile') class FileManager: @@ -20,17 +23,25 @@ class FileManager: if not path.exists(path.join(self._data_dir, d)): os.mkdir(path.join(self._data_dir, d)) - def get_file(self, directory, name): + def get_file(self, directory, name, mode='w'): """ Opens a new file with the given name in the directory + :param mode: :param directory: :param name: :return: """ if self.compress: - return open(path.join(self._tmpdir, name), 'w') + f_name = path.join(self._tmpdir, name) + if mode != 'w': + z_name = path.join(self._data_dir, directory + '.zip') + with get_zip(z_name) as zf: + if name in zf.namelist(): + zf.extract(name, self._tmpdir) + zf.close() + return open(f_name, mode) else: - return open(path.join(self._data_dir, directory, name), 'w') + return open(path.join(self._data_dir, directory, name), mode) def store_file(self, directory, name): """ @@ -40,12 +51,20 @@ class FileManager: :return: """ if self.compress: - mode = 'w' z_name = path.join(self._data_dir, directory + '.zip') - if path.exists(z_name): - mode = 'a' - with zipfile.ZipFile(z_name, mode, compression=zipfile.ZIP_LZMA) as zf: + with get_zip(z_name) as zf: f_path = path.join(self._tmpdir, name) zf.write(f_path, name) zf.close() os.remove(f_path) + + @property + def data_dir(self): + return self._data_dir + + +def get_zip(name): + mode = 'w' + if path.exists(name): + mode = 'a' + return zipfile.ZipFile(name, mode, compression=zipfile.ZIP_DEFLATED) diff --git a/miner.py b/miner.py index c919851..8dfe356 100755 --- a/miner.py +++ b/miner.py @@ -11,6 +11,7 @@ import mimetypes import base64 import hashlib import json +import random def get_folder_name(url: str) -> str: @@ -34,31 +35,44 @@ def parse_arguments(): def request_loop(client: Client, urls: [str], fm: FileManager, method: str = 'GET', verify=True, interval=1800, body=None): + random_factor = round(interval/10) + names = {} + for url in urls: + names[url] = get_folder_name(url) + status_fname = os.path.join(fm.data_dir, '%s-status.csv' % names[url]) + if not os.path.exists(status_fname): + with open(status_fname, 'w') as f: + f.write('datetime,status-code,timing\n') while True: try: for url in urls: + d = names[url] + status_file = open(os.path.join(fm.data_dir, '%s-status.csv' % d), 'a') try: req = client.request(url, method=method, data=body, verify=verify) if req.status_code == 200: extension = mimetypes.guess_extension(req.headers['content-type'].split(';')[0]) print('[+] Request to %s succeeded: mime: %s, timing: %ss' % (url, req.headers['content-type'], req.elapsed.total_seconds())) - d = get_folder_name(url) - f_name = time.strftime('%m-%d-%y_%H-%M-%S') + extension + f_name = time.strftime('%d-%m-%y_%H-%M-%S') + extension with fm.get_file(d, f_name) as f: f.write(req.text) fm.store_file(d, f_name) print('[+] Successfully stored response data as %s ' % f_name) else: print('[-] Request failed with code %s: %s' % (req.status_code, req.text)) + status_file.write('%s,%s,%s\n' % (time.strftime('%d.%m.%y %H:%M:%S'), req.status_code, req.elapsed.total_seconds())) except SSLError: print('There is a problem with the certificate of %s' % url) print('To ignore that please pass the --no-verify flag') except ConnectionError as e: print('Failed to connect to %s: %s' % (url, e)) + status_file.write('%s,0,0\n' % time.strftime('%d.%m.%y %H:%M:%S')) + status_file.close() client.reset() - print('[ ] Pausing for %ss' % interval) - time.sleep(interval) + pause_duration = interval + random.randint(-random_factor, random_factor) + print('[ ] Pausing for %ss' % pause_duration) + time.sleep(pause_duration) except KeyboardInterrupt: client.close() return