Add option to compress the data

master
trivernis 5 years ago
parent cc9095a9af
commit 61846e8180

@ -16,7 +16,7 @@ pipenv install
## Usage
```
usage: miner.py [-h] [-t] -o OUTPUT_DIR [-i INTERVAL] [-m METHOD] [-b BODY] [-p TOR_PASSWORD] [-z] url [url ...]

Periodically mine data
@ -32,8 +32,10 @@ optional arguments:
The interval in which the data is requested The interval in which the data is requested
-m METHOD, --method METHOD -m METHOD, --method METHOD
The HTTP method that is used The HTTP method that is used
-p PAYLOAD_FILE, --payload-file PAYLOAD_FILE -b BODY, --body BODY The file containing the requests payload/body.
The file containing the requests payload. -p TOR_PASSWORD, --tor-password TOR_PASSWORD
The password used for the tor control port.
-z, --compress If the data should be compressed
``` ```
## License ## License

@ -0,0 +1,51 @@
import os
from os import path
import zipfile
import tempfile
class FileManager:
    """Manages storage of downloaded response files.

    In plain mode each file is written directly into a per-source
    subdirectory of ``data_dir``.  In compressed mode files are first
    staged in the system temp directory and then moved into a per-source
    LZMA-compressed zip archive (``<directory>.zip``) under ``data_dir``.
    """

    def __init__(self, data_dir, directories: list, compress=False, tmp='.tmp'):
        """
        :param data_dir: root directory where data (or archives) is stored
        :param directories: names of the per-source subdirectories/archives
        :param compress: if True, store files inside zip archives instead
                         of plain directories
        :param tmp: unused; kept for backward compatibility — the system
                    temp directory is used as the staging area instead
        """
        self._dirs = directories
        self.compress = compress
        self._data_dir = data_dir
        # Staging area for files that will later be added to an archive.
        self._tmpdir = tempfile.gettempdir()
        if not compress:
            # Plain mode: the target directories must exist up front.
            self._open_directories()

    def _open_directories(self):
        """Creates the output directory for every configured source."""
        for directory in self._dirs:
            # exist_ok avoids a race between an existence check and mkdir,
            # and also creates data_dir itself if it is missing.
            os.makedirs(path.join(self._data_dir, directory), exist_ok=True)

    def get_file(self, directory, name):
        """Opens a new writable text file for the given source.

        :param directory: name of the source subdirectory/archive
        :param name: file name
        :return: an open file object; the caller is responsible for closing
        """
        if self.compress:
            # Stage in the temp dir; store_file() moves it into the archive.
            return open(path.join(self._tmpdir, name), 'w')
        return open(path.join(self._data_dir, directory, name), 'w')

    def store_file(self, directory, name):
        """Moves a staged file into its zip archive if compression is active.

        No-op in plain mode, where get_file() already wrote the file to its
        final location.

        :param directory: name of the source archive (without '.zip')
        :param name: file name previously passed to get_file()
        """
        if not self.compress:
            return
        z_name = path.join(self._data_dir, directory + '.zip')
        # Append if the archive already exists, otherwise create it.
        mode = 'a' if path.exists(z_name) else 'w'
        f_path = path.join(self._tmpdir, name)
        with zipfile.ZipFile(z_name, mode, compression=zipfile.ZIP_LZMA) as zf:
            zf.write(f_path, name)
        # The staged copy is no longer needed once it is in the archive.
        os.remove(f_path)

@ -2,8 +2,10 @@
import argparse import argparse
from lib.client import TorClient, Client from lib.client import TorClient, Client
from lib.utils import parse_duration from lib.utils import parse_duration
from lib.io import FileManager
import time import time
import os import os
from os import path
import mimetypes import mimetypes
import base64 import base64
import hashlib import hashlib
@ -25,10 +27,11 @@ def parse_arguments():
parser.add_argument('-m', '--method', default='GET', type=str, help='The HTTP method that is used') parser.add_argument('-m', '--method', default='GET', type=str, help='The HTTP method that is used')
parser.add_argument('-b', '--body', type=str, help='The file containing the requests payload/body.') parser.add_argument('-b', '--body', type=str, help='The file containing the requests payload/body.')
parser.add_argument('-p', '--tor-password', type=str, help='The password used for the tor control port.') parser.add_argument('-p', '--tor-password', type=str, help='The password used for the tor control port.')
parser.add_argument('-z', '--compress', action='store_true', help='If the data should be compressed')
return parser.parse_args() return parser.parse_args()
def request_loop(client: Client, urls: [str], out_dir: str, method: str = 'GET', interval=1800, body=None): def request_loop(client: Client, urls: [str], fm: FileManager, method: str = 'GET', interval=1800, body=None):
while True: while True:
try: try:
for url in urls: for url in urls:
@ -37,9 +40,12 @@ def request_loop(client: Client, urls: [str], out_dir: str, method: str = 'GET',
extension = mimetypes.guess_extension(req.headers['content-type'].split(';')[0]) extension = mimetypes.guess_extension(req.headers['content-type'].split(';')[0])
print('[+] Request to %s succeeded: mime: %s, timing: %ss' % print('[+] Request to %s succeeded: mime: %s, timing: %ss' %
(url, req.headers['content-type'], req.elapsed.total_seconds())) (url, req.headers['content-type'], req.elapsed.total_seconds()))
with open('%s/%s/%s%s' % (out_dir, get_folder_name(url), d = get_folder_name(url)
time.strftime('%m-%d-%y_%H-%M-%S'), extension), 'w') as f: f_name = time.strftime('%m-%d-%y_%H-%M-%S') + extension
with fm.get_file(d, f_name) as f:
f.write(req.text) f.write(req.text)
fm.store_file(d, f_name)
print('[+] Successfully stored response data as %s ' % f_name)
else: else:
print('[-] Request failed with code %s: %s' % (req.status_code, req.text)) print('[-] Request failed with code %s: %s' % (req.status_code, req.text))
client.reset() client.reset()
@ -73,19 +79,19 @@ def main():
mapping = json.load(mf) mapping = json.load(mf)
except Exception as e: except Exception as e:
print(e) print(e)
dirs = []
for url in args.url: for url in args.url:
folder_name = get_folder_name(url) folder_name = get_folder_name(url)
folder_path = '%s/%s' % (args.output_dir, folder_name)
mapping[url] = folder_name mapping[url] = folder_name
if not os.path.exists(folder_path): dirs.append(folder_name)
os.mkdir(folder_path)
with open(mapping_file, 'w') as mf: with open(mapping_file, 'w') as mf:
json.dump(mapping, mf, indent=' ') json.dump(mapping, mf, indent=' ')
body = None body = None
if args.body: if args.body:
body = open(args.body, 'rb') body = open(args.body, 'rb')
fm = FileManager(args.output_dir, dirs, compress=args.compress)
print('[ ] Starting request loop...') print('[ ] Starting request loop...')
request_loop(client, args.url, args.output_dir, args.method, int(interval.total_seconds()), body=body) request_loop(client, args.url, fm, args.method, int(interval.total_seconds()), body=body)
if __name__ == '__main__': if __name__ == '__main__':

Loading…
Cancel
Save