Add support for multiple URLs and store data in subfolders

master
trivernis 4 years ago
parent 23aa5bd137
commit 0d76d43c00

@ -9,6 +9,7 @@ verify_ssl = true
requests = "*"
fake-useragent = "*"
stem = "*"
toml = "*"
[requires]
python_version = "3.8"

@ -5,11 +5,20 @@ from lib.utils import parse_duration
import time
import os
import mimetypes
import base64
import hashlib
import json
def get_folder_name(url: str) -> str:
    """Map a URL to a deterministic, filesystem-safe folder name.

    The URL is hashed with SHA-256 and the digest is encoded using the
    URL-safe base64 alphabet, so the result contains no path separators
    and the same URL always yields the same folder name.
    """
    digest = hashlib.sha256(url.encode('utf-8')).digest()
    return base64.urlsafe_b64encode(digest).decode('utf-8')
def parse_arguments():
parser = argparse.ArgumentParser(description='Periodically mine data')
parser.add_argument('url', type=str, help='the data endpoint url')
parser.add_argument('url', type=str, help='the data endpoint url', nargs='+')
parser.add_argument('-t', '--tor', action='store_true', help='If tor should be used for requests')
parser.add_argument('-o', '--output-dir', required=True, type=str, help='The output directory for the data')
parser.add_argument('-i', '--interval', default='1h', type=str, help='The interval in which the data is requested')
@ -18,18 +27,20 @@ def parse_arguments():
return parser.parse_args()
def request_loop(client: Client, url: str, out_dir: str, method: str = 'GET', interval=1800, body=None):
def request_loop(client: Client, urls: [str], out_dir: str, method: str = 'GET', interval=1800, body=None):
while True:
try:
req = client.request(url, method=method, data=body)
if req.status_code == 200:
extension = mimetypes.guess_extension(req.headers['content-type'].split(';')[0])
print('[+] Request succeeded: mime: %s, timing: %ss' %
(req.headers['content-type'], req.elapsed.total_seconds()))
with open(out_dir + '/%s%s' % (time.strftime('%m-%d-%y_%H-%M-%S'), extension), 'w') as f:
f.write(req.text)
else:
print('[-] Request failed with code %s: %s' % (req.status_code, req.text))
for url in urls:
req = client.request(url, method=method, data=body)
if req.status_code == 200:
extension = mimetypes.guess_extension(req.headers['content-type'].split(';')[0])
print('[+] Request to %s succeeded: mime: %s, timing: %ss' %
(url, req.headers['content-type'], req.elapsed.total_seconds()))
with open('%s/%s/%s%s' % (out_dir, get_folder_name(url),
time.strftime('%m-%d-%y_%H-%M-%S'), extension), 'w') as f:
f.write(req.text)
else:
print('[-] Request failed with code %s: %s' % (req.status_code, req.text))
client.reset()
print('[ ] Pausing for %ss' % interval)
time.sleep(interval)
@ -53,6 +64,22 @@ def main():
client = Client()
if not os.path.exists(args.output_dir):
os.mkdir(args.output_dir)
mapping = {}
mapping_file = '%s/mapping.json' % args.output_dir
if os.path.exists(mapping_file):
with open(mapping_file, 'r') as mf:
try:
mapping = json.load(mf)
except Exception as e:
print(e)
for url in args.url:
folder_name = get_folder_name(url)
folder_path = '%s/%s' % (args.output_dir, folder_name)
mapping[url] = folder_name
if not os.path.exists(folder_path):
os.mkdir(folder_path)
with open(mapping_file, 'w') as mf:
json.dump(mapping, mf, indent=' ')
body = None
if args.payload_file:
body = open(args.payload_file, 'rb')

Loading…
Cancel
Save