You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1015 B
Python
38 lines
1015 B
Python
6 years ago
|
import urllib.request as urlreq
|
||
|
import time
|
||
|
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
from lib import logutils
|
||
|
|
||
|
# Module-level logger, obtained from the project's logutils helper under the name 'netutils'.
logger = logutils.get_logger('netutils')
|
||
|
|
||
|
|
||
|
def get_soup4url(url: str, retrys: int = 2, headers: dict = None, timeout: int = 30) -> BeautifulSoup:
    """Fetch ``url`` and return its body parsed into a BeautifulSoup tree.

    The request is attempted up to ``retrys + 1`` times. Every failed attempt
    is logged through the module logger, and a one-second pause is inserted
    between attempts.

    Args:
        url: Address to fetch.
        retrys: Number of additional attempts after the first one fails.
        headers: Optional mapping of request headers. ``None`` (the default)
            sends no extra headers.
        timeout: Timeout in seconds passed to ``urlopen`` for each attempt.

    Returns:
        A ``BeautifulSoup`` object built with the "lxml" parser, or ``False``
        when every attempt failed or the response body was empty.
    """
    req = urlreq.Request(url, headers={} if headers is None else headers)
    html = None
    attempts = retrys + 1
    for attempt in range(attempts):
        try:
            # The context manager closes the response even if read() fails.
            with urlreq.urlopen(req, timeout=timeout) as response:
                html = response.read()
            break
        except Exception as e:
            # Best-effort fetch: log the failure and fall through to a retry.
            logger.exception(e)
            if attempt < attempts - 1:
                time.sleep(1)  # to avoid request flooding
    if html:
        return BeautifulSoup(html, "lxml")
    # Kept as False (not None/raise) because callers rely on this sentinel.
    return False
|
||
|
|
||
|
|
||
|
def download_file(url: str, dest: str, headers: dict = None):
    """Download the resource at ``url`` and write its bytes to ``dest``.

    Args:
        url: Address to fetch; opened with ``urllib.request.urlopen``.
        dest: Path of the file to write. It is opened in binary write mode,
            so an existing file is overwritten.
        headers: Optional mapping of request headers. ``None`` (the default)
            sends no extra headers.

    Returns:
        None. When ``urlopen`` raises ``ConnectionError`` a message is
        printed and ``dest`` is left untouched.
    """
    req = urlreq.Request(url, headers={} if headers is None else headers)
    try:
        response = urlreq.urlopen(req)
    except ConnectionError:
        print('\n [-] Connection Error')
        return
    # Open the destination only after the connection succeeded, so a failed
    # request neither leaks a file handle nor leaves an empty file behind.
    # Both resources are closed by the with-statement, even on error.
    with response, open(dest, "wb") as f:
        f.write(response.read())
|