You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1015 B
Python
38 lines
1015 B
Python
import urllib.request as urlreq
|
|
import time
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from lib import logutils
|
|
|
|
# Module-level logger registered under the name 'netutils', obtained through
# the project's logutils helper.
logger = logutils.get_logger('netutils')
|
|
|
|
|
|
def get_soup4url(url: str, retrys: int = 2, headers: dict = urlreq.noheaders(), timeout: int = 30) -> BeautifulSoup:
    """Fetch ``url`` and return its body parsed with BeautifulSoup ("lxml").

    The request is attempted up to ``retrys + 1`` times. Each failed attempt
    is logged through the module logger and, when another attempt follows,
    is followed by a one second pause.

    Args:
        url: Address to fetch.
        retrys: Number of additional attempts made after the first failure.
        headers: Header mapping handed to ``urllib.request.Request``. The
            default object is created once, when the function is defined,
            and is only read here.
        timeout: Timeout in seconds passed to ``urlopen`` for every attempt.

    Returns:
        The parsed ``BeautifulSoup`` document, or ``False`` when every
        attempt failed or the response body was empty.
    """
    req = urlreq.Request(url, headers=headers)
    html = None
    attempts = retrys + 1
    for attempt in range(attempts):
        try:
            # The context manager closes the connection even if read() raises.
            with urlreq.urlopen(req, timeout=timeout) as response:
                html = response.read()
            break
        except Exception as e:
            # Best effort by design: any failure is logged and retried.
            logger.exception(e)
            if attempt < attempts - 1:
                time.sleep(1)  # to avoid request flooding
    if html:
        return BeautifulSoup(html, "lxml")
    # False (not None) is kept so existing callers see the same failure value.
    return False
|
|
|
|
|
|
def download_file(url: str, dest: str, headers: dict = urlreq.noheaders()):
    """Download the resource at ``url`` and store its bytes in ``dest``.

    The body is fetched completely before ``dest`` is opened, so a failed
    request neither creates an empty file nor truncates an existing one.

    Args:
        url: Address of the resource to fetch.
        dest: Path of the file to write; opened in binary write mode.
        headers: Header mapping handed to ``urllib.request.Request``. The
            default object is created once, when the function is defined,
            and is only read here.

    Returns:
        None. When opening the URL raises ``ConnectionError`` a message is
        printed and nothing is written.

    Raises:
        Any other exception raised while opening the URL, reading the body
        or writing ``dest`` propagates to the caller.
    """
    req = urlreq.Request(url, headers=headers)
    try:
        response = urlreq.urlopen(req)
    except ConnectionError:
        print('\n [-] Connection Error')
        return
    # Close the connection as soon as the body has been read.
    with response:
        payload = response.read()
    # Open the destination only now that there is something to write; the
    # context manager guarantees the handle is closed even if write() fails.
    with open(dest, "wb") as out:
        out.write(payload)
|