python-utility-scripts/lib/netutils.py

import urllib.request as urlreq
import time

from bs4 import BeautifulSoup

from lib import logutils

logger = logutils.get_logger('netutils')


def get_soup4url(
    url: str,
    retrys: int = 2,
    headers: "dict | None" = None,
    timeout: int = 30,
) -> "BeautifulSoup | bool":
    """Fetch ``url`` and parse the response body into a BeautifulSoup tree.

    The request is attempted up to ``retrys + 1`` times. Each failed attempt
    is logged together with its traceback, and the function pauses for one
    second before the next attempt.

    Args:
        url: Address to fetch.
        retrys: Number of additional attempts made after the first one fails.
            A negative value results in no attempt at all.
        headers: Header names mapped to values, passed to
            ``urllib.request.Request``. ``None`` (the default) sends no
            extra headers.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The document parsed with the ``"lxml"`` parser, or ``False`` when
        every attempt failed or the response body was empty.
    """
    # A fresh dict per call instead of a default object built at import time.
    req = urlreq.Request(url, headers={} if headers is None else headers)
    attempts = retrys + 1
    html = None
    for attempt in range(attempts):
        try:
            # The context manager closes the response even if read() fails.
            with urlreq.urlopen(req, timeout=timeout) as response:
                html = response.read()
            break
        except Exception as e:
            logger.exception(e)
            # Throttle only when another attempt follows; sleeping after the
            # final failure would just delay the caller.
            if attempt < attempts - 1:
                time.sleep(1)  # to avoid request flooding
    if html:
        return BeautifulSoup(html, "lxml")
    # Kept as False (not None) because existing callers may test for it.
    return False


def download_file(url: str, dest: str, headers: "dict | None" = None) -> None:
    """Download ``url`` and write the response body to the file ``dest``.

    The destination is opened only after the request has succeeded, so a
    failed request neither creates ``dest`` nor truncates an existing file.

    Args:
        url: Address of the resource to download.
        dest: Path of the file to write; it is opened in binary write mode.
        headers: Header names mapped to values, passed to
            ``urllib.request.Request``. ``None`` (the default) sends no
            extra headers.

    Returns:
        ``None``. If opening the URL raises ``ConnectionError``, a message is
        printed and the function returns without writing anything. Any other
        exception propagates to the caller.
    """
    # A fresh dict per call instead of a default object built at import time.
    req = urlreq.Request(url, headers={} if headers is None else headers)
    try:
        response = urlreq.urlopen(req)
    except ConnectionError:
        print('\n [-] Connection Error')
        return
    # Both handles are released on every exit path, including write errors.
    with response, open(dest, "wb") as out:
        # Copy in fixed-size chunks so large downloads are not held in memory.
        for chunk in iter(lambda: response.read(64 * 1024), b""):
            out.write(chunk)
Modified project in general - Exported some functionalities into libraries 6 years ago			`import urllib.request as urlreq`
			`import time`

			`from bs4 import BeautifulSoup`

			`from lib import logutils`

			`logger = logutils.get_logger('netutils')`


			`def get_soup4url(url: str, retrys: int =2, headers: dict=urlreq.noheaders(), timeout: int =30) -> BeautifulSoup:`
			`""" Returns a soup for the url """`
			`req = urlreq.Request(url, headers=headers)`
			`html = None`
			`for _ in range(0, retrys+1):`
			`try:`
			`html = urlreq.urlopen(req, timeout=timeout).read()`
			`break`
			`except Exception as e:`
			`logger.exception(e)`
			`time.sleep(1) # to avoid request flooding`
			`if html:`
			`soup = BeautifulSoup(html, "lxml")`
			`return soup`
			`return False`


			`def download_file(url: str, dest: str, headers: dict=urlreq.noheaders()):`
			`f = open(dest, "wb")`
			`req = urlreq.Request(url, headers=headers)`
			`try:`
			`image = urlreq.urlopen(req)`
			`except ConnectionError:`
			`print('\n [-] Connection Error')`
			`return`
			`f.write(image.read())`
			`f.close()`