Added megapixel filtering

master
Trivernis 5 years ago
parent eddd35f24c
commit 29533d6cc4

@ -6,6 +6,7 @@ name = "pypi"
[packages] [packages]
PyYAML = "*" PyYAML = "*"
praw = "*" praw = "*"
pillow = "*"
[dev-packages] [dev-packages]

44
Pipfile.lock generated

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "e030a28963c27bc726b49ad8bc68cf9648c19fde4e1a5a76d1fc8a5955b06cd1" "sha256": "7abb433480c00144e7e50d130155eab1a78a619f76b57ffc5464f40a6b550edd"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@ -37,6 +37,42 @@
], ],
"version": "==2.8" "version": "==2.8"
}, },
"pillow": {
"hashes": [
"sha256:047d9473cf68af50ac85f8ee5d5f21a60f849bc17d348da7fc85711287a75031",
"sha256:0f66dc6c8a3cc319561a633b6aa82c44107f12594643efa37210d8c924fc1c71",
"sha256:12c9169c4e8fe0a7329e8658c7e488001f6b4c8e88740e76292c2b857af2e94c",
"sha256:248cffc168896982f125f5c13e9317c059f74fffdb4152893339f3be62a01340",
"sha256:27faf0552bf8c260a5cee21a76e031acaea68babb64daf7e8f2e2540745082aa",
"sha256:285edafad9bc60d96978ed24d77cdc0b91dace88e5da8c548ba5937c425bca8b",
"sha256:384b12c9aa8ef95558abdcb50aada56d74bc7cc131dd62d28c2d0e4d3aadd573",
"sha256:38950b3a707f6cef09cd3cbb142474357ad1a985ceb44d921bdf7b4647b3e13e",
"sha256:4aad1b88933fd6dc2846552b89ad0c74ddbba2f0884e2c162aa368374bf5abab",
"sha256:4ac6148008c169603070c092e81f88738f1a0c511e07bd2bb0f9ef542d375da9",
"sha256:4deb1d2a45861ae6f0b12ea0a786a03d19d29edcc7e05775b85ec2877cb54c5e",
"sha256:59aa2c124df72cc75ed72c8d6005c442d4685691a30c55321e00ed915ad1a291",
"sha256:5a47d2123a9ec86660fe0e8d0ebf0aa6bc6a17edc63f338b73ea20ba11713f12",
"sha256:5cc901c2ab9409b4b7ac7b5bcc3e86ac14548627062463da0af3b6b7c555a871",
"sha256:6c1db03e8dff7b9f955a0fb9907eb9ca5da75b5ce056c0c93d33100a35050281",
"sha256:7ce80c0a65a6ea90ef9c1f63c8593fcd2929448613fc8da0adf3e6bfad669d08",
"sha256:809c19241c14433c5d6135e1b6c72da4e3b56d5c865ad5736ab99af8896b8f41",
"sha256:83792cb4e0b5af480588601467c0764242b9a483caea71ef12d22a0d0d6bdce2",
"sha256:846fa202bd7ee0f6215c897a1d33238ef071b50766339186687bd9b7a6d26ac5",
"sha256:9f5529fc02009f96ba95bea48870173426879dc19eec49ca8e08cd63ecd82ddb",
"sha256:a423c2ea001c6265ed28700df056f75e26215fd28c001e93ef4380b0f05f9547",
"sha256:ac4428094b42907aba5879c7c000d01c8278d451a3b7cccd2103e21f6397ea75",
"sha256:b1ae48d87f10d1384e5beecd169c77502fcc04a2c00a4c02b85f0a94b419e5f9",
"sha256:bf4e972a88f8841d8fdc6db1a75e0f8d763e66e3754b03006cbc3854d89f1cb1",
"sha256:c6414f6aad598364aaf81068cabb077894eb88fed99c6a65e6e8217bab62ae7a",
"sha256:c710fcb7ee32f67baf25aa9ffede4795fd5d93b163ce95fdc724383e38c9df96",
"sha256:c7be4b8a09852291c3c48d3c25d1b876d2494a0a674980089ac9d5e0d78bd132",
"sha256:c9e5ffb910b14f090ac9c38599063e354887a5f6d7e6d26795e916b4514f2c1a",
"sha256:e0697b826da6c2472bb6488db4c0a7fa8af0d52fa08833ceb3681358914b14e5",
"sha256:e9a3edd5f714229d41057d56ac0f39ad9bdba6767e8c888c951869f0bdd129b0"
],
"index": "pypi",
"version": "==6.2.1"
},
"praw": { "praw": {
"hashes": [ "hashes": [
"sha256:2e5c98e49fe60e5308255ed147b670d350f98281f84f582df30f87de727b6de2", "sha256:2e5c98e49fe60e5308255ed147b670d350f98281f84f582df30f87de727b6de2",
@ -99,10 +135,10 @@
}, },
"websocket-client": { "websocket-client": {
"hashes": [ "hashes": [
"sha256:1151d5fb3a62dc129164292e1227655e4bbc5dd5340a5165dfae61128ec50aa9", "sha256:0fc45c961324d79c781bab301359d5a1b00b13ad1b10415a4780229ef71a5549",
"sha256:1fd5520878b68b84b5748bb30e592b10d0a91529d5383f74f4964e72b297fd3a" "sha256:d735b91d6d1692a6a181f2a8c9e0238e5f6373356f561bb9dc4c7af36f452010"
], ],
"version": "==0.56.0" "version": "==0.57.0"
} }
}, },
"develop": {} "develop": {}

@ -37,6 +37,7 @@ image-extensions:
- jpeg - jpeg
min-size: 5 # minimum size in kilobytes min-size: 5 # minimum size in kilobytes
min-mp: 0.5 # minimum size in megapixels
``` ```
## Running ## Running

@ -9,65 +9,12 @@ import praw
import optparse import optparse
import zipfile import zipfile
import urllib.request as urlreq import urllib.request as urlreq
from PIL import Image
user_agent = 'linux:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent user_agent = 'linux:riddle:3.0 (by u/Trivernis)' # the reddit api user-agent
img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images img_ext = ['jpg', 'jpeg', 'png'] # default used extensions to filter for images
min_size = 5 # minimum size in kilobytes. changeable in settings min_size = 5 # minimum size in kilobytes. changeable in settings
min_mp = 0.5 # minimum megapixels. changeable in the settings
def assert_dir_exist(dirpath):
    """
    Creates the directory if it doesn't exist.
    Missing parent directories are created as well.
    :param dirpath: path to the directory
    :return: None
    """
    if not os.path.exists(dirpath):
        # makedirs handles nested paths; exist_ok tolerates the directory
        # being created by someone else between the check and this call.
        os.makedirs(dirpath, exist_ok=True)
def download_file(url: str, dest: str, progressbar = None):
    """
    Downloads a url to a file.
    The file is removed again when the download failed or when it is
    smaller than the module-level min_size (in kilobytes).
    :param url: download url
    :param dest: download destination
    :param progressbar: The progressbar instance to clear it before writing an error message
    :return: Success? (False when the download failed or the file was removed)
    """
    def report(message):
        # Clear the progressbar first so the message is not drawn over it.
        if progressbar:
            progressbar.clear()
        print(message)

    success = False
    # Context managers guarantee that the file and the HTTP response are
    # closed even if an exception that is not handled below propagates.
    with open(dest, "wb") as f:
        try:
            with urlreq.urlopen(urlreq.Request(url)) as image:
                f.write(image.read())
            success = True
        except ConnectionError:
            report('\r[-] Connection Error')
        except urlreq.HTTPError as err:
            report('\r[-] HTTPError for %s: %s' % (url, err))
        except urlreq.URLError as err:
            report('\r[-] URLError for %s: %s' % (url, err))

    try:
        file_size = round(os.path.getsize(dest) / 1000)
        if not success:
            # Remove the empty/partial file left behind by the failed download.
            os.remove(dest)
        elif file_size < min_size:
            os.remove(dest)
            success = False
            report('\r[-] Removed %s: Too small (%s kb)' % (dest, file_size))
    except IOError as err:
        report('\r[-] Error when removing file %s: %s' % (dest, err))
    return success
class ProgressBar: class ProgressBar:
@ -151,6 +98,64 @@ def parser_init():
return parser.parse_args() return parser.parse_args()
def assert_dir_exist(dirpath):
    """
    Creates the directory if it doesn't exist.
    Missing parent directories are created as well.
    :param dirpath: path to the directory
    :return: None
    """
    if not os.path.exists(dirpath):
        # makedirs handles nested paths; exist_ok tolerates the directory
        # being created by someone else between the check and this call.
        os.makedirs(dirpath, exist_ok=True)
def download_file(url: str, dest: str, progressbar = None):
    """
    Downloads a url to a file.
    The file is removed again when the download failed, when it is smaller
    than the module-level min_size (in kilobytes) or when the image has
    fewer megapixels than the module-level min_mp.
    :param url: download url
    :param dest: download destination
    :param progressbar: The progressbar instance to clear it before writing an error message
    :return: Success? (False when the download failed or the file was removed)
    """
    def report(message):
        # Clear the progressbar first so the message is not drawn over it.
        if progressbar:
            progressbar.clear()
        print(message)

    success = False
    # Context managers guarantee that the file and the HTTP response are
    # closed even if an exception that is not handled below propagates.
    with open(dest, "wb") as f:
        try:
            with urlreq.urlopen(urlreq.Request(url)) as response:
                f.write(response.read())
            success = True
        except ConnectionError:
            report('\r[-] Connection Error')
        except urlreq.HTTPError as err:
            report('\r[-] HTTPError for %s: %s' % (url, err))
        except urlreq.URLError as err:
            report('\r[-] URLError for %s: %s' % (url, err))

    try:
        if not success:
            # Check the download result before inspecting the image: the
            # leftover file is empty/partial and cannot be opened as one.
            os.remove(dest)
            return False

        file_size = round(os.path.getsize(dest) / 1000)
        mp = None
        try:
            # The with-block closes the image file handle again.
            with Image.open(dest) as img:
                width, height = img.size
            mp = (width * height) / 1000000
        except OSError as err:
            # Not a readable image: the megapixel filter cannot be applied,
            # so only the file size filter below decides.
            report('\r[-] Could not read image dimensions of %s: %s' % (dest, err))

        if file_size < min_size or (mp is not None and mp < min_mp):
            os.remove(dest)
            success = False
            report('\r[-] Removed %s: Too small (%s kb, %s MP)'
                   % (dest, file_size, mp if mp is not None else 'unknown'))
    except OSError as err:
        report('\r[-] Error when removing file %s: %s' % (dest, err))
    return success
def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False): def get_images(reddit_client: praw.Reddit, subreddit: str, limit: int, nsfw: bool = False):
""" """
Uses the reddit api to fetch all image posts Uses the reddit api to fetch all image posts
@ -258,6 +263,9 @@ def main():
if 'min-size' in settings: if 'min-size' in settings:
global min_size global min_size
min_size = int(settings['min-size']) min_size = int(settings['min-size'])
if 'min-mp' in settings:
global min_mp
min_mp = int(settings['min-mp'])
credentials = settings['credentials'] credentials = settings['credentials']
client = praw.Reddit( client = praw.Reddit(
client_id=credentials['client_id'], client_id=credentials['client_id'],

Loading…
Cancel
Save