From bed1003f0b9891ebddaf25c0669245e8710970ca Mon Sep 17 00:00:00 2001
From: trivernis
Date: Sat, 13 Aug 2022 15:34:30 +0200
Subject: [PATCH] Add reddit url import feature

Signed-off-by: trivernis
---
Review notes (text between the three-dash line and the first diff is not
part of the commit message):

* The line structure of this patch and the generic arguments of the added
  Rust code were lost during extraction and have been restored.
* Not recoverable from the damaged text, verify against the target tree
  before applying: the e-mail addresses of the author and the sign-off, the
  context lines "pub tags: Vec," (src/args.rs) and "impl From for Error {"
  (src/error.rs), and the exact indentation of all context lines.
* The commit id and the blob ids in the "index" lines are those of the
  original commit and do not reflect the changes listed below.
* Changes compared to the original commit:
  - get_post returns Error::Reddit instead of panicking on an empty response,
    an empty listing or a t1 entry.
  - import_reddit_posts combines --input and --urls, trims lines, skips
    blank lines and returns Error::NoRedditUrls instead of panicking.
  - The expect message in main no longer names only tags and urls.
  - Added doc comments, simplified the test assertions.

 .gitignore                                   |   1 +
 Cargo.lock                                   |   3 +
 Cargo.toml                                   |   5 +-
 src/args.rs                                  |  28 +++-
 src/error.rs                                 |   9 ++
 src/main.rs                                  |  41 +++++-
 src/operations/find_and_send_reddit_posts.rs |  26 ++++
 src/operations/mod.rs                        |   1 +
 src/utils/mod.rs                             |   1 +
 src/utils/reddit.rs                          | 139 +++++++++++++++++++
 10 files changed, 246 insertions(+), 8 deletions(-)
 create mode 100644 src/operations/find_and_send_reddit_posts.rs
 create mode 100644 src/utils/reddit.rs

diff --git a/.gitignore b/.gitignore
index ea8c4bf..51fd28d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 /target
+reddit-urls.txt
diff --git a/Cargo.lock b/Cargo.lock
index 1f44ae4..ee5685f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -632,7 +632,10 @@ dependencies = [
  "clap",
  "hydrus-api",
  "pixiv-rs",
+ "reqwest 0.11.11",
  "rustnao",
+ "serde",
+ "serde_json",
  "tempdir",
  "thiserror",
  "tokio 1.20.1",
diff --git a/Cargo.toml b/Cargo.toml
index 874e56e..c032a7d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,7 +19,10 @@ thiserror = "1.0.32"
 tracing-subscriber = { version = "0.3.15", features = ["env-filter"] }
 tracing = "0.1.36"
 clap = { version = "3.2.17", features = ["derive", "env"] }
+serde = { version = "1.0.143", features = ["derive"] }
+reqwest = { version = "0.11.11", features = ["json"] }
+serde_json = "1.0.83"
 
 [dependencies.tokio]
 version = "1.20.1"
-features = ["macros", "rt", "time"]
+features = ["macros", "rt", "time", "fs"]
diff --git a/src/args.rs b/src/args.rs
index 301a1f2..09cece7 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -1,3 +1,5 @@
+use std::path::PathBuf;
+
 use clap::{Parser, Subcommand};
 
 #[derive(Parser, Debug)]
@@ -17,17 +19,21 @@ pub struct Args {
 
 #[derive(Subcommand, Clone, Debug)]
 pub enum Command {
-    #[clap(name = "send-url")]
     /// Looks up files on saucenao and sends urls to hydrus to be imported
-    FindAndSendUrl(Options),
+    #[clap(name = "send-url")]
+    FindAndSendUrl(LookupOptions),
 
-    #[clap(name = "send-tags")]
     /// Looks up files on saucenao and maps the tags found on pixiv to the files
-    FindAndSendTags(Options),
+    #[clap(name = "send-tags")]
+    FindAndSendTags(LookupOptions),
+
+    /// Looks up and imports reddit posts
+    #[clap(name = "import-reddit-posts")]
+    ImportRedditPosts(ImportRedditOptions),
 }
 
 #[derive(Parser, Debug, Clone)]
-pub struct Options {
+pub struct LookupOptions {
     /// The saucenao api key
     #[clap(long, env)]
     pub saucenao_key: String,
@@ -44,3 +50,15 @@ pub struct Options {
     #[clap(short, long)]
     pub tags: Vec,
 }
+
+#[derive(Parser, Debug, Clone)]
+pub struct ImportRedditOptions {
+    /// A file containing all urls with each
+    /// url in a separate line
+    #[clap(short, long)]
+    pub input: Option<PathBuf>,
+
+    /// A list of urls to import
+    #[clap(short, long)]
+    pub urls: Option<Vec<String>>,
+}
diff --git a/src/error.rs b/src/error.rs
index 336bcc4..ab3804b 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -18,6 +18,15 @@ pub enum Error {
 
     #[error(transparent)]
     Io(#[from] std::io::Error),
+
+    #[error(transparent)]
+    Reqwest(#[from] reqwest::Error),
+
+    #[error("invalid reddit response: {0}")]
+    Reddit(String),
+
+    #[error("no reddit post urls provided")]
+    NoRedditUrls,
 }
 
 impl From for Error {
diff --git a/src/main.rs b/src/main.rs
index 7fa3c31..118cdd4 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,10 +11,13 @@ use clap::Parser;
 use hydrus_api::wrapper::service::ServiceName;
 use hydrus_api::wrapper::tag::Tag;
 use hydrus_api::{Client, Hydrus};
+use operations::find_and_send_reddit_posts::find_and_send_reddit_posts;
 use pixiv_rs::PixivClient;
 use rustnao::{Handler, HandlerBuilder};
 use std::str::FromStr;
 use tempdir::TempDir;
+use tokio::fs::File;
+use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::time::{Duration, Instant};
 use tracing_subscriber::fmt::format::FmtSpan;
 use tracing_subscriber::EnvFilter;
@@ -29,6 +32,7 @@ async fn main() {
     match args.subcommand {
         Command::FindAndSendUrl(opt) => send_tags_or_urls(opt, hydrus, true).await,
         Command::FindAndSendTags(opt) => send_tags_or_urls(opt, hydrus, false).await,
+        Command::ImportRedditPosts(opt) => import_reddit_posts(opt, hydrus).await,
     }
-    .expect("Failed to send tags or urls");
+    .expect("Failed to run subcommand");
 }
@@ -47,7 +51,7 @@ fn init_logger() {
         .init();
 }
 
-async fn send_tags_or_urls(opt: Options, hydrus: Hydrus, send_urls: bool) -> Result<()> {
+async fn send_tags_or_urls(opt: LookupOptions, hydrus: Hydrus, send_urls: bool) -> Result<()> {
     let pixiv = PixivClient::new();
 
     let handler = HandlerBuilder::new()
@@ -89,3 +93,36 @@ async fn send_tags_or_urls(opt: Options, hydrus: Hydrus, send_urls: bool) -> Res
 
     Ok(())
 }
+
+/// Imports the images of reddit posts into hydrus.
+///
+/// The post urls passed with `--urls` and the lines of the `--input` file are
+/// combined. Lines are trimmed and blank lines are skipped so that a trailing
+/// newline in the file does not result in a request for an empty url.
+///
+/// # Errors
+///
+/// Fails if the input file cannot be read, if no url was provided at all or if
+/// looking up or importing one of the posts fails.
+async fn import_reddit_posts(opt: ImportRedditOptions, hydrus: Hydrus) -> Result<()> {
+    let mut urls = opt.urls.unwrap_or_default();
+
+    if let Some(input_file) = opt.input {
+        let file = File::open(input_file).await?;
+        let mut lines = BufReader::new(file).lines();
+
+        while let Some(line) = lines.next_line().await? {
+            let line = line.trim();
+
+            if !line.is_empty() {
+                urls.push(line.to_string());
+            }
+        }
+    }
+
+    if urls.is_empty() {
+        return Err(crate::error::Error::NoRedditUrls);
+    }
+
+    find_and_send_reddit_posts(&hydrus, urls).await
+}
diff --git a/src/operations/find_and_send_reddit_posts.rs b/src/operations/find_and_send_reddit_posts.rs
new file mode 100644
index 0000000..a898b5a
--- /dev/null
+++ b/src/operations/find_and_send_reddit_posts.rs
@@ -0,0 +1,26 @@
+use hydrus_api::Hydrus;
+
+use crate::error::Result;
+use crate::utils::reddit::get_post_images;
+
+/// Looks up the images of each reddit post and sends their urls to hydrus.
+///
+/// # Errors
+///
+/// Stops at the first post whose lookup or import fails and returns that
+/// error.
+pub async fn find_and_send_reddit_posts(hydrus: &Hydrus, post_urls: Vec<String>) -> Result<()> {
+    let total_posts = post_urls.len();
+
+    for (index, post) in post_urls.into_iter().enumerate() {
+        tracing::info!("Importing post {} of {}", index + 1, total_posts);
+        let images = get_post_images(&post).await?;
+        tracing::info!("Found {} images for post {}", images.len(), post);
+
+        for url in images {
+            hydrus.import().url(url).run().await?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/operations/mod.rs b/src/operations/mod.rs
index 14fb073..6b905c2 100644
--- a/src/operations/mod.rs
+++ b/src/operations/mod.rs
@@ -1,2 +1,3 @@
+pub mod find_and_send_reddit_posts;
 pub mod find_and_send_tags;
 pub mod find_and_send_urls;
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 95543b2..0ec5ca1 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1 +1,2 @@
 pub mod pixiv;
+pub mod reddit;
diff --git a/src/utils/reddit.rs b/src/utils/reddit.rs
new file mode 100644
index 0000000..b7f1992
--- /dev/null
+++ b/src/utils/reddit.rs
@@ -0,0 +1,139 @@
+// several fields of the response types are only deserialized and never read
+#![allow(unused)]
+use std::collections::HashMap;
+
+use crate::error::Error;
+use crate::Result;
+use serde::Deserialize;
+use serde_json::Value;
+
+#[derive(Deserialize)]
+#[serde(tag = "kind", content = "data")]
+enum DataEntry {
+    Listing(ListingEntry),
+}
+
+#[derive(Deserialize)]
+struct ListingEntry {
+    children: Vec<DataEntryChild>,
+    #[serde(flatten)]
+    _extra: HashMap<String, Value>,
+}
+
+#[derive(Deserialize)]
+#[serde(tag = "kind", content = "data")]
+enum DataEntryChild {
+    #[serde(alias = "t3")]
+    T3(T3Data),
+    #[serde(alias = "t1")]
+    T1(HashMap<String, Value>),
+}
+
+#[derive(Deserialize, Debug)]
+struct T3Data {
+    id: String,
+    url: Option<String>,
+    gallery_data: Option<GalleryData>,
+    #[serde(flatten)]
+    _extra: HashMap<String, Value>,
+}
+
+#[derive(Deserialize, Debug)]
+struct GalleryData {
+    items: Vec<GalleryItem>,
+}
+
+#[derive(Deserialize, Debug)]
+struct GalleryItem {
+    media_id: String,
+    id: u64,
+}
+
+/// Returns all images associated with a post
+///
+/// For gallery posts one url per gallery item is returned, otherwise the url
+/// of the post itself. Posts with neither yield an empty list.
+pub async fn get_post_images<S: AsRef<str>>(post_url: S) -> Result<Vec<String>> {
+    let post_data = get_post(post_url.as_ref()).await?;
+
+    if let Some(gallery_data) = post_data.gallery_data {
+        // NOTE(review): every gallery item is assumed to be served as jpg
+        let urls = gallery_data
+            .items
+            .into_iter()
+            .map(|item| format!("https://i.redd.it/{}.jpg", item.media_id))
+            .collect();
+        Ok(urls)
+    } else if let Some(url) = post_data.url {
+        Ok(vec![url])
+    } else {
+        Ok(Vec::new())
+    }
+}
+
+/// Requests `<url>.json` and returns the data of the first child of the
+/// first listing in the response.
+///
+/// # Errors
+///
+/// Fails if the request or the deserialization fails, if the response or its
+/// first listing is empty or if the first child is not of kind `t3`.
+async fn get_post(url: &str) -> Result<T3Data> {
+    let response: Vec<DataEntry> = reqwest::get(format!("{}.json", url)).await?.json().await?;
+    let first_entry = response
+        .into_iter()
+        .next()
+        .ok_or_else(|| Error::Reddit(format!("no listing returned for {}", url)))?;
+    let children = match first_entry {
+        DataEntry::Listing(listing) => listing.children,
+    };
+
+    match children.into_iter().next() {
+        Some(DataEntryChild::T3(post)) => Ok(post),
+        Some(DataEntryChild::T1(_)) => {
+            Err(Error::Reddit(format!("unexpected t1 entry for {}", url)))
+        }
+        None => Err(Error::Reddit(format!("empty listing returned for {}", url))),
+    }
+}
+
+#[cfg(test)]
+mod test {
+    #[tokio::test]
+    async fn it_finds_post_images() {
+        let images =
+            super::get_post_images("https://www.reddit.com/r/196/comments/wmx2k3/dame_da_rule/")
+                .await
+                .unwrap();
+        assert!(!images.is_empty());
+    }
+
+    #[tokio::test]
+    async fn it_finds_multiple_post_images() {
+        let images =
+            super::get_post_images("https://www.reddit.com/r/dogelore/comments/wmas8c/le_yakuza/")
+                .await
+                .unwrap();
+        assert!(!images.is_empty());
+    }
+
+    #[tokio::test]
+    async fn it_finds_info_for_posts() {
+        let post = super::get_post("https://www.reddit.com/r/196/comments/wmx2k3/dame_da_rule/")
+            .await
+            .unwrap();
+        println!("{:?}", post.url);
+        assert!(post.url.is_some());
+    }
+
+    #[tokio::test]
+    async fn it_finds_info_for_gallery_posts() {
+        let post = super::get_post("https://www.reddit.com/r/dogelore/comments/wmas8c/le_yakuza/")
+            .await
+            .unwrap();
+        println!("{:?}", post.gallery_data);
+        assert!(post.gallery_data.is_some());
+        let gallery_data = post.gallery_data.unwrap();
+        assert!(!gallery_data.items.is_empty())
+    }
+}