Add support for importing fedi posts.

Summary of the changes carried by this patch:

* src/args.rs: new `import-fedi-posts` subcommand that takes the same
  ImportUrlsOptions as `import-reddit-posts`.
* src/main.rs: the new subcommand is dispatched to import_fedi_posts();
  import_urls() now sorts urls into reddit, fedi and unknown buckets and
  imports the fedi bucket through find_and_send_fedi_posts().
* src/operations/find_and_send_fedi_posts.rs: for every post url, each
  attachment url is imported into hydrus and the post url is associated with
  the imported files. A failing post is logged and skipped, the remaining
  posts are still processed.
* src/utils/fedi.rs: get_post() requests the url with the header
  `Accept: application/activity+json` and deserializes an object tagged
  `Page`; get_post_images() returns the href of every `Link` attachment.
* src/utils/urls.rs: find_url_type() becomes async because is_fedi_url()
  probes the url through get_post(), i.e. one HTTP request is issued for
  every url that is not a reddit url.

Reviewer notes:

* The two #[tokio::test] functions in src/utils/fedi.rs fetch a live post from
  lemmy.blahaj.zone, so they need network access to pass.
* src/utils/fedi.rs starts with #![allow(unused)]; the imports `regex`,
  `Policy` and `StatusCode` are not referenced anywhere in that file.

diff --git a/src/args.rs b/src/args.rs
index be5d4d5..5c37f2e 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -23,6 +23,10 @@ pub enum Command {
     #[clap(name = "import-reddit-posts")]
     ImportRedditPosts(ImportUrlsOptions),
 
+    /// Looks up and imports fedi posts
+    #[clap(name = "import-fedi-posts")]
+    ImportFediPosts(ImportUrlsOptions),
+
     /// Looks up a list of urls and imports media found for them
     #[clap(name = "import-urls")]
     ImportUrls(ImportUrlsOptions),
diff --git a/src/main.rs b/src/main.rs
index 90e7fe9..9d16925 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,6 +7,7 @@ pub mod utils;
 use crate::config::Config;
 use crate::config::SauceNaoConfig;
 use crate::error::Result;
+use crate::operations::find_and_send_fedi_posts::find_and_send_fedi_posts;
 use crate::operations::find_and_send_tags::find_and_send_tags;
 use crate::operations::find_and_send_urls::find_and_send_urls;
 use args::*;
@@ -44,6 +45,7 @@ async fn main() {
             send_tags_or_urls(opt, config.into_saucenao(), hydrus, false).await
         }
         Command::ImportRedditPosts(opt) => import_reddit_posts(opt, hydrus).await,
+        Command::ImportFediPosts(opt) => import_fedi_posts(opt, hydrus).await,
         Command::ImportUrls(opt) => import_urls(opt, hydrus).await,
     }
     .expect("Failed to send tags or urls");
@@ -121,14 +123,22 @@ async fn import_reddit_posts(opt: ImportUrlsOptions, hydrus: Hydrus) -> Result<(
     find_and_send_reddit_posts(&hydrus, urls).await
 }
 
+#[tracing::instrument(level = "debug", skip(hydrus))]
+async fn import_fedi_posts(opt: ImportUrlsOptions, hydrus: Hydrus) -> Result<()> {
+    let urls = get_urls_from_args(opt).await?;
+    find_and_send_fedi_posts(&hydrus, urls).await
+}
+
 async fn import_urls(opt: ImportUrlsOptions, hydrus: Hydrus) -> Result<()> {
     let urls = get_urls_from_args(opt).await?;
     let mut reddit_urls = Vec::new();
+    let mut fedi_urls = Vec::new();
     let mut unknown_urls = Vec::new();
 
     for url in urls {
-        match find_url_type(&url) {
+        match find_url_type(&url).await {
             UrlType::Reddit => reddit_urls.push(url),
+            UrlType::Fedi => fedi_urls.push(url),
             UrlType::Other => {
                 tracing::warn!("Unknown url type {url}");
                 unknown_urls.push(url)
@@ -137,6 +147,7 @@ async fn import_urls(opt: ImportUrlsOptions, hydrus: Hydrus) -> Result<()> {
     }
     tracing::info!("Importing reddit posts...");
     find_and_send_reddit_posts(&hydrus, reddit_urls).await?;
+    find_and_send_fedi_posts(&hydrus, fedi_urls).await?;
 
     tracing::info!("Importing unknown urls...");
 
diff --git a/src/operations/find_and_send_fedi_posts.rs b/src/operations/find_and_send_fedi_posts.rs
new file mode 100644
index 0000000..3570c7c
--- /dev/null
+++ b/src/operations/find_and_send_fedi_posts.rs
@@ -0,0 +1,35 @@
+use hydrus_api::Hydrus;
+
+use crate::error::Result;
+use crate::utils::fedi::get_post_images;
+
+#[tracing::instrument(level = "debug", skip(hydrus))]
+pub async fn find_and_send_fedi_posts(hydrus: &Hydrus, post_urls: Vec<String>) -> Result<()> {
+    let total_posts = post_urls.len();
+
+    for (index, post) in post_urls.into_iter().enumerate() {
+        tracing::info!("Importing post {} of {}", index + 1, total_posts);
+        if let Err(e) = import_post(&post, hydrus).await {
+            tracing::error!("Failed to import {}: {}", post, e);
+        }
+    }
+
+    Ok(())
+}
+
+#[tracing::instrument(level = "debug", skip(hydrus))]
+async fn import_post(post_url: &str, hydrus: &Hydrus) -> Result<()> {
+    tracing::debug!("Post {}", post_url);
+    let images = get_post_images(post_url).await?;
+    tracing::info!("Found {} images for post {}", images.len(), post_url);
+
+    for url in images {
+        let mut entry = hydrus.import().url(url).run().await?;
+        let files = entry.files().await?;
+
+        for mut file in files {
+            file.associate_urls(vec![post_url.to_string()]).await?;
+        }
+    }
+    Ok(())
+}
diff --git a/src/operations/mod.rs b/src/operations/mod.rs
index 6b905c2..7644276 100644
--- a/src/operations/mod.rs
+++ b/src/operations/mod.rs
@@ -1,3 +1,4 @@
+pub mod find_and_send_fedi_posts;
 pub mod find_and_send_reddit_posts;
 pub mod find_and_send_tags;
 pub mod find_and_send_urls;
diff --git a/src/utils/fedi.rs b/src/utils/fedi.rs
new file mode 100644
index 0000000..304efe3
--- /dev/null
+++ b/src/utils/fedi.rs
@@ -0,0 +1,89 @@
+#![allow(unused)]
+use std::collections::HashMap;
+
+use crate::Result;
+use lazy_regex::regex;
+use reqwest::header::{HeaderMap, HeaderValue};
+use reqwest::ClientBuilder;
+use reqwest::{redirect::Policy, StatusCode};
+use serde::Deserialize;
+use serde_json::Value;
+use std::fmt::Debug;
+
+#[derive(Debug, Deserialize)]
+#[serde(tag = "type")]
+enum EntryData {
+    Page(PostData),
+}
+
+#[derive(Debug, Deserialize)]
+struct PostData {
+    id: String,
+    name: String,
+    attachment: Vec<Attachment>,
+    #[serde(flatten)]
+    _extra: HashMap<String, Value>,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(tag = "type")]
+enum Attachment {
+    Link { href: String },
+}
+
+pub async fn is_fedi_url(url: &str) -> bool {
+    get_post(url).await.is_ok()
+}
+
+/// Returns all images associated with a post
+#[tracing::instrument(level = "debug")]
+pub async fn get_post_images<S: AsRef<str> + Debug>(post_url: S) -> Result<Vec<String>> {
+    let post_data = get_post(post_url.as_ref()).await?;
+
+    let urls = post_data
+        .attachment
+        .into_iter()
+        .map(|p| {
+            let Attachment::Link { href } = p;
+            href
+        })
+        .collect();
+
+    Ok(urls)
+}
+
+#[tracing::instrument(level = "debug")]
+async fn get_post(url: &str) -> Result<PostData> {
+    let mut headers = HeaderMap::new();
+    headers.insert(
+        "Accept",
+        HeaderValue::from_static("application/activity+json"),
+    );
+
+    let client = ClientBuilder::default()
+        .default_headers(headers)
+        .user_agent(fakeit::user_agent::random_platform())
+        .build()?;
+    let mut response: EntryData = client.get(url).send().await?.json().await?;
+
+    let EntryData::Page(post) = response;
+
+    Ok(post)
+}
+
+#[tokio::test]
+async fn it_retrieves_post_data() {
+    let data = get_post("https://lemmy.blahaj.zone/post/113727")
+        .await
+        .unwrap();
+    assert!(!data.attachment.is_empty());
+}
+
+#[tokio::test]
+async fn it_retrieves_post_images() {
+    let images = get_post_images("https://lemmy.blahaj.zone/post/113727")
+        .await
+        .unwrap();
+    assert!(!images.is_empty());
+    assert!(images.get(0).unwrap().ends_with(".jpg"));
+}
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 420169a..f227f30 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1,6 +1,8 @@
+pub mod fedi;
 pub mod pixiv;
 pub mod reddit;
 pub mod urls;
+
 use crate::error::Result;
 use directories::ProjectDirs;
 use std::{fs, path::PathBuf};
diff --git a/src/utils/urls.rs b/src/utils/urls.rs
index 2c234b1..71e4c4d 100644
--- a/src/utils/urls.rs
+++ b/src/utils/urls.rs
@@ -1,13 +1,17 @@
+use super::fedi;
 use lazy_regex::regex;
 
 pub enum UrlType {
     Reddit,
+    Fedi,
     Other,
 }
 
-pub fn find_url_type(url: &str) -> UrlType {
+pub async fn find_url_type(url: &str) -> UrlType {
     if is_reddit_url(url) {
         UrlType::Reddit
+    } else if fedi::is_fedi_url(url).await {
+        UrlType::Fedi
     } else {
         UrlType::Other
     }