From a3596556f08e5b6d56beae0d019525ea3b29bd24 Mon Sep 17 00:00:00 2001
From: trivernis
Date: Tue, 23 Aug 2022 16:56:42 +0200
Subject: [PATCH] Add generic subcommand to import any kind of url

Signed-off-by: trivernis
---
Review notes (this section is not part of the commit message):
 * Line breaks and the `Result<Vec<String>>` return type were lost during
   extraction and have been restored; indentation is reconstructed.
 * Both url checks are now case-insensitive and require the host to end right
   after the domain, so `https://reddit.com.example.org` is no longer
   classified as a reddit url.
 * Doc comments and derives were added, so the `index` blob ids of
   src/main.rs and src/utils/urls.rs no longer match the content below.

 Cargo.lock        | 24 ++++++++++++++++++++++++
 Cargo.toml        |  1 +
 src/args.rs       |  4 ++++
 src/main.rs       | 41 +++++++++++++++++++++++++++++++++++++++++
 src/utils/mod.rs  |  1 +
 src/utils/urls.rs | 40 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 111 insertions(+)
 create mode 100644 src/utils/urls.rs

diff --git a/Cargo.lock b/Cargo.lock
index 035db4d..a6dcb12 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -936,6 +936,7 @@ dependencies = [
  "directories",
  "egg-mode",
  "hydrus-api",
+ "lazy-regex",
  "pixiv-rs",
  "reqwest 0.11.11",
  "rustnao",
@@ -1145,6 +1146,29 @@ dependencies = [
  "winapi-build",
 ]
 
+[[package]]
+name = "lazy-regex"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6b12f2eb6ed7d39405c5eb25a034b4c106a9ad87a6d9be3298de6c5f32fd57d"
+dependencies = [
+ "lazy-regex-proc_macros",
+ "once_cell",
+ "regex",
+]
+
+[[package]]
+name = "lazy-regex-proc_macros"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2496e5264069bc726ccf37eb76b9cd89406ae110d836c3f76729f99c8a23293"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "regex",
+ "syn",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.4.0"
diff --git a/Cargo.toml b/Cargo.toml
index 63ede9f..0068bd2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,6 +26,7 @@ config = "0.13.2"
 directories = "4.0.1"
 color-eyre = "0.6.2"
 egg-mode = "0.16.0"
+lazy-regex = "2.3.0"
 
 [dependencies.tokio]
 version = "1.20.1"
diff --git a/src/args.rs b/src/args.rs
index e9504e9..c26e5f8 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -26,6 +26,10 @@ pub enum Command {
     /// Looks up and imports tweets
     #[clap(name = "import-tweets")]
     ImportTweets(ImportUrlsOptions),
+
+    /// Looks up a list of urls and imports media found for them
+    #[clap(name = "import-urls")]
+    ImportUrls(ImportUrlsOptions),
 }
 
 #[derive(Parser, Debug, Clone)]
diff --git a/src/main.rs b/src/main.rs
index 827b4ca..3aaeef7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -26,6 +26,8 @@ use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::time::{Duration, Instant};
 use tracing_subscriber::fmt::format::FmtSpan;
 use tracing_subscriber::EnvFilter;
+use utils::urls::find_url_type;
+use utils::urls::UrlType;
 
 #[tokio::main(flavor = "current_thread")]
 async fn main() {
@@ -45,6 +47,7 @@ async fn main() {
         }
         Command::ImportRedditPosts(opt) => import_reddit_posts(opt, hydrus).await,
         Command::ImportTweets(opt) => import_tweets(opt, config.into_twitter_cfg(), hydrus).await,
+        Command::ImportUrls(opt) => import_urls(opt, config, hydrus).await,
     }
     .expect("Failed to send tags or urls");
 }
@@ -131,6 +134,44 @@ async fn import_tweets(
     find_and_send_twitter_posts(&hydrus, twitter_cfg, urls).await
 }
 
+/// Imports a mixed list of urls, dispatching each one by its detected type.
+///
+/// Reddit and twitter urls are handed to their dedicated importers; every other
+/// url is passed to hydrus directly. A failing reddit or twitter batch aborts
+/// the command, while a failing url of unknown type is only logged.
+async fn import_urls(opt: ImportUrlsOptions, cfg: Config, hydrus: Hydrus) -> Result<()> {
+    let urls = get_urls_from_args(opt).await?;
+    let mut reddit_urls = Vec::new();
+    let mut twitter_urls = Vec::new();
+    let mut unknown_urls = Vec::new();
+
+    for url in urls {
+        match find_url_type(&url) {
+            UrlType::Reddit => reddit_urls.push(url),
+            UrlType::Twitter => twitter_urls.push(url),
+            UrlType::Other => {
+                tracing::warn!("Unknown url type {url}");
+                unknown_urls.push(url)
+            }
+        }
+    }
+    tracing::info!("Importing reddit posts...");
+    find_and_send_reddit_posts(&hydrus, reddit_urls).await?;
+
+    tracing::info!("Importing twitter posts...");
+    find_and_send_twitter_posts(&hydrus, cfg.into_twitter_cfg(), twitter_urls).await?;
+
+    tracing::info!("Importing unknown urls...");
+
+    for url in unknown_urls {
+        if let Err(e) = hydrus.import().url(&url).run().await {
+            tracing::error!("Failed to import {url}: {e}")
+        }
+    }
+
+    Ok(())
+}
+
 async fn get_urls_from_args(opt: ImportUrlsOptions) -> Result<Vec<String>> {
     let mut urls = Vec::new();
     if let Some(input_file) = opt.input {
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 4b00887..dc4603a 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1,6 +1,7 @@
 pub mod pixiv;
 pub mod reddit;
 pub mod twitter;
+pub mod urls;
 use crate::error::Result;
 use directories::ProjectDirs;
 use std::{fs, path::PathBuf};
diff --git a/src/utils/urls.rs b/src/utils/urls.rs
new file mode 100644
index 0000000..8d579f4
--- /dev/null
+++ b/src/utils/urls.rs
@@ -0,0 +1,40 @@
+use lazy_regex::regex;
+
+/// The kind of site a url belongs to, used to pick the matching importer.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum UrlType {
+    /// A url on `reddit.com` or a `redd.it` short link
+    Reddit,
+    /// A url on `twitter.com`
+    Twitter,
+    /// Any url that matches none of the known sites
+    Other,
+}
+
+/// Classifies `url` by matching its scheme and host against the known sites.
+pub fn find_url_type(url: &str) -> UrlType {
+    if is_reddit_url(url) {
+        UrlType::Reddit
+    } else if is_twitter_url(url) {
+        UrlType::Twitter
+    } else {
+        UrlType::Other
+    }
+}
+
+/// Returns `true` for http(s) urls whose host is `reddit.com` or `redd.it`,
+/// optionally prefixed with `www.`.
+fn is_reddit_url(url: &str) -> bool {
+    // The host has to end right after the domain so that lookalikes such as
+    // `reddit.com.example.org` are not treated as reddit urls.
+    let r = regex!(r#"^https?://(?:www\.)?(?:reddit\.com|redd\.it)(?:[/?#:]|$)"#i);
+    r.is_match(url)
+}
+
+/// Returns `true` for http(s) urls whose host is `twitter.com`, optionally
+/// prefixed with `www.`.
+fn is_twitter_url(url: &str) -> bool {
+    // Case-insensitive like the reddit check, and with the same host boundary.
+    let r = regex!(r#"^https?://(?:www\.)?twitter\.com(?:[/?#:]|$)"#i);
+    r.is_match(url)
+}