Add reddit url import feature

Signed-off-by: trivernis <trivernis@protonmail.com>
Branch: main
Author: trivernis (2 years ago)
Parent: ecf883e6e7
Commit: bed1003f0b
Signed by: Trivernis (GPG Key ID: DFFFCC2C7A02DB45)

.gitignore (vendored, 1 change)

@@ -1 +1,2 @@
 /target
+reddit-urls.txt

Cargo.lock (generated, 3 changes)

@@ -632,7 +632,10 @@ dependencies = [
  "clap",
  "hydrus-api",
  "pixiv-rs",
+ "reqwest 0.11.11",
  "rustnao",
+ "serde",
+ "serde_json",
  "tempdir",
  "thiserror",
  "tokio 1.20.1",

Cargo.toml

@@ -19,7 +19,10 @@ thiserror = "1.0.32"
 tracing-subscriber = { version = "0.3.15", features = ["env-filter"] }
 tracing = "0.1.36"
 clap = { version = "3.2.17", features = ["derive", "env"] }
+serde = { version = "1.0.143", features = ["derive"] }
+reqwest = { version = "0.11.11", features = ["json"] }
+serde_json = "1.0.83"

 [dependencies.tokio]
 version = "1.20.1"
-features = ["macros", "rt", "time"]
+features = ["macros", "rt", "time", "fs"]

src/args.rs

@@ -1,3 +1,5 @@
+use std::path::PathBuf;
+
 use clap::{Parser, Subcommand};

 #[derive(Parser, Debug)]
@@ -17,17 +19,21 @@ pub struct Args {
 #[derive(Subcommand, Clone, Debug)]
 pub enum Command {
     #[clap(name = "send-url")]
     /// Looks up files on saucenao and sends urls to hydrus to be imported
-    FindAndSendUrl(Options),
+    FindAndSendUrl(LookupOptions),

     #[clap(name = "send-tags")]
     /// Looks up files on saucenao and maps the tags found on pixiv to the files
-    FindAndSendTags(Options),
+    FindAndSendTags(LookupOptions),
+
+    /// Looks up and imports reddit posts
+    #[clap(name = "import-reddit-posts")]
+    ImportRedditPosts(ImportRedditOptions),
 }

 #[derive(Parser, Debug, Clone)]
-pub struct Options {
+pub struct LookupOptions {
     /// The saucenao api key
     #[clap(long, env)]
     pub saucenao_key: String,
@@ -44,3 +50,15 @@ pub struct Options {
     #[clap(short, long)]
     pub tags: Vec<String>,
 }
+
+#[derive(Parser, Debug, Clone)]
+pub struct ImportRedditOptions {
+    /// A file containing all urls with each
+    /// url in a separate line
+    #[clap(short, long)]
+    pub input: Option<PathBuf>,
+
+    /// A list of urls to import
+    #[clap(short, long)]
+    pub urls: Option<Vec<String>>,
+}
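To make the new surface concrete, here is a minimal, self-contained sketch of how the subcommand parses (not part of the commit; the binary name `hydrus-utils` and the trimmed-down structs are illustrative assumptions, using clap 3 derive as pinned in Cargo.toml):

```rust
use std::path::PathBuf;

use clap::{Parser, Subcommand};

#[derive(Parser, Debug)]
struct Args {
    #[clap(subcommand)]
    subcommand: Command,
}

#[derive(Subcommand, Clone, Debug)]
enum Command {
    #[clap(name = "import-reddit-posts")]
    ImportRedditPosts(ImportRedditOptions),
}

#[derive(Parser, Debug, Clone)]
struct ImportRedditOptions {
    /// A file containing one url per line
    #[clap(short, long)]
    input: Option<PathBuf>,

    /// Urls passed directly on the command line
    #[clap(short, long)]
    urls: Option<Vec<String>>,
}

fn main() {
    // Either form works:
    //   hydrus-utils import-reddit-posts --input reddit-urls.txt
    //   hydrus-utils import-reddit-posts -u <url> -u <url>
    let args = Args::parse_from([
        "hydrus-utils",
        "import-reddit-posts",
        "--input",
        "reddit-urls.txt",
    ]);
    println!("{:?}", args);
}
```

Since both fields are `Option`s, clap accepts either source (or neither); the mutual-exclusion check happens later in `import_reddit_posts` in `main.rs`.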

src/error.rs

@@ -18,6 +18,9 @@ pub enum Error {
     #[error(transparent)]
     Io(#[from] std::io::Error),

+    #[error(transparent)]
+    Reqwest(#[from] reqwest::Error),
 }

 impl From<RustNaoError> for Error {
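For readers unfamiliar with thiserror: the new `#[from]` variant is what lets the reddit code below use `?` directly on reqwest results. A minimal sketch (the `fetch_json` helper is hypothetical, assuming the crate's `Result` alias from `src/error.rs`):

```rust
// Hypothetical helper: `?` converts reqwest::Error into the crate's Error
// via the From impl that thiserror generates for `Reqwest(#[from] ...)`.
async fn fetch_json(url: &str) -> crate::error::Result<serde_json::Value> {
    let value = reqwest::get(url).await?.json().await?;
    Ok(value)
}
```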

src/main.rs

@@ -11,10 +11,13 @@ use clap::Parser;
 use hydrus_api::wrapper::service::ServiceName;
 use hydrus_api::wrapper::tag::Tag;
 use hydrus_api::{Client, Hydrus};
+use operations::find_and_send_reddit_posts::find_and_send_reddit_posts;
 use pixiv_rs::PixivClient;
 use rustnao::{Handler, HandlerBuilder};
 use std::str::FromStr;
 use tempdir::TempDir;
+use tokio::fs::File;
+use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::time::{Duration, Instant};
 use tracing_subscriber::fmt::format::FmtSpan;
 use tracing_subscriber::EnvFilter;
@@ -29,6 +32,7 @@ async fn main() {
     match args.subcommand {
         Command::FindAndSendUrl(opt) => send_tags_or_urls(opt, hydrus, true).await,
         Command::FindAndSendTags(opt) => send_tags_or_urls(opt, hydrus, false).await,
+        Command::ImportRedditPosts(opt) => import_reddit_posts(opt, hydrus).await,
     }
     .expect("Failed to send tags or urls");
 }
@@ -47,7 +51,7 @@ fn init_logger() {
         .init();
 }

-async fn send_tags_or_urls(opt: Options, hydrus: Hydrus, send_urls: bool) -> Result<()> {
+async fn send_tags_or_urls(opt: LookupOptions, hydrus: Hydrus, send_urls: bool) -> Result<()> {
     let pixiv = PixivClient::new();

     let handler = HandlerBuilder::new()
@@ -89,3 +93,23 @@ async fn send_tags_or_urls(opt: Options, hydrus: Hydrus, send_urls: bool) -> Result<()> {
     Ok(())
 }
+
+async fn import_reddit_posts(opt: ImportRedditOptions, hydrus: Hydrus) -> Result<()> {
+    let mut urls = Vec::new();
+
+    if let Some(input_file) = opt.input {
+        let file = File::open(input_file).await?;
+        let reader = BufReader::new(file);
+        let mut lines = reader.lines();
+
+        while let Some(line) = lines.next_line().await? {
+            urls.push(line);
+        }
+    } else if let Some(args_urls) = opt.urls {
+        urls = args_urls;
+    } else {
+        panic!("No reddit post urls provided");
+    }
+
+    find_and_send_reddit_posts(&hydrus, urls).await
+}

src/operations/find_and_send_reddit_posts.rs (new file)

@@ -0,0 +1,20 @@
+use hydrus_api::Hydrus;
+
+use crate::error::Result;
+use crate::utils::reddit::get_post_images;
+
+pub async fn find_and_send_reddit_posts(hydrus: &Hydrus, post_urls: Vec<String>) -> Result<()> {
+    let total_posts = post_urls.len();
+
+    for (index, post) in post_urls.into_iter().enumerate() {
+        tracing::info!("Importing post {} of {}", index + 1, total_posts);
+        let images = get_post_images(&post).await?;
+        tracing::info!("Found {} images for post {}", images.len(), post);
+
+        for url in images {
+            hydrus.import().url(url).run().await?;
+        }
+    }
+
+    Ok(())
+}
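For context, a hand-wired invocation might look like the following sketch (not from the commit; the endpoint, the `HYDRUS_ACCESS_KEY` variable, and the exact `Client::new` signature are assumptions based on how `main.rs` constructs its client):

```rust
use hydrus_api::{Client, Hydrus};

use crate::operations::find_and_send_reddit_posts::find_and_send_reddit_posts;

// Hypothetical wiring, mirroring what main() does before dispatching commands.
async fn run() -> crate::error::Result<()> {
    let access_key = std::env::var("HYDRUS_ACCESS_KEY").expect("missing access key");
    let hydrus = Hydrus::new(Client::new("http://127.0.0.1:45869", access_key));

    find_and_send_reddit_posts(
        &hydrus,
        vec!["https://www.reddit.com/r/196/comments/wmx2k3/dame_da_rule/".to_string()],
    )
    .await
}
```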

src/operations/mod.rs

@@ -1,2 +1,3 @@
+pub mod find_and_send_reddit_posts;
 pub mod find_and_send_tags;
 pub mod find_and_send_urls;

src/utils/mod.rs

@@ -1 +1,2 @@
 pub mod pixiv;
+pub mod reddit;

src/utils/reddit.rs (new file)

@@ -0,0 +1,124 @@
+#![allow(unused)]
+
+use std::collections::HashMap;
+
+use crate::Result;
+use serde::Deserialize;
+use serde_json::Value;
+
+#[derive(Deserialize)]
+#[serde(tag = "kind", content = "data")]
+enum DataEntry {
+    Listing(ListingEntry),
+}
+
+#[derive(Deserialize)]
+struct ListingEntry {
+    children: Vec<DataEntryChild>,
+    #[serde(flatten)]
+    _extra: HashMap<String, Value>,
+}
+
+#[derive(Deserialize)]
+#[serde(tag = "kind", content = "data")]
+enum DataEntryChild {
+    #[serde(alias = "t3")]
+    T3(T3Data),
+    #[serde(alias = "t1")]
+    T1(HashMap<String, Value>),
+}
+
+#[derive(Deserialize, Debug)]
+struct T3Data {
+    id: String,
+    url: Option<String>,
+    gallery_data: Option<GalleryData>,
+    #[serde(flatten)]
+    _extra: HashMap<String, Value>,
+}
+
+#[derive(Deserialize, Debug)]
+struct GalleryData {
+    items: Vec<GalleryItem>,
+}
+
+#[derive(Deserialize, Debug)]
+struct GalleryItem {
+    media_id: String,
+    id: u64,
+}
+
+/// Returns all images associated with a post
+pub async fn get_post_images<S: AsRef<str>>(post_url: S) -> Result<Vec<String>> {
+    let post_data = get_post(post_url.as_ref()).await?;
+
+    if let Some(gallery_data) = post_data.gallery_data {
+        let urls = gallery_data
+            .items
+            .into_iter()
+            .map(|item| item.media_id)
+            .map(|media_id| format!("https://i.redd.it/{}.jpg", media_id))
+            .collect();
+        Ok(urls)
+    } else if let Some(url) = post_data.url {
+        Ok(vec![url])
+    } else {
+        Ok(Vec::new())
+    }
+}
+
+async fn get_post(url: &str) -> Result<T3Data> {
+    let mut response: Vec<DataEntry> = reqwest::get(format!("{}.json", url)).await?.json().await?;
+    response.reverse();
+    let first_entry = response.pop().unwrap();
+    let mut first_listing = match first_entry {
+        DataEntry::Listing(l) => l.children,
+    };
+    first_listing.reverse();
+    let entry = first_listing.pop().unwrap();
+
+    match entry {
+        DataEntryChild::T3(t3) => Ok(t3),
+        DataEntryChild::T1(_) => panic!("Invalid data entry t1"),
+    }
+}
+
+#[cfg(test)]
+mod test {
+    #[tokio::test]
+    async fn it_finds_post_images() {
+        let images =
+            super::get_post_images("https://www.reddit.com/r/196/comments/wmx2k3/dame_da_rule/")
+                .await
+                .unwrap();
+        assert!(!images.is_empty());
+    }
+
+    #[tokio::test]
+    async fn it_finds_multiple_post_images() {
+        let images =
+            super::get_post_images("https://www.reddit.com/r/dogelore/comments/wmas8c/le_yakuza/")
+                .await
+                .unwrap();
+        assert!(!images.is_empty());
+    }
+
+    #[tokio::test]
+    async fn it_finds_info_for_posts() {
+        let post = super::get_post("https://www.reddit.com/r/196/comments/wmx2k3/dame_da_rule/")
+            .await
+            .unwrap();
+        println!("{:?}", post.url);
+        assert!(post.url.is_some());
+    }
+
+    #[tokio::test]
+    async fn it_finds_info_for_gallery_posts() {
+        let post = super::get_post("https://www.reddit.com/r/dogelore/comments/wmas8c/le_yakuza/")
+            .await
+            .unwrap();
+        println!("{:?}", post.gallery_data);
+        assert!(post.gallery_data.is_some());
+
+        let gallery_data = post.gallery_data.unwrap();
+        assert!(!gallery_data.items.is_empty())
+    }
+}
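The adjacently tagged enums are the subtle part of this parser, so here is a self-contained sketch of how the `kind`/`data` pairs select variants (not part of the commit; the JSON is a hand-written stand-in for reddit's `<post-url>.json` response, not captured output):

```rust
use std::collections::HashMap;

use serde::Deserialize;
use serde_json::Value;

// Mirrors the layout above: `kind` picks the variant, `data` carries its payload.
#[derive(Deserialize, Debug)]
#[serde(tag = "kind", content = "data")]
enum DataEntry {
    Listing(ListingEntry),
}

#[derive(Deserialize, Debug)]
struct ListingEntry {
    children: Vec<DataEntryChild>,
    // Collects all fields this sketch does not model explicitly.
    #[serde(flatten)]
    _extra: HashMap<String, Value>,
}

#[derive(Deserialize, Debug)]
#[serde(tag = "kind", content = "data")]
enum DataEntryChild {
    // reddit reports lowercase kinds ("t3" = post, "t1" = comment),
    // hence the aliases on the variant names.
    #[serde(alias = "t3")]
    T3(HashMap<String, Value>),
    #[serde(alias = "t1")]
    T1(HashMap<String, Value>),
}

fn main() {
    // Hand-written stand-in for the first listing of a `<post-url>.json` response.
    let json = r#"[
        {
            "kind": "Listing",
            "data": {
                "children": [
                    { "kind": "t3", "data": { "id": "abc123", "url": "https://i.redd.it/example.jpg" } }
                ]
            }
        }
    ]"#;

    let entries: Vec<DataEntry> = serde_json::from_str(json).unwrap();
    match &entries[0] {
        DataEntry::Listing(listing) => println!("{} child(ren)", listing.children.len()),
    }
}
```

Note also that `get_post_images` reconstructs gallery urls by formatting each `media_id` with a hard-coded `.jpg` extension, so the file type is an assumption baked into the code rather than something read from the response.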