Add support for fedi posts (lemmy in particular)

Branch: main
Author: trivernis, 11 months ago
parent 5c2339a8d9
commit 80fd7488c7
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45
@@ -23,6 +23,10 @@ pub enum Command {
#[clap(name = "import-reddit-posts")]
ImportRedditPosts(ImportUrlsOptions),
/// Looks up and imports fedi posts
#[clap(name = "import-fedi-posts")]
ImportFediPosts(ImportUrlsOptions),
/// Looks up a list of urls and imports media found for them
#[clap(name = "import-urls")]
ImportUrls(ImportUrlsOptions),
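For readers unfamiliar with clap's derive macros, the sketch below shows how a subcommand registered like the variants above is parsed and dispatched end to end. It is a self-contained illustration, not the project's code: the real ImportUrlsOptions struct is defined elsewhere in the crate and its fields are not part of this diff, so the Cli wrapper and the urls field here are purely hypothetical.

// Minimal sketch: parsing and dispatching a clap subcommand declared with the
// same attribute shape as the diff. `Cli` and the `urls` field are assumptions.
use clap::{Args, Parser, Subcommand};

#[derive(Parser, Debug)]
struct Cli {
    #[clap(subcommand)]
    command: Command,
}

#[derive(Subcommand, Debug)]
enum Command {
    /// Looks up and imports fedi posts
    #[clap(name = "import-fedi-posts")]
    ImportFediPosts(ImportUrlsOptions),
}

#[derive(Args, Debug)]
struct ImportUrlsOptions {
    /// Hypothetical: post URLs passed as positional arguments
    urls: Vec<String>,
}

fn main() {
    // e.g. `mytool import-fedi-posts https://lemmy.example/post/1`
    let cli = Cli::parse();
    match cli.command {
        Command::ImportFediPosts(opt) => println!("would import {:?}", opt.urls),
    }
}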

@@ -7,6 +7,7 @@ pub mod utils;
use crate::config::Config;
use crate::config::SauceNaoConfig;
use crate::error::Result;
use crate::operations::find_and_send_fedi_posts::find_and_send_fedi_posts;
use crate::operations::find_and_send_tags::find_and_send_tags;
use crate::operations::find_and_send_urls::find_and_send_urls;
use args::*;
@@ -44,6 +45,7 @@ async fn main() {
            send_tags_or_urls(opt, config.into_saucenao(), hydrus, false).await
        }
        Command::ImportRedditPosts(opt) => import_reddit_posts(opt, hydrus).await,
        Command::ImportFediPosts(opt) => import_fedi_posts(opt, hydrus).await,
        Command::ImportUrls(opt) => import_urls(opt, hydrus).await,
    }
    .expect("Failed to send tags or urls");
@@ -121,14 +123,22 @@ async fn import_reddit_posts(opt: ImportUrlsOptions, hydrus: Hydrus) -> Result<()> {
    find_and_send_reddit_posts(&hydrus, urls).await
}

#[tracing::instrument(level = "debug", skip(hydrus))]
async fn import_fedi_posts(opt: ImportUrlsOptions, hydrus: Hydrus) -> Result<()> {
    let urls = get_urls_from_args(opt).await?;
    find_and_send_fedi_posts(&hydrus, urls).await
}

async fn import_urls(opt: ImportUrlsOptions, hydrus: Hydrus) -> Result<()> {
    let urls = get_urls_from_args(opt).await?;
    let mut reddit_urls = Vec::new();
    let mut fedi_urls = Vec::new();
    let mut unknown_urls = Vec::new();

    for url in urls {
-        match find_url_type(&url) {
+        match find_url_type(&url).await {
            UrlType::Reddit => reddit_urls.push(url),
            UrlType::Fedi => fedi_urls.push(url),
            UrlType::Other => {
                tracing::warn!("Unknown url type {url}");
                unknown_urls.push(url)
@@ -137,6 +147,7 @@ async fn import_urls(opt: ImportUrlsOptions, hydrus: Hydrus) -> Result<()> {
    }

    tracing::info!("Importing reddit posts...");
    find_and_send_reddit_posts(&hydrus, reddit_urls).await?;
    find_and_send_fedi_posts(&hydrus, fedi_urls).await?;

    tracing::info!("Importing unknown urls...");

@@ -0,0 +1,35 @@
use hydrus_api::Hydrus;

use crate::error::Result;
use crate::utils::fedi::get_post_images;

#[tracing::instrument(level = "debug", skip(hydrus))]
pub async fn find_and_send_fedi_posts(hydrus: &Hydrus, post_urls: Vec<String>) -> Result<()> {
    let total_posts = post_urls.len();

    for (index, post) in post_urls.into_iter().enumerate() {
        tracing::info!("Importing post {} of {}", index + 1, total_posts);

        if let Err(e) = import_post(&post, hydrus).await {
            tracing::error!("Failed to import {}: {}", post, e);
        }
    }

    Ok(())
}

#[tracing::instrument(level = "debug", skip(hydrus))]
async fn import_post(post_url: &str, hydrus: &Hydrus) -> Result<()> {
    tracing::debug!("Post {}", post_url);
    let images = get_post_images(post_url).await?;
    tracing::info!("Found {} images for post {}", images.len(), post_url);

    for url in images {
        let mut entry = hydrus.import().url(url).run().await?;
        let files = entry.files().await?;

        for mut file in files {
            file.associate_urls(vec![post_url.to_string()]).await?;
        }
    }

    Ok(())
}

@@ -1,3 +1,4 @@
pub mod find_and_send_fedi_posts;
pub mod find_and_send_reddit_posts;
pub mod find_and_send_tags;
pub mod find_and_send_urls;

@@ -0,0 +1,89 @@
#![allow(unused)]

use std::collections::HashMap;

use crate::Result;
use lazy_regex::regex;
use reqwest::header::{HeaderMap, HeaderValue};
use reqwest::ClientBuilder;
use reqwest::{redirect::Policy, StatusCode};
use serde::Deserialize;
use serde_json::Value;
use std::fmt::Debug;

#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
enum EntryData {
    Page(PostData),
}

#[derive(Debug, Deserialize)]
struct PostData {
    id: String,
    name: String,
    attachment: Vec<Attachment>,
    #[serde(flatten)]
    _extra: HashMap<String, Value>,
}

#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
enum Attachment {
    Link { href: String },
}

pub async fn is_fedi_url(url: &str) -> bool {
    get_post(url).await.is_ok()
}

/// Returns all images associated with a post
#[tracing::instrument(level = "debug")]
pub async fn get_post_images<S: AsRef<str> + Debug>(post_url: S) -> Result<Vec<String>> {
    let post_data = get_post(post_url.as_ref()).await?;
    let urls = post_data
        .attachment
        .into_iter()
        .map(|p| {
            let Attachment::Link { href } = p;
            href
        })
        .collect();

    Ok(urls)
}

#[tracing::instrument(level = "debug")]
async fn get_post(url: &str) -> Result<PostData> {
    let mut headers = HeaderMap::new();
    headers.insert(
        "Accept",
        HeaderValue::from_static("application/activity+json"),
    );
    let client = ClientBuilder::default()
        .default_headers(headers)
        .user_agent(fakeit::user_agent::random_platform())
        .build()?;
    let mut response: EntryData = client.get(url).send().await?.json().await?;
    let EntryData::Page(post) = response;

    Ok(post)
}

#[tokio::test]
async fn it_retrieves_post_data() {
    let data = get_post("https://lemmy.blahaj.zone/post/113727")
        .await
        .unwrap();

    assert!(!data.attachment.is_empty());
}

#[tokio::test]
async fn it_retrieves_post_images() {
    let images = get_post_images("https://lemmy.blahaj.zone/post/113727")
        .await
        .unwrap();

    assert!(!images.is_empty());
    assert!(images.get(0).unwrap().ends_with(".jpg"));
}
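To make the serde layout above concrete, here is a small, self-contained sketch of the kind of ActivityPub JSON this parser expects. The payload is a trimmed, hypothetical example of a Lemmy "Page" object, not a captured response; the point is that #[serde(tag = "type")] selects the Page variant, #[serde(flatten)] absorbs every field the structs do not name, and the attachment Link entries carry the image URLs.

// Sketch: deserializing a hypothetical, trimmed ActivityPub "Page" payload
// with the same serde layout as fedi.rs. Only serde and serde_json are needed.
use std::collections::HashMap;

use serde::Deserialize;
use serde_json::Value;

#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
enum EntryData {
    Page(PostData),
}

#[derive(Debug, Deserialize)]
struct PostData {
    id: String,
    name: String,
    attachment: Vec<Attachment>,
    // Unknown fields (and Lemmy objects carry many) are collected here
    // instead of causing a deserialization error.
    #[serde(flatten)]
    _extra: HashMap<String, Value>,
}

#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
enum Attachment {
    Link { href: String },
}

fn main() -> serde_json::Result<()> {
    // Hypothetical payload in the shape the parser expects.
    let body = r#"{
        "type": "Page",
        "id": "https://lemmy.example/post/1",
        "name": "Example post",
        "attachment": [
            { "type": "Link", "href": "https://lemmy.example/pictrs/image/abc.jpg" }
        ],
        "audience": "https://lemmy.example/c/pics"
    }"#;

    // A single-variant enum makes this `let` pattern irrefutable, as in get_post.
    let EntryData::Page(post) = serde_json::from_str::<EntryData>(body)?;
    println!("{} ({}) -> {:?}", post.name, post.id, post.attachment);
    Ok(())
}

With a real post URL, the same types back the .json::<EntryData>() call that get_post makes through reqwest above.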

@@ -1,6 +1,8 @@
pub mod fedi;
pub mod pixiv;
pub mod reddit;
pub mod urls;

use crate::error::Result;
use directories::ProjectDirs;
use std::{fs, path::PathBuf};

@@ -1,13 +1,17 @@
use super::fedi;
use lazy_regex::regex;

pub enum UrlType {
    Reddit,
    Fedi,
    Other,
}

-pub fn find_url_type(url: &str) -> UrlType {
+pub async fn find_url_type(url: &str) -> UrlType {
    if is_reddit_url(url) {
        UrlType::Reddit
    } else if fedi::is_fedi_url(url).await {
        UrlType::Fedi
    } else {
        UrlType::Other
    }
