From dbc5275f5da55a2fb2abb74dd3f3a8e8c5cc6028 Mon Sep 17 00:00:00 2001 From: trivernis Date: Fri, 6 Oct 2023 20:13:04 +0200 Subject: [PATCH] Fix reddit wanting a user agent now --- src/operations/find_and_send_reddit_posts.rs | 13 +++++- src/utils/fedi.rs | 8 ++++ src/utils/reddit.rs | 44 +++++++++++++------- 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/src/operations/find_and_send_reddit_posts.rs b/src/operations/find_and_send_reddit_posts.rs index d8a3281..e8473d0 100644 --- a/src/operations/find_and_send_reddit_posts.rs +++ b/src/operations/find_and_send_reddit_posts.rs @@ -2,6 +2,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use hydrus_api::Hydrus; +use tokio::sync::Semaphore; use crate::error::Result; use crate::utils::reddit::get_post_images; @@ -15,11 +16,21 @@ pub async fn find_and_send_reddit_posts(hydrus: &Hydrus, post_urls: Vec) tracing::info!("Retrieving post data..."); let counter = Arc::new(AtomicUsize::new(1)); + let sem = Arc::new(Semaphore::new(2)); let post_results = future::join_all(post_urls.into_iter().enumerate().map(|(i, p)| { let counter = Arc::clone(&counter); + let sem = Arc::clone(&sem); async move { - let img = get_post_images(&p).await?; + let permit = sem.acquire_owned().await.unwrap(); + let img = match get_post_images(&p).await { + Ok(img) => img, + Err(e) => { + tracing::error!("Failed to retrieve info for {p} : {e}"); + return Err(e); + } + }; + std::mem::drop(permit); tracing::info!( "Got info for {} of {total_posts}", counter.fetch_add(1, Ordering::SeqCst) diff --git a/src/utils/fedi.rs b/src/utils/fedi.rs index 304efe3..875807e 100644 --- a/src/utils/fedi.rs +++ b/src/utils/fedi.rs @@ -79,6 +79,14 @@ async fn it_retrieves_post_data() { assert!(!data.attachment.is_empty()); } +#[tokio::test] +async fn it_retrieves_post_misskey() { + let data = get_post("https://social.funkyfish.cool/notes/97ng0c9is3") + .await + .unwrap(); + assert!(!data.attachment.is_empty()); +} + #[tokio::test] async fn it_retrieves_post_images() { let images = get_post_images("https://lemmy.blahaj.zone/post/113727") diff --git a/src/utils/reddit.rs b/src/utils/reddit.rs index 01e8013..87243e8 100644 --- a/src/utils/reddit.rs +++ b/src/utils/reddit.rs @@ -111,22 +111,29 @@ async fn get_post(url: &str) -> Result { /// Resolves reddit redirects #[tracing::instrument(level = "debug")] async fn resolve_redirects(url: &str) -> Result { - if is_resolved(url) { - tracing::debug!("Url already resolved."); - return Ok(url.to_string()); + let mut url = url.to_string(); + + for _ in 0..10 { + if is_resolved(&url) { + tracing::debug!("Url already resolved."); + return Ok(url); + } + let client = reqwest::Client::builder() + .user_agent(fakeit::user_agent::random_platform()) + .redirect(Policy::none()) + .build()?; + let response = client.get(url).send().await?; + + if let Some(location) = response.headers().get("location") { + tracing::debug!("Redirect to {location:?} found"); + url = location.to_str().unwrap().to_string(); + } else { + tracing::debug!("No redirect found."); + return Ok(response.url().as_str().to_string()); + } } - let client = reqwest::Client::builder() - .redirect(Policy::none()) - .build()?; - let response = client.head(url).send().await?; - if let Some(location) = response.headers().get("location") { - tracing::debug!("Redirect to {location:?} found"); - Ok(location.to_str().unwrap().to_string()) - } else { - tracing::debug!("No redirect found."); - Ok(response.url().as_str().to_string()) - } + Ok(url) } /// Checks if the url is already in a format that can be used for retrieving information @@ -147,6 +154,14 @@ mod test { assert!(images.is_empty() == false); } + #[tokio::test] + async fn it_finds_post_images2() { + let images = super::get_post_images("https://reddit.com/r/HentaiBullying/s/S1gKoG4s2S/") + .await + .unwrap(); + assert!(images.is_empty() == false); + } + #[tokio::test] async fn it_finds_multiple_post_images() { let images = @@ -164,7 +179,6 @@ mod test { println!("{:?}", post.url); assert!(post.url.is_some()); } - #[tokio::test] async fn it_finds_info_for_gallery_posts() { let post = super::get_post("https://www.reddit.com/r/dogelore/comments/wmas8c/le_yakuza/")