Fix reddit wanting a user agent now

main
trivernis 7 months ago
parent f8228a5856
commit dbc5275f5d
Signed by: Trivernis
GPG Key ID: 7E6D18B61C8D2F4B

@ -2,6 +2,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use hydrus_api::Hydrus;
use tokio::sync::Semaphore;
use crate::error::Result;
use crate::utils::reddit::get_post_images;
@ -15,11 +16,21 @@ pub async fn find_and_send_reddit_posts(hydrus: &Hydrus, post_urls: Vec<String>)
tracing::info!("Retrieving post data...");
let counter = Arc::new(AtomicUsize::new(1));
let sem = Arc::new(Semaphore::new(2));
let post_results = future::join_all(post_urls.into_iter().enumerate().map(|(i, p)| {
let counter = Arc::clone(&counter);
let sem = Arc::clone(&sem);
async move {
let img = get_post_images(&p).await?;
let permit = sem.acquire_owned().await.unwrap();
let img = match get_post_images(&p).await {
Ok(img) => img,
Err(e) => {
tracing::error!("Failed to retrieve info for {p} : {e}");
return Err(e);
}
};
std::mem::drop(permit);
tracing::info!(
"Got info for {} of {total_posts}",
counter.fetch_add(1, Ordering::SeqCst)

@ -79,6 +79,14 @@ async fn it_retrieves_post_data() {
assert!(!data.attachment.is_empty());
}
/// Fetches a single note through `get_post` and expects the returned data to
/// carry at least one attachment.
///
/// NOTE(review): judging by the test name the url points at a Misskey
/// instance; `get_post` presumably performs a live request, so this test
/// likely needs network access - confirm.
#[tokio::test]
async fn it_retrieves_post_misskey() {
    let note_url = "https://social.funkyfish.cool/notes/97ng0c9is3";
    let lookup = get_post(note_url).await;
    // Any retrieval error fails the test right here.
    let data = lookup.unwrap();
    assert!(!data.attachment.is_empty());
}
#[tokio::test]
async fn it_retrieves_post_images() {
let images = get_post_images("https://lemmy.blahaj.zone/post/113727")

@ -111,22 +111,29 @@ async fn get_post(url: &str) -> Result<T3Data> {
/// Resolves reddit redirects
#[tracing::instrument(level = "debug")]
async fn resolve_redirects(url: &str) -> Result<String> {
if is_resolved(url) {
tracing::debug!("Url already resolved.");
return Ok(url.to_string());
let mut url = url.to_string();
for _ in 0..10 {
if is_resolved(&url) {
tracing::debug!("Url already resolved.");
return Ok(url);
}
let client = reqwest::Client::builder()
.user_agent(fakeit::user_agent::random_platform())
.redirect(Policy::none())
.build()?;
let response = client.get(url).send().await?;
if let Some(location) = response.headers().get("location") {
tracing::debug!("Redirect to {location:?} found");
url = location.to_str().unwrap().to_string();
} else {
tracing::debug!("No redirect found.");
return Ok(response.url().as_str().to_string());
}
}
let client = reqwest::Client::builder()
.redirect(Policy::none())
.build()?;
let response = client.head(url).send().await?;
if let Some(location) = response.headers().get("location") {
tracing::debug!("Redirect to {location:?} found");
Ok(location.to_str().unwrap().to_string())
} else {
tracing::debug!("No redirect found.");
Ok(response.url().as_str().to_string())
}
Ok(url)
}
/// Checks if the url is already in a format that can be used for retrieving information
@ -147,6 +154,14 @@ mod test {
assert!(images.is_empty() == false);
}
/// Expects `get_post_images` to return at least one image for the given url.
///
/// NOTE(review): the url looks like a reddit share link (`/s/` path segment)
/// that has to be resolved before the post can be read - confirm. The lookup
/// presumably performs live requests, so the test likely needs network access.
#[tokio::test]
async fn it_finds_post_images2() {
    let images = super::get_post_images("https://reddit.com/r/HentaiBullying/s/S1gKoG4s2S/")
        .await
        .unwrap();
    assert!(!images.is_empty());
}
#[tokio::test]
async fn it_finds_multiple_post_images() {
let images =
@ -164,7 +179,6 @@ mod test {
println!("{:?}", post.url);
assert!(post.url.is_some());
}
#[tokio::test]
async fn it_finds_info_for_gallery_posts() {
let post = super::get_post("https://www.reddit.com/r/dogelore/comments/wmas8c/le_yakuza/")

Loading…
Cancel
Save