Improve reddit import speed

main
trivernis 10 months ago
parent 80fd7488c7
commit f8228a5856
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

94
Cargo.lock generated

@ -54,7 +54,7 @@ checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -220,7 +220,7 @@ dependencies = [
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -457,7 +457,7 @@ dependencies = [
"proc-macro2",
"quote",
"scratch",
"syn",
"syn 1.0.107",
]
[[package]]
@ -474,7 +474,7 @@ checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -487,7 +487,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustc_version 0.4.0",
"syn",
"syn 1.0.107",
]
[[package]]
@ -550,7 +550,7 @@ dependencies = [
"base64 0.13.1",
"chrono",
"derive_more",
"futures 0.3.26",
"futures 0.3.28",
"hmac",
"hyper 0.14.24",
"hyper-tls 0.5.0",
@ -632,7 +632,7 @@ checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
"synstructure",
]
@ -728,9 +728,9 @@ checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678"
[[package]]
name = "futures"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84"
checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
dependencies = [
"futures-channel",
"futures-core",
@ -743,9 +743,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5"
checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2"
dependencies = [
"futures-core",
"futures-sink",
@ -753,9 +753,9 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608"
checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
[[package]]
name = "futures-cpupool"
@ -769,9 +769,9 @@ dependencies = [
[[package]]
name = "futures-executor"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e"
checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0"
dependencies = [
"futures-core",
"futures-task",
@ -780,38 +780,38 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531"
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
[[package]]
name = "futures-macro"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70"
checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.23",
]
[[package]]
name = "futures-sink"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364"
checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e"
[[package]]
name = "futures-task"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366"
checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
[[package]]
name = "futures-util"
version = "0.3.26"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1"
checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
dependencies = [
"futures-channel",
"futures-core",
@ -1013,6 +1013,7 @@ dependencies = [
"directories",
"egg-mode",
"fakeit",
"futures 0.3.28",
"hydrus-api",
"lazy-regex",
"pixiv-rs",
@ -1287,7 +1288,7 @@ dependencies = [
"proc-macro2",
"quote",
"regex",
"syn",
"syn 1.0.107",
]
[[package]]
@ -1582,7 +1583,7 @@ checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -1706,7 +1707,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -1764,7 +1765,7 @@ dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
"version_check",
]
@ -1781,9 +1782,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.51"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
dependencies = [
"unicode-ident",
]
@ -1800,9 +1801,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.23"
version = "1.0.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
dependencies = [
"proc-macro2",
]
@ -2267,7 +2268,7 @@ checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -2413,6 +2414,17 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "synstructure"
version = "0.12.6"
@ -2421,7 +2433,7 @@ checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
"unicode-xid",
]
@ -2475,7 +2487,7 @@ checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -2600,7 +2612,7 @@ checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -2734,7 +2746,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
]
[[package]]
@ -2967,7 +2979,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
"wasm-bindgen-shared",
]
@ -3001,7 +3013,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 1.0.107",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]

@ -28,6 +28,7 @@ color-eyre = "0.6.2"
egg-mode = "0.16.1"
lazy-regex = "2.4.1"
fakeit = "1.1.1"
futures = "0.3.28"
[dependencies.tokio]
version = "1.25.0"

@ -1,35 +1,65 @@
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use hydrus_api::Hydrus;
use crate::error::Result;
use crate::utils::reddit::get_post_images;
use futures::future;
/// Retrieves the image URLs for every entry in `post_urls` and imports the
/// posts that could be resolved through `import_post`.
///
/// One `get_post_images` future is built per post and all of them are awaited
/// together with `future::join_all`; the imports afterwards run one at a time.
/// Failures (both while retrieving post info and while importing) are logged
/// with `tracing::error!` and skipped, so the function itself ends in `Ok(())`.
///
/// NOTE(review): this listing is a rendered diff without +/- markers, so a few
/// lines from the previous implementation are interleaved with the new code.
/// They are flagged individually below.
#[tracing::instrument(level = "debug", skip(hydrus))]
pub async fn find_and_send_reddit_posts(hydrus: &Hydrus, post_urls: Vec<String>) -> Result<()> {
// Captured up front because `post_urls` is consumed by `into_iter()` below.
let total_posts = post_urls.len();
let mut posts_with_img = Vec::new();
tracing::info!("Retrieving post data...");
// Shared progress counter; starts at 1 so the first "Got info" message reads "1 of N"
// (`fetch_add` yields the value held before the increment).
let counter = Arc::new(AtomicUsize::new(1));
let post_results = future::join_all(post_urls.into_iter().enumerate().map(|(i, p)| {
// Each future gets its own handle to the shared counter.
let counter = Arc::clone(&counter);
async move {
let img = get_post_images(&p).await?;
tracing::info!(
"Got info for {} of {total_posts}",
counter.fetch_add(1, Ordering::SeqCst)
);
// Keep the original index and URL next to the images for the import step.
Result::Ok((i, p, img))
}
}))
.await;
// NOTE(review): the next line appears to be the removed pre-change loop header
// (`post_urls` has already been moved above) — confirm against the real file.
for (index, post) in post_urls.into_iter().enumerate() {
// Split the results: successful lookups are queued for import, failures are only logged.
for result in post_results {
match result {
Ok(e) => {
posts_with_img.push(e);
}
Err(e) => {
tracing::error!("Failed to retrieve post info: {e}");
}
}
}
// `index` is the position in the original `post_urls`, so the numbers logged here
// skip over posts whose lookup failed.
for (index, post, images) in posts_with_img {
tracing::info!("Importing post {} of {}", index + 1, total_posts);
// NOTE(review): the next two lines appear to be the removed pre-change call and
// log message (old `import_post(&post, hydrus)` argument order) — confirm.
if let Err(e) = import_post(&post, hydrus).await {
tracing::error!("Failed to import {}: {}", post, e);
if let Err(e) = import_post(hydrus, &post, images).await {
tracing::error!("Failed to import post {}: {}", post, e);
}
}
Ok(())
}
/// Imports every URL in `images` through `hydrus` and associates the post URL
/// with each file returned for that import.
///
/// Any error from the import, from listing the files, or from associating the
/// URL is propagated immediately with `?`, so the remaining images of the post
/// are not processed.
///
/// NOTE(review): this listing is a rendered diff without +/- markers. It shows
/// both the previous signature `(post_url: &str, hydrus: &Hydrus)`, which
/// fetched the images itself, and the new one that receives `images` from the
/// caller. The lines that look like pre-change leftovers are flagged below.
#[tracing::instrument(level = "debug", skip(hydrus))]
// NOTE(review): the next four lines appear to be the removed pre-change signature and
// the image lookup that now happens in the caller — confirm against the real file.
async fn import_post(post_url: &str, hydrus: &Hydrus) -> Result<()> {
tracing::debug!("Post {}", post_url);
let images = get_post_images(post_url).await?;
tracing::info!("Found {} images for post {}", images.len(), post_url);
async fn import_post(hydrus: &Hydrus, post: &String, images: Vec<String>) -> Result<()> {
for url in images {
// Run a URL import for this image, then fetch the files it produced.
let mut entry = hydrus.import().url(url).run().await?;
let files = entry.files().await?;
for mut file in files {
// NOTE(review): the next line appears to be the removed pre-change variant that used
// the old `post_url` parameter name — confirm.
file.associate_urls(vec![post_url.to_string()]).await?;
// Attach the post URL to the imported file.
file.associate_urls(vec![post.to_string()]).await?;
}
}
Ok(())
}

Loading…
Cancel
Save