From f8228a585643a199ce6085fa8c24678a5745ca3f Mon Sep 17 00:00:00 2001 From: trivernis Date: Thu, 6 Jul 2023 22:24:35 +0200 Subject: [PATCH] Improve reddit import speed --- Cargo.lock | 94 +++++++++++--------- Cargo.toml | 1 + src/operations/find_and_send_reddit_posts.rs | 50 ++++++++--- 3 files changed, 94 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1d3d9fd..d29e44b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,7 +54,7 @@ checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -220,7 +220,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -457,7 +457,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn", + "syn 1.0.107", ] [[package]] @@ -474,7 +474,7 @@ checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -487,7 +487,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version 0.4.0", - "syn", + "syn 1.0.107", ] [[package]] @@ -550,7 +550,7 @@ dependencies = [ "base64 0.13.1", "chrono", "derive_more", - "futures 0.3.26", + "futures 0.3.28", "hmac", "hyper 0.14.24", "hyper-tls 0.5.0", @@ -632,7 +632,7 @@ checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", "synstructure", ] @@ -728,9 +728,9 @@ checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" [[package]] name = "futures" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" dependencies = [ "futures-channel", "futures-core", @@ -743,9 +743,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -753,9 +753,9 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-cpupool" @@ -769,9 +769,9 @@ dependencies = [ [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" dependencies = [ "futures-core", "futures-task", @@ -780,38 +780,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" [[package]] name = "futures-macro" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.23", ] [[package]] name = "futures-sink" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-util" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-channel", "futures-core", @@ -1013,6 +1013,7 @@ dependencies = [ "directories", "egg-mode", "fakeit", + "futures 0.3.28", "hydrus-api", "lazy-regex", "pixiv-rs", @@ -1287,7 +1288,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn", + "syn 1.0.107", ] [[package]] @@ -1582,7 +1583,7 @@ checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -1706,7 +1707,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -1764,7 +1765,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.107", "version_check", ] @@ -1781,9 +1782,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" dependencies = [ "unicode-ident", ] @@ -1800,9 +1801,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.23" +version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" dependencies = [ "proc-macro2", ] @@ -2267,7 +2268,7 @@ checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -2413,6 +2414,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "synstructure" version = "0.12.6" @@ -2421,7 +2433,7 @@ checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", "unicode-xid", ] @@ -2475,7 +2487,7 @@ checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -2600,7 +2612,7 @@ checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -2734,7 +2746,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -2967,7 +2979,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-shared", ] @@ -3001,7 +3013,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index ae17cd3..1029238 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ color-eyre = "0.6.2" egg-mode = "0.16.1" lazy-regex = "2.4.1" fakeit = "1.1.1" +futures = "0.3.28" [dependencies.tokio] version = "1.25.0" diff --git a/src/operations/find_and_send_reddit_posts.rs b/src/operations/find_and_send_reddit_posts.rs index 272f56b..d8a3281 100644 --- a/src/operations/find_and_send_reddit_posts.rs +++ b/src/operations/find_and_send_reddit_posts.rs @@ -1,35 +1,65 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; + use hydrus_api::Hydrus; use crate::error::Result; use crate::utils::reddit::get_post_images; +use futures::future; #[tracing::instrument(level = "debug", skip(hydrus))] pub async fn find_and_send_reddit_posts(hydrus: &Hydrus, post_urls: Vec) -> Result<()> { let total_posts = post_urls.len(); + let mut posts_with_img = Vec::new(); + + tracing::info!("Retrieving post data..."); + let counter = Arc::new(AtomicUsize::new(1)); + + let post_results = future::join_all(post_urls.into_iter().enumerate().map(|(i, p)| { + let counter = Arc::clone(&counter); + + async move { + let img = get_post_images(&p).await?; + tracing::info!( + "Got info for {} of {total_posts}", + counter.fetch_add(1, Ordering::SeqCst) + ); + + Result::Ok((i, p, img)) + } + })) + .await; - for (index, post) in post_urls.into_iter().enumerate() { + for result in post_results { + match result { + Ok(e) => { + posts_with_img.push(e); + } + Err(e) => { + tracing::error!("Failed to retrieve post info: {e}"); + } + } + } + + for (index, post, images) in posts_with_img { tracing::info!("Importing post {} of {}", index + 1, total_posts); - if let Err(e) = import_post(&post, hydrus).await { - tracing::error!("Failed to import {}: {}", post, e); + if let Err(e) = import_post(hydrus, &post, images).await { + tracing::error!("Failed to import post {}: {}", post, e); } } Ok(()) } -#[tracing::instrument(level = "debug", skip(hydrus))] -async fn import_post(post_url: &str, hydrus: &Hydrus) -> Result<()> { - tracing::debug!("Post {}", post_url); - let images = get_post_images(post_url).await?; - tracing::info!("Found {} images for post {}", images.len(), post_url); - +async fn import_post(hydrus: &Hydrus, post: &String, images: Vec) -> Result<()> { for url in images { let mut entry = hydrus.import().url(url).run().await?; let files = entry.files().await?; for mut file in files { - file.associate_urls(vec![post_url.to_string()]).await?; + file.associate_urls(vec![post.to_string()]).await?; } } + Ok(()) }