diff --git a/Cargo.lock b/Cargo.lock index e41f628..d17c77d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1167,7 +1167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "snekdown" -version = "0.26.3" +version = "0.26.4" dependencies = [ "asciimath-rs 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)", "base64 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 6a50a36..4e08b42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snekdown" -version = "0.26.3" +version = "0.26.4" authors = ["trivernis "] edition = "2018" license-file = "LICENSE" diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 56346ee..6a9e5c1 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -4,14 +4,14 @@ use crate::format::PlaceholderTemplate; use crate::references::configuration::{ConfigRefEntry, Configuration, Value}; use crate::references::placeholders::ProcessPlaceholders; use crate::references::templates::{Template, TemplateVariable}; +use crate::utils::downloads::{DownloadManager, PendingDownload}; use asciimath_rs::elements::special::Expression; use bibliographix::bib_manager::BibManager; use bibliographix::bibliography::bibliography_entry::BibliographyEntryReference; use bibliographix::references::bib_reference::BibRefAnchor; +use crossbeam_utils::sync::WaitGroup; use std::collections::HashMap; -use std::fs::read; use std::iter::FromIterator; -use std::path::PathBuf; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex, RwLock}; @@ -72,6 +72,7 @@ pub struct Document { pub(crate) placeholders: Vec>>, pub config: Configuration, pub bibliography: BibManager, + pub downloads: Arc>, pub stylesheets: Vec, } @@ -232,6 +233,7 @@ pub struct Url { pub struct Image { pub(crate) url: Url, pub(crate) metadata: Option, + pub(crate) download: Arc>, } #[derive(Clone, Debug)] @@ -297,10 +299,15 @@ impl Document { config: 
Configuration::default(), bibliography: BibManager::new(), stylesheets: Vec::new(), + downloads: Arc::new(Mutex::new(DownloadManager::new())), } } - pub fn new_with_manager(is_root: bool, bibliography: BibManager) -> Self { + pub fn new_with_manager( + is_root: bool, + bibliography: BibManager, + downloads: Arc>, + ) -> Self { Self { elements: Vec::new(), is_root, @@ -309,6 +316,7 @@ impl Document { config: Configuration::default(), bibliography, stylesheets: Vec::new(), + downloads, } } @@ -680,28 +688,16 @@ impl Into> for InlineMetadata { impl Image { pub fn get_content(&self) -> Option> { - let path = PathBuf::from(&self.url.url); - if path.exists() { - if let Ok(content) = read(path) { - Some(content) - } else { - None - } - } else { - self.download_content() + let mut download = self.download.lock().unwrap(); + if let Some(wg) = &download.wg { + let wg = WaitGroup::clone(wg); + log::debug!("Waiting for content of {}", self.url.url.clone()); + wg.wait(); } - } + let mut data = None; + std::mem::swap(&mut data, &mut download.data); - fn download_content(&self) -> Option> { - if let Ok(content) = reqwest::blocking::get(&self.url.url) { - if let Ok(bytes) = content.bytes() { - Some(bytes.to_vec()) - } else { - None - } - } else { - None - } + data } } diff --git a/src/format/html/to_html.rs b/src/format/html/to_html.rs index 6f33001..871623e 100644 --- a/src/format/html/to_html.rs +++ b/src/format/html/to_html.rs @@ -7,6 +7,8 @@ use asciimath_rs::format::mathml::ToMathML; use htmlescape::encode_attribute; use minify::html::minify; use std::io; +use std::sync::Arc; +use std::thread; use syntect::highlighting::ThemeSet; use syntect::html::highlighted_html_for_string; use syntect::parsing::SyntaxSet; @@ -95,6 +97,10 @@ impl ToHtml for MetadataValue { impl ToHtml for Document { fn to_html(&self, writer: &mut HTMLWriter) -> io::Result<()> { + let downloads = Arc::clone(&self.downloads); + thread::spawn(move || { + downloads.lock().unwrap().download_all(); + }); let 
path = if let Some(path) = &self.path { format!("path=\"{}\"", encode_attribute(path.as_str())) } else { diff --git a/src/parser/inline.rs b/src/parser/inline.rs index 7ef0684..5393cd1 100644 --- a/src/parser/inline.rs +++ b/src/parser/inline.rs @@ -132,7 +132,12 @@ impl ParseInline for Parser { } else { None }; - Ok(Image { url, metadata }) + let path = url.url.clone(); + Ok(Image { + url, + metadata, + download: self.document.downloads.lock().unwrap().add_download(path), + }) } else { Err(self.ctm.rewind_with_error(start_index)) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 46de6d6..0363794 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9,6 +9,7 @@ use crate::references::configuration::keys::{ IMP_BIBLIOGRAPHY, IMP_CONFIGS, IMP_IGNORE, IMP_STYLESHEETS, }; use crate::references::configuration::{Configuration, Value}; +use crate::utils::downloads::DownloadManager; use bibliographix::bib_manager::BibManager; use charred::tapemachine::{CharTapeMachine, TapeError, TapeResult}; use crossbeam_utils::sync::WaitGroup; @@ -57,6 +58,7 @@ impl Parser { false, Box::new(BufReader::new(f)), BibManager::new(), + Arc::new(Mutex::new(DownloadManager::new())), )) } @@ -74,6 +76,7 @@ impl Parser { false, Box::new(Cursor::new(text_bytes.to_vec())), BibManager::new(), + Arc::new(Mutex::new(DownloadManager::new())), ) } @@ -83,6 +86,7 @@ impl Parser { path: PathBuf, paths: Arc>>, bib_manager: BibManager, + download_manager: Arc>, ) -> Self { let text_bytes = text.as_bytes(); Self::create( @@ -91,6 +95,7 @@ impl Parser { true, Box::new(Cursor::new(text_bytes.to_vec())), bib_manager, + download_manager, ) } @@ -99,6 +104,7 @@ impl Parser { path: PathBuf, paths: Arc>>, bib_manager: BibManager, + download_manager: Arc>, ) -> Result { let f = File::open(&path)?; Ok(Self::create( @@ -107,6 +113,7 @@ impl Parser { true, Box::new(BufReader::new(f)), bib_manager, + download_manager, )) } @@ -116,6 +123,7 @@ impl Parser { is_child: bool, mut reader: Box, bib_manager: 
BibManager, + download_manager: Arc>, ) -> Self { if let Some(path) = path.clone() { paths.lock().unwrap().push(path.clone()) @@ -128,7 +136,7 @@ impl Parser { text.push('\n'); } - let document = Document::new_with_manager(!is_child, bib_manager); + let document = Document::new_with_manager(!is_child, bib_manager, download_manager); Self { sections: Vec::new(), section_nesting: 0, @@ -216,9 +224,11 @@ impl Parser { let paths = Arc::clone(&self.paths); let config = self.document.config.clone(); let bibliography = self.document.bibliography.create_child(); + let download_manager = Arc::clone(&self.document.downloads); let _ = thread::spawn(move || { - let mut parser = Parser::child_from_file(path, paths, bibliography).unwrap(); + let mut parser = + Parser::child_from_file(path, paths, bibliography, download_manager).unwrap(); parser.set_config(config); let document = parser.parse(); anchor_clone.write().unwrap().set_document(document); diff --git a/src/utils/downloads.rs b/src/utils/downloads.rs new file mode 100644 index 0000000..2defcc4 --- /dev/null +++ b/src/utils/downloads.rs @@ -0,0 +1,84 @@ +use crossbeam_utils::sync::WaitGroup; +use rayon::prelude::*; +use std::fs::read; +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; + +/// A manager for downloading urls in parallel +#[derive(Clone, Debug)] +pub struct DownloadManager { + downloads: Vec>>, +} + +impl DownloadManager { + /// Creates a new download manager + pub fn new() -> Self { + Self { + downloads: Vec::new(), + } + } + + /// Adds a new pending download + pub fn add_download(&mut self, path: String) -> Arc> { + let pending = Arc::new(Mutex::new(PendingDownload::new(path))); + self.downloads.push(Arc::clone(&pending)); + + pending + } + + /// Downloads all download entries + pub fn download_all(&self) { + self.downloads + .par_iter() + .for_each(|d| d.lock().unwrap().download()) + } +} + +/// A pending download entry. 
+/// Download does not necessarily mean that it's not a local file +#[derive(Clone, Debug)] +pub struct PendingDownload { + path: String, + pub(crate) data: Option>, + pub(crate) wg: Option, +} + +impl PendingDownload { + pub fn new(path: String) -> Self { + Self { + path, + data: None, + wg: Some(WaitGroup::new()), + } + } + + /// Downloads the file and writes the content to the `data` field + pub fn download(&mut self) { + let wg = std::mem::replace(&mut self.wg, None); + if let Some(wg) = wg { + log::debug!("Reading {}...", self.path); + self.data = self.read_content(); + log::debug!("{} read!", self.path); + drop(wg); + } + } + + /// Reads the file's content or downloads it if it doesn't exist in the filesystem + fn read_content(&self) -> Option> { + let path = PathBuf::from(&self.path); + if path.exists() { + read(path).ok() + } else { + self.download_content() + } + } + + /// Downloads the content from the given url + fn download_content(&self) -> Option> { + reqwest::blocking::get(&self.path) + .ok() + .map(|c| c.bytes()) + .and_then(|b| b.ok()) + .map(|b| b.to_vec()) + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 29ec0ba..0c2f117 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1 +1,2 @@ +pub mod downloads; pub mod parsing;