Change downloading of images to be parallel

Signed-off-by: trivernis <trivernis@protonmail.com>
feature/epub-rendering
trivernis 4 years ago
parent 0b897aa6f2
commit 234b11ee93
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

2
Cargo.lock generated

@ -1167,7 +1167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "snekdown"
version = "0.26.3"
version = "0.26.4"
dependencies = [
"asciimath-rs 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",

@ -1,6 +1,6 @@
[package]
name = "snekdown"
version = "0.26.3"
version = "0.26.4"
authors = ["trivernis <trivernis@protonmail.com>"]
edition = "2018"
license-file = "LICENSE"

@ -4,14 +4,14 @@ use crate::format::PlaceholderTemplate;
use crate::references::configuration::{ConfigRefEntry, Configuration, Value};
use crate::references::placeholders::ProcessPlaceholders;
use crate::references::templates::{Template, TemplateVariable};
use crate::utils::downloads::{DownloadManager, PendingDownload};
use asciimath_rs::elements::special::Expression;
use bibliographix::bib_manager::BibManager;
use bibliographix::bibliography::bibliography_entry::BibliographyEntryReference;
use bibliographix::references::bib_reference::BibRefAnchor;
use crossbeam_utils::sync::WaitGroup;
use std::collections::HashMap;
use std::fs::read;
use std::iter::FromIterator;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, RwLock};
@ -72,6 +72,7 @@ pub struct Document {
pub(crate) placeholders: Vec<Arc<RwLock<Placeholder>>>,
pub config: Configuration,
pub bibliography: BibManager,
pub downloads: Arc<Mutex<DownloadManager>>,
pub stylesheets: Vec<String>,
}
@ -232,6 +233,7 @@ pub struct Url {
/// An image element of a document.
pub struct Image {
/// Location of the image; its `url` string is also the path that gets
/// registered with the download manager when the image is parsed.
pub(crate) url: Url,
/// Optional inline metadata attached to the image.
pub(crate) metadata: Option<InlineMetadata>,
/// Shared handle to the pending download that provides the image content.
pub(crate) download: Arc<Mutex<PendingDownload>>,
}
#[derive(Clone, Debug)]
@ -297,10 +299,15 @@ impl Document {
config: Configuration::default(),
bibliography: BibManager::new(),
stylesheets: Vec::new(),
downloads: Arc::new(Mutex::new(DownloadManager::new())),
}
}
pub fn new_with_manager(is_root: bool, bibliography: BibManager) -> Self {
pub fn new_with_manager(
is_root: bool,
bibliography: BibManager,
downloads: Arc<Mutex<DownloadManager>>,
) -> Self {
Self {
elements: Vec::new(),
is_root,
@ -309,6 +316,7 @@ impl Document {
config: Configuration::default(),
bibliography,
stylesheets: Vec::new(),
downloads,
}
}
@ -680,28 +688,16 @@ impl Into<HashMap<String, Value>> for InlineMetadata {
impl Image {
pub fn get_content(&self) -> Option<Vec<u8>> {
let path = PathBuf::from(&self.url.url);
if path.exists() {
if let Ok(content) = read(path) {
Some(content)
} else {
None
}
} else {
self.download_content()
}
let mut download = self.download.lock().unwrap();
if let Some(wg) = &download.wg {
let wg = WaitGroup::clone(wg);
log::debug!("Waiting for content of {}", self.url.url.clone());
wg.wait();
}
let mut data = None;
std::mem::swap(&mut data, &mut download.data);
fn download_content(&self) -> Option<Vec<u8>> {
if let Ok(content) = reqwest::blocking::get(&self.url.url) {
if let Ok(bytes) = content.bytes() {
Some(bytes.to_vec())
} else {
None
}
} else {
None
}
data
}
}

@ -7,6 +7,8 @@ use asciimath_rs::format::mathml::ToMathML;
use htmlescape::encode_attribute;
use minify::html::minify;
use std::io;
use std::sync::Arc;
use std::thread;
use syntect::highlighting::ThemeSet;
use syntect::html::highlighted_html_for_string;
use syntect::parsing::SyntaxSet;
@ -95,6 +97,10 @@ impl ToHtml for MetadataValue {
impl ToHtml for Document {
fn to_html(&self, writer: &mut HTMLWriter) -> io::Result<()> {
let downloads = Arc::clone(&self.downloads);
thread::spawn(move || {
downloads.lock().unwrap().download_all();
});
let path = if let Some(path) = &self.path {
format!("path=\"{}\"", encode_attribute(path.as_str()))
} else {

@ -132,7 +132,12 @@ impl ParseInline for Parser {
} else {
None
};
Ok(Image { url, metadata })
let path = url.url.clone();
Ok(Image {
url,
metadata,
download: self.document.downloads.lock().unwrap().add_download(path),
})
} else {
Err(self.ctm.rewind_with_error(start_index))
}

@ -9,6 +9,7 @@ use crate::references::configuration::keys::{
IMP_BIBLIOGRAPHY, IMP_CONFIGS, IMP_IGNORE, IMP_STYLESHEETS,
};
use crate::references::configuration::{Configuration, Value};
use crate::utils::downloads::DownloadManager;
use bibliographix::bib_manager::BibManager;
use charred::tapemachine::{CharTapeMachine, TapeError, TapeResult};
use crossbeam_utils::sync::WaitGroup;
@ -57,6 +58,7 @@ impl Parser {
false,
Box::new(BufReader::new(f)),
BibManager::new(),
Arc::new(Mutex::new(DownloadManager::new())),
))
}
@ -74,6 +76,7 @@ impl Parser {
false,
Box::new(Cursor::new(text_bytes.to_vec())),
BibManager::new(),
Arc::new(Mutex::new(DownloadManager::new())),
)
}
@ -83,6 +86,7 @@ impl Parser {
path: PathBuf,
paths: Arc<Mutex<Vec<PathBuf>>>,
bib_manager: BibManager,
download_manager: Arc<Mutex<DownloadManager>>,
) -> Self {
let text_bytes = text.as_bytes();
Self::create(
@ -91,6 +95,7 @@ impl Parser {
true,
Box::new(Cursor::new(text_bytes.to_vec())),
bib_manager,
download_manager,
)
}
@ -99,6 +104,7 @@ impl Parser {
path: PathBuf,
paths: Arc<Mutex<Vec<PathBuf>>>,
bib_manager: BibManager,
download_manager: Arc<Mutex<DownloadManager>>,
) -> Result<Self, io::Error> {
let f = File::open(&path)?;
Ok(Self::create(
@ -107,6 +113,7 @@ impl Parser {
true,
Box::new(BufReader::new(f)),
bib_manager,
download_manager,
))
}
@ -116,6 +123,7 @@ impl Parser {
is_child: bool,
mut reader: Box<dyn BufRead>,
bib_manager: BibManager,
download_manager: Arc<Mutex<DownloadManager>>,
) -> Self {
if let Some(path) = path.clone() {
paths.lock().unwrap().push(path.clone())
@ -128,7 +136,7 @@ impl Parser {
text.push('\n');
}
let document = Document::new_with_manager(!is_child, bib_manager);
let document = Document::new_with_manager(!is_child, bib_manager, download_manager);
Self {
sections: Vec::new(),
section_nesting: 0,
@ -216,9 +224,11 @@ impl Parser {
let paths = Arc::clone(&self.paths);
let config = self.document.config.clone();
let bibliography = self.document.bibliography.create_child();
let download_manager = Arc::clone(&self.document.downloads);
let _ = thread::spawn(move || {
let mut parser = Parser::child_from_file(path, paths, bibliography).unwrap();
let mut parser =
Parser::child_from_file(path, paths, bibliography, download_manager).unwrap();
parser.set_config(config);
let document = parser.parse();
anchor_clone.write().unwrap().set_document(document);

@ -0,0 +1,84 @@
use crossbeam_utils::sync::WaitGroup;
use rayon::prelude::*;
use std::fs::read;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
/// A manager for downloading urls in parallel.
///
/// Entries are registered one by one with `add_download` and are all
/// processed together by `download_all`.
///
/// `Default` is derived (an empty manager) so the type can be constructed
/// generically as well as through `new()`.
#[derive(Clone, Debug, Default)]
pub struct DownloadManager {
    /// Every registered entry; each one is shared with the caller that
    /// registered it.
    downloads: Vec<Arc<Mutex<PendingDownload>>>,
}
impl DownloadManager {
    /// Builds a manager that has no entries registered yet.
    pub fn new() -> Self {
        let downloads = Vec::new();

        Self { downloads }
    }

    /// Registers `path` as a new pending entry.
    ///
    /// The manager keeps one handle to the entry and the returned handle
    /// points to the same entry.
    pub fn add_download(&mut self, path: String) -> Arc<Mutex<PendingDownload>> {
        let entry = Arc::new(Mutex::new(PendingDownload::new(path)));
        let handle = Arc::clone(&entry);
        self.downloads.push(handle);

        entry
    }

    /// Runs `download` on every registered entry using a parallel iterator.
    ///
    /// Each entry's mutex is held for as long as that entry is being
    /// processed. Panics if one of the entry mutexes is poisoned.
    pub fn download_all(&self) {
        self.downloads.par_iter().for_each(|entry| {
            let mut pending = entry.lock().unwrap();
            pending.download();
        })
    }
}
/// A single entry whose content still has to be fetched.
///
/// "Download" is used loosely here: the path may just as well point to a
/// file on the local filesystem, in which case it is read from disk.
#[derive(Debug, Clone)]
pub struct PendingDownload {
    /// Filesystem path or url of the content.
    path: String,
    /// The fetched content; `None` until `download` has stored a result, or
    /// when fetching failed.
    pub(crate) data: Option<Vec<u8>>,
    /// Wait group that is present from creation until `download` takes it
    /// out and drops it after the content has been stored.
    pub(crate) wg: Option<WaitGroup>,
}
impl PendingDownload {
    /// Creates an entry for `path` with no content and a fresh wait group.
    pub fn new(path: String) -> Self {
        Self {
            path,
            data: None,
            wg: Some(WaitGroup::new()),
        }
    }

    /// Downloads the file and writes the content to the `data` field.
    ///
    /// Only the first call does any work: it takes the wait group out of the
    /// entry, so later calls find `None` and return immediately.
    pub fn download(&mut self) {
        if let Some(wg) = self.wg.take() {
            log::debug!("Reading {}...", self.path);
            self.data = self.read_content();
            log::debug!("{} read!", self.path);
            // Release the wait group only after `data` is in place, so that
            // whoever waits on a clone of it sees the finished result.
            drop(wg);
        }
    }

    /// Reads the file's content, or downloads it if the path doesn't exist
    /// in the filesystem.
    ///
    /// Returns `None` when reading or downloading fails.
    fn read_content(&self) -> Option<Vec<u8>> {
        let path = PathBuf::from(&self.path);

        if path.exists() {
            read(path).ok()
        } else {
            self.download_content()
        }
    }

    /// Downloads the content from the url stored in `path`.
    ///
    /// Returns `None` if the request fails, if the server answers with an
    /// error status (4xx/5xx), or if the body cannot be read. Rejecting error
    /// statuses keeps an error page from being stored as file content.
    fn download_content(&self) -> Option<Vec<u8>> {
        reqwest::blocking::get(&self.path)
            .and_then(|response| response.error_for_status())
            .and_then(|response| response.bytes())
            .ok()
            .map(|bytes| bytes.to_vec())
    }
}

@ -1 +1,2 @@
pub mod downloads;
pub mod parsing;

Loading…
Cancel
Save