From 15b1a1e6b759f794731eab5cedf94308c5efb21f Mon Sep 17 00:00:00 2001 From: trivernis Date: Sat, 30 May 2020 17:02:45 +0200 Subject: [PATCH] Add multithreaded import parsing --- Cargo.lock | 33 ++++++++++++++++ Cargo.toml | 3 +- src/elements.rs | 24 +++++++++++- src/main.rs | 5 ++- src/parser.rs | 102 +++++++++++++++++++++++++++++++++++++++++++++++- src/tokens.rs | 15 ++++++- 6 files changed, 175 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec2965e..99a89d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,39 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "autocfg" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "crossbeam-utils" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "markdown-rs" version = "0.1.0" +dependencies = [ + "crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", +] +[metadata] +"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" +"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +"checksum crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" +"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" diff --git a/Cargo.toml b/Cargo.toml index 2589115..c2b9b06 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,4 +6,5 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[dependencies] \ No newline at end of file +[dependencies] +crossbeam-utils = "0.7.2" \ No newline at end of file diff --git a/src/elements.rs b/src/elements.rs index 74c0205..ad77089 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -1,3 +1,5 @@ +use std::sync::{Arc, Mutex}; + #[derive(Clone, Debug)] pub enum Block { Section(Section), @@ -6,6 +8,7 @@ pub enum Block { Table(Table), CodeBlock(CodeBlock), Quote(Quote), + Import(Import), } #[derive(Clone, Debug)] @@ -86,6 +89,17 @@ pub struct Quote { pub(crate) text: Vec, } +#[derive(Clone, Debug)] +pub struct Import { + pub(crate) path: String, + pub(crate) anchor: Arc>, +} + +#[derive(Clone, Debug)] +pub struct ImportAnchor { + pub(crate) document: Option, +} + #[derive(Clone, Debug)] pub struct InlineMetadata { pub(crate) data: String, @@ -271,4 +285,12 @@ impl Quote { } } -// TODO: Images, URIs +impl ImportAnchor { + pub fn new() -> Self { + Self { document: None } + } + + pub fn set_document(&mut self, document: Document) { + self.document = Some(document); + } +} diff --git a/src/main.rs b/src/main.rs index 9f327e2..c2997bc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,7 +4,10 @@ use std::time::Instant; fn main() { let start = Instant::now(); - let mut parser = Parser::new(read_to_string("test/document.md").unwrap()); + let mut parser = Parser::new( + read_to_string("test/document.md").unwrap(), + Some("test/".to_string()), + ); let document = parser.parse(); println!("Total duration: {:?}", start.elapsed()); write("test/document.ast", format!("{:#?}", document)).unwrap(); diff --git a/src/parser.rs b/src/parser.rs index 4d931dc..a4688ce 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,8 +1,13 @@ use crate::elements::*; use crate::tokens::*; +use crossbeam_utils::sync::WaitGroup; use std::error::Error; use std::fmt; use std::fmt::{Display, Formatter}; +use std::fs::read_to_string; +use std::path::Path; +use std::sync::{Arc, Mutex}; +use std::thread; macro_rules! parse_option { ($option:expr, $index:expr) => { @@ -35,20 +40,33 @@ pub struct Parser { current_char: char, section_nesting: u8, section_return: Option, + path: Option, + paths: Arc>>, + wg: WaitGroup, } impl Parser { - pub fn new(text: String) -> Self { + pub fn new(text: String, path: Option) -> Self { + Parser::new_as_child(text, path, Arc::new(Mutex::new(Vec::new()))) + } + + pub fn new_as_child( + text: String, + path: Option, + paths: Arc>>, + ) -> Self { let mut text: Vec = text.chars().collect(); text.append(&mut vec!['\n', ' ', '\n']); // push space and newline of eof. it fixes stuff and I don't know why. let current_char = text.get(0).unwrap().clone(); - Self { index: 0, text, current_char, section_nesting: 0, section_return: None, + path, + paths, + wg: WaitGroup::new(), } } @@ -164,6 +182,54 @@ impl Parser { Ok(()) } + fn transform_path(&mut self, path: String) -> String { + let mut path = path; + if let Some(selfpath) = &self.path { + let path_info = Path::new(&selfpath); + if path_info.is_file() { + if let Some(dir) = path_info.parent() { + path = format!("{}/{}", dir.to_str().unwrap(), path); + } + } + } + return path; + } + + /// starts up a new thread to parse the imported document + fn import_document(&mut self, path: String) -> Result>, ParseError> { + let mut path = self.transform_path(path); + let path_info = Path::new(&path); + if !path_info.exists() || !path_info.is_file() { + println!("Import of \"{}\" failed: The file doesn't exist.", path); + return Err(ParseError::new(self.index)); + } + path = path_info.to_str().unwrap().to_string(); + { + let mut paths = self.paths.lock().unwrap(); + if paths.iter().find(|item| **item == path) != None { + println!("Import of \"{}\" failed: Cyclic reference.", path); + return Err(ParseError::new(self.index)); + } + paths.push(path.clone()); + } + let anchor = Arc::new(Mutex::new(ImportAnchor::new())); + let anchor_clone = Arc::clone(&anchor); + let wg = self.wg.clone(); + let paths = Arc::clone(&self.paths); + + let _ = thread::spawn(move || { + let text = read_to_string(path.clone()).unwrap(); + + let mut parser = Parser::new_as_child(text.to_string(), Some(path), paths); + let document = parser.parse(); + anchor_clone.lock().unwrap().set_document(document); + + drop(wg); + }); + + Ok(anchor) + } + /// parses the given text into a document pub fn parse(&mut self) -> Document { let mut document = Document::new(); @@ -173,6 +239,9 @@ impl Parser { } } + let wg = self.wg.clone(); + self.wg = WaitGroup::new(); + wg.wait(); document } @@ -197,6 +266,8 @@ impl Parser { Block::CodeBlock(code_block) } else if let Ok(quote) = self.parse_quote() { Block::Quote(quote) + } else if let Ok(import) = self.parse_import() { + Block::Import(import) } else if let Ok(paragraph) = self.parse_paragraph() { Block::Paragraph(paragraph) } else { @@ -332,6 +403,33 @@ impl Parser { Ok(InlineMetadata { data: text }) } + /// parses an import and starts a new task to parse the document of the import + fn parse_import(&mut self) -> Result { + let start_index = self.index; + if !self.check_special(&IMPORT_START) + || self.next_char() == None + || !self.check_special(&IMPORT_OPEN) + { + return Err(self.revert_with_error(start_index)); + } + let mut path = String::new(); + while let Some(character) = self.next_char() { + if self.check_linebreak() || self.check_special(&IMPORT_CLOSE) { + break; + } + path.push(character); + } + if self.check_linebreak() || path.is_empty() { + return Err(self.revert_with_error(start_index)); + } + + if let Ok(anchor) = self.import_document(path.clone()) { + Ok(Import { path, anchor }) + } else { + Err(ParseError::new(self.index)) + } + } + /// Parses a paragraph fn parse_paragraph(&mut self) -> Result { let mut paragraph = Paragraph::new(); diff --git a/src/tokens.rs b/src/tokens.rs index da64b5f..e40f89a 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -17,6 +17,7 @@ pub(crate) const HASH: char = '#'; pub(crate) const O: char = 'o'; pub(crate) const X: char = 'x'; pub(crate) const GT: char = '>'; +pub(crate) const LT: char = '<'; pub(crate) const BANG: char = '!'; // aliases @@ -30,11 +31,21 @@ pub(crate) const DESC_CLOSE: char = L_BRACKET; pub(crate) const IMG_START: char = BANG; pub(crate) const URL_OPEN: char = R_PARENTH; pub(crate) const URL_CLOSE: char = L_PARENTH; +pub(crate) const IMPORT_START: char = LT; +pub(crate) const IMPORT_OPEN: char = R_BRACKET; +pub(crate) const IMPORT_CLOSE: char = L_BRACKET; // groups -pub(crate) const BLOCK_SPECIAL_CHARS: [char; 6] = - [HASH, MINUS, BACKTICK, PIPE, QUOTE_START, META_OPEN]; +pub(crate) const BLOCK_SPECIAL_CHARS: [char; 7] = [ + HASH, + MINUS, + BACKTICK, + PIPE, + QUOTE_START, + META_OPEN, + IMPORT_START, +]; pub(crate) const INLINE_SPECIAL_CHARS: [char; 6] = [LB, ASTERISK, UNDERSCR, TILDE, PIPE, BACKTICK]; pub(crate) const INLINE_SPECIAL_CHARS_SECOND: [char; 3] = [DESC_OPEN, IMG_START, URL_OPEN];