Add multithreaded import parsing

pull/1/head
trivernis 5 years ago
parent 7b901ee984
commit 15b1a1e6b7

33
Cargo.lock generated

@ -1,6 +1,39 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
[[package]]
name = "autocfg"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "crossbeam-utils"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "markdown-rs" name = "markdown-rs"
version = "0.1.0" version = "0.1.0"
dependencies = [
"crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8"
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

@ -7,3 +7,4 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
crossbeam-utils = "0.7.2"

@ -1,3 +1,5 @@
use std::sync::{Arc, Mutex};
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum Block { pub enum Block {
Section(Section), Section(Section),
@ -6,6 +8,7 @@ pub enum Block {
Table(Table), Table(Table),
CodeBlock(CodeBlock), CodeBlock(CodeBlock),
Quote(Quote), Quote(Quote),
Import(Import),
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -86,6 +89,17 @@ pub struct Quote {
pub(crate) text: Vec<Text>, pub(crate) text: Vec<Text>,
} }
/// An import of another document that was found while parsing.
#[derive(Clone, Debug)]
pub struct Import {
// The path exactly as it was written inside the import statement.
pub(crate) path: String,
// Shared slot that the thread parsing the imported file writes its result into.
pub(crate) anchor: Arc<Mutex<ImportAnchor>>,
}
/// Holder for the document produced by parsing an imported file.
#[derive(Clone, Debug)]
pub struct ImportAnchor {
// `None` until `set_document` has been called with the parsed document.
pub(crate) document: Option<Document>,
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct InlineMetadata { pub struct InlineMetadata {
pub(crate) data: String, pub(crate) data: String,
@ -271,4 +285,12 @@ impl Quote {
} }
} }
// TODO: Images, URIs impl ImportAnchor {
/// Creates an anchor that does not hold a document yet.
pub fn new() -> Self {
    let document = None;
    Self { document }
}
/// Stores `document` in the anchor, replacing whatever was stored before.
pub fn set_document(&mut self, document: Document) {
    let filled = Some(document);
    self.document = filled;
}
}

@ -4,7 +4,10 @@ use std::time::Instant;
fn main() { fn main() {
let start = Instant::now(); let start = Instant::now();
let mut parser = Parser::new(read_to_string("test/document.md").unwrap()); let mut parser = Parser::new(
read_to_string("test/document.md").unwrap(),
Some("test/".to_string()),
);
let document = parser.parse(); let document = parser.parse();
println!("Total duration: {:?}", start.elapsed()); println!("Total duration: {:?}", start.elapsed());
write("test/document.ast", format!("{:#?}", document)).unwrap(); write("test/document.ast", format!("{:#?}", document)).unwrap();

@ -1,8 +1,13 @@
use crate::elements::*; use crate::elements::*;
use crate::tokens::*; use crate::tokens::*;
use crossbeam_utils::sync::WaitGroup;
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
use std::fmt::{Display, Formatter}; use std::fmt::{Display, Formatter};
use std::fs::read_to_string;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::thread;
macro_rules! parse_option { macro_rules! parse_option {
($option:expr, $index:expr) => { ($option:expr, $index:expr) => {
@ -35,20 +40,33 @@ pub struct Parser {
current_char: char, current_char: char,
section_nesting: u8, section_nesting: u8,
section_return: Option<u8>, section_return: Option<u8>,
path: Option<String>,
paths: Arc<Mutex<Vec<String>>>,
wg: WaitGroup,
} }
impl Parser { impl Parser {
pub fn new(text: String) -> Self { pub fn new(text: String, path: Option<String>) -> Self {
Parser::new_as_child(text, path, Arc::new(Mutex::new(Vec::new())))
}
pub fn new_as_child(
text: String,
path: Option<String>,
paths: Arc<Mutex<Vec<String>>>,
) -> Self {
let mut text: Vec<char> = text.chars().collect(); let mut text: Vec<char> = text.chars().collect();
text.append(&mut vec!['\n', ' ', '\n']); // push space and newline of eof. it fixes stuff and I don't know why. text.append(&mut vec!['\n', ' ', '\n']); // push space and newline of eof. it fixes stuff and I don't know why.
let current_char = text.get(0).unwrap().clone(); let current_char = text.get(0).unwrap().clone();
Self { Self {
index: 0, index: 0,
text, text,
current_char, current_char,
section_nesting: 0, section_nesting: 0,
section_return: None, section_return: None,
path,
paths,
wg: WaitGroup::new(),
} }
} }
@ -164,6 +182,54 @@ impl Parser {
Ok(()) Ok(())
} }
/// Resolves an import path relative to the document that contains the import.
///
/// If this parser has a `path` and that path points to an existing file,
/// `path` is joined onto the directory containing that file. In every other
/// case (no own path, or the own path is not a file) `path` is returned
/// unchanged.
fn transform_path(&mut self, path: String) -> String {
    if let Some(own_path) = &self.path {
        let own_path = Path::new(own_path);
        if own_path.is_file() {
            if let Some(dir) = own_path.parent() {
                // `Path::join` is used instead of string formatting because:
                // - an empty parent (own path is a bare file name) must not
                //   turn `import.md` into the root-anchored `/import.md`;
                // - an absolute import path stays absolute;
                // - the platform separator is used.
                // The lossy conversion avoids a panic on non-UTF-8 directories.
                return dir.join(&path).to_string_lossy().into_owned();
            }
        }
    }
    path
}
/// Starts up a new thread to parse the imported document.
///
/// The path is first resolved with `transform_path`. The import is rejected
/// with a `ParseError` (and a message on stdout) when the resolved path is
/// not an existing file or when it is already contained in the shared
/// `paths` list, which is treated as a cyclic reference.
///
/// On success the resolved path is recorded in `paths` and an anchor is
/// returned immediately; the spawned thread fills it with the parsed
/// document once it is done. If the file cannot be read by the thread, a
/// message is printed and the anchor keeps holding no document.
fn import_document(&mut self, path: String) -> Result<Arc<Mutex<ImportAnchor>>, ParseError> {
    let path = self.transform_path(path);
    // `is_file` already returns false for paths that do not exist.
    if !Path::new(&path).is_file() {
        println!("Import of \"{}\" failed: The file doesn't exist.", path);
        return Err(ParseError::new(self.index));
    }
    {
        // Keep the lock scope small: only the lookup and the registration.
        let mut paths = self.paths.lock().unwrap();
        if paths.contains(&path) {
            println!("Import of \"{}\" failed: Cyclic reference.", path);
            return Err(ParseError::new(self.index));
        }
        paths.push(path.clone());
    }
    let anchor = Arc::new(Mutex::new(ImportAnchor::new()));
    let anchor_clone = Arc::clone(&anchor);
    // The clone is moved into the thread and dropped there, which is what
    // `wg.wait()` in `parse` waits for.
    let wg = self.wg.clone();
    let paths = Arc::clone(&self.paths);

    let _ = thread::spawn(move || {
        // The file may have vanished or become unreadable since the check
        // above, so report the error instead of panicking in the thread.
        match read_to_string(&path) {
            Ok(text) => {
                let mut parser = Parser::new_as_child(text, Some(path), paths);
                let document = parser.parse();
                anchor_clone.lock().unwrap().set_document(document);
            }
            Err(err) => println!("Import of \"{}\" failed: {}", path, err),
        }
        drop(wg);
    });

    Ok(anchor)
}
/// parses the given text into a document /// parses the given text into a document
pub fn parse(&mut self) -> Document { pub fn parse(&mut self) -> Document {
let mut document = Document::new(); let mut document = Document::new();
@ -173,6 +239,9 @@ impl Parser {
} }
} }
let wg = self.wg.clone();
self.wg = WaitGroup::new();
wg.wait();
document document
} }
@ -197,6 +266,8 @@ impl Parser {
Block::CodeBlock(code_block) Block::CodeBlock(code_block)
} else if let Ok(quote) = self.parse_quote() { } else if let Ok(quote) = self.parse_quote() {
Block::Quote(quote) Block::Quote(quote)
} else if let Ok(import) = self.parse_import() {
Block::Import(import)
} else if let Ok(paragraph) = self.parse_paragraph() { } else if let Ok(paragraph) = self.parse_paragraph() {
Block::Paragraph(paragraph) Block::Paragraph(paragraph)
} else { } else {
@ -332,6 +403,33 @@ impl Parser {
Ok(InlineMetadata { data: text }) Ok(InlineMetadata { data: text })
} }
/// Parses an import and starts a new task to parse the document of the import.
///
/// An import is `IMPORT_START`, then `IMPORT_OPEN`, then a non-empty path
/// on the same line, then `IMPORT_CLOSE`.
///
/// On every failure, including a rejected import (missing file or cyclic
/// reference), the parser is reverted to the index it had on entry, so the
/// next block parser starts at the beginning of the text again.
fn parse_import(&mut self) -> Result<Import, ParseError> {
    let start_index = self.index;
    if !self.check_special(&IMPORT_START)
        || self.next_char().is_none()
        || !self.check_special(&IMPORT_OPEN)
    {
        return Err(self.revert_with_error(start_index));
    }
    // Collect everything up to the closing token or the end of the line.
    let mut path = String::new();
    while let Some(character) = self.next_char() {
        if self.check_linebreak() || self.check_special(&IMPORT_CLOSE) {
            break;
        }
        path.push(character);
    }
    // A linebreak here means the closing token was never found.
    if self.check_linebreak() || path.is_empty() {
        return Err(self.revert_with_error(start_index));
    }
    match self.import_document(path.clone()) {
        Ok(anchor) => Ok(Import { path, anchor }),
        // Revert like the other failure paths; otherwise the caller would
        // continue parsing from the middle of the import statement.
        Err(_) => Err(self.revert_with_error(start_index)),
    }
}
/// Parses a paragraph /// Parses a paragraph
fn parse_paragraph(&mut self) -> Result<Paragraph, ParseError> { fn parse_paragraph(&mut self) -> Result<Paragraph, ParseError> {
let mut paragraph = Paragraph::new(); let mut paragraph = Paragraph::new();

@ -17,6 +17,7 @@ pub(crate) const HASH: char = '#';
pub(crate) const O: char = 'o'; pub(crate) const O: char = 'o';
pub(crate) const X: char = 'x'; pub(crate) const X: char = 'x';
pub(crate) const GT: char = '>'; pub(crate) const GT: char = '>';
pub(crate) const LT: char = '<';
pub(crate) const BANG: char = '!'; pub(crate) const BANG: char = '!';
// aliases // aliases
@ -30,11 +31,21 @@ pub(crate) const DESC_CLOSE: char = L_BRACKET;
pub(crate) const IMG_START: char = BANG; pub(crate) const IMG_START: char = BANG;
pub(crate) const URL_OPEN: char = R_PARENTH; pub(crate) const URL_OPEN: char = R_PARENTH;
pub(crate) const URL_CLOSE: char = L_PARENTH; pub(crate) const URL_CLOSE: char = L_PARENTH;
// Import syntax: IMPORT_START, then IMPORT_OPEN, the path, and IMPORT_CLOSE.
// NOTE(review): the open alias maps to R_BRACKET and the close alias to L_BRACKET,
// matching the other aliases in this file — confirm against the bracket definitions.
pub(crate) const IMPORT_START: char = LT;
pub(crate) const IMPORT_OPEN: char = R_BRACKET;
pub(crate) const IMPORT_CLOSE: char = L_BRACKET;
// groups // groups
pub(crate) const BLOCK_SPECIAL_CHARS: [char; 6] = pub(crate) const BLOCK_SPECIAL_CHARS: [char; 7] = [
[HASH, MINUS, BACKTICK, PIPE, QUOTE_START, META_OPEN]; HASH,
MINUS,
BACKTICK,
PIPE,
QUOTE_START,
META_OPEN,
IMPORT_START,
];
pub(crate) const INLINE_SPECIAL_CHARS: [char; 6] = [LB, ASTERISK, UNDERSCR, TILDE, PIPE, BACKTICK]; pub(crate) const INLINE_SPECIAL_CHARS: [char; 6] = [LB, ASTERISK, UNDERSCR, TILDE, PIPE, BACKTICK];
pub(crate) const INLINE_SPECIAL_CHARS_SECOND: [char; 3] = [DESC_OPEN, IMG_START, URL_OPEN]; pub(crate) const INLINE_SPECIAL_CHARS_SECOND: [char; 3] = [DESC_OPEN, IMG_START, URL_OPEN];

Loading…
Cancel
Save