From 7d910301d6215a27cfeb8062f62d01840daf16d4 Mon Sep 17 00:00:00 2001 From: trivernis Date: Mon, 1 Jun 2020 00:22:36 +0200 Subject: [PATCH] Improve parsing performance --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/format/html.rs | 5 +++- src/parsing/elements.rs | 2 +- src/parsing/parser.rs | 51 +++++++++++++++++++++++++++++++++++++---- src/parsing/tokens.rs | 17 +++++++------- 6 files changed, 63 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0d54641..49270dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -358,7 +358,7 @@ dependencies = [ [[package]] name = "snekdown" -version = "0.5.2" +version = "0.5.3" dependencies = [ "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index b8dc0ce..5549da4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snekdown" -version = "0.5.2" +version = "0.5.3" authors = ["trivernis "] edition = "2018" license-file = "LICENSE" diff --git a/src/format/html.rs b/src/format/html.rs index 55e14aa..93b65d5 100644 --- a/src/format/html.rs +++ b/src/format/html.rs @@ -347,7 +347,10 @@ impl ToHtml for StrikedText { impl ToHtml for MonospaceText { fn to_html(&self) -> String { - format!("{}", self.value.to_html()) + format!( + "{}", + encode_minimal(self.value.as_str()) + ) } } diff --git a/src/parsing/elements.rs b/src/parsing/elements.rs index 8bb9410..0581815 100644 --- a/src/parsing/elements.rs +++ b/src/parsing/elements.rs @@ -192,7 +192,7 @@ pub struct StrikedText { #[derive(Clone, Debug)] pub struct MonospaceText { - pub(crate) value: PlainText, + pub(crate) value: String, } #[derive(Clone, Debug)] diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 2dd2bb8..f523f85 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -104,6 +104,8 @@ pub struct Parser { } impl Parser { + /// TODO fn get_until(until: &[char], err_when: &[]) -> String + pub fn new_from_file(path: String) -> Result { let content = read_to_string(path.clone())?; Ok(Self::new(content, Some(path))) @@ -234,6 +236,17 @@ impl Parser { chars.contains(&self.current_char) && !self.check_escaped() } + /// checks if the next chars are a special sequence + fn check_special_group_sequence(&mut self, sequences: &[&[char]]) -> bool { + for sequence in sequences { + if let Ok(_) = self.check_special_sequence(sequence) { + return true; + } + } + + false + } + /// returns if the current character is a linebreak character /// Note: No one likes CRLF fn check_linebreak(&self) -> bool { @@ -252,8 +265,11 @@ impl Parser { fn check_special_sequence(&mut self, sequence: &[char]) -> Result<(), ParseError> { let start_index = self.index; self.seek_whitespace(); + if self.check_escaped() { + return Err(self.revert_with_error(start_index)); + } for sq_character in sequence { - if !self.check_special(sq_character) { + if self.current_char != *sq_character { return Err(self.revert_with_error(start_index)); } if self.next_char() == None { @@ -267,6 +283,30 @@ impl Parser { Ok(()) } + /// returns the string until a specific + fn get_string_until( + &mut self, + break_at: &[char], + err_at: &[char], + ) -> Result { + let start_index = self.index; + let mut result = String::new(); + result.push(self.current_char); + + while let Some(ch) = self.next_char() { + if self.check_special_group(&break_at) || self.check_special_group(&err_at) { + break; + } + result.push(ch); + } + + if self.check_special_group(&err_at) { + Err(self.revert_with_error(start_index)) + } else { + Ok(result) + } + } + /// transform an import path to be relative to the current parsers file fn transform_path(&mut self, path: String) -> String { let mut path = path; @@ -487,6 +527,9 @@ impl Parser { } text.push(character); } + for _ in 0..2 { + let _ = self.next_char(); + } Ok(CodeBlock { language, @@ -659,7 +702,7 @@ impl Parser { while let Ok(token) = self.parse_inline() { paragraph.add_element(token); let start_index = self.index; - if self.check_special_group(&BLOCK_SPECIAL_CHARS) { + if self.check_special_group_sequence(&BLOCK_SPECIAL_CHARS) { self.revert_to(start_index)?; break; } @@ -957,11 +1000,11 @@ impl Parser { } BACKTICK if !self.check_escaped() => { parse_option!(self.next_char(), self.index); - let plain_text = self.parse_plain_text()?; + let content = self.get_string_until(&[BACKTICK, LB], &[])?; if self.check_special(&BACKTICK) { parse_option!(self.next_char(), self.index) } - Ok(SubText::Monospace(MonospaceText { value: plain_text })) + Ok(SubText::Monospace(MonospaceText { value: content })) } PIPE if !self.check_escaped() => Err(ParseError::new(self.index)), // handling of table cells _ => Ok(SubText::Plain(self.parse_plain_text()?)), diff --git a/src/parsing/tokens.rs b/src/parsing/tokens.rs index a50d275..717576f 100644 --- a/src/parsing/tokens.rs +++ b/src/parsing/tokens.rs @@ -45,14 +45,15 @@ pub(crate) const PHOLDER_CLOSE: char = L_BRACKET; pub(crate) const QUOTES: [char; 2] = [SINGLE_QUOTE, DOUBLE_QUOTE]; -pub(crate) const BLOCK_SPECIAL_CHARS: [char; 7] = [ - HASH, - MINUS, - BACKTICK, - PIPE, - QUOTE_START, - META_OPEN, - IMPORT_START, +pub(crate) const BLOCK_SPECIAL_CHARS: [&[char]; 8] = [ + &[HASH], + &[HASH, META_OPEN], + &[MINUS, SPACE], + &[BACKTICK, BACKTICK, BACKTICK], + &[PIPE], + &[QUOTE_START], + &[META_OPEN], + &[IMPORT_START, IMPORT_OPEN], ]; pub(crate) const INLINE_SPECIAL_CHARS: [char; 5] = [LB, ASTERISK, UNDERSCR, TILDE, BACKTICK];