Improve parsing performance

pull/1/head
trivernis 5 years ago
parent 5dc26612d6
commit 7d910301d6

2
Cargo.lock generated

@ -358,7 +358,7 @@ dependencies = [
[[package]] [[package]]
name = "snekdown" name = "snekdown"
version = "0.5.2" version = "0.5.3"
dependencies = [ dependencies = [
"chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",

@ -1,6 +1,6 @@
[package] [package]
name = "snekdown" name = "snekdown"
version = "0.5.2" version = "0.5.3"
authors = ["trivernis <trivernis@protonmail.com>"] authors = ["trivernis <trivernis@protonmail.com>"]
edition = "2018" edition = "2018"
license-file = "LICENSE" license-file = "LICENSE"

@ -347,7 +347,10 @@ impl ToHtml for StrikedText {
impl ToHtml for MonospaceText { impl ToHtml for MonospaceText {
fn to_html(&self) -> String { fn to_html(&self) -> String {
format!("<code class='inlineCode'>{}</code>", self.value.to_html()) format!(
"<code class='inlineCode'>{}</code>",
encode_minimal(self.value.as_str())
)
} }
} }

@ -192,7 +192,7 @@ pub struct StrikedText {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct MonospaceText { pub struct MonospaceText {
pub(crate) value: PlainText, pub(crate) value: String,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]

@ -104,6 +104,8 @@ pub struct Parser {
} }
impl Parser { impl Parser {
/// TODO fn get_until(until: &[char], err_when: &[char]) -> String
pub fn new_from_file(path: String) -> Result<Self, io::Error> { pub fn new_from_file(path: String) -> Result<Self, io::Error> {
let content = read_to_string(path.clone())?; let content = read_to_string(path.clone())?;
Ok(Self::new(content, Some(path))) Ok(Self::new(content, Some(path)))
@ -234,6 +236,17 @@ impl Parser {
chars.contains(&self.current_char) && !self.check_escaped() chars.contains(&self.current_char) && !self.check_escaped()
} }
/// Tests whether any of the given character sequences matches at the
/// current parser position.
///
/// The candidates are tried in slice order via `check_special_sequence`,
/// stopping at the first one that succeeds. Returns `true` on the first
/// match and `false` when none of them match.
///
/// NOTE(review): a successful `check_special_sequence` appears to leave the
/// parser advanced past the matched sequence; callers that only want to peek
/// should save the index beforehand and revert afterwards — confirm.
fn check_special_group_sequence(&mut self, sequences: &[&[char]]) -> bool {
    sequences
        .iter()
        .any(|candidate| self.check_special_sequence(candidate).is_ok())
}
/// returns if the current character is a linebreak character /// returns if the current character is a linebreak character
/// Note: No one likes CRLF /// Note: No one likes CRLF
fn check_linebreak(&self) -> bool { fn check_linebreak(&self) -> bool {
@ -252,8 +265,11 @@ impl Parser {
fn check_special_sequence(&mut self, sequence: &[char]) -> Result<(), ParseError> { fn check_special_sequence(&mut self, sequence: &[char]) -> Result<(), ParseError> {
let start_index = self.index; let start_index = self.index;
self.seek_whitespace(); self.seek_whitespace();
if self.check_escaped() {
return Err(self.revert_with_error(start_index));
}
for sq_character in sequence { for sq_character in sequence {
if !self.check_special(sq_character) { if self.current_char != *sq_character {
return Err(self.revert_with_error(start_index)); return Err(self.revert_with_error(start_index));
} }
if self.next_char() == None { if self.next_char() == None {
@ -267,6 +283,30 @@ impl Parser {
Ok(()) Ok(())
} }
/// Returns the string from the current character up to (but not including)
/// the next delimiter character.
///
/// * `break_at` - characters that terminate the string normally
/// * `err_at` - characters that make the whole call fail
///
/// Delimiters are detected with `check_special_group`, so whatever escaping
/// rules that helper applies are honoured here as well. The delimiter itself
/// is never added to the result. If the parser already sits on a `break_at`
/// character when called, an empty string is returned.
///
/// # Errors
///
/// Returns a `ParseError` (after reverting to the position the call started
/// at) when a character from `err_at` is encountered.
fn get_string_until(
    &mut self,
    break_at: &[char],
    err_at: &[char],
) -> Result<String, ParseError> {
    let start_index = self.index;

    // The current character must be checked before it is collected.
    // Otherwise a delimiter sitting directly at the start (e.g. the closing
    // backtick of an empty inline code span) would be swallowed as content
    // and the scan would run on to the next delimiter.
    if self.check_special_group(err_at) {
        return Err(self.revert_with_error(start_index));
    }
    if self.check_special_group(break_at) {
        return Ok(String::new());
    }

    let mut result = String::new();
    result.push(self.current_char);
    while let Some(ch) = self.next_char() {
        if self.check_special_group(break_at) || self.check_special_group(err_at) {
            break;
        }
        result.push(ch);
    }

    // An error delimiter wins over a regular break delimiter.
    if self.check_special_group(err_at) {
        Err(self.revert_with_error(start_index))
    } else {
        Ok(result)
    }
}
/// transform an import path to be relative to the current parsers file /// transform an import path to be relative to the current parsers file
fn transform_path(&mut self, path: String) -> String { fn transform_path(&mut self, path: String) -> String {
let mut path = path; let mut path = path;
@ -487,6 +527,9 @@ impl Parser {
} }
text.push(character); text.push(character);
} }
for _ in 0..2 {
let _ = self.next_char();
}
Ok(CodeBlock { Ok(CodeBlock {
language, language,
@ -659,7 +702,7 @@ impl Parser {
while let Ok(token) = self.parse_inline() { while let Ok(token) = self.parse_inline() {
paragraph.add_element(token); paragraph.add_element(token);
let start_index = self.index; let start_index = self.index;
if self.check_special_group(&BLOCK_SPECIAL_CHARS) { if self.check_special_group_sequence(&BLOCK_SPECIAL_CHARS) {
self.revert_to(start_index)?; self.revert_to(start_index)?;
break; break;
} }
@ -957,11 +1000,11 @@ impl Parser {
} }
BACKTICK if !self.check_escaped() => { BACKTICK if !self.check_escaped() => {
parse_option!(self.next_char(), self.index); parse_option!(self.next_char(), self.index);
let plain_text = self.parse_plain_text()?; let content = self.get_string_until(&[BACKTICK, LB], &[])?;
if self.check_special(&BACKTICK) { if self.check_special(&BACKTICK) {
parse_option!(self.next_char(), self.index) parse_option!(self.next_char(), self.index)
} }
Ok(SubText::Monospace(MonospaceText { value: plain_text })) Ok(SubText::Monospace(MonospaceText { value: content }))
} }
PIPE if !self.check_escaped() => Err(ParseError::new(self.index)), // handling of table cells PIPE if !self.check_escaped() => Err(ParseError::new(self.index)), // handling of table cells
_ => Ok(SubText::Plain(self.parse_plain_text()?)), _ => Ok(SubText::Plain(self.parse_plain_text()?)),

@ -45,14 +45,15 @@ pub(crate) const PHOLDER_CLOSE: char = L_BRACKET;
pub(crate) const QUOTES: [char; 2] = [SINGLE_QUOTE, DOUBLE_QUOTE]; pub(crate) const QUOTES: [char; 2] = [SINGLE_QUOTE, DOUBLE_QUOTE];
pub(crate) const BLOCK_SPECIAL_CHARS: [char; 7] = [ pub(crate) const BLOCK_SPECIAL_CHARS: [&[char]; 8] = [
HASH, &[HASH],
MINUS, &[HASH, META_OPEN],
BACKTICK, &[MINUS, SPACE],
PIPE, &[BACKTICK, BACKTICK, BACKTICK],
QUOTE_START, &[PIPE],
META_OPEN, &[QUOTE_START],
IMPORT_START, &[META_OPEN],
&[IMPORT_START, IMPORT_OPEN],
]; ];
pub(crate) const INLINE_SPECIAL_CHARS: [char; 5] = [LB, ASTERISK, UNDERSCR, TILDE, BACKTICK]; pub(crate) const INLINE_SPECIAL_CHARS: [char; 5] = [LB, ASTERISK, UNDERSCR, TILDE, BACKTICK];

Loading…
Cancel
Save