From cb327180c60ca76a0d7c7ba1b55be3913e7ab98e Mon Sep 17 00:00:00 2001 From: trivernis Date: Thu, 30 Jul 2020 19:35:30 +0200 Subject: [PATCH] Add charred for char state operations --- Cargo.lock | 7 + Cargo.toml | 1 + src/elements/tokens.rs | 3 + src/parser/block.rs | 136 ++++++++--------- src/parser/charstate.rs | 3 +- src/parser/inline.rs | 322 ++++++++++++++++++++++------------------ src/parser/line.rs | 124 ++++++++-------- src/parser/mod.rs | 51 ++----- src/utils/parsing.rs | 6 +- 9 files changed, 339 insertions(+), 314 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c9fe940..fb86618 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,6 +70,11 @@ name = "cfg-if" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "charred" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "chrono" version = "0.4.11" @@ -568,6 +573,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" name = "snekdown" version = "0.17.0" dependencies = [ + "charred 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "colored 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -791,6 +797,7 @@ dependencies = [ "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" "checksum cc 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)" = "7bbb73db36c1246e9034e307d0fba23f9a2e251faa47ade70c1bd252220c8311" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +"checksum charred 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8cf73c7fbbaf59d5643f99c6a4413eba1b914a7489c39b730ec7d8d72e7bb061" "checksum chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" "checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" "checksum colored 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f4ffc801dacf156c5854b9df4f425a626539c3a6ef7893cc0c5084a23f0b6c59" diff --git a/Cargo.toml b/Cargo.toml index d6bbabd..5faca6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ name = "snekdown" path = "src/main.rs" [dependencies] +charred = "0.2.2" crossbeam-utils = "0.7.2" structopt = "0.3.14" minify = "1.1.1" diff --git a/src/elements/tokens.rs b/src/elements/tokens.rs index bcac279..61f6c41 100644 --- a/src/elements/tokens.rs +++ b/src/elements/tokens.rs @@ -107,6 +107,9 @@ pub(crate) const LIST_SPECIAL_CHARS: [char; 14] = [ MINUS, PLUS, ASTERISK, O, '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', ]; +pub(crate) const WHITESPACE: [char; 4] = [' ', '\t', '\r', '\n']; +pub(crate) const INLINE_WHITESPACE: [char; 3] = [' ', '\t', '\r']; + // sequences pub(crate) const SQ_CODE_BLOCK: [char; 3] = [BACKTICK, BACKTICK, BACKTICK]; diff --git a/src/parser/block.rs b/src/parser/block.rs index 6cfc1eb..247f421 100644 --- a/src/parser/block.rs +++ b/src/parser/block.rs @@ -5,6 +5,7 @@ use crate::parser::inline::ParseInline; use crate::parser::line::ParseLine; use crate::utils::parsing::{ParseError, ParseResult}; use crate::Parser; +use charred::tapemachine::{TapeError, TapeResult}; pub(crate) trait ParseBlock { fn parse_block(&mut self) -> ParseResult; @@ -22,10 +23,7 @@ impl ParseBlock for Parser { fn parse_block(&mut self) -> ParseResult { if let Some(section) = self.section_return { if section <= self.section_nesting && (self.section_nesting > 0) { - return Err(ParseError::new_with_message( - self.index, - "invalid section nesting", - )); + return Err(self.ctm.assert_error(None)); } else { self.section_return = None; } @@ -33,7 +31,7 @@ impl ParseBlock for Parser { let token = if let Ok(section) = self.parse_section() { Block::Section(section) } else if let Some(_) = self.section_return { - return Err(ParseError::new(self.index)); + return Err(self.ctm.err()); } else if let Ok(list) = self.parse_list() { Block::List(list) } else if let Ok(table) = self.parse_table() { @@ -45,13 +43,13 @@ impl ParseBlock for Parser { } else if let Ok(import) = self.parse_import() { Block::Import(import) } else if let Some(_) = self.section_return { - return Err(ParseError::new(self.index)); + return Err(self.ctm.err()); } else if let Ok(pholder) = self.parse_placeholder() { Block::Placeholder(pholder) } else if let Ok(paragraph) = self.parse_paragraph() { Block::Paragraph(paragraph) } else { - return Err(ParseError::new(self.index)); + return Err(self.ctm.err()); }; Ok(token) @@ -59,12 +57,12 @@ impl ParseBlock for Parser { /// Parses a section that consists of a header and one or more blocks fn parse_section(&mut self) -> ParseResult
{ - let start_index = self.index; - self.seek_whitespace(); - if self.check_special(&HASH) { + let start_index = self.ctm.get_index(); + self.ctm.seek_whitespace(); + if self.ctm.check_char(&HASH) { let mut size = 1; - while let Some(_) = self.next_char() { - if !self.check_special(&HASH) { + while let Some(_) = self.ctm.next_char() { + if !self.ctm.check_char(&HASH) { break; } size += 1; @@ -73,20 +71,20 @@ impl ParseBlock for Parser { if let Ok(meta) = self.parse_inline_metadata() { metadata = Some(meta); } - if size <= self.section_nesting || !self.current_char.is_whitespace() { + if size <= self.section_nesting || !self.ctm.get_current().is_whitespace() { if size <= self.section_nesting { self.section_return = Some(size); } - return Err(self.revert_with_error(start_index)); + return Err(self.ctm.rewind_with_error(start_index)); } - self.seek_inline_whitespace(); + self.ctm.seek_any(&INLINE_WHITESPACE); let mut header = self.parse_header()?; header.size = size; self.section_nesting = size; self.sections.push(size); let mut section = Section::new(header); section.metadata = metadata; - self.seek_whitespace(); + self.ctm.seek_whitespace(); while let Ok(block) = self.parse_block() { section.add_element(block); @@ -100,20 +98,22 @@ impl ParseBlock for Parser { } Ok(section) } else { - return Err(self.revert_with_error(start_index)); + return Err(self.ctm.rewind_with_error(start_index)); } } /// parses a code block fn parse_code_block(&mut self) -> ParseResult { - self.seek_whitespace(); - self.assert_special_sequence(&SQ_CODE_BLOCK, self.index)?; - self.skip_char(); - let language = self.get_string_until(&[LB], &[])?; - self.skip_char(); - let text = self.get_string_until_sequence(&[&SQ_CODE_BLOCK], &[])?; + let start_index = self.ctm.get_index(); + self.ctm.seek_whitespace(); + self.ctm + .assert_sequence(&SQ_CODE_BLOCK, Some(start_index))?; + self.ctm.seek_one(); + let language = self.ctm.get_string_until_any(&[LB], &[])?; + self.ctm.seek_one(); + let text = self.ctm.get_string_until_sequence(&[&SQ_CODE_BLOCK], &[])?; for _ in 0..2 { - self.skip_char(); + self.ctm.seek_one(); } Ok(CodeBlock { @@ -124,24 +124,25 @@ impl ParseBlock for Parser { /// parses a quote fn parse_quote(&mut self) -> ParseResult { - let start_index = self.index; - self.seek_whitespace(); + let start_index = self.ctm.get_index(); + self.ctm.seek_whitespace(); let metadata = if let Ok(meta) = self.parse_inline_metadata() { Some(meta) } else { None }; - if self.check_special(&META_CLOSE) { - if self.next_char() == None { - return Err(self.revert_with_error(start_index)); + if self.ctm.check_char(&META_CLOSE) { + if self.ctm.next_char() == None { + return Err(self.ctm.rewind_with_error(start_index)); } } let mut quote = Quote::new(metadata); - while self.check_special("E_START) - && self.next_char() != None - && (self.check_seek_inline_whitespace() || self.check_special(&LB)) + while self.ctm.check_char("E_START) + && self.ctm.next_char() != None + && (self.ctm.check_any(&WHITESPACE)) { + self.ctm.seek_any(&INLINE_WHITESPACE)?; if let Ok(text) = self.parse_text_line() { if text.subtext.len() > 0 { quote.add_text(text); @@ -151,7 +152,7 @@ impl ParseBlock for Parser { } } if quote.text.len() == 0 { - return Err(self.revert_with_error(start_index)); + return Err(self.ctm.rewind_with_error(start_index)); } Ok(quote) @@ -159,26 +160,26 @@ impl ParseBlock for Parser { /// Parses a paragraph fn parse_paragraph(&mut self) -> ParseResult { - self.seek_whitespace(); + self.ctm.seek_whitespace(); let mut paragraph = Paragraph::new(); while let Ok(token) = self.parse_line() { paragraph.add_element(token); - let start_index = self.index; - if self.check_special_sequence_group(&BLOCK_SPECIAL_CHARS) - || self.check_special_group(&self.block_break_at) + let start_index = self.ctm.get_index(); + if self.ctm.check_any_sequence(&BLOCK_SPECIAL_CHARS) + || self.ctm.check_any(&self.block_break_at) { - self.revert_to(start_index)?; + self.ctm.rewind(start_index); break; } - if !self.check_eof() { - self.revert_to(start_index)?; + if !self.ctm.check_eof() { + self.ctm.rewind(start_index); } } if paragraph.elements.len() > 0 { Ok(paragraph) } else { - Err(ParseError::new(self.index)) + Err(self.ctm.err()) } } @@ -186,10 +187,10 @@ impl ParseBlock for Parser { /// The parser is done iterative to resolve nested items fn parse_list(&mut self) -> ParseResult { let mut list = List::new(); - let start_index = self.index; - self.seek_whitespace(); + let start_index = self.ctm.get_index(); + self.ctm.seek_whitespace(); - let ordered = if self.check_special_group(&LIST_SPECIAL_CHARS) { + let ordered = if self.ctm.check_any(&LIST_SPECIAL_CHARS) { false } else { true @@ -242,31 +243,31 @@ impl ParseBlock for Parser { if list.items.len() > 0 { Ok(list) } else { - return Err(self.revert_with_error(start_index)); + return Err(self.ctm.rewind_with_error(start_index)); } } /// parses a markdown table fn parse_table(&mut self) -> ParseResult { let header = self.parse_row()?; - if self.check_linebreak() { - self.skip_char(); + if self.ctm.check_char(&LB) { + self.ctm.seek_one(); } - let seek_index = self.index; + let seek_index = self.ctm.get_index(); let mut table = Table::new(header); - while let Some(_) = self.next_char() { - self.seek_inline_whitespace(); - if !self.check_special_group(&[MINUS, PIPE]) || self.check_linebreak() { + while let Ok(_) = self.ctm.seek_one() { + self.ctm.seek_any(&INLINE_WHITESPACE); + if !self.ctm.check_any(&[MINUS, PIPE]) || self.ctm.check_char(&LB) { break; } } - if !self.check_linebreak() { - self.revert_to(seek_index)?; + if !self.ctm.check_char(&LB) { + self.ctm.rewind(seek_index); return Ok(table); } - self.seek_whitespace(); + self.ctm.seek_whitespace(); while let Ok(row) = self.parse_row() { table.add_row(row); } @@ -276,37 +277,36 @@ impl ParseBlock for Parser { /// parses an import and starts a new task to parse the document of the import fn parse_import(&mut self) -> ParseResult { - let start_index = self.index; - self.seek_whitespace(); - self.assert_special_sequence_group(&[&[IMPORT_START, IMPORT_OPEN]], start_index)?; + let start_index = self.ctm.get_index(); + self.ctm.seek_whitespace(); + self.ctm + .assert_any_sequence(&[&[IMPORT_START, IMPORT_OPEN]], Some(start_index))?; let mut path = String::new(); - while let Some(character) = self.next_char() { - if self.check_linebreak() || self.check_special(&IMPORT_CLOSE) { + while let Some(character) = self.ctm.next_char() { + if self.ctm.check_char(&LB) || self.ctm.check_char(&IMPORT_CLOSE) { break; } path.push(character); } - if self.check_linebreak() || path.is_empty() { - return Err(self.revert_with_error(start_index)); + if self.ctm.check_char(&LB) || path.is_empty() { + return Err(self.ctm.rewind_with_error(start_index)); } - if self.check_special(&IMPORT_CLOSE) { - self.skip_char(); + if self.ctm.check_char(&IMPORT_CLOSE) { + self.ctm.seek_one(); } // parser success if self.section_nesting > 0 { self.section_return = Some(0); - let err = ParseError::new_with_message(self.index, "import section nesting error"); - self.revert_to(start_index)?; - return Err(err); + return Err(self.ctm.rewind_with_error(start_index)); } - self.seek_whitespace(); + self.ctm.seek_whitespace(); if let Ok(anchor) = self.import_document(path.clone()) { Ok(Import { path, anchor }) } else { - Err(ParseError::new(self.index)) + Err(self.ctm.err()) } } } diff --git a/src/parser/charstate.rs b/src/parser/charstate.rs index 524923b..163247c 100644 --- a/src/parser/charstate.rs +++ b/src/parser/charstate.rs @@ -51,7 +51,7 @@ pub trait CharStateMachine { revert_index: usize, ) -> ParseResult; } - +/* impl CharStateMachine for Parser { /// Increments the current index and returns the /// char at the indexes position @@ -342,3 +342,4 @@ impl CharStateMachine for Parser { } } } +*/ diff --git a/src/parser/inline.rs b/src/parser/inline.rs index feea667..8d4d1c2 100644 --- a/src/parser/inline.rs +++ b/src/parser/inline.rs @@ -36,12 +36,14 @@ pub(crate) trait ParseInline { impl ParseInline for Parser { /// parses Inline surrounded by characters fn parse_surrounded(&mut self, surrounding: &char) -> ParseResult { - let start_index = self.index; - self.assert_special(surrounding, start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm.assert_char(surrounding, Some(start_index))?; + self.ctm.seek_one()?; let inline = self.parse_inline()?; - self.assert_special(surrounding, start_index)?; - self.skip_char(); + self.ctm.assert_char(surrounding, Some(start_index))?; + if !self.ctm.check_eof() { + self.ctm.seek_one()?; + } Ok(inline) } @@ -53,10 +55,10 @@ impl ParseInline for Parser { return Ok(Inline::TemplateVar(var)); } } - if self.check_special(&PIPE) || self.check_linebreak() { - Err(ParseError::new(self.index)) - } else if self.check_eof() { - Err(ParseError::eof(self.index)) + if self.ctm.check_char(&PIPE) || self.ctm.check_char(&LB) { + Err(self.ctm.err()) + } else if self.ctm.check_eof() { + Err(self.ctm.err()) } else if let Ok(image) = self.parse_image() { Ok(Inline::Image(image)) } else if let Ok(url) = self.parse_url(false) { @@ -90,10 +92,10 @@ impl ParseInline for Parser { /// parses an image url fn parse_image(&mut self) -> ParseResult { - let start_index = self.index; - self.seek_inline_whitespace(); - self.assert_special(&IMG_START, start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm.seek_any(&INLINE_WHITESPACE); + self.ctm.assert_char(&IMG_START, Some(start_index))?; + self.ctm.seek_one()?; if let Ok(url) = self.parse_url(true) { let metadata = if let Ok(meta) = self.parse_inline_metadata() { @@ -103,37 +105,34 @@ impl ParseInline for Parser { }; Ok(Image { url, metadata }) } else { - Err(self.revert_with_error(start_index)) + Err(self.ctm.rewind_with_error(start_index)) } } // parses an url fn parse_url(&mut self, short_syntax: bool) -> ParseResult { - let start_index = self.index; - self.seek_inline_whitespace(); + let start_index = self.ctm.get_index(); + self.ctm.seek_any(&INLINE_WHITESPACE); let mut description = String::new(); - if self.check_special(&DESC_OPEN) { - self.skip_char(); - description = if let Ok(desc) = self.get_string_until(&[DESC_CLOSE], &[LB]) { - desc - } else { - return Err(self.revert_with_error(start_index)); - }; + if self.ctm.check_char(&DESC_OPEN) { + self.ctm.seek_one(); + description = + self.ctm + .get_string_until_any_or_rewind(&[DESC_CLOSE], &[LB], start_index)?; } else if !short_syntax { - return Err(self.revert_with_error(start_index)); + return Err(self.ctm.rewind_with_error(start_index)); } - self.skip_char(); - self.assert_special(&URL_OPEN, start_index)?; - self.skip_char(); - self.seek_inline_whitespace(); + self.ctm.seek_one(); + self.ctm.assert_char(&URL_OPEN, Some(start_index))?; + self.ctm.seek_one(); + self.ctm.seek_any(&INLINE_WHITESPACE); - let url = if let Ok(url_str) = self.get_string_until(&[URL_CLOSE], &[LB]) { - url_str - } else { - return Err(self.revert_with_error(start_index)); - }; - self.skip_char(); + let url = self + .ctm + .get_string_until_any_or_rewind(&[URL_CLOSE], &[LB], start_index)?; + + self.ctm.seek_one(); if description.is_empty() { Ok(Url::new(None, url)) @@ -144,31 +143,31 @@ impl ParseInline for Parser { /// parses a markdown checkbox fn parse_checkbox(&mut self) -> ParseResult { - let start_index = self.index; - self.assert_special(&CHECK_OPEN, start_index)?; - self.skip_char(); - let checked = if self.check_special(&CHECK_CHECKED) { + let start_index = self.ctm.get_index(); + self.ctm.assert_char(&CHECK_OPEN, Some(start_index))?; + self.ctm.seek_one(); + let checked = if self.ctm.check_char(&CHECK_CHECKED) { true - } else if self.check_special(&SPACE) { + } else if self.ctm.check_char(&SPACE) { false } else { - return Err(self.revert_with_error(start_index)); + return Err(self.ctm.rewind_with_error(start_index)); }; - self.skip_char(); - self.assert_special(&CHECK_CLOSE, start_index)?; - self.skip_char(); + self.ctm.seek_one(); + self.ctm.assert_char(&CHECK_CLOSE, Some(start_index))?; + self.ctm.seek_one(); Ok(Checkbox { value: checked }) } /// parses bold text with must start with two asterisks fn parse_bold(&mut self) -> ParseResult { - let start_index = self.index; - self.assert_special_sequence(&BOLD, start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm.assert_sequence(&BOLD, Some(start_index))?; + self.ctm.seek_one(); let inline = self.parse_inline()?; - self.assert_special_sequence(&BOLD, start_index)?; - self.skip_char(); + self.ctm.assert_sequence(&BOLD, Some(start_index))?; + self.ctm.seek_one(); Ok(BoldText { value: Box::new(inline), @@ -189,12 +188,14 @@ impl ParseInline for Parser { /// parses monospace text (inline-code) that isn't allowed to contain special characters fn parse_monospace(&mut self) -> ParseResult { - let start_index = self.index; - self.assert_special(&BACKTICK, start_index)?; - self.skip_char(); - let content = self.get_string_until(&[BACKTICK, LB], &[])?; - self.assert_special(&BACKTICK, start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm.assert_char(&BACKTICK, Some(start_index))?; + self.ctm.seek_one(); + let content = self + .ctm + .get_string_until_any_or_rewind(&[BACKTICK, LB], &[], start_index)?; + self.ctm.assert_char(&BACKTICK, Some(start_index))?; + self.ctm.seek_one(); Ok(MonospaceText { value: content }) } @@ -212,11 +213,13 @@ impl ParseInline for Parser { } fn parse_emoji(&mut self) -> ParseResult { - let start_index = self.index; - self.assert_special(&EMOJI, start_index)?; - self.skip_char(); - let name = self.get_string_until_or_revert(&[EMOJI], &[SPACE, LB], start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm.assert_char(&EMOJI, Some(start_index))?; + self.ctm.seek_one(); + let name = self + .ctm + .get_string_until_any_or_rewind(&[EMOJI], &[SPACE, LB], start_index)?; + self.ctm.seek_one(); if let Some(emoji) = gh_emoji::get(name.as_str()) { let emoji_char = *emoji.chars().collect::>().first().unwrap(); Ok(Emoji { @@ -224,20 +227,24 @@ impl ParseInline for Parser { name, }) } else { - Err(self.revert_with_error(start_index)) + Err(self.ctm.rewind_with_error(start_index)) } } /// parses colored text fn parse_colored(&mut self) -> ParseResult { - let start_index = self.index; - self.assert_special_sequence(&SQ_COLOR_START, start_index)?; - self.skip_char(); - let color = - self.get_string_until_or_revert(&[COLOR_CLOSE], &[SPACE, LB, SEMICOLON], start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm + .assert_sequence(&SQ_COLOR_START, Some(start_index))?; + self.ctm.seek_one(); + let color = self.ctm.get_string_until_any_or_rewind( + &[COLOR_CLOSE], + &[SPACE, LB, SEMICOLON], + start_index, + )?; + self.ctm.seek_one(); if color.is_empty() { - return Err(ParseError::new(self.index)); + return Err(self.ctm.err()); } Ok(Colored { value: Box::new(self.parse_inline()?), @@ -246,11 +253,14 @@ impl ParseInline for Parser { } fn parse_bibref(&mut self) -> ParseResult>> { - let start_index = self.index; - self.assert_special_sequence(&SQ_BIBREF_START, start_index)?; - self.skip_char(); - let key = self.get_string_until_or_revert(&[BIBREF_CLOSE], &[SPACE, LB], start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm + .assert_sequence(&SQ_BIBREF_START, Some(start_index))?; + self.ctm.seek_one(); + let key = + self.ctm + .get_string_until_any_or_rewind(&[BIBREF_CLOSE], &[SPACE, LB], start_index)?; + self.ctm.seek_one(); let ref_entry = Arc::new(RwLock::new(BibReference::new( key, self.document.config.get_ref_entry(BIB_REF_DISPLAY), @@ -264,15 +274,21 @@ impl ParseInline for Parser { /// parses a template variable {prefix{name}suffix} fn parse_template_variable(&mut self) -> ParseResult>> { - let start_index = self.index; - self.assert_special(&TEMP_VAR_OPEN, start_index)?; - self.skip_char(); - let prefix = self.get_string_until_or_revert(&[TEMP_VAR_OPEN], &[LB], start_index)?; - self.skip_char(); - let name = self.get_string_until_or_revert(&[TEMP_VAR_CLOSE], &[LB], start_index)?; - self.skip_char(); - let suffix = self.get_string_until_or_revert(&[TEMP_VAR_CLOSE], &[LB], start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm.assert_char(&TEMP_VAR_OPEN, Some(start_index))?; + self.ctm.seek_one(); + let prefix = + self.ctm + .get_string_until_any_or_rewind(&[TEMP_VAR_OPEN], &[LB], start_index)?; + self.ctm.seek_one(); + let name = + self.ctm + .get_string_until_any_or_rewind(&[TEMP_VAR_CLOSE], &[LB], start_index)?; + self.ctm.seek_one(); + let suffix = + self.ctm + .get_string_until_any_or_rewind(&[TEMP_VAR_CLOSE], &[LB], start_index)?; + self.ctm.seek_one(); Ok(Arc::new(RwLock::new(TemplateVariable { value: None, name, @@ -283,15 +299,15 @@ impl ParseInline for Parser { /// parses plain text as a string until it encounters an unescaped special inline char fn parse_plain(&mut self) -> ParseResult { - if self.check_linebreak() { - return Err(ParseError::new(self.index)); + if self.ctm.check_char(&LB) { + return Err(self.ctm.err()); } let mut characters = String::new(); - characters.push(self.current_char); - while let Some(ch) = self.next_char() { - if self.check_special_group(&INLINE_SPECIAL_CHARS) - || self.check_special_group(&self.inline_break_at) - || (self.parse_variables && self.check_special(&TEMP_VAR_OPEN)) + characters.push(self.ctm.get_current()); + while let Some(ch) = self.ctm.next_char() { + if self.ctm.check_any(&INLINE_SPECIAL_CHARS) + || self.ctm.check_any(&self.inline_break_at) + || (self.parse_variables && self.ctm.check_char(&TEMP_VAR_OPEN)) { break; } @@ -301,44 +317,74 @@ impl ParseInline for Parser { if characters.len() > 0 { Ok(PlainText { value: characters }) } else { - Err(ParseError::new_with_message( - self.index, - "no plaintext characters parsed", - )) + Err(self.ctm.err()) + } + } + + /// Parses metadata + fn parse_inline_metadata(&mut self) -> ParseResult<InlineMetadata> { + let start_index = self.ctm.get_index(); + self.ctm.assert_char(&META_OPEN, Some(start_index))?; + self.ctm.seek_one(); + + let mut values = HashMap::new(); + while let Ok((key, value)) = self.parse_metadata_pair() { + values.insert(key, value); + if self.ctm.check_char(&META_CLOSE) || self.ctm.check_char(&LB) { + // abort the parser of the inner content when encountering a closing tag or linebreak + break; + } + } + if self.ctm.check_char(&META_CLOSE) { + self.ctm.seek_one(); + } + if values.len() == 0 { + // if there was a linebreak (the metadata wasn't closed) or there is no inner data + // return an error + return Err(self.ctm.rewind_with_error(start_index)); } + + Ok(InlineMetadata { data: values }) } /// parses a key-value metadata pair fn parse_metadata_pair(&mut self) -> Result<(String, MetadataValue), ParseError> { - self.seek_inline_whitespace(); - let name = self.get_string_until(&[META_CLOSE, EQ, SPACE, LB], &[])?; + self.ctm.seek_any(&INLINE_WHITESPACE); + let name = self + .ctm + .get_string_until_any(&[META_CLOSE, EQ, SPACE, LB], &[])?; - self.seek_inline_whitespace(); + self.ctm.seek_any(&INLINE_WHITESPACE); let mut value = MetadataValue::Bool(true); - if self.check_special(&EQ) { - self.skip_char(); - self.seek_inline_whitespace(); + if self.ctm.check_char(&EQ) { + self.ctm.seek_one(); + self.ctm.seek_any(&INLINE_WHITESPACE); if let Ok(ph) = self.parse_placeholder() { value = MetadataValue::Placeholder(ph); } else if let Ok(template) = self.parse_template() { value = MetadataValue::Template(template) } else { - let quoted_string = self.check_special_group(&QUOTES); + let quoted_string = self.ctm.check_any(&QUOTES); + let parse_until = if quoted_string { - let quote_start = self.current_char; - self.skip_char(); + let quote_start = self.ctm.get_current(); + self.ctm.seek_one(); vec![quote_start, META_CLOSE, LB] } else { vec![META_CLOSE, LB, SPACE] }; - let raw_value = self.get_string_until(&parse_until, &[])?; - if self.check_special_group(&QUOTES) { - self.skip_char(); + + let raw_value = self.ctm.get_string_until_any(&parse_until, &[])?; + + if self.ctm.check_any(&QUOTES) { + self.ctm.seek_one(); } - self.seek_inline_whitespace(); - if self.check_special(&COMMA) { - self.skip_char(); + self.ctm.seek_any(&INLINE_WHITESPACE); + + if self.ctm.check_char(&COMMA) { + self.ctm.seek_one(); } + value = if quoted_string { MetadataValue::String(raw_value) } else if raw_value.to_lowercase().as_str() == "true" { @@ -358,44 +404,21 @@ impl ParseInline for Parser { Ok((name, value)) } - /// Parses metadata - fn parse_inline_metadata(&mut self) -> ParseResult<InlineMetadata> { - let start_index = self.index; - self.assert_special(&META_OPEN, start_index)?; - self.skip_char(); - - let mut values = HashMap::new(); - while let Ok((key, value)) = self.parse_metadata_pair() { - values.insert(key, value); - if self.check_special(&META_CLOSE) || self.check_linebreak() { - // abort the parser of the inner content when encountering a closing tag or linebreak - break; - } - } - if self.check_special(&META_CLOSE) { - self.skip_char(); - } - if values.len() == 0 { - // if there was a linebreak (the metadata wasn't closed) or there is no inner data - // return an error - return Err(self.revert_with_error(start_index)); - } - - Ok(InlineMetadata { data: values }) - } - /// parses a placeholder element fn parse_placeholder(&mut self) -> ParseResult<Arc<RwLock<Placeholder>>> { - let start_index = self.index; - self.assert_special_sequence(&SQ_PHOLDER_START, self.index)?; - self.skip_char(); - let name = if let Ok(name_str) = self.get_string_until_sequence(&[&SQ_PHOLDER_STOP], &[LB]) + let start_index = self.ctm.get_index(); + self.ctm.assert_sequence(&SQ_PHOLDER_START, None)?; + self.ctm.seek_one(); + + let name = if let Ok(name_str) = self + .ctm + .get_string_until_sequence(&[&SQ_PHOLDER_STOP], &[&[LB]]) { name_str } else { - return Err(self.revert_with_error(start_index)); + return Err(self.ctm.rewind_with_error(start_index)); }; - self.skip_char(); + self.ctm.seek_one(); let metadata = if let Ok(meta) = self.parse_inline_metadata() { Some(meta) @@ -411,27 +434,32 @@ impl ParseInline for Parser { /// parses a template fn parse_template(&mut self) -> ParseResult<Template> { - let start_index = self.index; - self.assert_special(&TEMPLATE, start_index)?; - self.skip_char(); - if self.check_special(&TEMPLATE) { - return Err(self.revert_with_error(start_index)); + let start_index = self.ctm.get_index(); + + self.ctm.assert_char(&TEMPLATE, None)?; + self.ctm.seek_one(); + + if self.ctm.check_char(&TEMPLATE) { + return Err(self.ctm.rewind_with_error(start_index)); } + let mut elements = Vec::new(); self.block_break_at.push(TEMPLATE); self.inline_break_at.push(TEMPLATE); self.parse_variables = true; + while let Ok(e) = self.parse_block() { elements.push(Element::Block(Box::new(e))); - if self.check_special(&TEMPLATE) { + if self.ctm.check_char(&TEMPLATE) { break; } } self.parse_variables = false; self.block_break_at.clear(); self.inline_break_at.clear(); - self.assert_special(&TEMPLATE, start_index)?; - self.skip_char(); + self.ctm.assert_char(&TEMPLATE, Some(start_index))?; + self.ctm.seek_one(); + let vars: HashMap<String, Arc<RwLock<TemplateVariable>>> = elements .iter() .map(|e| e.get_template_variables()) diff --git a/src/parser/line.rs b/src/parser/line.rs index 39f9eef..e8e734a 100644 --- a/src/parser/line.rs +++ b/src/parser/line.rs @@ -21,8 +21,8 @@ pub(crate) trait ParseLine { impl ParseLine for Parser { /// parses inline definitions fn parse_line(&mut self) -> ParseResult<Line> { - if self.index > self.text.len() { - Err(ParseError::new(self.index)) + if self.ctm.check_eof() { + Err(self.ctm.err()) } else { if let Ok(ruler) = self.parse_ruler() { Ok(Line::Ruler(ruler)) @@ -33,17 +33,17 @@ impl ParseLine for Parser { } else if let Ok(text) = self.parse_text_line() { Ok(Line::Text(text)) } else { - Err(ParseError::new(self.index)) + Err(self.ctm.err()) } } } /// parses the header of a section fn parse_header(&mut self) -> ParseResult<Header> { - let start_index = self.index; + let start_index = self.ctm.get_index(); let line = self.parse_line()?; let mut anchor = String::new(); - self.text[start_index..self.index] + self.ctm.get_text()[start_index..self.ctm.get_index()] .iter() .for_each(|e| anchor.push(*e)); anchor.retain(|c| !c.is_whitespace()); @@ -52,21 +52,23 @@ impl ParseLine for Parser { /// parses a single list item defined with - fn parse_list_item(&mut self) -> ParseResult<ListItem> { - let start_index = self.index; - self.seek_inline_whitespace(); - let level = self.index - start_index; - self.assert_special_group(&LIST_SPECIAL_CHARS, start_index)?; - let ordered = self.current_char.is_numeric(); - self.skip_char(); - if self.check_special(&DOT) { - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm.seek_any(&INLINE_WHITESPACE); + let level = self.ctm.get_index() - start_index; + self.ctm + .assert_any(&LIST_SPECIAL_CHARS, Some(start_index))?; + let ordered = self.ctm.get_current().is_numeric(); + self.ctm.seek_one()?; + + if self.ctm.check_char(&DOT) { + self.ctm.seek_one()?; } - if !self.check_seek_inline_whitespace() { - return Err(self.revert_with_error(start_index)); + if !self.ctm.check_any(&INLINE_WHITESPACE) { + return Err(self.ctm.rewind_with_error(start_index)); } - self.seek_inline_whitespace(); - if self.check_special(&MINUS) { - return Err(self.revert_with_error(start_index)); + self.ctm.seek_any(&INLINE_WHITESPACE); + if self.ctm.check_char(&MINUS) { + return Err(self.ctm.rewind_with_error(start_index)); } let item = ListItem::new(self.parse_line()?, level as u16, ordered); @@ -76,65 +78,68 @@ impl ParseLine for Parser { /// parses a table row/head fn parse_row(&mut self) -> ParseResult<Row> { - let start_index = self.index; - self.seek_inline_whitespace(); - self.assert_special(&PIPE, start_index)?; - self.skip_char(); - if self.check_special(&PIPE) { - return Err(self.revert_with_error(start_index)); + let start_index = self.ctm.get_index(); + self.ctm.seek_any(&INLINE_WHITESPACE); + self.ctm.assert_char(&PIPE, Some(start_index))?; + self.ctm.seek_one(); + if self.ctm.check_char(&PIPE) { + return Err(self.ctm.rewind_with_error(start_index)); } self.inline_break_at.push(PIPE); - self.seek_inline_whitespace(); + self.ctm.seek_any(&INLINE_WHITESPACE); let mut row = Row::new(); loop { let mut element = TextLine::new(); while let Ok(inline) = self.parse_inline() { element.subtext.push(inline); - if self.check_linebreak() || self.check_special(&PIPE) || self.check_eof() { + if self.ctm.check_char(&LB) || self.ctm.check_char(&PIPE) || self.ctm.check_eof() { break; } } row.add_cell(Cell { text: Line::Text(element), }); - if self.check_special(&PIPE) { - self.skip_char(); + if self.ctm.check_char(&PIPE) { + self.ctm.seek_one(); } - if self.check_linebreak() || self.check_eof() { + if self.ctm.check_char(&LB) || self.ctm.check_eof() { break; } - self.seek_inline_whitespace(); + self.ctm.seek_any(&INLINE_WHITESPACE); } self.inline_break_at.clear(); - if self.check_special(&PIPE) { - self.skip_char(); - self.skip_char(); - } else { - self.skip_char(); + + if self.ctm.check_char(&PIPE) { + self.ctm.seek_one(); } + self.ctm.seek_one(); if row.cells.len() > 0 { Ok(row) } else { - return Err(self.revert_with_error(start_index)); + return Err(self.ctm.rewind_with_error(start_index)); } } fn parse_bib_entry(&mut self) -> ParseResult<Arc<RwLock<BibEntry>>> { - let start_index = self.index; - self.seek_inline_whitespace(); - self.assert_special(&BIB_KEY_OPEN, start_index)?; - self.skip_char(); - let key = self.get_string_until_or_revert(&[BIB_KEY_CLOSE], &[LB, SPACE], start_index)?; - self.skip_char(); - self.assert_special(&BIB_DATA_START, start_index)?; - self.skip_char(); - self.seek_inline_whitespace(); + let start_index = self.ctm.get_index(); + self.ctm.seek_any(&INLINE_WHITESPACE); + self.ctm.assert_char(&BIB_KEY_OPEN, Some(start_index))?; + self.ctm.seek_one(); + let key = + self.ctm + .get_string_until_any_or_rewind(&[BIB_KEY_CLOSE], &[LB, SPACE], start_index)?; + self.ctm.seek_one(); + self.ctm.assert_char(&BIB_DATA_START, Some(start_index))?; + self.ctm.seek_one(); + self.ctm.seek_any(&INLINE_WHITESPACE); let entry = if let Ok(meta) = self.parse_inline_metadata() { BibEntry::from_metadata(key, Box::new(meta), &self.document.config) } else { - let url = self.get_string_until_or_revert(&[LB], &[], start_index)?; + let url = self + .ctm + .get_string_until_any_or_rewind(&[LB], &[], start_index)?; BibEntry::from_url(key, url, &self.document.config) }; let entry_ref = Arc::new(RwLock::new(entry)); @@ -147,9 +152,10 @@ impl ParseLine for Parser { /// parses centered text fn parse_centered(&mut self) -> ParseResult<Centered> { - let start_index = self.index; - self.assert_special_sequence(&SQ_CENTERED_START, start_index)?; - self.skip_char(); + let start_index = self.ctm.get_index(); + self.ctm + .assert_sequence(&SQ_CENTERED_START, Some(start_index))?; + self.ctm.seek_one(); let line = self.parse_text_line()?; Ok(Centered { line }) @@ -157,10 +163,12 @@ impl ParseLine for Parser { /// parses a ruler fn parse_ruler(&mut self) -> ParseResult<Ruler> { - let start_index = self.index; - self.seek_inline_whitespace(); - self.assert_special_sequence(&SQ_RULER, start_index)?; - self.seek_until_linebreak(); + let start_index = self.ctm.get_index(); + self.ctm.seek_any(&INLINE_WHITESPACE); + self.ctm.assert_sequence(&SQ_RULER, Some(start_index))?; + while !self.ctm.check_char(&LB) { + self.ctm.seek_one(); + } Ok(Ruler {}) } @@ -169,19 +177,19 @@ impl ParseLine for Parser { let mut text = TextLine::new(); while let Ok(subtext) = self.parse_inline() { text.add_subtext(subtext); - if self.check_eof() || self.check_special_group(&self.inline_break_at) { + if self.ctm.check_eof() || self.ctm.check_any(&self.inline_break_at) { break; } } - if self.check_linebreak() { - self.skip_char(); + if self.ctm.check_char(&LB) { + self.ctm.seek_one(); } - if text.subtext.len() > 0 || !self.check_eof() { + if text.subtext.len() > 0 || !self.ctm.check_eof() { Ok(text) } else { - Err(ParseError::eof(self.index)) + Err(self.ctm.err()) } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 20490b4..c1c866d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7,6 +7,7 @@ use self::block::ParseBlock; use crate::elements::{Document, ImportAnchor}; use crate::references::configuration::Configuration; use crate::utils::parsing::{ParseError, ParseResult}; +use charred::tapemachine::CharTapeMachine; use colored::*; use crossbeam_utils::sync::WaitGroup; use std::fs::File; @@ -17,9 +18,7 @@ use std::sync::{Arc, Mutex, RwLock}; use std::thread; pub struct Parser { - pub(crate) index: usize, - pub(crate) text: Vec<char>, - pub(crate) current_char: char, + pub(crate) ctm: CharTapeMachine, section_nesting: u8, sections: Vec<u8>, section_return: Option<u8>, @@ -30,7 +29,6 @@ pub struct Parser { pub(crate) block_break_at: Vec<char>, pub(crate) inline_break_at: Vec<char>, pub(crate) document: Document, - pub(crate) previous_char: char, pub(crate) reader: Box<dyn BufRead>, pub(crate) parse_variables: bool, } @@ -112,9 +110,6 @@ impl Parser { } let document = Document::new(!is_child); Self { - index: 0, - text, - current_char, sections: Vec::new(), section_nesting: 0, section_return: None, @@ -122,7 +117,7 @@ impl Parser { paths, wg: WaitGroup::new(), is_child, - previous_char: ' ', + ctm: CharTapeMachine::new(text), inline_break_at: Vec::new(), block_break_at: Vec::new(), document, @@ -137,7 +132,8 @@ impl Parser { /// Returns the text of the parser as a string fn get_text(&self) -> String { - self.text + self.ctm + .get_text() .iter() .fold("".to_string(), |a, b| format!("{}{}", a, b)) } @@ -174,15 +170,13 @@ impl Parser { ) .red() ); - return Err(ParseError::new_with_message( - self.index, - "file does not exist", - )); + eprintln!("file {} does not exist", path.to_str().unwrap()); + return Err(self.ctm.assert_error(None)); } { let mut paths = self.paths.lock().unwrap(); if paths.iter().find(|item| **item == path) != None { - println!( + eprintln!( "{}", format!( "Import of \"{}\" failed: Cyclic import.", @@ -190,7 +184,7 @@ impl Parser { ) .yellow() ); - return Err(ParseError::new_with_message(self.index, "cyclic import")); + return Err(self.ctm.assert_error(None)); } paths.push(path.clone()); } @@ -220,35 +214,14 @@ impl Parser { None }; - while self.index < self.text.len() { + while !self.ctm.check_eof() { match self.parse_block() { Ok(block) => self.document.add_element(block), Err(err) => { - if err.eof { + if self.ctm.check_eof() { break; } - if let Some(path) = &self.path { - if let Some(position) = err.get_position(&self.get_text()) { - println!( - "{}", - format!( - "Error in File {}:{}:{} - {}", - path.to_str().unwrap(), - position.0, - position.1, - err - ) - .red() - ); - } else { - println!( - "{}", - format!("Error in File {}: {}", path.to_str().unwrap(), err).red() - ); - } - } else { - println!("{}", err); - } + eprintln!("{}", err); break; } } diff --git a/src/utils/parsing.rs b/src/utils/parsing.rs index 51aed2a..faffd95 100644 --- a/src/utils/parsing.rs +++ b/src/utils/parsing.rs @@ -1,3 +1,4 @@ +use charred::tapemachine::{TapeError, TapeResult}; use colored::*; use std::error::Error; use std::fmt; @@ -10,8 +11,10 @@ macro_rules! parse { }; } -pub type ParseResult<T> = Result<T, ParseError>; +pub type ParseResult<T> = TapeResult<T>; +pub type ParseError = TapeError; +/* #[derive(Debug)] pub struct ParseError { index: usize, @@ -81,3 +84,4 @@ impl ParseError { } } } +*/