Add charred for char state operations

feature/epub-rendering
trivernis 4 years ago
parent dd65c65c9d
commit cb327180c6

7
Cargo.lock generated

@ -70,6 +70,11 @@ name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "charred"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "chrono"
version = "0.4.11"
@ -568,6 +573,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
name = "snekdown"
version = "0.17.0"
dependencies = [
"charred 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)",
"colored 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -791,6 +797,7 @@ dependencies = [
"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
"checksum cc 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)" = "7bbb73db36c1246e9034e307d0fba23f9a2e251faa47ade70c1bd252220c8311"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum charred 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8cf73c7fbbaf59d5643f99c6a4413eba1b914a7489c39b730ec7d8d72e7bb061"
"checksum chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2"
"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
"checksum colored 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f4ffc801dacf156c5854b9df4f425a626539c3a6ef7893cc0c5084a23f0b6c59"

@ -17,6 +17,7 @@ name = "snekdown"
path = "src/main.rs"
[dependencies]
charred = "0.2.2"
crossbeam-utils = "0.7.2"
structopt = "0.3.14"
minify = "1.1.1"

@ -107,6 +107,9 @@ pub(crate) const LIST_SPECIAL_CHARS: [char; 14] = [
MINUS, PLUS, ASTERISK, O, '1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
];
/// Every character treated as whitespace: space, tab, carriage return and line feed.
pub(crate) const WHITESPACE: [char; 4] = [' ', '\t', '\r', '\n'];
/// Whitespace that does not end a line: space, tab and carriage return (no line feed).
pub(crate) const INLINE_WHITESPACE: [char; 3] = [' ', '\t', '\r'];
// sequences
/// Three consecutive `BACKTICK` characters, the delimiter sequence of a code block.
pub(crate) const SQ_CODE_BLOCK: [char; 3] = [BACKTICK, BACKTICK, BACKTICK];

@ -5,6 +5,7 @@ use crate::parser::inline::ParseInline;
use crate::parser::line::ParseLine;
use crate::utils::parsing::{ParseError, ParseResult};
use crate::Parser;
use charred::tapemachine::{TapeError, TapeResult};
pub(crate) trait ParseBlock {
fn parse_block(&mut self) -> ParseResult<Block>;
@ -22,10 +23,7 @@ impl ParseBlock for Parser {
fn parse_block(&mut self) -> ParseResult<Block> {
if let Some(section) = self.section_return {
if section <= self.section_nesting && (self.section_nesting > 0) {
return Err(ParseError::new_with_message(
self.index,
"invalid section nesting",
));
return Err(self.ctm.assert_error(None));
} else {
self.section_return = None;
}
@ -33,7 +31,7 @@ impl ParseBlock for Parser {
let token = if let Ok(section) = self.parse_section() {
Block::Section(section)
} else if let Some(_) = self.section_return {
return Err(ParseError::new(self.index));
return Err(self.ctm.err());
} else if let Ok(list) = self.parse_list() {
Block::List(list)
} else if let Ok(table) = self.parse_table() {
@ -45,13 +43,13 @@ impl ParseBlock for Parser {
} else if let Ok(import) = self.parse_import() {
Block::Import(import)
} else if let Some(_) = self.section_return {
return Err(ParseError::new(self.index));
return Err(self.ctm.err());
} else if let Ok(pholder) = self.parse_placeholder() {
Block::Placeholder(pholder)
} else if let Ok(paragraph) = self.parse_paragraph() {
Block::Paragraph(paragraph)
} else {
return Err(ParseError::new(self.index));
return Err(self.ctm.err());
};
Ok(token)
@ -59,12 +57,12 @@ impl ParseBlock for Parser {
/// Parses a section that consists of a header and one or more blocks
fn parse_section(&mut self) -> ParseResult<Section> {
let start_index = self.index;
self.seek_whitespace();
if self.check_special(&HASH) {
let start_index = self.ctm.get_index();
self.ctm.seek_whitespace();
if self.ctm.check_char(&HASH) {
let mut size = 1;
while let Some(_) = self.next_char() {
if !self.check_special(&HASH) {
while let Some(_) = self.ctm.next_char() {
if !self.ctm.check_char(&HASH) {
break;
}
size += 1;
@ -73,20 +71,20 @@ impl ParseBlock for Parser {
if let Ok(meta) = self.parse_inline_metadata() {
metadata = Some(meta);
}
if size <= self.section_nesting || !self.current_char.is_whitespace() {
if size <= self.section_nesting || !self.ctm.get_current().is_whitespace() {
if size <= self.section_nesting {
self.section_return = Some(size);
}
return Err(self.revert_with_error(start_index));
return Err(self.ctm.rewind_with_error(start_index));
}
self.seek_inline_whitespace();
self.ctm.seek_any(&INLINE_WHITESPACE);
let mut header = self.parse_header()?;
header.size = size;
self.section_nesting = size;
self.sections.push(size);
let mut section = Section::new(header);
section.metadata = metadata;
self.seek_whitespace();
self.ctm.seek_whitespace();
while let Ok(block) = self.parse_block() {
section.add_element(block);
@ -100,20 +98,22 @@ impl ParseBlock for Parser {
}
Ok(section)
} else {
return Err(self.revert_with_error(start_index));
return Err(self.ctm.rewind_with_error(start_index));
}
}
/// parses a code block
fn parse_code_block(&mut self) -> ParseResult<CodeBlock> {
self.seek_whitespace();
self.assert_special_sequence(&SQ_CODE_BLOCK, self.index)?;
self.skip_char();
let language = self.get_string_until(&[LB], &[])?;
self.skip_char();
let text = self.get_string_until_sequence(&[&SQ_CODE_BLOCK], &[])?;
let start_index = self.ctm.get_index();
self.ctm.seek_whitespace();
self.ctm
.assert_sequence(&SQ_CODE_BLOCK, Some(start_index))?;
self.ctm.seek_one();
let language = self.ctm.get_string_until_any(&[LB], &[])?;
self.ctm.seek_one();
let text = self.ctm.get_string_until_sequence(&[&SQ_CODE_BLOCK], &[])?;
for _ in 0..2 {
self.skip_char();
self.ctm.seek_one();
}
Ok(CodeBlock {
@ -124,24 +124,25 @@ impl ParseBlock for Parser {
/// parses a quote
fn parse_quote(&mut self) -> ParseResult<Quote> {
let start_index = self.index;
self.seek_whitespace();
let start_index = self.ctm.get_index();
self.ctm.seek_whitespace();
let metadata = if let Ok(meta) = self.parse_inline_metadata() {
Some(meta)
} else {
None
};
if self.check_special(&META_CLOSE) {
if self.next_char() == None {
return Err(self.revert_with_error(start_index));
if self.ctm.check_char(&META_CLOSE) {
if self.ctm.next_char() == None {
return Err(self.ctm.rewind_with_error(start_index));
}
}
let mut quote = Quote::new(metadata);
while self.check_special(&QUOTE_START)
&& self.next_char() != None
&& (self.check_seek_inline_whitespace() || self.check_special(&LB))
while self.ctm.check_char(&QUOTE_START)
&& self.ctm.next_char() != None
&& (self.ctm.check_any(&WHITESPACE))
{
self.ctm.seek_any(&INLINE_WHITESPACE)?;
if let Ok(text) = self.parse_text_line() {
if text.subtext.len() > 0 {
quote.add_text(text);
@ -151,7 +152,7 @@ impl ParseBlock for Parser {
}
}
if quote.text.len() == 0 {
return Err(self.revert_with_error(start_index));
return Err(self.ctm.rewind_with_error(start_index));
}
Ok(quote)
@ -159,26 +160,26 @@ impl ParseBlock for Parser {
/// Parses a paragraph.
///
/// Skips leading whitespace, then appends every line produced by `parse_line`
/// to a new `Paragraph` until `parse_line` fails or a block boundary is seen.
/// Returns an error when not a single line could be collected.
// NOTE(review): this listing is a diff view; legacy `self.*` calls appear next to
// the `self.ctm.*` calls that replace them.
fn parse_paragraph(&mut self) -> ParseResult<Paragraph> {
self.seek_whitespace();
self.ctm.seek_whitespace();
let mut paragraph = Paragraph::new();
while let Ok(token) = self.parse_line() {
paragraph.add_element(token);
// remember the position reached after the line that was just parsed
let start_index = self.index;
if self.check_special_sequence_group(&BLOCK_SPECIAL_CHARS)
|| self.check_special_group(&self.block_break_at)
let start_index = self.ctm.get_index();
// stop when a block-special sequence or a configured break character follows
if self.ctm.check_any_sequence(&BLOCK_SPECIAL_CHARS)
|| self.ctm.check_any(&self.block_break_at)
{
self.revert_to(start_index)?;
self.ctm.rewind(start_index);
break;
}
// unless the input is exhausted, return to the remembered position
if !self.check_eof() {
self.revert_to(start_index)?;
if !self.ctm.check_eof() {
self.ctm.rewind(start_index);
}
}
// an empty paragraph is reported as a parse error
if paragraph.elements.len() > 0 {
Ok(paragraph)
} else {
Err(ParseError::new(self.index))
Err(self.ctm.err())
}
}
@ -186,10 +187,10 @@ impl ParseBlock for Parser {
/// The parsing is done iteratively to resolve nested items
fn parse_list(&mut self) -> ParseResult<List> {
let mut list = List::new();
let start_index = self.index;
self.seek_whitespace();
let start_index = self.ctm.get_index();
self.ctm.seek_whitespace();
let ordered = if self.check_special_group(&LIST_SPECIAL_CHARS) {
let ordered = if self.ctm.check_any(&LIST_SPECIAL_CHARS) {
false
} else {
true
@ -242,31 +243,31 @@ impl ParseBlock for Parser {
if list.items.len() > 0 {
Ok(list)
} else {
return Err(self.revert_with_error(start_index));
return Err(self.ctm.rewind_with_error(start_index));
}
}
/// parses a markdown table
fn parse_table(&mut self) -> ParseResult<Table> {
let header = self.parse_row()?;
if self.check_linebreak() {
self.skip_char();
if self.ctm.check_char(&LB) {
self.ctm.seek_one();
}
let seek_index = self.index;
let seek_index = self.ctm.get_index();
let mut table = Table::new(header);
while let Some(_) = self.next_char() {
self.seek_inline_whitespace();
if !self.check_special_group(&[MINUS, PIPE]) || self.check_linebreak() {
while let Ok(_) = self.ctm.seek_one() {
self.ctm.seek_any(&INLINE_WHITESPACE);
if !self.ctm.check_any(&[MINUS, PIPE]) || self.ctm.check_char(&LB) {
break;
}
}
if !self.check_linebreak() {
self.revert_to(seek_index)?;
if !self.ctm.check_char(&LB) {
self.ctm.rewind(seek_index);
return Ok(table);
}
self.seek_whitespace();
self.ctm.seek_whitespace();
while let Ok(row) = self.parse_row() {
table.add_row(row);
}
@ -276,37 +277,36 @@ impl ParseBlock for Parser {
/// Parses an import statement and hands its path to `import_document`.
///
/// Expected input: optional whitespace, the sequence `IMPORT_START IMPORT_OPEN`,
/// a non-empty path, and `IMPORT_CLOSE` on the same line.
///
/// Returns an error (rewinding to the start position) when the opening sequence
/// is missing, when a linebreak is hit before the path is closed, when the path
/// is empty, or when the import occurs while `section_nesting > 0`. Also
/// returns an error when `import_document` fails.
// NOTE(review): this listing is a diff view; legacy `self.*` calls appear next to
// the `self.ctm.*` calls that replace them.
fn parse_import(&mut self) -> ParseResult<Import> {
let start_index = self.index;
self.seek_whitespace();
self.assert_special_sequence_group(&[&[IMPORT_START, IMPORT_OPEN]], start_index)?;
let start_index = self.ctm.get_index();
self.ctm.seek_whitespace();
self.ctm
.assert_any_sequence(&[&[IMPORT_START, IMPORT_OPEN]], Some(start_index))?;
let mut path = String::new();
// collect the path up to the closing character or the end of the line
while let Some(character) = self.next_char() {
if self.check_linebreak() || self.check_special(&IMPORT_CLOSE) {
while let Some(character) = self.ctm.next_char() {
if self.ctm.check_char(&LB) || self.ctm.check_char(&IMPORT_CLOSE) {
break;
}
path.push(character);
}
// a linebreak here means the import was never closed
if self.check_linebreak() || path.is_empty() {
return Err(self.revert_with_error(start_index));
if self.ctm.check_char(&LB) || path.is_empty() {
return Err(self.ctm.rewind_with_error(start_index));
}
// step over the closing character
if self.check_special(&IMPORT_CLOSE) {
self.skip_char();
if self.ctm.check_char(&IMPORT_CLOSE) {
self.ctm.seek_one();
}
// parser success
// Inside a section the import is rejected and `section_return` is set to 0.
// NOTE(review): presumably this unwinds all open sections so the import is
// re-parsed at the top level; confirm against `parse_block`/`parse_section`.
if self.section_nesting > 0 {
self.section_return = Some(0);
let err = ParseError::new_with_message(self.index, "import section nesting error");
self.revert_to(start_index)?;
return Err(err);
return Err(self.ctm.rewind_with_error(start_index));
}
self.seek_whitespace();
self.ctm.seek_whitespace();
// the path is cloned because it is also stored in the returned `Import`
if let Ok(anchor) = self.import_document(path.clone()) {
Ok(Import { path, anchor })
} else {
Err(ParseError::new(self.index))
Err(self.ctm.err())
}
}
}

@ -51,7 +51,7 @@ pub trait CharStateMachine {
revert_index: usize,
) -> ParseResult<String>;
}
/*
impl CharStateMachine for Parser {
/// Increments the current index and returns the
/// char at the indexes position
@ -342,3 +342,4 @@ impl CharStateMachine for Parser {
}
}
}
*/

@ -36,12 +36,14 @@ pub(crate) trait ParseInline {
impl ParseInline for Parser {
/// Parses an inline element that is enclosed by the `surrounding` character
/// on both sides.
///
/// Asserts `surrounding` at the current position, parses the inner element with
/// `parse_inline`, then asserts `surrounding` again. Any failed assertion or a
/// failure of `parse_inline` is propagated as the error of this function.
// NOTE(review): this listing is a diff view; legacy `self.*` calls appear next to
// the `self.ctm.*` calls that replace them.
fn parse_surrounded(&mut self, surrounding: &char) -> ParseResult<Inline> {
let start_index = self.index;
self.assert_special(surrounding, start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm.assert_char(surrounding, Some(start_index))?;
self.ctm.seek_one()?;
let inline = self.parse_inline()?;
self.assert_special(surrounding, start_index)?;
self.skip_char();
self.ctm.assert_char(surrounding, Some(start_index))?;
// only step past the closing character when more input follows, so that an
// element ending exactly at the end of the input is still accepted
if !self.ctm.check_eof() {
self.ctm.seek_one()?;
}
Ok(inline)
}
@ -53,10 +55,10 @@ impl ParseInline for Parser {
return Ok(Inline::TemplateVar(var));
}
}
if self.check_special(&PIPE) || self.check_linebreak() {
Err(ParseError::new(self.index))
} else if self.check_eof() {
Err(ParseError::eof(self.index))
if self.ctm.check_char(&PIPE) || self.ctm.check_char(&LB) {
Err(self.ctm.err())
} else if self.ctm.check_eof() {
Err(self.ctm.err())
} else if let Ok(image) = self.parse_image() {
Ok(Inline::Image(image))
} else if let Ok(url) = self.parse_url(false) {
@ -90,10 +92,10 @@ impl ParseInline for Parser {
/// parses an image url
fn parse_image(&mut self) -> ParseResult<Image> {
let start_index = self.index;
self.seek_inline_whitespace();
self.assert_special(&IMG_START, start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm.seek_any(&INLINE_WHITESPACE);
self.ctm.assert_char(&IMG_START, Some(start_index))?;
self.ctm.seek_one()?;
if let Ok(url) = self.parse_url(true) {
let metadata = if let Ok(meta) = self.parse_inline_metadata() {
@ -103,37 +105,34 @@ impl ParseInline for Parser {
};
Ok(Image { url, metadata })
} else {
Err(self.revert_with_error(start_index))
Err(self.ctm.rewind_with_error(start_index))
}
}
// parses a URL
fn parse_url(&mut self, short_syntax: bool) -> ParseResult<Url> {
let start_index = self.index;
self.seek_inline_whitespace();
let start_index = self.ctm.get_index();
self.ctm.seek_any(&INLINE_WHITESPACE);
let mut description = String::new();
if self.check_special(&DESC_OPEN) {
self.skip_char();
description = if let Ok(desc) = self.get_string_until(&[DESC_CLOSE], &[LB]) {
desc
} else {
return Err(self.revert_with_error(start_index));
};
if self.ctm.check_char(&DESC_OPEN) {
self.ctm.seek_one();
description =
self.ctm
.get_string_until_any_or_rewind(&[DESC_CLOSE], &[LB], start_index)?;
} else if !short_syntax {
return Err(self.revert_with_error(start_index));
return Err(self.ctm.rewind_with_error(start_index));
}
self.skip_char();
self.assert_special(&URL_OPEN, start_index)?;
self.skip_char();
self.seek_inline_whitespace();
self.ctm.seek_one();
self.ctm.assert_char(&URL_OPEN, Some(start_index))?;
self.ctm.seek_one();
self.ctm.seek_any(&INLINE_WHITESPACE);
let url = if let Ok(url_str) = self.get_string_until(&[URL_CLOSE], &[LB]) {
url_str
} else {
return Err(self.revert_with_error(start_index));
};
self.skip_char();
let url = self
.ctm
.get_string_until_any_or_rewind(&[URL_CLOSE], &[LB], start_index)?;
self.ctm.seek_one();
if description.is_empty() {
Ok(Url::new(None, url))
@ -144,31 +143,31 @@ impl ParseInline for Parser {
/// Parses a markdown checkbox.
///
/// Expected input: `CHECK_OPEN`, then either `CHECK_CHECKED` (value `true`) or
/// `SPACE` (value `false`), then `CHECK_CLOSE`. Any other character between the
/// brackets yields an error after rewinding to the start position.
// NOTE(review): this listing is a diff view; legacy `self.*` calls appear next to
// the `self.ctm.*` calls that replace them.
fn parse_checkbox(&mut self) -> ParseResult<Checkbox> {
let start_index = self.index;
self.assert_special(&CHECK_OPEN, start_index)?;
self.skip_char();
let checked = if self.check_special(&CHECK_CHECKED) {
let start_index = self.ctm.get_index();
self.ctm.assert_char(&CHECK_OPEN, Some(start_index))?;
self.ctm.seek_one();
// the character between the brackets decides the checkbox state
let checked = if self.ctm.check_char(&CHECK_CHECKED) {
true
} else if self.check_special(&SPACE) {
} else if self.ctm.check_char(&SPACE) {
false
} else {
return Err(self.revert_with_error(start_index));
return Err(self.ctm.rewind_with_error(start_index));
};
self.skip_char();
self.assert_special(&CHECK_CLOSE, start_index)?;
self.skip_char();
// step over the state character, require the closing bracket, step over it
self.ctm.seek_one();
self.ctm.assert_char(&CHECK_CLOSE, Some(start_index))?;
self.ctm.seek_one();
Ok(Checkbox { value: checked })
}
/// parses bold text, which must start with two asterisks
fn parse_bold(&mut self) -> ParseResult<BoldText> {
let start_index = self.index;
self.assert_special_sequence(&BOLD, start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm.assert_sequence(&BOLD, Some(start_index))?;
self.ctm.seek_one();
let inline = self.parse_inline()?;
self.assert_special_sequence(&BOLD, start_index)?;
self.skip_char();
self.ctm.assert_sequence(&BOLD, Some(start_index))?;
self.ctm.seek_one();
Ok(BoldText {
value: Box::new(inline),
@ -189,12 +188,14 @@ impl ParseInline for Parser {
/// Parses monospace text (inline code).
///
/// Expected input: `BACKTICK`, content, `BACKTICK`. The content is read up to
/// the next `BACKTICK` or `LB`; because a closing `BACKTICK` is asserted
/// afterwards, content that runs into a linebreak results in an error.
// NOTE(review): this listing is a diff view; legacy `self.*` calls appear next to
// the `self.ctm.*` calls that replace them.
fn parse_monospace(&mut self) -> ParseResult<MonospaceText> {
let start_index = self.index;
self.assert_special(&BACKTICK, start_index)?;
self.skip_char();
let content = self.get_string_until(&[BACKTICK, LB], &[])?;
self.assert_special(&BACKTICK, start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm.assert_char(&BACKTICK, Some(start_index))?;
self.ctm.seek_one();
// read the raw content; the `_or_rewind` variant is given `start_index`
// NOTE(review): presumably it rewinds to that index on failure - confirm in charred
let content = self
.ctm
.get_string_until_any_or_rewind(&[BACKTICK, LB], &[], start_index)?;
self.ctm.assert_char(&BACKTICK, Some(start_index))?;
self.ctm.seek_one();
Ok(MonospaceText { value: content })
}
@ -212,11 +213,13 @@ impl ParseInline for Parser {
}
fn parse_emoji(&mut self) -> ParseResult<Emoji> {
let start_index = self.index;
self.assert_special(&EMOJI, start_index)?;
self.skip_char();
let name = self.get_string_until_or_revert(&[EMOJI], &[SPACE, LB], start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm.assert_char(&EMOJI, Some(start_index))?;
self.ctm.seek_one();
let name = self
.ctm
.get_string_until_any_or_rewind(&[EMOJI], &[SPACE, LB], start_index)?;
self.ctm.seek_one();
if let Some(emoji) = gh_emoji::get(name.as_str()) {
let emoji_char = *emoji.chars().collect::<Vec<char>>().first().unwrap();
Ok(Emoji {
@ -224,20 +227,24 @@ impl ParseInline for Parser {
name,
})
} else {
Err(self.revert_with_error(start_index))
Err(self.ctm.rewind_with_error(start_index))
}
}
/// parses colored text
fn parse_colored(&mut self) -> ParseResult<Colored> {
let start_index = self.index;
self.assert_special_sequence(&SQ_COLOR_START, start_index)?;
self.skip_char();
let color =
self.get_string_until_or_revert(&[COLOR_CLOSE], &[SPACE, LB, SEMICOLON], start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm
.assert_sequence(&SQ_COLOR_START, Some(start_index))?;
self.ctm.seek_one();
let color = self.ctm.get_string_until_any_or_rewind(
&[COLOR_CLOSE],
&[SPACE, LB, SEMICOLON],
start_index,
)?;
self.ctm.seek_one();
if color.is_empty() {
return Err(ParseError::new(self.index));
return Err(self.ctm.err());
}
Ok(Colored {
value: Box::new(self.parse_inline()?),
@ -246,11 +253,14 @@ impl ParseInline for Parser {
}
fn parse_bibref(&mut self) -> ParseResult<Arc<RwLock<BibReference>>> {
let start_index = self.index;
self.assert_special_sequence(&SQ_BIBREF_START, start_index)?;
self.skip_char();
let key = self.get_string_until_or_revert(&[BIBREF_CLOSE], &[SPACE, LB], start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm
.assert_sequence(&SQ_BIBREF_START, Some(start_index))?;
self.ctm.seek_one();
let key =
self.ctm
.get_string_until_any_or_rewind(&[BIBREF_CLOSE], &[SPACE, LB], start_index)?;
self.ctm.seek_one();
let ref_entry = Arc::new(RwLock::new(BibReference::new(
key,
self.document.config.get_ref_entry(BIB_REF_DISPLAY),
@ -264,15 +274,21 @@ impl ParseInline for Parser {
/// parses a template variable {prefix{name}suffix}
fn parse_template_variable(&mut self) -> ParseResult<Arc<RwLock<TemplateVariable>>> {
let start_index = self.index;
self.assert_special(&TEMP_VAR_OPEN, start_index)?;
self.skip_char();
let prefix = self.get_string_until_or_revert(&[TEMP_VAR_OPEN], &[LB], start_index)?;
self.skip_char();
let name = self.get_string_until_or_revert(&[TEMP_VAR_CLOSE], &[LB], start_index)?;
self.skip_char();
let suffix = self.get_string_until_or_revert(&[TEMP_VAR_CLOSE], &[LB], start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm.assert_char(&TEMP_VAR_OPEN, Some(start_index))?;
self.ctm.seek_one();
let prefix =
self.ctm
.get_string_until_any_or_rewind(&[TEMP_VAR_OPEN], &[LB], start_index)?;
self.ctm.seek_one();
let name =
self.ctm
.get_string_until_any_or_rewind(&[TEMP_VAR_CLOSE], &[LB], start_index)?;
self.ctm.seek_one();
let suffix =
self.ctm
.get_string_until_any_or_rewind(&[TEMP_VAR_CLOSE], &[LB], start_index)?;
self.ctm.seek_one();
Ok(Arc::new(RwLock::new(TemplateVariable {
value: None,
name,
@ -283,15 +299,15 @@ impl ParseInline for Parser {
/// parses plain text as a string until it encounters an unescaped special inline char
fn parse_plain(&mut self) -> ParseResult<PlainText> {
if self.check_linebreak() {
return Err(ParseError::new(self.index));
if self.ctm.check_char(&LB) {
return Err(self.ctm.err());
}
let mut characters = String::new();
characters.push(self.current_char);
while let Some(ch) = self.next_char() {
if self.check_special_group(&INLINE_SPECIAL_CHARS)
|| self.check_special_group(&self.inline_break_at)
|| (self.parse_variables && self.check_special(&TEMP_VAR_OPEN))
characters.push(self.ctm.get_current());
while let Some(ch) = self.ctm.next_char() {
if self.ctm.check_any(&INLINE_SPECIAL_CHARS)
|| self.ctm.check_any(&self.inline_break_at)
|| (self.parse_variables && self.ctm.check_char(&TEMP_VAR_OPEN))
{
break;
}
@ -301,44 +317,74 @@ impl ParseInline for Parser {
if characters.len() > 0 {
Ok(PlainText { value: characters })
} else {
Err(ParseError::new_with_message(
self.index,
"no plaintext characters parsed",
))
Err(self.ctm.err())
}
}
/// Parses inline metadata: a `META_OPEN` character followed by key-value pairs.
///
/// Pairs are read with `parse_metadata_pair` until that parser fails or the
/// tape points at `META_CLOSE` or a linebreak. A trailing `META_CLOSE` is
/// consumed when present. If no pair could be parsed at all, the tape is
/// rewound to where parsing started and an error is returned.
fn parse_inline_metadata(&mut self) -> ParseResult<InlineMetadata> {
    let rewind_index = self.ctm.get_index();
    self.ctm.assert_char(&META_OPEN, Some(rewind_index))?;
    self.ctm.seek_one();

    let mut data = HashMap::new();
    loop {
        let (key, value) = match self.parse_metadata_pair() {
            Ok(pair) => pair,
            Err(_) => break,
        };
        data.insert(key, value);
        // a closing tag or a linebreak ends the inner content
        if self.ctm.check_char(&META_CLOSE) || self.ctm.check_char(&LB) {
            break;
        }
    }

    // step over the closing tag if the metadata was closed properly
    if self.ctm.check_char(&META_CLOSE) {
        self.ctm.seek_one();
    }

    // metadata without any parsed pair is rejected
    if data.is_empty() {
        Err(self.ctm.rewind_with_error(rewind_index))
    } else {
        Ok(InlineMetadata { data })
    }
}
/// parses a key-value metadata pair
fn parse_metadata_pair(&mut self) -> Result<(String, MetadataValue), ParseError> {
self.seek_inline_whitespace();
let name = self.get_string_until(&[META_CLOSE, EQ, SPACE, LB], &[])?;
self.ctm.seek_any(&INLINE_WHITESPACE);
let name = self
.ctm
.get_string_until_any(&[META_CLOSE, EQ, SPACE, LB], &[])?;
self.seek_inline_whitespace();
self.ctm.seek_any(&INLINE_WHITESPACE);
let mut value = MetadataValue::Bool(true);
if self.check_special(&EQ) {
self.skip_char();
self.seek_inline_whitespace();
if self.ctm.check_char(&EQ) {
self.ctm.seek_one();
self.ctm.seek_any(&INLINE_WHITESPACE);
if let Ok(ph) = self.parse_placeholder() {
value = MetadataValue::Placeholder(ph);
} else if let Ok(template) = self.parse_template() {
value = MetadataValue::Template(template)
} else {
let quoted_string = self.check_special_group(&QUOTES);
let quoted_string = self.ctm.check_any(&QUOTES);
let parse_until = if quoted_string {
let quote_start = self.current_char;
self.skip_char();
let quote_start = self.ctm.get_current();
self.ctm.seek_one();
vec![quote_start, META_CLOSE, LB]
} else {
vec![META_CLOSE, LB, SPACE]
};
let raw_value = self.get_string_until(&parse_until, &[])?;
if self.check_special_group(&QUOTES) {
self.skip_char();
let raw_value = self.ctm.get_string_until_any(&parse_until, &[])?;
if self.ctm.check_any(&QUOTES) {
self.ctm.seek_one();
}
self.seek_inline_whitespace();
if self.check_special(&COMMA) {
self.skip_char();
self.ctm.seek_any(&INLINE_WHITESPACE);
if self.ctm.check_char(&COMMA) {
self.ctm.seek_one();
}
value = if quoted_string {
MetadataValue::String(raw_value)
} else if raw_value.to_lowercase().as_str() == "true" {
@ -358,44 +404,21 @@ impl ParseInline for Parser {
Ok((name, value))
}
/// Parses inline metadata: a `META_OPEN` character followed by key-value pairs.
///
/// Pairs are read with `parse_metadata_pair` until that parser fails or the
/// current character is `META_CLOSE` or a linebreak. A trailing `META_CLOSE`
/// is skipped when present. If no pair could be parsed at all, the parser
/// reverts to where parsing started and an error is returned.
fn parse_inline_metadata(&mut self) -> ParseResult<InlineMetadata> {
    let revert_index = self.index;
    self.assert_special(&META_OPEN, revert_index)?;
    self.skip_char();

    let mut data = HashMap::new();
    loop {
        let (key, value) = match self.parse_metadata_pair() {
            Ok(pair) => pair,
            Err(_) => break,
        };
        data.insert(key, value);
        // a closing tag or a linebreak ends the inner content
        if self.check_special(&META_CLOSE) || self.check_linebreak() {
            break;
        }
    }

    // step over the closing tag if the metadata was closed properly
    if self.check_special(&META_CLOSE) {
        self.skip_char();
    }

    // metadata without any parsed pair is rejected
    if data.is_empty() {
        Err(self.revert_with_error(revert_index))
    } else {
        Ok(InlineMetadata { data })
    }
}
/// parses a placeholder element
fn parse_placeholder(&mut self) -> ParseResult<Arc<RwLock<Placeholder>>> {
let start_index = self.index;
self.assert_special_sequence(&SQ_PHOLDER_START, self.index)?;
self.skip_char();
let name = if let Ok(name_str) = self.get_string_until_sequence(&[&SQ_PHOLDER_STOP], &[LB])
let start_index = self.ctm.get_index();
self.ctm.assert_sequence(&SQ_PHOLDER_START, None)?;
self.ctm.seek_one();
let name = if let Ok(name_str) = self
.ctm
.get_string_until_sequence(&[&SQ_PHOLDER_STOP], &[&[LB]])
{
name_str
} else {
return Err(self.revert_with_error(start_index));
return Err(self.ctm.rewind_with_error(start_index));
};
self.skip_char();
self.ctm.seek_one();
let metadata = if let Ok(meta) = self.parse_inline_metadata() {
Some(meta)
@ -411,27 +434,32 @@ impl ParseInline for Parser {
/// parses a template
fn parse_template(&mut self) -> ParseResult<Template> {
let start_index = self.index;
self.assert_special(&TEMPLATE, start_index)?;
self.skip_char();
if self.check_special(&TEMPLATE) {
return Err(self.revert_with_error(start_index));
let start_index = self.ctm.get_index();
self.ctm.assert_char(&TEMPLATE, None)?;
self.ctm.seek_one();
if self.ctm.check_char(&TEMPLATE) {
return Err(self.ctm.rewind_with_error(start_index));
}
let mut elements = Vec::new();
self.block_break_at.push(TEMPLATE);
self.inline_break_at.push(TEMPLATE);
self.parse_variables = true;
while let Ok(e) = self.parse_block() {
elements.push(Element::Block(Box::new(e)));
if self.check_special(&TEMPLATE) {
if self.ctm.check_char(&TEMPLATE) {
break;
}
}
self.parse_variables = false;
self.block_break_at.clear();
self.inline_break_at.clear();
self.assert_special(&TEMPLATE, start_index)?;
self.skip_char();
self.ctm.assert_char(&TEMPLATE, Some(start_index))?;
self.ctm.seek_one();
let vars: HashMap<String, Arc<RwLock<TemplateVariable>>> = elements
.iter()
.map(|e| e.get_template_variables())

@ -21,8 +21,8 @@ pub(crate) trait ParseLine {
impl ParseLine for Parser {
/// parses inline definitions
fn parse_line(&mut self) -> ParseResult<Line> {
if self.index > self.text.len() {
Err(ParseError::new(self.index))
if self.ctm.check_eof() {
Err(self.ctm.err())
} else {
if let Ok(ruler) = self.parse_ruler() {
Ok(Line::Ruler(ruler))
@ -33,17 +33,17 @@ impl ParseLine for Parser {
} else if let Ok(text) = self.parse_text_line() {
Ok(Line::Text(text))
} else {
Err(ParseError::new(self.index))
Err(self.ctm.err())
}
}
}
/// parses the header of a section
fn parse_header(&mut self) -> ParseResult<Header> {
let start_index = self.index;
let start_index = self.ctm.get_index();
let line = self.parse_line()?;
let mut anchor = String::new();
self.text[start_index..self.index]
self.ctm.get_text()[start_index..self.ctm.get_index()]
.iter()
.for_each(|e| anchor.push(*e));
anchor.retain(|c| !c.is_whitespace());
@ -52,21 +52,23 @@ impl ParseLine for Parser {
/// parses a single list item defined with -
fn parse_list_item(&mut self) -> ParseResult<ListItem> {
let start_index = self.index;
self.seek_inline_whitespace();
let level = self.index - start_index;
self.assert_special_group(&LIST_SPECIAL_CHARS, start_index)?;
let ordered = self.current_char.is_numeric();
self.skip_char();
if self.check_special(&DOT) {
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm.seek_any(&INLINE_WHITESPACE);
let level = self.ctm.get_index() - start_index;
self.ctm
.assert_any(&LIST_SPECIAL_CHARS, Some(start_index))?;
let ordered = self.ctm.get_current().is_numeric();
self.ctm.seek_one()?;
if self.ctm.check_char(&DOT) {
self.ctm.seek_one()?;
}
if !self.check_seek_inline_whitespace() {
return Err(self.revert_with_error(start_index));
if !self.ctm.check_any(&INLINE_WHITESPACE) {
return Err(self.ctm.rewind_with_error(start_index));
}
self.seek_inline_whitespace();
if self.check_special(&MINUS) {
return Err(self.revert_with_error(start_index));
self.ctm.seek_any(&INLINE_WHITESPACE);
if self.ctm.check_char(&MINUS) {
return Err(self.ctm.rewind_with_error(start_index));
}
let item = ListItem::new(self.parse_line()?, level as u16, ordered);
@ -76,65 +78,68 @@ impl ParseLine for Parser {
/// Parses a table row (also used for the table head).
///
/// Expected input: optional inline whitespace, a `PIPE`, then cells separated
/// by `PIPE` up to a linebreak or the end of the input. A second `PIPE`
/// directly after the first one is rejected with an error after rewinding.
///
/// While the cells are parsed, `PIPE` is pushed onto `inline_break_at`.
// NOTE(review): this listing is a diff view; legacy `self.*` calls appear next to
// the `self.ctm.*` calls that replace them.
fn parse_row(&mut self) -> ParseResult<Row> {
let start_index = self.index;
self.seek_inline_whitespace();
self.assert_special(&PIPE, start_index)?;
self.skip_char();
if self.check_special(&PIPE) {
return Err(self.revert_with_error(start_index));
let start_index = self.ctm.get_index();
self.ctm.seek_any(&INLINE_WHITESPACE);
self.ctm.assert_char(&PIPE, Some(start_index))?;
self.ctm.seek_one();
if self.ctm.check_char(&PIPE) {
return Err(self.ctm.rewind_with_error(start_index));
}
self.inline_break_at.push(PIPE);
self.seek_inline_whitespace();
self.ctm.seek_any(&INLINE_WHITESPACE);
let mut row = Row::new();
// one iteration per cell
loop {
let mut element = TextLine::new();
// gather the inline elements of the current cell
while let Ok(inline) = self.parse_inline() {
element.subtext.push(inline);
if self.check_linebreak() || self.check_special(&PIPE) || self.check_eof() {
if self.ctm.check_char(&LB) || self.ctm.check_char(&PIPE) || self.ctm.check_eof() {
break;
}
}
row.add_cell(Cell {
text: Line::Text(element),
});
// step over the separator that ended the cell
if self.check_special(&PIPE) {
self.skip_char();
if self.ctm.check_char(&PIPE) {
self.ctm.seek_one();
}
// a linebreak or the end of the input terminates the row
if self.check_linebreak() || self.check_eof() {
if self.ctm.check_char(&LB) || self.ctm.check_eof() {
break;
}
self.seek_inline_whitespace();
self.ctm.seek_any(&INLINE_WHITESPACE);
}
// NOTE(review): `clear()` drops every entry of `inline_break_at`, not only the
// `PIPE` pushed above - confirm that callers do not rely on earlier entries.
self.inline_break_at.clear();
if self.check_special(&PIPE) {
self.skip_char();
self.skip_char();
} else {
self.skip_char();
if self.ctm.check_char(&PIPE) {
self.ctm.seek_one();
}
self.ctm.seek_one();
// NOTE(review): the loop calls `add_cell` before any `break`, so the error branch
// below looks unreachable, assuming `add_cell` appends to `row.cells` - verify.
if row.cells.len() > 0 {
Ok(row)
} else {
return Err(self.revert_with_error(start_index));
return Err(self.ctm.rewind_with_error(start_index));
}
}
fn parse_bib_entry(&mut self) -> ParseResult<Arc<RwLock<BibEntry>>> {
let start_index = self.index;
self.seek_inline_whitespace();
self.assert_special(&BIB_KEY_OPEN, start_index)?;
self.skip_char();
let key = self.get_string_until_or_revert(&[BIB_KEY_CLOSE], &[LB, SPACE], start_index)?;
self.skip_char();
self.assert_special(&BIB_DATA_START, start_index)?;
self.skip_char();
self.seek_inline_whitespace();
let start_index = self.ctm.get_index();
self.ctm.seek_any(&INLINE_WHITESPACE);
self.ctm.assert_char(&BIB_KEY_OPEN, Some(start_index))?;
self.ctm.seek_one();
let key =
self.ctm
.get_string_until_any_or_rewind(&[BIB_KEY_CLOSE], &[LB, SPACE], start_index)?;
self.ctm.seek_one();
self.ctm.assert_char(&BIB_DATA_START, Some(start_index))?;
self.ctm.seek_one();
self.ctm.seek_any(&INLINE_WHITESPACE);
let entry = if let Ok(meta) = self.parse_inline_metadata() {
BibEntry::from_metadata(key, Box::new(meta), &self.document.config)
} else {
let url = self.get_string_until_or_revert(&[LB], &[], start_index)?;
let url = self
.ctm
.get_string_until_any_or_rewind(&[LB], &[], start_index)?;
BibEntry::from_url(key, url, &self.document.config)
};
let entry_ref = Arc::new(RwLock::new(entry));
@ -147,9 +152,10 @@ impl ParseLine for Parser {
/// parses centered text
fn parse_centered(&mut self) -> ParseResult<Centered> {
let start_index = self.index;
self.assert_special_sequence(&SQ_CENTERED_START, start_index)?;
self.skip_char();
let start_index = self.ctm.get_index();
self.ctm
.assert_sequence(&SQ_CENTERED_START, Some(start_index))?;
self.ctm.seek_one();
let line = self.parse_text_line()?;
Ok(Centered { line })
@ -157,10 +163,12 @@ impl ParseLine for Parser {
/// Parses a ruler line.
///
/// Skips inline whitespace, asserts the `SQ_RULER` sequence and then advances
/// to the next linebreak. The returned `Ruler` carries no data.
// NOTE(review): this listing is a diff view; legacy `self.*` calls appear next to
// the `self.ctm.*` calls that replace them.
fn parse_ruler(&mut self) -> ParseResult<Ruler> {
let start_index = self.index;
self.seek_inline_whitespace();
self.assert_special_sequence(&SQ_RULER, start_index)?;
self.seek_until_linebreak();
let start_index = self.ctm.get_index();
self.ctm.seek_any(&INLINE_WHITESPACE);
self.ctm.assert_sequence(&SQ_RULER, Some(start_index))?;
// NOTE(review): the result of `seek_one` is ignored here. If the input ends
// without a linebreak and `seek_one` fails without advancing, this loop may
// never terminate - confirm the end-of-input behaviour of charred.
while !self.ctm.check_char(&LB) {
self.ctm.seek_one();
}
Ok(Ruler {})
}
@ -169,19 +177,19 @@ impl ParseLine for Parser {
let mut text = TextLine::new();
while let Ok(subtext) = self.parse_inline() {
text.add_subtext(subtext);
if self.check_eof() || self.check_special_group(&self.inline_break_at) {
if self.ctm.check_eof() || self.ctm.check_any(&self.inline_break_at) {
break;
}
}
if self.check_linebreak() {
self.skip_char();
if self.ctm.check_char(&LB) {
self.ctm.seek_one();
}
if text.subtext.len() > 0 || !self.check_eof() {
if text.subtext.len() > 0 || !self.ctm.check_eof() {
Ok(text)
} else {
Err(ParseError::eof(self.index))
Err(self.ctm.err())
}
}
}

@ -7,6 +7,7 @@ use self::block::ParseBlock;
use crate::elements::{Document, ImportAnchor};
use crate::references::configuration::Configuration;
use crate::utils::parsing::{ParseError, ParseResult};
use charred::tapemachine::CharTapeMachine;
use colored::*;
use crossbeam_utils::sync::WaitGroup;
use std::fs::File;
@ -17,9 +18,7 @@ use std::sync::{Arc, Mutex, RwLock};
use std::thread;
pub struct Parser {
pub(crate) index: usize,
pub(crate) text: Vec<char>,
pub(crate) current_char: char,
pub(crate) ctm: CharTapeMachine,
section_nesting: u8,
sections: Vec<u8>,
section_return: Option<u8>,
@ -30,7 +29,6 @@ pub struct Parser {
pub(crate) block_break_at: Vec<char>,
pub(crate) inline_break_at: Vec<char>,
pub(crate) document: Document,
pub(crate) previous_char: char,
pub(crate) reader: Box<dyn BufRead>,
pub(crate) parse_variables: bool,
}
@ -112,9 +110,6 @@ impl Parser {
}
let document = Document::new(!is_child);
Self {
index: 0,
text,
current_char,
sections: Vec::new(),
section_nesting: 0,
section_return: None,
@ -122,7 +117,7 @@ impl Parser {
paths,
wg: WaitGroup::new(),
is_child,
previous_char: ' ',
ctm: CharTapeMachine::new(text),
inline_break_at: Vec::new(),
block_break_at: Vec::new(),
document,
@ -137,7 +132,8 @@ impl Parser {
/// Returns the complete text of the parser as a newly allocated `String`.
// NOTE(review): this listing is a diff view; `self.text` is the legacy receiver
// and `self.ctm.get_text()` its replacement.
// NOTE(review): folding with `format!` builds a new string for every character;
// collecting the chars into a `String` should give the same result - verify.
fn get_text(&self) -> String {
self.text
self.ctm
.get_text()
.iter()
.fold("".to_string(), |a, b| format!("{}{}", a, b))
}
@ -174,15 +170,13 @@ impl Parser {
)
.red()
);
return Err(ParseError::new_with_message(
self.index,
"file does not exist",
));
eprintln!("file {} does not exist", path.to_str().unwrap());
return Err(self.ctm.assert_error(None));
}
{
let mut paths = self.paths.lock().unwrap();
if paths.iter().find(|item| **item == path) != None {
println!(
eprintln!(
"{}",
format!(
"Import of \"{}\" failed: Cyclic import.",
@ -190,7 +184,7 @@ impl Parser {
)
.yellow()
);
return Err(ParseError::new_with_message(self.index, "cyclic import"));
return Err(self.ctm.assert_error(None));
}
paths.push(path.clone());
}
@ -220,35 +214,14 @@ impl Parser {
None
};
while self.index < self.text.len() {
while !self.ctm.check_eof() {
match self.parse_block() {
Ok(block) => self.document.add_element(block),
Err(err) => {
if err.eof {
if self.ctm.check_eof() {
break;
}
if let Some(path) = &self.path {
if let Some(position) = err.get_position(&self.get_text()) {
println!(
"{}",
format!(
"Error in File {}:{}:{} - {}",
path.to_str().unwrap(),
position.0,
position.1,
err
)
.red()
);
} else {
println!(
"{}",
format!("Error in File {}: {}", path.to_str().unwrap(), err).red()
);
}
} else {
println!("{}", err);
}
eprintln!("{}", err);
break;
}
}

@ -1,3 +1,4 @@
use charred::tapemachine::{TapeError, TapeResult};
use colored::*;
use std::error::Error;
use std::fmt;
@ -10,8 +11,10 @@ macro_rules! parse {
};
}
// NOTE(review): this listing is a diff view; the first alias is the legacy
// definition, the two aliases after it are its replacement.
/// Result type returned by the parser functions.
pub type ParseResult<T> = Result<T, ParseError>;
/// Result type returned by the parser functions; an alias for charred's `TapeResult`.
pub type ParseResult<T> = TapeResult<T>;
/// Error type of the parser functions; an alias for charred's `TapeError`.
pub type ParseError = TapeError;
/*
#[derive(Debug)]
pub struct ParseError {
index: usize,
@ -81,3 +84,4 @@ impl ParseError {
}
}
}
*/

Loading…
Cancel
Save