From dd65c65c9d48d9cf3075c7f2533dd2cbb359d2aa Mon Sep 17 00:00:00 2001 From: trivernis Date: Mon, 15 Jun 2020 21:02:04 +0200 Subject: [PATCH] Refactor project folder and file structure - add folders for references and utils as well as elements - extract the line and inline parsing of the parser into separate traits --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/{parsing/elements.rs => elements/mod.rs} | 10 +- src/{parsing => elements}/tokens.rs | 0 src/format/html.rs | 8 +- src/lib.rs | 10 +- src/parser/block.rs | 312 +++++++ src/{parsing => parser}/charstate.rs | 4 +- src/{parsing => parser}/inline.rs | 162 +++- src/parser/line.rs | 187 ++++ src/parser/mod.rs | 266 ++++++ src/parsing/mod.rs | 12 - src/parsing/parser.rs | 868 ------------------ src/{parsing => references}/bibliography.rs | 6 +- .../configuration/config.rs | 0 .../configuration/default.toml | 0 .../configuration/keys.rs | 0 .../configuration/mod.rs | 8 +- src/references/mod.rs | 4 + src/{parsing => references}/placeholders.rs | 2 +- src/{parsing => references}/templates.rs | 2 +- src/utils/mod.rs | 1 + src/{parsing/utils.rs => utils/parsing.rs} | 0 tests/parsing_tests.rs | 2 +- 24 files changed, 957 insertions(+), 911 deletions(-) rename src/{parsing/elements.rs => elements/mod.rs} (98%) rename src/{parsing => elements}/tokens.rs (100%) create mode 100644 src/parser/block.rs rename src/{parsing => parser}/charstate.rs (98%) rename src/{parsing => parser}/inline.rs (64%) create mode 100644 src/parser/line.rs create mode 100644 src/parser/mod.rs delete mode 100644 src/parsing/mod.rs delete mode 100644 src/parsing/parser.rs rename src/{parsing => references}/bibliography.rs (96%) rename src/{parsing => references}/configuration/config.rs (100%) rename src/{parsing => references}/configuration/default.toml (100%) rename src/{parsing => references}/configuration/keys.rs (100%) rename src/{parsing => references}/configuration/mod.rs (95%) create mode 100644 src/references/mod.rs rename src/{parsing => references}/placeholders.rs (99%) rename src/{parsing => references}/templates.rs (99%) create mode 100644 src/utils/mod.rs rename src/{parsing/utils.rs => utils/parsing.rs} (100%) diff --git a/Cargo.lock b/Cargo.lock index 9d64805..c9fe940 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -566,7 +566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "snekdown" -version = "0.16.0" +version = "0.17.0" dependencies = [ "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "colored 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 294d44e..d6bbabd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snekdown" -version = "0.16.0" +version = "0.17.0" authors = ["trivernis "] edition = "2018" license-file = "LICENSE" diff --git a/src/parsing/elements.rs b/src/elements/mod.rs similarity index 98% rename from src/parsing/elements.rs rename to src/elements/mod.rs index cbd3035..fea2d85 100644 --- a/src/parsing/elements.rs +++ b/src/elements/mod.rs @@ -1,7 +1,9 @@ -use crate::parsing::bibliography::{BibEntry, BibReference, Bibliography}; -use crate::parsing::configuration::Configuration; -use crate::parsing::placeholders::ProcessPlaceholders; -use crate::parsing::templates::{Template, TemplateVariable}; +pub mod tokens; + +use crate::references::bibliography::{BibEntry, BibReference, Bibliography}; +use crate::references::configuration::Configuration; +use crate::references::placeholders::ProcessPlaceholders; +use 
crate::references::templates::{Template, TemplateVariable}; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, RwLock}; diff --git a/src/parsing/tokens.rs b/src/elements/tokens.rs similarity index 100% rename from src/parsing/tokens.rs rename to src/elements/tokens.rs diff --git a/src/format/html.rs b/src/format/html.rs index 5380d3e..b3ad830 100644 --- a/src/format/html.rs +++ b/src/format/html.rs @@ -1,8 +1,8 @@ +use crate::elements::*; use crate::format::PlaceholderTemplate; -use crate::parsing::bibliography::{BibEntry, BibReference}; -use crate::parsing::configuration::Value; -use crate::parsing::elements::*; -use crate::parsing::templates::{Template, TemplateVariable}; +use crate::references::bibliography::{BibEntry, BibReference}; +use crate::references::configuration::Value; +use crate::references::templates::{Template, TemplateVariable}; use htmlescape::{encode_attribute, encode_minimal}; use minify::html::minify; use std::cell::RefCell; diff --git a/src/lib.rs b/src/lib.rs index 4b64b10..81e8702 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,8 @@ +pub mod elements; pub mod format; -pub mod parsing; -pub use parsing::parser::Parser; -pub use parsing::utils; +pub mod parser; +pub mod references; +pub mod utils; + +pub use parser::Parser; +pub use utils::parsing; diff --git a/src/parser/block.rs b/src/parser/block.rs new file mode 100644 index 0000000..6cfc1eb --- /dev/null +++ b/src/parser/block.rs @@ -0,0 +1,312 @@ +use crate::elements::tokens::*; +use crate::elements::{Block, CodeBlock, Import, List, ListItem, Paragraph, Quote, Section, Table}; +use crate::parser::charstate::CharStateMachine; +use crate::parser::inline::ParseInline; +use crate::parser::line::ParseLine; +use crate::utils::parsing::{ParseError, ParseResult}; +use crate::Parser; + +pub(crate) trait ParseBlock { + fn parse_block(&mut self) -> ParseResult<Block>; + fn parse_section(&mut self) -> ParseResult<Section>
; + fn parse_code_block(&mut self) -> ParseResult<CodeBlock>; + fn parse_quote(&mut self) -> ParseResult<Quote>; + fn parse_paragraph(&mut self) -> ParseResult<Paragraph>; + fn parse_list(&mut self) -> ParseResult<List>; + fn parse_table(&mut self) -> ParseResult<Table>; + fn parse_import(&mut self) -> ParseResult<Import>; +} + +impl ParseBlock for Parser { + /// Parses a block Token + fn parse_block(&mut self) -> ParseResult<Block> { + if let Some(section) = self.section_return { + if section <= self.section_nesting && (self.section_nesting > 0) { + return Err(ParseError::new_with_message( + self.index, + "invalid section nesting", + )); + } else { + self.section_return = None; + } + } + let token = if let Ok(section) = self.parse_section() { + Block::Section(section) + } else if let Some(_) = self.section_return { + return Err(ParseError::new(self.index)); + } else if let Ok(list) = self.parse_list() { + Block::List(list) + } else if let Ok(table) = self.parse_table() { + Block::Table(table) + } else if let Ok(code_block) = self.parse_code_block() { + Block::CodeBlock(code_block) + } else if let Ok(quote) = self.parse_quote() { + Block::Quote(quote) + } else if let Ok(import) = self.parse_import() { + Block::Import(import) + } else if let Some(_) = self.section_return { + return Err(ParseError::new(self.index)); + } else if let Ok(pholder) = self.parse_placeholder() { + Block::Placeholder(pholder) + } else if let Ok(paragraph) = self.parse_paragraph() { + Block::Paragraph(paragraph) + } else { + return Err(ParseError::new(self.index)); + }; + + Ok(token) + } + + /// Parses a section that consists of a header and one or more blocks + fn parse_section(&mut self) -> ParseResult<Section>
{ + let start_index = self.index; + self.seek_whitespace(); + if self.check_special(&HASH) { + let mut size = 1; + while let Some(_) = self.next_char() { + if !self.check_special(&HASH) { + break; + } + size += 1; + } + let mut metadata = None; + if let Ok(meta) = self.parse_inline_metadata() { + metadata = Some(meta); + } + if size <= self.section_nesting || !self.current_char.is_whitespace() { + if size <= self.section_nesting { + self.section_return = Some(size); + } + return Err(self.revert_with_error(start_index)); + } + self.seek_inline_whitespace(); + let mut header = self.parse_header()?; + header.size = size; + self.section_nesting = size; + self.sections.push(size); + let mut section = Section::new(header); + section.metadata = metadata; + self.seek_whitespace(); + + while let Ok(block) = self.parse_block() { + section.add_element(block); + } + + self.sections.pop(); + if let Some(sec) = self.sections.last() { + self.section_nesting = *sec + } else { + self.section_nesting = 0; + } + Ok(section) + } else { + return Err(self.revert_with_error(start_index)); + } + } + + /// parses a code block + fn parse_code_block(&mut self) -> ParseResult<CodeBlock> { + self.seek_whitespace(); + self.assert_special_sequence(&SQ_CODE_BLOCK, self.index)?; + self.skip_char(); + let language = self.get_string_until(&[LB], &[])?; + self.skip_char(); + let text = self.get_string_until_sequence(&[&SQ_CODE_BLOCK], &[])?; + for _ in 0..2 { + self.skip_char(); + } + + Ok(CodeBlock { + language, + code: text, + }) + } + + /// parses a quote + fn parse_quote(&mut self) -> ParseResult<Quote> { + let start_index = self.index; + self.seek_whitespace(); + let metadata = if let Ok(meta) = self.parse_inline_metadata() { + Some(meta) + } else { + None + }; + if self.check_special(&META_CLOSE) { + if self.next_char() == None { + return Err(self.revert_with_error(start_index)); + } + } + let mut quote = Quote::new(metadata); + + while self.check_special(&QUOTE_START) + && self.next_char() != None + && (self.check_seek_inline_whitespace() || self.check_special(&LB)) + { + if let Ok(text) = self.parse_text_line() { + if text.subtext.len() > 0 { + quote.add_text(text); + } + } else { + break; + } + } + if quote.text.len() == 0 { + return Err(self.revert_with_error(start_index)); + } + + Ok(quote) + } + + /// Parses a paragraph + fn parse_paragraph(&mut self) -> ParseResult<Paragraph> { + self.seek_whitespace(); + let mut paragraph = Paragraph::new(); + while let Ok(token) = self.parse_line() { + paragraph.add_element(token); + let start_index = self.index; + if self.check_special_sequence_group(&BLOCK_SPECIAL_CHARS) + || self.check_special_group(&self.block_break_at) + { + self.revert_to(start_index)?; + break; + } + if !self.check_eof() { + self.revert_to(start_index)?; + } + } + + if paragraph.elements.len() > 0 { + Ok(paragraph) + } else { + Err(ParseError::new(self.index)) + } + } + + /// parses a list which consists of one or more list items + /// The parsing is done iterative to resolve nested items + fn parse_list(&mut self) -> ParseResult<List> { + let mut list = List::new(); + let start_index = self.index; + self.seek_whitespace(); + + let ordered = if self.check_special_group(&LIST_SPECIAL_CHARS) { + false + } else { + true + }; + list.ordered = ordered; + let mut list_hierarchy: Vec<ListItem> = Vec::new(); + while let Ok(mut item) = self.parse_list_item() { + while let Some(parent_item) = list_hierarchy.pop() { + if parent_item.level < item.level { + // the parent item is the actual parent of the next item + list_hierarchy.push(parent_item); + break; + }
else if parent_item.level == item.level { + // the parent item is a sibling and has to be appended to a parent + if list_hierarchy.is_empty() { + list.add_item(parent_item); + } else { + let mut parent_parent = list_hierarchy.pop().unwrap(); + parent_parent.add_child(parent_item); + list_hierarchy.push(parent_parent); + } + break; + } else { + // the parent item is a child of a sibling of the current item + if list_hierarchy.is_empty() { + item.add_child(parent_item); + } else { + let mut parent_parent = list_hierarchy.pop().unwrap(); + parent_parent.add_child(parent_item); + list_hierarchy.push(parent_parent); + } + } + } + list_hierarchy.push(item); + } + + // the remaining items in the hierarchy need to be combined + while let Some(item) = list_hierarchy.pop() { + if !list_hierarchy.is_empty() { + let mut parent_item = list_hierarchy.pop().unwrap(); + parent_item.add_child(item); + list_hierarchy.push(parent_item); + } else { + list_hierarchy.push(item); + break; + } + } + list.items.append(&mut list_hierarchy); + + if list.items.len() > 0 { + Ok(list) + } else { + return Err(self.revert_with_error(start_index)); + } + } + + /// parses a markdown table + fn parse_table(&mut self) -> ParseResult<Table>
{ + let header = self.parse_row()?; + if self.check_linebreak() { + self.skip_char(); + } + let seek_index = self.index; + let mut table = Table::new(header); + while let Some(_) = self.next_char() { + self.seek_inline_whitespace(); + if !self.check_special_group(&[MINUS, PIPE]) || self.check_linebreak() { + break; + } + } + + if !self.check_linebreak() { + self.revert_to(seek_index)?; + return Ok(table); + } + + self.seek_whitespace(); + while let Ok(row) = self.parse_row() { + table.add_row(row); + } + + Ok(table) + } + + /// parses an import and starts a new task to parse the document of the import + fn parse_import(&mut self) -> ParseResult<Import> { + let start_index = self.index; + self.seek_whitespace(); + self.assert_special_sequence_group(&[&[IMPORT_START, IMPORT_OPEN]], start_index)?; + let mut path = String::new(); + while let Some(character) = self.next_char() { + if self.check_linebreak() || self.check_special(&IMPORT_CLOSE) { + break; + } + path.push(character); + } + if self.check_linebreak() || path.is_empty() { + return Err(self.revert_with_error(start_index)); + } + if self.check_special(&IMPORT_CLOSE) { + self.skip_char(); + } + // parsing success + + if self.section_nesting > 0 { + self.section_return = Some(0); + let err = ParseError::new_with_message(self.index, "import section nesting error"); + self.revert_to(start_index)?; + return Err(err); + } + + self.seek_whitespace(); + + if let Ok(anchor) = self.import_document(path.clone()) { + Ok(Import { path, anchor }) + } else { + Err(ParseError::new(self.index)) + } + } +} diff --git a/src/parsing/charstate.rs b/src/parser/charstate.rs similarity index 98% rename from src/parsing/charstate.rs rename to src/parser/charstate.rs index 9c59453..524923b 100644 --- a/src/parsing/charstate.rs +++ b/src/parser/charstate.rs @@ -1,5 +1,5 @@ -use crate::parsing::tokens::{LB, SPECIAL_ESCAPE}; -use crate::parsing::utils::{ParseError, ParseResult}; +use crate::elements::tokens::{LB, SPECIAL_ESCAPE}; +use crate::utils::parsing::{ParseError, ParseResult}; use crate::Parser; pub trait CharStateMachine { diff --git a/src/parsing/inline.rs b/src/parser/inline.rs similarity index 64% rename from src/parsing/inline.rs rename to src/parser/inline.rs index ca07d1e..feea667 100644 --- a/src/parsing/inline.rs +++ b/src/parser/inline.rs @@ -1,11 +1,13 @@ use super::charstate::CharStateMachine; -use super::elements::*; -use super::tokens::*; -use crate::parsing::bibliography::BibReference; -use crate::parsing::configuration::keys::BIB_REF_DISPLAY; -use crate::parsing::templates::TemplateVariable; -use crate::parsing::utils::{ParseError, ParseResult}; +use crate::elements::tokens::*; +use crate::elements::*; +use crate::parser::block::ParseBlock; +use crate::references::bibliography::BibReference; +use crate::references::configuration::keys::BIB_REF_DISPLAY; +use crate::references::templates::{GetTemplateVariables, Template, TemplateVariable}; +use crate::utils::parsing::{ParseError, ParseResult}; use crate::Parser; +use std::collections::HashMap; use std::sync::{Arc, RwLock}; pub(crate) trait ParseInline { @@ -25,6 +27,10 @@ pub(crate) trait ParseInline { fn parse_bibref(&mut self) -> ParseResult<Arc<RwLock<BibReference>>>; fn parse_template_variable(&mut self) -> ParseResult<Arc<RwLock<TemplateVariable>>>; fn parse_plain(&mut self) -> ParseResult<PlainText>; + fn parse_inline_metadata(&mut self) -> ParseResult<InlineMetadata>; + fn parse_metadata_pair(&mut self) -> ParseResult<(String, MetadataValue)>; + fn parse_placeholder(&mut self) -> ParseResult<Arc<RwLock<Placeholder>>>; + fn parse_template(&mut self) -> 
ParseResult<Template>; } impl ParseInline for Parser { @@ -301,4 +307,148 @@ impl ParseInline for Parser { )) } } + + /// parses a key-value metadata pair + fn parse_metadata_pair(&mut self) -> Result<(String, MetadataValue), ParseError> { + self.seek_inline_whitespace(); + let name = self.get_string_until(&[META_CLOSE, EQ, SPACE, LB], &[])?; + + self.seek_inline_whitespace(); + let mut value = MetadataValue::Bool(true); + if self.check_special(&EQ) { + self.skip_char(); + self.seek_inline_whitespace(); + if let Ok(ph) = self.parse_placeholder() { + value = MetadataValue::Placeholder(ph); + } else if let Ok(template) = self.parse_template() { + value = MetadataValue::Template(template) + } else { + let quoted_string = self.check_special_group(&QUOTES); + let parse_until = if quoted_string { + let quote_start = self.current_char; + self.skip_char(); + vec![quote_start, META_CLOSE, LB] + } else { + vec![META_CLOSE, LB, SPACE] + }; + let raw_value = self.get_string_until(&parse_until, &[])?; + if self.check_special_group(&QUOTES) { + self.skip_char(); + } + self.seek_inline_whitespace(); + if self.check_special(&COMMA) { + self.skip_char(); + } + value = if quoted_string { + MetadataValue::String(raw_value) + } else if raw_value.to_lowercase().as_str() == "true" { + MetadataValue::Bool(true) + } else if raw_value.to_lowercase().as_str() == "false" { + MetadataValue::Bool(false) + } else if let Ok(num) = raw_value.parse::<i64>() { + MetadataValue::Integer(num) + } else if let Ok(num) = raw_value.parse::<f64>() { + MetadataValue::Float(num) + } else { + MetadataValue::String(raw_value) + } + } + } + + Ok((name, value)) + } + + /// Parses metadata + fn parse_inline_metadata(&mut self) -> ParseResult<InlineMetadata> { + let start_index = self.index; + self.assert_special(&META_OPEN, start_index)?; + self.skip_char(); + + let mut values = HashMap::new(); + while let Ok((key, value)) = self.parse_metadata_pair() { + values.insert(key, value); + if self.check_special(&META_CLOSE) || self.check_linebreak() { + // abort the parsing of the inner content when encountering a closing tag or linebreak + break; + } + } + if self.check_special(&META_CLOSE) { + self.skip_char(); + } + if values.len() == 0 { + // if there was a linebreak (the metadata wasn't closed) or there is no inner data + // return an error + return Err(self.revert_with_error(start_index)); + } + + Ok(InlineMetadata { data: values }) + } + + /// parses a placeholder element + fn parse_placeholder(&mut self) -> ParseResult<Arc<RwLock<Placeholder>>> { + let start_index = self.index; + self.assert_special_sequence(&SQ_PHOLDER_START, self.index)?; + self.skip_char(); + let name = if let Ok(name_str) = self.get_string_until_sequence(&[&SQ_PHOLDER_STOP], &[LB]) + { + name_str + } else { + return Err(self.revert_with_error(start_index)); + }; + self.skip_char(); + + let metadata = if let Ok(meta) = self.parse_inline_metadata() { + Some(meta) + } else { + None + }; + + let placeholder = Arc::new(RwLock::new(Placeholder::new(name, metadata))); + self.document.add_placeholder(Arc::clone(&placeholder)); + + Ok(placeholder) + } + + /// parses a template + fn parse_template(&mut self) -> ParseResult<Template> { + let start_index = self.index; + self.assert_special(&TEMPLATE, start_index)?; + self.skip_char(); + if self.check_special(&TEMPLATE) { + return Err(self.revert_with_error(start_index)); + } + let mut elements = Vec::new(); + self.block_break_at.push(TEMPLATE); + self.inline_break_at.push(TEMPLATE); + self.parse_variables = true; + while let 
Ok(e) = self.parse_block() { + elements.push(Element::Block(Box::new(e))); + if self.check_special(&TEMPLATE) { + break; + } + } + self.parse_variables = false; + self.block_break_at.clear(); + self.inline_break_at.clear(); + self.assert_special(&TEMPLATE, start_index)?; + self.skip_char(); + let vars: HashMap<String, Arc<RwLock<TemplateVariable>>> = elements + .iter() + .map(|e| e.get_template_variables()) + .flatten() + .map(|e: Arc<RwLock<TemplateVariable>>| { + let name; + { + name = e.read().unwrap().name.clone(); + }; + + (name, e) + }) + .collect(); + + Ok(Template { + text: elements, + variables: vars, + }) + } } diff --git a/src/parser/line.rs b/src/parser/line.rs new file mode 100644 index 0000000..39f9eef --- /dev/null +++ b/src/parser/line.rs @@ -0,0 +1,187 @@ +use crate::elements::tokens::*; +use crate::elements::{Cell, Centered, Header, Line, ListItem, Row, Ruler, TextLine}; +use crate::parser::charstate::CharStateMachine; +use crate::parser::inline::ParseInline; +use crate::references::bibliography::BibEntry; +use crate::utils::parsing::{ParseError, ParseResult}; +use crate::Parser; +use std::sync::{Arc, RwLock}; + +pub(crate) trait ParseLine { + fn parse_line(&mut self) -> ParseResult<Line>; + fn parse_header(&mut self) -> ParseResult<Header>; + fn parse_list_item(&mut self) -> ParseResult<ListItem>; + fn parse_row(&mut self) -> ParseResult<Row>; + fn parse_centered(&mut self) -> ParseResult<Centered>; + fn parse_ruler(&mut self) -> ParseResult<Ruler>; + fn parse_text_line(&mut self) -> ParseResult<TextLine>; + fn parse_bib_entry(&mut self) -> ParseResult<Arc<RwLock<BibEntry>>>; +} + +impl ParseLine for Parser { + /// parses inline definitions + fn parse_line(&mut self) -> ParseResult<Line> { + if self.index > self.text.len() { + Err(ParseError::new(self.index)) + } else { + if let Ok(ruler) = self.parse_ruler() { + Ok(Line::Ruler(ruler)) + } else if let Ok(centered) = self.parse_centered() { + Ok(Line::Centered(centered)) + } else if let Ok(bib) = self.parse_bib_entry() { + Ok(Line::BibEntry(bib)) + } else if let Ok(text) = self.parse_text_line() { + Ok(Line::Text(text)) + } else { + Err(ParseError::new(self.index)) + } + } + } + + /// parses the header of a section + fn parse_header(&mut self) -> ParseResult<Header> { + let start_index = self.index; + let line = self.parse_line()?; + let mut anchor = String::new(); + self.text[start_index..self.index] + .iter() + .for_each(|e| anchor.push(*e)); + anchor.retain(|c| !c.is_whitespace()); + Ok(Header::new(line, anchor)) + } + + /// parses a single list item defined with - + fn parse_list_item(&mut self) -> ParseResult<ListItem> { + let start_index = self.index; + self.seek_inline_whitespace(); + let level = self.index - start_index; + self.assert_special_group(&LIST_SPECIAL_CHARS, start_index)?; + let ordered = self.current_char.is_numeric(); + self.skip_char(); + if self.check_special(&DOT) { + self.skip_char(); + } + if !self.check_seek_inline_whitespace() { + return Err(self.revert_with_error(start_index)); + } + self.seek_inline_whitespace(); + if self.check_special(&MINUS) { + return Err(self.revert_with_error(start_index)); + } + + let item = ListItem::new(self.parse_line()?, level as u16, ordered); + + Ok(item) + } + + /// parses a table row/head + fn parse_row(&mut self) -> ParseResult<Row> { + let start_index = self.index; + self.seek_inline_whitespace(); + self.assert_special(&PIPE, start_index)?; + self.skip_char(); + if self.check_special(&PIPE) { + return Err(self.revert_with_error(start_index)); + } + 
self.inline_break_at.push(PIPE); + + self.seek_inline_whitespace(); + let mut row = Row::new(); + loop { + let mut element = TextLine::new(); + while let Ok(inline) = self.parse_inline() { + element.subtext.push(inline); + if self.check_linebreak() || self.check_special(&PIPE) || self.check_eof() { + break; + } + } + row.add_cell(Cell { + text: Line::Text(element), + }); + if self.check_special(&PIPE) { + self.skip_char(); + } + if self.check_linebreak() || self.check_eof() { + break; + } + self.seek_inline_whitespace(); + } + self.inline_break_at.clear(); + if self.check_special(&PIPE) { + self.skip_char(); + self.skip_char(); + } else { + self.skip_char(); + } + + if row.cells.len() > 0 { + Ok(row) + } else { + return Err(self.revert_with_error(start_index)); + } + } + + fn parse_bib_entry(&mut self) -> ParseResult<Arc<RwLock<BibEntry>>> { + let start_index = self.index; + self.seek_inline_whitespace(); + self.assert_special(&BIB_KEY_OPEN, start_index)?; + self.skip_char(); + let key = self.get_string_until_or_revert(&[BIB_KEY_CLOSE], &[LB, SPACE], start_index)?; + self.skip_char(); + self.assert_special(&BIB_DATA_START, start_index)?; + self.skip_char(); + self.seek_inline_whitespace(); + let entry = if let Ok(meta) = self.parse_inline_metadata() { + BibEntry::from_metadata(key, Box::new(meta), &self.document.config) + } else { + let url = self.get_string_until_or_revert(&[LB], &[], start_index)?; + BibEntry::from_url(key, url, &self.document.config) + }; + let entry_ref = Arc::new(RwLock::new(entry)); + self.document + .bibliography + .add_bib_entry(Arc::clone(&entry_ref)); + + Ok(entry_ref) + } + + /// parses centered text + fn parse_centered(&mut self) -> ParseResult<Centered> { + let start_index = self.index; + self.assert_special_sequence(&SQ_CENTERED_START, start_index)?; + self.skip_char(); + let line = self.parse_text_line()?; + + Ok(Centered { line }) + } + + /// parses a ruler + fn parse_ruler(&mut self) -> ParseResult<Ruler> { + let start_index = self.index; + self.seek_inline_whitespace(); + self.assert_special_sequence(&SQ_RULER, start_index)?; + self.seek_until_linebreak(); + Ok(Ruler {}) + } + + /// Parses a line of text + fn parse_text_line(&mut self) -> Result<TextLine, ParseError> { + let mut text = TextLine::new(); + while let Ok(subtext) = self.parse_inline() { + text.add_subtext(subtext); + if self.check_eof() || self.check_special_group(&self.inline_break_at) { + break; + } + } + + if self.check_linebreak() { + self.skip_char(); + } + + if text.subtext.len() > 0 || !self.check_eof() { + Ok(text) + } else { + Err(ParseError::eof(self.index)) + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..20490b4 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,266 @@ +pub(crate) mod block; +pub(crate) mod charstate; +pub(crate) mod inline; +pub(crate) mod line; + +use self::block::ParseBlock; +use crate::elements::{Document, ImportAnchor}; +use crate::references::configuration::Configuration; +use crate::utils::parsing::{ParseError, ParseResult}; +use colored::*; +use crossbeam_utils::sync::WaitGroup; +use std::fs::File; +use std::io; +use std::io::{BufRead, BufReader, Cursor}; +use std::path::PathBuf; +use std::sync::{Arc, Mutex, RwLock}; +use std::thread; + +pub struct Parser { + pub(crate) index: usize, + pub(crate) text: Vec<char>, + pub(crate) current_char: char, + section_nesting: u8, + sections: Vec<u8>, + section_return: Option<u8>, + path: Option<PathBuf>, + paths: Arc<Mutex<Vec<PathBuf>>>, + wg: WaitGroup, + is_child: 
bool, + pub(crate) block_break_at: Vec<char>, + pub(crate) inline_break_at: Vec<char>, + pub(crate) document: Document, + pub(crate) previous_char: char, + pub(crate) reader: Box<dyn BufRead>, + pub(crate) parse_variables: bool, +} + +impl Parser { + /// Creates a new parser from a path + pub fn new_from_file(path: PathBuf) -> Result<Self, io::Error> { + let f = File::open(&path)?; + Ok(Self::create( + Some(PathBuf::from(path)), + Arc::new(Mutex::new(Vec::new())), + false, + Box::new(BufReader::new(f)), + )) + } + + /// Creates a new parser with text being the markdown text + pub fn new(text: String, path: Option<PathBuf>) -> Self { + let text_bytes = text.as_bytes(); + let path = if let Some(inner_path) = path { + Some(PathBuf::from(inner_path)) + } else { + None + }; + Parser::create( + path, + Arc::new(Mutex::new(Vec::new())), + false, + Box::new(Cursor::new(text_bytes.to_vec())), + ) + } + + /// Creates a child parser from string text + pub fn child(text: String, path: PathBuf, paths: Arc<Mutex<Vec<PathBuf>>>) -> Self { + let text_bytes = text.as_bytes(); + Self::create( + Some(PathBuf::from(path)), + paths, + true, + Box::new(Cursor::new(text_bytes.to_vec())), + ) + } + + /// Creates a child parser from a file + pub fn child_from_file( + path: PathBuf, + paths: Arc<Mutex<Vec<PathBuf>>>, + ) -> Result<Self, io::Error> { + let f = File::open(&path)?; + Ok(Self::create( + Some(PathBuf::from(path)), + paths, + true, + Box::new(BufReader::new(f)), + )) + } + + fn create( + path: Option<PathBuf>, + paths: Arc<Mutex<Vec<PathBuf>>>, + is_child: bool, + mut reader: Box<dyn BufRead>, + ) -> Self { + if let Some(path) = path.clone() { + paths.lock().unwrap().push(path.clone()) + } + let mut text = Vec::new(); + let mut current_char = ' '; + for _ in 0..8 { + let mut buf = String::new(); + if let Ok(_) = reader.read_line(&mut buf) { + text.append(&mut buf.chars().collect::<Vec<char>>()); + } else { + break; + } + } + if let Some(ch) = text.get(0) { + current_char = *ch + } + let document = Document::new(!is_child); + Self { + index: 0, + text, + current_char, + sections: Vec::new(), + section_nesting: 0, + section_return: None, + path, + paths, + wg: WaitGroup::new(), + is_child, + previous_char: ' ', + inline_break_at: Vec::new(), + block_break_at: Vec::new(), + document, + reader, + parse_variables: false, + } + } + + pub fn set_config(&mut self, config: Configuration) { + self.document.config = config; + } + + /// Returns the text of the parser as a string + fn get_text(&self) -> String { + self.text + .iter() + .fold("".to_string(), |a, b| format!("{}{}", a, b)) + } + + /// Returns the import paths of the parser + pub fn get_paths(&self) -> Vec<PathBuf> { + self.paths.lock().unwrap().clone() + } + + /// transform an import path to be relative to the current parsers file + fn transform_path(&mut self, path: String) -> PathBuf { + let mut path = PathBuf::from(path); + + if !path.is_absolute() { + if let Some(selfpath) = &self.path { + if let Some(dir) = selfpath.parent() { + path = PathBuf::new().join(dir).join(path); + } + } + } + + path + } + + /// starts up a new thread to parse the imported document + fn import_document(&mut self, path: String) -> ParseResult<Arc<RwLock<ImportAnchor>>> { + let path = self.transform_path(path); + if !path.exists() || !path.is_file() { + println!( + "{}", + format!( + "Import of \"{}\" failed: The file doesn't exist.", + path.to_str().unwrap() + ) + .red() + ); + return Err(ParseError::new_with_message( + self.index, + "file does not exist", + )); + } + { + 
let mut paths = self.paths.lock().unwrap(); + if paths.iter().find(|item| **item == path) != None { + println!( + "{}", + format!( + "Import of \"{}\" failed: Cyclic import.", + path.to_str().unwrap() + ) + .yellow() + ); + return Err(ParseError::new_with_message(self.index, "cyclic import")); + } + paths.push(path.clone()); + } + let anchor = Arc::new(RwLock::new(ImportAnchor::new())); + let anchor_clone = Arc::clone(&anchor); + let wg = self.wg.clone(); + let paths = Arc::clone(&self.paths); + let config = self.document.config.clone(); + + let _ = thread::spawn(move || { + let mut parser = Parser::child_from_file(path, paths).unwrap(); + parser.set_config(config); + let document = parser.parse(); + anchor_clone.write().unwrap().set_document(document); + + drop(wg); + }); + + Ok(anchor) + } + + /// parses the given text into a document + pub fn parse(&mut self) -> Document { + self.document.path = if let Some(path) = &self.path { + Some(path.canonicalize().unwrap().to_str().unwrap().to_string()) + } else { + None + }; + + while self.index < self.text.len() { + match self.parse_block() { + Ok(block) => self.document.add_element(block), + Err(err) => { + if err.eof { + break; + } + if let Some(path) = &self.path { + if let Some(position) = err.get_position(&self.get_text()) { + println!( + "{}", + format!( + "Error in File {}:{}:{} - {}", + path.to_str().unwrap(), + position.0, + position.1, + err + ) + .red() + ); + } else { + println!( + "{}", + format!("Error in File {}: {}", path.to_str().unwrap(), err).red() + ); + } + } else { + println!("{}", err); + } + break; + } + } + } + + let wg = self.wg.clone(); + self.wg = WaitGroup::new(); + wg.wait(); + self.document.post_process(); + let document = self.document.clone(); + self.document = Document::new(!self.is_child); + + document + } +} diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs deleted file mode 100644 index 7de0b32..0000000 --- a/src/parsing/mod.rs +++ /dev/null @@ -1,12 +0,0 @@ -pub mod bibliography; -pub mod charstate; -pub mod configuration; -pub mod elements; -pub mod inline; -pub mod parser; -pub mod placeholders; -pub mod templates; -pub mod tokens; - -#[macro_use] -pub mod utils; diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs deleted file mode 100644 index d8e46e3..0000000 --- a/src/parsing/parser.rs +++ /dev/null @@ -1,868 +0,0 @@ -use super::elements::*; -use super::tokens::*; -use crate::parsing::bibliography::BibEntry; -use crate::parsing::charstate::CharStateMachine; -use crate::parsing::configuration::Configuration; -use crate::parsing::inline::ParseInline; -use crate::parsing::templates::{GetTemplateVariables, Template, TemplateVariable}; -use crate::parsing::utils::{ParseError, ParseResult}; -use colored::*; -use crossbeam_utils::sync::WaitGroup; -use std::collections::HashMap; -use std::fs::File; -use std::io; -use std::io::{BufRead, BufReader, Cursor}; -use std::path::PathBuf; -use std::sync::{Arc, Mutex, RwLock}; -use std::thread; - -pub struct Parser { - pub(crate) index: usize, - pub(crate) text: Vec<char>, - pub(crate) current_char: char, - section_nesting: u8, - sections: Vec<u8>, - section_return: Option<u8>, - path: Option<PathBuf>, - paths: Arc<Mutex<Vec<PathBuf>>>, - wg: WaitGroup, - is_child: bool, - pub(crate) block_break_at: Vec<char>, - pub(crate) inline_break_at: Vec<char>, - pub(crate) document: Document, - pub(crate) previous_char: char, - pub(crate) reader: Box<dyn BufRead>, - pub(crate) parse_variables: bool, -} - -impl Parser { - /// Creates a new parser from a path - pub fn 
new_from_file(path: PathBuf) -> Result<Self, io::Error> { - let f = File::open(&path)?; - Ok(Self::create( - Some(PathBuf::from(path)), - Arc::new(Mutex::new(Vec::new())), - false, - Box::new(BufReader::new(f)), - )) - } - - /// Creates a new parser with text being the markdown text - pub fn new(text: String, path: Option<PathBuf>) -> Self { - let text_bytes = text.as_bytes(); - let path = if let Some(inner_path) = path { - Some(PathBuf::from(inner_path)) - } else { - None - }; - Parser::create( - path, - Arc::new(Mutex::new(Vec::new())), - false, - Box::new(Cursor::new(text_bytes.to_vec())), - ) - } - - /// Creates a child parser from string text - pub fn child(text: String, path: PathBuf, paths: Arc<Mutex<Vec<PathBuf>>>) -> Self { - let text_bytes = text.as_bytes(); - Self::create( - Some(PathBuf::from(path)), - paths, - true, - Box::new(Cursor::new(text_bytes.to_vec())), - ) - } - - /// Creates a child parser from a file - pub fn child_from_file( - path: PathBuf, - paths: Arc<Mutex<Vec<PathBuf>>>, - ) -> Result<Self, io::Error> { - let f = File::open(&path)?; - Ok(Self::create( - Some(PathBuf::from(path)), - paths, - true, - Box::new(BufReader::new(f)), - )) - } - - fn create( - path: Option<PathBuf>, - paths: Arc<Mutex<Vec<PathBuf>>>, - is_child: bool, - mut reader: Box<dyn BufRead>, - ) -> Self { - if let Some(path) = path.clone() { - paths.lock().unwrap().push(path.clone()) - } - let mut text = Vec::new(); - let mut current_char = ' '; - for _ in 0..8 { - let mut buf = String::new(); - if let Ok(_) = reader.read_line(&mut buf) { - text.append(&mut buf.chars().collect::<Vec<char>>()); - } else { - break; - } - } - if let Some(ch) = text.get(0) { - current_char = *ch - } - let document = Document::new(!is_child); - Self { - index: 0, - text, - current_char, - sections: Vec::new(), - section_nesting: 0, - section_return: None, - path, - paths, - wg: WaitGroup::new(), - is_child, - previous_char: ' ', - inline_break_at: Vec::new(), - block_break_at: Vec::new(), - document, - reader, - parse_variables: false, - } - } - - pub fn set_config(&mut self, config: Configuration) { - self.document.config = config; - } - - /// Returns the text of the parser as a string - fn get_text(&self) -> String { - self.text - .iter() - .fold("".to_string(), |a, b| format!("{}{}", a, b)) - } - - /// Returns the import paths of the parser - pub fn get_paths(&self) -> Vec<PathBuf> { - self.paths.lock().unwrap().clone() - } - - /// transform an import path to be relative to the current parsers file - fn transform_path(&mut self, path: String) -> PathBuf { - let mut path = PathBuf::from(path); - - if !path.is_absolute() { - if let Some(selfpath) = &self.path { - if let Some(dir) = selfpath.parent() { - path = PathBuf::new().join(dir).join(path); - } - } - } - - path - } - - /// starts up a new thread to parse the imported document - fn import_document(&mut self, path: String) -> ParseResult<Arc<RwLock<ImportAnchor>>> { - let path = self.transform_path(path); - if !path.exists() || !path.is_file() { - println!( - "{}", - format!( - "Import of \"{}\" failed: The file doesn't exist.", - path.to_str().unwrap() - ) - .red() - ); - return Err(ParseError::new_with_message( - self.index, - "file does not exist", - )); - } - { - let mut paths = self.paths.lock().unwrap(); - if paths.iter().find(|item| **item == path) != None { - println!( - "{}", - format!( - "Import of \"{}\" failed: Cyclic import.", - path.to_str().unwrap() - ) - .yellow() - ); - return Err(ParseError::new_with_message(self.index, "cyclic import")); - } 
- paths.push(path.clone()); - } - let anchor = Arc::new(RwLock::new(ImportAnchor::new())); - let anchor_clone = Arc::clone(&anchor); - let wg = self.wg.clone(); - let paths = Arc::clone(&self.paths); - let config = self.document.config.clone(); - - let _ = thread::spawn(move || { - let mut parser = Parser::child_from_file(path, paths).unwrap(); - parser.set_config(config); - let document = parser.parse(); - anchor_clone.write().unwrap().set_document(document); - - drop(wg); - }); - - Ok(anchor) - } - - /// parses the given text into a document - pub fn parse(&mut self) -> Document { - self.document.path = if let Some(path) = &self.path { - Some(path.canonicalize().unwrap().to_str().unwrap().to_string()) - } else { - None - }; - - while self.index < self.text.len() { - match self.parse_block() { - Ok(block) => self.document.add_element(block), - Err(err) => { - if err.eof { - break; - } - if let Some(path) = &self.path { - if let Some(position) = err.get_position(&self.get_text()) { - println!( - "{}", - format!( - "Error in File {}:{}:{} - {}", - path.to_str().unwrap(), - position.0, - position.1, - err - ) - .red() - ); - } else { - println!( - "{}", - format!("Error in File {}: {}", path.to_str().unwrap(), err).red() - ); - } - } else { - println!("{}", err); - } - break; - } - } - } - - let wg = self.wg.clone(); - self.wg = WaitGroup::new(); - wg.wait(); - self.document.post_process(); - let document = self.document.clone(); - self.document = Document::new(!self.is_child); - - document - } - - /// Parses a block Token - pub fn parse_block(&mut self) -> Result<Block, ParseError> { - if let Some(section) = self.section_return { - if section <= self.section_nesting && (self.section_nesting > 0) { - return Err(ParseError::new_with_message( - self.index, - "invalid section nesting", - )); - } else { - self.section_return = None; - } - } - let token = if let Ok(section) = self.parse_section() { - Block::Section(section) - } else if let Some(_) = self.section_return { - return Err(ParseError::new(self.index)); - } else if let Ok(list) = self.parse_list() { - Block::List(list) - } else if let Ok(table) = self.parse_table() { - Block::Table(table) - } else if let Ok(code_block) = self.parse_code_block() { - Block::CodeBlock(code_block) - } else if let Ok(quote) = self.parse_quote() { - Block::Quote(quote) - } else if let Ok(import) = self.parse_import() { - Block::Import(import) - } else if let Some(_) = self.section_return { - return Err(ParseError::new(self.index)); - } else if let Ok(pholder) = self.parse_placeholder() { - Block::Placeholder(pholder) - } else if let Ok(paragraph) = self.parse_paragraph() { - Block::Paragraph(paragraph) - } else { - return Err(ParseError::new(self.index)); - }; - - Ok(token) - } - - /// Parses a section that consists of a header and one or more blocks - fn parse_section(&mut self) -> Result<Section, ParseError> { - let start_index = self.index; - self.seek_whitespace(); - if self.check_special(&HASH) { - let mut size = 1; - while let Some(_) = self.next_char() { - if !self.check_special(&HASH) { - break; - } - size += 1; - } - let mut metadata = None; - if let Ok(meta) = self.parse_inline_metadata() { - metadata = Some(meta); - } - if size <= self.section_nesting || !self.current_char.is_whitespace() { - if size <= self.section_nesting { - self.section_return = Some(size); - } - return Err(self.revert_with_error(start_index)); - } - self.seek_inline_whitespace(); - let mut header = self.parse_header()?; - header.size = size; - self.section_nesting = size; - 
self.sections.push(size); - let mut section = Section::new(header); - section.metadata = metadata; - self.seek_whitespace(); - - while let Ok(block) = self.parse_block() { - section.add_element(block); - } - - self.sections.pop(); - if let Some(sec) = self.sections.last() { - self.section_nesting = *sec - } else { - self.section_nesting = 0; - } - Ok(section) - } else { - return Err(self.revert_with_error(start_index)); - } - } - - /// parses the header of a section - fn parse_header(&mut self) -> Result<Header, ParseError> { - let start_index = self.index; - let line = self.parse_line()?; - let mut anchor = String::new(); - self.text[start_index..self.index] - .iter() - .for_each(|e| anchor.push(*e)); - anchor.retain(|c| !c.is_whitespace()); - Ok(Header::new(line, anchor)) - } - - /// parses a code block - fn parse_code_block(&mut self) -> Result<CodeBlock, ParseError> { - self.seek_whitespace(); - self.assert_special_sequence(&SQ_CODE_BLOCK, self.index)?; - self.skip_char(); - let language = self.get_string_until(&[LB], &[])?; - self.skip_char(); - let text = self.get_string_until_sequence(&[&SQ_CODE_BLOCK], &[])?; - for _ in 0..2 { - self.skip_char(); - } - - Ok(CodeBlock { - language, - code: text, - }) - } - - /// parses a quote - fn parse_quote(&mut self) -> Result<Quote, ParseError> { - let start_index = self.index; - self.seek_whitespace(); - let metadata = if let Ok(meta) = self.parse_inline_metadata() { - Some(meta) - } else { - None - }; - if self.check_special(&META_CLOSE) { - if self.next_char() == None { - return Err(self.revert_with_error(start_index)); - } - } - let mut quote = Quote::new(metadata); - - while self.check_special(&QUOTE_START) - && self.next_char() != None - && (self.check_seek_inline_whitespace() || self.check_special(&LB)) - { - if let Ok(text) = self.parse_text_line() { - if text.subtext.len() > 0 { - quote.add_text(text); - } - } else { - break; - } - } - if quote.text.len() == 0 { - return Err(self.revert_with_error(start_index)); - } - - Ok(quote) - } - - /// Parses metadata - pub(crate) fn parse_inline_metadata(&mut self) -> Result<InlineMetadata, ParseError> { - let start_index = self.index; - self.assert_special(&META_OPEN, start_index)?; - self.skip_char(); - - let mut values = HashMap::new(); - while let Ok((key, value)) = self.parse_metadata_pair() { - values.insert(key, value); - if self.check_special(&META_CLOSE) || self.check_linebreak() { - // abort the parsing of the inner content when encountering a closing tag or linebreak - break; - } - } - if self.check_special(&META_CLOSE) { - self.skip_char(); - } - if values.len() == 0 { - // if there was a linebreak (the metadata wasn't closed) or there is no inner data - // return an error - return Err(self.revert_with_error(start_index)); - } - - Ok(InlineMetadata { data: values }) - } - - /// parses a key-value metadata pair - fn parse_metadata_pair(&mut self) -> Result<(String, MetadataValue), ParseError> { - self.seek_inline_whitespace(); - let name = self.get_string_until(&[META_CLOSE, EQ, SPACE, LB], &[])?; - - self.seek_inline_whitespace(); - let mut value = MetadataValue::Bool(true); - if self.check_special(&EQ) { - self.skip_char(); - self.seek_inline_whitespace(); - if let Ok(ph) = self.parse_placeholder() { - value = MetadataValue::Placeholder(ph); - } else if let Ok(template) = self.parse_template() { - value = MetadataValue::Template(template) - } else { - let quoted_string = self.check_special_group(&QUOTES); - let parse_until = if quoted_string { - let quote_start = self.current_char; 
- self.skip_char(); - vec![quote_start, META_CLOSE, LB] - } else { - vec![META_CLOSE, LB, SPACE] - }; - let raw_value = self.get_string_until(&parse_until, &[])?; - if self.check_special_group(&QUOTES) { - self.skip_char(); - } - self.seek_inline_whitespace(); - if self.check_special(&COMMA) { - self.skip_char(); - } - value = if quoted_string { - MetadataValue::String(raw_value) - } else if raw_value.to_lowercase().as_str() == "true" { - MetadataValue::Bool(true) - } else if raw_value.to_lowercase().as_str() == "false" { - MetadataValue::Bool(false) - } else if let Ok(num) = raw_value.parse::<i64>() { - MetadataValue::Integer(num) - } else if let Ok(num) = raw_value.parse::<f64>() { - MetadataValue::Float(num) - } else { - MetadataValue::String(raw_value) - } - } - } - - Ok((name, value)) - } - - /// parses an import and starts a new task to parse the document of the import - fn parse_import(&mut self) -> Result<Import, ParseError> { - let start_index = self.index; - self.seek_whitespace(); - self.assert_special_sequence_group(&[&[IMPORT_START, IMPORT_OPEN]], start_index)?; - let mut path = String::new(); - while let Some(character) = self.next_char() { - if self.check_linebreak() || self.check_special(&IMPORT_CLOSE) { - break; - } - path.push(character); - } - if self.check_linebreak() || path.is_empty() { - return Err(self.revert_with_error(start_index)); - } - if self.check_special(&IMPORT_CLOSE) { - self.skip_char(); - } - // parsing success - - if self.section_nesting > 0 { - self.section_return = Some(0); - let err = ParseError::new_with_message(self.index, "import section nesting error"); - self.revert_to(start_index)?; - return Err(err); - } - - self.seek_whitespace(); - - if let Ok(anchor) = self.import_document(path.clone()) { - Ok(Import { path, anchor }) - } else { - Err(ParseError::new(self.index)) - } - } - - /// Parses a paragraph - fn parse_paragraph(&mut self) -> Result<Paragraph, ParseError> { - self.seek_whitespace(); - let mut paragraph = Paragraph::new(); - while let Ok(token) = self.parse_line() { - paragraph.add_element(token); - let start_index = self.index; - if self.check_special_sequence_group(&BLOCK_SPECIAL_CHARS) - || self.check_special_group(&self.block_break_at) - { - self.revert_to(start_index)?; - break; - } - if !self.check_eof() { - self.revert_to(start_index)?; - } - } - - if paragraph.elements.len() > 0 { - Ok(paragraph) - } else { - Err(ParseError::new(self.index)) - } - } - - /// parses a list which consists of one or more list items - /// The parsing is done iterative to resolve nested items - fn parse_list(&mut self) -> Result<List, ParseError> { - let mut list = List::new(); - let start_index = self.index; - self.seek_whitespace(); - - let ordered = if self.check_special_group(&LIST_SPECIAL_CHARS) { - false - } else { - true - }; - list.ordered = ordered; - let mut list_hierarchy: Vec<ListItem> = Vec::new(); - while let Ok(mut item) = self.parse_list_item() { - while let Some(parent_item) = list_hierarchy.pop() { - if parent_item.level < item.level { - // the parent item is the actual parent of the next item - list_hierarchy.push(parent_item); - break; - } else if parent_item.level == item.level { - // the parent item is a sibling and has to be appended to a parent - if list_hierarchy.is_empty() { - list.add_item(parent_item); - } else { - let mut parent_parent = list_hierarchy.pop().unwrap(); - parent_parent.add_child(parent_item); - list_hierarchy.push(parent_parent); - } - break; - } else { - // the parent item is a child of a sibling of the 
current item - if list_hierarchy.is_empty() { - item.add_child(parent_item); - } else { - let mut parent_parent = list_hierarchy.pop().unwrap(); - parent_parent.add_child(parent_item); - list_hierarchy.push(parent_parent); - } - } - } - list_hierarchy.push(item); - } - - // the remaining items in the hierarchy need to be combined - while let Some(item) = list_hierarchy.pop() { - if !list_hierarchy.is_empty() { - let mut parent_item = list_hierarchy.pop().unwrap(); - parent_item.add_child(item); - list_hierarchy.push(parent_item); - } else { - list_hierarchy.push(item); - break; - } - } - list.items.append(&mut list_hierarchy); - - if list.items.len() > 0 { - Ok(list) - } else { - return Err(self.revert_with_error(start_index)); - } - } - - /// parses a single list item defined with - - fn parse_list_item(&mut self) -> Result<ListItem, ParseError> { - let start_index = self.index; - self.seek_inline_whitespace(); - let level = self.index - start_index; - self.assert_special_group(&LIST_SPECIAL_CHARS, start_index)?; - let ordered = self.current_char.is_numeric(); - self.skip_char(); - if self.check_special(&DOT) { - self.skip_char(); - } - if !self.check_seek_inline_whitespace() { - return Err(self.revert_with_error(start_index)); - } - self.seek_inline_whitespace(); - if self.check_special(&MINUS) { - return Err(self.revert_with_error(start_index)); - } - - let item = ListItem::new(self.parse_line()?, level as u16, ordered); - - Ok(item) - } - - /// parses a markdown table - fn parse_table(&mut self) -> Result<Table, ParseError> { - let header = self.parse_row()?; - if self.check_linebreak() { - self.skip_char(); - } - let seek_index = self.index; - let mut table = Table::new(header); - while let Some(_) = self.next_char() { - self.seek_inline_whitespace(); - if !self.check_special_group(&[MINUS, PIPE]) || self.check_linebreak() { - break; - } - } - - if !self.check_linebreak() { - self.revert_to(seek_index)?; - return Ok(table); - } - - self.seek_whitespace(); - while let Ok(row) = self.parse_row() { - table.add_row(row); - } - - Ok(table) - } - - /// parses a table row/head - pub fn parse_row(&mut self) -> Result<Row, ParseError> { - let start_index = self.index; - self.seek_inline_whitespace(); - self.assert_special(&PIPE, start_index)?; - self.skip_char(); - if self.check_special(&PIPE) { - return Err(self.revert_with_error(start_index)); - } - self.inline_break_at.push(PIPE); - - self.seek_inline_whitespace(); - let mut row = Row::new(); - loop { - let mut element = TextLine::new(); - while let Ok(inline) = self.parse_inline() { - element.subtext.push(inline); - if self.check_linebreak() || self.check_special(&PIPE) || self.check_eof() { - break; - } - } - row.add_cell(Cell { - text: Line::Text(element), - }); - if self.check_special(&PIPE) { - self.skip_char(); - } - if self.check_linebreak() || self.check_eof() { - break; - } - self.seek_inline_whitespace(); - } - self.inline_break_at.clear(); - if self.check_special(&PIPE) { - self.skip_char(); - self.skip_char(); - } else { - self.skip_char(); - } - - if row.cells.len() > 0 { - Ok(row) - } else { - return Err(self.revert_with_error(start_index)); - } - } - - /// parses inline definitions - fn parse_line(&mut self) -> Result<Line, ParseError> { - if self.index > self.text.len() { - Err(ParseError::new(self.index)) - } else { - if let Ok(ruler) = self.parse_ruler() { - Ok(Line::Ruler(ruler)) - } else if let Ok(centered) = self.parse_centered() { - Ok(Line::Centered(centered)) - } else if let Ok(bib) = self.parse_bib_entry() { - 
Ok(Line::BibEntry(bib)) - } else if let Ok(text) = self.parse_text_line() { - Ok(Line::Text(text)) - } else { - Err(ParseError::new(self.index)) - } - } - } - - fn parse_bib_entry(&mut self) -> ParseResult<Arc<RwLock<BibEntry>>> { - let start_index = self.index; - self.seek_inline_whitespace(); - self.assert_special(&BIB_KEY_OPEN, start_index)?; - self.skip_char(); - let key = self.get_string_until_or_revert(&[BIB_KEY_CLOSE], &[LB, SPACE], start_index)?; - self.skip_char(); - self.assert_special(&BIB_DATA_START, start_index)?; - self.skip_char(); - self.seek_inline_whitespace(); - let entry = if let Ok(meta) = self.parse_inline_metadata() { - BibEntry::from_metadata(key, Box::new(meta), &self.document.config) - } else { - let url = self.get_string_until_or_revert(&[LB], &[], start_index)?; - BibEntry::from_url(key, url, &self.document.config) - }; - let entry_ref = Arc::new(RwLock::new(entry)); - self.document - .bibliography - .add_bib_entry(Arc::clone(&entry_ref)); - - Ok(entry_ref) - } - - /// parses centered text - fn parse_centered(&mut self) -> Result<Centered, ParseError> { - let start_index = self.index; - self.assert_special_sequence(&SQ_CENTERED_START, start_index)?; - self.skip_char(); - let line = self.parse_text_line()?; - - Ok(Centered { line }) - } - - /// parses a placeholder element - pub(crate) fn parse_placeholder(&mut self) -> Result<Arc<RwLock<Placeholder>>, ParseError> { - let start_index = self.index; - self.assert_special_sequence(&SQ_PHOLDER_START, self.index)?; - self.skip_char(); - let name = if let Ok(name_str) = self.get_string_until_sequence(&[&SQ_PHOLDER_STOP], &[LB]) - { - name_str - } else { - return Err(self.revert_with_error(start_index)); - }; - self.skip_char(); - - let metadata = if let Ok(meta) = self.parse_inline_metadata() { - Some(meta) - } else { - None - }; - - let placeholder = Arc::new(RwLock::new(Placeholder::new(name, metadata))); - self.document.add_placeholder(Arc::clone(&placeholder)); - - Ok(placeholder) - } - - /// parses a ruler - fn parse_ruler(&mut self) -> Result<Ruler, ParseError> { - let start_index = self.index; - self.seek_inline_whitespace(); - self.assert_special_sequence(&SQ_RULER, start_index)?; - self.seek_until_linebreak(); - Ok(Ruler {}) - } - - /// Parses a line of text - fn parse_text_line(&mut self) -> Result<TextLine, ParseError> { - let mut text = TextLine::new(); - while let Ok(subtext) = self.parse_inline() { - text.add_subtext(subtext); - if self.check_eof() || self.check_special_group(&self.inline_break_at) { - break; - } - } - - if self.check_linebreak() { - self.skip_char(); - } - - if text.subtext.len() > 0 || !self.check_eof() { - Ok(text) - } else { - Err(ParseError::eof(self.index)) - } - } - - /// parses a template - fn parse_template(&mut self) -> ParseResult<Template> { - let start_index = self.index; - self.assert_special(&TEMPLATE, start_index)?; - self.skip_char(); - if self.check_special(&TEMPLATE) { - return Err(self.revert_with_error(start_index)); - } - let mut elements = Vec::new(); - self.block_break_at.push(TEMPLATE); - self.inline_break_at.push(TEMPLATE); - self.parse_variables = true; - while let Ok(e) = self.parse_block() { - elements.push(Element::Block(Box::new(e))); - if self.check_special(&TEMPLATE) { - break; - } - } - self.parse_variables = false; - self.block_break_at.clear(); - self.inline_break_at.clear(); - self.assert_special(&TEMPLATE, start_index)?; - self.skip_char(); - let vars: HashMap<String, Arc<RwLock<TemplateVariable>>> = elements - .iter() - .map(|e| 
e.get_template_variables()) - .flatten() - .map(|e: Arc<RwLock<TemplateVariable>>| { - let name; - { - name = e.read().unwrap().name.clone(); - }; - - (name, e) - }) - .collect(); - - Ok(Template { - text: elements, - variables: vars, - }) - } -} diff --git a/src/parsing/bibliography.rs b/src/references/bibliography.rs similarity index 96% rename from src/parsing/bibliography.rs rename to src/references/bibliography.rs index 80fce30..d27a65b 100644 --- a/src/parsing/bibliography.rs +++ b/src/references/bibliography.rs @@ -1,7 +1,7 @@ +use crate::elements::Metadata; use crate::format::PlaceholderTemplate; -use crate::parsing::configuration::keys::{BIB_DISPLAY, BIB_HIDE_UNUSED}; -use crate::parsing::configuration::{ConfigRefEntry, Configuration, Value}; -use crate::parsing::elements::Metadata; +use crate::references::configuration::keys::{BIB_DISPLAY, BIB_HIDE_UNUSED}; +use crate::references::configuration::{ConfigRefEntry, Configuration, Value}; use std::collections::HashMap; use std::sync::{Arc, RwLock}; diff --git a/src/parsing/configuration/config.rs b/src/references/configuration/config.rs similarity index 100% rename from src/parsing/configuration/config.rs rename to src/references/configuration/config.rs diff --git a/src/parsing/configuration/default.toml b/src/references/configuration/default.toml similarity index 100% rename from src/parsing/configuration/default.toml rename to src/references/configuration/default.toml diff --git a/src/parsing/configuration/keys.rs b/src/references/configuration/keys.rs similarity index 100% rename from src/parsing/configuration/keys.rs rename to src/references/configuration/keys.rs diff --git a/src/parsing/configuration/mod.rs b/src/references/configuration/mod.rs similarity index 95% rename from src/parsing/configuration/mod.rs rename to src/references/configuration/mod.rs index a009cf3..209ac67 100644 --- a/src/parsing/configuration/mod.rs +++ b/src/references/configuration/mod.rs @@ -1,9 +1,9 @@ -use crate::parsing::configuration::config::RootConfig; -use crate::parsing::configuration::keys::{ +use crate::elements::MetadataValue; +use crate::references::configuration::config::RootConfig; +use crate::references::configuration::keys::{ BIB_DISPLAY, BIB_HIDE_UNUSED, BIB_REF_DISPLAY, META_AUTHOR, META_DATE, META_TITLE, }; -use crate::parsing::elements::MetadataValue; -use crate::parsing::templates::Template; +use crate::references::templates::Template; use std::collections::HashMap; use std::sync::{Arc, RwLock}; diff --git a/src/references/mod.rs b/src/references/mod.rs new file mode 100644 index 0000000..f5f5f64 --- /dev/null +++ b/src/references/mod.rs @@ -0,0 +1,4 @@ +pub mod bibliography; +pub mod configuration; +pub mod placeholders; +pub mod templates; diff --git a/src/parsing/placeholders.rs b/src/references/placeholders.rs similarity index 99% rename from src/parsing/placeholders.rs rename to src/references/placeholders.rs index 4e02e69..6a9cb06 100644 --- a/src/parsing/placeholders.rs +++ b/src/references/placeholders.rs @@ -1,4 +1,4 @@ -use super::elements::*; +use crate::elements::*; use chrono::prelude::*; use regex::Regex; diff --git a/src/parsing/templates.rs b/src/references/templates.rs similarity index 99% rename from src/parsing/templates.rs rename to src/references/templates.rs index baf0526..c0e84ba 100644 --- a/src/parsing/templates.rs +++ b/src/references/templates.rs @@ -1,4 +1,4 @@ -use crate::parsing::elements::{Block, Element, Inline, Line, ListItem}; +use crate::elements::{Block, Element, Inline, Line, ListItem}; use 
std::collections::HashMap; use std::sync::{Arc, RwLock}; diff --git a/src/utils/mod.rs b/src/utils/mod.rs new file mode 100644 index 0000000..29ec0ba --- /dev/null +++ b/src/utils/mod.rs @@ -0,0 +1 @@ +pub mod parsing; diff --git a/src/parsing/utils.rs b/src/utils/parsing.rs similarity index 100% rename from src/parsing/utils.rs rename to src/utils/parsing.rs diff --git a/tests/parsing_tests.rs b/tests/parsing_tests.rs index f917464..a8d972c 100644 --- a/tests/parsing_tests.rs +++ b/tests/parsing_tests.rs @@ -1,5 +1,5 @@ use snekdown::parse; -use snekdown::parsing::elements::Block; +use snekdown::elements::Block; use snekdown::Parser; macro_rules! count_block_elements {