From 110109c8823753bcafefe2b5ed48be78809cd659 Mon Sep 17 00:00:00 2001 From: trivernis Date: Sat, 30 May 2020 11:14:50 +0200 Subject: [PATCH] Add escape character handling --- src/elements.rs | 246 +++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 +- src/parser.rs | 101 +++++++++++------- src/tokens.rs | 270 ++++++------------------------------------------ 4 files changed, 343 insertions(+), 277 deletions(-) create mode 100644 src/elements.rs diff --git a/src/elements.rs b/src/elements.rs new file mode 100644 index 0000000..c43696f --- /dev/null +++ b/src/elements.rs @@ -0,0 +1,246 @@ +#[derive(Clone, Debug)] +pub enum Block { + Section(Section), + Paragraph(Paragraph), + List(List), + Table(Table), +} + +#[derive(Clone, Debug)] +pub enum Inline { + Text(Text), +} + +#[derive(Clone, Debug)] +pub struct Document { + elements: Vec, +} + +#[derive(Clone, Debug)] +pub struct Section { + header: Header, + elements: Vec, +} + +#[derive(Clone, Debug)] +pub struct Header { + pub size: u8, + pub line: Inline, +} + +#[derive(Clone, Debug)] +pub struct BlockQuote { + paragraph: Paragraph, +} + +#[derive(Clone, Debug)] +pub struct Paragraph { + pub elements: Vec, +} + +#[derive(Clone, Debug)] +pub struct List { + pub ordered: bool, + pub items: Vec, +} + +#[derive(Clone, Debug)] +pub struct ListItem { + text: Inline, + pub(crate) level: u16, + pub(crate) children: Vec, +} + +#[derive(Clone, Debug)] +pub struct Table { + header: Row, + pub rows: Vec, +} + +#[derive(Clone, Debug)] +pub struct Row { + pub(crate) cells: Vec, +} + +#[derive(Clone, Debug)] +pub struct Cell { + pub(crate) text: Inline, +} + +#[derive(Clone, Debug)] +pub struct CodeBlock { + language: String, + code: String, +} + +#[derive(Clone, Debug)] +pub struct Code { + code: String, +} + +#[derive(Clone, Debug)] +pub struct Text { + pub subtext: Vec, +} + +#[derive(Clone, Debug)] +pub enum SubText { + Plain(PlainText), + Code(Code), + Bold(BoldText), + Italic(ItalicText), + Underlined(UnderlinedText), + Striked(StrikedText), + Monospace(MonospaceText), + Url(Url), +} + +#[derive(Clone, Debug)] +pub struct PlainText { + pub(crate) value: String, +} + +#[derive(Clone, Debug)] +pub struct BoldText { + pub(crate) value: Box, +} + +#[derive(Clone, Debug)] +pub struct ItalicText { + pub(crate) value: Box, +} + +#[derive(Clone, Debug)] +pub struct UnderlinedText { + pub(crate) value: Box, +} + +#[derive(Clone, Debug)] +pub struct StrikedText { + pub(crate) value: Box, +} + +#[derive(Clone, Debug)] +pub struct MonospaceText { + pub(crate) value: PlainText, +} + +#[derive(Clone, Debug)] +pub struct Url { + title: String, + url: String, +} + +#[derive(Clone, Debug)] +pub struct Image { + url: Url, +} + +// implementations + +impl Document { + pub fn new() -> Self { + Self { + elements: Vec::new(), + } + } + + pub fn add_element(&mut self, element: Block) { + self.elements.push(element) + } +} + +impl Section { + pub fn new(header: Header) -> Self { + Self { + header, + elements: Vec::new(), + } + } + + pub fn add_element(&mut self, element: Block) { + self.elements.push(element) + } +} + +impl Paragraph { + pub fn new() -> Self { + Self { + elements: Vec::new(), + } + } + + pub fn add_element(&mut self, element: Inline) { + self.elements.push(element) + } +} + +impl List { + pub fn new() -> Self { + Self { + ordered: false, + items: Vec::new(), + } + } + + pub fn add_item(&mut self, item: ListItem) { + self.items.push(item) + } +} + +impl ListItem { + pub fn new(text: Inline, level: u16) -> Self { + Self { + text, + level, + children: Vec::new(), + } + } + + pub fn add_child(&mut self, child: ListItem) { + self.children.push(child) + } +} + +impl Text { + pub fn new() -> Self { + Self { + subtext: Vec::new(), + } + } + + pub fn add_subtext(&mut self, subtext: SubText) { + self.subtext.push(subtext) + } +} + +impl Table { + pub fn new(header: Row) -> Self { + Self { + header, + rows: Vec::new(), + } + } + + pub fn add_row(&mut self, row: Row) { + self.rows.push(row) + } +} + +impl Row { + pub fn new() -> Self { + Self { cells: Vec::new() } + } + + pub fn add_cell(&mut self, cell: Cell) { + self.cells.push(cell) + } +} + +impl Url { + pub fn new(title: String, url: String) -> Self { + Self { title, url } + } +} + +// TODO: Images, URIs diff --git a/src/lib.rs b/src/lib.rs index 8faaef6..30340b7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,3 @@ +pub mod elements; +pub mod parser; pub mod tokens; -pub mod parser; \ No newline at end of file diff --git a/src/parser.rs b/src/parser.rs index a1d7b96..4e033ab 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,3 +1,4 @@ +use crate::elements::*; use crate::tokens::*; use std::error::Error; use std::fmt; @@ -75,9 +76,9 @@ impl Parser { /// Skips characters until it encounters a character /// that isn't an inline whitespace character pub fn seek_inline_whitespace(&mut self) { - if self.current_char.is_whitespace() && self.current_char != '\n' { + if self.current_char.is_whitespace() && !self.check_linebreak() { while let Some(next_char) = self.next_char() { - if !next_char.is_whitespace() || self.current_char == '\n' { + if !next_char.is_whitespace() || self.check_linebreak() { break; } } @@ -96,6 +97,34 @@ impl Parser { } } + /// checks if the input character is escaped + pub fn check_escaped(&self) -> bool { + if self.index == 0 { + return false; + } + if let Some(previous_char) = self.text.get(self.index - 1) { + if previous_char == &SPECIAL_ESCAPE { + return true; + } + } + return false; + } + + /// checks if the current character is the given input character and not escaped + pub fn check_special(&self, character: &char) -> bool { + self.current_char == *character && !self.check_escaped() + } + + /// checks if the current character is part of the given group + pub fn check_special_group(&self, chars: &[char]) -> bool { + chars.contains(&self.current_char) && !self.check_escaped() + } + + pub fn check_linebreak(&self) -> bool { + self.current_char == LB && !self.check_escaped() + } + + /// parses the given text into a document pub fn parse(&mut self) -> Document { let mut document = Document::new(); while self.index < self.text.len() { @@ -137,10 +166,10 @@ impl Parser { fn parse_section(&mut self) -> Result { let start_index = self.index; self.seek_whitespace(); - if self.current_char == '#' { + if self.check_special(&HASH) { let mut size = 1; - while let Some(next_char) = self.next_char() { - if next_char == '#' { + while let Some(_) = self.next_char() { + if self.check_special(&HASH) { size += 1; } else { break; @@ -187,7 +216,7 @@ impl Parser { paragraph.add_element(token); let start_index = self.index; self.seek_inline_whitespace(); - if ['-', '#', '`', '|'].contains(&self.current_char) { + if self.check_special_group(&BLOCK_SPECIAL_CHARS) { self.revert_to(start_index)?; break; } @@ -208,7 +237,7 @@ impl Parser { let start_index = self.index; self.seek_whitespace(); - let ordered = if ['-', '*', 'o'].contains(&self.current_char) { + let ordered = if self.check_special_group(&LIST_SPECIAL_CHARS) { false } else { true @@ -272,7 +301,7 @@ impl Parser { self.seek_inline_whitespace(); let level = self.index - start_index; - if !['-', '*', 'o'].contains(&self.current_char) && !self.current_char.is_numeric() { + if !self.check_special_group(&LIST_SPECIAL_CHARS) && !self.current_char.is_numeric() { let err = ParseError::new(self.index); self.revert_to(start_index)?; return Err(err); @@ -297,8 +326,8 @@ impl Parser { let header = self.parse_row()?; let start_index = self.index; self.seek_whitespace(); - if self.current_char == '-' { - if self.next_char() != Some('|') { + if self.check_special(&MINUS) { + if self.next_char() != Some(PIPE) { let err_index = self.index; self.revert_to(start_index)?; return Err(ParseError::new(err_index)); @@ -325,7 +354,7 @@ impl Parser { let start_index = self.index; self.seek_inline_whitespace(); - if self.current_char == '|' { + if self.check_special(&PIPE) { if self.next_char() == None { let err_index = self.index; self.revert_to(start_index)?; @@ -338,12 +367,12 @@ impl Parser { let mut row = Row::new(); while let Ok(element) = self.parse_inline() { row.add_cell(Cell { text: element }); - if self.current_char == '|' { + if self.check_special(&PIPE) { if self.next_char() == None { break; } } - if self.current_char == '\n' { + if self.check_linebreak() { break; } } @@ -377,7 +406,7 @@ impl Parser { self.revert_to(current_index)?; } - if self.current_char == '\n' { + if self.check_linebreak() { parse_option!(self.next_char(), self.index); } @@ -389,15 +418,15 @@ impl Parser { return Ok(SubText::Url(url)); } match self.current_char { - '*' => { + ASTERISK if !self.check_escaped() => { parse_option!(self.next_char(), self.index); - if self.current_char == '*' { + if self.check_special(&ASTERISK) { parse_option!(self.next_char(), self.index); let subtext = self.parse_subtext()?; - if self.current_char == '*' { + if self.check_special(&ASTERISK) { parse_option!(self.next_char(), self.index); - if self.current_char == '*' { + if self.check_special(&ASTERISK) { parse_option!(self.next_char(), self.index); } } @@ -412,7 +441,7 @@ impl Parser { })) } } - '_' => { + UNDERSCR if !self.check_escaped() => { parse_option!(self.next_char(), self.index); let subtext = self.parse_subtext()?; parse_option!(self.next_char(), self.index); @@ -420,25 +449,25 @@ impl Parser { value: Box::new(subtext), })) } - '~' => { + TILDE if !self.check_escaped() => { parse_option!(self.next_char(), self.index); let subtext = self.parse_subtext()?; - if self.current_char == '~' { + if self.check_special(&TILDE) { parse_option!(self.next_char(), self.index); } Ok(SubText::Striked(StrikedText { value: Box::new(subtext), })) } - '`' => { + BACKTICK if !self.check_escaped() => { parse_option!(self.next_char(), self.index); let plain_text = self.parse_plain_text()?; - if self.current_char == '`' { + if self.check_special(&BACKTICK) { parse_option!(self.next_char(), self.index) } Ok(SubText::Monospace(MonospaceText { value: plain_text })) } - '\n' | '|' => Err(ParseError::new(self.index)), + LB | PIPE if !self.check_escaped() => Err(ParseError::new(self.index)), _ => Ok(SubText::Plain(self.parse_plain_text()?)), } } @@ -448,26 +477,26 @@ impl Parser { let start_index = self.index; self.seek_inline_whitespace(); - if self.current_char != '[' { + if !self.check_special(&R_BRACKET) { let err = ParseError::new(self.index); self.revert_to(start_index)?; return Err(err); } let mut title = String::new(); while let Some(character) = self.next_char() { - if character == ']' || character == '\n' { + if self.check_special(&L_BRACKET) || self.check_linebreak() { break; } title.push(character); } - if self.current_char != ']' { + if !self.check_special(&L_BRACKET) { // it stopped at a linebreak or EOF let err = ParseError::new(self.index); self.revert_to(start_index)?; return Err(err); } - if let Some(character) = self.next_char() { - if character != '(' { + if let Some(_) = self.next_char() { + if !self.check_special(&R_PARENTH) { // the next char isn't the start of the encased url let err = ParseError::new(self.index); self.revert_to(start_index)?; @@ -477,12 +506,12 @@ impl Parser { self.seek_inline_whitespace(); let mut url = String::new(); while let Some(character) = self.next_char() { - if character == ')' || character == '\n' { + if self.check_special(&L_PARENTH) || self.check_linebreak() { break; } url.push(character); } - if self.current_char != ')' || url.is_empty() { + if !self.check_special(&L_PARENTH) || url.is_empty() { let err = ParseError::new(self.index); self.revert_to(start_index)?; return Err(err); @@ -501,10 +530,12 @@ impl Parser { let mut characters = String::new(); let mut count = 0; loop { - match current_char { - '\n' | '*' | '_' | '~' | '|' | '`' => break, - '[' if count > 0 => break, // if its the first it means that the url parsing has failed - _ => characters.push(current_char), + if self.check_special_group(&INLINE_SPECIAL_CHARS) + || (count > 0 && self.check_special(&R_BRACKET)) + { + break; + } else { + characters.push(current_char) } if let Some(character) = self.next_char() { current_char = character; diff --git a/src/tokens.rs b/src/tokens.rs index f8be10e..b1d6bf7 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,241 +1,29 @@ -#[derive(Clone, Debug)] -pub enum Block { - Section(Section), - Paragraph(Paragraph), - List(List), - Table(Table), -} - -#[derive(Clone, Debug)] -pub enum Inline { - Text(Text), -} - -#[derive(Clone, Debug)] -pub struct Document { - elements: Vec, -} - -#[derive(Clone, Debug)] -pub struct Section { - header: Header, - elements: Vec, -} - -#[derive(Clone, Debug)] -pub struct Header { - pub size: u8, - pub line: Inline, -} - -#[derive(Clone, Debug)] -pub struct BlockQuote { - paragraph: Paragraph, -} - -#[derive(Clone, Debug)] -pub struct Paragraph { - pub elements: Vec, -} - -#[derive(Clone, Debug)] -pub struct List { - pub ordered: bool, - pub items: Vec, -} - -#[derive(Clone, Debug)] -pub struct ListItem { - text: Inline, - pub(crate) level: u16, - pub(crate) children: Vec, -} - -#[derive(Clone, Debug)] -pub struct Table { - header: Row, - pub rows: Vec, -} - -#[derive(Clone, Debug)] -pub struct Row { - pub(crate) cells: Vec, -} - -#[derive(Clone, Debug)] -pub struct Cell { - pub(crate) text: Inline, -} - -#[derive(Clone, Debug)] -pub struct CodeBlock { - language: String, - code: String, -} - -#[derive(Clone, Debug)] -pub struct Code { - code: String, -} - -#[derive(Clone, Debug)] -pub struct Text { - pub subtext: Vec, -} - -#[derive(Clone, Debug)] -pub enum SubText { - Plain(PlainText), - Code(Code), - Bold(BoldText), - Italic(ItalicText), - Underlined(UnderlinedText), - Striked(StrikedText), - Monospace(MonospaceText), - Url(Url), -} - -#[derive(Clone, Debug)] -pub struct PlainText { - pub(crate) value: String, -} - -#[derive(Clone, Debug)] -pub struct BoldText { - pub(crate) value: Box, -} - -#[derive(Clone, Debug)] -pub struct ItalicText { - pub(crate) value: Box, -} - -#[derive(Clone, Debug)] -pub struct UnderlinedText { - pub(crate) value: Box, -} - -#[derive(Clone, Debug)] -pub struct StrikedText { - pub(crate) value: Box, -} - -#[derive(Clone, Debug)] -pub struct MonospaceText { - pub(crate) value: PlainText, -} - -#[derive(Clone, Debug)] -pub struct Url { - title: String, - url: String, -} - -// implementations - -impl Document { - pub fn new() -> Self { - Self { - elements: Vec::new(), - } - } - - pub fn add_element(&mut self, element: Block) { - self.elements.push(element) - } -} - -impl Section { - pub fn new(header: Header) -> Self { - Self { - header, - elements: Vec::new(), - } - } - - pub fn add_element(&mut self, element: Block) { - self.elements.push(element) - } -} - -impl Paragraph { - pub fn new() -> Self { - Self { - elements: Vec::new(), - } - } - - pub fn add_element(&mut self, element: Inline) { - self.elements.push(element) - } -} - -impl List { - pub fn new() -> Self { - Self { - ordered: false, - items: Vec::new(), - } - } - - pub fn add_item(&mut self, item: ListItem) { - self.items.push(item) - } -} - -impl ListItem { - pub fn new(text: Inline, level: u16) -> Self { - Self { - text, - level, - children: Vec::new(), - } - } - - pub fn add_child(&mut self, child: ListItem) { - self.children.push(child) - } -} - -impl Text { - pub fn new() -> Self { - Self { - subtext: Vec::new(), - } - } - - pub fn add_subtext(&mut self, subtext: SubText) { - self.subtext.push(subtext) - } -} - -impl Table { - pub fn new(header: Row) -> Self { - Self { - header, - rows: Vec::new(), - } - } - - pub fn add_row(&mut self, row: Row) { - self.rows.push(row) - } -} - -impl Row { - pub fn new() -> Self { - Self { cells: Vec::new() } - } - - pub fn add_cell(&mut self, cell: Cell) { - self.cells.push(cell) - } -} - -impl Url { - pub fn new(title: String, url: String) -> Self { - Self { title, url } - } -} - -// TODO: Images, URIs +#![allow(unused)] + +pub(crate) const BACKSLASH: char = '\\'; +pub(crate) const LB: char = '\n'; +pub(crate) const ASTERISK: char = '*'; +pub(crate) const UNDERSCR: char = '_'; +pub(crate) const TILDE: char = '~'; +pub(crate) const PIPE: char = '|'; +pub(crate) const BACKTICK: char = '`'; +pub(crate) const R_BRACKET: char = '['; +pub(crate) const L_BRACKET: char = ']'; +pub(crate) const R_PARENTH: char = '('; +pub(crate) const L_PARENTH: char = ')'; +pub(crate) const MINUS: char = '-'; +pub(crate) const PLUS: char = '+'; +pub(crate) const HASH: char = '#'; +pub(crate) const O: char = 'o'; +pub(crate) const X: char = 'x'; + +// aliases + +pub(crate) const SPECIAL_ESCAPE: char = BACKSLASH; + +// groups + +pub(crate) const BLOCK_SPECIAL_CHARS: [char; 4] = [HASH, MINUS, BACKTICK, PIPE]; +pub(crate) const INLINE_SPECIAL_CHARS: [char; 6] = [LB, ASTERISK, UNDERSCR, TILDE, PIPE, BACKTICK]; + +pub(crate) const LIST_SPECIAL_CHARS: [char; 4] = [MINUS, PLUS, ASTERISK, O];