From 2204f93cf31a89dfef75fa97ffc6c9365e7f844b Mon Sep 17 00:00:00 2001
From: trivernis
Date: Sat, 30 May 2020 11:37:23 +0200
Subject: [PATCH] Add code block parsing

---
 src/elements.rs |  5 +++--
 src/parser.rs   | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/tokens.rs   |  5 +++++
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/src/elements.rs b/src/elements.rs
index c43696f..09bf8f0 100644
--- a/src/elements.rs
+++ b/src/elements.rs
@@ -4,6 +4,7 @@ pub enum Block {
     Paragraph(Paragraph),
     List(List),
     Table(Table),
+    CodeBlock(CodeBlock),
 }
 
 #[derive(Clone, Debug)]
@@ -69,8 +70,8 @@ pub struct Cell {
 
 #[derive(Clone, Debug)]
 pub struct CodeBlock {
-    language: String,
-    code: String,
+    pub(crate) language: String,
+    pub(crate) code: String,
 }
 
 #[derive(Clone, Debug)]
diff --git a/src/parser.rs b/src/parser.rs
index 4e033ab..374466a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -120,10 +120,38 @@ impl Parser {
         chars.contains(&self.current_char) && !self.check_escaped()
     }
 
+    /// Returns `true` if the current character is an unescaped `LB` linebreak.
+    ///
+    /// Note: only `LB` is compared, so CRLF line endings get no special handling.
     pub fn check_linebreak(&self) -> bool {
         self.current_char == LB && !self.check_escaped()
     }
 
+    /// Checks whether the upcoming characters match a special sequence.
+    ///
+    /// `seek_whitespace` is called first. On a match the parser stays on the last
+    /// character of the sequence and `Ok(())` is returned. On a mismatch, or when
+    /// `next_char` yields `None` before the whole sequence was seen, the parser is
+    /// reverted to its starting index and a `ParseError` built from the failing
+    /// index is returned.
+    pub fn check_special_sequence(&mut self, sequence: &[char]) -> Result<(), ParseError> {
+        let start_index = self.index;
+        self.seek_whitespace();
+        for (position, sq_character) in sequence.iter().enumerate() {
+            // Only advance between sequence characters: stepping past the last one
+            // made a sequence at the very end of the input fail to match.
+            let matched = self.check_special(sq_character)
+                && (position + 1 == sequence.len() || self.next_char().is_some());
+            if !matched {
+                let err = ParseError::new(self.index);
+                self.revert_to(start_index)?;
+                return Err(err);
+            }
+        }
+
+        Ok(())
+    }
+
     /// parses the given text into a document
     pub fn parse(&mut self) -> Document {
         let mut document = Document::new();
@@ -153,6 +181,8 @@ impl Parser {
                 Block::List(list)
             } else if let Ok(table) = self.parse_table() {
                 Block::Table(table)
+            } else if let Ok(code_block) = self.parse_code_block() {
+                Block::CodeBlock(code_block)
             } else if let Ok(paragraph) = self.parse_paragraph() {
                 Block::Paragraph(paragraph)
             } else {
@@ -202,6 +232,7 @@ impl Parser {
         }
     }
 
+    /// Parses the header of a section.
     fn parse_header(&mut self) -> Result<Header, ParseError> {
         Ok(Header {
             size: 0,
@@ -209,6 +240,36 @@ impl Parser {
         })
     }
 
+    /// Parses a fenced code block.
+    ///
+    /// Requires the `SQ_CODE_BLOCK` sequence at the current position and returns
+    /// the error of `check_special_sequence` otherwise. The characters up to the
+    /// next linebreak become the language; everything after it, up to the next
+    /// `SQ_CODE_BLOCK` sequence or until `next_char` yields `None`, is the code.
+    fn parse_code_block(&mut self) -> Result<CodeBlock, ParseError> {
+        self.check_special_sequence(&SQ_CODE_BLOCK)?;
+        let mut language = String::new();
+        while let Some(character) = self.next_char() {
+            if self.check_linebreak() {
+                break;
+            }
+            language.push(character);
+        }
+        let mut text = String::new();
+        while let Some(character) = self.next_char() {
+            // a second fence closes the block and is not part of the code
+            if self.check_special_sequence(&SQ_CODE_BLOCK).is_ok() {
+                break;
+            }
+            text.push(character);
+        }
+
+        Ok(CodeBlock {
+            language,
+            code: text,
+        })
+    }
+
     /// Parses a paragraph
     fn parse_paragraph(&mut self) -> Result<Paragraph, ParseError> {
         let mut paragraph = Paragraph::new();
diff --git a/src/tokens.rs b/src/tokens.rs
index b1d6bf7..8fac64a 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -27,3 +27,8 @@ pub(crate) const BLOCK_SPECIAL_CHARS: [char; 4] = [HASH, MINUS, BACKTICK, PIPE];
 pub(crate) const INLINE_SPECIAL_CHARS: [char; 6] = [LB, ASTERISK, UNDERSCR, TILDE, PIPE, BACKTICK];
 
 pub(crate) const LIST_SPECIAL_CHARS: [char; 4] = [MINUS, PLUS, ASTERISK, O];
+
+// multi-character sequences
+
+/// Fence sequence that opens and closes a code block.
+pub(crate) const SQ_CODE_BLOCK: [char; 3] = [BACKTICK, BACKTICK, BACKTICK];