diff --git a/.gitignore b/.gitignore index 4ae822d..7fde081 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /target **/*.rs.bk -.idea \ No newline at end of file +.idea +.ast +test \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index e7a11a9..39b3980 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,8 @@ +use markdown_rs::parser::Parser; +use std::fs::{read_to_string, write}; + fn main() { - println!("Hello, world!"); + let mut parser = Parser::new(read_to_string("test/document.md").unwrap()); + let document = parser.parse(); + write("test/document.ast", format!("{:#?}", document)).unwrap(); } diff --git a/src/parser.rs b/src/parser.rs index c581aca..f56618f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,6 +3,15 @@ use std::error::Error; use std::fmt; use std::fmt::{Display, Formatter}; +macro_rules! parse_option { + ($option:expr, $index:expr) => { + if let Some(_) = $option { + } else { + return Err(ParseError::new($index)); + } + }; +} + #[derive(Debug)] pub struct ParseError { index: usize, @@ -24,9 +33,25 @@ pub struct Parser { text: Vec, current_char: char, section_nesting: u8, + section_return: Option, } impl Parser { + pub fn new(text: String) -> Self { + let text: Vec = text.chars().collect(); + let current_char = text.get(0).unwrap().clone(); + + Self { + index: 0, + text, + current_char, + section_nesting: 0, + section_return: None, + } + } + + /// Increments the current index and returns the + /// char at the indexes position pub fn next_char(&mut self) -> Option { self.index += 1; @@ -35,8 +60,9 @@ impl Parser { Some(self.current_char) } + /// Returns to an index position pub fn revert_to(&mut self, index: usize) -> Result<(), ParseError> { - self.index = index; + self.index = index - 1; if let Some(_) = self.next_char() { Ok(()) } else { @@ -44,6 +70,20 @@ impl Parser { } } + /// Skips characters until it encounters a character + /// that isn't an inline whitespace character + pub fn seek_inline_whitespace(&mut self) { + if self.current_char.is_whitespace() && self.current_char != '\n' { + while let Some(next_char) = self.next_char() { + if !next_char.is_whitespace() || self.current_char == '\n' { + break; + } + } + } + } + + /// Skips characters until it encounters a character + /// that isn't a whitespace character pub fn seek_whitespace(&mut self) { if self.current_char.is_whitespace() { while let Some(next_char) = self.next_char() { @@ -54,38 +94,47 @@ impl Parser { } } - pub fn parse(&mut self) { + pub fn parse(&mut self) -> Document { let mut document = Document::new(); while self.index < self.text.len() { if let Ok(token) = self.parse_block() { document.add_element(token); } } + + document } + /// Parses a block Token pub fn parse_block(&mut self) -> Result { - if let Some(_) = self.next_char() { - let token = if let Ok(section) = self.parse_section() { - Block::Section(section) - } else if let Ok(list) = self.parse_list() { - Block::List(list) - } else if let Ok(table) = self.parse_table() { - Block::Table(table) - } else if let Ok(paragraph) = self.parse_paragraph() { - Block::Paragraph(paragraph) - } else { + if let Some(section) = self.section_return { + if section <= self.section_nesting { return Err(ParseError::new(self.index)); - }; - - Ok(token) - } else { - Err(ParseError::new(self.index)) + } else { + self.section_return = None; + } } + let token = if let Ok(section) = self.parse_section() { + Block::Section(section) + } else if let Some(_) = self.section_return { + return Err(ParseError::new(self.index)); + } else if let Ok(list) = self.parse_list() { + Block::List(list) + } else if let Ok(table) = self.parse_table() { + Block::Table(table) + } else if let Ok(paragraph) = self.parse_paragraph() { + Block::Paragraph(paragraph) + } else { + return Err(ParseError::new(self.index)); + }; + + Ok(token) } /// Parses a section that consists of a header and one or more blocks pub fn parse_section(&mut self) -> Result { let start_index = self.index; + self.seek_whitespace(); if self.current_char == '#' { let mut size = 1; while let Some(next_char) = self.next_char() { @@ -97,10 +146,13 @@ impl Parser { } if size <= self.section_nesting || !self.current_char.is_whitespace() { let index = self.index; + if size <= self.section_nesting { + self.section_return = Some(size); + } self.revert_to(start_index)?; return Err(ParseError::new(index)); } - self.seek_whitespace(); + self.seek_inline_whitespace(); let mut header = self.parse_header()?; header.size = size; self.section_nesting = size; @@ -110,16 +162,23 @@ impl Parser { section.add_element(block); } + self.section_nesting -= 1; Ok(section) } else { - Err(ParseError::new(self.index)) + let error_index = self.index; + self.revert_to(start_index)?; + Err(ParseError::new(error_index)) } } + /// Parses a paragraph pub fn parse_paragraph(&mut self) -> Result { let mut paragraph = Paragraph::new(); while let Ok(token) = self.parse_inline() { paragraph.add_element(token); + if ['-', '#', '`', '|'].contains(&self.current_char) { + break; + } } if paragraph.elements.len() > 0 { @@ -129,12 +188,25 @@ impl Parser { } } + /// parses a list which consists of one or more list items pub fn parse_list(&mut self) -> Result { - unimplemented!() + let mut list = List::new(); + let start_index = self.index; + self.seek_whitespace(); + while let Ok(token) = self.parse_list_item() { + list.add_item(token); + } + + if list.items.len() > 0 { + Ok(list) + } else { + self.revert_to(start_index)?; + Err(ParseError::new(self.index)) + } } pub fn parse_table(&mut self) -> Result { - unimplemented!() + Err(ParseError::new(self.index)) } pub fn parse_header(&mut self) -> Result { @@ -144,15 +216,118 @@ impl Parser { }) } + /// parses a single list item defined with - pub fn parse_list_item(&mut self) -> Result { - unimplemented!() + let start_index = self.index; + self.seek_inline_whitespace(); + + if self.current_char != '-' { + let err = ParseError::new(self.index); + self.revert_to(start_index)?; + return Err(err); + } + self.seek_inline_whitespace(); + let item = ListItem { + text: self.parse_inline()?, + }; + + Ok(item) } pub fn parse_inline(&mut self) -> Result { - unimplemented!() + if self.index > self.text.len() { + Err(ParseError::new(self.index)) + } else { + Ok(Inline::Text(self.parse_text()?)) + } } + /// Parses a line of text pub fn parse_text(&mut self) -> Result { - unimplemented!() + let mut text = Text::new(); + while let Ok(subtext) = self.parse_subtext() { + text.add_subtext(subtext); + let current_index = self.index; + if self.next_char() == None { + break; + } + self.revert_to(current_index)?; + } + + if self.current_char == '\n' { + parse_option!(self.next_char(), self.index); + } + + Ok(text) + } + + pub fn parse_subtext(&mut self) -> Result { + match self.current_char { + '*' => { + parse_option!(self.next_char(), self.index); + + if self.current_char == '*' { + parse_option!(self.next_char(), self.index); + let subtext = self.parse_subtext()?; + if self.current_char == '*' { + parse_option!(self.next_char(), self.index); + if self.current_char == '*' { + parse_option!(self.next_char(), self.index); + } + } + Ok(SubText::Bold(BoldText { + value: Box::new(subtext), + })) + } else { + let subtext = self.parse_subtext()?; + parse_option!(self.next_char(), self.index); + Ok(SubText::Italic(ItalicText { + value: Box::new(subtext), + })) + } + } + '_' => { + parse_option!(self.next_char(), self.index); + let subtext = self.parse_subtext()?; + parse_option!(self.next_char(), self.index); + Ok(SubText::Underlined(UnderlinedText { + value: Box::new(subtext), + })) + } + '~' => { + parse_option!(self.next_char(), self.index); + let subtext = self.parse_subtext()?; + if self.current_char == '~' { + parse_option!(self.next_char(), self.index); + } + Ok(SubText::Striked(StrikedText { + value: Box::new(subtext), + })) + } + '\n' => Err(ParseError::new(self.index)), + _ => Ok(SubText::Plain(self.parse_plain_text()?)), + } + } + + pub fn parse_plain_text(&mut self) -> Result { + let mut current_char = self.current_char; + let mut characters = String::new(); + loop { + match current_char { + '\n' | '*' | '_' | '~' => break, + _ => characters.push(current_char), + } + if let Some(character) = self.next_char() { + current_char = character; + } else { + break; + } + } + + if characters.len() > 0 { + Ok(PlainText { value: characters }) + } else { + Err(ParseError::new(self.index)) + } } } diff --git a/src/tokens.rs b/src/tokens.rs index 1b7b44a..485f851 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,5 +1,4 @@ -use std::ops::Sub; - +#[derive(Clone, Debug)] pub enum Block { Section(Section), Paragraph(Paragraph), @@ -7,67 +6,82 @@ pub enum Block { Table(Table), } +#[derive(Clone, Debug)] pub enum Inline { Text(Text), } +#[derive(Clone, Debug)] pub struct Document { elements: Vec, } +#[derive(Clone, Debug)] pub struct Section { header: Header, elements: Vec, } +#[derive(Clone, Debug)] pub struct Header { pub size: u8, pub line: Inline, } +#[derive(Clone, Debug)] pub struct BlockQuote { paragraph: Paragraph, } +#[derive(Clone, Debug)] pub struct Paragraph { pub elements: Vec, } +#[derive(Clone, Debug)] pub struct List { pub ordered: bool, pub items: Vec, } +#[derive(Clone, Debug)] pub struct ListItem { - text: Inline, + pub(crate) text: Inline, } +#[derive(Clone, Debug)] pub struct Table { header: Row, rows: Vec, } +#[derive(Clone, Debug)] pub struct Row { text: Vec, } +#[derive(Clone, Debug)] pub struct Cell { text: Inline, } +#[derive(Clone, Debug)] pub struct CodeBlock { language: String, code: String, } +#[derive(Clone, Debug)] pub struct Code { code: String, } +#[derive(Clone, Debug)] pub struct Text { - subtext: Vec, + pub subtext: Vec, } +#[derive(Clone, Debug)] pub enum SubText { Plain(PlainText), Code(Code), @@ -77,24 +91,29 @@ pub enum SubText { Striked(StrikedText), } +#[derive(Clone, Debug)] pub struct PlainText { - value: String, + pub(crate) value: String, } +#[derive(Clone, Debug)] pub struct BoldText { - value: Box, + pub(crate) value: Box, } +#[derive(Clone, Debug)] pub struct ItalicText { - value: Box, + pub(crate) value: Box, } +#[derive(Clone, Debug)] pub struct UnderlinedText { - value: Box, + pub(crate) value: Box, } +#[derive(Clone, Debug)] pub struct StrikedText { - value: Box, + pub(crate) value: Box, } impl Document { @@ -147,4 +166,16 @@ impl List { } } +impl Text { + pub fn new() -> Self { + Self { + subtext: Vec::new(), + } + } + + pub fn add_subtext(&mut self, subtext: SubText) { + self.subtext.push(subtext) + } +} + // TODO: Images, URIs