From e9e7dca783619f3b1533826a194377d6f9801879 Mon Sep 17 00:00:00 2001 From: trivernis Date: Tue, 12 May 2020 20:06:43 +0200 Subject: [PATCH] Add tokens and started with the parsing --- .gitignore | 2 + .idea/.gitignore | 8 ++ .idea/dictionaries/trivernis.xml | 7 ++ .idea/markdown-rs.iml | 11 +++ .idea/misc.xml | 6 ++ .idea/modules.xml | 8 ++ .idea/vcs.xml | 6 ++ Cargo.lock | 6 ++ Cargo.toml | 9 +++ src/lib.rs | 2 + src/main.rs | 3 + src/parser.rs | 130 +++++++++++++++++++++++++++++++ src/tokens.rs | 97 +++++++++++++++++++++++ 13 files changed, 295 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/dictionaries/trivernis.xml create mode 100644 .idea/markdown-rs.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/lib.rs create mode 100644 src/main.rs create mode 100644 src/parser.rs create mode 100644 src/tokens.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53eaa21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/dictionaries/trivernis.xml b/.idea/dictionaries/trivernis.xml new file mode 100644 index 0000000..03c99f1 --- /dev/null +++ b/.idea/dictionaries/trivernis.xml @@ -0,0 +1,7 @@ + + + + striked + + + \ No newline at end of file diff --git a/.idea/markdown-rs.iml b/.idea/markdown-rs.iml new file mode 100644 index 0000000..c254557 --- /dev/null +++ b/.idea/markdown-rs.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..28a804d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..87cca7f --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..ec2965e --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,6 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "markdown-rs" +version = "0.1.0" + diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..64700b4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "markdown-rs" +version = "0.1.0" +authors = ["trivernis "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..8faaef6 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,2 @@ +pub mod tokens; +pub mod parser; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e7a11a9 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..447f875 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,130 @@ +use crate::tokens::*; +use std::error::Error; +use std::fmt; +use std::fmt::{Display, Formatter}; + +#[derive(Debug)] +pub struct ParseError { + index: usize, +} +impl Display for ParseError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "parse error at index {}", self.index) + } +} +impl Error for ParseError {} +impl ParseError { + pub fn new(index: usize) -> Self { + Self { index } + } +} + +pub struct Parser { + index: usize, + text: Vec, + current_char: char, + section_nesting: u8, +} + +impl Parser { + pub fn next_char(&mut self) -> Option { + self.index += 1; + + self.current_char = self.text.get(self.index)?.clone(); + + Some(self.current_char) + } + + pub fn revert_to(&mut self, index: usize) -> Result<(), ParseError> { + self.index = index; + if let Some(_) = self.next_char() { + Ok(()) + } else { + Err(ParseError::new(index)) + } + } + + pub fn parse(&mut self) { + let mut document = Document::new(); + while self.index < self.text.len() { + if let Ok(token) = self.parse_block() { + document.add_element(token); + } + } + } + + pub fn parse_block(&mut self) -> Result { + if let Some(_) = self.next_char() { + let token = if let Ok(section) = self.parse_section() { + Block::Section(section) + } else if let Ok(list) = self.parse_list() { + Block::List(list) + } else if let Ok(table) = self.parse_table() { + Block::Table(table) + } else if let Ok(paragraph) = self.parse_paragraph() { + Block::Paragraph(paragraph) + } else { + return Err(ParseError::new(self.index)); + }; + + Ok(token) + } else { + Err(ParseError::new(self.index)) + } + } + + pub fn parse_section(&mut self) -> Result { + let start_index = self.index; + if self.current_char == '#' { + let mut size = 1; + while let Some(next_char) = self.next_char() { + if next_char == '#' { + size += 1; + } else { + break; + } + } + if size <= self.section_nesting || !self.current_char.is_whitespace() { + let index = self.index; + self.revert_to(start_index)?; + return Err(ParseError::new(index)); + } + let _ = self.next_char(); + let mut header = self.parse_header()?; + header.size = size; + self.section_nesting = size; + let mut section = Section::new(header); + + while let Ok(block) = self.parse_block() { + section.add_element(block); + } + + Ok(section) + } else { + Err(ParseError::new(self.index)) + } + } + + pub fn parse_paragraph(&mut self) -> Result { + unimplemented!() + } + + pub fn parse_list(&mut self) -> Result { + unimplemented!() + } + + pub fn parse_table(&mut self) -> Result { + unimplemented!() + } + + pub fn parse_header(&mut self) -> Result { + Ok(Header { + size: 0, + text: self.parse_text()?, + }) + } + + pub fn parse_text(&mut self) -> Result { + unimplemented!() + } +} diff --git a/src/tokens.rs b/src/tokens.rs new file mode 100644 index 0000000..ad01649 --- /dev/null +++ b/src/tokens.rs @@ -0,0 +1,97 @@ +pub enum Block { + Section(Section), + Paragraph(Paragraph), + List(List), + Table(Table), +} + +pub enum Inline { + Text(Text), + Code(Code), +} + +pub struct Document { + elements: Vec, +} + +pub struct Section { + header: Header, + elements: Vec, +} + +pub struct Header { + pub(crate) size: u8, + pub(crate) text: Text, +} + +pub struct BlockQuote { + paragraph: Paragraph, +} + +pub struct Paragraph { + elements: Vec, +} + +pub struct List { + ordered: bool, + items: Vec, +} + +pub struct ListItem { + text: Vec, +} + +pub struct Table { + header: Row, + rows: Vec, +} + +pub struct Row { + text: Vec, +} + +pub struct Cell { + text: Vec, +} + +pub struct CodeBlock { + language: String, + code: String, +} + +pub struct Code { + code: String, +} + +pub struct Text { + bold: bool, + italic: bool, + underlined: bool, + striked: bool, + value: String, +} + +impl Document { + pub fn new() -> Self { + Self { + elements: Vec::new(), + } + } + + pub fn add_element(&mut self, element: Block) { + self.elements.push(element) + } +} + +impl Section { + pub fn new(header: Header) -> Self { + Self { + header, + elements: Vec::new(), + } + } + + pub fn add_element(&mut self, element: Block) { + self.elements.push(element) + } +}