Add subtext parsing

- lots of fixes
- kinda works
pull/1/head
trivernis 4 years ago
parent c89b9d83fb
commit 70ba5a7062

4
.gitignore vendored

@ -1,3 +1,5 @@
/target /target
**/*.rs.bk **/*.rs.bk
.idea .idea
.ast
test

@ -1,3 +1,8 @@
use markdown_rs::parser::Parser;
use std::fs::{read_to_string, write};
fn main() { fn main() {
println!("Hello, world!"); let mut parser = Parser::new(read_to_string("test/document.md").unwrap());
let document = parser.parse();
write("test/document.ast", format!("{:#?}", document)).unwrap();
} }

@ -3,6 +3,15 @@ use std::error::Error;
use std::fmt; use std::fmt;
use std::fmt::{Display, Formatter}; use std::fmt::{Display, Formatter};
/// Returns early from the enclosing function with `Err(ParseError::new($index))`
/// when `$option` evaluates to `None`; otherwise execution continues.
///
/// `$option` is evaluated exactly once and its payload is discarded.
/// `$index` is evaluated only on the error path, after `$option`, so an
/// expression such as `self.index` observes any side effects of `$option`.
macro_rules! parse_option {
    ($option:expr, $index:expr) => {
        if $option.is_none() {
            return Err(ParseError::new($index));
        }
    };
}
#[derive(Debug)] #[derive(Debug)]
pub struct ParseError { pub struct ParseError {
index: usize, index: usize,
@ -24,9 +33,25 @@ pub struct Parser {
text: Vec<char>, text: Vec<char>,
current_char: char, current_char: char,
section_nesting: u8, section_nesting: u8,
section_return: Option<u8>,
} }
impl Parser { impl Parser {
/// Creates a parser positioned on the first character of `text`.
///
/// The input is stored as a `Vec<char>` so it can be indexed by character.
/// Empty input does not panic: `current_char` then falls back to `'\n'`,
/// which `parse_subtext` rejects, so no text is produced from it.
pub fn new(text: String) -> Self {
    let text: Vec<char> = text.chars().collect();
    // `first()` yields `None` for an empty document instead of panicking
    // the way `get(0).unwrap()` did.
    let current_char = text.first().copied().unwrap_or('\n');

    Self {
        index: 0,
        text,
        current_char,
        section_nesting: 0,
        section_return: None,
    }
}
/// Increments the current index and returns the
/// char at that index position
pub fn next_char(&mut self) -> Option<char> { pub fn next_char(&mut self) -> Option<char> {
self.index += 1; self.index += 1;
@ -35,8 +60,9 @@ impl Parser {
Some(self.current_char) Some(self.current_char)
} }
/// Returns to an index position
pub fn revert_to(&mut self, index: usize) -> Result<(), ParseError> { pub fn revert_to(&mut self, index: usize) -> Result<(), ParseError> {
self.index = index; self.index = index - 1;
if let Some(_) = self.next_char() { if let Some(_) = self.next_char() {
Ok(()) Ok(())
} else { } else {
@ -44,6 +70,20 @@ impl Parser {
} }
} }
/// Advances past a run of inline whitespace, i.e. any whitespace other
/// than `'\n'`.
///
/// Does nothing unless the current character is inline whitespace.
/// Otherwise `next_char` is called repeatedly until it yields a
/// non-whitespace character, the current character is a newline, or it
/// returns `None`.
pub fn seek_inline_whitespace(&mut self) {
    let on_inline_space = self.current_char.is_whitespace() && self.current_char != '\n';
    if !on_inline_space {
        return;
    }

    loop {
        match self.next_char() {
            // Still inside the whitespace run and not on a line break: keep going.
            Some(c) if c.is_whitespace() && self.current_char != '\n' => continue,
            _ => break,
        }
    }
}
/// Skips characters until it encounters a character
/// that isn't a whitespace character
pub fn seek_whitespace(&mut self) { pub fn seek_whitespace(&mut self) {
if self.current_char.is_whitespace() { if self.current_char.is_whitespace() {
while let Some(next_char) = self.next_char() { while let Some(next_char) = self.next_char() {
@ -54,38 +94,47 @@ impl Parser {
} }
} }
pub fn parse(&mut self) { pub fn parse(&mut self) -> Document {
let mut document = Document::new(); let mut document = Document::new();
while self.index < self.text.len() { while self.index < self.text.len() {
if let Ok(token) = self.parse_block() { if let Ok(token) = self.parse_block() {
document.add_element(token); document.add_element(token);
} }
} }
document
} }
/// Parses a block Token
pub fn parse_block(&mut self) -> Result<Block, ParseError> { pub fn parse_block(&mut self) -> Result<Block, ParseError> {
if let Some(_) = self.next_char() { if let Some(section) = self.section_return {
let token = if let Ok(section) = self.parse_section() { if section <= self.section_nesting {
Block::Section(section)
} else if let Ok(list) = self.parse_list() {
Block::List(list)
} else if let Ok(table) = self.parse_table() {
Block::Table(table)
} else if let Ok(paragraph) = self.parse_paragraph() {
Block::Paragraph(paragraph)
} else {
return Err(ParseError::new(self.index)); return Err(ParseError::new(self.index));
}; } else {
self.section_return = None;
Ok(token) }
} else {
Err(ParseError::new(self.index))
} }
let token = if let Ok(section) = self.parse_section() {
Block::Section(section)
} else if let Some(_) = self.section_return {
return Err(ParseError::new(self.index));
} else if let Ok(list) = self.parse_list() {
Block::List(list)
} else if let Ok(table) = self.parse_table() {
Block::Table(table)
} else if let Ok(paragraph) = self.parse_paragraph() {
Block::Paragraph(paragraph)
} else {
return Err(ParseError::new(self.index));
};
Ok(token)
} }
/// Parses a section that consists of a header and one or more blocks /// Parses a section that consists of a header and one or more blocks
pub fn parse_section(&mut self) -> Result<Section, ParseError> { pub fn parse_section(&mut self) -> Result<Section, ParseError> {
let start_index = self.index; let start_index = self.index;
self.seek_whitespace();
if self.current_char == '#' { if self.current_char == '#' {
let mut size = 1; let mut size = 1;
while let Some(next_char) = self.next_char() { while let Some(next_char) = self.next_char() {
@ -97,10 +146,13 @@ impl Parser {
} }
if size <= self.section_nesting || !self.current_char.is_whitespace() { if size <= self.section_nesting || !self.current_char.is_whitespace() {
let index = self.index; let index = self.index;
if size <= self.section_nesting {
self.section_return = Some(size);
}
self.revert_to(start_index)?; self.revert_to(start_index)?;
return Err(ParseError::new(index)); return Err(ParseError::new(index));
} }
self.seek_whitespace(); self.seek_inline_whitespace();
let mut header = self.parse_header()?; let mut header = self.parse_header()?;
header.size = size; header.size = size;
self.section_nesting = size; self.section_nesting = size;
@ -110,16 +162,23 @@ impl Parser {
section.add_element(block); section.add_element(block);
} }
self.section_nesting -= 1;
Ok(section) Ok(section)
} else { } else {
Err(ParseError::new(self.index)) let error_index = self.index;
self.revert_to(start_index)?;
Err(ParseError::new(error_index))
} }
} }
/// Parses a paragraph
pub fn parse_paragraph(&mut self) -> Result<Paragraph, ParseError> { pub fn parse_paragraph(&mut self) -> Result<Paragraph, ParseError> {
let mut paragraph = Paragraph::new(); let mut paragraph = Paragraph::new();
while let Ok(token) = self.parse_inline() { while let Ok(token) = self.parse_inline() {
paragraph.add_element(token); paragraph.add_element(token);
if ['-', '#', '`', '|'].contains(&self.current_char) {
break;
}
} }
if paragraph.elements.len() > 0 { if paragraph.elements.len() > 0 {
@ -129,12 +188,25 @@ impl Parser {
} }
} }
/// parses a list which consists of one or more list items
pub fn parse_list(&mut self) -> Result<List, ParseError> { pub fn parse_list(&mut self) -> Result<List, ParseError> {
unimplemented!() let mut list = List::new();
let start_index = self.index;
self.seek_whitespace();
while let Ok(token) = self.parse_list_item() {
list.add_item(token);
}
if list.items.len() > 0 {
Ok(list)
} else {
self.revert_to(start_index)?;
Err(ParseError::new(self.index))
}
} }
pub fn parse_table(&mut self) -> Result<Table, ParseError> { pub fn parse_table(&mut self) -> Result<Table, ParseError> {
unimplemented!() Err(ParseError::new(self.index))
} }
pub fn parse_header(&mut self) -> Result<Header, ParseError> { pub fn parse_header(&mut self) -> Result<Header, ParseError> {
@ -144,15 +216,118 @@ impl Parser {
}) })
} }
/// parses a single list item defined with -
pub fn parse_list_item(&mut self) -> Result<ListItem, ParseError> { pub fn parse_list_item(&mut self) -> Result<ListItem, ParseError> {
unimplemented!() let start_index = self.index;
self.seek_inline_whitespace();
if self.current_char != '-' {
let err = ParseError::new(self.index);
self.revert_to(start_index)?;
return Err(err);
}
self.seek_inline_whitespace();
let item = ListItem {
text: self.parse_inline()?,
};
Ok(item)
} }
pub fn parse_inline(&mut self) -> Result<Inline, ParseError> { pub fn parse_inline(&mut self) -> Result<Inline, ParseError> {
unimplemented!() if self.index > self.text.len() {
Err(ParseError::new(self.index))
} else {
Ok(Inline::Text(self.parse_text()?))
}
} }
/// Parses a line of text
pub fn parse_text(&mut self) -> Result<Text, ParseError> { pub fn parse_text(&mut self) -> Result<Text, ParseError> {
unimplemented!() let mut text = Text::new();
while let Ok(subtext) = self.parse_subtext() {
text.add_subtext(subtext);
let current_index = self.index;
if self.next_char() == None {
break;
}
self.revert_to(current_index)?;
}
if self.current_char == '\n' {
parse_option!(self.next_char(), self.index);
}
Ok(text)
}
/// Parses one formatted or plain text fragment at the current position.
///
/// Dispatches on the current character:
/// - `**` starts bold text, a single `*` starts italic text,
/// - `_` starts underlined text, `~` starts striked text,
/// - `'\n'` is an error (a newline never starts a fragment),
/// - anything else is read as plain text via `parse_plain_text`.
///
/// Every formatted variant wraps exactly one recursively parsed fragment.
/// Closing delimiters are optional: if the character following the inner
/// fragment is the matching delimiter it is consumed, otherwise it is left
/// untouched for the caller.
///
/// # Errors
///
/// Returns a `ParseError` when the fragment starts with a newline, when the
/// inner fragment fails to parse, or when `next_char` returns `None` while
/// stepping over a delimiter.
pub fn parse_subtext(&mut self) -> Result<SubText, ParseError> {
    match self.current_char {
        '*' => {
            parse_option!(self.next_char(), self.index);
            if self.current_char == '*' {
                // Second `*`: this is bold text.
                parse_option!(self.next_char(), self.index);
                let subtext = self.parse_subtext()?;
                // Consume up to two closing `*` characters if they are present.
                if self.current_char == '*' {
                    parse_option!(self.next_char(), self.index);
                    if self.current_char == '*' {
                        parse_option!(self.next_char(), self.index);
                    }
                }
                Ok(SubText::Bold(BoldText {
                    value: Box::new(subtext),
                }))
            } else {
                let subtext = self.parse_subtext()?;
                // Only step over a real closing `*`; anything else (for
                // example the newline ending the line) belongs to the caller.
                if self.current_char == '*' {
                    parse_option!(self.next_char(), self.index);
                }
                Ok(SubText::Italic(ItalicText {
                    value: Box::new(subtext),
                }))
            }
        }
        '_' => {
            parse_option!(self.next_char(), self.index);
            let subtext = self.parse_subtext()?;
            // Same rule as for italic text: consume the closer only if present.
            if self.current_char == '_' {
                parse_option!(self.next_char(), self.index);
            }
            Ok(SubText::Underlined(UnderlinedText {
                value: Box::new(subtext),
            }))
        }
        '~' => {
            parse_option!(self.next_char(), self.index);
            let subtext = self.parse_subtext()?;
            if self.current_char == '~' {
                parse_option!(self.next_char(), self.index);
            }
            Ok(SubText::Striked(StrikedText {
                value: Box::new(subtext),
            }))
        }
        '\n' => Err(ParseError::new(self.index)),
        _ => Ok(SubText::Plain(self.parse_plain_text()?)),
    }
}
pub fn parse_plain_text(&mut self) -> Result<PlainText, ParseError> {
let mut current_char = self.current_char;
let mut characters = String::new();
loop {
match current_char {
'\n' | '*' | '_' | '~' => break,
_ => characters.push(current_char),
}
if let Some(character) = self.next_char() {
current_char = character;
} else {
break;
}
}
if characters.len() > 0 {
Ok(PlainText { value: characters })
} else {
Err(ParseError::new(self.index))
}
} }
} }

@ -1,5 +1,4 @@
use std::ops::Sub; #[derive(Clone, Debug)]
pub enum Block { pub enum Block {
Section(Section), Section(Section),
Paragraph(Paragraph), Paragraph(Paragraph),
@ -7,67 +6,82 @@ pub enum Block {
Table(Table), Table(Table),
} }
#[derive(Clone, Debug)]
pub enum Inline { pub enum Inline {
Text(Text), Text(Text),
} }
#[derive(Clone, Debug)]
pub struct Document { pub struct Document {
elements: Vec<Block>, elements: Vec<Block>,
} }
#[derive(Clone, Debug)]
pub struct Section { pub struct Section {
header: Header, header: Header,
elements: Vec<Block>, elements: Vec<Block>,
} }
#[derive(Clone, Debug)]
pub struct Header { pub struct Header {
pub size: u8, pub size: u8,
pub line: Inline, pub line: Inline,
} }
#[derive(Clone, Debug)]
pub struct BlockQuote { pub struct BlockQuote {
paragraph: Paragraph, paragraph: Paragraph,
} }
#[derive(Clone, Debug)]
pub struct Paragraph { pub struct Paragraph {
pub elements: Vec<Inline>, pub elements: Vec<Inline>,
} }
#[derive(Clone, Debug)]
pub struct List { pub struct List {
pub ordered: bool, pub ordered: bool,
pub items: Vec<ListItem>, pub items: Vec<ListItem>,
} }
#[derive(Clone, Debug)]
pub struct ListItem { pub struct ListItem {
text: Inline, pub(crate) text: Inline,
} }
#[derive(Clone, Debug)]
pub struct Table { pub struct Table {
header: Row, header: Row,
rows: Vec<Row>, rows: Vec<Row>,
} }
#[derive(Clone, Debug)]
pub struct Row { pub struct Row {
text: Vec<Cell>, text: Vec<Cell>,
} }
#[derive(Clone, Debug)]
pub struct Cell { pub struct Cell {
text: Inline, text: Inline,
} }
#[derive(Clone, Debug)]
pub struct CodeBlock { pub struct CodeBlock {
language: String, language: String,
code: String, code: String,
} }
#[derive(Clone, Debug)]
pub struct Code { pub struct Code {
code: String, code: String,
} }
#[derive(Clone, Debug)]
pub struct Text { pub struct Text {
subtext: Vec<SubText>, pub subtext: Vec<SubText>,
} }
#[derive(Clone, Debug)]
pub enum SubText { pub enum SubText {
Plain(PlainText), Plain(PlainText),
Code(Code), Code(Code),
@ -77,24 +91,29 @@ pub enum SubText {
Striked(StrikedText), Striked(StrikedText),
} }
#[derive(Clone, Debug)]
pub struct PlainText { pub struct PlainText {
value: String, pub(crate) value: String,
} }
#[derive(Clone, Debug)]
pub struct BoldText { pub struct BoldText {
value: Box<SubText>, pub(crate) value: Box<SubText>,
} }
#[derive(Clone, Debug)]
pub struct ItalicText { pub struct ItalicText {
value: Box<SubText>, pub(crate) value: Box<SubText>,
} }
#[derive(Clone, Debug)]
pub struct UnderlinedText { pub struct UnderlinedText {
value: Box<SubText>, pub(crate) value: Box<SubText>,
} }
#[derive(Clone, Debug)]
pub struct StrikedText { pub struct StrikedText {
value: Box<SubText>, pub(crate) value: Box<SubText>,
} }
impl Document { impl Document {
@ -147,4 +166,16 @@ impl List {
} }
} }
impl Text {
    /// Creates a text that contains no subtext fragments yet.
    pub fn new() -> Self {
        Self {
            subtext: Vec::new(),
        }
    }

    /// Appends `subtext` after the fragments collected so far.
    pub fn add_subtext(&mut self, subtext: SubText) {
        self.subtext.push(subtext)
    }
}

// A public argument-less `new()` should be mirrored by `Default` so the type
// works with `..Default::default()`, `unwrap_or_default()` and similar APIs.
impl Default for Text {
    fn default() -> Self {
        Self::new()
    }
}
// TODO: Images, URIs // TODO: Images, URIs

Loading…
Cancel
Save