diff --git a/.gitignore b/.gitignore index 408b8a5..c7d2ebe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target Cargo.lock -.idea \ No newline at end of file +.idea +test-files \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 13d779f..c03b696 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,34 @@ -#[macro_use] extern crate maplit; -#[macro_use] extern crate lazy_static; +#[macro_use] +extern crate maplit; +#[macro_use] +extern crate lazy_static; pub mod elements; pub mod tokenizer; mod tokens; #[cfg(test)] mod tests { + use crate::tokenizer::Tokenizer; + use crate::tokens::{Grouping, Misc, Operation, Relation, Text, Token}; + #[test] fn it_works() { - assert_eq!(2 + 2, 4); + let expression = "sum_(i=1)^n"; + let mut tokenizer = Tokenizer::new(expression.to_string()); + let tokens = tokenizer.parse(); + assert_eq!( + tokens, + vec![ + Token::Operation(Operation::Sum), + Token::Misc(Misc::Sub), + Token::Grouping(Grouping::RParen), + Token::Text(Text::Plain("i".to_string())), + Token::Relation(Relation::Eq), + Token::Text(Text::Plain("1".to_string())), + Token::Grouping(Grouping::LParen), + Token::Misc(Misc::Pow), + Token::Text(Text::Plain("n".to_string())) + ] + ); } } diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index b8a6560..725a35c 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -5,25 +5,74 @@ use crate::tokens::mappings::{ get_relation_mapping, }; use crate::tokens::{ - Accent, Arrow, FontCommand, Greek, Grouping, Logical, Misc, Operation, Relation, Token, + Accent, Arrow, FontCommand, Greek, Grouping, Logical, Misc, Operation, Relation, Text, Token, }; use charred::tapemachine::CharTapeMachine; use std::collections::HashMap; -use std::fmt::Debug; pub struct Tokenizer { ctm: CharTapeMachine, - tokens: Vec, } impl Tokenizer { pub fn new(text: String) -> Self { + let mut chars = text.chars().collect::>(); + chars.push('\n'); Self { - ctm: CharTapeMachine::new(text.chars().collect()), - tokens: Vec::new(), + ctm: CharTapeMachine::new(chars), } } + pub fn parse(&mut self) -> Vec { + let mut tokens = Vec::::new(); + let mut tmp_string = String::new(); + self.ctm.seek_whitespace(); + + while !self.ctm.check_eof() { + if let Some(grouping) = self.parse_grouping() { + tokens.push(Token::Grouping(grouping)) + } else if let Some(arrow) = self.parse_arrows() { + tokens.push(Token::Arrow(arrow)) + } else if let Some(relation) = self.parse_relation() { + tokens.push(Token::Relation(relation)) + } else if let Some(operation) = self.parse_operation() { + tokens.push(Token::Operation(operation)) + } else if let Some(misc) = self.parse_misc() { + tokens.push(Token::Misc(misc)) + } else if let Some(logical) = self.parse_logical() { + tokens.push(Token::Logical(logical)) + } else if let Some(accent) = self.parse_accent() { + tokens.push(Token::Accent(accent)) + } else if let Some(greek) = self.parse_greek() { + tokens.push(Token::Greek(greek)) + } else if let Some(font) = self.parse_font_command() { + tokens.push(Token::Font(font)) + } else if let Some(whitespace) = self.parse_whitespace() { + tokens.push(Token::Text(whitespace)) + } else { + tmp_string.push(self.ctm.get_current()); + let _ = self.ctm.seek_one(); + continue; + } + if !tmp_string.is_empty() { + let last = tokens.pop().unwrap(); + tokens.push(Token::Text(Text::Plain(tmp_string.clone()))); + tmp_string.clear(); + tokens.push(last); + } + let _ = self.ctm.seek_one(); + } + if !tmp_string.is_empty() { + tokens.push(Token::Text(Text::Plain(tmp_string))); + } + // stripping the whitespace at the end + if let Some(Token::Text(Text::Whitespace)) = tokens.last() { + tokens.pop().unwrap(); + } + + tokens + } + fn parse_misc(&mut self) -> Option { lazy_static! { static ref MISC_MAPPINGS: Vec> = get_misc_mappings(); @@ -153,4 +202,14 @@ impl Tokenizer { } None } + + fn parse_whitespace(&mut self) -> Option { + if self.ctm.get_current().is_whitespace() { + self.ctm.seek_whitespace(); + self.ctm.rewind(self.ctm.get_index() - 1); + Some(Text::Whitespace) + } else { + None + } + } } diff --git a/src/tokens/constants/misc.rs b/src/tokens/constants/misc.rs index 66aebed..d10b66b 100644 --- a/src/tokens/constants/misc.rs +++ b/src/tokens/constants/misc.rs @@ -1,6 +1,7 @@ pub const G_A_FRAC: &'static [&str] = &["/"]; pub const G_T_FRAC: &'static [&str] = &["frac"]; +pub const G_SUB: &'static [&str] = &["_"]; pub const G_POW: &'static [&str] = &["^"]; pub const G_SQRT: &'static [&str] = &["sqrt"]; pub const G_ROOT: &'static [&str] = &["root"]; diff --git a/src/tokens/mappings.rs b/src/tokens/mappings.rs index 22ba58f..60f3318 100644 --- a/src/tokens/mappings.rs +++ b/src/tokens/mappings.rs @@ -12,7 +12,6 @@ use crate::tokens::constants::TokenPattern; use crate::tokens::{ Accent, Arrow, FontCommand, Greek, Grouping, Logical, Misc, Operation, Relation, }; -use std::cell::RefCell; use std::collections::HashMap; pub fn get_operation_mappings() -> Vec> { @@ -64,6 +63,7 @@ pub fn get_misc_mappings() -> Vec> { G_A_FRAC => Misc::AsciiFrac, G_T_FRAC => Misc::LatexFrac, G_POW => Misc::Pow, + G_SUB => Misc::Sub, G_SQRT => Misc::Sqrt, G_ROOT => Misc::Root, G_INT => Misc::Int, diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs index 00a2eaf..53f072c 100644 --- a/src/tokens/mod.rs +++ b/src/tokens/mod.rs @@ -1,7 +1,7 @@ pub mod constants; pub mod mappings; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Token { Operation(Operation), Misc(Misc), @@ -12,9 +12,16 @@ pub enum Token { Accent(Accent), Greek(Greek), Font(FontCommand), + Text(Text), } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] +pub enum Text { + Plain(String), + Whitespace, +} + +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Operation { Plus, Minus, @@ -44,10 +51,11 @@ pub enum Operation { BigCup, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Misc { AsciiFrac, LatexFrac, + Sub, Pow, Sqrt, Root, @@ -85,7 +93,7 @@ pub enum Misc { LatexText, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Relation { Eq, Ne, @@ -109,7 +117,7 @@ pub enum Relation { PropTo, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Logical { And, Or, @@ -125,7 +133,7 @@ pub enum Logical { Models, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Grouping { RParen, LParen, @@ -143,7 +151,7 @@ pub enum Grouping { Norm, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Arrow { UpArrow, DownArrow, @@ -160,7 +168,7 @@ pub enum Arrow { BigLeftRightArrow, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Accent { Hat, Overline, @@ -176,7 +184,7 @@ pub enum Accent { Cancel, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Greek { Alpha, Beta, @@ -216,7 +224,7 @@ pub enum Greek { BigOmega, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum FontCommand { Big, BigOutline,