Add parsing function and test

pull/1/head
trivernis 4 years ago
parent 8a5b6ed2a9
commit ef7bbae025

1
.gitignore vendored

@ -1,3 +1,4 @@
/target /target
Cargo.lock Cargo.lock
.idea .idea
test-files

@ -1,13 +1,34 @@
#[macro_use] extern crate maplit; #[macro_use]
#[macro_use] extern crate lazy_static; extern crate maplit;
#[macro_use]
extern crate lazy_static;
pub mod elements; pub mod elements;
pub mod tokenizer; pub mod tokenizer;
mod tokens; mod tokens;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::tokenizer::Tokenizer;
use crate::tokens::{Grouping, Misc, Operation, Relation, Text, Token};
#[test] #[test]
fn it_works() { fn it_works() {
assert_eq!(2 + 2, 4); let expression = "sum_(i=1)^n";
let mut tokenizer = Tokenizer::new(expression.to_string());
let tokens = tokenizer.parse();
assert_eq!(
tokens,
vec![
Token::Operation(Operation::Sum),
Token::Misc(Misc::Sub),
Token::Grouping(Grouping::RParen),
Token::Text(Text::Plain("i".to_string())),
Token::Relation(Relation::Eq),
Token::Text(Text::Plain("1".to_string())),
Token::Grouping(Grouping::LParen),
Token::Misc(Misc::Pow),
Token::Text(Text::Plain("n".to_string()))
]
);
} }
} }

@ -5,25 +5,74 @@ use crate::tokens::mappings::{
get_relation_mapping, get_relation_mapping,
}; };
use crate::tokens::{ use crate::tokens::{
Accent, Arrow, FontCommand, Greek, Grouping, Logical, Misc, Operation, Relation, Token, Accent, Arrow, FontCommand, Greek, Grouping, Logical, Misc, Operation, Relation, Text, Token,
}; };
use charred::tapemachine::CharTapeMachine; use charred::tapemachine::CharTapeMachine;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::Debug;
pub struct Tokenizer { pub struct Tokenizer {
ctm: CharTapeMachine, ctm: CharTapeMachine,
tokens: Vec<Token>,
} }
impl Tokenizer { impl Tokenizer {
pub fn new(text: String) -> Self { pub fn new(text: String) -> Self {
let mut chars = text.chars().collect::<Vec<char>>();
chars.push('\n');
Self { Self {
ctm: CharTapeMachine::new(text.chars().collect()), ctm: CharTapeMachine::new(chars),
tokens: Vec::new(),
} }
} }
/// Tokenizes the whole input held by the tape machine and returns the tokens.
///
/// At each position the specialised parsers are tried in a fixed order:
/// grouping, arrows, relation, operation, misc, logical, accent, greek,
/// font command and finally whitespace. A character that none of them
/// accepts is appended to a pending text buffer; that buffer is emitted as
/// a single `Text::Plain` token right before the next recognised token, or
/// at the end of the input.
///
/// If the last token produced is `Text::Whitespace`, it is dropped before
/// returning.
pub fn parse(&mut self) -> Vec<Token> {
    let mut tokens = Vec::<Token>::new();
    // Characters that did not match any known token yet.
    let mut pending_text = String::new();
    // Presumably skips leading whitespace -- confirm against charred's docs.
    self.ctm.seek_whitespace();

    while !self.ctm.check_eof() {
        // Build the token first so that buffered plain text can be pushed
        // in front of it without popping and re-pushing.
        let token = if let Some(grouping) = self.parse_grouping() {
            Token::Grouping(grouping)
        } else if let Some(arrow) = self.parse_arrows() {
            Token::Arrow(arrow)
        } else if let Some(relation) = self.parse_relation() {
            Token::Relation(relation)
        } else if let Some(operation) = self.parse_operation() {
            Token::Operation(operation)
        } else if let Some(misc) = self.parse_misc() {
            Token::Misc(misc)
        } else if let Some(logical) = self.parse_logical() {
            Token::Logical(logical)
        } else if let Some(accent) = self.parse_accent() {
            Token::Accent(accent)
        } else if let Some(greek) = self.parse_greek() {
            Token::Greek(greek)
        } else if let Some(font) = self.parse_font_command() {
            Token::Font(font)
        } else if let Some(whitespace) = self.parse_whitespace() {
            Token::Text(whitespace)
        } else {
            // Nothing matched: keep the character as plain text and move on.
            pending_text.push(self.ctm.get_current());
            // Result ignored on purpose; the loop condition re-checks EOF.
            let _ = self.ctm.seek_one();
            continue;
        };

        // Flush buffered plain text so it precedes the token just parsed.
        // `mem::take` moves the buffer out and leaves an empty string behind.
        if !pending_text.is_empty() {
            tokens.push(Token::Text(Text::Plain(std::mem::take(&mut pending_text))));
        }
        tokens.push(token);
        // Result ignored on purpose; the loop condition re-checks EOF.
        let _ = self.ctm.seek_one();
    }

    // Plain text that ran up to the end of the input.
    if !pending_text.is_empty() {
        tokens.push(Token::Text(Text::Plain(pending_text)));
    }
    // stripping the whitespace at the end
    if let Some(Token::Text(Text::Whitespace)) = tokens.last() {
        tokens.pop();
    }

    tokens
}
fn parse_misc(&mut self) -> Option<Misc> { fn parse_misc(&mut self) -> Option<Misc> {
lazy_static! { lazy_static! {
static ref MISC_MAPPINGS: Vec<HashMap<TokenPattern, Misc>> = get_misc_mappings(); static ref MISC_MAPPINGS: Vec<HashMap<TokenPattern, Misc>> = get_misc_mappings();
@ -153,4 +202,14 @@ impl Tokenizer {
} }
None None
} }
/// Recognises whitespace at the current tape position.
///
/// Returns `Some(Text::Whitespace)` when the current character is
/// whitespace and `None` otherwise. In the `None` case the tape is not
/// moved by this function.
fn parse_whitespace(&mut self) -> Option<Text> {
    let at_whitespace = self.ctm.get_current().is_whitespace();
    if !at_whitespace {
        return None;
    }

    self.ctm.seek_whitespace();
    // Step back one position from wherever `seek_whitespace` stopped.
    // NOTE(review): presumably this leaves the tape on the last whitespace
    // character so that the `seek_one` issued by `parse` after each token
    // lands on the next non-whitespace character -- confirm against charred.
    let previous_index = self.ctm.get_index() - 1;
    self.ctm.rewind(previous_index);

    Some(Text::Whitespace)
}
} }

@ -1,6 +1,7 @@
pub const G_A_FRAC: &'static [&str] = &["/"]; pub const G_A_FRAC: &'static [&str] = &["/"];
pub const G_T_FRAC: &'static [&str] = &["frac"]; pub const G_T_FRAC: &'static [&str] = &["frac"];
pub const G_SUB: &'static [&str] = &["_"];
pub const G_POW: &'static [&str] = &["^"]; pub const G_POW: &'static [&str] = &["^"];
pub const G_SQRT: &'static [&str] = &["sqrt"]; pub const G_SQRT: &'static [&str] = &["sqrt"];
pub const G_ROOT: &'static [&str] = &["root"]; pub const G_ROOT: &'static [&str] = &["root"];

@ -12,7 +12,6 @@ use crate::tokens::constants::TokenPattern;
use crate::tokens::{ use crate::tokens::{
Accent, Arrow, FontCommand, Greek, Grouping, Logical, Misc, Operation, Relation, Accent, Arrow, FontCommand, Greek, Grouping, Logical, Misc, Operation, Relation,
}; };
use std::cell::RefCell;
use std::collections::HashMap; use std::collections::HashMap;
pub fn get_operation_mappings() -> Vec<HashMap<TokenPattern, Operation>> { pub fn get_operation_mappings() -> Vec<HashMap<TokenPattern, Operation>> {
@ -64,6 +63,7 @@ pub fn get_misc_mappings() -> Vec<HashMap<TokenPattern, Misc>> {
G_A_FRAC => Misc::AsciiFrac, G_A_FRAC => Misc::AsciiFrac,
G_T_FRAC => Misc::LatexFrac, G_T_FRAC => Misc::LatexFrac,
G_POW => Misc::Pow, G_POW => Misc::Pow,
G_SUB => Misc::Sub,
G_SQRT => Misc::Sqrt, G_SQRT => Misc::Sqrt,
G_ROOT => Misc::Root, G_ROOT => Misc::Root,
G_INT => Misc::Int, G_INT => Misc::Int,

@ -1,7 +1,7 @@
pub mod constants; pub mod constants;
pub mod mappings; pub mod mappings;
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Token { pub enum Token {
Operation(Operation), Operation(Operation),
Misc(Misc), Misc(Misc),
@ -12,9 +12,16 @@ pub enum Token {
Accent(Accent), Accent(Accent),
Greek(Greek), Greek(Greek),
Font(FontCommand), Font(FontCommand),
Text(Text),
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
/// Textual tokens that are not one of the recognised symbol categories.
pub enum Text {
/// Characters that no specialised parser matched, collected into one string.
Plain(String),
/// Whitespace in the input; emitted by the tokenizer's `parse_whitespace`
/// when the current character is whitespace.
Whitespace,
}
#[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Operation { pub enum Operation {
Plus, Plus,
Minus, Minus,
@ -44,10 +51,11 @@ pub enum Operation {
BigCup, BigCup,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Misc { pub enum Misc {
AsciiFrac, AsciiFrac,
LatexFrac, LatexFrac,
Sub,
Pow, Pow,
Sqrt, Sqrt,
Root, Root,
@ -85,7 +93,7 @@ pub enum Misc {
LatexText, LatexText,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Relation { pub enum Relation {
Eq, Eq,
Ne, Ne,
@ -109,7 +117,7 @@ pub enum Relation {
PropTo, PropTo,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Logical { pub enum Logical {
And, And,
Or, Or,
@ -125,7 +133,7 @@ pub enum Logical {
Models, Models,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Grouping { pub enum Grouping {
RParen, RParen,
LParen, LParen,
@ -143,7 +151,7 @@ pub enum Grouping {
Norm, Norm,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Arrow { pub enum Arrow {
UpArrow, UpArrow,
DownArrow, DownArrow,
@ -160,7 +168,7 @@ pub enum Arrow {
BigLeftRightArrow, BigLeftRightArrow,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Accent { pub enum Accent {
Hat, Hat,
Overline, Overline,
@ -176,7 +184,7 @@ pub enum Accent {
Cancel, Cancel,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum Greek { pub enum Greek {
Alpha, Alpha,
Beta, Beta,
@ -216,7 +224,7 @@ pub enum Greek {
BigOmega, BigOmega,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum FontCommand { pub enum FontCommand {
Big, Big,
BigOutline, BigOutline,

Loading…
Cancel
Save