diff --git a/Cargo.toml b/Cargo.toml index a85033e..0d58a2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "asciimath-rs" description = "AsciiMath parser" repository = "https://github.com/trivernis/asciimath-rs" -version = "0.2.0" +version = "0.3.0" authors = ["trivernis "] edition = "2018" readme = "README.md" diff --git a/src/elements/group.rs b/src/elements/group.rs index 4b47cf2..8667ece 100644 --- a/src/elements/group.rs +++ b/src/elements/group.rs @@ -2,6 +2,7 @@ use crate::elements::special::Expression; #[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum Group { + MSep, Parentheses(Parentheses), Brackets(Brackets), Braces(Braces), @@ -11,6 +12,8 @@ pub enum Group { Floor(Floor), Ceil(Ceil), Norm(Norm), + Matrix(Matrix), + Vector(Vector), } #[derive(Debug, Clone, PartialOrd, PartialEq)] @@ -57,3 +60,13 @@ pub struct Ceil { pub struct Norm { pub inner: Box, } + +#[derive(Debug, Clone, PartialOrd, PartialEq)] +pub struct Matrix { + pub inner: Vec>, +} + +#[derive(Debug, Clone, PartialOrd, PartialEq)] +pub struct Vector { + pub inner: Vec>, +} diff --git a/src/elements/special.rs b/src/elements/special.rs index 89ae407..d83616b 100644 --- a/src/elements/special.rs +++ b/src/elements/special.rs @@ -3,7 +3,7 @@ use crate::utils::Boxed; #[derive(Debug, Clone, PartialOrd, PartialEq)] pub struct Expression { - children: Vec, + pub children: Vec, } #[derive(Debug, Clone, PartialOrd, PartialEq)] diff --git a/src/format/mathml.rs b/src/format/mathml.rs new file mode 100644 index 0000000..9051e4c --- /dev/null +++ b/src/format/mathml.rs @@ -0,0 +1,50 @@ +use crate::tokens::Greek; + +pub trait ToMathML { + fn to_mathml(&self) -> String; +} + +impl ToMathML for Greek { + fn to_mathml(&self) -> String { + let inner = match self { + Greek::Alpha => "α", + Greek::Beta => "β", + Greek::Gamma => "γ", + Greek::BigGamma => "Γ", + Greek::Delta => "δ", + Greek::BigDelta => "Δ", + Greek::Epsilon => "ε", + Greek::VarEpsilon => "ε", + Greek::Zeta => "ζ", + Greek::Eta => "η", + Greek::Theta => "θ", + Greek::BigTheta => "Θ", + Greek::VarTheta => "θ", + Greek::Iota => "ι", + Greek::Kappa => "κ", + Greek::Lambda => "λ", + Greek::BigLambda => "Λ", + Greek::Mu => "μ", + Greek::Nu => "ν", + Greek::Xi => "ξ", + Greek::BigXi => "Ξ", + Greek::Pi => "π", + Greek::BigPi => "Π", + Greek::Rho => "ρ", + Greek::Sigma => "σ", + Greek::BigSigma => "Σ", + Greek::Tau => "τ", + Greek::Upsilon => "υ", + Greek::Phi => "φ", + Greek::BigPhi => "Φ", + Greek::VarPhi => "φ", + Greek::Chi => "χ", + Greek::Psi => "ψ", + Greek::BigPsi => "Ψ", + Greek::Omega => "ω", + Greek::BigOmega => "Ω", + }; + + format!("{}", inner) + } +} diff --git a/src/format/mod.rs b/src/format/mod.rs new file mode 100644 index 0000000..6b8555d --- /dev/null +++ b/src/format/mod.rs @@ -0,0 +1 @@ +pub mod mathml; diff --git a/src/lib.rs b/src/lib.rs index 0874804..138b74a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ use crate::parsing::tokenizer::Tokenizer; use crate::parsing::tree_parser::TreeParser; pub mod elements; +pub mod format; pub mod parsing; pub mod tokens; pub(crate) mod utils; @@ -27,6 +28,7 @@ pub fn parse(content: String) -> Expression { #[cfg(test)] mod tests { + use crate::elements::group::{Brackets, Group, Matrix, Vector}; use crate::elements::literal::{Literal, Number}; use crate::elements::special::{Expression, Special, Sum}; use crate::elements::Element; @@ -101,6 +103,33 @@ mod tests { ); } + #[test] + fn it_tokenizes_expressions3() { + let expression = "[[1, 2],[3, 4]]"; + let mut tokenizer = Tokenizer::new(expression.to_string()); + let tokens = tokenizer.parse(); + assert_eq!( + tokens, + vec![ + Token::Grouping(Grouping::RBracket), + Token::Grouping(Grouping::RBracket), + Token::Text(Text::Number("1".to_string())), + Token::Grouping(Grouping::MSep), + Token::Text(Text::Whitespace), + Token::Text(Text::Number("2".to_string())), + Token::Grouping(Grouping::LBracket), + Token::Grouping(Grouping::MSep), + Token::Grouping(Grouping::RBracket), + Token::Text(Text::Number("3".to_string())), + Token::Grouping(Grouping::MSep), + Token::Text(Text::Whitespace), + Token::Text(Text::Number("4".to_string())), + Token::Grouping(Grouping::LBracket), + Token::Grouping(Grouping::LBracket), + ] + ); + } + #[test] fn it_tokenizes_text1() { let expression = "\"just plain text\""; @@ -158,12 +187,178 @@ mod tests { assert_eq!(expression, test_expression) } + #[test] + fn it_parses_matrices() { + assert_eq!( + parse("[[1, 2],[3,4]]".to_string()), + Expression { + children: vec![Element::Group(Group::Matrix(Matrix { + inner: vec![ + vec![ + Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "1".to_string() + })),] + }, + Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "2".to_string() + })),] + } + ], + vec![ + Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "3".to_string() + })),] + }, + Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "4".to_string() + })),] + } + ] + ] + }))] + } + ); + } + + #[test] + fn it_rejects_invalid_matrices() { + assert_eq!( + parse("[[1, 3, 4],[3,4]]".to_string()), + Expression { + children: vec![Element::Group(Group::Brackets(Brackets { + inner: Expression { + children: vec![ + Element::Group(Group::Brackets(Brackets { + inner: Expression { + children: vec![ + Element::Literal(Literal::Number(Number { + number: "1".to_string() + })), + Element::Group(Group::MSep), + Element::Literal(Literal::Number(Number { + number: "3".to_string() + })), + Element::Group(Group::MSep), + Element::Literal(Literal::Number(Number { + number: "4".to_string() + })) + ] + } + .boxed() + })), + Element::Group(Group::MSep), + Element::Group(Group::Brackets(Brackets { + inner: Expression { + children: vec![ + Element::Literal(Literal::Number(Number { + number: "3".to_string() + })), + Element::Group(Group::MSep), + Element::Literal(Literal::Number(Number { + number: "4".to_string() + })) + ] + } + .boxed() + })) + ] + } + .boxed() + }))] + } + ); + assert_eq!( + parse("[[1]]".to_string()), + Expression { + children: vec![Element::Group(Group::Brackets(Brackets { + inner: Expression { + children: vec![Element::Group(Group::Brackets(Brackets { + inner: Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "1".to_string() + })),] + } + .boxed() + })),] + } + .boxed() + }))] + } + ); + } + + #[test] + fn it_parses_vectors() { + assert_eq!( + parse("((1), (2))".to_string()), + Expression { + children: vec![Element::Group(Group::Vector(Vector { + inner: vec![ + vec![Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "1".to_string() + }))] + }], + vec![Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "2".to_string() + }))] + }] + ] + }))] + } + ); + assert_eq!( + parse("((1, 3), (2, 5))".to_string()), + Expression { + children: vec![Element::Group(Group::Vector(Vector { + inner: vec![ + vec![ + Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "1".to_string() + }))] + }, + Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "3".to_string() + }))] + } + ], + vec![ + Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "2".to_string() + }))] + }, + Expression { + children: vec![Element::Literal(Literal::Number(Number { + number: "5".to_string() + }))] + } + ] + ] + }))] + } + ) + } + //#[test] - fn it_parses_into_a_tree2() { + fn it_parses_into_a_tree3() { fs::write( "test-files/test.txt", - format!("{:#?}", parse("color(red)(a) * b^4 - c(c-2)".to_string())), - ); + format!( + "{:#?}", + parse( + "color(red)(a) * b^4 - c(c-2) [[1, 3, 2 + 2],[3 - x, 4] ((2),(3))".to_string() + ) + ), + ) + .unwrap(); } #[bench] diff --git a/src/parsing/tree_parser.rs b/src/parsing/tree_parser.rs index db37653..6ddf84b 100644 --- a/src/parsing/tree_parser.rs +++ b/src/parsing/tree_parser.rs @@ -1,6 +1,6 @@ use crate::elements::accent::{Color, ExpressionAccent, GenericAccent, OverSet, UnderSet}; use crate::elements::group::{ - Abs, Angles, Braces, Brackets, Ceil, Floor, Group, Norm, Parentheses, XGroup, + Abs, Angles, Braces, Brackets, Ceil, Floor, Group, Matrix, Norm, Parentheses, Vector, XGroup, }; use crate::elements::literal::{Literal, Number, PlainText, Symbol}; use crate::elements::special::{ @@ -102,7 +102,11 @@ impl TreeParser { Token::Operation(op) => Some(self.parse_operation(op)), Token::Misc(m) => Some(self.parse_misc(m)), Token::Grouping(g) => { - if let Some(group) = self.parse_group(g) { + if let Some(group) = self.parse_matrix() { + Some(Element::Group(group)) + } else if let Some(group) = self.parse_vector() { + Some(Element::Group(group)) + } else if let Some(group) = self.parse_group(g) { Some(Element::Group(group)) } else { None @@ -254,6 +258,84 @@ impl TreeParser { } } + fn parse_matrix(&mut self) -> Option { + let token = self.current_token().clone(); + let start_index = self.index; + + if let Token::Grouping(Grouping::RBracket) = token { + let mut expressions = Vec::new(); + + while !self.end_reached() { + if let Some(Token::Grouping(Grouping::RBracket)) = self.peek() { + self.step(); + self.step(); + expressions.push(self.parse_expression()); + self.step(); + + if let Token::Grouping(Grouping::LBracket) = self.current_token() { + self.step(); + } + if let Token::Grouping(Grouping::LBracket) = self.current_token() { + break; + } + } else { + break; + } + } + // Remapping the expression into a matrix + let expression_matrix = self.transform_vec_to_matrix(expressions); + + if !self.validate_matrix(&expression_matrix) { + self.index = start_index; + None + } else { + Some(Group::Matrix(Matrix { + inner: expression_matrix, + })) + } + } else { + None + } + } + + fn parse_vector(&mut self) -> Option { + let token = self.current_token().clone(); + let start_index = self.index; + + if let Token::Grouping(Grouping::RParen) = token { + let mut expressions = Vec::new(); + + while !self.end_reached() { + if let Some(Token::Grouping(Grouping::RParen)) = self.peek() { + self.step(); + self.step(); + expressions.push(self.parse_expression()); + + if let Token::Grouping(Grouping::LParen) = self.current_token() { + self.step(); + } + if let Token::Grouping(Grouping::LParen) = self.current_token() { + break; + } + } else { + break; + } + } + let expression_matrix = self.transform_vec_to_matrix(expressions); + + if !self.validate_matrix(&expression_matrix) { + self.index = start_index; + None + } else { + Some(Group::Vector(Vector { + inner: expression_matrix, + })) + } + } else { + None + } + } + fn parse_group(&mut self, token: Grouping) -> Option { match token { Grouping::RParen => { @@ -309,6 +391,7 @@ impl TreeParser { let inner = self.parse_expression().boxed(); Some(Group::Norm(Norm { inner })) } + Grouping::MSep => Some(Group::MSep), _ => { self.group_return = true; None @@ -343,4 +426,36 @@ impl TreeParser { None } } + + /// Remaps an expresion vector into a matrix of expressions by splitting on each MSep token + fn transform_vec_to_matrix(&self, expressions: Vec) -> Vec> { + expressions + .iter() + .map(|e| { + let children = e.children.clone(); + let mut expressions = Vec::new(); + + for elements in children.split(|e| e == &Element::Group(Group::MSep)) { + expressions.push(Expression { + children: elements.to_vec(), + }) + } + expressions + }) + .collect::>>() + } + + /// Validates a matrix of expressions if every row has the same length + fn validate_matrix(&self, matrix: &Vec>) -> bool { + if matrix.is_empty() { + false + } else { + let first_length = matrix.first().unwrap().len(); + if first_length * matrix.len() == 1 { + false + } else { + matrix.iter().all(|e| e.len() == first_length) + } + } + } } diff --git a/src/tokens/constants/grouping.rs b/src/tokens/constants/grouping.rs index 9b2e00b..a77371e 100644 --- a/src/tokens/constants/grouping.rs +++ b/src/tokens/constants/grouping.rs @@ -1,11 +1,14 @@ pub const G_RPAREN: &'static [&str] = &["("]; pub const G_LPAREN: &'static [&str] = &[")"]; -pub const G_RBRAC: &'static [&str] = &["["]; -pub const G_LBRAC: &'static [&str] = &["]"]; +pub const G_RBRACKET: &'static [&str] = &["["]; +pub const G_LBRACKET: &'static [&str] = &["]"]; -pub const G_RCURL: &'static [&str] = &["{"]; -pub const G_LCURL: &'static [&str] = &["}"]; +pub const G_RBRACE: &'static [&str] = &["{"]; +pub const G_LBRACE: &'static [&str] = &["}"]; + +pub const G_RBRACE_HIDDEN: &'static [&str] = &["{:"]; +pub const G_LBRACE_HIDDEN: &'static [&str] = &[":}"]; pub const G_LANGLE: &'static [&str] = &["(:", "<<", "langle"]; pub const G_RANGLE: &'static [&str] = &[":)", ">>", "rangle"]; @@ -16,4 +19,6 @@ pub const G_FLOOR: &'static [&str] = &["floor"]; pub const G_CEIL: &'static [&str] = &["ceil"]; pub const G_NORM: &'static [&str] = &["norm"]; +pub const G_MATRIX_SEP: &'static [&str] = &[","]; + pub const T_LPAREN: char = ')'; diff --git a/src/tokens/mappings.rs b/src/tokens/mappings.rs index 0681781..1ac381f 100644 --- a/src/tokens/mappings.rs +++ b/src/tokens/mappings.rs @@ -159,14 +159,15 @@ pub fn get_grouping_mappings() -> Vec> { G_RANGLE => Grouping::RAngle, G_RXPAR => Grouping::RXPar, G_LXPAR => Grouping::LXPar, + G_MATRIX_SEP => Grouping::MSep, }, hashmap! { G_RPAREN => Grouping::RParen, G_LPAREN => Grouping::LParen, - G_RBRAC => Grouping::RBracket, - G_LBRAC => Grouping::LBracket, - G_RCURL => Grouping::RBrace, - G_LCURL => Grouping::LBrace, + G_RBRACKET => Grouping::RBracket, + G_LBRACKET => Grouping::LBracket, + G_RBRACE => Grouping::RBrace, + G_LBRACE => Grouping::LBrace, G_ABS => Grouping::Abs, G_FLOOR => Grouping::Floor, G_CEIL => Grouping::Ceil, diff --git a/src/tokens/mod.rs b/src/tokens/mod.rs index 6964d4a..914fde2 100644 --- a/src/tokens/mod.rs +++ b/src/tokens/mod.rs @@ -152,6 +152,7 @@ pub enum Grouping { Floor, Ceil, Norm, + MSep, } #[derive(Debug, Clone, PartialOrd, PartialEq)]