From d5e2db3e1f1f67ff80d2974b8c1a2c545df45d3f Mon Sep 17 00:00:00 2001 From: trivernis Date: Thu, 4 Jun 2020 20:24:53 +0200 Subject: [PATCH] Add emoji parsing --- .gitignore | 3 +- Cargo.lock | 37 ++++++++- Cargo.toml | 5 +- README.md | 12 +-- src/format/html.rs | 11 +++ src/parsing/charstate.rs | 174 ++++++++++++++++++++++----------------- src/parsing/elements.rs | 15 +++- src/parsing/inline.rs | 70 ++++++++++------ src/parsing/mod.rs | 1 + src/parsing/parser.rs | 66 +-------------- src/parsing/tokens.rs | 6 +- src/parsing/utils.rs | 66 +++++++++++++++ 12 files changed, 285 insertions(+), 181 deletions(-) create mode 100644 src/parsing/utils.rs diff --git a/.gitignore b/.gitignore index 7fde081..78c3ae5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ **/*.rs.bk .idea .ast -test \ No newline at end of file +test +perf.data \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 818af82..5255230 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -138,6 +138,15 @@ name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "gh-emoji" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.3.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "heck" version = "0.3.1" @@ -255,6 +264,22 @@ dependencies = [ "pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "pkg-config" version = "0.3.17" @@ -372,13 +397,19 @@ dependencies = [ "serde 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "siphasher" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "snekdown" -version = "0.10.1" +version = "0.11.0" dependencies = [ "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "colored 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "gh-emoji 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", "htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "minify 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -574,6 +605,7 @@ dependencies = [ "checksum crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" "checksum flate2 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2cfff41391129e0a856d6d822600b8d71179d46879e310417eb9c762eb178b42" "checksum fnv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +"checksum gh-emoji 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a17a050b7eb420553344e1cf1db648e8b584c79e98b74e6e6d119eeedd9ddcbc" "checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" "checksum hermit-abi 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71" "checksum htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" @@ -591,6 +623,8 @@ dependencies = [ "checksum num-traits 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" "checksum onig 6.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bd91ccd8a02fce2f7e8a86655aec67bc6c171e6f8e704118a0e8c4b866a05a8a" "checksum onig_sys 69.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3814583fad89f3c60ae0701d80e87e1fd3028741723deda72d0d4a0ecf0cb0db" +"checksum phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +"checksum phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" "checksum pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)" = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677" "checksum plist 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b336d94e8e4ce29bf15bba393164629764744c567e8ad306cc1fdd0119967fd" "checksum proc-macro-error 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678" @@ -605,6 +639,7 @@ dependencies = [ "checksum serde 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)" = "c9124df5b40cbd380080b2cc6ab894c040a3070d995f5c9dc77e18c34a8ae37d" "checksum serde_derive 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)" = "3f2c3ac8e6ca1e9c80b8be1023940162bf81ae3cffbb1809474152f2ce1eb250" "checksum serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)" = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2" +"checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef" "checksum structopt-derive 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "d239ca4b13aee7a2142e6795cbd69e457665ff8037aed33b3effdc430d2f927a" diff --git a/Cargo.toml b/Cargo.toml index 42a01ae..e0933d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snekdown" -version = "0.10.1" +version = "0.11.0" authors = ["trivernis "] edition = "2018" license-file = "LICENSE" @@ -25,4 +25,5 @@ syntect = "4.2.0" chrono = "0.4.11" regex = "1.3.9" lazy_static = "1.4.0" -colored = "1.9.3" \ No newline at end of file +colored = "1.9.3" +gh-emoji = "1.0.3" \ No newline at end of file diff --git a/README.md b/README.md index 3f21923..6e84920 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Snekdown - A wonderful markdown parser +# :snake: Snekdown - More than just Markdown This projects goal is to implement a fast markdown parser with an extended syntax fitted for my needs. @@ -157,22 +157,24 @@ Set options for placeholders _Underlined_ ^Superscript^ `Monospace` +:Emoji: ``` ## Roadmap -The end goal is to have a markdown language similar to LaTeX. +The end goal is to have a markup language with features similar to LaTeX. - [x] Checkboxes -- [ ] Emojis (\:emoji:) +- [x] Emojis (\:emoji:) +- [ ] Metadata files - [ ] Bibliography - [ ] Math -- [ ] Figures - [ ] Text sizes - [ ] Colors -- [ ] Cross References - [ ] Title pages - [ ] Glossary +- [ ] Cross References +- [ ] Figures - [ ] EPUB Rendering (PDF is too hard) - [ ] Custom Elements via templates - [ ] Custom Stylesheets \ No newline at end of file diff --git a/src/format/html.rs b/src/format/html.rs index f4f7d2a..88811f3 100644 --- a/src/format/html.rs +++ b/src/format/html.rs @@ -58,6 +58,7 @@ impl ToHtml for Inline { Inline::Reference(reference) => reference.to_html(), Inline::Superscript(superscript) => superscript.to_html(), Inline::Checkbox(checkbox) => checkbox.to_html(), + Inline::Emoji(emoji) => emoji.to_html(), } } } @@ -487,3 +488,13 @@ impl ToHtml for Checkbox { } } } + +impl ToHtml for Emoji { + fn to_html(&self) -> String { + format!( + "{}", + encode_attribute(self.name.as_str()), + self.value + ) + } +} diff --git a/src/parsing/charstate.rs b/src/parsing/charstate.rs index b931089..7992eae 100644 --- a/src/parsing/charstate.rs +++ b/src/parsing/charstate.rs @@ -1,5 +1,5 @@ -use crate::parsing::parser::ParseError; use crate::parsing::tokens::{LB, SPECIAL_ESCAPE}; +use crate::parsing::utils::{ParseError, ParseResult}; use crate::Parser; pub trait CharStateMachine { @@ -43,6 +43,12 @@ pub trait CharStateMachine { break_at: &[&[char]], err_at: &[char], ) -> Result; + fn get_string_until_or_revert( + &mut self, + break_et: &[char], + err_at: &[char], + revert_index: usize, + ) -> ParseResult; } impl CharStateMachine for Parser { @@ -112,6 +118,28 @@ impl CharStateMachine for Parser { } } + /// seeks until it encounters a linebreak character + fn seek_until_linebreak(&mut self) { + if self.check_special(&LB) { + self.skip_char(); + return; + } + while let Some(_) = self.next_char() { + if self.check_special(&LB) { + self.skip_char(); + return; + } + } + } + + /// seeks inline whitespaces and returns if there + /// were seeked whitespaces + fn check_seek_inline_whitespace(&mut self) -> bool { + let start_index = self.index; + self.seek_inline_whitespace(); + self.index > start_index + } + /// checks if the input character is escaped fn check_escaped(&self) -> bool { if self.index == 0 { @@ -133,31 +161,6 @@ impl CharStateMachine for Parser { chars.contains(&self.current_char) && !self.check_escaped() } - /// checks if the next chars are a special sequence - fn check_special_sequence_group(&mut self, sequences: &[&[char]]) -> bool { - for sequence in sequences { - if self.check_special_sequence(*sequence) { - return true; - } - } - - false - } - - /// returns if the current character is a linebreak character - /// Note: No one likes CRLF - fn check_linebreak(&self) -> bool { - self.current_char == LB && !self.check_escaped() - } - - /// seeks inline whitespaces and returns if there - /// were seeked whitespaces - fn check_seek_inline_whitespace(&mut self) -> bool { - let start_index = self.index; - self.seek_inline_whitespace(); - self.index > start_index - } - /// checks if the next characters match a special sequence fn check_special_sequence(&mut self, sequence: &[char]) -> bool { let start_index = self.index; @@ -182,6 +185,67 @@ impl CharStateMachine for Parser { true } + /// checks if the next chars are a special sequence + fn check_special_sequence_group(&mut self, sequences: &[&[char]]) -> bool { + for sequence in sequences { + if self.check_special_sequence(*sequence) { + return true; + } + } + + false + } + + /// returns if the current character is a linebreak character + /// Note: No one likes CRLF + fn check_linebreak(&self) -> bool { + self.current_char == LB && !self.check_escaped() + } + + fn assert_special(&mut self, character: &char, revert_index: usize) -> Result<(), ParseError> { + if self.check_special(character) { + Ok(()) + } else { + Err(self.revert_with_error(revert_index)) + } + } + + fn assert_special_group( + &mut self, + group: &[char], + revert_index: usize, + ) -> Result<(), ParseError> { + if self.check_special_group(group) { + Ok(()) + } else { + Err(self.revert_with_error(revert_index)) + } + } + + fn assert_special_sequence( + &mut self, + sequence: &[char], + revert_index: usize, + ) -> Result<(), ParseError> { + if self.check_special_sequence(sequence) { + Ok(()) + } else { + Err(self.revert_with_error(revert_index)) + } + } + + fn assert_special_sequence_group( + &mut self, + sequences: &[&[char]], + revert_index: usize, + ) -> Result<(), ParseError> { + if self.check_special_sequence_group(sequences) { + Ok(()) + } else { + Err(self.revert_with_error(revert_index)) + } + } + /// returns the string until a specific fn get_string_until( &mut self, @@ -240,61 +304,17 @@ impl CharStateMachine for Parser { } } - fn assert_special(&mut self, character: &char, revert_index: usize) -> Result<(), ParseError> { - if self.check_special(character) { - Ok(()) - } else { - Err(self.revert_with_error(revert_index)) - } - } - - fn assert_special_group( - &mut self, - group: &[char], - revert_index: usize, - ) -> Result<(), ParseError> { - if self.check_special_group(group) { - Ok(()) - } else { - Err(self.revert_with_error(revert_index)) - } - } - - fn assert_special_sequence( - &mut self, - sequence: &[char], - revert_index: usize, - ) -> Result<(), ParseError> { - if self.check_special_sequence(sequence) { - Ok(()) - } else { - Err(self.revert_with_error(revert_index)) - } - } - - fn assert_special_sequence_group( + /// returns the string until a specific character or reverts back to the given position + fn get_string_until_or_revert( &mut self, - sequences: &[&[char]], + break_at: &[char], + err_at: &[char], revert_index: usize, - ) -> Result<(), ParseError> { - if self.check_special_sequence_group(sequences) { - Ok(()) + ) -> ParseResult { + if let Ok(string) = self.get_string_until(break_at, err_at) { + Ok(string) } else { Err(self.revert_with_error(revert_index)) } } - - /// seeks until it encounters a linebreak character - fn seek_until_linebreak(&mut self) { - if self.check_special(&LB) { - self.skip_char(); - return; - } - while let Some(_) = self.next_char() { - if self.check_special(&LB) { - self.skip_char(); - return; - } - } - } } diff --git a/src/parsing/elements.rs b/src/parsing/elements.rs index 6ca0f13..10c7f67 100644 --- a/src/parsing/elements.rs +++ b/src/parsing/elements.rs @@ -157,6 +157,7 @@ pub enum Inline { Placeholder(Arc>), Reference(Reference), Checkbox(Checkbox), + Emoji(Emoji), } #[derive(Clone, Debug)] @@ -252,10 +253,10 @@ pub struct ConfigValue { pub(crate) value: MetadataValue, } -impl ConfigValue { - fn set_value(&mut self, value: MetadataValue) { - self.value = value; - } +#[derive(Clone, Debug)] +pub struct Emoji { + pub(crate) value: char, + pub(crate) name: String, } // implementations @@ -402,6 +403,12 @@ impl Document { } } +impl ConfigValue { + fn set_value(&mut self, value: MetadataValue) { + self.value = value; + } +} + impl Section { pub fn new(header: Header) -> Self { Self { diff --git a/src/parsing/inline.rs b/src/parsing/inline.rs index 4535674..34a604c 100644 --- a/src/parsing/inline.rs +++ b/src/parsing/inline.rs @@ -1,27 +1,28 @@ use super::charstate::CharStateMachine; use super::elements::*; -use super::parser::ParseError; use super::tokens::*; +use crate::parsing::utils::{ParseError, ParseResult}; use crate::Parser; pub(crate) trait ParseInline { - fn parse_inline(&mut self) -> Result; - fn parse_image(&mut self) -> Result; - fn parse_url(&mut self, short_syntax: bool) -> Result; - fn parse_checkbox(&mut self) -> Result; - fn parse_bold(&mut self) -> Result; - fn parse_italic(&mut self) -> Result; - fn parse_striked(&mut self) -> Result; - fn parse_monospace(&mut self) -> Result; - fn parse_underlined(&mut self) -> Result; - fn parse_superscript(&mut self) -> Result; - fn parse_plain(&mut self) -> Result; - fn parse_surrounded(&mut self, surrounding: &char) -> Result; + fn parse_surrounded(&mut self, surrounding: &char) -> ParseResult; + fn parse_inline(&mut self) -> ParseResult; + fn parse_image(&mut self) -> ParseResult; + fn parse_url(&mut self, short_syntax: bool) -> ParseResult; + fn parse_checkbox(&mut self) -> ParseResult; + fn parse_bold(&mut self) -> ParseResult; + fn parse_italic(&mut self) -> ParseResult; + fn parse_striked(&mut self) -> ParseResult; + fn parse_monospace(&mut self) -> ParseResult; + fn parse_underlined(&mut self) -> ParseResult; + fn parse_superscript(&mut self) -> ParseResult; + fn parse_emoji(&mut self) -> ParseResult; + fn parse_plain(&mut self) -> ParseResult; } impl ParseInline for Parser { /// parses Inline, the formatting parts of a line (Text) - fn parse_inline(&mut self) -> Result<Inline, ParseError> { + fn parse_inline(&mut self) -> ParseResult<Inline> { if self.check_special(&PIPE) || self.check_linebreak() { Err(ParseError::new(self.index)) } else if let Ok(image) = self.parse_image() { @@ -44,13 +45,15 @@ impl ParseInline for Parser { Ok(Inline::Superscript(superscript)) } else if let Ok(checkbox) = self.parse_checkbox() { Ok(Inline::Checkbox(checkbox)) + } else if let Ok(emoji) = self.parse_emoji() { + Ok(Inline::Emoji(emoji)) } else { Ok(Inline::Plain(self.parse_plain()?)) } } /// parses an image url - fn parse_image(&mut self) -> Result<Image, ParseError> { + fn parse_image(&mut self) -> ParseResult<Image> { let start_index = self.index; self.seek_inline_whitespace(); self.assert_special(&IMG_START, start_index)?; @@ -69,7 +72,7 @@ impl ParseInline for Parser { } // parses an url - fn parse_url(&mut self, short_syntax: bool) -> Result<Url, ParseError> { + fn parse_url(&mut self, short_syntax: bool) -> ParseResult<Url> { let start_index = self.index; self.seek_inline_whitespace(); @@ -104,7 +107,7 @@ impl ParseInline for Parser { } /// parses a markdown checkbox - fn parse_checkbox(&mut self) -> Result<Checkbox, ParseError> { + fn parse_checkbox(&mut self) -> ParseResult<Checkbox> { let start_index = self.index; self.assert_special(&CHECK_OPEN, start_index)?; self.skip_char(); @@ -123,7 +126,7 @@ impl ParseInline for Parser { } /// parses bold text with must start with two asterisks - fn parse_bold(&mut self) -> Result<BoldText, ParseError> { + fn parse_bold(&mut self) -> ParseResult<BoldText> { let start_index = self.index; self.assert_special_sequence(&BOLD, start_index)?; self.skip_char(); @@ -136,20 +139,20 @@ impl ParseInline for Parser { }) } - fn parse_italic(&mut self) -> Result<ItalicText, ParseError> { + fn parse_italic(&mut self) -> ParseResult<ItalicText> { Ok(ItalicText { value: Box::new(self.parse_surrounded(&ITALIC)?), }) } - fn parse_striked(&mut self) -> Result<StrikedText, ParseError> { + fn parse_striked(&mut self) -> ParseResult<StrikedText> { Ok(StrikedText { value: Box::new(self.parse_surrounded(&STRIKED)?), }) } /// parses monospace text (inline-code) that isn't allowed to contain special characters - fn parse_monospace(&mut self) -> Result<MonospaceText, ParseError> { + fn parse_monospace(&mut self) -> ParseResult<MonospaceText> { let start_index = self.index; self.assert_special(&BACKTICK, start_index)?; self.skip_char(); @@ -160,20 +163,20 @@ impl ParseInline for Parser { Ok(MonospaceText { value: content }) } - fn parse_underlined(&mut self) -> Result<UnderlinedText, ParseError> { + fn parse_underlined(&mut self) -> ParseResult<UnderlinedText> { Ok(UnderlinedText { value: Box::new(self.parse_surrounded(&UNDERLINED)?), }) } - fn parse_superscript(&mut self) -> Result<SuperscriptText, ParseError> { + fn parse_superscript(&mut self) -> ParseResult<SuperscriptText> { Ok(SuperscriptText { value: Box::new(self.parse_surrounded(&SUPER)?), }) } /// parses plain text as a string until it encounters an unescaped special inline char - fn parse_plain(&mut self) -> Result<PlainText, ParseError> { + fn parse_plain(&mut self) -> ParseResult<PlainText> { if self.check_linebreak() { return Err(ParseError::new(self.index)); } @@ -196,7 +199,7 @@ impl ParseInline for Parser { } /// parses Inline surrounded by characters - fn parse_surrounded(&mut self, surrounding: &char) -> Result<Inline, ParseError> { + fn parse_surrounded(&mut self, surrounding: &char) -> ParseResult<Inline> { let start_index = self.index; self.assert_special(surrounding, start_index)?; self.skip_char(); @@ -206,4 +209,21 @@ impl ParseInline for Parser { Ok(inline) } + + fn parse_emoji(&mut self) -> ParseResult<Emoji> { + let start_index = self.index; + self.assert_special(&EMOJI, start_index)?; + self.skip_char(); + let name = self.get_string_until_or_revert(&[EMOJI], &[], start_index)?; + self.skip_char(); + if let Some(emoji) = gh_emoji::get(name.as_str()) { + let emoji_char = *emoji.chars().collect::<Vec<char>>().first().unwrap(); + Ok(Emoji { + value: emoji_char, + name, + }) + } else { + Err(self.revert_with_error(start_index)) + } + } } diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index 3c8d04b..0d02b89 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -4,3 +4,4 @@ pub mod inline; pub mod parser; pub mod placeholders; pub mod tokens; +pub(crate) mod utils; diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index bc1141a..c9d729b 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -3,12 +3,10 @@ use super::tokens::*; use crate::parsing::charstate::CharStateMachine; use crate::parsing::inline::ParseInline; use crate::parsing::placeholders::ProcessPlaceholders; +use crate::parsing::utils::{ParseError, ParseResult}; use colored::*; use crossbeam_utils::sync::WaitGroup; use std::collections::HashMap; -use std::error::Error; -use std::fmt; -use std::fmt::{Display, Formatter}; use std::fs::read_to_string; use std::io; use std::path::Path; @@ -24,66 +22,6 @@ macro_rules! parse_option { }; } -#[derive(Debug)] -pub struct ParseError { - index: usize, - message: Option<String>, -} -impl Display for ParseError { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - if let Some(message) = &self.message { - write!( - f, - "{}", - format!("Parse Error at index {}: {}", self.index, message).red() - ) - } else { - write!( - f, - "{}", - format!("Parse Error at index {}", self.index).red() - ) - } - } -} -impl Error for ParseError {} -impl ParseError { - pub fn new(index: usize) -> Self { - Self { - index, - message: None, - } - } - - pub fn new_with_message(index: usize, message: &str) -> Self { - Self { - index, - message: Some(message.to_string()), - } - } - - pub fn set_message(&mut self, message: &str) { - self.message = Some(message.to_string()); - } - - pub fn get_position(&self, content: &str) -> Option<(usize, usize)> { - if content.len() <= self.index { - return None; - } - let split_content = content.split_at(self.index); - let line_number = split_content.0.matches("\n").count() as usize; - let overshoot_position = self.index as isize - split_content.0.len() as isize; - - if let Some(line) = split_content.0.lines().last() { - let inline_position = (line.len() as isize + overshoot_position) as usize; - - Some((line_number, inline_position)) - } else { - None - } - } -} - pub struct Parser { pub(crate) index: usize, pub(crate) text: Vec<char>, @@ -173,7 +111,7 @@ impl Parser { } /// starts up a new thread to parse the imported document - fn import_document(&mut self, path: String) -> Result<Arc<Mutex<ImportAnchor>>, ParseError> { + fn import_document(&mut self, path: String) -> ParseResult<Arc<Mutex<ImportAnchor>>> { let path = self.transform_path(path); let path_info = Path::new(&path); if !path_info.exists() || !path_info.is_file() { diff --git a/src/parsing/tokens.rs b/src/parsing/tokens.rs index d16217b..479d6db 100644 --- a/src/parsing/tokens.rs +++ b/src/parsing/tokens.rs @@ -25,6 +25,7 @@ pub(crate) const DOUBLE_QUOTE: char = '"'; pub(crate) const SINGLE_QUOTE: char = '\''; pub(crate) const DOT: char = '.'; pub(crate) const UP: char = '^'; +pub(crate) const COLON: char = ':'; // aliases @@ -51,6 +52,7 @@ pub(crate) const MONOSPACE: char = BACKTICK; pub(crate) const STRIKED: char = TILDE; pub(crate) const UNDERLINED: char = UNDERSCR; pub(crate) const SUPER: char = UP; +pub(crate) const EMOJI: char = COLON; pub(crate) const BOLD: [char; 2] = [ASTERISK, ASTERISK]; // groups @@ -69,8 +71,8 @@ pub(crate) const BLOCK_SPECIAL_CHARS: [&[char]; 9] = [ &SQ_CENTERED_START, ]; -pub(crate) const INLINE_SPECIAL_CHARS: [char; 9] = [ - BACKTICK, TILDE, UNDERSCR, ASTERISK, DESC_OPEN, IMG_START, URL_OPEN, LB, SUPER, +pub(crate) const INLINE_SPECIAL_CHARS: [char; 10] = [ + BACKTICK, TILDE, UNDERSCR, ASTERISK, DESC_OPEN, IMG_START, URL_OPEN, LB, SUPER, EMOJI, ]; pub(crate) const LIST_SPECIAL_CHARS: [char; 14] = [ diff --git a/src/parsing/utils.rs b/src/parsing/utils.rs new file mode 100644 index 0000000..0732872 --- /dev/null +++ b/src/parsing/utils.rs @@ -0,0 +1,66 @@ +use colored::*; +use std::error::Error; +use std::fmt; +use std::fmt::{Display, Formatter}; + +pub type ParseResult<T> = Result<T, ParseError>; + +#[derive(Debug)] +pub struct ParseError { + index: usize, + message: Option<String>, +} +impl Display for ParseError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if let Some(message) = &self.message { + write!( + f, + "{}", + format!("Parse Error at index {}: {}", self.index, message).red() + ) + } else { + write!( + f, + "{}", + format!("Parse Error at index {}", self.index).red() + ) + } + } +} +impl Error for ParseError {} +impl ParseError { + pub fn new(index: usize) -> Self { + Self { + index, + message: None, + } + } + + pub fn new_with_message(index: usize, message: &str) -> Self { + Self { + index, + message: Some(message.to_string()), + } + } + + pub fn set_message(&mut self, message: &str) { + self.message = Some(message.to_string()); + } + + pub fn get_position(&self, content: &str) -> Option<(usize, usize)> { + if content.len() <= self.index { + return None; + } + let split_content = content.split_at(self.index); + let line_number = split_content.0.matches("\n").count() as usize; + let overshoot_position = self.index as isize - split_content.0.len() as isize; + + if let Some(line) = split_content.0.lines().last() { + let inline_position = (line.len() as isize + overshoot_position) as usize; + + Some((line_number, inline_position)) + } else { + None + } + } +}