From 6db54b449f5f394bb04472c487aa5b00b54a0944 Mon Sep 17 00:00:00 2001
From: trivernis
Date: Sat, 30 May 2020 10:23:49 +0200
Subject: [PATCH] Add url parsing

---
 src/parser.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---
 src/tokens.rs | 17 +++++++++++++++++-
 2 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 0fd0b62..a1d7b96 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -385,6 +385,9 @@ impl Parser {
     }

     fn parse_subtext(&mut self) -> Result<SubText, ParseError> {
+        if let Ok(url) = self.parse_url() {
+            return Ok(SubText::Url(url));
+        }
         match self.current_char {
             '*' => {
                 parse_option!(self.next_char(), self.index);
@@ -433,21 +436,74 @@ impl Parser {
                 if self.current_char == '`' {
                     parse_option!(self.next_char(), self.index)
                 }
-                Ok(SubText::Monospace(MonospaceText {
-                    value: Box::new(plain_text),
-                }))
+                Ok(SubText::Monospace(MonospaceText { value: plain_text }))
             }
             '\n' | '|' => Err(ParseError::new(self.index)),
             _ => Ok(SubText::Plain(self.parse_plain_text()?)),
         }
     }

+    /// Parses an inline url written as `[title](url)`.
+    /// An empty title falls back to the url itself. On a mismatch the parser is
+    /// reverted to the index it had on entry and a `ParseError` is returned.
+    fn parse_url(&mut self) -> Result<Url, ParseError> {
+        let start_index = self.index;
+        self.seek_inline_whitespace();
+
+        if self.current_char != '[' {
+            let err = ParseError::new(self.index);
+            self.revert_to(start_index)?;
+            return Err(err);
+        }
+        let mut title = String::new();
+        while let Some(character) = self.next_char() {
+            if character == ']' || character == '\n' {
+                break;
+            }
+            title.push(character);
+        }
+        if self.current_char != ']' {
+            // it stopped at a linebreak or EOF
+            let err = ParseError::new(self.index);
+            self.revert_to(start_index)?;
+            return Err(err);
+        }
+        if self.next_char() != Some('(') {
+            // EOF, or the next char isn't the start of the encased url
+            let err = ParseError::new(self.index);
+            self.revert_to(start_index)?;
+            return Err(err);
+        }
+        self.seek_inline_whitespace();
+        let mut url = String::new();
+        while let Some(character) = self.next_char() {
+            if character == ')' || character == '\n' {
+                break;
+            }
+            url.push(character);
+        }
+        if self.current_char != ')' || url.is_empty() {
+            let err = ParseError::new(self.index);
+            self.revert_to(start_index)?;
+            return Err(err);
+        }
+        parse_option!(self.next_char(), self.index);
+
+        if title.is_empty() {
+            Ok(Url::new(url.clone(), url))
+        } else {
+            Ok(Url::new(title, url))
+        }
+    }
+
     fn parse_plain_text(&mut self) -> Result<PlainText, ParseError> {
         let mut current_char = self.current_char;
         let mut characters = String::new();
+        let mut count = 0;
         loop {
             match current_char {
                 '\n' | '*' | '_' | '~' | '|' | '`' => break,
+                '[' if count > 0 => break, // a leading '[' means url parsing already failed here
                 _ => characters.push(current_char),
             }
             if let Some(character) = self.next_char() {
@@ -455,6 +511,7 @@ impl Parser {
             } else {
                 break;
             }
+            count += 1;
         }

         if characters.len() > 0 {
diff --git a/src/tokens.rs b/src/tokens.rs
index 507727f..f8be10e 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -92,6 +92,7 @@ pub enum SubText {
     Underlined(UnderlinedText),
     Striked(StrikedText),
     Monospace(MonospaceText),
+    Url(Url),
 }

 #[derive(Clone, Debug)]
@@ -121,9 +122,17 @@ pub struct StrikedText {

 #[derive(Clone, Debug)]
 pub struct MonospaceText {
-    pub(crate) value: Box<PlainText>,
+    pub(crate) value: PlainText,
 }

+#[derive(Clone, Debug)]
+pub struct Url {
+    pub(crate) title: String,
+    pub(crate) url: String,
+}
+
+// implementations
+
 impl Document {
     pub fn new() -> Self {
         Self {
@@ -223,4 +232,10 @@ impl Row {
     }
 }

+impl Url {
+    pub fn new(title: String, url: String) -> Self {
+        Self { title, url }
+    }
+}
+
 // TODO: Images, URIs