From 90348b889f397f2753473764c2d7a02986beddeb Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Mon, 13 Mar 2023 19:27:54 +0100 Subject: [PATCH] revamped snippet text element parsing Snippet text elements can contain escape sequences that must be treated properly. Furthermore, snippets must always escape certain characters (like `}` or `\`). The function has been updated to account for that. `text` is now also included with `anything` to match the grammar and can also match empty text. To avoid infinite loops, the `non_empty` combinator has been added, which is automatically used in the `one_or_more` and `zero_or_more` combinators where the problem would occur. --- helix-lsp/src/snippet.rs | 117 ++++++++++++++++++++++++--------------- helix-parsec/src/lib.rs | 13 +++++ 2 files changed, 85 insertions(+), 45 deletions(-) diff --git a/helix-lsp/src/snippet.rs b/helix-lsp/src/snippet.rs index 77f44d4e..f64f29f2 100644 --- a/helix-lsp/src/snippet.rs +++ b/helix-lsp/src/snippet.rs @@ -12,7 +12,7 @@ pub enum CaseChange { #[derive(Debug, PartialEq, Eq)] pub enum FormatItem<'a> { - Text(&'a str), + Text(Tendril), Capture(usize), CaseChange(usize, CaseChange), Conditional(usize, Option<&'a str>, Option<&'a str>), @@ -20,9 +20,9 @@ pub enum FormatItem<'a> { #[derive(Debug, PartialEq, Eq)] pub struct Regex<'a> { - value: &'a str, + value: Tendril, replacement: Vec<FormatItem<'a>>, - options: Option<&'a str>, + options: Tendril, } #[derive(Debug, PartialEq, Eq)] @@ -36,14 +36,14 @@ pub enum SnippetElement<'a> { }, Choice { tabstop: usize, - choices: Vec<&'a str>, + choices: Vec<Tendril>, }, Variable { name: &'a str, default: Option<&'a str>, regex: Option<Regex<'a>>, }, - Text(&'a str), + Text(Tendril), } #[derive(Debug, PartialEq, Eq)] @@ -67,12 +67,12 @@ fn render_elements( for element in snippet_elements { match element { - &Text(text) => { + Text(text) => { // small optimization to avoid calling replace when it's unnecessary let text = if text.contains('\n') { Cow::Owned(text.replace('\n', 
newline_with_offset)) } else { - Cow::Borrowed(text) + Cow::Borrowed(text.as_str()) }; *offset += text.chars().count(); insert.push_str(&text); @@ -160,6 +160,7 @@ pub fn render( } mod parser { + use helix_core::Tendril; use helix_parsec::*; use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement}; @@ -210,8 +211,32 @@ mod parser { } } - fn text<'a, const SIZE: usize>(cs: [char; SIZE]) -> impl Parser<'a, Output = &'a str> { - take_while(move |c| cs.into_iter().all(|c1| c != c1)) + const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$']; + const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/']; + const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ',']; + + fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> { + move |input: &'a str| { + let mut chars = input.char_indices(); + let mut res = Tendril::new(); + while let Some((i, c)) = chars.next() { + match c { + '\\' => { + if let Some((_, c)) = chars.next() { + if escape_chars.contains(&c) { + res.push(c); + continue; + } + } + return Ok((&input[i..], res)); + } + c if escape_chars.contains(&c) => return Ok((&input[i..], res)), + c => res.push(c), + } + } + + Ok(("", res)) + } } fn digit<'a>() -> impl Parser<'a, Output = usize> { @@ -274,20 +299,18 @@ mod parser { } fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> { - let text = map(text(['$', '/']), FormatItem::Text); - let replacement = reparse_as( - take_until(|c| c == '/'), - one_or_more(choice!(format(), text)), - ); - map( seq!( "/", - take_until(|c| c == '/'), + // TODO parse as ECMAScript and convert to rust regex + non_empty(text(&['/', '\\'])), "/", - replacement, + one_or_more(choice!( + format(), + map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text) + )), "/", - optional(take_until(|c| c == '}')), + text(&['}', '\\',]), ), |(_, value, _, replacement, _, options)| Regex { value, @@ -308,13 +331,12 @@ mod parser { } fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> { - let text = 
map(text(['$', '}']), SnippetElement::Text); map( seq!( "${", digit(), ":", - one_or_more(choice!(anything(), text)), + one_or_more(anything(TEXT_ESCAPE_CHARS)), "}" ), |seq| SnippetElement::Placeholder { @@ -330,7 +352,7 @@ mod parser { "${", digit(), "|", - sep(take_until(|c| c == ',' || c == '|'), ","), + sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","), "|}", ), |seq| SnippetElement::Choice { @@ -368,17 +390,21 @@ mod parser { ) } - fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> { - // The parser has to be constructed lazily to avoid infinite opaque type recursion - |input: &'a str| { - let parser = choice!(tabstop(), placeholder(), choice(), variable()); + fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> { + move |input: &'a str| { + let parser = choice!( + tabstop(), + placeholder(), + choice(), + variable(), + map(text(escape_chars), SnippetElement::Text) + ); parser.parse(input) } } fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> { - let text = map(text(['$']), SnippetElement::Text); - map(one_or_more(choice!(anything(), text)), |parts| Snippet { + map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet { elements: parts, }) } @@ -392,6 +418,7 @@ mod parser { } }) } + #[cfg(test)] mod test { use super::SnippetElement::*; @@ -407,12 +434,12 @@ mod parser { assert_eq!( Ok(Snippet { elements: vec![ - Text("match("), + Text("match(".into()), Placeholder { tabstop: 1, - value: vec!(Text("Arg1")), + value: vec!(Text("Arg1".into())), }, - Text(")") + Text(")".into()) ] }), parse("match(${1:Arg1})") @@ -446,15 +473,15 @@ mod parser { assert_eq!( Ok(Snippet { elements: vec![ - Text("local "), + Text("local ".into()), Placeholder { tabstop: 1, - value: vec!(Text("var")), + value: vec!(Text("var".into())), }, - Text(" = "), + Text(" = ".into()), Placeholder { tabstop: 1, - value: vec!(Text("value")), + value: vec!(Text("value".into())), }, ] }), @@ -468,7 +495,7 @@ mod parser { Ok(Snippet { 
elements: vec![Placeholder { tabstop: 1, - value: vec!(Text("var, "), Tabstop { tabstop: 2 },), + value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },), },] }), parse("${1:var, $2}") @@ -482,10 +509,10 @@ mod parser { elements: vec![Placeholder { tabstop: 1, value: vec!( - Text("foo "), + Text("foo ".into()), Placeholder { tabstop: 2, - value: vec!(Text("bar")), + value: vec!(Text("bar".into())), }, ), },] @@ -499,27 +526,27 @@ mod parser { assert_eq!( Ok(Snippet { elements: vec![ - Text("hello "), + Text("hello ".into()), Tabstop { tabstop: 1 }, Tabstop { tabstop: 2 }, - Text(" "), + Text(" ".into()), Choice { tabstop: 1, - choices: vec!["one", "two", "three"] + choices: vec!["one".into(), "two".into(), "three".into()] }, - Text(" "), + Text(" ".into()), Variable { name: "name", default: Some("foo"), regex: None }, - Text(" "), + Text(" ".into()), Variable { name: "var", default: None, regex: None }, - Text(" "), + Text(" ".into()), Variable { name: "TM", default: None, @@ -539,9 +566,9 @@ mod parser { name: "TM_FILENAME", default: None, regex: Some(Regex { - value: "(.*).+$", + value: "(.*).+$".into(), replacement: vec![FormatItem::Capture(1)], - options: None, + options: Tendril::new(), }), }] }), diff --git a/helix-parsec/src/lib.rs b/helix-parsec/src/lib.rs index e09814b8..846d02d6 100644 --- a/helix-parsec/src/lib.rs +++ b/helix-parsec/src/lib.rs @@ -459,6 +459,7 @@ pub fn zero_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>> where P: Parser<'a, Output = T>, { + let parser = non_empty(parser); move |mut input| { let mut values = Vec::new(); @@ -491,6 +492,7 @@ pub fn one_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>> where P: Parser<'a, Output = T>, { + let parser = non_empty(parser); move |mut input| { let mut values = Vec::new(); @@ -559,3 +561,14 @@ where Ok((input, values)) } } + +pub fn non_empty<'a, T>(p: impl Parser<'a, Output = T>) -> impl Parser<'a, Output = T> { + move |input| { + let (new_input, res) = 
p.parse(input)?; + if new_input.len() == input.len() { + Err(input) + } else { + Ok((new_input, res)) + } + } +}