You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
helix-plus/helix-lsp/src/snippet.rs

1011 lines
33 KiB
Rust

use std::borrow::Cow;
use anyhow::{anyhow, Result};
use helix_core::{smallvec, SmallVec, Tendril};
#[derive(Debug, PartialEq, Eq)]
pub enum CaseChange {
Upcase,
Downcase,
Capitalize,
}
#[derive(Debug, PartialEq, Eq)]
pub enum FormatItem {
Text(Tendril),
Capture(usize),
CaseChange(usize, CaseChange),
Conditional(usize, Option<Tendril>, Option<Tendril>),
}
#[derive(Debug, PartialEq, Eq)]
pub struct Regex {
value: Tendril,
replacement: Vec<FormatItem>,
options: Tendril,
}
#[derive(Debug, PartialEq, Eq)]
pub enum SnippetElement<'a> {
Tabstop {
tabstop: usize,
},
Placeholder {
tabstop: usize,
value: Vec<SnippetElement<'a>>,
},
Choice {
tabstop: usize,
choices: Vec<Tendril>,
},
Variable {
name: &'a str,
default: Option<Vec<SnippetElement<'a>>>,
regex: Option<Regex>,
},
Text(Tendril),
}
#[derive(Debug, PartialEq, Eq)]
pub struct Snippet<'a> {
elements: Vec<SnippetElement<'a>>,
}
pub fn parse(s: &str) -> Result<Snippet<'_>> {
parser::parse(s).map_err(|rest| anyhow!("Failed to parse snippet. Remaining input: {}", rest))
}
fn render_elements(
snippet_elements: &[SnippetElement<'_>],
insert: &mut Tendril,
offset: &mut usize,
tabstops: &mut Vec<(usize, (usize, usize))>,
newline_with_offset: &str,
include_placeholder: bool,
) {
use SnippetElement::*;
for element in snippet_elements {
match element {
Text(text) => {
// small optimization to avoid calling replace when it's unnecessary
let text = if text.contains('\n') {
Cow::Owned(text.replace('\n', newline_with_offset))
} else {
Cow::Borrowed(text.as_str())
};
*offset += text.chars().count();
insert.push_str(&text);
}
Variable {
name: _,
regex: _,
r#default,
} => {
// TODO: variables. For now, fall back to the default, which defaults to "".
render_elements(
r#default.as_deref().unwrap_or_default(),
insert,
offset,
tabstops,
newline_with_offset,
include_placeholder,
);
}
&Tabstop { tabstop } => {
tabstops.push((tabstop, (*offset, *offset)));
}
Placeholder {
tabstop,
value: inner_snippet_elements,
} => {
let start_offset = *offset;
if include_placeholder {
render_elements(
inner_snippet_elements,
insert,
offset,
tabstops,
newline_with_offset,
include_placeholder,
);
}
tabstops.push((*tabstop, (start_offset, *offset)));
}
&Choice {
tabstop,
choices: _,
} => {
// TODO: choices
tabstops.push((tabstop, (*offset, *offset)));
}
}
}
}
#[allow(clippy::type_complexity)] // only used one time
pub fn render(
snippet: &Snippet<'_>,
newline_with_offset: &str,
include_placeholder: bool,
) -> (Tendril, Vec<SmallVec<[(usize, usize); 1]>>) {
let mut insert = Tendril::new();
let mut tabstops = Vec::new();
let mut offset = 0;
render_elements(
&snippet.elements,
&mut insert,
&mut offset,
&mut tabstops,
newline_with_offset,
include_placeholder,
);
// sort in ascending order (except for 0, which should always be the last one (per lsp doc))
tabstops.sort_unstable_by_key(|(n, _)| if *n == 0 { usize::MAX } else { *n });
// merge tabstops with the same index (we take advantage of the fact that we just sorted them
// above to simply look backwards)
let mut ntabstops = Vec::<SmallVec<[(usize, usize); 1]>>::new();
{
let mut prev = None;
for (tabstop, r) in tabstops {
if prev == Some(tabstop) {
let len_1 = ntabstops.len() - 1;
ntabstops[len_1].push(r);
} else {
prev = Some(tabstop);
ntabstops.push(smallvec![r]);
}
}
}
(insert, ntabstops)
}
mod parser {
use helix_core::Tendril;
use helix_parsec::*;
use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
/*
https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#snippet_syntax
any ::= tabstop | placeholder | choice | variable | text
tabstop ::= '$' int | '${' int '}'
placeholder ::= '${' int ':' any '}'
choice ::= '${' int '|' text (',' text)* '|}'
variable ::= '$' var | '${' var }'
| '${' var ':' any '}'
| '${' var '/' regex '/' (format | text)+ '/' options '}'
format ::= '$' int | '${' int '}'
| '${' int ':' '/upcase' | '/downcase' | '/capitalize' '}'
| '${' int ':+' if '}'
| '${' int ':?' if ':' else '}'
| '${' int ':-' else '}' | '${' int ':' else '}'
regex ::= Regular Expression value (ctor-string)
options ::= Regular Expression option (ctor-options)
var ::= [_a-zA-Z] [_a-zA-Z0-9]*
int ::= [0-9]+
text ::= .*
if ::= text
else ::= text
*/
fn var<'a>() -> impl Parser<'a, Output = &'a str> {
// var = [_a-zA-Z][_a-zA-Z0-9]*
move |input: &'a str| {
input
.char_indices()
.take_while(|(p, c)| {
*c == '_'
|| if *p == 0 {
c.is_ascii_alphabetic()
} else {
c.is_ascii_alphanumeric()
}
})
.last()
.map(|(index, c)| {
let index = index + c.len_utf8();
(&input[index..], &input[0..index])
})
.ok_or(input)
}
}
const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$'];
const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '|', ','];
fn text<'a>(
escape_chars: &'static [char],
term_chars: &'static [char],
) -> impl Parser<'a, Output = Tendril> {
move |input: &'a str| {
let mut chars = input.char_indices().peekable();
let mut res = Tendril::new();
while let Some((i, c)) = chars.next() {
match c {
'\\' => {
if let Some(&(_, c)) = chars.peek() {
if escape_chars.contains(&c) {
chars.next();
res.push(c);
continue;
}
}
res.push('\\');
}
c if term_chars.contains(&c) => return Ok((&input[i..], res)),
c => res.push(c),
}
}
Ok(("", res))
}
}
fn digit<'a>() -> impl Parser<'a, Output = usize> {
filter_map(take_while(|c| c.is_ascii_digit()), |s| s.parse().ok())
}
fn case_change<'a>() -> impl Parser<'a, Output = CaseChange> {
use CaseChange::*;
choice!(
map("upcase", |_| Upcase),
map("downcase", |_| Downcase),
map("capitalize", |_| Capitalize),
)
}
fn format<'a>() -> impl Parser<'a, Output = FormatItem> {
use FormatItem::*;
choice!(
// '$' int
map(right("$", digit()), Capture),
// '${' int '}'
map(seq!("${", digit(), "}"), |seq| Capture(seq.1)),
// '${' int ':' '/upcase' | '/downcase' | '/capitalize' '}'
map(seq!("${", digit(), ":/", case_change(), "}"), |seq| {
CaseChange(seq.1, seq.3)
}),
// '${' int ':+' if '}'
map(
seq!("${", digit(), ":+", text(TEXT_ESCAPE_CHARS, &['}']), "}"),
|seq| { Conditional(seq.1, Some(seq.3), None) }
),
// '${' int ':?' if ':' else '}'
map(
seq!(
"${",
digit(),
":?",
text(TEXT_ESCAPE_CHARS, &[':']),
":",
text(TEXT_ESCAPE_CHARS, &['}']),
"}"
),
|seq| { Conditional(seq.1, Some(seq.3), Some(seq.5)) }
),
// '${' int ':-' else '}' | '${' int ':' else '}'
map(
seq!(
"${",
digit(),
":",
optional("-"),
text(TEXT_ESCAPE_CHARS, &['}']),
"}"
),
|seq| { Conditional(seq.1, None, Some(seq.4)) }
),
)
}
fn regex<'a>() -> impl Parser<'a, Output = Regex> {
map(
seq!(
"/",
// TODO parse as ECMAScript and convert to rust regex
text(&['/'], &['/']),
"/",
zero_or_more(choice!(
format(),
// text doesn't parse $, if format fails we just accept the $ as text
map("$", |_| FormatItem::Text("$".into())),
map(text(&['\\', '/'], &['/', '$']), FormatItem::Text),
)),
"/",
// vscode really doesn't allow escaping } here
// so it's impossible to write a regex escape containing a }
// we can consider deviating here and allowing the escape
text(&[], &['}']),
),
|(_, value, _, replacement, _, options)| Regex {
value,
replacement,
options,
},
)
}
fn tabstop<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
map(
or(
right("$", digit()),
map(seq!("${", digit(), "}"), |values| values.1),
),
|digit| SnippetElement::Tabstop { tabstop: digit },
)
}
fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
map(
seq!(
"${",
digit(),
":",
// according to the grammar there is just a single anything here.
// However in the prose it is explained that placeholders can be nested.
// The example there contains both a placeholder text and a nested placeholder
// which indicates a list. Looking at the VSCode sourcecode, the placeholder
// is indeed parsed as zero_or_more so the grammar is simply incorrect here
zero_or_more(anything(TEXT_ESCAPE_CHARS, true)),
"}"
),
|seq| SnippetElement::Placeholder {
tabstop: seq.1,
value: seq.3,
},
)
}
fn choice<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
map(
seq!(
"${",
digit(),
"|",
sep(text(CHOICE_TEXT_ESCAPE_CHARS, &['|', ',']), ","),
"|}",
),
|seq| SnippetElement::Choice {
tabstop: seq.1,
choices: seq.3,
},
)
}
fn variable<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
choice!(
// $var
map(right("$", var()), |name| SnippetElement::Variable {
name,
default: None,
regex: None,
}),
// ${var}
map(seq!("${", var(), "}",), |values| SnippetElement::Variable {
name: values.1,
default: None,
regex: None,
}),
// ${var:default}
map(
seq!(
"${",
var(),
":",
zero_or_more(anything(TEXT_ESCAPE_CHARS, true)),
"}",
),
|values| SnippetElement::Variable {
name: values.1,
default: Some(values.3),
regex: None,
}
),
// ${var/value/format/options}
map(seq!("${", var(), regex(), "}"), |values| {
SnippetElement::Variable {
name: values.1,
default: None,
regex: Some(values.2),
}
}),
)
}
fn anything<'a>(
escape_chars: &'static [char],
end_at_brace: bool,
) -> impl Parser<'a, Output = SnippetElement<'a>> {
let term_chars: &[_] = if end_at_brace { &['$', '}'] } else { &['$'] };
move |input: &'a str| {
let parser = choice!(
tabstop(),
placeholder(),
choice(),
variable(),
map("$", |_| SnippetElement::Text("$".into())),
map(text(escape_chars, term_chars), SnippetElement::Text),
);
parser.parse(input)
}
}
fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
map(one_or_more(anything(TEXT_ESCAPE_CHARS, false)), |parts| {
Snippet { elements: parts }
})
}
pub fn parse(s: &str) -> Result<Snippet, &str> {
snippet().parse(s).and_then(|(remainder, snippet)| {
if remainder.is_empty() {
Ok(snippet)
} else {
Err(remainder)
}
})
}
#[cfg(test)]
mod test {
use super::SnippetElement::*;
use super::*;
#[test]
fn empty_string_is_error() {
assert_eq!(Err(""), parse(""));
}
#[test]
fn parse_placeholders_in_function_call() {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("match(".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("Arg1".into())),
},
Text(")".into())
]
}),
parse("match(${1:Arg1})")
)
}
#[test]
fn unterminated_placeholder() {
assert_eq!(
Ok(Snippet {
elements: vec![Text("match(".into()), Text("$".into()), Text("{1:)".into())]
}),
parse("match(${1:)")
)
}
#[test]
fn parse_empty_placeholder() {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("match(".into()),
Placeholder {
tabstop: 1,
value: vec![],
},
Text(")".into())
]
}),
parse("match(${1:})")
)
}
#[test]
fn parse_placeholders_in_statement() {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("local ".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("var".into())),
},
Text(" = ".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("value".into())),
},
]
}),
parse("local ${1:var} = ${1:value}")
)
}
#[test]
fn parse_tabstop_nested_in_placeholder() {
assert_eq!(
Ok(Snippet {
elements: vec![Placeholder {
tabstop: 1,
value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },),
},]
}),
parse("${1:var, $2}")
)
}
#[test]
fn parse_placeholder_nested_in_placeholder() {
assert_eq!(
Ok(Snippet {
elements: vec![Placeholder {
tabstop: 1,
value: vec!(
Text("foo ".into()),
Placeholder {
tabstop: 2,
value: vec!(Text("bar".into())),
},
),
},]
}),
parse("${1:foo ${2:bar}}")
)
}
#[test]
fn parse_all() {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("hello ".into()),
Tabstop { tabstop: 1 },
Tabstop { tabstop: 2 },
Text(" ".into()),
Choice {
tabstop: 1,
choices: vec!["one".into(), "two".into(), "three".into()]
},
Text(" ".into()),
Variable {
name: "name",
default: Some(vec![Text("foo".into())]),
regex: None
},
Text(" ".into()),
Variable {
name: "var",
default: None,
regex: None
},
Text(" ".into()),
Variable {
name: "TM",
default: None,
regex: None
},
]
}),
parse("hello $1${2} ${1|one,two,three|} ${name:foo} $var $TM")
);
}
#[test]
fn regex_capture_replace() {
assert_eq!(
Ok(Snippet {
elements: vec![Variable {
name: "TM_FILENAME",
default: None,
regex: Some(Regex {
value: "(.*).+$".into(),
replacement: vec![FormatItem::Capture(1), FormatItem::Text("$".into())],
options: Tendril::new(),
}),
}]
}),
parse("${TM_FILENAME/(.*).+$/$1$/}")
);
}
#[test]
fn rust_macro() {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("macro_rules! ".into()),
Tabstop { tabstop: 1 },
Text(" {\n (".into()),
Tabstop { tabstop: 2 },
Text(") => {\n ".into()),
Tabstop { tabstop: 0 },
Text("\n };\n}".into())
]
}),
parse("macro_rules! $1 {\n ($2) => {\n $0\n };\n}")
);
}
fn assert_text(snippet: &str, parsed_text: &str) {
let res = parse(snippet).unwrap();
let text = crate::snippet::render(&res, "\n", true).0;
assert_eq!(text, parsed_text)
}
#[test]
fn robust_parsing() {
assert_text("$", "$");
assert_text("\\\\$", "\\$");
assert_text("{", "{");
assert_text("\\}", "}");
assert_text("\\abc", "\\abc");
assert_text("foo${f:\\}}bar", "foo}bar");
assert_text("\\{", "\\{");
assert_text("I need \\\\\\$", "I need \\$");
assert_text("\\", "\\");
assert_text("\\{{", "\\{{");
assert_text("{{", "{{");
assert_text("{{dd", "{{dd");
assert_text("}}", "}}");
assert_text("ff}}", "ff}}");
assert_text("farboo", "farboo");
assert_text("far{{}}boo", "far{{}}boo");
assert_text("far{{123}}boo", "far{{123}}boo");
assert_text("far\\{{123}}boo", "far\\{{123}}boo");
assert_text("far{{id:bern}}boo", "far{{id:bern}}boo");
assert_text("far{{id:bern {{basel}}}}boo", "far{{id:bern {{basel}}}}boo");
assert_text(
"far{{id:bern {{id:basel}}}}boo",
"far{{id:bern {{id:basel}}}}boo",
);
assert_text(
"far{{id:bern {{id2:basel}}}}boo",
"far{{id:bern {{id2:basel}}}}boo",
);
assert_text("${}$\\a\\$\\}\\\\", "${}$\\a$}\\");
assert_text("farboo", "farboo");
assert_text("far{{}}boo", "far{{}}boo");
assert_text("far{{123}}boo", "far{{123}}boo");
assert_text("far\\{{123}}boo", "far\\{{123}}boo");
assert_text("far`123`boo", "far`123`boo");
assert_text("far\\`123\\`boo", "far\\`123\\`boo");
assert_text("\\$far-boo", "$far-boo");
}
fn assert_snippet(snippet: &str, expect: &[SnippetElement]) {
let parsed_snippet = parse(snippet).unwrap();
assert_eq!(parsed_snippet.elements, expect.to_owned())
}
#[test]
fn parse_variable() {
use SnippetElement::*;
assert_snippet(
"$far-boo",
&[
Variable {
name: "far",
default: None,
regex: None,
},
Text("-boo".into()),
],
);
assert_snippet(
"far$farboo",
&[
Text("far".into()),
Variable {
name: "farboo",
regex: None,
default: None,
},
],
);
assert_snippet(
"far${farboo}",
&[
Text("far".into()),
Variable {
name: "farboo",
regex: None,
default: None,
},
],
);
assert_snippet("$123", &[Tabstop { tabstop: 123 }]);
assert_snippet(
"$farboo",
&[Variable {
name: "farboo",
regex: None,
default: None,
}],
);
assert_snippet(
"$far12boo",
&[Variable {
name: "far12boo",
regex: None,
default: None,
}],
);
assert_snippet(
"000_${far}_000",
&[
Text("000_".into()),
Variable {
name: "far",
regex: None,
default: None,
},
Text("_000".into()),
],
);
}
#[test]
fn parse_variable_transform() {
assert_snippet(
"${foo///}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: Tendril::new(),
replacement: Vec::new(),
options: Tendril::new(),
}),
default: None,
}],
);
assert_snippet(
"${foo/regex/format/gmi}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: "regex".into(),
replacement: vec![FormatItem::Text("format".into())],
options: "gmi".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/([A-Z][a-z])/format/}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: "([A-Z][a-z])".into(),
replacement: vec![FormatItem::Text("format".into())],
options: Tendril::new(),
}),
default: None,
}],
);
// invalid regex TODO: reneable tests once we actually parse this regex flavour
// assert_text(
// "${foo/([A-Z][a-z])/format/GMI}",
// "${foo/([A-Z][a-z])/format/GMI}",
// );
// assert_text(
// "${foo/([A-Z][a-z])/format/funky}",
// "${foo/([A-Z][a-z])/format/funky}",
// );
// assert_text("${foo/([A-Z][a-z]/format/}", "${foo/([A-Z][a-z]/format/}");
assert_text(
"${foo/regex\\/format/options}",
"${foo/regex\\/format/options}",
);
// tricky regex
assert_snippet(
"${foo/m\\/atch/$1/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: "m/atch".into(),
replacement: vec![FormatItem::Capture(1)],
options: "i".into(),
}),
default: None,
}],
);
// incomplete
assert_text("${foo///", "${foo///");
assert_text("${foo/regex/format/options", "${foo/regex/format/options");
// format string
assert_snippet(
"${foo/.*/${0:fooo}/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![FormatItem::Conditional(0, None, Some("fooo".into()))],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/.*/${1}/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![FormatItem::Capture(1)],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/.*/$1/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![FormatItem::Capture(1)],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/.*/This-$1-encloses/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![
FormatItem::Text("This-".into()),
FormatItem::Capture(1),
FormatItem::Text("-encloses".into()),
],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/.*/complex${1:else}/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![
FormatItem::Text("complex".into()),
FormatItem::Conditional(1, None, Some("else".into())),
],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/.*/complex${1:-else}/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![
FormatItem::Text("complex".into()),
FormatItem::Conditional(1, None, Some("else".into())),
],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/.*/complex${1:+if}/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![
FormatItem::Text("complex".into()),
FormatItem::Conditional(1, Some("if".into()), None),
],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/.*/complex${1:?if:else}/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![
FormatItem::Text("complex".into()),
FormatItem::Conditional(1, Some("if".into()), Some("else".into())),
],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${foo/.*/complex${1:/upcase}/i}",
&[Variable {
name: "foo",
regex: Some(Regex {
value: ".*".into(),
replacement: vec![
FormatItem::Text("complex".into()),
FormatItem::CaseChange(1, CaseChange::Upcase),
],
options: "i".into(),
}),
default: None,
}],
);
assert_snippet(
"${TM_DIRECTORY/src\\//$1/}",
&[Variable {
name: "TM_DIRECTORY",
regex: Some(Regex {
value: "src/".into(),
replacement: vec![FormatItem::Capture(1)],
options: Tendril::new(),
}),
default: None,
}],
);
assert_snippet(
"${TM_SELECTED_TEXT/a/\\/$1/g}",
&[Variable {
name: "TM_SELECTED_TEXT",
regex: Some(Regex {
value: "a".into(),
replacement: vec![FormatItem::Text("/".into()), FormatItem::Capture(1)],
options: "g".into(),
}),
default: None,
}],
);
assert_snippet(
"${TM_SELECTED_TEXT/a/in\\/$1ner/g}",
&[Variable {
name: "TM_SELECTED_TEXT",
regex: Some(Regex {
value: "a".into(),
replacement: vec![
FormatItem::Text("in/".into()),
FormatItem::Capture(1),
FormatItem::Text("ner".into()),
],
options: "g".into(),
}),
default: None,
}],
);
assert_snippet(
"${TM_SELECTED_TEXT/a/end\\//g}",
&[Variable {
name: "TM_SELECTED_TEXT",
regex: Some(Regex {
value: "a".into(),
replacement: vec![FormatItem::Text("end/".into())],
options: "g".into(),
}),
default: None,
}],
);
}
// TODO port more tests from https://github.com/microsoft/vscode/blob/dce493cb6e36346ef2714e82c42ce14fc461b15c/src/vs/editor/contrib/snippet/test/browser/snippetParser.test.ts
}
}