Capture word parts while calculating shellwords (#4632)

This fixes an edge case for completing shellwords. With a file
"a b.txt" in the current directory, the sequence `:open a\<tab>`
will result in the prompt containing `:open aa\ b.txt`. This happens
because the length of input to trim when inserting the completion is
computed by parsing the input with shellwords and then re-escaping the
resulting word (in a separate operation). That round trip is lossy: in
this case it loses the trailing backslash, so one character too few is
trimmed and the original `a` is left in front of the completion.

The fix provided here refactors shellwords to track both the _words_
(shellwords with quotes and escapes resolved) and the _parts_ (chunks
of the input which turned into each word, with separating whitespace
removed). When calculating how much of the input to delete when
replacing with the completion item, we now use the length of the last
part.

This also allows us to eliminate the duplicate work done in the
`ends_with_whitespace` check.
pull/4578/head^2
Michael Davis 2 years ago committed by GitHub
parent b474ee1843
commit c6b83368b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -27,181 +27,172 @@ enum State {
DquoteEscaped, DquoteEscaped,
} }
/// Get the vec of escaped / quoted / doublequoted filenames from the input str pub struct Shellwords<'a> {
pub fn shellwords(input: &str) -> Vec<Cow<'_, str>> { state: State,
use State::*; /// Shellwords where whitespace and escapes have been resolved.
words: Vec<Cow<'a, str>>,
/// The parts of the input that are divided into shellwords. This can be
/// used to retrieve the original text for a given word by looking up the
/// same index in the Vec as the word in `words`.
parts: Vec<&'a str>,
}
let mut state = Unquoted; impl<'a> From<&'a str> for Shellwords<'a> {
let mut args: Vec<Cow<str>> = Vec::new(); fn from(input: &'a str) -> Self {
let mut escaped = String::with_capacity(input.len()); use State::*;
let mut start = 0; let mut state = Unquoted;
let mut end = 0; let mut words = Vec::new();
let mut parts = Vec::new();
let mut escaped = String::with_capacity(input.len());
for (i, c) in input.char_indices() { let mut part_start = 0;
state = match state { let mut unescaped_start = 0;
OnWhitespace => match c { let mut end = 0;
'"' => {
end = i; for (i, c) in input.char_indices() {
Dquoted state = match state {
} OnWhitespace => match c {
'\'' => { '"' => {
end = i; end = i;
Quoted Dquoted
} }
'\\' => { '\'' => {
if cfg!(unix) { end = i;
escaped.push_str(&input[start..i]); Quoted
start = i + 1; }
UnquotedEscaped '\\' => {
} else { if cfg!(unix) {
escaped.push_str(&input[unescaped_start..i]);
unescaped_start = i + 1;
UnquotedEscaped
} else {
OnWhitespace
}
}
c if c.is_ascii_whitespace() => {
end = i;
OnWhitespace OnWhitespace
} }
} _ => Unquoted,
c if c.is_ascii_whitespace() => { },
end = i; Unquoted => match c {
OnWhitespace '\\' => {
} if cfg!(unix) {
_ => Unquoted, escaped.push_str(&input[unescaped_start..i]);
}, unescaped_start = i + 1;
Unquoted => match c { UnquotedEscaped
'\\' => { } else {
if cfg!(unix) { Unquoted
escaped.push_str(&input[start..i]); }
start = i + 1;
UnquotedEscaped
} else {
Unquoted
} }
} c if c.is_ascii_whitespace() => {
c if c.is_ascii_whitespace() => { end = i;
end = i; OnWhitespace
OnWhitespace
}
_ => Unquoted,
},
UnquotedEscaped => Unquoted,
Quoted => match c {
'\\' => {
if cfg!(unix) {
escaped.push_str(&input[start..i]);
start = i + 1;
QuoteEscaped
} else {
Quoted
} }
} _ => Unquoted,
'\'' => { },
end = i; UnquotedEscaped => Unquoted,
OnWhitespace Quoted => match c {
} '\\' => {
_ => Quoted, if cfg!(unix) {
}, escaped.push_str(&input[unescaped_start..i]);
QuoteEscaped => Quoted, unescaped_start = i + 1;
Dquoted => match c { QuoteEscaped
'\\' => { } else {
if cfg!(unix) { Quoted
escaped.push_str(&input[start..i]); }
start = i + 1;
DquoteEscaped
} else {
Dquoted
} }
} '\'' => {
'"' => { end = i;
end = i; OnWhitespace
OnWhitespace }
} _ => Quoted,
_ => Dquoted, },
}, QuoteEscaped => Quoted,
DquoteEscaped => Dquoted, Dquoted => match c {
}; '\\' => {
if cfg!(unix) {
escaped.push_str(&input[unescaped_start..i]);
unescaped_start = i + 1;
DquoteEscaped
} else {
Dquoted
}
}
'"' => {
end = i;
OnWhitespace
}
_ => Dquoted,
},
DquoteEscaped => Dquoted,
};
if i >= input.len() - 1 && end == 0 { if i >= input.len() - 1 && end == 0 {
end = i + 1; end = i + 1;
} }
if end > 0 { if end > 0 {
let esc_trim = escaped.trim(); let esc_trim = escaped.trim();
let inp = &input[start..end]; let inp = &input[unescaped_start..end];
if !(esc_trim.is_empty() && inp.trim().is_empty()) { if !(esc_trim.is_empty() && inp.trim().is_empty()) {
if esc_trim.is_empty() { if esc_trim.is_empty() {
args.push(inp.into()); words.push(inp.into());
} else { parts.push(inp);
args.push([escaped, inp.into()].concat().into()); } else {
escaped = "".to_string(); words.push([escaped, inp.into()].concat().into());
parts.push(&input[part_start..end]);
escaped = "".to_string();
}
} }
unescaped_start = i + 1;
part_start = i + 1;
end = 0;
} }
start = i + 1;
end = 0;
} }
}
args
}
/// Checks that the input ends with an ascii whitespace character which is debug_assert!(words.len() == parts.len());
/// not escaped.
///
/// # Examples
///
/// ```rust
/// use helix_core::shellwords::ends_with_whitespace;
/// assert_eq!(ends_with_whitespace(" "), true);
/// assert_eq!(ends_with_whitespace(":open "), true);
/// assert_eq!(ends_with_whitespace(":open foo.txt "), true);
/// assert_eq!(ends_with_whitespace(":open"), false);
/// #[cfg(unix)]
/// assert_eq!(ends_with_whitespace(":open a\\ "), false);
/// #[cfg(unix)]
/// assert_eq!(ends_with_whitespace(":open a\\ b.txt"), false);
/// ```
pub fn ends_with_whitespace(input: &str) -> bool {
use State::*;
// Fast-lane: the input must end with a whitespace character Self {
// regardless of quoting. state,
if !input.ends_with(|c: char| c.is_ascii_whitespace()) { words,
return false; parts,
}
} }
}
let mut state = Unquoted; impl<'a> Shellwords<'a> {
/// Checks that the input ends with a whitespace character which is not escaped.
///
/// # Examples
///
/// ```rust
/// use helix_core::shellwords::Shellwords;
/// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
/// ```
pub fn ends_with_whitespace(&self) -> bool {
matches!(self.state, State::OnWhitespace)
}
for c in input.chars() { /// Returns the list of shellwords calculated from the input string.
state = match state { pub fn words(&self) -> &[Cow<'a, str>] {
OnWhitespace => match c { &self.words
'"' => Dquoted,
'\'' => Quoted,
'\\' if cfg!(unix) => UnquotedEscaped,
'\\' => OnWhitespace,
c if c.is_ascii_whitespace() => OnWhitespace,
_ => Unquoted,
},
Unquoted => match c {
'\\' if cfg!(unix) => UnquotedEscaped,
'\\' => Unquoted,
c if c.is_ascii_whitespace() => OnWhitespace,
_ => Unquoted,
},
UnquotedEscaped => Unquoted,
Quoted => match c {
'\\' if cfg!(unix) => QuoteEscaped,
'\\' => Quoted,
'\'' => OnWhitespace,
_ => Quoted,
},
QuoteEscaped => Quoted,
Dquoted => match c {
'\\' if cfg!(unix) => DquoteEscaped,
'\\' => Dquoted,
'"' => OnWhitespace,
_ => Dquoted,
},
DquoteEscaped => Dquoted,
}
} }
matches!(state, OnWhitespace) /// Returns a list of strings which correspond to [`Self::words`] but represent the original
/// text in the input string - including escape characters - without separating whitespace.
pub fn parts(&self) -> &[&'a str] {
&self.parts
}
} }
#[cfg(test)] #[cfg(test)]
@ -212,7 +203,8 @@ mod test {
#[cfg(windows)] #[cfg(windows)]
fn test_normal() { fn test_normal() {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
let result = shellwords(input); let shellwords = Shellwords::from(input);
let result = shellwords.words().to_vec();
let expected = vec![ let expected = vec![
Cow::from(":o"), Cow::from(":o"),
Cow::from("single_word"), Cow::from("single_word"),
@ -230,7 +222,8 @@ mod test {
#[cfg(unix)] #[cfg(unix)]
fn test_normal() { fn test_normal() {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
let result = shellwords(input); let shellwords = Shellwords::from(input);
let result = shellwords.words().to_vec();
let expected = vec![ let expected = vec![
Cow::from(":o"), Cow::from(":o"),
Cow::from("single_word"), Cow::from("single_word"),
@ -247,7 +240,8 @@ mod test {
fn test_quoted() { fn test_quoted() {
let quoted = let quoted =
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#; r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#;
let result = shellwords(quoted); let shellwords = Shellwords::from(quoted);
let result = shellwords.words().to_vec();
let expected = vec![ let expected = vec![
Cow::from(":o"), Cow::from(":o"),
Cow::from("single_word"), Cow::from("single_word"),
@ -262,7 +256,8 @@ mod test {
#[cfg(unix)] #[cfg(unix)]
fn test_dquoted() { fn test_dquoted() {
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#; let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#;
let result = shellwords(dquoted); let shellwords = Shellwords::from(dquoted);
let result = shellwords.words().to_vec();
let expected = vec![ let expected = vec![
Cow::from(":o"), Cow::from(":o"),
Cow::from("single_word"), Cow::from("single_word"),
@ -277,7 +272,8 @@ mod test {
#[cfg(unix)] #[cfg(unix)]
fn test_mixed() { fn test_mixed() {
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#; let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#;
let result = shellwords(dquoted); let shellwords = Shellwords::from(dquoted);
let result = shellwords.words().to_vec();
let expected = vec![ let expected = vec![
Cow::from(":o"), Cow::from(":o"),
Cow::from("single_word"), Cow::from("single_word"),
@ -298,7 +294,8 @@ mod test {
fn test_lists() { fn test_lists() {
let input = let input =
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"#; r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"#;
let result = shellwords(input); let shellwords = Shellwords::from(input);
let result = shellwords.words().to_vec();
let expected = vec![ let expected = vec![
Cow::from(":set"), Cow::from(":set"),
Cow::from("statusline.center"), Cow::from("statusline.center"),
@ -322,4 +319,18 @@ mod test {
assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar")); assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\"")); assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\""));
} }
#[test]
#[cfg(unix)]
fn test_parts() {
assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]);
}
#[test]
#[cfg(windows)]
fn test_parts() {
assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]);
}
} }

@ -2253,7 +2253,10 @@ pub static TYPABLE_COMMAND_MAP: Lazy<HashMap<&'static str, &'static TypableComma
.collect() .collect()
}); });
#[allow(clippy::unnecessary_unwrap)]
pub(super) fn command_mode(cx: &mut Context) { pub(super) fn command_mode(cx: &mut Context) {
use shellwords::Shellwords;
let mut prompt = Prompt::new( let mut prompt = Prompt::new(
":".into(), ":".into(),
Some(':'), Some(':'),
@ -2261,10 +2264,10 @@ pub(super) fn command_mode(cx: &mut Context) {
static FUZZY_MATCHER: Lazy<fuzzy_matcher::skim::SkimMatcherV2> = static FUZZY_MATCHER: Lazy<fuzzy_matcher::skim::SkimMatcherV2> =
Lazy::new(fuzzy_matcher::skim::SkimMatcherV2::default); Lazy::new(fuzzy_matcher::skim::SkimMatcherV2::default);
let parts = shellwords::shellwords(input); let shellwords = Shellwords::from(input);
let ends_with_whitespace = shellwords::ends_with_whitespace(input); let words = shellwords.words();
if parts.is_empty() || (parts.len() == 1 && !ends_with_whitespace) { if words.is_empty() || (words.len() == 1 && !shellwords.ends_with_whitespace()) {
// If the command has not been finished yet, complete commands. // If the command has not been finished yet, complete commands.
let mut matches: Vec<_> = typed::TYPABLE_COMMAND_LIST let mut matches: Vec<_> = typed::TYPABLE_COMMAND_LIST
.iter() .iter()
@ -2283,19 +2286,20 @@ pub(super) fn command_mode(cx: &mut Context) {
} else { } else {
// Otherwise, use the command's completer and the last shellword // Otherwise, use the command's completer and the last shellword
// as completion input. // as completion input.
let part = if parts.len() == 1 { let (part, part_len) = if words.len() == 1 || shellwords.ends_with_whitespace() {
&Cow::Borrowed("") (&Cow::Borrowed(""), 0)
} else { } else {
parts.last().unwrap() (
words.last().unwrap(),
shellwords.parts().last().unwrap().len(),
)
}; };
if let Some(typed::TypableCommand { if let Some(typed::TypableCommand {
completer: Some(completer), completer: Some(completer),
.. ..
}) = typed::TYPABLE_COMMAND_MAP.get(&parts[0] as &str) }) = typed::TYPABLE_COMMAND_MAP.get(&words[0] as &str)
{ {
let part_len = shellwords::escape(part.clone()).len();
completer(editor, part) completer(editor, part)
.into_iter() .into_iter()
.map(|(range, file)| { .map(|(range, file)| {
@ -2328,7 +2332,8 @@ pub(super) fn command_mode(cx: &mut Context) {
// Handle typable commands // Handle typable commands
if let Some(cmd) = typed::TYPABLE_COMMAND_MAP.get(parts[0]) { if let Some(cmd) = typed::TYPABLE_COMMAND_MAP.get(parts[0]) {
let args = shellwords::shellwords(input); let shellwords = Shellwords::from(input);
let args = shellwords.words();
if let Err(e) = (cmd.fun)(cx, &args[1..], event) { if let Err(e) = (cmd.fun)(cx, &args[1..], event) {
cx.editor.set_error(format!("{}", e)); cx.editor.set_error(format!("{}", e));

Loading…
Cancel
Save