@ -1,6 +1,329 @@
use smartstring ::{ LazyCompact , SmartString } ;
use std ::borrow ::Cow ;
/// A utility for parsing shell-like command lines.
///
/// `Shellwords` borrows an input string and exposes its command (everything before the first space)
/// and its arguments (everything after the first space). Nothing is parsed or copied on construction;
/// the value is just a view over the input, which makes it cheap to copy around.
///
/// # Features
///
/// - Parses command and arguments from input strings.
/// - Supports single, double, and backtick quoted arguments.
/// - Respects backslash escaping in arguments.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use helix_core::shellwords::Shellwords;
/// let shellwords = Shellwords::from(":o helix-core/src/shellwords.rs");
/// assert_eq!(":o", shellwords.command());
/// assert_eq!("helix-core/src/shellwords.rs", shellwords.args().next().unwrap());
/// ```
///
/// Empty command:
///
/// ```
/// # use helix_core::shellwords::Shellwords;
/// let shellwords = Shellwords::from(" ");
/// assert!(shellwords.command().is_empty());
/// ```
///
/// # Iterator
///
/// The `args` method returns a non-allocating iterator, `Args`, over the arguments of the input.
///
/// ```
/// # use helix_core::shellwords::Shellwords;
/// let shellwords = Shellwords::from(":o a b c");
/// let mut args = shellwords.args();
/// assert_eq!(Some("a"), args.next());
/// assert_eq!(Some("b"), args.next());
/// assert_eq!(Some("c"), args.next());
/// assert_eq!(None, args.next());
/// ```
#[derive(Debug, Clone, Copy)]
pub struct Shellwords<'a> {
    /// The complete, unmodified command line this value was created from.
    input: &'a str,
}
impl < ' a > From < & ' a str > for Shellwords < ' a > {
#[ inline ]
fn from ( input : & ' a str ) -> Self {
Self { input }
}
}
impl < ' a > From < & ' a String > for Shellwords < ' a > {
#[ inline ]
fn from ( input : & ' a String ) -> Self {
Self { input }
}
}
impl < ' a > From < & ' a Cow < ' a , str > > for Shellwords < ' a > {
#[ inline ]
fn from ( input : & ' a Cow < str > ) -> Self {
Self { input }
}
}
impl < ' a > Shellwords < ' a > {
#[ inline ]
#[ must_use ]
pub fn command ( & self ) -> & str {
self . input
. split_once ( ' ' )
. map_or ( self . input , | ( command , _ ) | command )
}
#[ inline ]
#[ must_use ]
pub fn args ( & self ) -> Args < ' a > {
let args = self . input . split_once ( ' ' ) . map_or ( "" , | ( _ , args ) | args ) ;
Args ::parse ( args )
}
#[ inline ]
pub fn input ( & self ) -> & str {
self . input
}
/// Checks that the input ends with a whitespace character which is not escaped.
///
/// # Examples
///
/// ```rust
/// # use helix_core::shellwords::Shellwords;
/// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
/// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
/// ```
#[ inline ]
pub fn ends_with_whitespace ( & self ) -> bool {
self . input . ends_with ( ' ' )
}
}
/// An iterator over an input string which yields arguments.
///
/// Splits on whitespace (space or tab), but respects quoted substrings (using double quotes, single
/// quotes, or backticks). Every yielded argument is a slice of the original input; nothing is copied.
#[ derive(Debug, Clone, Copy) ]
pub struct Args < ' a > {
/// The complete argument string being iterated over.
input : & ' a str ,
/// Byte offset into `input` of the next byte the parser will examine.
idx : usize ,
/// Byte offset into `input` at which the argument currently being scanned begins.
start : usize ,
}
impl < ' a > Args < ' a > {
#[ inline ]
fn parse ( input : & ' a str ) -> Self {
Self {
input ,
idx : 0 ,
start : 0 ,
}
}
#[ inline ]
pub fn is_empty ( & self ) -> bool {
self . input . is_empty ( )
}
/// Returns the args exactly as input.
///
/// # Examples
/// ```
/// # use helix_core::shellwords::Args;
/// let args = Args::from(r#"sed -n "s/test t/not /p""#);
/// assert_eq!(r#"sed -n "s/test t/not /p""#, args.raw());
///
/// let args = Args::from(r#"cat "file name with space.txt""#);
/// assert_eq!(r#"cat "file name with space.txt""#, args.raw());
/// ```
#[ inline ]
pub fn raw ( & self ) -> & str {
self . input
}
/// Returns the remainder of the args exactly as input.
///
/// # Examples
/// ```
/// # use helix_core::shellwords::Args;
/// let mut args = Args::from(r#"sed -n "s/test t/not /p""#);
/// assert_eq!("sed", args.next().unwrap());
/// assert_eq!(r#"-n "s/test t/not /p""#, args.rest());
/// ```
///
/// Never calling `next` and using `rest` is functionally equivalent to calling `raw`.
#[ inline ]
pub fn rest ( & self ) -> & str {
& self . input [ self . idx .. ]
}
/// Convenient function to return an empty `Args`.
///
/// When used in any iteration, it will always return `None`.
#[ inline(always) ]
pub const fn empty ( ) -> Self {
Self {
input : "" ,
idx : 0 ,
start : 0 ,
}
}
}
#[ allow(clippy::copy_iterator) ]
impl < ' a > Iterator for Args < ' a > {
type Item = & ' a str ;
#[ inline ]
#[ allow(clippy::too_many_lines) ]
fn next ( & mut self ) -> Option < Self ::Item > {
// The parser loop is split into three main blocks to handle different types of input processing:
//
// 1. Quote block:
// - Detects an unescaped quote character, either starting an in-quote scan or, if already in-quote,
// locating the closing quote to return the quoted argument.
// - Handles cases where mismatched quotes are ignored and when quotes appear as the last character.
//
// 2. Whitespace block:
// - Handles arguments separated by whitespace (space or tab), respecting quotes so quoted phrases
// remain grouped together.
// - Splits arguments by whitespace when outside of a quoted context and updates boundaries accordingly.
//
// 3. Catch-all block:
// - Handles any other character, updating the `is_escaped` status if a backslash is encountered,
// advancing the loop to the next character.
let bytes = self . input . as_bytes ( ) ;
let mut in_quotes = false ;
let mut quote = b'\0' ;
let mut is_escaped = false ;
while self . idx < bytes . len ( ) {
match bytes [ self . idx ] {
b'"' | b'\'' | b'`' if ! is_escaped = > {
if in_quotes {
// Found the proper closing quote, so can return the arg and advance the state along.
if bytes [ self . idx ] = = quote {
let arg = Some ( & self . input [ self . start .. self . idx ] ) ;
self . idx + = 1 ;
self . start = self . idx ;
return arg ;
}
// If quote does not match the type of the opening quote, then do nothing and advance.
self . idx + = 1 ;
} else if self . idx = = bytes . len ( ) - 1 {
// Special case for when a quote is the last input in args.
// e.g: :read "file with space.txt""
// This preserves the quote as an arg:
// - `file with space`
// - `"`
let arg = Some ( & self . input [ self . idx .. ] ) ;
self . idx = bytes . len ( ) ;
self . start = bytes . len ( ) ;
return arg ;
} else {
// Found opening quote.
in_quotes = true ;
// Kind of quote that was found.
quote = bytes [ self . idx ] ;
if self . start < self . idx {
// When part of the input ends in a quote, `one two" three`, this properly returns the `two`
// before advancing to the quoted arg for the next iteration:
// - `one` <- previous arg
// - `two` <- this step
// - ` three` <- next arg
let arg = Some ( & self . input [ self . start .. self . idx ] ) ;
self . idx + = 1 ;
self . start = self . idx ;
return arg ;
}
// Advance after quote.
self . idx + = 1 ;
// Exclude quote from arg output.
self . start = self . idx ;
}
}
b' ' | b'\t' if ! in_quotes = > {
// Found a true whitespace separator that wasn't inside quotes.
// Check if there is anything to return or if its just advancing over whitespace.
// `start` will only be less than `idx` when there is something to return.
if self . start < self . idx {
let arg = Some ( & self . input [ self . start .. self . idx ] ) ;
self . idx + = 1 ;
self . start = self . idx ;
return arg ;
}
// Advance beyond the whitespace.
self . idx + = 1 ;
// This is where `start` will be set to the start of an arg boundary, either encountering a word
// boundary or a quote boundary. If it finds a quote, then it will be advanced again in that part
// of the code. Either way, all that remains for the check above will be to return a full arg.
self . start = self . idx ;
}
_ = > {
// If previous loop didn't find any backslash and was already escaped it will change to false
// as the backslash chain was broken.
//
// If the previous loop had no backslash escape, and found one this iteration, then its the start
// of an escape chain.
is_escaped = match ( is_escaped , bytes [ self . idx ] ) {
( false , b'\\' ) = > true , // Set `is_escaped` if the current byte is a backslash
_ = > false , //Reset `is_escaped` if it was true, otherwise keep `is_escaped` as false
} ;
// Advance to next `char`.
self . idx + = 1 ;
}
}
}
// Fallback that catches when the loop would have exited but failed to return the arg between start and the end.
if self . start < bytes . len ( ) {
let arg = Some ( & self . input [ self . start .. ] ) ;
self . start = bytes . len ( ) ;
return arg ;
}
// All args have been parsed.
None
}
}
impl < ' a > From < & ' a String > for Args < ' a > {
fn from ( args : & ' a String ) -> Self {
Args ::parse ( args )
}
}
impl < ' a > From < & ' a str > for Args < ' a > {
fn from ( args : & ' a str ) -> Self {
Args ::parse ( args )
}
}
impl < ' a > From < & ' a Cow < ' _ , str > > for Args < ' a > {
fn from ( args : & ' a Cow < str > ) -> Self {
Args ::parse ( args )
}
}
/// Auto escape for shellwords usage.
#[ inline ]
#[ must_use ]
pub fn escape ( input : Cow < str > ) -> Cow < str > {
if ! input . chars ( ) . any ( | x | x . is_ascii_whitespace ( ) ) {
input
@ -13,186 +336,141 @@ pub fn escape(input: Cow<str>) -> Cow<str> {
buf
} ) )
} else {
Cow ::Owned ( format! ( "\"{ }\"", input) )
Cow ::Owned ( format! ( "\"{ input}\"" ) )
}
}
/// Parser state of a character-by-character scan that splits an input string into shellwords.
///
/// The variant descriptions below reflect the transitions visible in the scanner in this file; the
/// `*Escaped` states are only entered when compiled for unix (`cfg!(unix)`).
enum State {
/// The last character was unquoted ASCII whitespace or a closing quote. Ending the scan in this
/// state is what `ends_with_whitespace` reports as `true`.
OnWhitespace ,
/// Inside a word that is not quoted. This is also the state the scan starts in.
Unquoted ,
/// Directly after a backslash in unquoted text; the next character returns the scan to `Unquoted`.
UnquotedEscaped ,
/// Inside a single-quoted (`'`) section.
Quoted ,
/// Directly after a backslash inside a single-quoted section; the next character returns the scan
/// to `Quoted`.
QuoteEscaped ,
/// Inside a double-quoted (`"`) section.
Dquoted ,
/// Directly after a backslash inside a double-quoted section; the next character returns the scan
/// to `Dquoted`.
DquoteEscaped ,
}
/// Unescapes a string, converting escape sequences into their literal characters.
///
/// This function handles the following escape sequences:
/// - `\\n` is converted to `\n` (newline)
/// - `\\t` is converted to `\t` (tab)
/// - `\\u{...}` is converted to the corresponding Unicode character
///
/// Other escape sequences, such as `\\` followed by any character not listed above, will remain unchanged.
///
/// If input is invalid, for example if there is invalid unicode, \u{999999999}, it will return the input as is.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use helix_core::shellwords::unescape;
/// let unescaped = unescape("hello\\nworld");
/// assert_eq!("hello\nworld", unescaped);
/// ```
///
/// Unescaping tabs:
///
/// ```
/// # use helix_core::shellwords::unescape;
/// let unescaped = unescape("hello\\tworld");
/// assert_eq!("hello\tworld", unescaped);
/// ```
///
/// Unescaping Unicode characters:
///
/// ```
/// # use helix_core::shellwords::unescape;
/// let unescaped = unescape("hello\\u{1f929}world");
/// assert_eq!("hello\u{1f929}world", unescaped);
/// assert_eq!("hello🤩world", unescaped);
/// ```
///
/// Handling backslashes:
///
/// ```
/// # use helix_core::shellwords::unescape;
/// let unescaped = unescape(r"hello\\world");
/// assert_eq!(r"hello\\world", unescaped);
///
/// let unescaped = unescape(r"hello\\\\world");
/// assert_eq!(r"hello\\\\world", unescaped);
/// ```
///
/// # Note
///
/// This function is opinionated, with a clear purpose of handling user input, not a general or generic unescaping utility, and does not unescape sequences like `\\'` or `\\\"`, leaving them as is.
#[ inline ]
#[ must_use ]
pub fn unescape ( input : & str ) -> Cow < ' _ , str > {
enum State {
Normal ,
Escaped ,
Unicode ,
}
pub struct Shellwords < ' a > {
state : State ,
/// Shellwords where whitespace and escapes has been resolved.
words : Vec < Cow < ' a , str > > ,
/// The parts of the input that are divided into shellwords. This can be
/// used to retrieve the original text for a given word by looking up the
/// same index in the Vec as the word in `words`.
parts : Vec < & ' a str > ,
}
let mut unescaped = String ::new ( ) ;
let mut state = State ::Normal ;
let mut is_escaped = false ;
// NOTE: Max unicode code point is U+10FFFF for a maximum of 6 chars
let mut unicode = SmartString ::< LazyCompact > ::new_const ( ) ;
impl < ' a > From < & ' a str > for Shellwords < ' a > {
fn from ( input : & ' a str ) -> Self {
use State ::* ;
let mut state = Unquoted ;
let mut words = Vec ::new ( ) ;
let mut parts = Vec ::new ( ) ;
let mut escaped = String ::with_capacity ( input . len ( ) ) ;
let mut part_start = 0 ;
let mut unescaped_start = 0 ;
let mut end = 0 ;
for ( i , c ) in input . char_indices ( ) {
state = match state {
OnWhitespace = > match c {
'"' = > {
end = i ;
Dquoted
}
'\'' = > {
end = i ;
Quoted
}
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
UnquotedEscaped
} else {
OnWhitespace
}
}
c if c . is_ascii_whitespace ( ) = > {
end = i ;
OnWhitespace
}
_ = > Unquoted ,
} ,
Unquoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
UnquotedEscaped
} else {
Unquoted
}
}
c if c . is_ascii_whitespace ( ) = > {
end = i ;
OnWhitespace
}
_ = > Unquoted ,
} ,
UnquotedEscaped = > Unquoted ,
Quoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
QuoteEscaped
} else {
Quoted
for ( idx , ch ) in input . char_indices ( ) {
match state {
State ::Normal = > match ch {
'\\' = > {
if ! is_escaped {
// PERF: As not every separator will be escaped, we use `String::new` as that has no initial
// allocation. If an escape is found, then we reserve capacity thats the len of the separator,
// as the new unescaped string will be at least that long.
unescaped . reserve ( input . len ( ) ) ;
if idx > 0 {
// First time finding an escape, so all prior chars can be added to the new unescaped
// version if its not the very first char found.
unescaped . push_str ( & input [ 0 .. idx ] ) ;
}
}
'\'' = > {
end = i ;
OnWhitespace
}
_ = > Quoted ,
} ,
QuoteEscaped = > Quoted ,
Dquoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
DquoteEscaped
} else {
Dquoted
}
state = State ::Escaped ;
is_escaped = true ;
}
_ = > {
if is_escaped {
unescaped . push ( ch ) ;
}
'"' = > {
end = i ;
OnWhitespace
}
} ,
State ::Escaped = > {
match ch {
'n' = > unescaped . push ( '\n' ) ,
't' = > unescaped . push ( '\t' ) ,
'u' = > {
state = State ::Unicode ;
continue ;
}
_ = > Dquoted ,
} ,
DquoteEscaped = > Dquoted ,
} ;
let c_len = c . len_utf8 ( ) ;
if i = = input . len ( ) - c_len & & end = = 0 {
end = i + c_len ;
}
if end > 0 {
let esc_trim = escaped . trim ( ) ;
let inp = & input [ unescaped_start .. end ] ;
if ! ( esc_trim . is_empty ( ) & & inp . trim ( ) . is_empty ( ) ) {
if esc_trim . is_empty ( ) {
words . push ( inp . into ( ) ) ;
parts . push ( inp ) ;
} else {
words . push ( [ escaped , inp . into ( ) ] . concat ( ) . into ( ) ) ;
parts . push ( & input [ part_start .. end ] ) ;
escaped = "" . to_string ( ) ;
// Uncomment if you want to handle '\\' to '\'
// '\\' => unescaped.push('\\'),
_ = > {
unescaped . push ( '\\' ) ;
unescaped . push ( ch ) ;
}
}
unescaped_start = i + 1 ;
part_start = i + 1 ;
end = 0 ;
state = State ::Normal ;
}
}
debug_assert! ( words . len ( ) = = parts . len ( ) ) ;
Self {
state ,
words ,
parts ,
State ::Unicode = > match ch {
'{' = > continue ,
'}' = > {
let Ok ( digit ) = u32 ::from_str_radix ( & unicode , 16 ) else {
return input . into ( ) ;
} ;
let Some ( point ) = char ::from_u32 ( digit ) else {
return input . into ( ) ;
} ;
unescaped . push ( point ) ;
// Might be more unicode to unescape so clear for reuse.
unicode . clear ( ) ;
state = State ::Normal ;
}
_ = > unicode . push ( ch ) ,
} ,
}
}
}
impl < ' a > Shellwords < ' a > {
/// Checks that the input ends with a whitespace character which is not escaped.
///
/// # Examples
///
/// ```rust
/// use helix_core::shellwords::Shellwords;
/// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
/// ```
pub fn ends_with_whitespace ( & self ) -> bool {
matches! ( self . state , State ::OnWhitespace )
}
/// Returns the list of shellwords calculated from the input string.
pub fn words ( & self ) -> & [ Cow < ' a , str > ] {
& self . words
}
/// Returns a list of strings which correspond to [`Self::words`] but represent the original
/// text in the input string - including escape characters - without separating whitespace.
pub fn parts ( & self ) -> & [ & ' a str ] {
& self . parts
if is_escaped {
unescaped . into ( )
} else {
input . into ( )
}
}
@ -201,114 +479,191 @@ mod test {
use super ::* ;
#[ test ]
#[ cfg(windows) ]
fn test_normal ( ) {
fn base ( ) {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "twó" ) ,
Cow ::from ( "wörds" ) ,
Cow ::from ( "\\three\\" ) ,
Cow ::from ( "\\" ) ,
Cow ::from ( "with\\ escaping\\\\" ) ,
let args = vec! [
"single_word" ,
"twó" ,
"wörds" ,
r"\three\" ,
r#"\"with\"# ,
r"escaping\\" ,
] ;
// TODO test is_owned and is_borrowed, once they get stabilized.
assert_eq! ( expected , result ) ;
assert_eq! ( ":o" , shellwords . command ( ) ) ;
assert_eq! ( args , shellwords . args ( ) . collect ::< Vec < _ > > ( ) ) ;
}
#[ test ]
#[ cfg(unix) ]
fn test_normal ( ) {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "twó" ) ,
Cow ::from ( "wörds" ) ,
Cow ::from ( r#"three " with escaping \ " #) ,
] ;
// TODO test is_owned and is_borrowed, once they get stabilized.
assert_eq! ( expected , result ) ;
fn should_have_empty_args ( ) {
let shellwords = Shellwords ::from ( ":quit" ) ;
assert! (
shellwords . args ( ) . is_empty ( ) ,
"args: `{}`" ,
shellwords . args ( ) . next ( ) . unwrap ( )
) ;
assert! ( shellwords . args ( ) . next ( ) . is_none ( ) ) ;
}
#[ test ]
#[ cfg(unix) ]
fn test_quoted ( ) {
fn should_return_empty_command ( ) {
let shellwords = Shellwords ::from ( " " ) ;
assert! ( shellwords . command ( ) . is_empty ( ) ) ;
}
#[ test ]
fn should_support_unicode_args ( ) {
assert_eq! (
Shellwords ::from ( ":sh echo 𒀀" ) . args ( ) . collect ::< Vec < _ > > ( ) ,
& [ "echo" , "𒀀" ]
) ;
assert_eq! (
Shellwords ::from ( ":sh echo 𒀀 hello world𒀀" )
. args ( )
. collect ::< Vec < _ > > ( ) ,
& [ "echo" , "𒀀" , "hello" , "world𒀀" ]
) ;
}
#[ test ]
fn should_preserve_quote_if_last_argument ( ) {
let sh = Shellwords ::from ( r#":read "file with space.txt"""# ) ;
let mut args = sh . args ( ) ;
assert_eq! ( "file with space.txt" , args . next ( ) . unwrap ( ) ) ;
assert_eq! ( r#"""# , args . next ( ) . unwrap ( ) ) ;
}
#[ test ]
fn should_return_rest_of_non_closed_quote_as_one_argument ( ) {
let sh = Shellwords ::from ( r":rename 'should be one \'argument" ) ;
assert_eq! ( r"should be one \'argument" , sh . args ( ) . next ( ) . unwrap ( ) ) ;
}
#[ test ]
fn should_respect_escaped_quote_in_what_looks_like_non_closed_arg ( ) {
let sh = Shellwords ::from ( r":rename 'should be one \\'argument" ) ;
let mut args = sh . args ( ) ;
assert_eq! ( r"should be one \\" , args . next ( ) . unwrap ( ) ) ;
assert_eq! ( r"argument" , args . next ( ) . unwrap ( ) ) ;
}
#[ test ]
fn should_split_args ( ) {
assert_eq! ( Shellwords ::from ( ":o a" ) . args ( ) . collect ::< Vec < _ > > ( ) , & [ "a" ] ) ;
assert_eq! (
Shellwords ::from ( ":o a\\ " ) . args ( ) . collect ::< Vec < _ > > ( ) ,
& [ "a\\" ]
) ;
}
#[ test ]
fn should_parse_args_even_with_leading_whitespace ( ) {
// Three spaces
assert_eq! (
Shellwords ::from ( ":o a" ) . args ( ) . collect ::< Vec < _ > > ( ) ,
& [ "a" ]
) ;
}
#[ test ]
fn should_parse_single_quotes_while_respecting_escapes ( ) {
let quoted =
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"# ;
let shellwords = Shellwords ::from ( quoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let result = shellwords . args( ) . collect ::< Vec < _ > > ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "twó wörds" ) ,
Cow ::from ( r#"three' " with escaping \ " #) ,
Cow ::from ( "quote incomplete" ) ,
"single_word" ,
"twó wörds" ,
"" ,
" " ,
r#"\three\' \"with\ escaping\\"# ,
"quote incomplete" ,
] ;
assert_eq! ( expected , result ) ;
}
#[ test ]
#[ cfg(unix) ]
fn test_dquoted ( ) {
fn should_parse_double_quotes_while_respecting_escapes ( ) {
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"# ;
let shellwords = Shellwords ::from ( dquoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let result = shellwords . args( ) . collect ::< Vec < _ > > ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "twó wörds" ) ,
Cow ::from ( r#"three' " with escaping \ " #) ,
Cow ::from ( "dquote incomplete" ) ,
"single_word" ,
"twó wörds" ,
"" ,
" " ,
r#"\three\' \"with\ escaping\\"# ,
"dquote incomplete" ,
] ;
assert_eq! ( expected , result ) ;
}
#[ test ]
#[ cfg(unix) ]
fn test_mixed ( ) {
fn should_respect_escapes_with_mixed_quotes ( ) {
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"# ;
let shellwords = Shellwords ::from ( dquoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let result = shellwords . args( ) . collect ::< Vec < _ > > ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "twó wörds" ) ,
Cow ::from ( "three' \"with escaping\\" ) ,
Cow ::from ( "no space before" ) ,
Cow ::from ( "and after" ) ,
Cow ::from ( "$#%^@" ) ,
Cow ::from ( "%^&(%^" ) ,
Cow ::from ( ")(*&^%" ) ,
Cow ::from ( r#"a\\b"# ) ,
//last ' just changes to quoted but since we dont have anything after it, it should be ignored
"single_word" ,
"twó wörds" ,
r#"\three\' \"with\ escaping\\"# ,
"no space before" ,
"and after" ,
"$#%^@" ,
"%^&(%^" ,
r")(*&^%" ,
r"a\\\\\b" ,
// Last ' is important, as if the user input an accidental quote at the end, this should be checked in
// commands where there should only be one input and return an error rather than silently succeed.
"'" ,
] ;
assert_eq! ( expected , result ) ;
}
#[ test ]
fn test_lists ( ) {
let input =
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"# ;
fn should_return_rest ( ) {
let input = r#":set statusline.center ["file-type","file-encoding"]"# ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":set" ) ,
Cow ::from ( "statusline.center" ) ,
Cow ::from ( r#"["file-type","file-encoding"]"# ) ,
Cow ::from ( r#"["list", "in", "quotes"]"# ) ,
] ;
assert_eq! ( expected , result ) ;
let mut args = shellwords . args ( ) ;
assert_eq! ( ":set" , shellwords . command ( ) ) ;
assert_eq! ( Some ( "statusline.center" ) , args . next ( ) ) ;
assert_eq! ( r#"["file-type","file-encoding"]"# , args . rest ( ) ) ;
}
#[ test ]
fn should_return_no_args ( ) {
let mut args = Args ::parse ( "" ) ;
assert! ( args . next ( ) . is_none ( ) ) ;
}
#[ test ]
fn should_leave_escaped_quotes ( ) {
let input = r#"\" \` \' \"with \'with \`with"# ;
let result = Args ::parse ( input ) . collect ::< Vec < _ > > ( ) ;
assert_eq! ( r#"\""# , result [ 0 ] ) ;
assert_eq! ( r"\`" , result [ 1 ] ) ;
assert_eq! ( r"\'" , result [ 2 ] ) ;
assert_eq! ( r#"\"with"# , result [ 3 ] ) ;
assert_eq! ( r"\'with" , result [ 4 ] ) ;
assert_eq! ( r"\`with" , result [ 5 ] ) ;
}
#[ test ]
fn should_leave_literal_newline_alone ( ) {
let result = Args ::parse ( r"\n" ) . collect ::< Vec < _ > > ( ) ;
assert_eq! ( r"\n" , result [ 0 ] ) ;
}
#[ test ]
fn should_leave_literal_unicode_alone ( ) {
let result = Args ::parse ( r"\u{C}" ) . collect ::< Vec < _ > > ( ) ;
assert_eq! ( r"\u{C}" , result [ 0 ] ) ;
}
#[ test ]
#[ cfg(unix) ]
fn test_escaping_unix ( ) {
fn should_escape _unix( ) {
assert_eq! ( escape ( "foobar" . into ( ) ) , Cow ::Borrowed ( "foobar" ) ) ;
assert_eq! ( escape ( "foo bar" . into ( ) ) , Cow ::Borrowed ( "foo\\ bar" ) ) ;
assert_eq! ( escape ( "foo\tbar" . into ( ) ) , Cow ::Borrowed ( "foo\\\tbar" ) ) ;
@ -316,35 +671,79 @@ mod test {
#[ test ]
#[ cfg(windows) ]
fn test_escaping _windows( ) {
fn should_escape _windows( ) {
assert_eq! ( escape ( "foobar" . into ( ) ) , Cow ::Borrowed ( "foobar" ) ) ;
assert_eq! ( escape ( "foo bar" . into ( ) ) , Cow ::Borrowed ( "\"foo bar\"" ) ) ;
}
#[ test ]
#[ cfg(unix) ]
fn test_parts ( ) {
assert_eq! ( Shellwords ::from ( ":o a" ) . parts ( ) , & [ ":o" , "a" ] ) ;
assert_eq! ( Shellwords ::from ( ":o a\\ " ) . parts ( ) , & [ ":o" , "a\\ " ] ) ;
fn should_unescape_newline ( ) {
let unescaped = unescape ( "hello\\nworld" ) ;
assert_eq! ( "hello\nworld" , unescaped ) ;
}
#[ test ]
#[ cfg(windows) ]
fn test_parts ( ) {
assert_eq! ( Shellwords ::from ( ":o a" ) . parts ( ) , & [ ":o" , "a" ] ) ;
assert_eq! ( Shellwords ::from ( ":o a\\ " ) . parts ( ) , & [ ":o" , "a\\" ] ) ;
fn should_unescape_tab ( ) {
let unescaped = unescape ( "hello\\tworld" ) ;
assert_eq! ( "hello\tworld" , unescaped ) ;
}
#[ test ]
fn test_multibyte_at_end ( ) {
assert_eq! ( Shellwords ::from ( "𒀀" ) . parts ( ) , & [ "𒀀" ] ) ;
assert_eq! (
Shellwords ::from ( ":sh echo 𒀀" ) . parts ( ) ,
& [ ":sh" , "echo" , "𒀀" ]
) ;
assert_eq! (
Shellwords ::from ( ":sh echo 𒀀 hello world𒀀" ) . parts ( ) ,
& [ ":sh" , "echo" , "𒀀" , "hello" , "world𒀀" ]
) ;
fn should_unescape_unicode ( ) {
let unescaped = unescape ( "hello\\u{1f929}world" ) ;
assert_eq! ( "hello\u{1f929}world" , unescaped , "char: 🤩 " ) ;
assert_eq! ( "hello🤩world" , unescaped ) ;
}
#[ test ]
fn should_return_original_input_due_to_bad_unicode ( ) {
let unescaped = unescape ( "hello\\u{999999999}world" ) ;
assert_eq! ( "hello\\u{999999999}world" , unescaped ) ;
}
#[ test ]
fn should_not_unescape_slash ( ) {
let unescaped = unescape ( r"hello\\world" ) ;
assert_eq! ( r"hello\\world" , unescaped ) ;
let unescaped = unescape ( r"hello\\\\world" ) ;
assert_eq! ( r"hello\\\\world" , unescaped ) ;
}
#[ test ]
fn should_not_unescape_slash_single_quote ( ) {
let unescaped = unescape ( "\\'" ) ;
assert_eq! ( r"\'" , unescaped ) ;
}
#[ test ]
fn should_not_unescape_slash_double_quote ( ) {
let unescaped = unescape ( "\\\"" ) ;
assert_eq! ( r#"\""# , unescaped ) ;
}
#[ test ]
fn should_not_change_anything ( ) {
let unescaped = unescape ( "'" ) ;
assert_eq! ( "'" , unescaped ) ;
let unescaped = unescape ( r#"""# ) ;
assert_eq! ( r#"""# , unescaped ) ;
}
#[ test ]
fn should_only_unescape_newline_not_slash_single_quote ( ) {
let unescaped = unescape ( "\\n\'" ) ;
assert_eq! ( "\n'" , unescaped ) ;
let unescaped = unescape ( "\\n\\'" ) ;
assert_eq! ( "\n\\'" , unescaped ) ;
}
#[ test ]
fn should_unescape_args ( ) {
// 1f929: 🤩
let args = Args ::parse ( r#"'hello\u{1f929} world' '["hello", "\u{1f929}", "world"]'"# )
. collect ::< Vec < _ > > ( ) ;
assert_eq! ( "hello\u{1f929} world" , unescape ( args [ 0 ] ) ) ;
assert_eq! ( r#"["hello", "🤩", "world"]"# , unescape ( args [ 1 ] ) ) ;
}
}