@ -27,181 +27,172 @@ enum State {
DquoteEscaped ,
}
/// Get the vec of escaped / quoted / doublequoted filenames from the input str
pub fn shellwords ( input : & str ) -> Vec < Cow < ' _ , str > > {
use State ::* ;
/// The result of splitting an input string into shellwords.
///
/// Values are built through the `From<&str>` conversion. `words` and `parts`
/// always hold the same number of entries, and the entries at a given index
/// describe the same word.
pub struct Shellwords < ' a > {
/// The parser state that was reached after the last character of the input
/// was consumed. `ends_with_whitespace` inspects this value.
state : State ,
/// Shellwords where whitespace and escapes have been resolved.
words : Vec < Cow < ' a , str > > ,
/// The parts of the input that are divided into shellwords. This can be
/// used to retrieve the original text for a given word by looking up the
/// same index in the Vec as the word in `words`.
parts : Vec < & ' a str > ,
}
let mut state = Unquoted ;
let mut args : Vec < Cow < str > > = Vec ::new ( ) ;
let mut escaped = String ::with_capacity ( input . len ( ) ) ;
impl < ' a > From < & ' a str > for Shellwords < ' a > {
fn from ( input : & ' a str ) -> Self {
use State ::* ;
let mut start = 0 ;
let mut end = 0 ;
let mut state = Unquoted ;
let mut words = Vec ::new ( ) ;
let mut parts = Vec ::new ( ) ;
let mut escaped = String ::with_capacity ( input . len ( ) ) ;
for ( i , c ) in input . char_indices ( ) {
state = match state {
OnWhitespace = > match c {
'"' = > {
end = i ;
Dquoted
}
'\'' = > {
end = i ;
Quoted
}
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ start .. i ] ) ;
start = i + 1 ;
UnquotedEscaped
} else {
let mut part_start = 0 ;
let mut unescaped_start = 0 ;
let mut end = 0 ;
for ( i , c ) in input . char_indices ( ) {
state = match state {
OnWhitespace = > match c {
'"' = > {
end = i ;
Dquoted
}
'\'' = > {
end = i ;
Quoted
}
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
UnquotedEscaped
} else {
OnWhitespace
}
}
c if c . is_ascii_whitespace ( ) = > {
end = i ;
OnWhitespace
}
}
c if c . is_ascii_whitespace ( ) = > {
end = i ;
OnWhitespace
}
_ = > Unquoted ,
} ,
Unquoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ start .. i ] ) ;
start = i + 1 ;
UnquotedEscaped
} else {
Unquoted
_ = > Unquoted ,
} ,
Unquoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
UnquotedEscaped
} else {
Unquoted
}
}
}
c if c . is_ascii_whitespace ( ) = > {
end = i ;
OnWhitespace
}
_ = > Unquoted ,
} ,
UnquotedEscaped = > Unquoted ,
Quoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ start .. i ] ) ;
start = i + 1 ;
QuoteEscaped
} else {
Quoted
c if c . is_ascii_whitespace ( ) = > {
end = i ;
OnWhitespace
}
}
'\'' = > {
end = i ;
OnWhitespace
}
_ = > Quoted ,
} ,
QuoteEscaped = > Quoted ,
Dquoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ start .. i ] ) ;
start = i + 1 ;
DquoteEscaped
} else {
Dquoted
_ = > Unquoted ,
} ,
UnquotedEscaped = > Unquoted ,
Quoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
QuoteEscaped
} else {
Quoted
}
}
}
'"' = > {
end = i ;
OnWhitespace
}
_ = > Dquoted ,
} ,
DquoteEscaped = > Dquoted ,
} ;
'\'' = > {
end = i ;
OnWhitespace
}
_ = > Quoted ,
} ,
QuoteEscaped = > Quoted ,
Dquoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
DquoteEscaped
} else {
Dquoted
}
}
'"' = > {
end = i ;
OnWhitespace
}
_ = > Dquoted ,
} ,
DquoteEscaped = > Dquoted ,
} ;
if i > = input . len ( ) - 1 & & end = = 0 {
end = i + 1 ;
}
if i > = input . len ( ) - 1 & & end = = 0 {
end = i + 1 ;
}
if end > 0 {
let esc_trim = escaped . trim ( ) ;
let inp = & input [ start .. end ] ;
if end > 0 {
let esc_trim = escaped . trim ( ) ;
let inp = & input [ unescaped_ start.. end ] ;
if ! ( esc_trim . is_empty ( ) & & inp . trim ( ) . is_empty ( ) ) {
if esc_trim . is_empty ( ) {
args . push ( inp . into ( ) ) ;
} else {
args . push ( [ escaped , inp . into ( ) ] . concat ( ) . into ( ) ) ;
escaped = "" . to_string ( ) ;
if ! ( esc_trim . is_empty ( ) & & inp . trim ( ) . is_empty ( ) ) {
if esc_trim . is_empty ( ) {
words . push ( inp . into ( ) ) ;
parts . push ( inp ) ;
} else {
words . push ( [ escaped , inp . into ( ) ] . concat ( ) . into ( ) ) ;
parts . push ( & input [ part_start .. end ] ) ;
escaped = "" . to_string ( ) ;
}
}
unescaped_start = i + 1 ;
part_start = i + 1 ;
end = 0 ;
}
start = i + 1 ;
end = 0 ;
}
}
args
}
/// Checks that the input ends with an ascii whitespace character which is
/// not escaped.
///
/// # Examples
///
/// ```rust
/// use helix_core::shellwords::ends_with_whitespace;
/// assert_eq!(ends_with_whitespace(" "), true);
/// assert_eq!(ends_with_whitespace(":open "), true);
/// assert_eq!(ends_with_whitespace(":open foo.txt "), true);
/// assert_eq!(ends_with_whitespace(":open"), false);
/// #[cfg(unix)]
/// assert_eq!(ends_with_whitespace(":open a\\ "), false);
/// #[cfg(unix)]
/// assert_eq!(ends_with_whitespace(":open a\\ b.txt"), false);
/// ```
pub fn ends_with_whitespace ( input : & str ) -> bool {
use State ::* ;
debug_assert! ( words . len ( ) = = parts . len ( ) ) ;
// Fast-lane: the input must end with a whitespace character
// regardless of quoting.
if ! input . ends_with ( | c : char | c . is_ascii_whitespace ( ) ) {
return false ;
Self {
state ,
words ,
parts ,
}
}
}
let mut state = Unquoted ;
impl < ' a > Shellwords < ' a > {
/// Checks that the input ends with an ASCII whitespace character which is not escaped.
///
/// The answer is taken from the parser state that was recorded when the
/// value was constructed; the input string is not scanned again.
///
/// # Examples
///
/// ```rust
/// use helix_core::shellwords::Shellwords;
/// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
/// ```
pub fn ends_with_whitespace ( & self ) -> bool {
// NOTE(review): the parser also moves to `OnWhitespace` right after a
// closing `'` or `"`, so input that ends in a closing quote looks like it
// reports `true` here even without trailing whitespace - confirm that this
// is intended.
matches! ( self . state , State ::OnWhitespace )
}
for c in input . chars ( ) {
state = match state {
OnWhitespace = > match c {
'"' = > Dquoted ,
'\'' = > Quoted ,
'\\' if cfg! ( unix ) = > UnquotedEscaped ,
'\\' = > OnWhitespace ,
c if c . is_ascii_whitespace ( ) = > OnWhitespace ,
_ = > Unquoted ,
} ,
Unquoted = > match c {
'\\' if cfg! ( unix ) = > UnquotedEscaped ,
'\\' = > Unquoted ,
c if c . is_ascii_whitespace ( ) = > OnWhitespace ,
_ = > Unquoted ,
} ,
UnquotedEscaped = > Unquoted ,
Quoted = > match c {
'\\' if cfg! ( unix ) = > QuoteEscaped ,
'\\' = > Quoted ,
'\'' = > OnWhitespace ,
_ = > Quoted ,
} ,
QuoteEscaped = > Quoted ,
Dquoted = > match c {
'\\' if cfg! ( unix ) = > DquoteEscaped ,
'\\' = > Dquoted ,
'"' = > OnWhitespace ,
_ = > Dquoted ,
} ,
DquoteEscaped = > Dquoted ,
}
/// Returns the list of shellwords calculated from the input string.
pub fn words ( & self ) -> & [ Cow < ' a , str > ] {
& self . words
}
matches! ( state , OnWhitespace )
/// Returns a list of strings which correspond to [`Self::words`] but represent the original
/// text in the input string - including escape characters - without separating whitespace.
pub fn parts ( & self ) -> & [ & ' a str ] {
& self . parts
}
}
#[ cfg(test) ]
@ -212,7 +203,8 @@ mod test {
#[ cfg(windows) ]
fn test_normal ( ) {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
let result = shellwords ( input ) ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
@ -230,7 +222,8 @@ mod test {
#[ cfg(unix) ]
fn test_normal ( ) {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
let result = shellwords ( input ) ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
@ -247,7 +240,8 @@ mod test {
fn test_quoted ( ) {
let quoted =
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"# ;
let result = shellwords ( quoted ) ;
let shellwords = Shellwords ::from ( quoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
@ -262,7 +256,8 @@ mod test {
#[ cfg(unix) ]
fn test_dquoted ( ) {
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"# ;
let result = shellwords ( dquoted ) ;
let shellwords = Shellwords ::from ( dquoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
@ -277,7 +272,8 @@ mod test {
#[ cfg(unix) ]
fn test_mixed ( ) {
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"# ;
let result = shellwords ( dquoted ) ;
let shellwords = Shellwords ::from ( dquoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
@ -298,7 +294,8 @@ mod test {
fn test_lists ( ) {
let input =
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"# ;
let result = shellwords ( input ) ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
Cow ::from ( ":set" ) ,
Cow ::from ( "statusline.center" ) ,
@ -322,4 +319,18 @@ mod test {
assert_eq! ( escape ( "foobar" . into ( ) ) , Cow ::Borrowed ( "foobar" ) ) ;
assert_eq! ( escape ( "foo bar" . into ( ) ) , Cow ::Borrowed ( "\"foo bar\"" ) ) ;
}
#[test]
#[cfg(unix)]
fn test_parts() {
    // (input, expected parts). On unix the backslash is handled as an escape,
    // and the second case expects the escaped trailing space to stay in the part.
    let cases: [(&str, [&str; 2]); 2] = [
        (":o a", [":o", "a"]),
        (":o a\\ ", [":o", "a\\ "]),
    ];

    for (input, expected) in cases {
        assert_eq!(Shellwords::from(input).parts(), &expected);
    }
}
#[test]
#[cfg(windows)]
fn test_parts() {
    // (input, expected parts). On windows the backslash is not an escape, and
    // the second case expects the trailing space to be dropped from the part.
    let cases: [(&str, [&str; 2]); 2] = [
        (":o a", [":o", "a"]),
        (":o a\\ ", [":o", "a\\"]),
    ];

    for (input, expected) in cases {
        assert_eq!(Shellwords::from(input).parts(), &expected);
    }
}
}