@ -27,181 +27,172 @@ enum State {
DquoteEscaped ,
DquoteEscaped ,
}
}
/// Get the vec of escaped / quoted / doublequoted filenames from the input str
pub struct Shellwords < ' a > {
pub fn shellwords ( input : & str ) -> Vec < Cow < ' _ , str > > {
state : State ,
use State ::* ;
/// Shellwords where whitespace and escapes have been resolved.
words : Vec < Cow < ' a , str > > ,
/// The parts of the input that are divided into shellwords. This can be
/// used to retrieve the original text for a given word by looking up the
/// same index in the Vec as the word in `words`.
parts : Vec < & ' a str > ,
}
let mut state = Unquoted ;
impl < ' a > From < & ' a str > for Shellwords < ' a > {
let mut args : Vec < Cow < str > > = Vec ::new ( ) ;
fn from ( input : & ' a str ) -> Self {
let mut escaped = String ::with_capacity ( input . len ( ) ) ;
use State ::* ;
let mut start = 0 ;
let mut state = Unquoted ;
let mut end = 0 ;
let mut words = Vec ::new ( ) ;
let mut parts = Vec ::new ( ) ;
let mut escaped = String ::with_capacity ( input . len ( ) ) ;
for ( i , c ) in input . char_indices ( ) {
let mut part_start = 0 ;
state = match state {
let mut unescaped_start = 0 ;
OnWhitespace = > match c {
let mut end = 0 ;
'"' = > {
end = i ;
for ( i , c ) in input . char_indices ( ) {
Dquoted
state = match state {
}
OnWhitespace = > match c {
'\'' = > {
'"' = > {
end = i ;
end = i ;
Quoted
Dquoted
}
}
'\\' = > {
'\'' = > {
if cfg! ( unix ) {
end = i ;
escaped . push_str ( & input [ start .. i ] ) ;
Quoted
start = i + 1 ;
}
UnquotedEscaped
'\\' = > {
} else {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
UnquotedEscaped
} else {
OnWhitespace
}
}
c if c . is_ascii_whitespace ( ) = > {
end = i ;
OnWhitespace
OnWhitespace
}
}
}
_ = > Unquoted ,
c if c . is_ascii_whitespace ( ) = > {
} ,
end = i ;
Unquoted = > match c {
OnWhitespace
'\\' = > {
}
if cfg! ( unix ) {
_ = > Unquoted ,
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
} ,
unescaped_start = i + 1 ;
Unquoted = > match c {
UnquotedEscaped
'\\' = > {
} else {
if cfg! ( unix ) {
Unquoted
escaped . push_str ( & input [ start .. i ] ) ;
}
start = i + 1 ;
UnquotedEscaped
} else {
Unquoted
}
}
}
c if c . is_ascii_whitespace ( ) = > {
c if c . is_ascii_whitespace ( ) = > {
end = i ;
end = i ;
OnWhitespace
OnWhitespace
}
_ = > Unquoted ,
} ,
UnquotedEscaped = > Unquoted ,
Quoted = > match c {
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ start .. i ] ) ;
start = i + 1 ;
QuoteEscaped
} else {
Quoted
}
}
}
_ = > Unquoted ,
'\'' = > {
} ,
end = i ;
UnquotedEscaped = > Unquoted ,
OnWhitespace
Quoted = > match c {
}
'\\' = > {
_ = > Quoted ,
if cfg! ( unix ) {
} ,
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
QuoteEscaped = > Quoted ,
unescaped_start = i + 1 ;
Dquoted = > match c {
QuoteEscaped
'\\' = > {
} else {
if cfg! ( unix ) {
Quoted
escaped . push_str ( & input [ start .. i ] ) ;
}
start = i + 1 ;
DquoteEscaped
} else {
Dquoted
}
}
}
'\'' = > {
'"' = > {
end = i ;
end = i ;
OnWhitespace
OnWhitespace
}
}
_ = > Quoted ,
_ = > Dquoted ,
} ,
} ,
QuoteEscaped = > Quoted ,
DquoteEscaped = > Dquoted ,
Dquoted = > match c {
} ;
'\\' = > {
if cfg! ( unix ) {
escaped . push_str ( & input [ unescaped_start .. i ] ) ;
unescaped_start = i + 1 ;
DquoteEscaped
} else {
Dquoted
}
}
'"' = > {
end = i ;
OnWhitespace
}
_ = > Dquoted ,
} ,
DquoteEscaped = > Dquoted ,
} ;
if i > = input . len ( ) - 1 & & end = = 0 {
if i > = input . len ( ) - 1 & & end = = 0 {
end = i + 1 ;
end = i + 1 ;
}
}
if end > 0 {
if end > 0 {
let esc_trim = escaped . trim ( ) ;
let esc_trim = escaped . trim ( ) ;
let inp = & input [ start .. end ] ;
let inp = & input [ unescaped_ start.. end ] ;
if ! ( esc_trim . is_empty ( ) & & inp . trim ( ) . is_empty ( ) ) {
if ! ( esc_trim . is_empty ( ) & & inp . trim ( ) . is_empty ( ) ) {
if esc_trim . is_empty ( ) {
if esc_trim . is_empty ( ) {
args . push ( inp . into ( ) ) ;
words . push ( inp . into ( ) ) ;
} else {
parts . push ( inp ) ;
args . push ( [ escaped , inp . into ( ) ] . concat ( ) . into ( ) ) ;
} else {
escaped = "" . to_string ( ) ;
words . push ( [ escaped , inp . into ( ) ] . concat ( ) . into ( ) ) ;
parts . push ( & input [ part_start .. end ] ) ;
escaped = "" . to_string ( ) ;
}
}
}
unescaped_start = i + 1 ;
part_start = i + 1 ;
end = 0 ;
}
}
start = i + 1 ;
end = 0 ;
}
}
}
args
}
/// Checks that the input ends with an ASCII whitespace character which is
debug_assert! ( words . len ( ) = = parts . len ( ) ) ;
/// not escaped.
///
/// # Examples
///
/// ```rust
/// use helix_core::shellwords::ends_with_whitespace;
/// assert_eq!(ends_with_whitespace(" "), true);
/// assert_eq!(ends_with_whitespace(":open "), true);
/// assert_eq!(ends_with_whitespace(":open foo.txt "), true);
/// assert_eq!(ends_with_whitespace(":open"), false);
/// #[cfg(unix)]
/// assert_eq!(ends_with_whitespace(":open a\\ "), false);
/// #[cfg(unix)]
/// assert_eq!(ends_with_whitespace(":open a\\ b.txt"), false);
/// ```
pub fn ends_with_whitespace ( input : & str ) -> bool {
use State ::* ;
// Fast-lane: the input must end with a whitespace character
Self {
// regardless of quoting.
state ,
if ! input . ends_with ( | c : char | c . is_ascii_whitespace ( ) ) {
words ,
return false ;
parts ,
}
}
}
}
let mut state = Unquoted ;
impl < ' a > Shellwords < ' a > {
/// Checks that the input ends with a whitespace character which is not escaped.
///
/// # Examples
///
/// ```rust
/// use helix_core::shellwords::Shellwords;
/// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
/// ```
pub fn ends_with_whitespace ( & self ) -> bool {
matches! ( self . state , State ::OnWhitespace )
}
for c in input . chars ( ) {
/// Returns the list of shellwords calculated from the input string.
state = match state {
pub fn words ( & self ) -> & [ Cow < ' a , str > ] {
OnWhitespace = > match c {
& self . words
'"' = > Dquoted ,
'\'' = > Quoted ,
'\\' if cfg! ( unix ) = > UnquotedEscaped ,
'\\' = > OnWhitespace ,
c if c . is_ascii_whitespace ( ) = > OnWhitespace ,
_ = > Unquoted ,
} ,
Unquoted = > match c {
'\\' if cfg! ( unix ) = > UnquotedEscaped ,
'\\' = > Unquoted ,
c if c . is_ascii_whitespace ( ) = > OnWhitespace ,
_ = > Unquoted ,
} ,
UnquotedEscaped = > Unquoted ,
Quoted = > match c {
'\\' if cfg! ( unix ) = > QuoteEscaped ,
'\\' = > Quoted ,
'\'' = > OnWhitespace ,
_ = > Quoted ,
} ,
QuoteEscaped = > Quoted ,
Dquoted = > match c {
'\\' if cfg! ( unix ) = > DquoteEscaped ,
'\\' = > Dquoted ,
'"' = > OnWhitespace ,
_ = > Dquoted ,
} ,
DquoteEscaped = > Dquoted ,
}
}
}
matches! ( state , OnWhitespace )
/// Returns a list of strings which correspond to [`Self::words`] but represent the original
/// text in the input string - including escape characters - without separating whitespace.
pub fn parts ( & self ) -> & [ & ' a str ] {
& self . parts
}
}
}
#[ cfg(test) ]
#[ cfg(test) ]
@ -212,7 +203,8 @@ mod test {
#[ cfg(windows) ]
#[ cfg(windows) ]
fn test_normal ( ) {
fn test_normal ( ) {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
let result = shellwords ( input ) ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "single_word" ) ,
@ -230,7 +222,8 @@ mod test {
#[ cfg(unix) ]
#[ cfg(unix) ]
fn test_normal ( ) {
fn test_normal ( ) {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
let result = shellwords ( input ) ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "single_word" ) ,
@ -247,7 +240,8 @@ mod test {
fn test_quoted ( ) {
fn test_quoted ( ) {
let quoted =
let quoted =
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"# ;
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"# ;
let result = shellwords ( quoted ) ;
let shellwords = Shellwords ::from ( quoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "single_word" ) ,
@ -262,7 +256,8 @@ mod test {
#[ cfg(unix) ]
#[ cfg(unix) ]
fn test_dquoted ( ) {
fn test_dquoted ( ) {
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"# ;
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"# ;
let result = shellwords ( dquoted ) ;
let shellwords = Shellwords ::from ( dquoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "single_word" ) ,
@ -277,7 +272,8 @@ mod test {
#[ cfg(unix) ]
#[ cfg(unix) ]
fn test_mixed ( ) {
fn test_mixed ( ) {
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"# ;
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"# ;
let result = shellwords ( dquoted ) ;
let shellwords = Shellwords ::from ( dquoted ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
let expected = vec! [
Cow ::from ( ":o" ) ,
Cow ::from ( ":o" ) ,
Cow ::from ( "single_word" ) ,
Cow ::from ( "single_word" ) ,
@ -298,7 +294,8 @@ mod test {
fn test_lists ( ) {
fn test_lists ( ) {
let input =
let input =
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"# ;
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"# ;
let result = shellwords ( input ) ;
let shellwords = Shellwords ::from ( input ) ;
let result = shellwords . words ( ) . to_vec ( ) ;
let expected = vec! [
let expected = vec! [
Cow ::from ( ":set" ) ,
Cow ::from ( ":set" ) ,
Cow ::from ( "statusline.center" ) ,
Cow ::from ( "statusline.center" ) ,
@ -322,4 +319,18 @@ mod test {
assert_eq! ( escape ( "foobar" . into ( ) ) , Cow ::Borrowed ( "foobar" ) ) ;
assert_eq! ( escape ( "foobar" . into ( ) ) , Cow ::Borrowed ( "foobar" ) ) ;
assert_eq! ( escape ( "foo bar" . into ( ) ) , Cow ::Borrowed ( "\"foo bar\"" ) ) ;
assert_eq! ( escape ( "foo bar" . into ( ) ) , Cow ::Borrowed ( "\"foo bar\"" ) ) ;
}
}
// Checks that `parts()` returns the original text of every word. On Unix the
// backslash escapes the following space, so the trailing `\ ` remains inside
// the last part.
#[test]
#[cfg(unix)]
fn test_parts() {
    // (input, expected raw parts)
    let cases: [(&str, &[&str]); 2] = [
        (":o a", &[":o", "a"]),
        (":o a\\ ", &[":o", "a\\ "]),
    ];

    for &(input, expected) in &cases {
        assert_eq!(
            Shellwords::from(input).parts(),
            expected,
            "input: {:?}",
            input
        );
    }
}
// Checks that `parts()` returns the original text of every word. On Windows
// the backslash is not an escape character, so the trailing space acts as a
// separator and is left out of the last part.
#[test]
#[cfg(windows)]
fn test_parts() {
    // (input, expected raw parts)
    let cases: [(&str, &[&str]); 2] = [
        (":o a", &[":o", "a"]),
        (":o a\\ ", &[":o", "a\\"]),
    ];

    for &(input, expected) in &cases {
        assert_eq!(
            Shellwords::from(input).parts(),
            expected,
            "input: {:?}",
            input
        );
    }
}
}
}