From 62d046fa219b927c536bf6726fcae1e825346e0e Mon Sep 17 00:00:00 2001 From: Mike Trinkala Date: Wed, 1 Feb 2023 14:07:42 -0800 Subject: [PATCH] Fix utf8 length handling for shellwords (#5738) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the last argument to shellwords ends in a multibyte utf8 character the entire argument will be dropped. e.g. `:sh echo test1 test2𒀀` will only output `test1` Add additional tests based on the code review feedback --- helix-core/src/shellwords.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs index 9475f5e50..0883eb917 100644 --- a/helix-core/src/shellwords.rs +++ b/helix-core/src/shellwords.rs @@ -129,8 +129,9 @@ impl<'a> From<&'a str> for Shellwords<'a> { DquoteEscaped => Dquoted, }; - if i >= input.len() - 1 && end == 0 { - end = i + 1; + let c_len = c.len_utf8(); + if i == input.len() - c_len && end == 0 { + end = i + c_len; } if end > 0 { @@ -333,4 +334,17 @@ mod test { assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]); } + + #[test] + fn test_multibyte_at_end() { + assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]); + assert_eq!( + Shellwords::from(":sh echo 𒀀").parts(), + &[":sh", "echo", "𒀀"] + ); + assert_eq!( + Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(), + &[":sh", "echo", "𒀀", "hello", "world𒀀"] + ); + } }