Add :character-info command (#4000)

pull/5/head
William Etheredge 1 year ago committed by GitHub
parent d8f482e11e
commit f7bd7b5eaf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -43,6 +43,7 @@
| `:change-current-directory`, `:cd` | Change the current working directory. |
| `:show-directory`, `:pwd` | Show the current working directory. |
| `:encoding` | Set encoding. Based on `https://encoding.spec.whatwg.org`. |
| `:character-info`, `:char` | Get info about the character under the primary cursor. |
| `:reload` | Discard changes and reload from the source file. |
| `:reload-all` | Discard changes and reload all documents from the source files. |
| `:update` | Write changes only if the file has been modified. |

@ -4,6 +4,7 @@ use crate::job::Job;
use super::*;
use helix_core::encoding;
use helix_view::editor::{Action, CloseError, ConfigEvent};
use ui::completers::{self, Completer};
@ -1033,6 +1034,131 @@ fn set_encoding(
}
}
/// Shows info about the character under the primary cursor.
///
/// The status line is set to the grapheme at the primary cursor (with `\0`,
/// `\t`, `\n` and `\r` shown as escape sequences), followed by:
///
/// - its Unicode codepoints, only when the document encoding is UTF-8;
/// - its decimal value, only when the encoding is ASCII-compatible and the
///   grapheme is exactly one byte long;
/// - the hex bytes of every character, encoded in the document's encoding.
///
/// Nothing happens unless `event` is [`PromptEvent::Validate`], or when the
/// next grapheme boundary equals the cursor position (empty grapheme).
///
/// # Errors
///
/// Returns an error when a character cannot be mapped to the document's
/// encoding, or when the size of the encode buffer cannot be computed.
fn get_character_info(
    cx: &mut compositor::Context,
    _args: &[Cow<str>],
    event: PromptEvent,
) -> anyhow::Result<()> {
    if event != PromptEvent::Validate {
        return Ok(());
    }

    let (view, doc) = current_ref!(cx.editor);
    let text = doc.text().slice(..);

    let grapheme_start = doc.selection(view.id).primary().cursor(text);
    let grapheme_end = graphemes::next_grapheme_boundary(text, grapheme_start);

    // Empty range: there is no grapheme to describe.
    if grapheme_start == grapheme_end {
        return Ok(());
    }

    let grapheme = text.slice(grapheme_start..grapheme_end).to_string();
    let encoding = doc.encoding();

    // Show a few control characters as escape sequences so they stay visible
    // in the status line.
    let printable = grapheme.chars().fold(String::new(), |mut s, c| {
        match c {
            '\0' => s.push_str("\\0"),
            '\t' => s.push_str("\\t"),
            '\n' => s.push_str("\\n"),
            '\r' => s.push_str("\\r"),
            _ => s.push(c),
        }
        s
    });

    // Convert to Unicode codepoints if in UTF-8
    let unicode = if encoding == encoding::UTF_8 {
        let mut unicode = " (".to_owned();

        for (i, ch) in grapheme.chars().enumerate() {
            if i != 0 {
                unicode.push(' ');
            }

            // A `char` is a Unicode scalar value, so converting it to `u32`
            // yields the codepoint directly; the UTF-8 bytes never need to be
            // decoded by hand.
            let codepoint = u32::from(ch);
            unicode.push_str(&format!("U+{codepoint:0>4x}"));
        }

        unicode.push(')');
        unicode
    } else {
        String::new()
    };

    // Give the decimal value for ascii characters
    let dec = if encoding.is_ascii_compatible() && grapheme.len() == 1 {
        format!(" Dec {}", grapheme.as_bytes()[0])
    } else {
        String::new()
    };

    let hex = {
        let mut encoder = encoding.new_encoder();
        // Report a failed size computation as an error instead of panicking.
        let max_encoded_len =
            match encoder.max_buffer_length_from_utf8_without_replacement(grapheme.len()) {
                Some(len) => len,
                None => bail!("cannot compute the encoded length of the character"),
            };
        let mut bytes = Vec::with_capacity(max_encoded_len);
        // Index of the first byte in `bytes` that belongs to the current char.
        let mut current_byte = 0;
        let mut hex = String::new();

        for (i, ch) in grapheme.chars().enumerate() {
            // Separate the byte groups of consecutive characters with " +".
            if i != 0 {
                hex.push_str(" +");
            }

            let (result, _input_bytes_read) = encoder.encode_from_utf8_to_vec_without_replacement(
                &ch.to_string(),
                &mut bytes,
                true,
            );

            if let encoding::EncoderResult::Unmappable(unmappable) = result {
                bail!("{unmappable:?} cannot be mapped to {}", encoding.name());
            }

            // Only print the bytes appended for this character.
            for byte in &bytes[current_byte..] {
                hex.push_str(&format!(" {byte:0>2x}"));
            }

            current_byte = bytes.len();
        }

        hex
    };

    cx.editor
        .set_status(format!("\"{printable}\"{unicode}{dec} Hex{hex}"));

    Ok(())
}
/// Reload the [`Document`] from its source file.
fn reload(
cx: &mut compositor::Context,
@ -2131,6 +2257,13 @@ pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[
fun: set_encoding,
completer: None,
},
TypableCommand {
name: "character-info",
aliases: &["char"],
doc: "Get info about the character under the primary cursor.",
fun: get_character_info,
completer: None,
},
TypableCommand {
name: "reload",
aliases: &[],

@ -354,3 +354,61 @@ async fn test_extend_line() -> anyhow::Result<()> {
Ok(())
}
/// Checks the status message produced by `:char` for several inputs.
///
/// Every case starts from a freshly built application, feeds it a key
/// sequence that ends in `:char<ret>`, and compares the resulting status text
/// with the expected string.
#[tokio::test(flavor = "multi_thread")]
async fn test_character_info() -> anyhow::Result<()> {
    // (key sequence, expected status text), run in this order.
    let cases = [
        // UTF-8, single byte
        ("ih<esc>h:char<ret>", r#""h" (U+0068) Dec 104 Hex 68"#),
        // UTF-8, multi-byte
        (
            "ië<esc>h:char<ret>",
            r#""ë" (U+0065 U+0308) Hex 65 + cc 88"#,
        ),
        // Multiple characters displayed as one, escaped characters
        (
            ":line<minus>ending crlf<ret>:char<ret>",
            r#""\r\n" (U+000d U+000a) Hex 0d + 0a"#,
        ),
        // Non-UTF-8
        (
            ":encoding ascii<ret>ih<esc>h:char<ret>",
            r#""h" Dec 104 Hex 68"#,
        ),
    ];

    for (keys, expected_status) in cases {
        let mut app = helpers::AppBuilder::new().build()?;

        test_key_sequence(
            &mut app,
            Some(keys),
            Some(&|app| {
                assert_eq!(expected_status, app.editor.get_status().unwrap().0);
            }),
            false,
        )
        .await?;
    }

    Ok(())
}

Loading…
Cancel
Save