graphemes: Optimize nth_next/nth_prev operation.

These operations are used much more often than they used to be in position calculations. Instead of throwing away the cursor state between boundary calculations, reuse it.
4 years ago · 6bd16a7320
parent f118e7580f
commit 6bd16a7320
1 changed files with 54 additions and 60 deletions
--- a/helix-core/src/graphemes.rs
+++ b/helix-core/src/graphemes.rs
@ -27,23 +27,11 @@ pub fn grapheme_width(g: &str) -> usize {
 }
 pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
    // TODO: implement this more efficiently.  This has to do a lot of
    // re-scanning of rope chunks.  Probably move the main implementation here,
    // and have prev_grapheme_boundary call this instead.
    let mut char_idx = char_idx;
    for _ in 0..n {
        char_idx = prev_grapheme_boundary(slice, char_idx);
    }
    char_idx
 }
 /// Finds the previous grapheme boundary before the given char position.
 pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
    // Bounds check
    debug_assert!(char_idx <= slice.len_chars());
    // We work with bytes for this, so convert.
-    let byte_idx = slice.char_to_byte(char_idx);
+    let mut byte_idx = slice.char_to_byte(char_idx);
    // Get the chunk with our byte index in it.
    let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
@ -52,12 +40,13 @@ pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
    let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
    // Find the previous grapheme cluster boundary.
    for _ in 0..n {
        loop {
            match gc.prev_boundary(chunk, chunk_byte_idx) {
                Ok(None) => return 0,
                Ok(Some(n)) => {
-                let tmp = byte_to_char_idx(chunk, n - chunk_byte_idx);
+                    byte_idx = n;
-                return chunk_char_idx + tmp;
+                    break;
                }
                Err(GraphemeIncomplete::PrevChunk) => {
                    let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1);
@ -72,26 +61,22 @@ pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
                _ => unreachable!(),
            }
        }
    }
    let tmp = byte_to_char_idx(chunk, byte_idx + chunk_byte_idx);
    chunk_char_idx + tmp
 }
-pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
+/// Finds the previous grapheme boundary before the given char position.
-    // TODO: implement this more efficiently.  This has to do a lot of
+pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
-    // re-scanning of rope chunks.  Probably move the main implementation here,
+    nth_prev_grapheme_boundary(slice, char_idx, 1)
    // and have next_grapheme_boundary call this instead.
    let mut char_idx = char_idx;
    for _ in 0..n {
        char_idx = next_grapheme_boundary(slice, char_idx);
    }
    char_idx
 }
-/// Finds the next grapheme boundary after the given char position.
+pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
 pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
    // Bounds check
    debug_assert!(char_idx <= slice.len_chars());
    // We work with bytes for this, so convert.
-    let byte_idx = slice.char_to_byte(char_idx);
+    let mut byte_idx = slice.char_to_byte(char_idx);
    // Get the chunk with our byte index in it.
    let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
@ -99,13 +84,14 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
    // Set up the grapheme cursor.
    let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
-    // Find the next grapheme cluster boundary.
+    // Find the nth next grapheme cluster boundary.
    for _ in 0..n {
        loop {
            match gc.next_boundary(chunk, chunk_byte_idx) {
                Ok(None) => return slice.len_chars(),
                Ok(Some(n)) => {
-                let tmp = byte_to_char_idx(chunk, n - chunk_byte_idx);
+                    byte_idx = n;
-                return chunk_char_idx + tmp;
+                    break;
                }
                Err(GraphemeIncomplete::NextChunk) => {
                    chunk_byte_idx += chunk.len();
@ -120,6 +106,14 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
                _ => unreachable!(),
            }
        }
    }
    let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx);
    chunk_char_idx + tmp
 }
 /// Returns the char index of the next grapheme boundary after `char_idx`.
 ///
 /// This is a thin wrapper: it asks `nth_next_grapheme_boundary` to advance
 /// by exactly one boundary and returns whatever that call yields.
 pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
    // A single step forward is just the n = 1 case of the general routine.
    let single_step = 1;
    nth_next_grapheme_boundary(slice, char_idx, single_step)
 }
 /// Returns whether the given char position is a grapheme boundary.