graphemes: Optimize nth_next/nth_prev operation.

It's used a lot more than it used to be in position calculations. Instead of
throwing away state between boundary calculations, reuse it.
imgbot
Blaž Hrastnik 3 years ago
parent f118e7580f
commit 6bd16a7320

@ -27,23 +27,11 @@ pub fn grapheme_width(g: &str) -> usize {
} }
pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize { pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
// TODO: implement this more efficiently. This has to do a lot of
// re-scanning of rope chunks. Probably move the main implementation here,
// and have prev_grapheme_boundary call this instead.
let mut char_idx = char_idx;
for _ in 0..n {
char_idx = prev_grapheme_boundary(slice, char_idx);
}
char_idx
}
/// Finds the previous grapheme boundary before the given char position.
pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
// Bounds check // Bounds check
debug_assert!(char_idx <= slice.len_chars()); debug_assert!(char_idx <= slice.len_chars());
// We work with bytes for this, so convert. // We work with bytes for this, so convert.
let byte_idx = slice.char_to_byte(char_idx); let mut byte_idx = slice.char_to_byte(char_idx);
// Get the chunk with our byte index in it. // Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
@ -52,46 +40,43 @@ pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
// Find the previous grapheme cluster boundary. // Find the previous grapheme cluster boundary.
loop { for _ in 0..n {
match gc.prev_boundary(chunk, chunk_byte_idx) { loop {
Ok(None) => return 0, match gc.prev_boundary(chunk, chunk_byte_idx) {
Ok(Some(n)) => { Ok(None) => return 0,
let tmp = byte_to_char_idx(chunk, n - chunk_byte_idx); Ok(Some(n)) => {
return chunk_char_idx + tmp; byte_idx = n;
} break;
Err(GraphemeIncomplete::PrevChunk) => { }
let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1); Err(GraphemeIncomplete::PrevChunk) => {
chunk = a; let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1);
chunk_byte_idx = b; chunk = a;
chunk_char_idx = c; chunk_byte_idx = b;
} chunk_char_idx = c;
Err(GraphemeIncomplete::PreContext(n)) => { }
let ctx_chunk = slice.chunk_at_byte(n - 1).0; Err(GraphemeIncomplete::PreContext(n)) => {
gc.provide_context(ctx_chunk, n - ctx_chunk.len()); let ctx_chunk = slice.chunk_at_byte(n - 1).0;
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
}
_ => unreachable!(),
} }
_ => unreachable!(),
} }
} }
let tmp = byte_to_char_idx(chunk, byte_idx + chunk_byte_idx);
chunk_char_idx + tmp
} }
pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize { /// Finds the previous grapheme boundary before the given char position.
// TODO: implement this more efficiently. This has to do a lot of pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
// re-scanning of rope chunks. Probably move the main implementation here, nth_prev_grapheme_boundary(slice, char_idx, 1)
// and have next_grapheme_boundary call this instead.
let mut char_idx = char_idx;
for _ in 0..n {
char_idx = next_grapheme_boundary(slice, char_idx);
}
char_idx
} }
/// Finds the next grapheme boundary after the given char position. pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
// Bounds check // Bounds check
debug_assert!(char_idx <= slice.len_chars()); debug_assert!(char_idx <= slice.len_chars());
// We work with bytes for this, so convert. // We work with bytes for this, so convert.
let byte_idx = slice.char_to_byte(char_idx); let mut byte_idx = slice.char_to_byte(char_idx);
// Get the chunk with our byte index in it. // Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
@ -99,27 +84,36 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
// Set up the grapheme cursor. // Set up the grapheme cursor.
let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
// Find the next grapheme cluster boundary. // Find the nth next grapheme cluster boundary.
loop { for _ in 0..n {
match gc.next_boundary(chunk, chunk_byte_idx) { loop {
Ok(None) => return slice.len_chars(), match gc.next_boundary(chunk, chunk_byte_idx) {
Ok(Some(n)) => { Ok(None) => return slice.len_chars(),
let tmp = byte_to_char_idx(chunk, n - chunk_byte_idx); Ok(Some(n)) => {
return chunk_char_idx + tmp; byte_idx = n;
} break;
Err(GraphemeIncomplete::NextChunk) => { }
chunk_byte_idx += chunk.len(); Err(GraphemeIncomplete::NextChunk) => {
let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx); chunk_byte_idx += chunk.len();
chunk = a; let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx);
chunk_char_idx = c; chunk = a;
} chunk_char_idx = c;
Err(GraphemeIncomplete::PreContext(n)) => { }
let ctx_chunk = slice.chunk_at_byte(n - 1).0; Err(GraphemeIncomplete::PreContext(n)) => {
gc.provide_context(ctx_chunk, n - ctx_chunk.len()); let ctx_chunk = slice.chunk_at_byte(n - 1).0;
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
}
_ => unreachable!(),
} }
_ => unreachable!(),
} }
} }
let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx);
chunk_char_idx + tmp
}
/// Finds the next grapheme boundary after the given char position.
pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
nth_next_grapheme_boundary(slice, char_idx, 1)
} }
/// Returns whether the given char position is a grapheme boundary. /// Returns whether the given char position is a grapheme boundary.

Loading…
Cancel
Save