From c68fe1f2a3a40c37969c1f5d18e3134320a0c773 Mon Sep 17 00:00:00 2001 From: Gokul Soumya Date: Sat, 3 Jul 2021 06:37:49 +0530 Subject: [PATCH] Add object selection (textobjects) (#385) * Add textobjects for word * Add textobjects for surround characters * Apply clippy lints * Remove ThisWordPrevBound in favor of PrevWordEnd It's the same as PrevWordEnd except for taking the current char into account, so use a "flag" to capture that usecase * Add tests for PrevWordEnd movement * Remove ThisWord* movements They did not preserve anchor positions and were only used for textobject boundary search anyway so replace them with simple position finding functions * Rewrite tests of word textobject * Add tests for surround textobject * Add textobject docs * Refactor textobject word position functions * Apply clippy lints on textobject * Fix overflow error with textobjects --- book/src/keymap.md | 5 +- book/src/usage.md | 16 ++ helix-core/src/lib.rs | 1 + helix-core/src/movement.rs | 99 ++++++++++- helix-core/src/selection.rs | 10 ++ helix-core/src/textobject.rs | 319 +++++++++++++++++++++++++++++++++++ helix-term/src/commands.rs | 32 +++- 7 files changed, 475 insertions(+), 7 deletions(-) create mode 100644 helix-core/src/textobject.rs diff --git a/book/src/keymap.md b/book/src/keymap.md index 0265fe6d..6b7ccd11 100644 --- a/book/src/keymap.md +++ b/book/src/keymap.md @@ -150,7 +150,8 @@ Jumps to various locations. ## Match mode Enter this mode using `m` from normal mode. See the relavant section -in [Usage](./usage.md#surround) for an explanation about surround usage. +in [Usage](./usage.md) for an explanation about [surround](./usage.md#surround) +and [textobject](./usage.md#textobject) usage. | Key | Description | | ----- | ----------- | @@ -158,6 +159,8 @@ in [Usage](./usage.md#surround) for an explanation about surround usage. | `s` `` | Surround current selection with `` | | `r` `` | Replace surround character `` with `` | | `d` `` | Delete surround character `` | +| `a` `` | Select around textobject | +| `i` `` | Select inside textobject | ## Object mode diff --git a/book/src/usage.md b/book/src/usage.md index e6bd60e2..0458071a 100644 --- a/book/src/usage.md +++ b/book/src/usage.md @@ -24,3 +24,19 @@ It can also act on multiple seletions (yay!). For example, to change every occur - `mr([` to replace the parens with square brackets Multiple characters are currently not supported, but planned. + +## Textobjects + +Currently supported: `word`, `surround`. + +![textobject-demo](https://user-images.githubusercontent.com/23398472/124231131-81a4bb00-db2d-11eb-9d10-8e577ca7b177.gif) + +- `ma` - Select around the object (`va` in vim, `` in kakoune) +- `mi` - Select inside the object (`vi` in vim, `` in kakoune) + +| Key after `mi` or `ma` | Textobject selected | +| --- | --- | +| `w` | Word | +| `(`, `[`, `'`, etc | Specified surround pairs | + +Textobjects based on treesitter, like `function`, `class`, etc are planned. diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index c2bb8c55..3684a93e 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -18,6 +18,7 @@ pub mod selection; mod state; pub mod surround; pub mod syntax; +pub mod textobject; mod transaction; pub mod unicode { diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index acc95e7e..f9e5deb4 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -113,6 +113,10 @@ pub fn move_prev_long_word_start(slice: RopeSlice, range: Range, count: usize) - word_move(slice, range, count, WordMotionTarget::PrevLongWordStart) } +pub fn move_prev_word_end(slice: RopeSlice, range: Range, count: usize) -> Range { + word_move(slice, range, count, WordMotionTarget::PrevWordEnd) +} + fn word_move(slice: RopeSlice, range: Range, count: usize, target: WordMotionTarget) -> Range { (0..count).fold(range, |range, _| { slice.chars_at(range.head).range_to_target(target, range) @@ -159,6 +163,7 @@ pub enum WordMotionTarget { NextWordStart, NextWordEnd, PrevWordStart, + PrevWordEnd, // A "Long word" (also known as a WORD in vim/kakoune) is strictly // delimited by whitespace, and can consist of punctuation as well // as alphanumerics. @@ -181,7 +186,9 @@ impl CharHelpers for Chars<'_> { fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range { // Characters are iterated forward or backwards depending on the motion direction. let characters: Box> = match target { - WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => { + WordMotionTarget::PrevWordStart + | WordMotionTarget::PrevLongWordStart + | WordMotionTarget::PrevWordEnd => { self.next(); Box::new(from_fn(|| self.prev())) } @@ -190,9 +197,9 @@ impl CharHelpers for Chars<'_> { // Index advancement also depends on the direction. let advance: &dyn Fn(&mut usize) = match target { - WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => { - &|u| *u = u.saturating_sub(1) - } + WordMotionTarget::PrevWordStart + | WordMotionTarget::PrevLongWordStart + | WordMotionTarget::PrevWordEnd => &|u| *u = u.saturating_sub(1), _ => &|u| *u += 1, }; @@ -265,7 +272,7 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char> }; match target { - WordMotionTarget::NextWordStart => { + WordMotionTarget::NextWordStart | WordMotionTarget::PrevWordEnd => { is_word_boundary(peek, *next_peek) && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()) } @@ -913,6 +920,88 @@ mod test { } } + #[test] + fn test_behaviour_when_moving_to_end_of_previous_words() { + let tests = array::IntoIter::new([ + ("Basic backward motion from the middle of a word", + vec![(1, Range::new(9, 9), Range::new(9, 5))]), + ("Starting from after boundary retreats the anchor", + vec![(1, Range::new(0, 13), Range::new(12, 8))]), + ("Jump to end of a word succeeded by whitespace", + vec![(1, Range::new(10, 10), Range::new(10, 4))]), + (" Jump to start of line from end of word preceded by whitespace", + vec![(1, Range::new(7, 7), Range::new(7, 0))]), + ("Previous anchor is irrelevant for backward motions", + vec![(1, Range::new(26, 12), Range::new(12, 8))]), + (" Starting from whitespace moves to first space in sequence", + vec![(1, Range::new(0, 3), Range::new(3, 0))]), + ("Test identifiers_with_underscores are considered a single word", + vec![(1, Range::new(0, 25), Range::new(25, 4))]), + ("Jumping\n \nback through a newline selects whitespace", + vec![(1, Range::new(0, 13), Range::new(11, 8))]), + ("Jumping to start of word from the end selects the whole word", + vec![(1, Range::new(15, 15), Range::new(15, 10))]), + ("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion", + vec![ + (1, Range::new(30, 30), Range::new(30, 21)), + (1, Range::new(30, 21), Range::new(20, 18)), + (1, Range::new(20, 18), Range::new(17, 15)) + ]), + + ("... ... punctuation and spaces behave as expected", + vec![ + (1, Range::new(0, 10), Range::new(9, 9)), + (1, Range::new(9, 6), Range::new(5, 3)), + ]), + (".._.._ punctuation is not joined by underscores into a single block", + vec![(1, Range::new(0, 5), Range::new(4, 3))]), + ("Newlines\n\nare bridged seamlessly.", + vec![ + (1, Range::new(0, 10), Range::new(7, 0)), + ]), + ("Jumping \n\n\n\n\nback from within a newline group selects previous block", + vec![ + (1, Range::new(0, 13), Range::new(10, 7)), + ]), + ("Failed motions do not modify the range", + vec![ + (0, Range::new(3, 0), Range::new(3, 0)), + ]), + ("Multiple motions at once resolve correctly", + vec![ + (3, Range::new(23, 23), Range::new(15, 8)), + ]), + ("Excessive motions are performed partially", + vec![ + (999, Range::new(40, 40), Range::new(8, 0)), + ]), + ("", // Edge case of moving backwards in empty string + vec![ + (1, Range::new(0, 0), Range::new(0, 0)), + ]), + ("\n\n\n\n\n", // Edge case of moving backwards in all newlines + vec![ + (1, Range::new(0, 0), Range::new(0, 0)), + ]), + (" \n \nJumping back through alternated space blocks and newlines selects the space blocks", + vec![ + (1, Range::new(0, 7), Range::new(6, 4)), + (1, Range::new(6, 4), Range::new(2, 0)), + ]), + ("Test ヒーリクス multibyte characters behave as normal characters", + vec![ + (1, Range::new(0, 9), Range::new(9, 4)), + ]), + ]); + + for (sample, scenario) in tests { + for (count, begin, expected_end) in scenario.into_iter() { + let range = move_prev_word_end(Rope::from(sample).slice(..), begin, count); + assert_eq!(range, expected_end, "Case failed: [{}]", sample); + } + } + } + #[test] fn test_behaviour_when_moving_to_end_of_next_long_words() { let tests = array::IntoIter::new([ diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index d99e2aff..63b9b557 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -130,6 +130,16 @@ impl Range { } } +impl From<(usize, usize)> for Range { + fn from(tuple: (usize, usize)) -> Self { + Self { + anchor: tuple.0, + head: tuple.1, + horiz: None, + } + } +} + /// A selection consists of one or more selection ranges. /// invariant: A selection can never be empty (always contains at least primary range). #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs new file mode 100644 index 00000000..d29eb03c --- /dev/null +++ b/helix-core/src/textobject.rs @@ -0,0 +1,319 @@ +use ropey::RopeSlice; + +use crate::chars::{categorize_char, char_is_line_ending, char_is_whitespace, CharCategory}; +use crate::movement::{self, Direction}; +use crate::surround; +use crate::Range; + +fn this_word_end_pos(slice: RopeSlice, pos: usize) -> usize { + this_word_bound_pos(slice, pos, Direction::Forward) +} + +fn this_word_start_pos(slice: RopeSlice, pos: usize) -> usize { + this_word_bound_pos(slice, pos, Direction::Backward) +} + +fn this_word_bound_pos(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize { + let iter = match direction { + Direction::Forward => slice.chars_at(pos + 1), + Direction::Backward => { + let mut iter = slice.chars_at(pos); + iter.reverse(); + iter + } + }; + + match categorize_char(slice.char(pos)) { + CharCategory::Eol | CharCategory::Whitespace => pos, + category => { + for peek in iter { + let curr_category = categorize_char(peek); + if curr_category != category + || curr_category == CharCategory::Eol + || curr_category == CharCategory::Whitespace + { + return pos; + } + pos = match direction { + Direction::Forward => pos + 1, + Direction::Backward => pos.saturating_sub(1), + } + } + pos + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum TextObject { + Around, + Inside, +} + +// count doesn't do anything yet +pub fn textobject_word( + slice: RopeSlice, + range: Range, + textobject: TextObject, + count: usize, +) -> Range { + let this_word_start = this_word_start_pos(slice, range.head); + let this_word_end = this_word_end_pos(slice, range.head); + + let (anchor, head); + match textobject { + TextObject::Inside => { + anchor = this_word_start; + head = this_word_end; + } + TextObject::Around => { + if slice + .get_char(this_word_end + 1) + .map_or(true, char_is_line_ending) + { + head = this_word_end; + if slice + .get_char(this_word_start.saturating_sub(1)) + .map_or(true, char_is_line_ending) + { + // single word on a line + anchor = this_word_start; + } else { + // last word on a line, select the whitespace before it too + anchor = movement::move_prev_word_end(slice, range, count).head; + } + } else if char_is_whitespace(slice.char(range.head)) { + // select whole whitespace and next word + head = movement::move_next_word_end(slice, range, count).head; + anchor = movement::backwards_skip_while(slice, range.head, |c| c.is_whitespace()) + .map(|p| p + 1) // p is first *non* whitespace char, so +1 to get whitespace pos + .unwrap_or(0); + } else { + head = movement::move_next_word_start(slice, range, count).head; + anchor = this_word_start; + } + } + }; + Range::new(anchor, head) +} + +pub fn textobject_surround( + slice: RopeSlice, + range: Range, + textobject: TextObject, + ch: char, + count: usize, +) -> Range { + surround::find_nth_pairs_pos(slice, ch, range.head, count) + .map(|(anchor, head)| match textobject { + TextObject::Inside => Range::new(anchor + 1, head.saturating_sub(1)), + TextObject::Around => Range::new(anchor, head), + }) + .unwrap_or(range) +} + +#[cfg(test)] +mod test { + use super::TextObject::*; + use super::*; + + use crate::Range; + use ropey::Rope; + + #[test] + fn test_textobject_word() { + // (text, [(cursor position, textobject, final range), ...]) + let tests = &[ + ( + "cursor at beginning of doc", + vec![(0, Inside, (0, 5)), (0, Around, (0, 6))], + ), + ( + "cursor at middle of word", + vec![ + (13, Inside, (10, 15)), + (10, Inside, (10, 15)), + (15, Inside, (10, 15)), + (13, Around, (10, 16)), + (10, Around, (10, 16)), + (15, Around, (10, 16)), + ], + ), + ( + "cursor between word whitespace", + vec![(6, Inside, (6, 6)), (6, Around, (6, 13))], + ), + ( + "cursor on word before newline\n", + vec![ + (22, Inside, (22, 28)), + (28, Inside, (22, 28)), + (25, Inside, (22, 28)), + (22, Around, (21, 28)), + (28, Around, (21, 28)), + (25, Around, (21, 28)), + ], + ), + ( + "cursor on newline\nnext line", + vec![(17, Inside, (17, 17)), (17, Around, (17, 22))], + ), + ( + "cursor on word after newline\nnext line", + vec![ + (29, Inside, (29, 32)), + (30, Inside, (29, 32)), + (32, Inside, (29, 32)), + (29, Around, (29, 33)), + (30, Around, (29, 33)), + (32, Around, (29, 33)), + ], + ), + ( + "cursor on #$%:;* punctuation", + vec![ + (13, Inside, (10, 15)), + (10, Inside, (10, 15)), + (15, Inside, (10, 15)), + (13, Around, (10, 16)), + (10, Around, (10, 16)), + (15, Around, (10, 16)), + ], + ), + ( + "cursor on punc%^#$:;.tuation", + vec![ + (14, Inside, (14, 20)), + (20, Inside, (14, 20)), + (17, Inside, (14, 20)), + (14, Around, (14, 20)), + // FIXME: edge case + // (20, Around, (14, 20)), + (17, Around, (14, 20)), + ], + ), + ( + "cursor in extra whitespace", + vec![ + (9, Inside, (9, 9)), + (10, Inside, (10, 10)), + (11, Inside, (11, 11)), + (9, Around, (9, 16)), + (10, Around, (9, 16)), + (11, Around, (9, 16)), + ], + ), + ( + "cursor at end of doc", + vec![(19, Inside, (17, 19)), (19, Around, (16, 19))], + ), + ]; + + for (sample, scenario) in tests { + let doc = Rope::from(*sample); + let slice = doc.slice(..); + for &case in scenario { + let (pos, objtype, expected_range) = case; + let result = textobject_word(slice, Range::point(pos), objtype, 1); + assert_eq!( + result, + expected_range.into(), + "\nCase failed: {:?} - {:?}", + sample, + case + ); + } + } + } + + #[test] + fn test_textobject_surround() { + // (text, [(cursor position, textobject, final range, count), ...]) + let tests = &[ + ( + "simple (single) surround pairs", + vec![ + (3, Inside, (3, 3), '(', 1), + (7, Inside, (8, 13), ')', 1), + (10, Inside, (8, 13), '(', 1), + (14, Inside, (8, 13), ')', 1), + (3, Around, (3, 3), '(', 1), + (7, Around, (7, 14), ')', 1), + (10, Around, (7, 14), '(', 1), + (14, Around, (7, 14), ')', 1), + ], + ), + ( + "samexx 'single' surround pairs", + vec![ + (3, Inside, (3, 3), '\'', 1), + // FIXME: surround doesn't work when *on* same chars pair + // (7, Inner, (8, 13), '\'', 1), + (10, Inside, (8, 13), '\'', 1), + // (14, Inner, (8, 13), '\'', 1), + (3, Around, (3, 3), '\'', 1), + // (7, Around, (7, 14), '\'', 1), + (10, Around, (7, 14), '\'', 1), + // (14, Around, (7, 14), '\'', 1), + ], + ), + ( + "(nested (surround (pairs)) 3 levels)", + vec![ + (0, Inside, (1, 34), '(', 1), + (6, Inside, (1, 34), ')', 1), + (8, Inside, (9, 24), '(', 1), + (8, Inside, (9, 34), ')', 2), + (20, Inside, (9, 24), '(', 2), + (20, Inside, (1, 34), ')', 3), + (0, Around, (0, 35), '(', 1), + (6, Around, (0, 35), ')', 1), + (8, Around, (8, 25), '(', 1), + (8, Around, (8, 35), ')', 2), + (20, Around, (8, 25), '(', 2), + (20, Around, (0, 35), ')', 3), + ], + ), + ( + "(mixed {surround [pair] same} line)", + vec![ + (2, Inside, (1, 33), '(', 1), + (9, Inside, (8, 27), '{', 1), + (18, Inside, (18, 21), '[', 1), + (2, Around, (0, 34), '(', 1), + (9, Around, (7, 28), '{', 1), + (18, Around, (17, 22), '[', 1), + ], + ), + ( + "(stepped (surround) pairs (should) skip)", + vec![(22, Inside, (1, 38), '(', 1), (22, Around, (0, 39), '(', 1)], + ), + ( + "[surround pairs{\non different]\nlines}", + vec![ + (7, Inside, (1, 28), '[', 1), + (15, Inside, (16, 35), '{', 1), + (7, Around, (0, 29), '[', 1), + (15, Around, (15, 36), '{', 1), + ], + ), + ]; + + for (sample, scenario) in tests { + let doc = Rope::from(*sample); + let slice = doc.slice(..); + for &case in scenario { + let (pos, objtype, expected_range, ch, count) = case; + let result = textobject_surround(slice, Range::point(pos), objtype, ch, count); + assert_eq!( + result, + expected_range.into(), + "\nCase failed: {:?} - {:?}", + sample, + case + ); + } + } + } +} diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 860d8e22..d198b803 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -3502,6 +3502,9 @@ fn right_bracket_mode(cx: &mut Context) { }) } +use helix_core::surround; +use helix_core::textobject; + fn match_mode(cx: &mut Context) { let count = cx.count; cx.on_next_key(move |cx, event| { @@ -3517,13 +3520,40 @@ fn match_mode(cx: &mut Context) { 's' => surround_add(cx), 'r' => surround_replace(cx), 'd' => surround_delete(cx), + 'a' => select_textobject(cx, textobject::TextObject::Around), + 'i' => select_textobject(cx, textobject::TextObject::Inside), _ => (), } } }) } -use helix_core::surround; +fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) { + let count = cx.count(); + cx.on_next_key(move |cx, event| { + if let KeyEvent { + code: KeyCode::Char(ch), + .. + } = event + { + let (view, doc) = current!(cx.editor); + let text = doc.text().slice(..); + + let selection = doc.selection(view.id).transform(|range| { + match ch { + 'w' => textobject::textobject_word(text, range, objtype, count), + // TODO: cancel new ranges if inconsistent surround matches across lines + ch if !ch.is_ascii_alphanumeric() => { + textobject::textobject_surround(text, range, objtype, ch, count) + } + _ => range, + } + }); + + doc.set_selection(view.id, selection); + } + }) +} fn surround_add(cx: &mut Context) { cx.on_next_key(move |cx, event| {