Add object selection (textobjects) (#385)

* Add textobjects for word
* Add textobjects for surround characters
* Apply clippy lints
* Remove ThisWordPrevBound in favor of PrevWordEnd

  It's the same as PrevWordEnd except for taking the current char into account, so use a "flag" to capture that use case.
* Add tests for PrevWordEnd movement
* Remove `ThisWord*` movements

  They did not preserve anchor positions and were only used for textobject boundary search anyway, so replace them with simple position-finding functions.
* Rewrite tests of word textobject
* Add tests for surround textobject
* Add textobject docs
* Refactor textobject word position functions
* Apply clippy lints on textobject
* Fix overflow error with textobjects
3 years ago · c68fe1f2a3
parent c5b2973739
commit c68fe1f2a3
7 changed files with 475 additions and 7 deletions
--- a/book/src/keymap.md
+++ b/book/src/keymap.md
@ -150,7 +150,8 @@ Jumps to various locations.
 ## Match mode

 Enter this mode using `m` from normal mode. See the relevant section
-in [Usage](./usage.md#surround) for an explanation about surround usage.
+in [Usage](./usage.md) for an explanation about [surround](./usage.md#surround)
+and [textobject](./usage.md#textobjects) usage.

 | Key              | Description                                     |
 | -----            | -----------                                     |
@ -158,6 +159,8 @@ in [Usage](./usage.md#surround) for an explanation about surround usage.
 | `s` `<char>`     | Surround current selection with `<char>`        |
 | `r` `<from><to>` | Replace surround character `<from>` with `<to>` |
 | `d` `<char>`     | Delete surround character `<char>`              |
+| `a` `<object>`   | Select around textobject                        |
+| `i` `<object>`   | Select inside textobject                        |

 ## Object mode

--- a/book/src/usage.md
+++ b/book/src/usage.md
@ -24,3 +24,19 @@ It can also act on multiple seletions (yay!). For example, to change every occur
 - `mr([` to replace the parens with square brackets

 Multiple characters are currently not supported, but planned.
+
+## Textobjects
+
+Currently supported: `word`, `surround`.
+
+![textobject-demo](https://user-images.githubusercontent.com/23398472/124231131-81a4bb00-db2d-11eb-9d10-8e577ca7b177.gif)
+
+- `ma` - Select around the object (`va` in vim, `<alt-a>` in kakoune)
+- `mi` - Select inside the object (`vi` in vim, `<alt-i>` in kakoune)
+
+| Key after `mi` or `ma` | Textobject selected      |
+| ---                    | ---                      |
+| `w`                    | Word                     |
+| `(`, `[`, `'`, etc.    | Specified surround pairs |
+
+Textobjects based on tree-sitter, such as `function`, `class`, etc., are planned.
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@ -18,6 +18,7 @@ pub mod selection;
 mod state;
 pub mod surround;
 pub mod syntax;
+pub mod textobject;
 mod transaction;

 pub mod unicode {
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs
@ -113,6 +113,10 @@ pub fn move_prev_long_word_start(slice: RopeSlice, range: Range, count: usize) -
    word_move(slice, range, count, WordMotionTarget::PrevLongWordStart)
 }

+pub fn move_prev_word_end(slice: RopeSlice, range: Range, count: usize) -> Range {
+    word_move(slice, range, count, WordMotionTarget::PrevWordEnd)
+}
+
 fn word_move(slice: RopeSlice, range: Range, count: usize, target: WordMotionTarget) -> Range {
    (0..count).fold(range, |range, _| {
        slice.chars_at(range.head).range_to_target(target, range)
@ -159,6 +163,7 @@ pub enum WordMotionTarget {
    NextWordStart,
    NextWordEnd,
    PrevWordStart,
+    PrevWordEnd,
    // A "Long word" (also known as a WORD in vim/kakoune) is strictly
    // delimited by whitespace, and can consist of punctuation as well
    // as alphanumerics.
@ -181,7 +186,9 @@ impl CharHelpers for Chars<'_> {
    fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range {
        // Characters are iterated forward or backwards depending on the motion direction.
        let characters: Box<dyn Iterator<Item = char>> = match target {
-            WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => {
+            WordMotionTarget::PrevWordStart
+            | WordMotionTarget::PrevLongWordStart
+            | WordMotionTarget::PrevWordEnd => {
                self.next();
                Box::new(from_fn(|| self.prev()))
            }
@ -190,9 +197,9 @@ impl CharHelpers for Chars<'_> {

        // Index advancement also depends on the direction.
        let advance: &dyn Fn(&mut usize) = match target {
-            WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => {
-                &|u| *u = u.saturating_sub(1)
-            }
+            WordMotionTarget::PrevWordStart
+            | WordMotionTarget::PrevLongWordStart
+            | WordMotionTarget::PrevWordEnd => &|u| *u = u.saturating_sub(1),
            _ => &|u| *u += 1,
        };

@ -265,7 +272,7 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>
    };

    match target {
-        WordMotionTarget::NextWordStart => {
+        WordMotionTarget::NextWordStart | WordMotionTarget::PrevWordEnd => {
            is_word_boundary(peek, *next_peek)
                && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace())
        }
@ -913,6 +920,88 @@ mod test {
        }
    }

+    #[test]
+    fn test_behaviour_when_moving_to_end_of_previous_words() {
+        let tests = array::IntoIter::new([
+            ("Basic backward motion from the middle of a word",
+                vec![(1, Range::new(9, 9), Range::new(9, 5))]),
+            ("Starting from after boundary retreats the anchor",
+                vec![(1, Range::new(0, 13), Range::new(12, 8))]),
+            ("Jump     to end of a word succeeded by whitespace",
+                vec![(1, Range::new(10, 10), Range::new(10, 4))]),
+            ("    Jump to start of line from end of word preceded by whitespace",
+                vec![(1, Range::new(7, 7), Range::new(7, 0))]),
+            ("Previous anchor is irrelevant for backward motions",
+                vec![(1, Range::new(26, 12), Range::new(12, 8))]),
+            ("    Starting from whitespace moves to first space in sequence",
+                vec![(1, Range::new(0, 3), Range::new(3, 0))]),
+            ("Test identifiers_with_underscores are considered a single word",
+                vec![(1, Range::new(0, 25), Range::new(25, 4))]),
+            ("Jumping\n    \nback through a newline selects whitespace",
+                vec![(1, Range::new(0, 13), Range::new(11, 8))]),
+            ("Jumping to start of word from the end selects the whole word",
+                vec![(1, Range::new(15, 15), Range::new(15, 10))]),
+            ("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion",
+                vec![
+                    (1, Range::new(30, 30), Range::new(30, 21)),
+                    (1, Range::new(30, 21), Range::new(20, 18)),
+                    (1, Range::new(20, 18), Range::new(17, 15))
+                ]),
+
+            ("...   ... punctuation and spaces behave as expected",
+                vec![
+                    (1, Range::new(0, 10), Range::new(9, 9)),
+                    (1, Range::new(9, 6), Range::new(5, 3)),
+                ]),
+            (".._.._ punctuation is not joined by underscores into a single block",
+                vec![(1, Range::new(0, 5), Range::new(4, 3))]),
+            ("Newlines\n\nare bridged seamlessly.",
+                vec![
+                    (1, Range::new(0, 10), Range::new(7, 0)),
+                ]),
+            ("Jumping    \n\n\n\n\nback from within a newline group selects previous block",
+                vec![
+                    (1, Range::new(0, 13), Range::new(10, 7)),
+                ]),
+            ("Failed motions do not modify the range",
+                vec![
+                    (0, Range::new(3, 0), Range::new(3, 0)),
+                ]),
+            ("Multiple motions at once resolve correctly",
+                vec![
+                    (3, Range::new(23, 23), Range::new(15, 8)),
+                ]),
+            ("Excessive motions are performed partially",
+                vec![
+                    (999, Range::new(40, 40), Range::new(8, 0)),
+                ]),
+            ("", // Edge case of moving backwards in empty string
+                vec![
+                    (1, Range::new(0, 0), Range::new(0, 0)),
+                ]),
+            ("\n\n\n\n\n", // Edge case of moving backwards in all newlines
+                vec![
+                    (1, Range::new(0, 0), Range::new(0, 0)),
+                ]),
+            ("   \n   \nJumping back through alternated space blocks and newlines selects the space blocks",
+                vec![
+                    (1, Range::new(0, 7), Range::new(6, 4)),
+                    (1, Range::new(6, 4), Range::new(2, 0)),
+                ]),
+            ("Test ヒーリクス multibyte characters behave as normal characters",
+                vec![
+                    (1, Range::new(0, 9), Range::new(9, 4)),
+                ]),
+        ]);
+
+        for (sample, scenario) in tests {
+            for (count, begin, expected_end) in scenario.into_iter() {
+                let range = move_prev_word_end(Rope::from(sample).slice(..), begin, count);
+                assert_eq!(range, expected_end, "Case failed: [{}]", sample);
+            }
+        }
+    }
+
    #[test]
    fn test_behaviour_when_moving_to_end_of_next_long_words() {
        let tests = array::IntoIter::new([
--- a/helix-core/src/selection.rs
+++ b/helix-core/src/selection.rs
@ -130,6 +130,16 @@ impl Range {
    }
 }

+impl From<(usize, usize)> for Range {
+    fn from(tuple: (usize, usize)) -> Self {
+        Self {
+            anchor: tuple.0,
+            head: tuple.1,
+            horiz: None,
+        }
+    }
+}
+
 /// A selection consists of one or more selection ranges.
 /// invariant: A selection can never be empty (always contains at least primary range).
 #[derive(Debug, Clone, PartialEq, Eq)]
--- a/helix-core/src/textobject.rs
+++ b/helix-core/src/textobject.rs
@ -0,0 +1,319 @@
+use ropey::RopeSlice;
+
+use crate::chars::{categorize_char, char_is_line_ending, char_is_whitespace, CharCategory};
+use crate::movement::{self, Direction};
+use crate::surround;
+use crate::Range;
+
+fn this_word_end_pos(slice: RopeSlice, pos: usize) -> usize {
+    this_word_bound_pos(slice, pos, Direction::Forward)
+}
+
+fn this_word_start_pos(slice: RopeSlice, pos: usize) -> usize {
+    this_word_bound_pos(slice, pos, Direction::Backward)
+}
+
+fn this_word_bound_pos(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize {
+    let iter = match direction {
+        Direction::Forward => slice.chars_at(pos + 1),
+        Direction::Backward => {
+            let mut iter = slice.chars_at(pos);
+            iter.reverse();
+            iter
+        }
+    };
+
+    match categorize_char(slice.char(pos)) {
+        CharCategory::Eol | CharCategory::Whitespace => pos,
+        category => {
+            for peek in iter {
+                let curr_category = categorize_char(peek);
+                if curr_category != category
+                    || curr_category == CharCategory::Eol
+                    || curr_category == CharCategory::Whitespace
+                {
+                    return pos;
+                }
+                pos = match direction {
+                    Direction::Forward => pos + 1,
+                    Direction::Backward => pos.saturating_sub(1),
+                }
+            }
+            pos
+        }
+    }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum TextObject {
+    Around,
+    Inside,
+}
+
+// NOTE: `count` is ignored for `Inside`; for `Around` it is only forwarded to the word-movement helpers
+pub fn textobject_word(
+    slice: RopeSlice,
+    range: Range,
+    textobject: TextObject,
+    count: usize,
+) -> Range {
+    let this_word_start = this_word_start_pos(slice, range.head);
+    let this_word_end = this_word_end_pos(slice, range.head);
+
+    let (anchor, head);
+    match textobject {
+        TextObject::Inside => {
+            anchor = this_word_start;
+            head = this_word_end;
+        }
+        TextObject::Around => {
+            if slice
+                .get_char(this_word_end + 1)
+                .map_or(true, char_is_line_ending)
+            {
+                head = this_word_end;
+                if slice
+                    .get_char(this_word_start.saturating_sub(1))
+                    .map_or(true, char_is_line_ending)
+                {
+                    // single word on a line
+                    anchor = this_word_start;
+                } else {
+                    // last word on a line, select the whitespace before it too
+                    anchor = movement::move_prev_word_end(slice, range, count).head;
+                }
+            } else if char_is_whitespace(slice.char(range.head)) {
+                // select whole whitespace and next word
+                head = movement::move_next_word_end(slice, range, count).head;
+                anchor = movement::backwards_skip_while(slice, range.head, |c| c.is_whitespace())
+                    .map(|p| p + 1) // p is first *non* whitespace char, so +1 to get whitespace pos
+                    .unwrap_or(0);
+            } else {
+                head = movement::move_next_word_start(slice, range, count).head;
+                anchor = this_word_start;
+            }
+        }
+    };
+    Range::new(anchor, head)
+}
+
+pub fn textobject_surround(
+    slice: RopeSlice,
+    range: Range,
+    textobject: TextObject,
+    ch: char,
+    count: usize,
+) -> Range {
+    surround::find_nth_pairs_pos(slice, ch, range.head, count)
+        .map(|(anchor, head)| match textobject {
+            TextObject::Inside => Range::new(anchor + 1, head.saturating_sub(1)),
+            TextObject::Around => Range::new(anchor, head),
+        })
+        .unwrap_or(range)
+}
+
+#[cfg(test)]
+mod test {
+    use super::TextObject::*;
+    use super::*;
+
+    use crate::Range;
+    use ropey::Rope;
+
+    #[test]
+    fn test_textobject_word() {
+        // (text, [(cursor position, textobject, final range), ...])
+        let tests = &[
+            (
+                "cursor at beginning of doc",
+                vec![(0, Inside, (0, 5)), (0, Around, (0, 6))],
+            ),
+            (
+                "cursor at middle of word",
+                vec![
+                    (13, Inside, (10, 15)),
+                    (10, Inside, (10, 15)),
+                    (15, Inside, (10, 15)),
+                    (13, Around, (10, 16)),
+                    (10, Around, (10, 16)),
+                    (15, Around, (10, 16)),
+                ],
+            ),
+            (
+                "cursor between word whitespace",
+                vec![(6, Inside, (6, 6)), (6, Around, (6, 13))],
+            ),
+            (
+                "cursor on word before newline\n",
+                vec![
+                    (22, Inside, (22, 28)),
+                    (28, Inside, (22, 28)),
+                    (25, Inside, (22, 28)),
+                    (22, Around, (21, 28)),
+                    (28, Around, (21, 28)),
+                    (25, Around, (21, 28)),
+                ],
+            ),
+            (
+                "cursor on newline\nnext line",
+                vec![(17, Inside, (17, 17)), (17, Around, (17, 22))],
+            ),
+            (
+                "cursor on word after newline\nnext line",
+                vec![
+                    (29, Inside, (29, 32)),
+                    (30, Inside, (29, 32)),
+                    (32, Inside, (29, 32)),
+                    (29, Around, (29, 33)),
+                    (30, Around, (29, 33)),
+                    (32, Around, (29, 33)),
+                ],
+            ),
+            (
+                "cursor on #$%:;* punctuation",
+                vec![
+                    (13, Inside, (10, 15)),
+                    (10, Inside, (10, 15)),
+                    (15, Inside, (10, 15)),
+                    (13, Around, (10, 16)),
+                    (10, Around, (10, 16)),
+                    (15, Around, (10, 16)),
+                ],
+            ),
+            (
+                "cursor on punc%^#$:;.tuation",
+                vec![
+                    (14, Inside, (14, 20)),
+                    (20, Inside, (14, 20)),
+                    (17, Inside, (14, 20)),
+                    (14, Around, (14, 20)),
+                    // FIXME: edge case
+                    // (20, Around, (14, 20)),
+                    (17, Around, (14, 20)),
+                ],
+            ),
+            (
+                "cursor in   extra whitespace",
+                vec![
+                    (9, Inside, (9, 9)),
+                    (10, Inside, (10, 10)),
+                    (11, Inside, (11, 11)),
+                    (9, Around, (9, 16)),
+                    (10, Around, (9, 16)),
+                    (11, Around, (9, 16)),
+                ],
+            ),
+            (
+                "cursor at end of doc",
+                vec![(19, Inside, (17, 19)), (19, Around, (16, 19))],
+            ),
+        ];
+
+        for (sample, scenario) in tests {
+            let doc = Rope::from(*sample);
+            let slice = doc.slice(..);
+            for &case in scenario {
+                let (pos, objtype, expected_range) = case;
+                let result = textobject_word(slice, Range::point(pos), objtype, 1);
+                assert_eq!(
+                    result,
+                    expected_range.into(),
+                    "\nCase failed: {:?} - {:?}",
+                    sample,
+                    case
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_textobject_surround() {
+        // (text, [(cursor position, textobject, final range, surround char, count), ...])
+        let tests = &[
+            (
+                "simple (single) surround pairs",
+                vec![
+                    (3, Inside, (3, 3), '(', 1),
+                    (7, Inside, (8, 13), ')', 1),
+                    (10, Inside, (8, 13), '(', 1),
+                    (14, Inside, (8, 13), ')', 1),
+                    (3, Around, (3, 3), '(', 1),
+                    (7, Around, (7, 14), ')', 1),
+                    (10, Around, (7, 14), '(', 1),
+                    (14, Around, (7, 14), ')', 1),
+                ],
+            ),
+            (
+                "samexx 'single' surround pairs",
+                vec![
+                    (3, Inside, (3, 3), '\'', 1),
+                    // FIXME: surround doesn't work when the cursor is *on* one of a pair of identical chars
+                    // (7, Inside, (8, 13), '\'', 1),
+                    (10, Inside, (8, 13), '\'', 1),
+                    // (14, Inside, (8, 13), '\'', 1),
+                    (3, Around, (3, 3), '\'', 1),
+                    // (7, Around, (7, 14), '\'', 1),
+                    (10, Around, (7, 14), '\'', 1),
+                    // (14, Around, (7, 14), '\'', 1),
+                ],
+            ),
+            (
+                "(nested (surround (pairs)) 3 levels)",
+                vec![
+                    (0, Inside, (1, 34), '(', 1),
+                    (6, Inside, (1, 34), ')', 1),
+                    (8, Inside, (9, 24), '(', 1),
+                    (8, Inside, (9, 34), ')', 2),
+                    (20, Inside, (9, 24), '(', 2),
+                    (20, Inside, (1, 34), ')', 3),
+                    (0, Around, (0, 35), '(', 1),
+                    (6, Around, (0, 35), ')', 1),
+                    (8, Around, (8, 25), '(', 1),
+                    (8, Around, (8, 35), ')', 2),
+                    (20, Around, (8, 25), '(', 2),
+                    (20, Around, (0, 35), ')', 3),
+                ],
+            ),
+            (
+                "(mixed {surround [pair] same} line)",
+                vec![
+                    (2, Inside, (1, 33), '(', 1),
+                    (9, Inside, (8, 27), '{', 1),
+                    (18, Inside, (18, 21), '[', 1),
+                    (2, Around, (0, 34), '(', 1),
+                    (9, Around, (7, 28), '{', 1),
+                    (18, Around, (17, 22), '[', 1),
+                ],
+            ),
+            (
+                "(stepped (surround) pairs (should) skip)",
+                vec![(22, Inside, (1, 38), '(', 1), (22, Around, (0, 39), '(', 1)],
+            ),
+            (
+                "[surround pairs{\non different]\nlines}",
+                vec![
+                    (7, Inside, (1, 28), '[', 1),
+                    (15, Inside, (16, 35), '{', 1),
+                    (7, Around, (0, 29), '[', 1),
+                    (15, Around, (15, 36), '{', 1),
+                ],
+            ),
+        ];
+
+        for (sample, scenario) in tests {
+            let doc = Rope::from(*sample);
+            let slice = doc.slice(..);
+            for &case in scenario {
+                let (pos, objtype, expected_range, ch, count) = case;
+                let result = textobject_surround(slice, Range::point(pos), objtype, ch, count);
+                assert_eq!(
+                    result,
+                    expected_range.into(),
+                    "\nCase failed: {:?} - {:?}",
+                    sample,
+                    case
+                );
+            }
+        }
+    }
+}
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@ -3502,6 +3502,9 @@ fn right_bracket_mode(cx: &mut Context) {
    })
 }

+use helix_core::surround;
+use helix_core::textobject;
+
 fn match_mode(cx: &mut Context) {
    let count = cx.count;
    cx.on_next_key(move |cx, event| {
@ -3517,13 +3520,40 @@ fn match_mode(cx: &mut Context) {
                's' => surround_add(cx),
                'r' => surround_replace(cx),
                'd' => surround_delete(cx),
+                'a' => select_textobject(cx, textobject::TextObject::Around),
+                'i' => select_textobject(cx, textobject::TextObject::Inside),
                _ => (),
            }
        }
    })
 }

-use helix_core::surround;
+fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
+    let count = cx.count();
+    cx.on_next_key(move |cx, event| {
+        if let KeyEvent {
+            code: KeyCode::Char(ch),
+            ..
+        } = event
+        {
+            let (view, doc) = current!(cx.editor);
+            let text = doc.text().slice(..);
+
+            let selection = doc.selection(view.id).transform(|range| {
+                match ch {
+                    'w' => textobject::textobject_word(text, range, objtype, count),
+                    // TODO: cancel new ranges if inconsistent surround matches across lines
+                    ch if !ch.is_ascii_alphanumeric() => {
+                        textobject::textobject_surround(text, range, objtype, ch, count)
+                    }
+                    _ => range,
+                }
+            });
+
+            doc.set_selection(view.id, selection);
+        }
+    })
+}

 fn surround_add(cx: &mut Context) {
    cx.on_next_key(move |cx, event| {