feat: find closest pair using tree-sitter

pull/10593/head
woojiq 7 months ago committed by Michael Davis
parent 50c90cb47c
commit 81dc8e8d6b

@ -9,16 +9,32 @@ use crate::Syntax;
const MAX_PLAINTEXT_SCAN: usize = 10000;
const MATCH_LIMIT: usize = 16;
// Limit matching pairs to only ( ) { } [ ] < > ' ' " "
const PAIRS: &[(char, char)] = &[
pub const BRACKETS: [(char, char); 7] = [
('(', ')'),
('{', '}'),
('[', ']'),
('<', '>'),
('\'', '\''),
('\"', '\"'),
('«', '»'),
('「', '」'),
('', ''),
];
// The difference between BRACKETS and PAIRS is that we can find matching
// BRACKETS in a plain text file, but we can't do the same for PAIRs.
// PAIRS also contains all BRACKETS.
pub const PAIRS: [(char, char); BRACKETS.len() + 3] = {
let mut pairs = [(' ', ' '); BRACKETS.len() + 3];
let mut idx = 0;
while idx < BRACKETS.len() {
pairs[idx] = BRACKETS[idx];
idx += 1;
}
pairs[idx] = ('"', '"');
pairs[idx + 1] = ('\'', '\'');
pairs[idx + 2] = ('`', '`');
pairs
};
/// Returns the position of the matching bracket under cursor.
///
/// If the cursor is on the opening bracket, the position of
@ -30,7 +46,7 @@ const PAIRS: &[(char, char)] = &[
/// If no matching bracket is found, `None` is returned.
#[must_use]
pub fn find_matching_bracket(syntax: &Syntax, doc: RopeSlice, pos: usize) -> Option<usize> {
if pos >= doc.len_chars() || !is_valid_bracket(doc.char(pos)) {
if pos >= doc.len_chars() || !is_valid_pair(doc.char(pos)) {
return None;
}
find_pair(syntax, doc, pos, false)
@ -67,7 +83,7 @@ fn find_pair(
let (start_byte, end_byte) = surrounding_bytes(doc, &node)?;
let (start_char, end_char) = (doc.byte_to_char(start_byte), doc.byte_to_char(end_byte));
if is_valid_pair(doc, start_char, end_char) {
if is_valid_pair_on_pos(doc, start_char, end_char) {
if end_byte == pos {
return Some(start_char);
}
@ -140,14 +156,22 @@ fn find_pair(
/// If no matching bracket is found, `None` is returned.
#[must_use]
pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Option<usize> {
// Don't do anything when the cursor is not on top of a bracket.
let bracket = doc.get_char(cursor_pos)?;
let matching_bracket = {
let pair = get_pair(bracket);
if pair.0 == bracket {
pair.1
} else {
pair.0
}
};
// Don't do anything when the cursor is not on top of a bracket.
if !is_valid_bracket(bracket) {
return None;
}
// Determine the direction of the matching.
let is_fwd = is_forward_bracket(bracket);
let is_fwd = is_open_bracket(bracket);
let chars_iter = if is_fwd {
doc.chars_at(cursor_pos + 1)
} else {
@ -159,19 +183,7 @@ pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Opt
for (i, candidate) in chars_iter.take(MAX_PLAINTEXT_SCAN).enumerate() {
if candidate == bracket {
open_cnt += 1;
} else if is_valid_pair(
doc,
if is_fwd {
cursor_pos
} else {
cursor_pos - i - 1
},
if is_fwd {
cursor_pos + i + 1
} else {
cursor_pos
},
) {
} else if candidate == matching_bracket {
// Return when all pending brackets have been closed.
if open_cnt == 1 {
return Some(if is_fwd {
@ -187,15 +199,49 @@ pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Opt
None
}
fn is_valid_bracket(c: char) -> bool {
PAIRS.iter().any(|(l, r)| *l == c || *r == c)
/// Returns the open and closing chars pair. If not found in
/// [`BRACKETS`] returns (ch, ch).
///
/// ```
/// use helix_core::match_brackets::get_pair;
///
/// assert_eq!(get_pair('['), ('[', ']'));
/// assert_eq!(get_pair('}'), ('{', '}'));
/// assert_eq!(get_pair('"'), ('"', '"'));
/// ```
pub fn get_pair(ch: char) -> (char, char) {
PAIRS
.iter()
.find(|(open, close)| *open == ch || *close == ch)
.copied()
.unwrap_or((ch, ch))
}
pub fn is_open_bracket(ch: char) -> bool {
BRACKETS.iter().any(|(l, _)| *l == ch)
}
pub fn is_close_bracket(ch: char) -> bool {
BRACKETS.iter().any(|(_, r)| *r == ch)
}
pub fn is_valid_bracket(ch: char) -> bool {
BRACKETS.iter().any(|(l, r)| *l == ch || *r == ch)
}
pub fn is_open_pair(ch: char) -> bool {
PAIRS.iter().any(|(l, _)| *l == ch)
}
pub fn is_close_pair(ch: char) -> bool {
PAIRS.iter().any(|(_, r)| *r == ch)
}
fn is_forward_bracket(c: char) -> bool {
PAIRS.iter().any(|(l, _)| *l == c)
pub fn is_valid_pair(ch: char) -> bool {
PAIRS.iter().any(|(l, r)| *l == ch || *r == ch)
}
fn is_valid_pair(doc: RopeSlice, start_char: usize, end_char: usize) -> bool {
fn is_valid_pair_on_pos(doc: RopeSlice, start_char: usize, end_char: usize) -> bool {
PAIRS.contains(&(doc.char(start_char), doc.char(end_char)))
}

@ -122,7 +122,7 @@ impl Range {
}
/// `Direction::Backward` when head < anchor.
/// `Direction::Backward` otherwise.
/// `Direction::Forward` otherwise.
#[inline]
#[must_use]
pub fn direction(&self) -> Direction {

@ -1,18 +1,16 @@
use std::fmt::Display;
use crate::{movement::Direction, search, Range, Selection};
use crate::{
graphemes::next_grapheme_boundary,
match_brackets::{
find_matching_bracket, find_matching_bracket_fuzzy, get_pair, is_close_bracket,
is_open_bracket,
},
movement::Direction,
search, Range, Selection, Syntax,
};
use ropey::RopeSlice;
pub const PAIRS: &[(char, char)] = &[
('(', ')'),
('[', ']'),
('{', '}'),
('<', '>'),
('«', '»'),
('「', '」'),
('', ''),
];
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
PairNotFound,
@ -34,32 +32,68 @@ impl Display for Error {
type Result<T> = std::result::Result<T, Error>;
/// Given any char in [PAIRS], return the open and closing chars. If not found in
/// [PAIRS] return (ch, ch).
/// Finds the position of surround pairs of any [`crate::match_brackets::PAIRS`]
/// using tree-sitter when possible.
///
/// ```
/// use helix_core::surround::get_pair;
/// # Returns
///
/// assert_eq!(get_pair('['), ('[', ']'));
/// assert_eq!(get_pair('}'), ('{', '}'));
/// assert_eq!(get_pair('"'), ('"', '"'));
/// ```
pub fn get_pair(ch: char) -> (char, char) {
PAIRS
.iter()
.find(|(open, close)| *open == ch || *close == ch)
.copied()
.unwrap_or((ch, ch))
/// Tuple `(anchor, head)`, meaning it is not always ordered.
pub fn find_nth_closest_pairs_pos(
syntax: Option<&Syntax>,
text: RopeSlice,
range: Range,
skip: usize,
) -> Result<(usize, usize)> {
match syntax {
Some(syntax) => find_nth_closest_pairs_ts(syntax, text, range, skip),
None => find_nth_closest_pairs_plain(text, range, skip),
}
}
pub fn find_nth_closest_pairs_pos(
fn find_nth_closest_pairs_ts(
syntax: &Syntax,
text: RopeSlice,
range: Range,
mut skip: usize,
) -> Result<(usize, usize)> {
let is_open_pair = |ch| PAIRS.iter().any(|(open, _)| *open == ch);
let is_close_pair = |ch| PAIRS.iter().any(|(_, close)| *close == ch);
let mut opening = range.from();
// We want to expand the selection if we are already on the found pair,
// otherwise we would need to subtract "-1" from "range.to()".
let mut closing = range.to();
while skip > 0 {
closing = find_matching_bracket_fuzzy(syntax, text, closing).ok_or(Error::PairNotFound)?;
opening = find_matching_bracket(syntax, text, closing).ok_or(Error::PairNotFound)?;
// If we're already on a closing bracket "find_matching_bracket_fuzzy" will return
// the position of the opening bracket.
if closing < opening {
(opening, closing) = (closing, opening);
}
// In case found brackets are partially inside current selection.
if range.from() < opening || closing < range.to() - 1 {
closing = next_grapheme_boundary(text, closing);
} else {
skip -= 1;
if skip != 0 {
closing = next_grapheme_boundary(text, closing);
}
}
}
// Keep the original direction.
if let Direction::Forward = range.direction() {
Ok((opening, closing))
} else {
Ok((closing, opening))
}
}
fn find_nth_closest_pairs_plain(
text: RopeSlice,
range: Range,
mut skip: usize,
) -> Result<(usize, usize)> {
let mut stack = Vec::with_capacity(2);
let pos = range.from();
let mut close_pos = pos.saturating_sub(1);
@ -67,7 +101,7 @@ pub fn find_nth_closest_pairs_pos(
for ch in text.chars_at(pos) {
close_pos += 1;
if is_open_pair(ch) {
if is_open_bracket(ch) {
// Track open pairs encountered so that we can step over
// the corresponding close pairs that will come up further
// down the loop. We want to find a lone close pair whose
@ -76,7 +110,7 @@ pub fn find_nth_closest_pairs_pos(
continue;
}
if !is_close_pair(ch) {
if !is_close_bracket(ch) {
// We don't care if this character isn't a brace pair item,
// so short circuit here.
continue;
@ -157,7 +191,11 @@ pub fn find_nth_pairs_pos(
)
};
Option::zip(open, close).ok_or(Error::PairNotFound)
// preserve original direction
match range.direction() {
Direction::Forward => Option::zip(open, close).ok_or(Error::PairNotFound),
Direction::Backward => Option::zip(close, open).ok_or(Error::PairNotFound),
}
}
fn find_nth_open_pair(
@ -249,6 +287,7 @@ fn find_nth_close_pair(
/// are automatically detected around each cursor (note that this may result
/// in them selecting different surround characters for each selection).
pub fn get_surround_pos(
syntax: Option<&Syntax>,
text: RopeSlice,
selection: &Selection,
ch: Option<char>,
@ -257,9 +296,13 @@ pub fn get_surround_pos(
let mut change_pos = Vec::new();
for &range in selection {
let (open_pos, close_pos) = match ch {
Some(ch) => find_nth_pairs_pos(text, ch, range, skip)?,
None => find_nth_closest_pairs_pos(text, range, skip)?,
let (open_pos, close_pos) = {
let range_raw = match ch {
Some(ch) => find_nth_pairs_pos(text, ch, range, skip)?,
None => find_nth_closest_pairs_pos(syntax, text, range, skip)?,
};
let range = Range::new(range_raw.0, range_raw.1);
(range.from(), range.to())
};
if change_pos.contains(&open_pos) || change_pos.contains(&close_pos) {
return Err(Error::CursorOverlap);

@ -7,9 +7,9 @@ use crate::chars::{categorize_char, char_is_whitespace, CharCategory};
use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary};
use crate::line_ending::rope_is_line_ending;
use crate::movement::Direction;
use crate::surround;
use crate::syntax::LanguageConfiguration;
use crate::Range;
use crate::{surround, Syntax};
fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize {
use CharCategory::{Eol, Whitespace};
@ -199,25 +199,28 @@ pub fn textobject_paragraph(
}
pub fn textobject_pair_surround(
syntax: Option<&Syntax>,
slice: RopeSlice,
range: Range,
textobject: TextObject,
ch: char,
count: usize,
) -> Range {
textobject_pair_surround_impl(slice, range, textobject, Some(ch), count)
textobject_pair_surround_impl(syntax, slice, range, textobject, Some(ch), count)
}
pub fn textobject_pair_surround_closest(
syntax: Option<&Syntax>,
slice: RopeSlice,
range: Range,
textobject: TextObject,
count: usize,
) -> Range {
textobject_pair_surround_impl(slice, range, textobject, None, count)
textobject_pair_surround_impl(syntax, slice, range, textobject, None, count)
}
fn textobject_pair_surround_impl(
syntax: Option<&Syntax>,
slice: RopeSlice,
range: Range,
textobject: TextObject,
@ -226,8 +229,7 @@ fn textobject_pair_surround_impl(
) -> Range {
let pair_pos = match ch {
Some(ch) => surround::find_nth_pairs_pos(slice, ch, range, count),
// Automatically find the closest surround pairs
None => surround::find_nth_closest_pairs_pos(slice, range, count),
None => surround::find_nth_closest_pairs_pos(syntax, slice, range, count),
};
pair_pos
.map(|(anchor, head)| match textobject {

@ -5409,13 +5409,22 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
'e' => textobject_treesitter("entry", range),
'p' => textobject::textobject_paragraph(text, range, objtype, count),
'm' => textobject::textobject_pair_surround_closest(
text, range, objtype, count,
doc.syntax(),
text,
range,
objtype,
count,
),
'g' => textobject_change(range),
// TODO: cancel new ranges if inconsistent surround matches across lines
ch if !ch.is_ascii_alphanumeric() => {
textobject::textobject_pair_surround(text, range, objtype, ch, count)
}
ch if !ch.is_ascii_alphanumeric() => textobject::textobject_pair_surround(
doc.syntax(),
text,
range,
objtype,
ch,
count,
),
_ => range,
}
});
@ -5440,7 +5449,7 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
("c", "Comment (tree-sitter)"),
("T", "Test (tree-sitter)"),
("e", "Data structure entry (tree-sitter)"),
("m", "Closest surrounding pair"),
("m", "Closest surrounding pair (tree-sitter)"),
("g", "Change"),
(" ", "... or any character acting as a pair"),
];
@ -5454,7 +5463,7 @@ fn surround_add(cx: &mut Context) {
// surround_len is the number of new characters being added.
let (open, close, surround_len) = match event.char() {
Some(ch) => {
let (o, c) = surround::get_pair(ch);
let (o, c) = match_brackets::get_pair(ch);
let mut open = Tendril::new();
open.push(o);
let mut close = Tendril::new();
@ -5505,13 +5514,14 @@ fn surround_replace(cx: &mut Context) {
let text = doc.text().slice(..);
let selection = doc.selection(view.id);
let change_pos = match surround::get_surround_pos(text, selection, surround_ch, count) {
Ok(c) => c,
Err(err) => {
cx.editor.set_error(err.to_string());
return;
}
};
let change_pos =
match surround::get_surround_pos(doc.syntax(), text, selection, surround_ch, count) {
Ok(c) => c,
Err(err) => {
cx.editor.set_error(err.to_string());
return;
}
};
let selection = selection.clone();
let ranges: SmallVec<[Range; 1]> = change_pos.iter().map(|&p| Range::point(p)).collect();
@ -5526,7 +5536,7 @@ fn surround_replace(cx: &mut Context) {
Some(to) => to,
None => return doc.set_selection(view.id, selection),
};
let (open, close) = surround::get_pair(to);
let (open, close) = match_brackets::get_pair(to);
// the changeset has to be sorted to allow nested surrounds
let mut sorted_pos: Vec<(usize, char)> = Vec::new();
@ -5563,13 +5573,14 @@ fn surround_delete(cx: &mut Context) {
let text = doc.text().slice(..);
let selection = doc.selection(view.id);
let mut change_pos = match surround::get_surround_pos(text, selection, surround_ch, count) {
Ok(c) => c,
Err(err) => {
cx.editor.set_error(err.to_string());
return;
}
};
let mut change_pos =
match surround::get_surround_pos(doc.syntax(), text, selection, surround_ch, count) {
Ok(c) => c,
Err(err) => {
cx.editor.set_error(err.to_string());
return;
}
};
change_pos.sort_unstable(); // the changeset has to be sorted to allow nested surrounds
let transaction =
Transaction::change(doc.text(), change_pos.into_iter().map(|p| (p, p + 1, None)));

Loading…
Cancel
Save