From b1222f06640c02feb1b87b988d6bca53fdddb9c0 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 10 Jan 2024 15:58:44 -0500 Subject: [PATCH] Add a TreeCursor type that travels over injection layers This uses the layer parentage information from the parent commit to traverse the layers. It's a similar API to `tree_sitter:TreeCursor` but internally it does not use a `tree_sitter::TreeCursor` currently because that interface is behaving very unexpectedly. Using the `next_sibling`/`prev_sibling`/`parent` API on `tree_sitter::Node` reflects the previous code's behavior so this should result in no surprising changes. --- helix-core/src/syntax.rs | 16 ++- helix-core/src/syntax/tree_cursor.rs | 160 +++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 3 deletions(-) create mode 100644 helix-core/src/syntax/tree_cursor.rs diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 6414f3ce9..78abc0b0a 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,3 +1,5 @@ +mod tree_cursor; + use crate::{ auto_pairs::AutoPairs, chars::char_is_line_ending, @@ -32,6 +34,8 @@ use serde::{ser::SerializeSeq, Deserialize, Serialize}; use helix_loader::grammar::{get_language, load_runtime_file}; +pub use tree_cursor::TreeCursor; + fn deserialize_regex<'de, D>(deserializer: D) -> Result, D::Error> where D: serde::Deserializer<'de>, @@ -1495,6 +1499,12 @@ impl Syntax { .descendant_for_byte_range(start, end) } + pub fn walk(&self) -> TreeCursor<'_> { + // data structure to find the smallest range that contains a point + // when some of the ranges in the structure can overlap. + TreeCursor::new(&self.layers, self.root) + } + // Commenting // comment_strings_for_pos // is_commented @@ -1723,7 +1733,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, - QueryMatch, Range, TextProvider, Tree, TreeCursor, + QueryMatch, Range, TextProvider, Tree, }; const CANCELLATION_CHECK_INTERVAL: usize = 100; @@ -2657,7 +2667,7 @@ pub fn pretty_print_tree(fmt: &mut W, node: Node) -> fmt::Result fn pretty_print_tree_impl( fmt: &mut W, - cursor: &mut TreeCursor, + cursor: &mut tree_sitter::TreeCursor, depth: usize, ) -> fmt::Result { let node = cursor.node(); @@ -2967,7 +2977,7 @@ mod test { // rule but `name` and `body` belong to an unnamed helper `_method_rest`. // This can cause a bug with a pretty-printing implementation that // uses `Node::field_name_for_child` to determine field names but is - // fixed when using `TreeCursor::field_name`. + // fixed when using `tree_sitter::TreeCursor::field_name`. let source = "def self.method_name true end"; diff --git a/helix-core/src/syntax/tree_cursor.rs b/helix-core/src/syntax/tree_cursor.rs new file mode 100644 index 000000000..d9d140c9f --- /dev/null +++ b/helix-core/src/syntax/tree_cursor.rs @@ -0,0 +1,160 @@ +use std::{cmp::Reverse, ops::Range}; + +use super::{LanguageLayer, LayerId}; + +use slotmap::HopSlotMap; +use tree_sitter::Node; + +/// The byte range of an injection layer. +/// +/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges. +/// This allows us to sort the ranges ahead of time in order to efficiently find a range that +/// contains a point with maximum depth. +#[derive(Debug)] +struct InjectionRange { + start: usize, + end: usize, + layer_id: LayerId, + depth: u32, +} + +pub struct TreeCursor<'a> { + layers: &'a HopSlotMap, + root: LayerId, + current: LayerId, + injection_ranges: Vec, + // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but + // that returns very surprising results in testing. + cursor: Node<'a>, +} + +impl<'a> TreeCursor<'a> { + pub(super) fn new(layers: &'a HopSlotMap, root: LayerId) -> Self { + let mut injection_ranges = Vec::new(); + + for (layer_id, layer) in layers.iter() { + // Skip the root layer + if layer.parent.is_none() { + continue; + } + for byte_range in layer.ranges.iter() { + let range = InjectionRange { + start: byte_range.start_byte, + end: byte_range.end_byte, + layer_id, + depth: layer.depth, + }; + injection_ranges.push(range); + } + } + + injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth))); + + let cursor = layers[root].tree().root_node(); + + Self { + layers, + root, + current: root, + injection_ranges, + cursor, + } + } + + pub fn node(&self) -> Node<'a> { + self.cursor + } + + pub fn goto_parent(&mut self) -> bool { + if let Some(parent) = self.node().parent() { + self.cursor = parent; + return true; + } + + // If we are already on the root layer, we cannot ascend. + if self.current == self.root { + return false; + } + + // Ascend to the parent layer. + let range = self.node().byte_range(); + let parent_id = self.layers[self.current] + .parent + .expect("non-root layers have a parent"); + self.current = parent_id; + let root = self.layers[self.current].tree().root_node(); + self.cursor = root + .descendant_for_byte_range(range.start, range.end) + .unwrap_or(root); + + true + } + + /// Finds the injection layer that has exactly the same range as the given `range`. + fn layer_id_of_byte_range(&self, search_range: Range) -> Option { + let start_idx = self + .injection_ranges + .partition_point(|range| range.end < search_range.end); + + self.injection_ranges[start_idx..] + .iter() + .take_while(|range| range.end == search_range.end) + .find_map(|range| (range.start == search_range.start).then_some(range.layer_id)) + } + + pub fn goto_first_child(&mut self) -> bool { + // Check if the current node's range is an exact injection layer range. + if let Some(layer_id) = self + .layer_id_of_byte_range(self.node().byte_range()) + .filter(|&layer_id| layer_id != self.current) + { + // Switch to the child layer. + self.current = layer_id; + self.cursor = self.layers[self.current].tree().root_node(); + true + } else if let Some(child) = self.cursor.child(0) { + // Otherwise descend in the current tree. + self.cursor = child; + true + } else { + false + } + } + + pub fn goto_next_sibling(&mut self) -> bool { + if let Some(sibling) = self.cursor.next_sibling() { + self.cursor = sibling; + true + } else { + false + } + } + + pub fn goto_prev_sibling(&mut self) -> bool { + if let Some(sibling) = self.cursor.prev_sibling() { + self.cursor = sibling; + true + } else { + false + } + } + + /// Finds the injection layer that contains the given start-end range. + fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId { + let start_idx = self + .injection_ranges + .partition_point(|range| range.end < end); + + self.injection_ranges[start_idx..] + .iter() + .take_while(|range| range.start < end) + .find_map(|range| (range.start <= start).then_some(range.layer_id)) + .unwrap_or(self.root) + } + + pub fn reset_to_byte_range(&mut self, start: usize, end: usize) { + self.current = self.layer_id_containing_byte_range(start, end); + let root = self.layers[self.current].tree().root_node(); + self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root); + } +}