getting somewhere

4 months ago · f7686d7af2
parent 29ffc20290
commit f7686d7af2
12 changed files with 731 additions and 173 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1433,6 +1433,7 @@ dependencies = [
 "ahash",
 "arc-swap",
 "bitflags 2.6.0",
+ "cc",
 "hashbrown 0.14.5",
 "helix-stdx",
 "libloading",
--- a/helix-syntax/src/injections_tree.rs
+++ b/helix-syntax/src/injections_tree.rs
@ -1,15 +1,21 @@
 use core::slice;
+use std::cell::RefCell;
 use std::iter::Peekable;
+use std::mem::replace;
 use std::sync::Arc;

 use hashbrown::HashMap;
+use ropey::RopeSlice;
 use slotmap::{new_key_type, SlotMap};

 use crate::parse::LayerUpdateFlags;
-use crate::tree_sitter::SyntaxTree;
-use crate::{HighlightConfiguration, RopeProvider};
+use crate::tree_sitter::{
+    self, Capture, InactiveQueryCursor, Parser, Query, QueryCursor, RopeTsInput, SyntaxTree,
+    SyntaxTreeNode,
+};
+use crate::HighlightConfiguration;

-// TODO(perf): replace std::ops::Range with helix_core::Range once added
+// TODO(perf): replace std::ops::Range<usize> with helix_stdx::Range<u32> once added
 type Range = std::ops::Range<usize>;

 new_key_type! {
@ -23,15 +29,16 @@ pub struct LanguageLayer {
    pub(crate) parse_tree: Option<SyntaxTree>,
    /// internal flags used during parsing to track incremental invalidation
    pub(crate) flags: LayerUpdateFlags,
+    ranges: Vec<tree_sitter::Range>,
    pub(crate) parent: Option<LayerId>,
-    /// a list of **sorted** non-overlapping injection ranges note that
+    /// a list of **sorted** non-overlapping injection ranges. Note that
    /// injection ranges are not relative to the start of this layer but the
    /// start of the root layer
-    pub(crate) injection_ranges: Box<[InjectionRange]>,
+    pub(crate) injections: Box<[Injection]>,
 }

-#[derive(Debug)]
-pub(crate) struct InjectionRange {
+#[derive(Debug, Clone)]
+pub(crate) struct Injection {
    pub byte_range: Range,
    pub layer: LayerId,
 }
@ -39,11 +46,11 @@ pub(crate) struct InjectionRange {
 impl LanguageLayer {
    /// Returns the injection range **within this layers** that contains `idx`.
    /// This function will not descend into nested injections
-    pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&InjectionRange> {
+    pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&Injection> {
        let i = self
-            .injection_ranges
+            .injections
            .partition_point(|range| range.byte_range.start <= idx);
-        self.injection_ranges
+        self.injections
            .get(i)
            .filter(|injection| injection.byte_range.end > idx)
    }
@ -75,20 +82,187 @@ impl InjectionTree {
    }
 }

-struct ActiveInjection<'a> {
-    injections: Peekable<slice::Iter<'a, InjectionTree>>,
-    range: InjectionRange,
+#[derive(Clone)]
+pub struct MatchedNode {
+    pub capture: Capture,
+    pub byte_range: Range,
+}
+
+struct LayerQueryIter<'a> {
+    cursor: QueryCursor<'a, 'a, RopeTsInput<'a>>,
+    peeked: Option<MatchedNode>,
+}
+
+impl<'a> LayerQueryIter<'a> {
+    fn peek(&mut self) -> Option<&MatchedNode> {
+        if self.peeked.is_none() {
+            let (query_match, node_idx) = self.cursor.next_matched_node()?;
+            let matched_node = query_match.matched_node(node_idx);
+            self.peeked = Some(MatchedNode {
+                capture: matched_node.capture,
+                byte_range: matched_node.syntax_node.byte_range(),
+            });
+        }
+        self.peeked.as_ref()
+    }
+
+    fn consume(&mut self) -> MatchedNode {
+        self.peeked.take().unwrap()
+    }
+}
+
+struct ActiveLayer<'a> {
+    query_iter: LayerQueryIter<'a>,
+    injections: Peekable<slice::Iter<'a, Injection>>,
+}
+
+struct QueryBuilder<'a, 'tree> {
+    query: &'a Query,
+    node: &'a SyntaxTreeNode<'tree>,
+    src: RopeSlice<'a>,
+    injection_tree: &'a InjectionTree,
+}
+
+pub struct QueryIter<'a, 'tree> {
+    query_builder: Box<QueryBuilder<'a, 'tree>>,
+    active_layers: HashMap<LayerId, ActiveLayer<'a>>,
+    active_injections: Vec<Injection>,
+    current_injection: Injection,
+}
+
+impl<'a> QueryIter<'a, '_> {
+    fn enter_injection(&mut self, injection: Injection) -> bool {
+        self.active_layers
+            .entry(injection.layer)
+            .or_insert_with(|| {
+                let layer = &self.query_builder.injection_tree.layers[injection.layer];
+                let injection_start = layer
+                    .injections
+                    .partition_point(|child| child.byte_range.start < injection.byte_range.start);
+                let cursor = get_cursor().execute_query(
+                    self.query_builder.query,
+                    self.query_builder.node,
+                    RopeTsInput::new(self.query_builder.src),
+                );
+                ActiveLayer {
+                    query_iter: LayerQueryIter {
+                        cursor,
+                        peeked: None,
+                    },
+                    injections: layer.injections[injection_start..].iter().peekable(),
+                }
+            });
+        let old_injection = replace(&mut self.current_injection, injection);
+        self.active_injections.push(old_injection);
+        true
+    }
+
+    fn exit_injection(&mut self) -> Option<Injection> {
+        let injection = replace(&mut self.current_injection, self.active_injections.pop()?);
+        let finished_layer = self.active_layers[&injection.layer]
+            .query_iter
+            .peeked
+            .is_none();
+        if finished_layer {
+            let layer = self.active_layers.remove(&injection.layer).unwrap();
+            reuse_cursor(layer.query_iter.cursor.reuse());
+        }
+        Some(injection)
+    }
+}
+
+pub enum QueryIterEvent {
+    EnterInjection(Injection),
+    Match(MatchedNode),
+    ExitInjection(Injection),
+}
+
+impl<'a> Iterator for QueryIter<'a, '_> {
+    type Item = QueryIterEvent;
+
+    fn next(&mut self) -> Option<QueryIterEvent> {
+        loop {
+            let active_layer = self
+                .active_layers
+                .get_mut(&self.current_injection.layer)
+                .unwrap();
+            let next_injection = active_layer.injections.peek().filter(|injection| {
+                injection.byte_range.start < self.current_injection.byte_range.end
+            });
+            let next_match = active_layer.query_iter.peek().filter(|matched_node| {
+                matched_node.byte_range.start < self.current_injection.byte_range.end
+            });
+
+            match (next_match, next_injection) {
+                (None, None) => {
+                    return self.exit_injection().map(QueryIterEvent::ExitInjection);
+                }
+                (Some(_), None) => {
+                    // consume match
+                    let matched_node = active_layer.query_iter.consume();
+                    return Some(QueryIterEvent::Match(matched_node));
+                }
+                (Some(matched_node), Some(injection))
+                    if matched_node.byte_range.start <= injection.byte_range.end =>
+                {
+                    // consume match
+                    let matched_node = active_layer.query_iter.consume();
+                    // ignore nodes that are overlapped by the injection
+                    if matched_node.byte_range.start <= injection.byte_range.start {
+                        return Some(QueryIterEvent::Match(matched_node));
+                    }
+                }
+                (Some(_), Some(_)) | (None, Some(_)) => {
+                    // consume injection
+                    let injection = active_layer.injections.next().unwrap();
+                    if self.enter_injection(injection.clone()) {
+                        return Some(QueryIterEvent::EnterInjection(injection.clone()));
+                    }
+                }
+            }
+        }
+    }
+}
+
+struct TsParser {
+    parser: crate::tree_sitter::Parser,
+    pub cursors: Vec<crate::tree_sitter::InactiveQueryCursor>,
 }

-struct ActiveLayer<'a, State> {
-    state: State,
-    /// the query captures just for this layer
-    layer_captures: Peekable<LayerQueryCaptures<'a>>,
+// could also just use a pool, or a single instance?
+thread_local! {
+    static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
+        parser: Parser::new(),
+        cursors: Vec::new(),
+    })
 }

-type LayerQueryCaptures<'a> = tree_sitter::QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>;
+pub fn with_cursor<T>(f: impl FnOnce(&mut InactiveQueryCursor) -> T) -> T {
+    PARSER.with(|parser| {
+        let mut parser = parser.borrow_mut();
+        let mut cursor = parser
+            .cursors
+            .pop()
+            .unwrap_or_else(InactiveQueryCursor::new);
+        let res = f(&mut cursor);
+        parser.cursors.push(cursor);
+        res
+    })
+}
+
+pub fn get_cursor() -> InactiveQueryCursor {
+    PARSER.with(|parser| {
+        let mut parser = parser.borrow_mut();
+        parser
+            .cursors
+            .pop()
+            .unwrap_or_else(InactiveQueryCursor::new)
+    })
+}

-pub struct QueryCaptures<'a> {
-    active_layers: HashMap<LayerId, ActiveLayer<'a, ()>>,
-    active_injections: Vec<ActiveInjection<'a>>,
+pub fn reuse_cursor(cursor: InactiveQueryCursor) {
+    PARSER.with(|parser| {
+        let mut parser = parser.borrow_mut();
+        parser.cursors.push(cursor)
+    })
 }
--- a/helix-syntax/src/lib.rs
+++ b/helix-syntax/src/lib.rs
@ -337,7 +337,7 @@ thread_local! {
 pub fn with_cursor<T>(f: impl FnOnce(&mut QueryCursor) -> T) -> T {
    PARSER.with(|parser| {
        let mut parser = parser.borrow_mut();
-        let mut cursor = parser.cursors.pop().unwrap_or_else(QueryCursor::new);
+        let mut cursor = parser.cursors.pop().unwrap_or_default();
        let res = f(&mut cursor);
        parser.cursors.push(cursor);
        res
--- a/helix-syntax/src/tree_sitter.rs
+++ b/helix-syntax/src/tree_sitter.rs
@ -1,13 +1,19 @@
 mod grammar;
 mod parser;
 mod query;
-mod query_captures;
+mod query_cursor;
+mod query_match;
 mod ropey;
 mod syntax_tree;
 mod syntax_tree_node;

+use std::ops;
+
 pub use grammar::Grammar;
 pub use parser::{Parser, ParserInputRaw};
+pub use query::{Capture, ParserErrorLocation, Pattern, Query, QueryStr};
+pub use query_cursor::{InactiveQueryCursor, MatchedNode, MatchedNodeIdx, QueryCursor, QueryMatch};
+pub use ropey::RopeTsInput;
 pub use syntax_tree::{InputEdit, SyntaxTree};
 pub use syntax_tree_node::SyntaxTreeNode;

@ -26,3 +32,14 @@ pub struct Range {
    pub start_byte: u32,
    pub end_byte: u32,
 }
+
+pub trait TsInput {
+    type Cursor: regex_cursor::Cursor;
+    fn cursor_at(&mut self, offset: usize) -> &mut Self::Cursor;
+    fn eq(&mut self, range1: ops::Range<usize>, range2: ops::Range<usize>) -> bool;
+}
+
+pub trait IntoTsInput {
+    type TsInput: TsInput;
+    fn into_ts_input(self) -> Self::TsInput;
+}
--- a/helix-syntax/src/tree_sitter/parser.rs
+++ b/helix-syntax/src/tree_sitter/parser.rs
@ -1,10 +1,12 @@
 use std::os::raw::c_void;
-use std::panic::catch_unwind;
+use std::panic::{catch_unwind, AssertUnwindSafe};
 use std::ptr::NonNull;
 use std::{fmt, ptr};

+use regex_cursor::Cursor;
+
 use crate::tree_sitter::syntax_tree::{SyntaxTree, SyntaxTreeData};
-use crate::tree_sitter::{Grammar, Point, Range};
+use crate::tree_sitter::{Grammar, IntoTsInput, Point, Range, TsInput};

 // opaque data
 enum ParserData {}
@ -51,25 +53,28 @@ impl Parser {
    }

    #[must_use]
-    pub fn parse<I: ParserInput>(
+    pub fn parse<I: TsInput>(
        &mut self,
-        input: impl IntoParserInput<ParserInput = I>,
+        input: impl IntoTsInput<TsInput = I>,
        old_tree: Option<&SyntaxTree>,
    ) -> Option<SyntaxTree> {
-        let mut input = input.into_parser_input();
-        unsafe extern "C" fn read<C: ParserInput>(
+        let mut input = input.into_ts_input();
+        unsafe extern "C" fn read<C: TsInput>(
            payload: NonNull<c_void>,
            byte_index: u32,
            _position: Point,
-            bytes_read: &mut u32,
+            bytes_read: *mut u32,
        ) -> *const u8 {
-            match catch_unwind(|| {
-                let cursor: &mut C = payload.cast().as_mut();
-                cursor.read(byte_index as usize)
-            }) {
-                Ok(slice) => {
-                    *bytes_read = slice.len() as u32;
-                    slice.as_ptr()
+            let cursor = catch_unwind(AssertUnwindSafe(move || {
+                let input: &mut C = payload.cast().as_mut();
+                let cursor = input.cursor_at(byte_index as usize);
+                let slice = cursor.chunk();
+                (slice.as_ptr(), slice.len().try_into().unwrap())
+            }));
+            match cursor {
+                Ok((ptr, len)) => {
+                    *bytes_read = len;
+                    ptr
                }
                Err(_) => {
                    *bytes_read = 0;
@ -121,7 +126,7 @@ type TreeSitterReadFn = unsafe extern "C" fn(
    payload: NonNull<c_void>,
    byte_index: u32,
    position: Point,
-    bytes_read: &mut u32,
+    bytes_read: *mut u32,
 ) -> *const u8;

 #[repr(C)]
@ -132,15 +137,6 @@ pub struct ParserInputRaw {
    pub encoding: u32,
 }

-pub trait ParserInput {
-    fn read(&mut self, offset: usize) -> &[u8];
-}
-
-pub trait IntoParserInput {
-    type ParserInput;
-    fn into_parser_input(self) -> Self::ParserInput;
-}
-
 extern "C" {
    /// Create a new parser
    fn ts_parser_new() -> NonNull<ParserData>;
--- a/helix-syntax/src/tree_sitter/query.rs
+++ b/helix-syntax/src/tree_sitter/query.rs
@ -4,8 +4,6 @@ use std::path::{Path, PathBuf};
 use std::ptr::NonNull;
 use std::{slice, str};

-use regex_cursor::Cursor;
-
 use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate, TextPredicate};
 use crate::tree_sitter::query::property::QueryProperty;
 use crate::tree_sitter::Grammar;
@ -13,20 +11,23 @@ use crate::tree_sitter::Grammar;
 mod predicate;
 mod property;

+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Pattern(pub(crate) u32);
+
 pub enum QueryData {}

-pub(super) struct Pattern {
+pub(super) struct PatternData {
    text_predicates: Range<u32>,
    properties: Range<u32>,
 }

 pub struct Query {
-    raw: NonNull<QueryData>,
+    pub(crate) raw: NonNull<QueryData>,
    num_captures: u32,
    num_strings: u32,
    text_predicates: Vec<TextPredicate>,
    properties: Vec<QueryProperty>,
-    patterns: Box<[Pattern]>,
+    patterns: Box<[PatternData]>,
 }

 impl Query {
@ -40,7 +41,7 @@ impl Query {
        grammar: Grammar,
        source: &str,
        path: impl AsRef<Path>,
-        mut custom_predicate: impl FnMut(Predicate) -> Result<(), InvalidPredicateError>,
+        mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>,
    ) -> Result<Self, ParseError> {
        assert!(
            source.len() <= i32::MAX as usize,
@ -121,7 +122,7 @@ impl Query {
                }
                RawQueryError::Language => unreachable!("should be handled at grammar load"),
            };
-            return Err(err)
+            return Err(err);
        };

        // I am not going to bother with safety comments here, all of these are
@ -141,7 +142,7 @@ impl Query {
        let patterns: Result<_, ParseError> = (0..num_patterns)
            .map(|pattern| {
                query
-                    .parse_pattern_predicates(pattern, &mut custom_predicate)
+                    .parse_pattern_predicates(Pattern(pattern), &mut custom_predicate)
                    .map_err(|err| ParseError::InvalidPredicate {
                        message: err.msg.into(),
                        location: ParserErrorLocation::new(
@ -157,35 +158,6 @@ impl Query {
        Ok(query)
    }

-    pub fn satsifies_text_predicate<C: Cursor>(
-        &self,
-        cursor: &mut regex_cursor::Input<C>,
-        pattern: u32,
-    ) {
-        let text_predicates = self.patterns[pattern as usize].text_predicates;
-        let text_predicates =
-            &self.text_predicates[text_predicates.start as usize..text_predicates.end as usize];
-        for predicate in text_predicates {
-            match predicate.kind {
-                predicate::TextPredicateKind::EqString(_) => todo!(),
-                predicate::TextPredicateKind::EqCapture(_) => todo!(),
-                predicate::TextPredicateKind::MatchString(_) => todo!(),
-                predicate::TextPredicateKind::AnyString(_) => todo!(),
-            }
-        }
-    }
-
-    // fn parse_predicates(&mut self) {
-    //     let pattern_count = unsafe { ts_query_pattern_count(self.raw) };
-
-    //     let mut text_predicates = Vec::with_capacity(pattern_count as usize);
-    //     let mut property_predicates = Vec::with_capacity(pattern_count as usize);
-    //     let mut property_settings = Vec::with_capacity(pattern_count as usize);
-    //     let mut general_predicates = Vec::with_capacity(pattern_count as usize);
-
-    //     for i in 0..pattern_count {}
-    // }
-
    #[inline]
    fn get_string(&self, str: QueryStr) -> &str {
        let value_id = str.0;
@ -218,10 +190,15 @@ impl Query {
        }
    }

-    pub fn pattern_properies(&self, pattern_idx: u32) -> &[QueryProperty] {
-        let range = self.patterns[pattern_idx as usize].properties.clone();
+    pub fn pattern_properies(&self, pattern_idx: Pattern) -> &[QueryProperty] {
+        let range = self.patterns[pattern_idx.0 as usize].properties.clone();
        &self.properties[range.start as usize..range.end as usize]
    }
+
+    pub(crate) fn pattern_text_predicates(&self, pattern_idx: u16) -> &[TextPredicate] {
+        let range = self.patterns[pattern_idx as usize].text_predicates.clone();
+        &self.text_predicates[range.start as usize..range.end as usize]
+    }
 }

 impl Drop for Query {
@ -231,6 +208,7 @@ impl Drop for Query {
 }

 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(transparent)]
 pub struct Capture(u32);

 impl Capture {
--- a/helix-syntax/src/tree_sitter/query/predicate.rs
+++ b/helix-syntax/src/tree_sitter/query/predicate.rs
@ -1,11 +1,16 @@
 use std::error::Error;
+use std::iter::zip;
+use std::ops::Range;
 use std::ptr::NonNull;
 use std::{fmt, slice};

 use crate::tree_sitter::query::property::QueryProperty;
-use crate::tree_sitter::query::{Capture, Pattern, Query, QueryData, QueryStr};
+use crate::tree_sitter::query::{Capture, Pattern, PatternData, Query, QueryData, QueryStr};
+use crate::tree_sitter::query_cursor::MatchedNode;
+use crate::tree_sitter::TsInput;

 use regex_cursor::engines::meta::Regex;
+use regex_cursor::Cursor;

 macro_rules! bail {
    ($($args:tt)*) => {{
@ -29,25 +34,141 @@ pub(super) enum TextPredicateKind {
    AnyString(Box<[QueryStr]>),
 }

-pub(super) struct TextPredicate {
+pub(crate) struct TextPredicate {
    capture: Capture,
    kind: TextPredicateKind,
    negated: bool,
    match_all: bool,
 }

+fn input_matches_str<I: TsInput>(str: &str, range: Range<usize>, input: &mut I) -> bool {
+    if str.len() != range.len() {
+        return false;
+    }
+    let mut str = str.as_bytes();
+    let cursor = input.cursor_at(range.start);
+    let start_in_chunk = range.start - cursor.offset();
+    if range.end - cursor.offset() <= cursor.chunk().len() {
+        // hotpath
+        return &cursor.chunk()[start_in_chunk..range.end - cursor.offset()] == str;
+    }
+    if cursor.chunk()[start_in_chunk..] != str[..cursor.chunk().len() - start_in_chunk] {
+        return false;
+    }
+    str = &str[..cursor.chunk().len() - start_in_chunk];
+    while cursor.advance() {
+        if str.len() <= cursor.chunk().len() {
+            return &cursor.chunk()[..range.end - cursor.offset()] == str;
+        }
+        if &str[..cursor.chunk().len()] != cursor.chunk() {
+            return false;
+        }
+        str = &str[cursor.chunk().len()..]
+    }
+    // buggy cursor/invalid range
+    false
+}
+
+fn inputs_match<I: TsInput>(str: &str, range: Range<usize>, input: &mut I) -> bool {
+    if str.len() != range.len() {
+        return false;
+    }
+    let mut str = str.as_bytes();
+    let cursor = input.cursor_at(range.start);
+    let start_in_chunk = range.start - cursor.offset();
+    if range.end - cursor.offset() <= cursor.chunk().len() {
+        // hotpath
+        return &cursor.chunk()[start_in_chunk..range.end - cursor.offset()] == str;
+    }
+    if cursor.chunk()[start_in_chunk..] != str[..cursor.chunk().len() - start_in_chunk] {
+        return false;
+    }
+    str = &str[..cursor.chunk().len() - start_in_chunk];
+    while cursor.advance() {
+        if str.len() <= cursor.chunk().len() {
+            return &cursor.chunk()[..range.end - cursor.offset()] == str;
+        }
+        if &str[..cursor.chunk().len()] != cursor.chunk() {
+            return false;
+        }
+        str = &str[cursor.chunk().len()..]
+    }
+    // buggy cursor/invalid range
+    false
+}
+
+impl TextPredicate {
+    /// handlers match_all and negated
+    fn satisfied_helper(&self, mut nodes: impl Iterator<Item = bool>) -> bool {
+        if self.match_all {
+            nodes.all(|matched| matched != self.negated)
+        } else {
+            nodes.any(|matched| matched != self.negated)
+        }
+    }
+
+    pub fn satsified<I: TsInput>(
+        &self,
+        input: &mut I,
+        matched_nodes: &[MatchedNode],
+        query: &Query,
+    ) -> bool {
+        let mut capture_nodes = matched_nodes
+            .iter()
+            .filter(|matched_node| matched_node.capture == self.capture);
+        match self.kind {
+            TextPredicateKind::EqString(str) => self.satisfied_helper(capture_nodes.map(|node| {
+                let range = node.syntax_node.byte_range();
+                input_matches_str(query.get_string(str), range.clone(), input)
+            })),
+            TextPredicateKind::EqCapture(other_capture) => {
+                let mut other_nodes = matched_nodes
+                    .iter()
+                    .filter(|matched_node| matched_node.capture == other_capture);
+
+                let res = self.satisfied_helper(zip(&mut capture_nodes, &mut other_nodes).map(
+                    |(node1, node2)| {
+                        let range1 = node1.syntax_node.byte_range();
+                        let range2 = node2.syntax_node.byte_range();
+                        input.eq(range1, range2)
+                    },
+                ));
+                let consumed_all = capture_nodes.next().is_none() && other_nodes.next().is_none();
+                res && (!self.match_all || consumed_all)
+            }
+            TextPredicateKind::MatchString(ref regex) => {
+                self.satisfied_helper(capture_nodes.map(|node| {
+                    let range = node.syntax_node.byte_range();
+                    let input = regex_cursor::Input::new(input.cursor_at(range.start)).range(range);
+                    regex.is_match(input)
+                }))
+            }
+            TextPredicateKind::AnyString(ref strings) => {
+                let strings = strings.iter().map(|&str| query.get_string(str));
+                self.satisfied_helper(capture_nodes.map(|node| {
+                    let range = node.syntax_node.byte_range();
+                    strings
+                        .clone()
+                        .filter(|str| str.len() == range.len())
+                        .any(|str| input_matches_str(str, range.clone(), input))
+                }))
+            }
+        }
+    }
+}
+
 impl Query {
    pub(super) fn parse_pattern_predicates(
        &mut self,
-        pattern_index: u32,
-        mut custom_predicate: impl FnMut(Predicate) -> Result<(), InvalidPredicateError>,
-    ) -> Result<Pattern, InvalidPredicateError> {
+        pattern: Pattern,
+        mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>,
+    ) -> Result<PatternData, InvalidPredicateError> {
        let text_predicate_start = self.text_predicates.len() as u32;
        let property_start = self.properties.len() as u32;

        let predicate_steps = unsafe {
            let mut len = 0u32;
-            let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern_index, &mut len);
+            let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern.0, &mut len);
            (len != 0)
                .then(|| slice::from_raw_parts(raw_predicates, len as usize))
                .unwrap_or_default()
@ -118,10 +239,10 @@ impl Query {
                // is and is-not are better handeled as custom predicates since interpreting is context dependent
                // "is?" => property_predicates.push((QueryProperty::parse(&predicate), false)),
                // "is-not?" => property_predicates.push((QueryProperty::parse(&predicate), true)),
-                _ => custom_predicate(predicate)?,
+                _ => custom_predicate(pattern, predicate)?,
            }
        }
-        Ok(Pattern {
+        Ok(PatternData {
            text_predicates: text_predicate_start..self.text_predicates.len() as u32,
            properties: property_start..self.properties.len() as u32,
        })
--- a/helix-syntax/src/tree_sitter/query_captures.rs
+++ b/helix-syntax/src/tree_sitter/query_captures.rs
@ -1,66 +0,0 @@
-use std::ptr::{self, NonNull};
-
-use regex_cursor::Cursor;
-
-use crate::tree_sitter::query::Query;
-use crate::tree_sitter::syntax_tree_node::SyntaxTreeNodeRaw;
-
-enum QueryCursorData {}
-
-pub struct QueryCaptures<'a> {
-    query: &'a Query,
-    query_cursor: &'a mut QueryCursorData,
-    text_cursor: regex_cursor::RopeyCursor<'a>,
-}
-
-impl<C: Cursor> QueryCaptures<'_, C> {
-    fn next(&mut self) {
-        let mut query_match = TSQueryMatch {
-            id: 0,
-            pattern_index: 0,
-            capture_count: 0,
-            captures: ptr::null(),
-        };
-        let mut capture_idx = 0;
-        loop {
-            let success = unsafe {
-                ts_query_cursor_next_capture(
-                    &mut self.query_cursor,
-                    &mut query_match,
-                    &mut capture_idx,
-                )
-            };
-            if !success {
-                break;
-            }
-        }
-        let mut input = regex_cursor::Input::new(self.text_cursor.clone());
-    }
-}
-
-#[repr(C)]
-#[derive(Debug)]
-struct TSQueryCapture {
-    node: SyntaxTreeNodeRaw,
-    index: u32,
-}
-
-#[repr(C)]
-#[derive(Debug)]
-struct TSQueryMatch {
-    id: u32,
-    pattern_index: u16,
-    capture_count: u16,
-    captures: *const TSQueryCapture,
-}
-
-extern "C" {
-    /// Advance to the next capture of the currently running query.
-    /// If there is a capture, write its match to `*match` and its index within
-    /// the matche's capture list to `*capture_index`. Otherwise, return `false`.
-    fn ts_query_cursor_next_capture(
-        self_: &mut QueryCursorData,
-        match_: &mut TSQueryMatch,
-        capture_index: &mut u32,
-    ) -> bool;
-}
--- a/helix-syntax/src/tree_sitter/query_cursor.rs
+++ b/helix-syntax/src/tree_sitter/query_cursor.rs
@ -0,0 +1,319 @@
+use core::slice;
+use std::marker::PhantomData;
+use std::mem::replace;
+use std::ops::Range;
+use std::ptr::{self, NonNull};
+
+use crate::tree_sitter::query::{Capture, Pattern, Query, QueryData};
+use crate::tree_sitter::syntax_tree_node::SyntaxTreeNodeRaw;
+use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode, TsInput};
+
+enum QueryCursorData {}
+
+pub struct QueryCursor<'a, 'tree, I: TsInput> {
+    query: &'a Query,
+    ptr: *mut QueryCursorData,
+    tree: PhantomData<&'tree SyntaxTree>,
+    input: I,
+}
+
+impl<'tree, I: TsInput> QueryCursor<'_, 'tree, I> {
+    pub fn next_match(&mut self) -> Option<QueryMatch<'_, 'tree>> {
+        let mut query_match = TSQueryMatch {
+            id: 0,
+            pattern_index: 0,
+            capture_count: 0,
+            captures: ptr::null(),
+        };
+        loop {
+            let success = unsafe { ts_query_cursor_next_match(self.ptr, &mut query_match) };
+            if !success {
+                return None;
+            }
+            let matched_nodes = unsafe {
+                slice::from_raw_parts(
+                    query_match.captures.cast(),
+                    query_match.capture_count as usize,
+                )
+            };
+            let satisfies_predicates = self
+                .query
+                .pattern_text_predicates(query_match.pattern_index)
+                .iter()
+                .all(|predicate| predicate.satsified(&mut self.input, matched_nodes, self.query));
+            if satisfies_predicates {
+                let res = QueryMatch {
+                    id: query_match.id,
+                    pattern: Pattern(query_match.pattern_index as u32),
+                    matched_nodes,
+                    query_cursor: unsafe { &mut *self.ptr },
+                    _tree: PhantomData,
+                };
+                return Some(res);
+            }
+        }
+    }
+
+    pub fn next_matched_node(&mut self) -> Option<(QueryMatch<'_, 'tree>, MatchedNodeIdx)> {
+        let mut query_match = TSQueryMatch {
+            id: 0,
+            pattern_index: 0,
+            capture_count: 0,
+            captures: ptr::null(),
+        };
+        let mut capture_idx = 0;
+        loop {
+            let success = unsafe {
+                ts_query_cursor_next_capture(self.ptr, &mut query_match, &mut capture_idx)
+            };
+            if !success {
+                return None;
+            }
+            let matched_nodes = unsafe {
+                slice::from_raw_parts(
+                    query_match.captures.cast(),
+                    query_match.capture_count as usize,
+                )
+            };
+            let satisfies_predicates = self
+                .query
+                .pattern_text_predicates(query_match.pattern_index)
+                .iter()
+                .all(|predicate| predicate.satsified(&mut self.input, matched_nodes, self.query));
+            if satisfies_predicates {
+                let res = QueryMatch {
+                    id: query_match.id,
+                    pattern: Pattern(query_match.pattern_index as u32),
+                    matched_nodes,
+                    query_cursor: unsafe { &mut *self.ptr },
+                    _tree: PhantomData,
+                };
+                return Some((res, capture_idx));
+            } else {
+                unsafe {
+                    ts_query_cursor_remove_match(self.ptr, query_match.id);
+                }
+            }
+        }
+    }
+
+    pub fn set_byte_range(&mut self, range: Range<usize>) {
+        unsafe {
+            ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32);
+        }
+    }
+
+    pub fn reuse(mut self) -> InactiveQueryCursor {
+        let ptr = replace(&mut self.ptr, ptr::null_mut());
+        InactiveQueryCursor {
+            ptr: unsafe { NonNull::new_unchecked(ptr) },
+        }
+    }
+}
+
+impl<I: TsInput> Drop for QueryCursor<'_, '_, I> {
+    fn drop(&mut self) {
+        // we allow moving the cursor data out so we need the null check here
+        // would be cleaner with a subtype but doesn't really matter at the end of the day
+        if !self.ptr.is_null() {
+            unsafe { ts_query_cursor_delete(self.ptr) }
+        }
+    }
+}
+
+/// A query cursor that is not actively associated with a query
+pub struct InactiveQueryCursor {
+    ptr: NonNull<QueryCursorData>,
+}
+
+impl InactiveQueryCursor {
+    pub fn new() -> Self {
+        InactiveQueryCursor {
+            ptr: unsafe { NonNull::new_unchecked(ts_query_cursor_new()) },
+        }
+    }
+
+    /// Return the maximum number of in-progress matches for this cursor.
+    #[doc(alias = "ts_query_cursor_match_limit")]
+    #[must_use]
+    pub fn match_limit(&self) -> u32 {
+        unsafe { ts_query_cursor_match_limit(self.ptr.as_ptr()) }
+    }
+
+    /// Set the maximum number of in-progress matches for this cursor.  The
+    /// limit must be > 0 and <= 65536.
+    #[doc(alias = "ts_query_cursor_set_match_limit")]
+    pub fn set_match_limit(&mut self, limit: u32) {
+        unsafe {
+            ts_query_cursor_set_match_limit(self.ptr.as_ptr(), limit);
+        }
+    }
+
+    /// Check if, on its last execution, this cursor exceeded its maximum number
+    /// of in-progress matches.
+    #[doc(alias = "ts_query_cursor_did_exceed_match_limit")]
+    #[must_use]
+    pub fn did_exceed_match_limit(&self) -> bool {
+        unsafe { ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) }
+    }
+
+    pub fn set_byte_range(&mut self, range: Range<usize>) {
+        unsafe {
+            ts_query_cursor_set_byte_range(self.ptr.as_ptr(), range.start as u32, range.end as u32);
+        }
+    }
+
+    pub fn execute_query<'a, 'tree, I: TsInput>(
+        self,
+        query: &'a Query,
+        node: &SyntaxTreeNode<'tree>,
+        input: I,
+    ) -> QueryCursor<'a, 'tree, I> {
+        let ptr = self.ptr.as_ptr();
+        unsafe { ts_query_cursor_exec(self.ptr.as_ptr(), query.raw.as_ref(), node.as_raw()) };
+        QueryCursor {
+            query,
+            ptr,
+            tree: PhantomData,
+            input,
+        }
+    }
+}
+
+impl Drop for InactiveQueryCursor {
+    fn drop(&mut self) {
+        unsafe { ts_query_cursor_delete(self.ptr.as_ptr()) }
+    }
+}
+
+pub type MatchedNodeIdx = u32;
+
+#[repr(C)]
+#[derive(Clone)]
+pub struct MatchedNode<'tree> {
+    pub syntax_node: SyntaxTreeNode<'tree>,
+    pub capture: Capture,
+}
+
+pub struct QueryMatch<'cursor, 'tree> {
+    id: u32,
+    pattern: Pattern,
+    matched_nodes: &'cursor [MatchedNode<'tree>],
+    query_cursor: &'cursor mut QueryCursorData,
+    _tree: PhantomData<&'tree super::SyntaxTree>,
+}
+
+impl<'tree> QueryMatch<'_, 'tree> {
+    pub fn matched_nodes(&self) -> impl Iterator<Item = &MatchedNode<'tree>> {
+        self.matched_nodes.iter()
+    }
+
+    pub fn matched_node(&self, i: MatchedNodeIdx) -> &MatchedNode {
+        &self.matched_nodes[i as usize]
+    }
+
+    #[must_use]
+    pub const fn id(&self) -> u32 {
+        self.id
+    }
+
+    #[must_use]
+    pub const fn pattern(&self) -> Pattern {
+        self.pattern
+    }
+
+    #[doc(alias = "ts_query_cursor_remove_match")]
+    /// removes this match from the cursor so that further captures
+    /// from its cursor so that future captures that belong to this match
+    /// are no longer returned by capture iterators
+    pub fn remove(self) {
+        unsafe {
+            ts_query_cursor_remove_match(self.query_cursor, self.id);
+        }
+    }
+}
+
+#[repr(C)]
+#[derive(Debug)]
+struct TSQueryCapture {
+    node: SyntaxTreeNodeRaw,
+    index: u32,
+}
+
+#[repr(C)]
+#[derive(Debug)]
+struct TSQueryMatch {
+    id: u32,
+    pattern_index: u16,
+    capture_count: u16,
+    captures: *const TSQueryCapture,
+}
+
+extern "C" {
+    /// Advance to the next capture of the currently running query.
+    /// If there is a capture, write its match to `*match` and its index within
+    /// the matche's capture list to `*capture_index`. Otherwise, return `false`.
+    fn ts_query_cursor_next_capture(
+        self_: *mut QueryCursorData,
+        match_: &mut TSQueryMatch,
+        capture_index: &mut u32,
+    ) -> bool;
+
+    /// Advance to the next match of the currently running query.
+    ///
+    /// If there is a match, write it to `*match` and return `true`.
+    /// Otherwise, return `false`.
+    pub fn ts_query_cursor_next_match(
+        self_: *mut QueryCursorData,
+        match_: &mut TSQueryMatch,
+    ) -> bool;
+    fn ts_query_cursor_remove_match(self_: *mut QueryCursorData, match_id: u32);
+    /// Delete a query cursor, freeing all of the memory that it used
+    pub fn ts_query_cursor_delete(self_: *mut QueryCursorData);
+    /// Create a new cursor for executing a given query.
+    /// The cursor stores the state that is needed to iteratively search
+    /// for matches. To use the query cursor, first call [`ts_query_cursor_exec`]
+    /// to start running a given query on a given syntax node. Then, there are
+    /// two options for consuming the results of the query:
+    /// 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the
+    ///    *matches* in the order that they were found. Each match contains the
+    ///    index of the pattern that matched, and an array of captures. Because
+    ///    multiple patterns can match the same set of nodes, one match may contain
+    ///    captures that appear *before* some of the captures from a previous match.
+    /// 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the
+    ///    individual *captures* in the order that they appear. This is useful if
+    ///    don't care about which pattern matched, and just want a single ordered
+    ///    sequence of captures.
+    /// If you don't care about consuming all of the results, you can stop calling
+    /// [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.
+    ///  You can then start executing another query on another node by calling
+    ///  [`ts_query_cursor_exec`] again."]
+    pub fn ts_query_cursor_new() -> *mut QueryCursorData;
+
+    /// Start running a given query on a given node.
+    pub fn ts_query_cursor_exec(
+        self_: *mut QueryCursorData,
+        query: &QueryData,
+        node: SyntaxTreeNodeRaw,
+    );
+    /// Manage the maximum number of in-progress matches allowed by this query
+    /// cursor.
+    ///
+    /// Query cursors have an optional maximum capacity for storing lists of
+    /// in-progress captures. If this capacity is exceeded, then the
+    /// earliest-starting match will silently be dropped to make room for further
+    /// matches. This maximum capacity is optional — by default, query cursors allow
+    /// any number of pending matches, dynamically allocating new space for them as
+    /// needed as the query is executed.
+    pub fn ts_query_cursor_did_exceed_match_limit(self_: *const QueryCursorData) -> bool;
+    pub fn ts_query_cursor_match_limit(self_: *const QueryCursorData) -> u32;
+    pub fn ts_query_cursor_set_match_limit(self_: *mut QueryCursorData, limit: u32);
+    /// Set the range of bytes or (row, column) positions in which the query
+    /// will be executed.
+    pub fn ts_query_cursor_set_byte_range(
+        self_: *mut QueryCursorData,
+        start_byte: u32,
+        end_byte: u32,
+    );
+
+}
--- a/helix-syntax/src/tree_sitter/query_match.rs
+++ b/helix-syntax/src/tree_sitter/query_match.rs
@ -0,0 +1 @@
+pub struct QueryMatch {}
--- a/helix-syntax/src/tree_sitter/ropey.rs
+++ b/helix-syntax/src/tree_sitter/ropey.rs
@ -1,38 +1,54 @@
+use std::ops;
+
 use regex_cursor::{Cursor, RopeyCursor};
 use ropey::RopeSlice;

-use crate::tree_sitter::parser::{IntoParserInput, ParserInput};
+use crate::tree_sitter::{IntoTsInput, TsInput};

-pub struct RopeParserInput<'a> {
+pub struct RopeTsInput<'a> {
    src: RopeSlice<'a>,
    cursor: regex_cursor::RopeyCursor<'a>,
 }

-impl<'a> IntoParserInput for RopeSlice<'a> {
-    type ParserInput = RopeParserInput<'a>;
+impl<'a> RopeTsInput<'a> {
+    pub fn new(src: RopeSlice<'a>) -> Self {
+        RopeTsInput {
+            src,
+            cursor: regex_cursor::RopeyCursor::new(src),
+        }
+    }
+}

-    fn into_parser_input(self) -> Self::ParserInput {
-        RopeParserInput {
+impl<'a> IntoTsInput for RopeSlice<'a> {
+    type TsInput = RopeTsInput<'a>;
+
+    fn into_ts_input(self) -> Self::TsInput {
+        RopeTsInput {
            src: self,
            cursor: RopeyCursor::new(self),
        }
    }
 }

-impl ParserInput for RopeParserInput<'_> {
-    fn read(&mut self, offset: usize) -> &[u8] {
+impl<'a> TsInput for RopeTsInput<'a> {
+    type Cursor = RopeyCursor<'a>;
+    fn cursor_at(&mut self, offset: usize) -> &mut RopeyCursor<'a> {
        // this cursor is optimized for contigous reads which are by far the most common during parsing
        // very far jumps (like injections at the other end of the document) are handelde
-        // by restarting a new cursor (new chunks iterator)
-        if offset < self.cursor.offset() && self.cursor.offset() - offset > 4906 {
+        // by starting a new cursor (new chunks iterator)
+        if offset < self.cursor.offset() || self.cursor.offset() - offset > 4906 {
            self.cursor = regex_cursor::RopeyCursor::at(self.src, offset);
        } else {
            while self.cursor.offset() + self.cursor.chunk().len() >= offset {
                if !self.cursor.advance() {
-                    return &[];
+                    break;
+                }
            }
        }
+        &mut self.cursor
    }
-        self.cursor.chunk()
+
+    fn eq(&mut self, range1: ops::Range<usize>, range2: ops::Range<usize>) -> bool {
+        self.src.byte_slice(range1) == self.src.byte_slice(range2)
    }
 }
--- a/helix-syntax/src/tree_sitter/syntax_tree_node.rs
+++ b/helix-syntax/src/tree_sitter/syntax_tree_node.rs
@ -25,6 +25,7 @@ impl From<SyntaxTreeNode<'_>> for SyntaxTreeNodeRaw {
 }

 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct SyntaxTreeNode<'tree> {
    context: [u32; 4],
    id: NonNull<c_void>,
@ -44,7 +45,7 @@ impl<'tree> SyntaxTreeNode<'tree> {
    }

    #[inline]
-    fn as_raw(&self) -> SyntaxTreeNodeRaw {
+    pub(crate) fn as_raw(&self) -> SyntaxTreeNodeRaw {
        SyntaxTreeNodeRaw {
            context: self.context,
            id: self.id.as_ptr(),