From 0cd8817d069f9afe05bea965872471613a4e7f43 Mon Sep 17 00:00:00 2001
From: Pascal Kuthe <pascalkuthe@pm.me>
Date: Sun, 5 May 2024 03:40:34 +0200
Subject: [PATCH] tmp

---
 Cargo.lock                                    |   3 +
 helix-syntax/Cargo.toml                       |   6 +
 helix-syntax/build.rs                         |  28 +
 helix-syntax/src/injections_tree.rs           |   2 +-
 helix-syntax/src/lib.rs                       |   7 +-
 helix-syntax/src/tree_sitter.rs               |  27 +
 helix-syntax/src/tree_sitter/grammar.rs       | 101 +++
 helix-syntax/src/tree_sitter/parser.rs        | 204 +++++++
 helix-syntax/src/tree_sitter/query.rs         | 574 ++++++++++++++++++
 helix-syntax/src/tree_sitter/ropey.rs         |  38 ++
 helix-syntax/src/tree_sitter/syntax_tree.rs   |  80 +++
 .../src/tree_sitter/syntax_tree_node.rs       | 291 +++++++++
 12 files changed, 1357 insertions(+), 4 deletions(-)
 create mode 100644 helix-syntax/build.rs
 create mode 100644 helix-syntax/src/tree_sitter.rs
 create mode 100644 helix-syntax/src/tree_sitter/grammar.rs
 create mode 100644 helix-syntax/src/tree_sitter/parser.rs
 create mode 100644 helix-syntax/src/tree_sitter/query.rs
 create mode 100644 helix-syntax/src/tree_sitter/ropey.rs
 create mode 100644 helix-syntax/src/tree_sitter/syntax_tree.rs
 create mode 100644 helix-syntax/src/tree_sitter/syntax_tree_node.rs

diff --git a/Cargo.lock b/Cargo.lock
index aa8b613cd..610f7e650 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1435,11 +1435,14 @@ dependencies = [
  "bitflags 2.6.0",
  "hashbrown 0.14.5",
  "helix-stdx",
+ "libloading",
  "log",
  "once_cell",
  "regex",
+ "regex-cursor",
  "ropey",
  "slotmap",
+ "thiserror",
  "tree-sitter",
 ]
 
diff --git a/helix-syntax/Cargo.toml b/helix-syntax/Cargo.toml
index 3ba12ddd1..c2fadb0a4 100644
--- a/helix-syntax/Cargo.toml
+++ b/helix-syntax/Cargo.toml
@@ -26,3 +26,9 @@ bitflags = "2.4"
 ahash = "0.8.9"
 hashbrown = { version = "0.14.3", features = ["raw"] }
 log = "0.4"
+regex-cursor = "0.1.4"
+libloading = "0.8.3"
+thiserror = "1.0.59"
+
+[build-dependencies]
+cc = "1.0.95"
diff --git a/helix-syntax/build.rs b/helix-syntax/build.rs
new file mode 100644
index 000000000..49a0dc593
--- /dev/null
+++ b/helix-syntax/build.rs
@@ -0,0 +1,28 @@
+use std::path::PathBuf;
+use std::{env, fs};
+
+fn main() {
+    if env::var_os("DISABLED_TS_BUILD").is_some() {
+        return;
+    }
+    let mut config = cc::Build::new();
+
+    let manifest_path = PathBuf::from(env::var_os("CARGO_MANIFEST_DIR").unwrap());
+    let include_path = manifest_path.join("../vendor/tree-sitter/include");
+    let src_path = manifest_path.join("../vendor/tree-sitter/src");
+    for entry in fs::read_dir(&src_path).unwrap() {
+        let entry = entry.unwrap();
+        let path = src_path.join(entry.file_name());
+        println!("cargo:rerun-if-changed={}", path.to_str().unwrap());
+    }
+
+    config
+        .flag_if_supported("-std=c11")
+        .flag_if_supported("-fvisibility=hidden")
+        .flag_if_supported("-Wshadow")
+        .flag_if_supported("-Wno-unused-parameter")
+        .include(&src_path)
+        .include(&include_path)
+        .file(src_path.join("lib.c"))
+        .compile("tree-sitter");
+}
diff --git a/helix-syntax/src/injections_tree.rs b/helix-syntax/src/injections_tree.rs
index e181a7754..fdc53e491 100644
--- a/helix-syntax/src/injections_tree.rs
+++ b/helix-syntax/src/injections_tree.rs
@@ -3,7 +3,7 @@ use std::iter::Peekable;
 use std::sync::Arc;
 
 use hashbrown::HashMap;
-use slotmap::{new_key_type, HopSlotMap, SlotMap};
+use slotmap::{new_key_type, SlotMap};
 use tree_sitter::Tree;
 
 use crate::parse::LayerUpdateFlags;
diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs
index 915d8df5b..074f87272 100644
--- a/helix-syntax/src/lib.rs
+++ b/helix-syntax/src/lib.rs
@@ -1,6 +1,6 @@
 use ::ropey::RopeSlice;
-use slotmap::{new_key_type, HopSlotMap};
-use tree_sitter::{Node, Parser, Point, Query, QueryCursor, Range, Tree};
+use ::tree_sitter::{Node, Parser, Point, Query, QueryCursor, Range, Tree};
+use slotmap::HopSlotMap;
 
 use std::borrow::Cow;
 use std::cell::RefCell;
@@ -26,6 +26,7 @@ mod parse;
 mod pretty_print;
 mod ropey;
 mod tree_cursor;
+pub mod tree_sitter;
 
 #[derive(Debug)]
 pub struct Syntax {
@@ -321,7 +322,7 @@ fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<st
 }
 
 struct TsParser {
-    parser: tree_sitter::Parser,
+    parser: ::tree_sitter::Parser,
     pub cursors: Vec<QueryCursor>,
 }
 
diff --git a/helix-syntax/src/tree_sitter.rs b/helix-syntax/src/tree_sitter.rs
new file mode 100644
index 000000000..d75c5b245
--- /dev/null
+++ b/helix-syntax/src/tree_sitter.rs
@@ -0,0 +1,27 @@
+mod grammar;
+mod parser;
+mod query;
+mod ropey;
+mod syntax_tree;
+mod syntax_tree_node;
+
+pub use grammar::Grammar;
+pub use parser::{Parser, ParserInputRaw};
+pub use syntax_tree::{InputEdit, SyntaxTree};
+pub use syntax_tree_node::SyntaxTreeNode;
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Point {
+    pub row: u32,
+    pub column: u32,
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Range {
+    pub start_point: Point,
+    pub end_point: Point,
+    pub start_byte: u32,
+    pub end_byte: u32,
+}
diff --git a/helix-syntax/src/tree_sitter/grammar.rs b/helix-syntax/src/tree_sitter/grammar.rs
new file mode 100644
index 000000000..a97769248
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/grammar.rs
@@ -0,0 +1,101 @@
+use std::fmt;
+use std::path::{Path, PathBuf};
+use std::ptr::NonNull;
+
+use libloading::{Library, Symbol};
+
+/// supported TS versions, WARNING: update when updating vendored c sources
+pub const MIN_COMPATIBLE_ABI_VERSION: u32 = 13;
+pub const ABI_VERSION: u32 = 14;
+
+// opaque pointer
+enum GrammarData {}
+
+#[repr(transparent)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub struct Grammar {
+    ptr: NonNull<GrammarData>,
+}
+
+unsafe impl Send for Grammar {}
+unsafe impl Sync for Grammar {}
+
+impl std::fmt::Debug for Grammar {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Grammar").finish_non_exhaustive()
+    }
+}
+
+impl Grammar {
+    pub unsafe fn new(name: &str, library_path: &Path) -> Result<Grammar, Error> {
+        let library = unsafe {
+            Library::new(&library_path).map_err(|err| Error::DlOpen {
+                err,
+                path: library_path.to_owned(),
+            })?
+        };
+        let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_"));
+        let grammar = unsafe {
+            let language_fn: Symbol<unsafe extern "C" fn() -> NonNull<GrammarData>> = library
+                .get(language_fn_name.as_bytes())
+                .map_err(|err| Error::DlSym {
+                    err,
+                    symbol: name.to_owned(),
+                })?;
+            Grammar { ptr: language_fn() }
+        };
+        let version = grammar.version();
+        if MIN_COMPATIBLE_ABI_VERSION <= version && version <= ABI_VERSION {
+            std::mem::forget(library);
+            Ok(grammar)
+        } else {
+            Err(Error::IncompatibleVersion { version })
+        }
+    }
+    pub fn version(self) -> u32 {
+        unsafe { ts_language_version(self) }
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("Error opening dynamic library {path:?}")]
+    DlOpen {
+        #[source]
+        err: libloading::Error,
+        path: PathBuf,
+    },
+    #[error("Failed to load symbol {symbol}")]
+    DlSym {
+        #[source]
+        err: libloading::Error,
+        symbol: String,
+    },
+    #[error("Tried to load grammar with incompatible ABI {version}.")]
+    IncompatibleVersion { version: u32 },
+}
+
+/// An error that occurred when trying to assign an incompatible [`Grammar`] to
+/// a [`Parser`].
+#[derive(Debug, PartialEq, Eq)]
+pub struct IncompatibleGrammarError {
+    version: u32,
+}
+
+impl fmt::Display for IncompatibleGrammarError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "Tried to load grammar with incompatible ABI {}.",
+            self.version,
+        )
+    }
+}
+impl std::error::Error for IncompatibleGrammarError {}
+
+extern "C" {
+    /// Get the ABI version number for this language. This version number
+    /// is used to ensure that languages were generated by a compatible version of
+    /// Tree-sitter. See also [`ts_parser_set_language`].
+    pub fn ts_language_version(grammar: Grammar) -> u32;
+}
diff --git a/helix-syntax/src/tree_sitter/parser.rs b/helix-syntax/src/tree_sitter/parser.rs
new file mode 100644
index 000000000..611637390
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/parser.rs
@@ -0,0 +1,204 @@
+use std::os::raw::c_void;
+use std::panic::catch_unwind;
+use std::ptr::NonNull;
+use std::{fmt, ptr};
+
+use crate::tree_sitter::syntax_tree::{SyntaxTree, SyntaxTreeData};
+use crate::tree_sitter::{Grammar, Point, Range};
+
+// opaque data
+enum ParserData {}
+
+/// A stateful object that this is used to produce a [`Tree`] based on some
+/// source code.
+pub struct Parser {
+    ptr: NonNull<ParserData>,
+}
+
+impl Parser {
+    /// Create a new parser.
+    #[must_use]
+    pub fn new() -> Parser {
+        Parser {
+            ptr: unsafe { ts_parser_new() },
+        }
+    }
+
+    /// Set the language that the parser should use for parsing.
+    pub fn set_language(&mut self, grammar: Grammar) {
+        unsafe { ts_parser_set_language(self.ptr, grammar) };
+    }
+
+    /// Set the ranges of text that the parser should include when parsing. By default, the parser
+    /// will always include entire documents. This function allows you to parse only a *portion*
+    /// of a document but still return a syntax tree whose ranges match up with the document as a
+    /// whole. You can also pass multiple disjoint ranges.
+    ///
+    /// `ranges` must be non-overlapping and sorted.
+    pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), InvalidRangesErrror> {
+        // TODO: save some memory by only storing byte ranges and converting them to TS ranges in an
+        // internal buffer here. Points are not used by TS. Alternatively we can path the TS C code
+        // to accept a simple pair (struct with two fields) of byte positions here instead of a full
+        // tree sitter range
+        let success = unsafe {
+            ts_parser_set_included_ranges(self.ptr, ranges.as_ptr(), ranges.len() as u32)
+        };
+        if success {
+            Ok(())
+        } else {
+            Err(InvalidRangesErrror)
+        }
+    }
+
+    #[must_use]
+    pub fn parse<I: ParserInput>(
+        &mut self,
+        input: impl IntoParserInput<ParserInput = I>,
+        old_tree: Option<&SyntaxTree>,
+    ) -> Option<SyntaxTree> {
+        let mut input = input.into_parser_input();
+        unsafe extern "C" fn read<C: ParserInput>(
+            payload: NonNull<c_void>,
+            byte_index: u32,
+            _position: Point,
+            bytes_read: &mut u32,
+        ) -> *const u8 {
+            match catch_unwind(|| {
+                let cursor: &mut C = payload.cast().as_mut();
+                cursor.read(byte_index as usize)
+            }) {
+                Ok(slice) => {
+                    *bytes_read = slice.len() as u32;
+                    slice.as_ptr()
+                }
+                Err(_) => {
+                    *bytes_read = 0;
+                    ptr::null()
+                }
+            }
+        }
+        let input = ParserInputRaw {
+            payload: NonNull::from(&mut input).cast(),
+            read: read::<I>,
+            // utf8
+            encoding: 0,
+        };
+        unsafe {
+            let old_tree = old_tree.map(|tree| tree.as_raw());
+            let new_tree = ts_parser_parse(self.ptr, old_tree, input);
+            new_tree.map(|raw| SyntaxTree::from_raw(raw))
+        }
+    }
+}
+
+impl Default for Parser {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+unsafe impl Sync for Parser {}
+unsafe impl Send for Parser {}
+impl Drop for Parser {
+    fn drop(&mut self) {
+        unsafe { ts_parser_delete(self.ptr) }
+    }
+}
+
+/// An error that occurred when trying to assign an incompatible [`Grammar`] to
+/// a [`Parser`].
+#[derive(Debug, PartialEq, Eq)]
+pub struct InvalidRangesErrror;
+
+impl fmt::Display for InvalidRangesErrror {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "include ranges are overlap or are not sorted",)
+    }
+}
+impl std::error::Error for InvalidRangesErrror {}
+
+type TreeSitterReadFn = unsafe extern "C" fn(
+    payload: NonNull<c_void>,
+    byte_index: u32,
+    position: Point,
+    bytes_read: &mut u32,
+) -> *const u8;
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct ParserInputRaw {
+    pub payload: NonNull<c_void>,
+    pub read: TreeSitterReadFn,
+    pub encoding: u32,
+}
+
+pub trait ParserInput {
+    fn read(&mut self, offset: usize) -> &[u8];
+}
+
+pub trait IntoParserInput {
+    type ParserInput;
+    fn into_parser_input(self) -> Self::ParserInput;
+}
+
+extern "C" {
+    /// Create a new parser
+    fn ts_parser_new() -> NonNull<ParserData>;
+    /// Delete the parser, freeing all of the memory that it used.
+    fn ts_parser_delete(parser: NonNull<ParserData>);
+    /// Set the language that the parser should use for parsing. Returns a boolean indicating
+    /// whether or not the language was successfully assigned. True means assignment
+    /// succeeded. False means there was a version mismatch: the language was generated with
+    /// an incompatible version of the Tree-sitter CLI. Check the language's version using
+    /// [`ts_language_version`] and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`]
+    /// and [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
+    fn ts_parser_set_language(parser: NonNull<ParserData>, language: Grammar) -> bool;
+    /// Set the ranges of text that the parser should include when parsing. By default, the parser
+    /// will always include entire documents. This function allows you to parse only a *portion*
+    /// of a document but still return a syntax tree whose ranges match up with the document as a
+    /// whole. You can also pass multiple disjoint ranges. The second and third parameters specify
+    /// the location and length of an array of ranges. The parser does *not* take ownership of
+    /// these ranges; it copies the data, so it doesn't matter how these ranges are allocated.
+    /// If `count` is zero, then the entire document will be parsed. Otherwise, the given ranges
+    /// must be ordered from earliest to latest in the document, and they must not overlap. That
+    /// is, the following must hold for all: `i < count - 1`: `ranges[i].end_byte <= ranges[i +
+    /// 1].start_byte` If this requirement is not satisfied, the operation will fail, the ranges
+    /// will not be assigned, and this function will return `false`. On success, this function
+    /// returns `true`
+    fn ts_parser_set_included_ranges(
+        parser: NonNull<ParserData>,
+        ranges: *const Range,
+        count: u32,
+    ) -> bool;
+
+    /// Use the parser to parse some source code and create a syntax tree. If you are parsing this
+    /// document for the first time, pass `NULL` for the `old_tree` parameter. Otherwise, if you
+    /// have already parsed an earlier version of this document and the document has since been
+    /// edited, pass the previous syntax tree so that the unchanged parts of it can be reused.
+    /// This will save time and memory. For this to work correctly, you must have already edited
+    /// the old syntax tree using the [`ts_tree_edit`] function in a way that exactly matches
+    /// the source code changes. The [`TSInput`] parameter lets you specify how to read the text.
+    /// It has the following three fields: 1. [`read`]: A function to retrieve a chunk of text
+    /// at a given byte offset and (row, column) position. The function should return a pointer
+    /// to the text and write its length to the [`bytes_read`] pointer. The parser does not
+    /// take ownership of this buffer; it just borrows it until it has finished reading it. The
+    /// function should write a zero value to the [`bytes_read`] pointer to indicate the end of the
+    /// document. 2. [`payload`]: An arbitrary pointer that will be passed to each invocation of
+    /// the [`read`] function. 3. [`encoding`]: An indication of how the text is encoded. Either
+    /// `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. This function returns a syntax tree
+    /// on success, and `NULL` on failure. There are three possible reasons for failure: 1. The
+    /// parser does not have a language assigned. Check for this using the [`ts_parser_language`]
+    /// function. 2. Parsing was cancelled due to a timeout that was set by an earlier call to the
+    /// [`ts_parser_set_timeout_micros`] function. You can resume parsing from where the parser
+    /// left out by calling [`ts_parser_parse`] again with the same arguments. Or you can start
+    /// parsing from scratch by first calling [`ts_parser_reset`]. 3. Parsing was cancelled using
+    /// a cancellation flag that was set by an earlier call to [`ts_parser_set_cancellation_flag`].
+    /// You can resume parsing from where the parser left out by calling [`ts_parser_parse`] again
+    /// with the same arguments. [`read`]: TSInput::read [`payload`]: TSInput::payload [`encoding`]:
+    /// TSInput::encoding [`bytes_read`]: TSInput::read
+    fn ts_parser_parse(
+        parser: NonNull<ParserData>,
+        old_tree: Option<NonNull<SyntaxTreeData>>,
+        input: ParserInputRaw,
+    ) -> Option<NonNull<SyntaxTreeData>>;
+}
diff --git a/helix-syntax/src/tree_sitter/query.rs b/helix-syntax/src/tree_sitter/query.rs
new file mode 100644
index 000000000..44a7fa3c3
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/query.rs
@@ -0,0 +1,574 @@
+use std::fmt::Display;
+use std::iter::zip;
+use std::path::{Path, PathBuf};
+use std::ptr::NonNull;
+use std::{slice, str};
+
+use regex_cursor::engines::meta::Regex;
+
+use crate::tree_sitter::Grammar;
+
+macro_rules! bail {
+    ($($args:tt)*) => {{
+        return Err(format!($($args)*))
+    }}
+}
+
+macro_rules! ensure {
+    ($cond: expr, $($args:tt)*) => {{
+        if !$cond {
+            return Err(format!($($args)*))
+        }
+    }}
+}
+
+#[derive(Debug)]
+enum TextPredicateCaptureKind {
+    EqString(u32),
+    EqCapture(u32),
+    MatchString(Regex),
+    AnyString(Box<[Box<str>]>),
+}
+
+struct TextPredicateCapture {
+    capture_idx: u32,
+    kind: TextPredicateCaptureKind,
+    negated: bool,
+    match_all: bool,
+}
+
+pub enum QueryData {}
+pub struct Query {
+    raw: NonNull<QueryData>,
+    num_captures: u32,
+}
+
+impl Query {
+    /// Create a new query from a string containing one or more S-expression
+    /// patterns.
+    ///
+    /// The query is associated with a particular grammar, and can only be run
+    /// on syntax nodes parsed with that grammar. References to Queries can be
+    /// shared between multiple threads.
+    pub fn new(grammar: Grammar, source: &str, path: impl AsRef<Path>) -> Result<Self, ParseError> {
+        assert!(
+            source.len() <= i32::MAX as usize,
+            "TreeSitter queries must be smaller then 2 GiB (is {})",
+            source.len() as f64 / 1024.0 / 1024.0 / 1024.0
+        );
+        let mut error_offset = 0u32;
+        let mut error_kind = RawQueryError::None;
+        let bytes = source.as_bytes();
+
+        // Compile the query.
+        let ptr = unsafe {
+            ts_query_new(
+                grammar,
+                bytes.as_ptr(),
+                bytes.len() as u32,
+                &mut error_offset,
+                &mut error_kind,
+            )
+        };
+
+        let Some(raw) = ptr else {
+            let offset = error_offset as usize;
+            let error_word = || {
+                source[offset..]
+                    .chars()
+                    .take_while(|&c| c.is_alphanumeric() || matches!(c, '_' | '-'))
+                    .collect()
+            };
+            let err = match error_kind {
+                RawQueryError::NodeType => {
+                    let node: String = error_word();
+                    ParseError::InvalidNodeType {
+                        location: ParserErrorLocation::new(
+                            source,
+                            path.as_ref(),
+                            offset,
+                            node.chars().count(),
+                        ),
+                        node,
+                    }
+                }
+                RawQueryError::Field => {
+                    let field = error_word();
+                    ParseError::InvalidFieldName {
+                        location: ParserErrorLocation::new(
+                            source,
+                            path.as_ref(),
+                            offset,
+                            field.chars().count(),
+                        ),
+                        field,
+                    }
+                }
+                RawQueryError::Capture => {
+                    let capture = error_word();
+                    ParseError::InvalidCaptureName {
+                        location: ParserErrorLocation::new(
+                            source,
+                            path.as_ref(),
+                            offset,
+                            capture.chars().count(),
+                        ),
+                        capture,
+                    }
+                }
+                RawQueryError::Syntax => ParseError::SyntaxError(ParserErrorLocation::new(
+                    source,
+                    path.as_ref(),
+                    offset,
+                    0,
+                )),
+                RawQueryError::Structure => ParseError::ImpossiblePattern(
+                    ParserErrorLocation::new(source, path.as_ref(), offset, 0),
+                ),
+                RawQueryError::None => {
+                    unreachable!("tree-sitter returned a null pointer but did not set an error")
+                }
+                RawQueryError::Language => unreachable!("should be handled at grammar load"),
+            };
+            return Err(err)
+        };
+
+        // I am not going to bother with safety comments here, all of these are
+        // safe as long as TS is not buggy because raw is a properly constructed query
+        let num_captures = unsafe { ts_query_capture_count(raw) };
+
+        Ok(Query { raw, num_captures })
+    }
+
+    fn parse_predicates(&mut self) {
+        let pattern_count = unsafe { ts_query_pattern_count(self.raw) };
+
+        let mut text_predicates = Vec::with_capacity(pattern_count as usize);
+        let mut property_predicates = Vec::with_capacity(pattern_count as usize);
+        let mut property_settings = Vec::with_capacity(pattern_count as usize);
+        let mut general_predicates = Vec::with_capacity(pattern_count as usize);
+
+        for i in 0..pattern_count {}
+    }
+
+    fn parse_predicate(&self, pattern_index: u32) -> Result<(), String> {
+        let mut text_predicates = Vec::new();
+        let mut property_predicates = Vec::new();
+        let mut property_settings = Vec::new();
+        let mut general_predicates = Vec::new();
+        for predicate in self.predicates(pattern_index) {
+            let predicate = unsafe { Predicate::new(self, predicate)? };
+
+            // Build a predicate for each of the known predicate function names.
+            match predicate.operator_name {
+                "eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => {
+                    predicate.check_arg_count(2)?;
+                    let capture_idx = predicate.get_arg(0, PredicateArg::Capture)?;
+                    let (arg2, arg2_kind) = predicate.get_any_arg(1);
+
+                    let negated = matches!(predicate.operator_name, "not-eq?" | "not-any-eq?");
+                    let match_all = matches!(predicate.operator_name, "eq?" | "not-eq?");
+                    let kind = match arg2_kind {
+                        PredicateArg::Capture => TextPredicateCaptureKind::EqCapture(arg2),
+                        PredicateArg::String => TextPredicateCaptureKind::EqString(arg2),
+                    };
+                    text_predicates.push(TextPredicateCapture {
+                        capture_idx,
+                        kind,
+                        negated,
+                        match_all,
+                    });
+                }
+
+                "match?" | "not-match?" | "any-match?" | "any-not-match?" => {
+                    predicate.check_arg_count(2)?;
+                    let capture_idx = predicate.get_arg(0, PredicateArg::Capture)?;
+                    let regex = predicate.get_str_arg(1)?;
+
+                    let negated =
+                        matches!(predicate.operator_name, "not-match?" | "any-not-match?");
+                    let match_all = matches!(predicate.operator_name, "match?" | "not-match?");
+                    let regex = match Regex::new(regex) {
+                        Ok(regex) => regex,
+                        Err(err) => bail!("invalid regex '{regex}', {err}"),
+                    };
+                    text_predicates.push(TextPredicateCapture {
+                        capture_idx,
+                        kind: TextPredicateCaptureKind::MatchString(regex),
+                        negated,
+                        match_all,
+                    });
+                }
+
+                "set!" => property_settings.push(Self::parse_property(
+                    row,
+                    operator_name,
+                    &capture_names,
+                    &string_values,
+                    &p[1..],
+                )?),
+
+                "is?" | "is-not?" => property_predicates.push((
+                    Self::parse_property(
+                        row,
+                        operator_name,
+                        &capture_names,
+                        &string_values,
+                        &p[1..],
+                    )?,
+                    operator_name == "is?",
+                )),
+
+                "any-of?" | "not-any-of?" => {
+                    if p.len() < 2 {
+                        return Err(predicate_error(row, format!(
+                                "Wrong number of arguments to #any-of? predicate. Expected at least 1, got {}.",
+                                p.len() - 1
+                            )));
+                    }
+                    if p[1].type_ != TYPE_CAPTURE {
+                        return Err(predicate_error(row, format!(
+                                "First argument to #any-of? predicate must be a capture name. Got literal \"{}\".",
+                                string_values[p[1].value_id as usize],
+                            )));
+                    }
+
+                    let is_positive = operator_name == "any-of?";
+                    let mut values = Vec::new();
+                    for arg in &p[2..] {
+                        if arg.type_ == TYPE_CAPTURE {
+                            return Err(predicate_error(row, format!(
+                                    "Arguments to #any-of? predicate must be literals. Got capture @{}.",
+                                    capture_names[arg.value_id as usize],
+                                )));
+                        }
+                        values.push(string_values[arg.value_id as usize]);
+                    }
+                    text_predicates.push(TextPredicateCapture::AnyString(
+                        p[1].value_id,
+                        values
+                            .iter()
+                            .map(|x| (*x).to_string().into())
+                            .collect::<Vec<_>>()
+                            .into(),
+                        is_positive,
+                    ));
+                }
+
+                _ => general_predicates.push(QueryPredicate {
+                    operator: operator_name.to_string().into(),
+                    args: p[1..]
+                        .iter()
+                        .map(|a| {
+                            if a.type_ == TYPE_CAPTURE {
+                                QueryPredicateArg::Capture(a.value_id)
+                            } else {
+                                QueryPredicateArg::String(
+                                    string_values[a.value_id as usize].to_string().into(),
+                                )
+                            }
+                        })
+                        .collect(),
+                }),
+            }
+        }
+
+        text_predicates_vec.push(text_predicates.into());
+        property_predicates_vec.push(property_predicates.into());
+        property_settings_vec.push(property_settings.into());
+        general_predicates_vec.push(general_predicates.into());
+    }
+
+    fn predicates<'a>(
+        &'a self,
+        pattern_index: u32,
+    ) -> impl Iterator<Item = &'a [PredicateStep]> + 'a {
+        let predicate_steps = unsafe {
+            let mut len = 0u32;
+            let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern_index, &mut len);
+            (len != 0)
+                .then(|| slice::from_raw_parts(raw_predicates, len as usize))
+                .unwrap_or_default()
+        };
+        predicate_steps
+            .split(|step| step.kind == PredicateStepKind::Done)
+            .filter(|predicate| !predicate.is_empty())
+    }
+
+    /// Safety: value_idx must be a valid string id (in bounds) for this query and pattern_index
+    unsafe fn get_pattern_string(&self, value_id: u32) -> &str {
+        unsafe {
+            let mut len = 0;
+            let ptr = ts_query_string_value_for_id(self.raw, value_id, &mut len);
+            let data = slice::from_raw_parts(ptr, len as usize);
+            // safety: we only allow passing valid str(ings) as arguments to query::new
+            // name is always a substring of that. Treesitter does proper utf8 segmentation
+            // so any substrings it produces are codepoint aligned and therefore valid utf8
+            str::from_utf8_unchecked(data)
+        }
+    }
+
+    #[inline]
+    pub fn capture_name(&self, capture_idx: u32) -> &str {
+        // this one needs an assertions because the ts c api is inconsisent
+        // and unsafe, other functions do have checks and would return null
+        assert!(capture_idx <= self.num_captures, "invalid capture index");
+        let mut length = 0;
+        unsafe {
+            let ptr = ts_query_capture_name_for_id(self.raw, capture_idx, &mut length);
+            let name = slice::from_raw_parts(ptr, length as usize);
+            // safety: we only allow passing valid str(ings) as arguments to query::new
+            // name is always a substring of that. Treesitter does proper utf8 segmentation
+            // so any substrings it produces are codepoint aligned and therefore valid utf8
+            str::from_utf8_unchecked(name)
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+pub struct ParserErrorLocation {
+    pub path: PathBuf,
+    /// at which line the error occured
+    pub line: usize,
+    /// at which codepoints/columns the errors starts in the line
+    pub column: usize,
+    /// how many codepoints/columns the error takes up
+    pub len: usize,
+    line_content: String,
+}
+
+impl ParserErrorLocation {
+    pub fn new(source: &str, path: &Path, offset: usize, len: usize) -> ParserErrorLocation {
+        let (line, line_content) = source[..offset]
+            .split('\n')
+            .map(|line| line.strip_suffix('\r').unwrap_or(line))
+            .enumerate()
+            .last()
+            .unwrap_or((0, ""));
+        let column = line_content.chars().count();
+        ParserErrorLocation {
+            path: path.to_owned(),
+            line,
+            column,
+            len,
+            line_content: line_content.to_owned(),
+        }
+    }
+}
+
+impl Display for ParserErrorLocation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        writeln!(
+            f,
+            "  --> {}:{}:{}",
+            self.path.display(),
+            self.line,
+            self.column
+        )?;
+        let line = self.line.to_string();
+        let prefix = format_args!(" {:width$} |", "", width = line.len());
+        writeln!(f, "{prefix}");
+        writeln!(f, " {line} | {}", self.line_content)?;
+        writeln!(
+            f,
+            "{prefix}{:width$}{:^<len$}",
+            "",
+            "^",
+            width = self.column,
+            len = self.len
+        )?;
+        writeln!(f, "{prefix}")
+    }
+}
+
+#[derive(thiserror::Error, Debug, PartialEq, Eq)]
+pub enum ParseError {
+    #[error("unexpected EOF")]
+    UnexpectedEof,
+    #[error("invalid query syntax\n{0}")]
+    SyntaxError(ParserErrorLocation),
+    #[error("invalid node type {node:?}\n{location}")]
+    InvalidNodeType {
+        node: String,
+        location: ParserErrorLocation,
+    },
+    #[error("invalid field name {field:?}\n{location}")]
+    InvalidFieldName {
+        field: String,
+        location: ParserErrorLocation,
+    },
+    #[error("invalid capture name {capture:?}\n{location}")]
+    InvalidCaptureName {
+        capture: String,
+        location: ParserErrorLocation,
+    },
+    #[error("{message}\n{location}")]
+    InvalidPredicate {
+        message: String,
+        location: ParserErrorLocation,
+    },
+    #[error("invalid predicate\n{0}")]
+    ImpossiblePattern(ParserErrorLocation),
+}
+
+#[repr(C)]
+enum RawQueryError {
+    None = 0,
+    Syntax = 1,
+    NodeType = 2,
+    Field = 3,
+    Capture = 4,
+    Structure = 5,
+    Language = 6,
+}
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum PredicateStepKind {
+    Done = 0,
+    Capture = 1,
+    String = 2,
+}
+
+#[repr(C)]
+struct PredicateStep {
+    kind: PredicateStepKind,
+    value_id: u32,
+}
+
+struct Predicate<'a> {
+    operator_name: &'a str,
+    args: &'a [PredicateStep],
+    query: &'a Query,
+}
+
+impl<'a> Predicate<'a> {
+    unsafe fn new(
+        query: &'a Query,
+        predicate: &'a [PredicateStep],
+    ) -> Result<Predicate<'a>, String> {
+        ensure!(
+            predicate[0].kind == PredicateStepKind::String,
+            "expected predicate to start with a function name. Got @{}.",
+            query.capture_name(predicate[0].value_id)
+        );
+        let operator_name = query.get_pattern_string(predicate[0].value_id);
+        Ok(Predicate {
+            operator_name,
+            args: &predicate[1..],
+            query,
+        })
+    }
+    pub fn check_arg_count(&self, n: usize) -> Result<(), String> {
+        ensure!(
+            self.args.len() == n,
+            "expected {n} arguments for #{}, got {}",
+            self.operator_name,
+            self.args.len()
+        );
+        Ok(())
+    }
+
+    pub fn get_arg(&self, i: usize, expect: PredicateArg) -> Result<u32, String> {
+        let (val, actual) = self.get_any_arg(i);
+        match (actual, expect) {
+            (PredicateArg::Capture, PredicateArg::String) => bail!(
+                "{i}. argument to #{} expected a capture, got literal {val:?}",
+                self.operator_name
+            ),
+            (PredicateArg::String, PredicateArg::Capture) => bail!(
+                "{i}. argument to #{} must be a literal, got capture @{val:?}",
+                self.operator_name
+            ),
+            _ => (),
+        };
+        Ok(val)
+    }
+    pub fn get_str_arg(&self, i: usize) -> Result<&'a str, String> {
+        let arg = self.get_arg(i, PredicateArg::String)?;
+        unsafe { Ok(self.query.get_pattern_string(arg)) }
+    }
+
+    pub fn get_any_arg(&self, i: usize) -> (u32, PredicateArg) {
+        match self.args[i].kind {
+            PredicateStepKind::String => unsafe { (self.args[i].value_id, PredicateArg::String) },
+            PredicateStepKind::Capture => (self.args[i].value_id, PredicateArg::Capture),
+            PredicateStepKind::Done => unreachable!(),
+        }
+    }
+}
+
+enum PredicateArg {
+    Capture,
+    String,
+}
+
+extern "C" {
+    /// Create a new query from a string containing one or more S-expression
+    /// patterns. The query is associated with a particular language, and can
+    /// only be run on syntax nodes parsed with that language. If all of the
+    /// given patterns are valid, this returns a [`TSQuery`]. If a pattern is
+    /// invalid, this returns `NULL`, and provides two pieces of information
+    /// about the problem: 1. The byte offset of the error is written to
+    /// the `error_offset` parameter. 2. The type of error is written to the
+    /// `error_type` parameter.
+    pub fn ts_query_new(
+        grammar: Grammar,
+        source: *const u8,
+        source_len: u32,
+        error_offset: &mut u32,
+        error_type: &mut RawQueryError,
+    ) -> Option<NonNull<QueryData>>;
+
+    /// Delete a query, freeing all of the memory that it used.
+    pub fn ts_query_delete(query: NonNull<QueryData>);
+
+    /// Get the number of patterns, captures, or string literals in the query.
+    pub fn ts_query_pattern_count(query: NonNull<QueryData>) -> u32;
+    pub fn ts_query_capture_count(query: NonNull<QueryData>) -> u32;
+    pub fn ts_query_string_count(query: NonNull<QueryData>) -> u32;
+
+    /// Get the byte offset where the given pattern starts in the query's
+    /// source. This can be useful when combining queries by concatenating their
+    /// source code strings.
+    pub fn ts_query_start_byte_for_pattern(query: NonNull<QueryData>, pattern_index: u32) -> u32;
+
+    /// Get all of the predicates for the given pattern in the query. The
+    /// predicates are represented as a single array of steps. There are three
+    /// types of steps in this array, which correspond to the three legal values
+    /// for the `type` field: - `TSQueryPredicateStepTypeCapture` - Steps with
+    /// this type represent names    of captures. Their `value_id` can be used
+    /// with the   [`ts_query_capture_name_for_id`] function to obtain the name
+    /// of the capture. - `TSQueryPredicateStepTypeString` - Steps with this
+    /// type represent literal    strings. Their `value_id` can be used with the
+    /// [`ts_query_string_value_for_id`] function to obtain their string value.
+    /// - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*
+    /// that represent the end of an individual predicate. If a pattern has two
+    /// predicates, then there will be two steps with this `type` in the array.
+    pub fn ts_query_predicates_for_pattern(
+        query: NonNull<QueryData>,
+        pattern_index: u32,
+        step_count: &mut u32,
+    ) -> *const PredicateStep;
+
+    pub fn ts_query_is_pattern_rooted(query: NonNull<QueryData>, pattern_index: u32) -> bool;
+    pub fn ts_query_is_pattern_non_local(query: NonNull<QueryData>, pattern_index: u32) -> bool;
+    pub fn ts_query_is_pattern_guaranteed_at_step(
+        query: NonNull<QueryData>,
+        byte_offset: u32,
+    ) -> bool;
+    /// Get the name and length of one of the query's captures, or one of the
+    /// query's string literals. Each capture and string is associated with a
+    /// numeric id based on the order that it appeared in the query's source.
+    pub fn ts_query_capture_name_for_id(
+        query: NonNull<QueryData>,
+        index: u32,
+        length: &mut u32,
+    ) -> *const u8;
+
+    pub fn ts_query_string_value_for_id(
+        self_: NonNull<QueryData>,
+        index: u32,
+        length: &mut u32,
+    ) -> *const u8;
+}
diff --git a/helix-syntax/src/tree_sitter/ropey.rs b/helix-syntax/src/tree_sitter/ropey.rs
new file mode 100644
index 000000000..aa59b2f27
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/ropey.rs
@@ -0,0 +1,38 @@
+use regex_cursor::{Cursor, RopeyCursor};
+use ropey::RopeSlice;
+
+use crate::tree_sitter::parser::{IntoParserInput, ParserInput};
+
+pub struct RopeParserInput<'a> {
+    src: RopeSlice<'a>,
+    cursor: regex_cursor::RopeyCursor<'a>,
+}
+
+impl<'a> IntoParserInput for RopeSlice<'a> {
+    type ParserInput = RopeParserInput<'a>;
+
+    fn into_parser_input(self) -> Self::ParserInput {
+        RopeParserInput {
+            src: self,
+            cursor: RopeyCursor::new(self),
+        }
+    }
+}
+
+impl ParserInput for RopeParserInput<'_> {
+    fn read(&mut self, offset: usize) -> &[u8] {
+        // this cursor is optimized for contigous reads which are by far the most common during parsing
+        // very far jumps (like injections at the other end of the document) are handelde
+        // by restarting a new cursor (new chunks iterator)
+        if offset < self.cursor.offset() && self.cursor.offset() - offset > 4906 {
+            self.cursor = regex_cursor::RopeyCursor::at(self.src, offset);
+        } else {
+            while self.cursor.offset() + self.cursor.chunk().len() >= offset {
+                if !self.cursor.advance() {
+                    return &[];
+                }
+            }
+        }
+        self.cursor.chunk()
+    }
+}
diff --git a/helix-syntax/src/tree_sitter/syntax_tree.rs b/helix-syntax/src/tree_sitter/syntax_tree.rs
new file mode 100644
index 000000000..f1c608d6f
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/syntax_tree.rs
@@ -0,0 +1,80 @@
+use std::fmt;
+use std::ptr::NonNull;
+
+use crate::tree_sitter::syntax_tree_node::{SyntaxTreeNode, SyntaxTreeNodeRaw};
+use crate::tree_sitter::Point;
+
+// opaque pointers
+pub(super) enum SyntaxTreeData {}
+
+pub struct SyntaxTree {
+    ptr: NonNull<SyntaxTreeData>,
+}
+
+impl SyntaxTree {
+    pub(super) unsafe fn from_raw(raw: NonNull<SyntaxTreeData>) -> SyntaxTree {
+        SyntaxTree { ptr: raw }
+    }
+
+    pub(super) fn as_raw(&self) -> NonNull<SyntaxTreeData> {
+        self.ptr
+    }
+
+    pub fn root_node(&self) -> SyntaxTreeNode<'_> {
+        unsafe { SyntaxTreeNode::from_raw(ts_tree_root_node(self.ptr)).unwrap() }
+    }
+
+    pub fn edit(&mut self, edit: &InputEdit) {
+        unsafe { ts_tree_edit(self.ptr, edit) }
+    }
+}
+
+impl fmt::Debug for SyntaxTree {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{{Tree {:?}}}", self.root_node())
+    }
+}
+
+impl Drop for SyntaxTree {
+    fn drop(&mut self) {
+        unsafe { ts_tree_delete(self.ptr) }
+    }
+}
+
+impl Clone for SyntaxTree {
+    fn clone(&self) -> Self {
+        unsafe {
+            SyntaxTree {
+                ptr: ts_tree_copy(self.ptr),
+            }
+        }
+    }
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct InputEdit {
+    pub start_byte: u32,
+    pub old_end_byte: u32,
+    pub new_end_byte: u32,
+    pub start_point: Point,
+    pub old_end_point: Point,
+    pub new_end_point: Point,
+}
+
+extern "C" {
+    /// Create a shallow copy of the syntax tree. This is very fast. You need to
+    /// copy a syntax tree in order to use it on more than one thread at a time,
+    /// as syntax trees are not thread safe.
+    fn ts_tree_copy(self_: NonNull<SyntaxTreeData>) -> NonNull<SyntaxTreeData>;
+    /// Delete the syntax tree, freeing all of the memory that it used.
+    fn ts_tree_delete(self_: NonNull<SyntaxTreeData>);
+    /// Get the root node of the syntax tree.
+    fn ts_tree_root_node<'tree>(self_: NonNull<SyntaxTreeData>) -> SyntaxTreeNodeRaw;
+    /// Edit the syntax tree to keep it in sync with source code that has been
+    /// edited.
+    ///
+    /// You must describe the edit both in terms of byte offsets and in terms of
+    /// row/column coordinates.
+    fn ts_tree_edit(self_: NonNull<SyntaxTreeData>, edit: &InputEdit);
+}
diff --git a/helix-syntax/src/tree_sitter/syntax_tree_node.rs b/helix-syntax/src/tree_sitter/syntax_tree_node.rs
new file mode 100644
index 000000000..b50c79696
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/syntax_tree_node.rs
@@ -0,0 +1,291 @@
+use std::ffi::c_void;
+use std::marker::PhantomData;
+use std::ops::Range;
+use std::ptr::NonNull;
+
+use crate::tree_sitter::syntax_tree::SyntaxTree;
+use crate::tree_sitter::Grammar;
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub(super) struct SyntaxTreeNodeRaw {
+    context: [u32; 4],
+    id: *const c_void,
+    tree: *const c_void,
+}
+
+impl From<SyntaxTreeNode<'_>> for SyntaxTreeNodeRaw {
+    fn from(node: SyntaxTreeNode) -> SyntaxTreeNodeRaw {
+        SyntaxTreeNodeRaw {
+            context: node.context,
+            id: node.id.as_ptr(),
+            tree: node.tree.as_ptr(),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct SyntaxTreeNode<'tree> {
+    context: [u32; 4],
+    id: NonNull<c_void>,
+    tree: NonNull<c_void>,
+    _phantom: PhantomData<&'tree SyntaxTree>,
+}
+
+impl<'tree> SyntaxTreeNode<'tree> {
+    #[inline]
+    pub(super) unsafe fn from_raw(raw: SyntaxTreeNodeRaw) -> Option<Self> {
+        Some(SyntaxTreeNode {
+            context: raw.context,
+            id: NonNull::new(raw.id as *mut _)?,
+            tree: unsafe { NonNull::new_unchecked(raw.tree as *mut _) },
+            _phantom: PhantomData,
+        })
+    }
+
+    #[inline]
+    fn as_raw(&self) -> SyntaxTreeNodeRaw {
+        SyntaxTreeNodeRaw {
+            context: self.context,
+            id: self.id.as_ptr(),
+            tree: self.tree.as_ptr(),
+        }
+    }
+
+    /// Get this node's type as a numerical id.
+    #[inline]
+    pub fn kind_id(&self) -> u16 {
+        unsafe { ts_node_symbol(self.as_raw()) }
+    }
+
+    /// Get the [`Language`] that was used to parse this node's syntax tree.
+    #[inline]
+    pub fn grammar(&self) -> Grammar {
+        unsafe { ts_node_language(self.as_raw()) }
+    }
+
+    /// Check if this node is *named*.
+    ///
+    /// Named nodes correspond to named rules in the grammar, whereas
+    /// *anonymous* nodes correspond to string literals in the grammar.
+    #[inline]
+    pub fn is_named(&self) -> bool {
+        unsafe { ts_node_is_named(self.as_raw()) }
+    }
+
+    /// Check if this node is *missing*.
+    ///
+    /// Missing nodes are inserted by the parser in order to recover from
+    /// certain kinds of syntax errors.
+    #[inline]
+    pub fn is_missing(&self) -> bool {
+        unsafe { ts_node_is_missing(self.as_raw()) }
+    }
+    /// Get the byte offsets where this node starts.
+    #[inline]
+    pub fn start_byte(&self) -> usize {
+        unsafe { ts_node_start_byte(self.as_raw()) as usize }
+    }
+
+    /// Get the byte offsets where this node end.
+    #[inline]
+    pub fn end_byte(&self) -> usize {
+        unsafe { ts_node_end_byte(self.as_raw()) as usize }
+    }
+
+    /// Get the byte range of source code that this node represents.
+    // TODO: use helix_stdx::Range once available
+    #[inline]
+    pub fn byte_range(&self) -> Range<usize> {
+        self.start_byte()..self.end_byte()
+    }
+
+    /// Get the node's child at the given index, where zero represents the first
+    /// child.
+    ///
+    /// This method is fairly fast, but its cost is technically log(i), so if
+    /// you might be iterating over a long list of children, you should use
+    /// [`SyntaxTreeNode::children`] instead.
+    #[inline]
+    pub fn child(&self, i: usize) -> Option<SyntaxTreeNode<'tree>> {
+        unsafe { SyntaxTreeNode::from_raw(ts_node_child(self.as_raw(), i as u32)) }
+    }
+
+    /// Get this node's number of children.
+    #[inline]
+    pub fn child_count(&self) -> usize {
+        unsafe { ts_node_child_count(self.as_raw()) as usize }
+    }
+
+    /// Get this node's *named* child at the given index.
+    ///
+    /// See also [`SyntaxTreeNode::is_named`].
+    /// This method is fairly fast, but its cost is technically log(i), so if
+    /// you might be iterating over a long list of children, you should use
+    /// [`SyntaxTreeNode::named_children`] instead.
+    #[inline]
+    pub fn named_child(&self, i: usize) -> Option<SyntaxTreeNode<'tree>> {
+        unsafe { SyntaxTreeNode::from_raw(ts_node_named_child(self.as_raw(), i as u32)) }
+    }
+
+    /// Get this node's number of *named* children.
+    ///
+    /// See also [`SyntaxTreeNode::is_named`].
+    #[inline]
+    pub fn named_child_count(&self) -> usize {
+        unsafe { ts_node_named_child_count(self.as_raw()) as usize }
+    }
+
+    #[inline]
+    unsafe fn map(
+        &self,
+        f: unsafe extern "C" fn(SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw,
+    ) -> Option<SyntaxTreeNode<'tree>> {
+        SyntaxTreeNode::from_raw(f(self.as_raw()))
+    }
+
+    /// Get this node's immediate parent.
+    #[inline]
+    pub fn parent(&self) -> Option<Self> {
+        unsafe { self.map(ts_node_parent) }
+    }
+
+    /// Get this node's next sibling.
+    #[inline]
+    pub fn next_sibling(&self) -> Option<Self> {
+        unsafe { self.map(ts_node_next_sibling) }
+    }
+
+    /// Get this node's previous sibling.
+    #[inline]
+    pub fn prev_sibling(&self) -> Option<Self> {
+        unsafe { self.map(ts_node_prev_sibling) }
+    }
+
+    /// Get this node's next named sibling.
+    #[inline]
+    pub fn next_named_sibling(&self) -> Option<Self> {
+        unsafe { self.map(ts_node_next_named_sibling) }
+    }
+
+    /// Get this node's previous named sibling.
+    #[inline]
+    pub fn prev_named_sibling(&self) -> Option<Self> {
+        unsafe { self.map(ts_node_prev_named_sibling) }
+    }
+
+    /// Get the smallest node within this node that spans the given range.
+    #[inline]
+    pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
+        unsafe {
+            Self::from_raw(ts_node_descendant_for_byte_range(
+                self.as_raw(),
+                start as u32,
+                end as u32,
+            ))
+        }
+    }
+
+    /// Get the smallest named node within this node that spans the given range.
+    #[inline]
+    pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
+        unsafe {
+            Self::from_raw(ts_node_named_descendant_for_byte_range(
+                self.as_raw(),
+                start as u32,
+                end as u32,
+            ))
+        }
+    }
+    // /// Iterate over this node's children.
+    // ///
+    // /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain
+    // /// a [`TreeCursor`] by calling [`Tree::walk`] or [`SyntaxTreeNode::walk`]. To avoid
+    // /// unnecessary allocations, you should reuse the same cursor for
+    // /// subsequent calls to this method.
+    // ///
+    // /// If you're walking the tree recursively, you may want to use the
+    // /// [`TreeCursor`] APIs directly instead.
+    // pub fn children<'cursor>(
+    //     &self,
+    //     cursor: &'cursor mut TreeCursor<'tree>,
+    // ) -> impl ExactSizeIterator<Item = SyntaxTreeNode<'tree>> + 'cursor {
+    //     cursor.reset(self.to_raw());
+    //     cursor.goto_first_child();
+    //     (0..self.child_count()).map(move |_| {
+    //         let result = cursor.node();
+    //         cursor.goto_next_sibling();
+    //         result
+    //     })
+    // }
+}
+
+unsafe impl Send for SyntaxTreeNode<'_> {}
+unsafe impl Sync for SyntaxTreeNode<'_> {}
+
+extern "C" {
+    /// Get the node's type as a numerical id.
+    fn ts_node_symbol(node: SyntaxTreeNodeRaw) -> u16;
+
+    /// Get the node's language.
+    fn ts_node_language(node: SyntaxTreeNodeRaw) -> Grammar;
+
+    /// Check if the node is *named*. Named nodes correspond to named rules in
+    /// the grammar, whereas *anonymous* nodes correspond to string literals in
+    /// the grammar
+    fn ts_node_is_named(node: SyntaxTreeNodeRaw) -> bool;
+
+    /// Check if the node is *missing*. Missing nodes are inserted by the parser
+    /// in order to recover from certain kinds of syntax errors
+    fn ts_node_is_missing(node: SyntaxTreeNodeRaw) -> bool;
+
+    /// Get the node's immediate parent
+    fn ts_node_parent(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
+
+    /// Get the node's child at the given index, where zero represents the first
+    /// child
+    fn ts_node_child(node: SyntaxTreeNodeRaw, child_index: u32) -> SyntaxTreeNodeRaw;
+
+    /// Get the node's number of children
+    fn ts_node_child_count(node: SyntaxTreeNodeRaw) -> u32;
+
+    /// Get the node's *named* child at the given index. See also
+    /// [`ts_node_is_named`]
+    fn ts_node_named_child(node: SyntaxTreeNodeRaw, child_index: u32) -> SyntaxTreeNodeRaw;
+
+    /// Get the node's number of *named* children. See also [`ts_node_is_named`]
+    fn ts_node_named_child_count(node: SyntaxTreeNodeRaw) -> u32;
+
+    /// Get the node's next sibling
+    fn ts_node_next_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
+
+    fn ts_node_prev_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
+
+    /// Get the node's next *named* sibling
+    fn ts_node_next_named_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
+
+    fn ts_node_prev_named_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
+
+    /// Get the smallest node within this node that spans the given range of
+    /// bytes or (row, column) positions
+    fn ts_node_descendant_for_byte_range(
+        node: SyntaxTreeNodeRaw,
+
+        start: u32,
+        end: u32,
+    ) -> SyntaxTreeNodeRaw;
+
+    /// Get the smallest named node within this node that spans the given range
+    /// of bytes or (row, column) positions
+    fn ts_node_named_descendant_for_byte_range(
+        node: SyntaxTreeNodeRaw,
+        start: u32,
+        end: u32,
+    ) -> SyntaxTreeNodeRaw;
+
+    /// Get the node's start byte.
+    fn ts_node_start_byte(self_: SyntaxTreeNodeRaw) -> u32;
+
+    /// Get the node's end byte.
+    fn ts_node_end_byte(node: SyntaxTreeNodeRaw) -> u32;
+}