mirror of https://github.com/helix-editor/helix
tmp
parent 782a34941e
commit 0cd8817d06
@@ -0,0 +1,28 @@
use std::path::PathBuf;
use std::{env, fs};

fn main() {
    if env::var_os("DISABLED_TS_BUILD").is_some() {
        return;
    }
    let mut config = cc::Build::new();

    let manifest_path = PathBuf::from(env::var_os("CARGO_MANIFEST_DIR").unwrap());
    let include_path = manifest_path.join("../vendor/tree-sitter/include");
    let src_path = manifest_path.join("../vendor/tree-sitter/src");
    for entry in fs::read_dir(&src_path).unwrap() {
        let entry = entry.unwrap();
        let path = src_path.join(entry.file_name());
        println!("cargo:rerun-if-changed={}", path.to_str().unwrap());
    }

    config
        .flag_if_supported("-std=c11")
        .flag_if_supported("-fvisibility=hidden")
        .flag_if_supported("-Wshadow")
        .flag_if_supported("-Wno-unused-parameter")
        .include(&src_path)
        .include(&include_path)
        .file(src_path.join("lib.c"))
        .compile("tree-sitter");
}
@@ -0,0 +1,27 @@
mod grammar;
mod parser;
mod query;
mod ropey;
mod syntax_tree;
mod syntax_tree_node;

pub use grammar::Grammar;
pub use parser::{Parser, ParserInputRaw};
pub use syntax_tree::{InputEdit, SyntaxTree};
pub use syntax_tree_node::SyntaxTreeNode;

#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Point {
    pub row: u32,
    pub column: u32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Range {
    pub start_point: Point,
    pub end_point: Point,
    pub start_byte: u32,
    pub end_byte: u32,
}
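`Point` and `Range` are `#[repr(C)]` so they match tree-sitter's `TSPoint` and `TSRange` layouts and can be passed straight across the FFI boundary. A minimal sketch (not part of the commit) of building a `Range` that covers one whole line; the offsets are made up, and the byte and point fields must describe the same span:

fn first_line_range(line_len: u32) -> Range {
    Range {
        start_point: Point { row: 0, column: 0 },
        end_point: Point { row: 0, column: line_len },
        start_byte: 0,
        // assumes the line is ASCII, so byte offsets equal column counts
        end_byte: line_len,
    }
}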
@@ -0,0 +1,101 @@
use std::fmt;
use std::path::{Path, PathBuf};
use std::ptr::NonNull;

use libloading::{Library, Symbol};

/// Supported TS ABI versions. WARNING: update when updating the vendored C sources.
pub const MIN_COMPATIBLE_ABI_VERSION: u32 = 13;
pub const ABI_VERSION: u32 = 14;

// opaque pointer
enum GrammarData {}

#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct Grammar {
    ptr: NonNull<GrammarData>,
}

unsafe impl Send for Grammar {}
unsafe impl Sync for Grammar {}

impl std::fmt::Debug for Grammar {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Grammar").finish_non_exhaustive()
    }
}

impl Grammar {
    pub unsafe fn new(name: &str, library_path: &Path) -> Result<Grammar, Error> {
        let library = unsafe {
            Library::new(library_path).map_err(|err| Error::DlOpen {
                err,
                path: library_path.to_owned(),
            })?
        };
        let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_"));
        let grammar = unsafe {
            let language_fn: Symbol<unsafe extern "C" fn() -> NonNull<GrammarData>> = library
                .get(language_fn_name.as_bytes())
                .map_err(|err| Error::DlSym {
                    err,
                    symbol: name.to_owned(),
                })?;
            Grammar { ptr: language_fn() }
        };
        let version = grammar.version();
        if MIN_COMPATIBLE_ABI_VERSION <= version && version <= ABI_VERSION {
            std::mem::forget(library);
            Ok(grammar)
        } else {
            Err(Error::IncompatibleVersion { version })
        }
    }

    pub fn version(self) -> u32 {
        unsafe { ts_language_version(self) }
    }
}

#[derive(thiserror::Error, Debug)]
pub enum Error {
    #[error("Error opening dynamic library {path:?}")]
    DlOpen {
        #[source]
        err: libloading::Error,
        path: PathBuf,
    },
    #[error("Failed to load symbol {symbol}")]
    DlSym {
        #[source]
        err: libloading::Error,
        symbol: String,
    },
    #[error("Tried to load grammar with incompatible ABI {version}.")]
    IncompatibleVersion { version: u32 },
}

/// An error that occurred when trying to assign an incompatible [`Grammar`] to
/// a [`Parser`].
#[derive(Debug, PartialEq, Eq)]
pub struct IncompatibleGrammarError {
    version: u32,
}

impl fmt::Display for IncompatibleGrammarError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "Tried to load grammar with incompatible ABI {}.",
            self.version,
        )
    }
}
impl std::error::Error for IncompatibleGrammarError {}

extern "C" {
    /// Get the ABI version number for this language. This version number
    /// is used to ensure that languages were generated by a compatible version of
    /// Tree-sitter. See also [`ts_parser_set_language`].
    pub fn ts_language_version(grammar: Grammar) -> u32;
}
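A minimal sketch (not part of the commit) of how `Grammar::new` might be used to load a compiled grammar from a shared object. The library path and grammar name below are placeholders; the call is `unsafe` because loading and running code from an arbitrary dynamic library cannot be checked by the compiler:

use std::path::Path;

fn load_rust_grammar() -> Result<Grammar, Error> {
    // Placeholder path to a grammar shared object built separately.
    let path = Path::new("runtime/grammars/rust.so");
    // Grammar::new resolves the `tree_sitter_rust` symbol and checks the
    // reported ABI version against MIN_COMPATIBLE_ABI_VERSION..=ABI_VERSION.
    unsafe { Grammar::new("rust", path) }
}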
@@ -0,0 +1,204 @@
use std::os::raw::c_void;
use std::panic::catch_unwind;
use std::ptr::NonNull;
use std::{fmt, ptr};

use crate::tree_sitter::syntax_tree::{SyntaxTree, SyntaxTreeData};
use crate::tree_sitter::{Grammar, Point, Range};

// opaque data
enum ParserData {}

/// A stateful object that is used to produce a [`SyntaxTree`] based on some
/// source code.
pub struct Parser {
    ptr: NonNull<ParserData>,
}

impl Parser {
    /// Create a new parser.
    #[must_use]
    pub fn new() -> Parser {
        Parser {
            ptr: unsafe { ts_parser_new() },
        }
    }

    /// Set the language that the parser should use for parsing.
    pub fn set_language(&mut self, grammar: Grammar) {
        unsafe { ts_parser_set_language(self.ptr, grammar) };
    }

    /// Set the ranges of text that the parser should include when parsing. By default, the parser
    /// will always include entire documents. This function allows you to parse only a *portion*
    /// of a document but still return a syntax tree whose ranges match up with the document as a
    /// whole. You can also pass multiple disjoint ranges.
    ///
    /// `ranges` must be non-overlapping and sorted.
    pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), InvalidRangesErrror> {
        // TODO: save some memory by only storing byte ranges and converting them to TS ranges in an
        // internal buffer here. Points are not used by TS. Alternatively we can patch the TS C code
        // to accept a simple pair (struct with two fields) of byte positions here instead of a full
        // tree-sitter range.
        let success = unsafe {
            ts_parser_set_included_ranges(self.ptr, ranges.as_ptr(), ranges.len() as u32)
        };
        if success {
            Ok(())
        } else {
            Err(InvalidRangesErrror)
        }
    }

    #[must_use]
    pub fn parse<I: ParserInput>(
        &mut self,
        input: impl IntoParserInput<ParserInput = I>,
        old_tree: Option<&SyntaxTree>,
    ) -> Option<SyntaxTree> {
        let mut input = input.into_parser_input();
        unsafe extern "C" fn read<C: ParserInput>(
            payload: NonNull<c_void>,
            byte_index: u32,
            _position: Point,
            bytes_read: &mut u32,
        ) -> *const u8 {
            match catch_unwind(|| {
                let cursor: &mut C = payload.cast().as_mut();
                cursor.read(byte_index as usize)
            }) {
                Ok(slice) => {
                    *bytes_read = slice.len() as u32;
                    slice.as_ptr()
                }
                Err(_) => {
                    *bytes_read = 0;
                    ptr::null()
                }
            }
        }
        let input = ParserInputRaw {
            payload: NonNull::from(&mut input).cast(),
            read: read::<I>,
            // utf8
            encoding: 0,
        };
        unsafe {
            let old_tree = old_tree.map(|tree| tree.as_raw());
            let new_tree = ts_parser_parse(self.ptr, old_tree, input);
            new_tree.map(|raw| SyntaxTree::from_raw(raw))
        }
    }
}

impl Default for Parser {
    fn default() -> Self {
        Self::new()
    }
}

unsafe impl Sync for Parser {}
unsafe impl Send for Parser {}

impl Drop for Parser {
    fn drop(&mut self) {
        unsafe { ts_parser_delete(self.ptr) }
    }
}

/// An error returned when the ranges passed to [`Parser::set_included_ranges`]
/// overlap or are not sorted.
#[derive(Debug, PartialEq, Eq)]
pub struct InvalidRangesErrror;

impl fmt::Display for InvalidRangesErrror {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "included ranges overlap or are not sorted")
    }
}
impl std::error::Error for InvalidRangesErrror {}

type TreeSitterReadFn = unsafe extern "C" fn(
    payload: NonNull<c_void>,
    byte_index: u32,
    position: Point,
    bytes_read: &mut u32,
) -> *const u8;

#[repr(C)]
#[derive(Debug)]
pub struct ParserInputRaw {
    pub payload: NonNull<c_void>,
    pub read: TreeSitterReadFn,
    pub encoding: u32,
}

pub trait ParserInput {
    fn read(&mut self, offset: usize) -> &[u8];
}

pub trait IntoParserInput {
    type ParserInput;
    fn into_parser_input(self) -> Self::ParserInput;
}

extern "C" {
    /// Create a new parser.
    fn ts_parser_new() -> NonNull<ParserData>;
    /// Delete the parser, freeing all of the memory that it used.
    fn ts_parser_delete(parser: NonNull<ParserData>);
    /// Set the language that the parser should use for parsing. Returns a boolean indicating
    /// whether or not the language was successfully assigned. True means assignment
    /// succeeded. False means there was a version mismatch: the language was generated with
    /// an incompatible version of the Tree-sitter CLI. Check the language's version using
    /// [`ts_language_version`] and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`]
    /// and [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
    fn ts_parser_set_language(parser: NonNull<ParserData>, language: Grammar) -> bool;
    /// Set the ranges of text that the parser should include when parsing. By default, the parser
    /// will always include entire documents. This function allows you to parse only a *portion*
    /// of a document but still return a syntax tree whose ranges match up with the document as a
    /// whole. You can also pass multiple disjoint ranges. The second and third parameters specify
    /// the location and length of an array of ranges. The parser does *not* take ownership of
    /// these ranges; it copies the data, so it doesn't matter how these ranges are allocated.
    /// If `count` is zero, then the entire document will be parsed. Otherwise, the given ranges
    /// must be ordered from earliest to latest in the document, and they must not overlap. That
    /// is, the following must hold for all `i < count - 1`:
    /// `ranges[i].end_byte <= ranges[i + 1].start_byte`.
    /// If this requirement is not satisfied, the operation will fail, the ranges
    /// will not be assigned, and this function will return `false`. On success, this function
    /// returns `true`.
    fn ts_parser_set_included_ranges(
        parser: NonNull<ParserData>,
        ranges: *const Range,
        count: u32,
    ) -> bool;

    /// Use the parser to parse some source code and create a syntax tree. If you are parsing this
    /// document for the first time, pass `NULL` for the `old_tree` parameter. Otherwise, if you
    /// have already parsed an earlier version of this document and the document has since been
    /// edited, pass the previous syntax tree so that the unchanged parts of it can be reused.
    /// This will save time and memory. For this to work correctly, you must have already edited
    /// the old syntax tree using the [`ts_tree_edit`] function in a way that exactly matches
    /// the source code changes. The [`TSInput`] parameter lets you specify how to read the text.
    /// It has the following three fields:
    /// 1. [`read`]: A function to retrieve a chunk of text at a given byte offset and
    ///    (row, column) position. The function should return a pointer to the text and write its
    ///    length to the [`bytes_read`] pointer. The parser does not take ownership of this buffer;
    ///    it just borrows it until it has finished reading it. The function should write a zero
    ///    value to the [`bytes_read`] pointer to indicate the end of the document.
    /// 2. [`payload`]: An arbitrary pointer that will be passed to each invocation of the
    ///    [`read`] function.
    /// 3. [`encoding`]: An indication of how the text is encoded. Either `TSInputEncodingUTF8`
    ///    or `TSInputEncodingUTF16`.
    /// This function returns a syntax tree on success, and `NULL` on failure. There are three
    /// possible reasons for failure:
    /// 1. The parser does not have a language assigned. Check for this using the
    ///    [`ts_parser_language`] function.
    /// 2. Parsing was cancelled due to a timeout that was set by an earlier call to the
    ///    [`ts_parser_set_timeout_micros`] function. You can resume parsing from where the parser
    ///    left off by calling [`ts_parser_parse`] again with the same arguments. Or you can start
    ///    parsing from scratch by first calling [`ts_parser_reset`].
    /// 3. Parsing was cancelled using a cancellation flag that was set by an earlier call to
    ///    [`ts_parser_set_cancellation_flag`]. You can resume parsing from where the parser
    ///    left off by calling [`ts_parser_parse`] again with the same arguments.
    ///
    /// [`read`]: TSInput::read
    /// [`payload`]: TSInput::payload
    /// [`encoding`]: TSInput::encoding
    /// [`bytes_read`]: TSInput::read
    fn ts_parser_parse(
        parser: NonNull<ParserData>,
        old_tree: Option<NonNull<SyntaxTreeData>>,
        input: ParserInputRaw,
    ) -> Option<NonNull<SyntaxTreeData>>;
}
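A minimal sketch (not part of the commit) of the `ParserInput`/`IntoParserInput` contract for a plain in-memory string, assuming it lives inside the same crate: `read` returns the chunk of text starting at `offset`, and an empty slice signals the end of the document. The rope-backed implementation later in this commit follows the same shape.

struct StrInput<'a>(&'a [u8]);

impl ParserInput for StrInput<'_> {
    fn read(&mut self, offset: usize) -> &[u8] {
        // Return everything from `offset` onward; an empty slice tells
        // tree-sitter that the end of the document was reached.
        self.0.get(offset..).unwrap_or(&[])
    }
}

impl<'a> IntoParserInput for &'a str {
    type ParserInput = StrInput<'a>;
    fn into_parser_input(self) -> StrInput<'a> {
        StrInput(self.as_bytes())
    }
}

fn parse_str(grammar: Grammar, src: &str) -> Option<SyntaxTree> {
    let mut parser = Parser::new();
    parser.set_language(grammar);
    // No old tree: this is a full, non-incremental parse.
    parser.parse(src, None)
}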
@@ -0,0 +1,574 @@
use std::fmt::Display;
use std::iter::zip;
use std::path::{Path, PathBuf};
use std::ptr::NonNull;
use std::{slice, str};

use regex_cursor::engines::meta::Regex;

use crate::tree_sitter::Grammar;

macro_rules! bail {
    ($($args:tt)*) => {{
        return Err(format!($($args)*))
    }}
}

macro_rules! ensure {
    ($cond: expr, $($args:tt)*) => {{
        if !$cond {
            return Err(format!($($args)*))
        }
    }}
}

#[derive(Debug)]
enum TextPredicateCaptureKind {
    EqString(u32),
    EqCapture(u32),
    MatchString(Regex),
    AnyString(Box<[Box<str>]>),
}

struct TextPredicateCapture {
    capture_idx: u32,
    kind: TextPredicateCaptureKind,
    negated: bool,
    match_all: bool,
}

pub enum QueryData {}
pub struct Query {
    raw: NonNull<QueryData>,
    num_captures: u32,
}

impl Query {
    /// Create a new query from a string containing one or more S-expression
    /// patterns.
    ///
    /// The query is associated with a particular grammar, and can only be run
    /// on syntax nodes parsed with that grammar. References to Queries can be
    /// shared between multiple threads.
    pub fn new(grammar: Grammar, source: &str, path: impl AsRef<Path>) -> Result<Self, ParseError> {
        assert!(
            source.len() <= i32::MAX as usize,
            "TreeSitter queries must be smaller than 2 GiB (is {} GiB)",
            source.len() as f64 / 1024.0 / 1024.0 / 1024.0
        );
        let mut error_offset = 0u32;
        let mut error_kind = RawQueryError::None;
        let bytes = source.as_bytes();

        // Compile the query.
        let ptr = unsafe {
            ts_query_new(
                grammar,
                bytes.as_ptr(),
                bytes.len() as u32,
                &mut error_offset,
                &mut error_kind,
            )
        };

        let Some(raw) = ptr else {
            let offset = error_offset as usize;
            let error_word = || {
                source[offset..]
                    .chars()
                    .take_while(|&c| c.is_alphanumeric() || matches!(c, '_' | '-'))
                    .collect()
            };
            let err = match error_kind {
                RawQueryError::NodeType => {
                    let node: String = error_word();
                    ParseError::InvalidNodeType {
                        location: ParserErrorLocation::new(
                            source,
                            path.as_ref(),
                            offset,
                            node.chars().count(),
                        ),
                        node,
                    }
                }
                RawQueryError::Field => {
                    let field = error_word();
                    ParseError::InvalidFieldName {
                        location: ParserErrorLocation::new(
                            source,
                            path.as_ref(),
                            offset,
                            field.chars().count(),
                        ),
                        field,
                    }
                }
                RawQueryError::Capture => {
                    let capture = error_word();
                    ParseError::InvalidCaptureName {
                        location: ParserErrorLocation::new(
                            source,
                            path.as_ref(),
                            offset,
                            capture.chars().count(),
                        ),
                        capture,
                    }
                }
                RawQueryError::Syntax => ParseError::SyntaxError(ParserErrorLocation::new(
                    source,
                    path.as_ref(),
                    offset,
                    0,
                )),
                RawQueryError::Structure => ParseError::ImpossiblePattern(
                    ParserErrorLocation::new(source, path.as_ref(), offset, 0),
                ),
                RawQueryError::None => {
                    unreachable!("tree-sitter returned a null pointer but did not set an error")
                }
                RawQueryError::Language => unreachable!("should be handled at grammar load"),
            };
            return Err(err);
        };

        // I am not going to bother with safety comments here, all of these are
        // safe as long as TS is not buggy because raw is a properly constructed query.
        let num_captures = unsafe { ts_query_capture_count(raw) };

        Ok(Query { raw, num_captures })
    }

    fn parse_predicates(&mut self) {
        let pattern_count = unsafe { ts_query_pattern_count(self.raw) };

        let mut text_predicates = Vec::with_capacity(pattern_count as usize);
        let mut property_predicates = Vec::with_capacity(pattern_count as usize);
        let mut property_settings = Vec::with_capacity(pattern_count as usize);
        let mut general_predicates = Vec::with_capacity(pattern_count as usize);

        for i in 0..pattern_count {}
    }

    fn parse_predicate(&self, pattern_index: u32) -> Result<(), String> {
        let mut text_predicates = Vec::new();
        let mut property_predicates = Vec::new();
        let mut property_settings = Vec::new();
        let mut general_predicates = Vec::new();
        for predicate in self.predicates(pattern_index) {
            let predicate = unsafe { Predicate::new(self, predicate)? };

            // Build a predicate for each of the known predicate function names.
            match predicate.operator_name {
                "eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => {
                    predicate.check_arg_count(2)?;
                    let capture_idx = predicate.get_arg(0, PredicateArg::Capture)?;
                    let (arg2, arg2_kind) = predicate.get_any_arg(1);

                    let negated = matches!(predicate.operator_name, "not-eq?" | "not-any-eq?");
                    let match_all = matches!(predicate.operator_name, "eq?" | "not-eq?");
                    let kind = match arg2_kind {
                        PredicateArg::Capture => TextPredicateCaptureKind::EqCapture(arg2),
                        PredicateArg::String => TextPredicateCaptureKind::EqString(arg2),
                    };
                    text_predicates.push(TextPredicateCapture {
                        capture_idx,
                        kind,
                        negated,
                        match_all,
                    });
                }

                "match?" | "not-match?" | "any-match?" | "any-not-match?" => {
                    predicate.check_arg_count(2)?;
                    let capture_idx = predicate.get_arg(0, PredicateArg::Capture)?;
                    let regex = predicate.get_str_arg(1)?;

                    let negated =
                        matches!(predicate.operator_name, "not-match?" | "any-not-match?");
                    let match_all = matches!(predicate.operator_name, "match?" | "not-match?");
                    let regex = match Regex::new(regex) {
                        Ok(regex) => regex,
                        Err(err) => bail!("invalid regex '{regex}', {err}"),
                    };
                    text_predicates.push(TextPredicateCapture {
                        capture_idx,
                        kind: TextPredicateCaptureKind::MatchString(regex),
                        negated,
                        match_all,
                    });
                }

                "set!" => property_settings.push(Self::parse_property(
                    row,
                    operator_name,
                    &capture_names,
                    &string_values,
                    &p[1..],
                )?),

                "is?" | "is-not?" => property_predicates.push((
                    Self::parse_property(
                        row,
                        operator_name,
                        &capture_names,
                        &string_values,
                        &p[1..],
                    )?,
                    operator_name == "is?",
                )),

                "any-of?" | "not-any-of?" => {
                    if p.len() < 2 {
                        return Err(predicate_error(row, format!(
                            "Wrong number of arguments to #any-of? predicate. Expected at least 1, got {}.",
                            p.len() - 1
                        )));
                    }
                    if p[1].type_ != TYPE_CAPTURE {
                        return Err(predicate_error(row, format!(
                            "First argument to #any-of? predicate must be a capture name. Got literal \"{}\".",
                            string_values[p[1].value_id as usize],
                        )));
                    }

                    let is_positive = operator_name == "any-of?";
                    let mut values = Vec::new();
                    for arg in &p[2..] {
                        if arg.type_ == TYPE_CAPTURE {
                            return Err(predicate_error(row, format!(
                                "Arguments to #any-of? predicate must be literals. Got capture @{}.",
                                capture_names[arg.value_id as usize],
                            )));
                        }
                        values.push(string_values[arg.value_id as usize]);
                    }
                    text_predicates.push(TextPredicateCapture::AnyString(
                        p[1].value_id,
                        values
                            .iter()
                            .map(|x| (*x).to_string().into())
                            .collect::<Vec<_>>()
                            .into(),
                        is_positive,
                    ));
                }

                _ => general_predicates.push(QueryPredicate {
                    operator: operator_name.to_string().into(),
                    args: p[1..]
                        .iter()
                        .map(|a| {
                            if a.type_ == TYPE_CAPTURE {
                                QueryPredicateArg::Capture(a.value_id)
                            } else {
                                QueryPredicateArg::String(
                                    string_values[a.value_id as usize].to_string().into(),
                                )
                            }
                        })
                        .collect(),
                }),
            }
        }

        text_predicates_vec.push(text_predicates.into());
        property_predicates_vec.push(property_predicates.into());
        property_settings_vec.push(property_settings.into());
        general_predicates_vec.push(general_predicates.into());
    }

    fn predicates<'a>(
        &'a self,
        pattern_index: u32,
    ) -> impl Iterator<Item = &'a [PredicateStep]> + 'a {
        let predicate_steps = unsafe {
            let mut len = 0u32;
            let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern_index, &mut len);
            (len != 0)
                .then(|| slice::from_raw_parts(raw_predicates, len as usize))
                .unwrap_or_default()
        };
        predicate_steps
            .split(|step| step.kind == PredicateStepKind::Done)
            .filter(|predicate| !predicate.is_empty())
    }

    /// Safety: value_id must be a valid string id (in bounds) for this query and pattern_index
    unsafe fn get_pattern_string(&self, value_id: u32) -> &str {
        unsafe {
            let mut len = 0;
            let ptr = ts_query_string_value_for_id(self.raw, value_id, &mut len);
            let data = slice::from_raw_parts(ptr, len as usize);
            // safety: we only allow passing valid str(ings) as arguments to query::new
            // name is always a substring of that. Tree-sitter does proper utf8 segmentation
            // so any substrings it produces are codepoint aligned and therefore valid utf8
            str::from_utf8_unchecked(data)
        }
    }

    #[inline]
    pub fn capture_name(&self, capture_idx: u32) -> &str {
        // this one needs an assertion because the TS C API is inconsistent
        // and unsafe; other functions do have checks and would return null
        assert!(capture_idx <= self.num_captures, "invalid capture index");
        let mut length = 0;
        unsafe {
            let ptr = ts_query_capture_name_for_id(self.raw, capture_idx, &mut length);
            let name = slice::from_raw_parts(ptr, length as usize);
            // safety: we only allow passing valid str(ings) as arguments to query::new
            // name is always a substring of that. Tree-sitter does proper utf8 segmentation
            // so any substrings it produces are codepoint aligned and therefore valid utf8
            str::from_utf8_unchecked(name)
        }
    }
}

#[derive(Debug, PartialEq, Eq)]
pub struct ParserErrorLocation {
    pub path: PathBuf,
    /// at which line the error occurred
    pub line: usize,
    /// at which codepoint/column the error starts in the line
    pub column: usize,
    /// how many codepoints/columns the error takes up
    pub len: usize,
    line_content: String,
}

impl ParserErrorLocation {
    pub fn new(source: &str, path: &Path, offset: usize, len: usize) -> ParserErrorLocation {
        let (line, line_content) = source[..offset]
            .split('\n')
            .map(|line| line.strip_suffix('\r').unwrap_or(line))
            .enumerate()
            .last()
            .unwrap_or((0, ""));
        let column = line_content.chars().count();
        ParserErrorLocation {
            path: path.to_owned(),
            line,
            column,
            len,
            line_content: line_content.to_owned(),
        }
    }
}

impl Display for ParserErrorLocation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(
            f,
            " --> {}:{}:{}",
            self.path.display(),
            self.line,
            self.column
        )?;
        let line = self.line.to_string();
        let prefix = format_args!(" {:width$} |", "", width = line.len());
        writeln!(f, "{prefix}")?;
        writeln!(f, " {line} | {}", self.line_content)?;
        writeln!(
            f,
            "{prefix}{:width$}{:^<len$}",
            "",
            "^",
            width = self.column,
            len = self.len
        )?;
        writeln!(f, "{prefix}")
    }
}

#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum ParseError {
    #[error("unexpected EOF")]
    UnexpectedEof,
    #[error("invalid query syntax\n{0}")]
    SyntaxError(ParserErrorLocation),
    #[error("invalid node type {node:?}\n{location}")]
    InvalidNodeType {
        node: String,
        location: ParserErrorLocation,
    },
    #[error("invalid field name {field:?}\n{location}")]
    InvalidFieldName {
        field: String,
        location: ParserErrorLocation,
    },
    #[error("invalid capture name {capture:?}\n{location}")]
    InvalidCaptureName {
        capture: String,
        location: ParserErrorLocation,
    },
    #[error("{message}\n{location}")]
    InvalidPredicate {
        message: String,
        location: ParserErrorLocation,
    },
    #[error("invalid predicate\n{0}")]
    ImpossiblePattern(ParserErrorLocation),
}

#[repr(C)]
enum RawQueryError {
    None = 0,
    Syntax = 1,
    NodeType = 2,
    Field = 3,
    Capture = 4,
    Structure = 5,
    Language = 6,
}

#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PredicateStepKind {
    Done = 0,
    Capture = 1,
    String = 2,
}

#[repr(C)]
struct PredicateStep {
    kind: PredicateStepKind,
    value_id: u32,
}

struct Predicate<'a> {
    operator_name: &'a str,
    args: &'a [PredicateStep],
    query: &'a Query,
}

impl<'a> Predicate<'a> {
    unsafe fn new(
        query: &'a Query,
        predicate: &'a [PredicateStep],
    ) -> Result<Predicate<'a>, String> {
        ensure!(
            predicate[0].kind == PredicateStepKind::String,
            "expected predicate to start with a function name. Got @{}.",
            query.capture_name(predicate[0].value_id)
        );
        let operator_name = query.get_pattern_string(predicate[0].value_id);
        Ok(Predicate {
            operator_name,
            args: &predicate[1..],
            query,
        })
    }
    pub fn check_arg_count(&self, n: usize) -> Result<(), String> {
        ensure!(
            self.args.len() == n,
            "expected {n} arguments for #{}, got {}",
            self.operator_name,
            self.args.len()
        );
        Ok(())
    }

    pub fn get_arg(&self, i: usize, expect: PredicateArg) -> Result<u32, String> {
        let (val, actual) = self.get_any_arg(i);
        match (actual, expect) {
            (PredicateArg::Capture, PredicateArg::String) => bail!(
                "{i}. argument to #{} expected a capture, got literal {val:?}",
                self.operator_name
            ),
            (PredicateArg::String, PredicateArg::Capture) => bail!(
                "{i}. argument to #{} must be a literal, got capture @{val:?}",
                self.operator_name
            ),
            _ => (),
        };
        Ok(val)
    }
    pub fn get_str_arg(&self, i: usize) -> Result<&'a str, String> {
        let arg = self.get_arg(i, PredicateArg::String)?;
        unsafe { Ok(self.query.get_pattern_string(arg)) }
    }

    pub fn get_any_arg(&self, i: usize) -> (u32, PredicateArg) {
        match self.args[i].kind {
            PredicateStepKind::String => unsafe { (self.args[i].value_id, PredicateArg::String) },
            PredicateStepKind::Capture => (self.args[i].value_id, PredicateArg::Capture),
            PredicateStepKind::Done => unreachable!(),
        }
    }
}

enum PredicateArg {
    Capture,
    String,
}

extern "C" {
    /// Create a new query from a string containing one or more S-expression
    /// patterns. The query is associated with a particular language, and can
    /// only be run on syntax nodes parsed with that language. If all of the
    /// given patterns are valid, this returns a [`TSQuery`]. If a pattern is
    /// invalid, this returns `NULL`, and provides two pieces of information
    /// about the problem: 1. The byte offset of the error is written to
    /// the `error_offset` parameter. 2. The type of error is written to the
    /// `error_type` parameter.
    pub fn ts_query_new(
        grammar: Grammar,
        source: *const u8,
        source_len: u32,
        error_offset: &mut u32,
        error_type: &mut RawQueryError,
    ) -> Option<NonNull<QueryData>>;

    /// Delete a query, freeing all of the memory that it used.
    pub fn ts_query_delete(query: NonNull<QueryData>);

    /// Get the number of patterns, captures, or string literals in the query.
    pub fn ts_query_pattern_count(query: NonNull<QueryData>) -> u32;
    pub fn ts_query_capture_count(query: NonNull<QueryData>) -> u32;
    pub fn ts_query_string_count(query: NonNull<QueryData>) -> u32;

    /// Get the byte offset where the given pattern starts in the query's
    /// source. This can be useful when combining queries by concatenating their
    /// source code strings.
    pub fn ts_query_start_byte_for_pattern(query: NonNull<QueryData>, pattern_index: u32) -> u32;

    /// Get all of the predicates for the given pattern in the query. The
    /// predicates are represented as a single array of steps. There are three
    /// types of steps in this array, which correspond to the three legal values
    /// for the `type` field:
    /// - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names of captures.
    ///   Their `value_id` can be used with the [`ts_query_capture_name_for_id`] function to
    ///   obtain the name of the capture.
    /// - `TSQueryPredicateStepTypeString` - Steps with this type represent literal strings.
    ///   Their `value_id` can be used with the [`ts_query_string_value_for_id`] function to
    ///   obtain their string value.
    /// - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels* that represent
    ///   the end of an individual predicate. If a pattern has two predicates, then there will
    ///   be two steps with this `type` in the array.
    pub fn ts_query_predicates_for_pattern(
        query: NonNull<QueryData>,
        pattern_index: u32,
        step_count: &mut u32,
    ) -> *const PredicateStep;

    pub fn ts_query_is_pattern_rooted(query: NonNull<QueryData>, pattern_index: u32) -> bool;
    pub fn ts_query_is_pattern_non_local(query: NonNull<QueryData>, pattern_index: u32) -> bool;
    pub fn ts_query_is_pattern_guaranteed_at_step(
        query: NonNull<QueryData>,
        byte_offset: u32,
    ) -> bool;
    /// Get the name and length of one of the query's captures, or one of the
    /// query's string literals. Each capture and string is associated with a
    /// numeric id based on the order that it appeared in the query's source.
    pub fn ts_query_capture_name_for_id(
        query: NonNull<QueryData>,
        index: u32,
        length: &mut u32,
    ) -> *const u8;

    pub fn ts_query_string_value_for_id(
        self_: NonNull<QueryData>,
        index: u32,
        length: &mut u32,
    ) -> *const u8;
}
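A minimal sketch (not part of the commit) of compiling a query with `Query::new` and surfacing errors through `ParseError`'s `Display` output; the query text and file name below are placeholders:

fn compile_highlights(grammar: Grammar) -> Option<Query> {
    // Placeholder query: capture function names from a hypothetical grammar.
    let source = "(function_item name: (identifier) @function)";
    match Query::new(grammar, source, "highlights.scm") {
        Ok(query) => Some(query),
        Err(err) => {
            // ParseError carries a ParserErrorLocation, which renders a
            // rustc-style snippet pointing at the offending token.
            eprintln!("failed to compile query: {err}");
            None
        }
    }
}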
@@ -0,0 +1,38 @@
use regex_cursor::{Cursor, RopeyCursor};
use ropey::RopeSlice;

use crate::tree_sitter::parser::{IntoParserInput, ParserInput};

pub struct RopeParserInput<'a> {
    src: RopeSlice<'a>,
    cursor: regex_cursor::RopeyCursor<'a>,
}

impl<'a> IntoParserInput for RopeSlice<'a> {
    type ParserInput = RopeParserInput<'a>;

    fn into_parser_input(self) -> Self::ParserInput {
        RopeParserInput {
            src: self,
            cursor: RopeyCursor::new(self),
        }
    }
}

impl ParserInput for RopeParserInput<'_> {
    fn read(&mut self, offset: usize) -> &[u8] {
        // this cursor is optimized for contiguous reads, which are by far the most common during parsing
        // very far jumps (like injections at the other end of the document) are handled
        // by restarting a new cursor (new chunks iterator)
        if offset < self.cursor.offset() && self.cursor.offset() - offset > 4906 {
            self.cursor = regex_cursor::RopeyCursor::at(self.src, offset);
        } else {
            while self.cursor.offset() + self.cursor.chunk().len() >= offset {
                if !self.cursor.advance() {
                    return &[];
                }
            }
        }
        self.cursor.chunk()
    }
}
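With the `IntoParserInput` impl above, a `RopeSlice` can be handed straight to `Parser::parse`. A small sketch (not part of the commit), assuming a `ropey::Rope` and an already-loaded grammar:

fn parse_rope(grammar: Grammar, text: &ropey::Rope) -> Option<SyntaxTree> {
    let mut parser = Parser::new();
    parser.set_language(grammar);
    // RopeSlice implements IntoParserInput, so the parser reads the rope
    // chunk by chunk through RopeParserInput::read.
    parser.parse(text.slice(..), None)
}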
@@ -0,0 +1,80 @@
use std::fmt;
use std::ptr::NonNull;

use crate::tree_sitter::syntax_tree_node::{SyntaxTreeNode, SyntaxTreeNodeRaw};
use crate::tree_sitter::Point;

// opaque pointers
pub(super) enum SyntaxTreeData {}

pub struct SyntaxTree {
    ptr: NonNull<SyntaxTreeData>,
}

impl SyntaxTree {
    pub(super) unsafe fn from_raw(raw: NonNull<SyntaxTreeData>) -> SyntaxTree {
        SyntaxTree { ptr: raw }
    }

    pub(super) fn as_raw(&self) -> NonNull<SyntaxTreeData> {
        self.ptr
    }

    pub fn root_node(&self) -> SyntaxTreeNode<'_> {
        unsafe { SyntaxTreeNode::from_raw(ts_tree_root_node(self.ptr)).unwrap() }
    }

    pub fn edit(&mut self, edit: &InputEdit) {
        unsafe { ts_tree_edit(self.ptr, edit) }
    }
}

impl fmt::Debug for SyntaxTree {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{{Tree {:?}}}", self.root_node())
    }
}

impl Drop for SyntaxTree {
    fn drop(&mut self) {
        unsafe { ts_tree_delete(self.ptr) }
    }
}

impl Clone for SyntaxTree {
    fn clone(&self) -> Self {
        unsafe {
            SyntaxTree {
                ptr: ts_tree_copy(self.ptr),
            }
        }
    }
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct InputEdit {
    pub start_byte: u32,
    pub old_end_byte: u32,
    pub new_end_byte: u32,
    pub start_point: Point,
    pub old_end_point: Point,
    pub new_end_point: Point,
}

extern "C" {
    /// Create a shallow copy of the syntax tree. This is very fast. You need to
    /// copy a syntax tree in order to use it on more than one thread at a time,
    /// as syntax trees are not thread safe.
    fn ts_tree_copy(self_: NonNull<SyntaxTreeData>) -> NonNull<SyntaxTreeData>;
    /// Delete the syntax tree, freeing all of the memory that it used.
    fn ts_tree_delete(self_: NonNull<SyntaxTreeData>);
    /// Get the root node of the syntax tree.
    fn ts_tree_root_node<'tree>(self_: NonNull<SyntaxTreeData>) -> SyntaxTreeNodeRaw;
    /// Edit the syntax tree to keep it in sync with source code that has been
    /// edited.
    ///
    /// You must describe the edit both in terms of byte offsets and in terms of
    /// row/column coordinates.
    fn ts_tree_edit(self_: NonNull<SyntaxTreeData>, edit: &InputEdit);
}
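A minimal sketch (not part of the commit) of keeping a tree in sync across a text change and re-parsing incrementally. The `InputEdit` values are made up: they describe a hypothetical edit on the first line that replaces 2 bytes starting at byte 10 with 5 new bytes.

fn reparse_after_edit(
    parser: &mut Parser,
    tree: &mut SyntaxTree,
    text: ropey::RopeSlice<'_>,
) -> Option<SyntaxTree> {
    let edit = InputEdit {
        start_byte: 10,
        old_end_byte: 12,
        new_end_byte: 15,
        start_point: Point { row: 0, column: 10 },
        old_end_point: Point { row: 0, column: 12 },
        new_end_point: Point { row: 0, column: 15 },
    };
    // The old tree must be edited before it can be reused for incremental parsing.
    tree.edit(&edit);
    parser.parse(text, Some(&*tree))
}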
@@ -0,0 +1,291 @@
use std::ffi::c_void;
use std::marker::PhantomData;
use std::ops::Range;
use std::ptr::NonNull;

use crate::tree_sitter::syntax_tree::SyntaxTree;
use crate::tree_sitter::Grammar;

#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub(super) struct SyntaxTreeNodeRaw {
    context: [u32; 4],
    id: *const c_void,
    tree: *const c_void,
}

impl From<SyntaxTreeNode<'_>> for SyntaxTreeNodeRaw {
    fn from(node: SyntaxTreeNode) -> SyntaxTreeNodeRaw {
        SyntaxTreeNodeRaw {
            context: node.context,
            id: node.id.as_ptr(),
            tree: node.tree.as_ptr(),
        }
    }
}

#[derive(Debug, Clone)]
pub struct SyntaxTreeNode<'tree> {
    context: [u32; 4],
    id: NonNull<c_void>,
    tree: NonNull<c_void>,
    _phantom: PhantomData<&'tree SyntaxTree>,
}

impl<'tree> SyntaxTreeNode<'tree> {
    #[inline]
    pub(super) unsafe fn from_raw(raw: SyntaxTreeNodeRaw) -> Option<Self> {
        Some(SyntaxTreeNode {
            context: raw.context,
            id: NonNull::new(raw.id as *mut _)?,
            tree: unsafe { NonNull::new_unchecked(raw.tree as *mut _) },
            _phantom: PhantomData,
        })
    }

    #[inline]
    fn as_raw(&self) -> SyntaxTreeNodeRaw {
        SyntaxTreeNodeRaw {
            context: self.context,
            id: self.id.as_ptr(),
            tree: self.tree.as_ptr(),
        }
    }

    /// Get this node's type as a numerical id.
    #[inline]
    pub fn kind_id(&self) -> u16 {
        unsafe { ts_node_symbol(self.as_raw()) }
    }

    /// Get the [`Grammar`] that was used to parse this node's syntax tree.
    #[inline]
    pub fn grammar(&self) -> Grammar {
        unsafe { ts_node_language(self.as_raw()) }
    }

    /// Check if this node is *named*.
    ///
    /// Named nodes correspond to named rules in the grammar, whereas
    /// *anonymous* nodes correspond to string literals in the grammar.
    #[inline]
    pub fn is_named(&self) -> bool {
        unsafe { ts_node_is_named(self.as_raw()) }
    }

    /// Check if this node is *missing*.
    ///
    /// Missing nodes are inserted by the parser in order to recover from
    /// certain kinds of syntax errors.
    #[inline]
    pub fn is_missing(&self) -> bool {
        unsafe { ts_node_is_missing(self.as_raw()) }
    }

    /// Get the byte offset where this node starts.
    #[inline]
    pub fn start_byte(&self) -> usize {
        unsafe { ts_node_start_byte(self.as_raw()) as usize }
    }

    /// Get the byte offset where this node ends.
    #[inline]
    pub fn end_byte(&self) -> usize {
        unsafe { ts_node_end_byte(self.as_raw()) as usize }
    }

    /// Get the byte range of source code that this node represents.
    // TODO: use helix_stdx::Range once available
    #[inline]
    pub fn byte_range(&self) -> Range<usize> {
        self.start_byte()..self.end_byte()
    }

    /// Get the node's child at the given index, where zero represents the first
    /// child.
    ///
    /// This method is fairly fast, but its cost is technically log(i), so if
    /// you might be iterating over a long list of children, you should use
    /// [`SyntaxTreeNode::children`] instead.
    #[inline]
    pub fn child(&self, i: usize) -> Option<SyntaxTreeNode<'tree>> {
        unsafe { SyntaxTreeNode::from_raw(ts_node_child(self.as_raw(), i as u32)) }
    }

    /// Get this node's number of children.
    #[inline]
    pub fn child_count(&self) -> usize {
        unsafe { ts_node_child_count(self.as_raw()) as usize }
    }

    /// Get this node's *named* child at the given index.
    ///
    /// See also [`SyntaxTreeNode::is_named`].
    /// This method is fairly fast, but its cost is technically log(i), so if
    /// you might be iterating over a long list of children, you should use
    /// [`SyntaxTreeNode::named_children`] instead.
    #[inline]
    pub fn named_child(&self, i: usize) -> Option<SyntaxTreeNode<'tree>> {
        unsafe { SyntaxTreeNode::from_raw(ts_node_named_child(self.as_raw(), i as u32)) }
    }

    /// Get this node's number of *named* children.
    ///
    /// See also [`SyntaxTreeNode::is_named`].
    #[inline]
    pub fn named_child_count(&self) -> usize {
        unsafe { ts_node_named_child_count(self.as_raw()) as usize }
    }

    #[inline]
    unsafe fn map(
        &self,
        f: unsafe extern "C" fn(SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw,
    ) -> Option<SyntaxTreeNode<'tree>> {
        SyntaxTreeNode::from_raw(f(self.as_raw()))
    }

    /// Get this node's immediate parent.
    #[inline]
    pub fn parent(&self) -> Option<Self> {
        unsafe { self.map(ts_node_parent) }
    }

    /// Get this node's next sibling.
    #[inline]
    pub fn next_sibling(&self) -> Option<Self> {
        unsafe { self.map(ts_node_next_sibling) }
    }

    /// Get this node's previous sibling.
    #[inline]
    pub fn prev_sibling(&self) -> Option<Self> {
        unsafe { self.map(ts_node_prev_sibling) }
    }

    /// Get this node's next named sibling.
    #[inline]
    pub fn next_named_sibling(&self) -> Option<Self> {
        unsafe { self.map(ts_node_next_named_sibling) }
    }

    /// Get this node's previous named sibling.
    #[inline]
    pub fn prev_named_sibling(&self) -> Option<Self> {
        unsafe { self.map(ts_node_prev_named_sibling) }
    }

    /// Get the smallest node within this node that spans the given range.
    #[inline]
    pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
        unsafe {
            Self::from_raw(ts_node_descendant_for_byte_range(
                self.as_raw(),
                start as u32,
                end as u32,
            ))
        }
    }

    /// Get the smallest named node within this node that spans the given range.
    #[inline]
    pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
        unsafe {
            Self::from_raw(ts_node_named_descendant_for_byte_range(
                self.as_raw(),
                start as u32,
                end as u32,
            ))
        }
    }

    // /// Iterate over this node's children.
    // ///
    // /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain
    // /// a [`TreeCursor`] by calling [`Tree::walk`] or [`SyntaxTreeNode::walk`]. To avoid
    // /// unnecessary allocations, you should reuse the same cursor for
    // /// subsequent calls to this method.
    // ///
    // /// If you're walking the tree recursively, you may want to use the
    // /// [`TreeCursor`] APIs directly instead.
    // pub fn children<'cursor>(
    //     &self,
    //     cursor: &'cursor mut TreeCursor<'tree>,
    // ) -> impl ExactSizeIterator<Item = SyntaxTreeNode<'tree>> + 'cursor {
    //     cursor.reset(self.to_raw());
    //     cursor.goto_first_child();
    //     (0..self.child_count()).map(move |_| {
    //         let result = cursor.node();
    //         cursor.goto_next_sibling();
    //         result
    //     })
    // }
}

unsafe impl Send for SyntaxTreeNode<'_> {}
unsafe impl Sync for SyntaxTreeNode<'_> {}

extern "C" {
    /// Get the node's type as a numerical id.
    fn ts_node_symbol(node: SyntaxTreeNodeRaw) -> u16;

    /// Get the node's language.
    fn ts_node_language(node: SyntaxTreeNodeRaw) -> Grammar;

    /// Check if the node is *named*. Named nodes correspond to named rules in
    /// the grammar, whereas *anonymous* nodes correspond to string literals in
    /// the grammar.
    fn ts_node_is_named(node: SyntaxTreeNodeRaw) -> bool;

    /// Check if the node is *missing*. Missing nodes are inserted by the parser
    /// in order to recover from certain kinds of syntax errors.
    fn ts_node_is_missing(node: SyntaxTreeNodeRaw) -> bool;

    /// Get the node's immediate parent.
    fn ts_node_parent(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;

    /// Get the node's child at the given index, where zero represents the first
    /// child.
    fn ts_node_child(node: SyntaxTreeNodeRaw, child_index: u32) -> SyntaxTreeNodeRaw;

    /// Get the node's number of children.
    fn ts_node_child_count(node: SyntaxTreeNodeRaw) -> u32;

    /// Get the node's *named* child at the given index. See also
    /// [`ts_node_is_named`].
    fn ts_node_named_child(node: SyntaxTreeNodeRaw, child_index: u32) -> SyntaxTreeNodeRaw;

    /// Get the node's number of *named* children. See also [`ts_node_is_named`].
    fn ts_node_named_child_count(node: SyntaxTreeNodeRaw) -> u32;

    /// Get the node's next sibling.
    fn ts_node_next_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;

    fn ts_node_prev_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;

    /// Get the node's next *named* sibling.
    fn ts_node_next_named_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;

    fn ts_node_prev_named_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;

    /// Get the smallest node within this node that spans the given range of
    /// bytes or (row, column) positions.
    fn ts_node_descendant_for_byte_range(
        node: SyntaxTreeNodeRaw,
        start: u32,
        end: u32,
    ) -> SyntaxTreeNodeRaw;

    /// Get the smallest named node within this node that spans the given range
    /// of bytes or (row, column) positions.
    fn ts_node_named_descendant_for_byte_range(
        node: SyntaxTreeNodeRaw,
        start: u32,
        end: u32,
    ) -> SyntaxTreeNodeRaw;

    /// Get the node's start byte.
    fn ts_node_start_byte(self_: SyntaxTreeNodeRaw) -> u32;

    /// Get the node's end byte.
    fn ts_node_end_byte(node: SyntaxTreeNodeRaw) -> u32;
}
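A minimal sketch (not part of the commit) of walking the direct children of the root node and printing their byte ranges; `child` and `child_count` come from this file, and the indexed loop stands in for the commented-out cursor-based `children` iterator:

fn print_top_level(tree: &SyntaxTree) {
    let root = tree.root_node();
    for i in 0..root.child_count() {
        if let Some(child) = root.child(i) {
            // kind_id is the numeric node type; byte_range spans the source text.
            println!("node {} at {:?}", child.kind_id(), child.byte_range());
        }
    }
}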