From ae0d4189e1f88e657183d3a245d2b5707a91d2bd Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sat, 27 Jul 2024 15:43:44 +0200 Subject: [PATCH] tmp --- helix-core/src/syntax.rs | 2 +- helix-syntax/src/config.rs | 257 ++--- helix-syntax/src/highlighter.rs | 877 +++++++++--------- helix-syntax/src/highlighter2.rs | 206 ++++ helix-syntax/src/injections_tree.rs | 268 ------ helix-syntax/src/lib.rs | 355 +++---- helix-syntax/src/parse.rs | 828 ++++++++--------- helix-syntax/src/query_iter.rs | 236 +++++ helix-syntax/src/ropey.rs | 28 - helix-syntax/src/text_object.rs | 93 ++ helix-syntax/src/tree_sitter.rs | 10 +- helix-syntax/src/tree_sitter/query.rs | 93 +- .../src/tree_sitter/query/predicate.rs | 57 +- .../src/tree_sitter/query/property.rs | 5 +- helix-syntax/src/tree_sitter/query_cursor.rs | 32 +- 15 files changed, 1823 insertions(+), 1524 deletions(-) create mode 100644 helix-syntax/src/highlighter2.rs delete mode 100644 helix-syntax/src/injections_tree.rs create mode 100644 helix-syntax/src/query_iter.rs create mode 100644 helix-syntax/src/text_object.rs diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index e9451ed43..e2d862f47 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -246,7 +246,7 @@ impl LanguageConfiguration { if query_text.is_empty() { return None; } - let lang = &self.highlight_config.get()?.as_ref()?.language; + let lang = &self.highlight_config.get()?.as_ref()?.grammar; Query::new(lang, &query_text) .map_err(|e| { log::error!( diff --git a/helix-syntax/src/config.rs b/helix-syntax/src/config.rs index a983d2932..35774fddf 100644 --- a/helix-syntax/src/config.rs +++ b/helix-syntax/src/config.rs @@ -1,36 +1,38 @@ +use std::borrow::Cow; use std::path::Path; use std::sync::Arc; +use crate::tree_sitter::query::{Capture, Pattern, QueryStr, UserPredicate}; +use crate::tree_sitter::{query, Grammar, Query, QueryMatch, SyntaxTreeNode}; use arc_swap::ArcSwap; use helix_stdx::rope::{self, RopeSliceExt}; use once_cell::sync::Lazy; use regex::Regex; use ropey::RopeSlice; -use tree_sitter::{Language as Grammar, Node, Query, QueryError, QueryMatch}; +use crate::byte_range_to_str; use crate::highlighter::Highlight; -use crate::{byte_range_to_str, IncludedChildren, InjectionLanguageMarker, SHEBANG}; /// Contains the data needed to highlight code written in a particular language. /// /// This struct is immutable and can be shared between threads. 
#[derive(Debug)] pub struct HighlightConfiguration { - pub language: Grammar, + pub grammar: Grammar, pub query: Query, pub(crate) injections_query: Query, - pub(crate) combined_injections_patterns: Vec, - pub(crate) highlights_pattern_index: usize, - pub(crate) highlight_indices: ArcSwap>>, + pub(crate) combined_injections_patterns: Vec, + first_highlights_pattern: Pattern, + pub(crate) highlight_indices: ArcSwap>, pub(crate) non_local_variable_patterns: Vec, - pub(crate) injection_content_capture_index: Option, - pub(crate) injection_language_capture_index: Option, - pub(crate) injection_filename_capture_index: Option, - pub(crate) injection_shebang_capture_index: Option, - pub(crate) local_scope_capture_index: Option, - pub(crate) local_def_capture_index: Option, - pub(crate) local_def_value_capture_index: Option, - pub(crate) local_ref_capture_index: Option, + pub(crate) injection_content_capture: Option, + pub(crate) injection_language_capture: Option, + pub(crate) injection_filename_capture: Option, + pub(crate) injection_shebang_capture: Option, + pub(crate) local_scope_capture: Option, + pub(crate) local_def_capture: Option, + pub(crate) local_def_value_capture: Option, + pub(crate) local_ref_capture: Option, } impl HighlightConfiguration { @@ -49,105 +51,89 @@ impl HighlightConfiguration { /// /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. pub fn new( - language: Grammar, + grammar: Grammar, + path: impl AsRef, highlights_query: &str, injection_query: &str, locals_query: &str, - ) -> Result { + ) -> Result { // Concatenate the query strings, keeping track of the start offset of each section. let mut query_source = String::new(); query_source.push_str(locals_query); let highlights_query_offset = query_source.len(); query_source.push_str(highlights_query); + let mut non_local_variable_patterns = Vec::with_capacity(32); // Construct a single query by concatenating the three query strings, but record the // range of pattern indices that belong to each individual string. - let query = Query::new(&language, &query_source)?; - let mut highlights_pattern_index = 0; - for i in 0..(query.pattern_count()) { - let pattern_offset = query.start_byte_for_pattern(i); - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; + let query = Query::new(grammar, &query_source, path, |pattern, predicate| { + match predicate { + UserPredicate::IsPropertySet { + negate: true, + key: "local", + val: None, + } => { + if non_local_variable_patterns.len() < pattern.idx() { + non_local_variable_patterns.resize(pattern.idx(), false) + } + non_local_variable_patterns[pattern.idx()] = true; + } + predicate => { + return Err(format!("unsupported predicate {predicate}").into()); + } } - } - - let injections_query = Query::new(&language, injection_query)?; - let combined_injections_patterns = (0..injections_query.pattern_count()) - .filter(|&i| { - injections_query - .property_settings(i) - .iter() - .any(|s| &*s.key == "injection.combined") - }) - .collect(); - - // Find all of the highlighting patterns that are disabled for nodes that - // have been identified as local variables. - let non_local_variable_patterns = (0..query.pattern_count()) - .map(|i| { - query - .property_predicates(i) - .iter() - .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") - }) - .collect(); + Ok(()) + })?; - // Store the numeric ids for all of the special captures. 
- let mut injection_content_capture_index = None; - let mut injection_language_capture_index = None; - let mut injection_filename_capture_index = None; - let mut injection_shebang_capture_index = None; - let mut local_def_capture_index = None; - let mut local_def_value_capture_index = None; - let mut local_ref_capture_index = None; - let mut local_scope_capture_index = None; - for (i, name) in query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "local.definition" => local_def_capture_index = i, - "local.definition-value" => local_def_value_capture_index = i, - "local.reference" => local_ref_capture_index = i, - "local.scope" => local_scope_capture_index = i, - _ => {} + let mut combined_injections_patterns = Vec::new(); + let injections_query = Query::new(grammar, injection_query, path, |pattern, predicate| { + match predicate { + UserPredicate::SetProperty { + key: "injection.combined", + val: None, + } => combined_injections_patterns.push(pattern), + predicate => { + return Err(format!("unsupported predicate {predicate}").into()); + } } - } + Ok(()) + })?; - for (i, name) in injections_query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, - "injection.filename" => injection_filename_capture_index = i, - "injection.shebang" => injection_shebang_capture_index = i, - _ => {} - } - } + let first_highlights_pattern = query + .patterns() + .find(|pattern| query.start_byte_for_pattern(*pattern) >= highlights_query_offset) + .unwrap_or(Pattern::SENTINEL); + + let injection_content_capture = query.get_capture("injection.content"); + let injection_language_capture = query.get_capture("injection.language"); + let injection_filename_capture = query.get_capture("injection.filename"); + let injection_shebang_capture = query.get_capture("injection.shebang"); + let local_def_capture = query.get_capture("local.definition"); + let local_def_value_capture = query.get_capture("local.definition-value"); + let local_ref_capture = query.get_capture("local.reference"); + let local_scope_capture = query.get_capture("local.scope"); - let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); + let highlight_indices = + ArcSwap::from_pointee(vec![Highlight::NONE; query.num_captures() as usize]); Ok(Self { - language, + grammar, query, injections_query, combined_injections_patterns, - highlights_pattern_index, + first_highlights_pattern, highlight_indices, non_local_variable_patterns, - injection_content_capture_index, - injection_language_capture_index, - injection_filename_capture_index, - injection_shebang_capture_index, - local_scope_capture_index, - local_def_capture_index, - local_def_value_capture_index, - local_ref_capture_index, + injection_content_capture, + injection_language_capture, + injection_filename_capture, + injection_shebang_capture, + local_scope_capture, + local_def_capture, + local_def_value_capture, + local_ref_capture, }) } - /// Get a slice containing all of the highlight names used in the configuration. - pub fn names(&self) -> &[&str] { - self.query.capture_names() - } - /// Set the list of recognized highlight names. 
/// /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated @@ -162,13 +148,12 @@ impl HighlightConfiguration { let mut capture_parts = Vec::new(); let indices: Vec<_> = self .query - .capture_names() - .iter() - .map(move |capture_name| { + .captures() + .map(move |(_, capture_name)| { capture_parts.clear(); capture_parts.extend(capture_name.split('.')); - let mut best_index = None; + let mut best_index = u32::MAX; let mut best_match_len = 0; for (i, recognized_name) in recognized_names.iter().enumerate() { let mut len = 0; @@ -183,11 +168,11 @@ impl HighlightConfiguration { } } if matches && len > best_match_len { - best_index = Some(i); + best_index = i as u32; best_match_len = len; } } - best_index.map(Highlight) + Highlight(best_index) }) .collect(); @@ -198,21 +183,24 @@ impl HighlightConfiguration { &self, query_match: &QueryMatch<'a, 'a>, source: RopeSlice<'a>, - ) -> (Option>, Option>) { + ) -> ( + Option>, + Option>, + ) { let mut injection_capture = None; let mut content_node = None; - for capture in query_match.captures { - let index = Some(capture.index); - if index == self.injection_language_capture_index { - let name = byte_range_to_str(capture.node.byte_range(), source); + for matched_node in query_match.matched_nodes() { + let capture = Some(matched_node.capture); + if capture == self.injection_language_capture { + let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source); injection_capture = Some(InjectionLanguageMarker::Name(name)); - } else if index == self.injection_filename_capture_index { - let name = byte_range_to_str(capture.node.byte_range(), source); + } else if capture == self.injection_filename_capture { + let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source); let path = Path::new(name.as_ref()).to_path_buf(); injection_capture = Some(InjectionLanguageMarker::Filename(path.into())); - } else if index == self.injection_shebang_capture_index { - let node_slice = source.byte_slice(capture.node.byte_range()); + } else if capture == self.injection_shebang_capture { + let node_slice = source.byte_slice(matched_node.syntax_node.byte_range()); // some languages allow space and newlines before the actual string content // so a shebang could be on either the first or second line @@ -222,9 +210,6 @@ impl HighlightConfiguration { node_slice }; - static SHEBANG_REGEX: Lazy = - Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); - injection_capture = SHEBANG_REGEX .captures_iter(lines.regex_input()) .map(|cap| { @@ -232,8 +217,8 @@ impl HighlightConfiguration { InjectionLanguageMarker::Shebang(cap.into()) }) .next() - } else if index == self.injection_content_capture_index { - content_node = Some(capture.node); + } else if capture == self.injection_content_capture { + content_node = Some(matched_node.syntax_node.clone()); } } (injection_capture, content_node) @@ -246,7 +231,7 @@ impl HighlightConfiguration { source: RopeSlice<'a>, ) -> ( Option>, - Option>, + Option>, IncludedChildren, ) { let (mut injection_capture, content_node) = self.injection_pair(query_match, source); @@ -282,18 +267,20 @@ impl HighlightConfiguration { (injection_capture, content_node, included_children) } - pub fn load_query( - &self, - language: &str, - filename: &str, - read_query_text: impl FnMut(&str, &str) -> String, - ) -> Result, QueryError> { - let query_text = read_query(language, filename, read_query_text); - if query_text.is_empty() { - return Ok(None); - } - Query::new(&self.language, &query_text).map(Some) - } + + 
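// Illustrative sketch (not part of the patch): the longest-prefix matching that
// `configure` above performs when mapping a query capture name onto the list of
// recognized theme scopes. The helper name and the inputs are made up.
fn best_recognized_scope(capture_name: &str, recognized_names: &[&str]) -> Option<usize> {
    let capture_parts: Vec<&str> = capture_name.split('.').collect();
    let mut best: Option<(usize, usize)> = None; // (index into recognized_names, matched parts)
    for (i, name) in recognized_names.iter().enumerate() {
        let len = name.split('.').count();
        // every dot-separated part of the recognized name must match the capture's prefix
        let matches = len <= capture_parts.len()
            && name.split('.').zip(&capture_parts).all(|(a, &b)| a == b);
        if matches && best.map_or(true, |(_, best_len)| len > best_len) {
            best = Some((i, len));
        }
    }
    best.map(|(i, _)| i)
}
// For example, a `@function.builtin` capture with recognized scopes
// ["function", "function.builtin.static"] resolves to "function": the longer
// scope has more parts than the capture, so only the one-part prefix matches.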
// pub fn load_query( + // &self, + // language: &str, + // filename: &str, + // read_query_text: impl FnMut(&str, &str) -> String, + // ) -> Result, QueryError> { + // let query_text = read_query(language, filename, read_query_text); + // if query_text.is_empty() { + // return Ok(None); + // } + + // Query::new(&self.grammar, &query_text, ).map(Some) + // } } /// reads a query by invoking `read_query_text`, handeles any `inherits` directives @@ -329,3 +316,31 @@ pub fn read_query( } read_query_impl(language, filename, &mut read_query_text) } + +const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; +static SHEBANG_REGEX: Lazy = Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); + +struct InjectionSettings { + include_children: IncludedChildren, + language: Option, +} + +#[derive(Debug, Clone)] +pub enum InjectionLanguageMarker<'a> { + Name(Cow<'a, str>), + Filename(Cow<'a, Path>), + Shebang(String), +} + +#[derive(Clone)] +enum IncludedChildren { + None, + All, + Unnamed, +} + +impl Default for IncludedChildren { + fn default() -> Self { + Self::None + } +} diff --git a/helix-syntax/src/highlighter.rs b/helix-syntax/src/highlighter.rs index 1b53672f0..8801b13f2 100644 --- a/helix-syntax/src/highlighter.rs +++ b/helix-syntax/src/highlighter.rs @@ -1,439 +1,438 @@ -use std::borrow::Cow; -use std::cell::RefCell; -use std::sync::atomic::{self, AtomicUsize}; -use std::{fmt, iter, mem, ops}; - -use ropey::RopeSlice; -use tree_sitter::{QueryCaptures, QueryCursor, Tree}; - -use crate::ropey::RopeProvider; -use crate::{ - byte_range_to_str, Error, HighlightConfiguration, Syntax, PARSER, TREE_SITTER_MATCH_LIMIT, -}; - -const CANCELLATION_CHECK_INTERVAL: usize = 100; - -/// Indicates which highlight should be applied to a region of source code. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct Highlight(pub usize); - -/// Represents a single step in rendering a syntax-highlighted document. -#[derive(Copy, Clone, Debug)] -pub enum HighlightEvent { - Source { start: usize, end: usize }, - HighlightStart(Highlight), - HighlightEnd, -} - -#[derive(Debug)] -struct LocalDef<'a> { - name: Cow<'a, str>, - value_range: ops::Range, - highlight: Option, -} - -#[derive(Debug)] -struct LocalScope<'a> { - inherits: bool, - range: ops::Range, - local_defs: Vec>, -} - -#[derive(Debug)] -struct HighlightIter<'a> { - source: RopeSlice<'a>, - byte_offset: usize, - cancellation_flag: Option<&'a AtomicUsize>, - layers: Vec>, - iter_count: usize, - next_event: Option, - last_highlight_range: Option<(usize, usize, u32)>, -} - -struct HighlightIterLayer<'a> { - _tree: Option, - cursor: QueryCursor, - captures: RefCell, &'a [u8]>>>, - config: &'a HighlightConfiguration, - highlight_end_stack: Vec, - scope_stack: Vec>, - depth: u32, -} - -impl<'a> fmt::Debug for HighlightIterLayer<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("HighlightIterLayer").finish() - } -} - -impl<'a> HighlightIterLayer<'a> { - // First, sort scope boundaries by their byte offset in the document. At a - // given position, emit scope endings before scope beginnings. Finally, emit - // scope boundaries from deeper layers first. 
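// Illustrative sketch (not part of the patch): how `read_query` above splices in
// `; inherits: <language>` directives by recursively invoking the reader closure.
// The closure and the query text here are hypothetical.
fn example_inherits() {
    let read = |language: &str, _filename: &str| -> String {
        match language {
            "cpp" => "; inherits: c\n(namespace_identifier) @namespace\n".to_string(),
            "c" => "(identifier) @variable\n".to_string(),
            _ => String::new(),
        }
    };
    // the `; inherits: c` line is replaced with the contents of the C query,
    // so the combined query contains the patterns of both languages
    let combined = read_query("cpp", "highlights.scm", read);
    assert!(combined.contains("(identifier) @variable"));
    assert!(combined.contains("(namespace_identifier) @namespace"));
}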
- fn sort_key(&self) -> Option<(usize, bool, isize)> { - let depth = -(self.depth as isize); - let next_start = self - .captures - .borrow_mut() - .peek() - .map(|(m, i)| m.captures[*i].node.start_byte()); - let next_end = self.highlight_end_stack.last().cloned(); - match (next_start, next_end) { - (Some(start), Some(end)) => { - if start < end { - Some((start, true, depth)) - } else { - Some((end, false, depth)) - } - } - (Some(i), None) => Some((i, true, depth)), - (None, Some(j)) => Some((j, false, depth)), - _ => None, - } - } -} - -impl<'a> HighlightIter<'a> { - fn emit_event( - &mut self, - offset: usize, - event: Option, - ) -> Option> { - let result; - if self.byte_offset < offset { - result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: offset, - })); - self.byte_offset = offset; - self.next_event = event; - } else { - result = event.map(Ok); - } - self.sort_layers(); - result - } - - fn sort_layers(&mut self) { - while !self.layers.is_empty() { - if let Some(sort_key) = self.layers[0].sort_key() { - let mut i = 0; - while i + 1 < self.layers.len() { - if let Some(next_offset) = self.layers[i + 1].sort_key() { - if next_offset < sort_key { - i += 1; - continue; - } - } else { - let layer = self.layers.remove(i + 1); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); - } - break; - } - if i > 0 { - self.layers[0..(i + 1)].rotate_left(1); - } - break; - } else { - let layer = self.layers.remove(0); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); - } - } - } -} - -impl<'a> Iterator for HighlightIter<'a> { - type Item = Result; - - fn next(&mut self) -> Option { - 'main: loop { - // If we've already determined the next highlight boundary, just return it. - if let Some(e) = self.next_event.take() { - return Some(Ok(e)); - } - - // Periodically check for cancellation, returning `Cancelled` error if the - // cancellation flag was flipped. - if let Some(cancellation_flag) = self.cancellation_flag { - self.iter_count += 1; - if self.iter_count >= CANCELLATION_CHECK_INTERVAL { - self.iter_count = 0; - if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 { - return Some(Err(Error::Cancelled)); - } - } - } - - // If none of the layers have any more highlight boundaries, terminate. - if self.layers.is_empty() { - let len = self.source.len_bytes(); - return if self.byte_offset < len { - let result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: len, - })); - self.byte_offset = len; - result - } else { - None - }; - } - - // Get the next capture from whichever layer has the earliest highlight boundary. - let range; - let layer = &mut self.layers[0]; - let captures = layer.captures.get_mut(); - if let Some((next_match, capture_index)) = captures.peek() { - let next_capture = next_match.captures[*capture_index]; - range = next_capture.node.byte_range(); - - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - } - // If there are no more captures, then emit any remaining highlight end events. 
- // And if there are none of those, then just advance to the end of the document. - else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } else { - return self.emit_event(self.source.len_bytes(), None); - }; - - let (mut match_, capture_index) = captures.next().unwrap(); - let mut capture = match_.captures[capture_index]; - - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } - - // If this capture is for tracking local variables, then process the - // local variable info. - let mut reference_highlight = None; - let mut definition_highlight = None; - while match_.pattern_index < layer.config.highlights_pattern_index { - // If the node represents a local scope, push a new local scope onto - // the scope stack. - if Some(capture.index) == layer.config.local_scope_capture_index { - definition_highlight = None; - let mut scope = LocalScope { - inherits: true, - range: range.clone(), - local_defs: Vec::new(), - }; - for prop in layer.config.query.property_settings(match_.pattern_index) { - if let "local.scope-inherits" = prop.key.as_ref() { - scope.inherits = - prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); - } - } - layer.scope_stack.push(scope); - } - // If the node represents a definition, add a new definition to the - // local scope at the top of the scope stack. - else if Some(capture.index) == layer.config.local_def_capture_index { - reference_highlight = None; - let scope = layer.scope_stack.last_mut().unwrap(); - - let mut value_range = 0..0; - for capture in match_.captures { - if Some(capture.index) == layer.config.local_def_value_capture_index { - value_range = capture.node.byte_range(); - } - } - - let name = byte_range_to_str(range.clone(), self.source); - scope.local_defs.push(LocalDef { - name, - value_range, - highlight: None, - }); - definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight); - } - // If the node represents a reference, then try to find the corresponding - // definition in the scope stack. - else if Some(capture.index) == layer.config.local_ref_capture_index - && definition_highlight.is_none() - { - definition_highlight = None; - let name = byte_range_to_str(range.clone(), self.source); - for scope in layer.scope_stack.iter().rev() { - if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { - if def.name == name && range.start >= def.value_range.end { - Some(def.highlight) - } else { - None - } - }) { - reference_highlight = highlight; - break; - } - if !scope.inherits { - break; - } - } - } - - // Continue processing any additional matches for the same node. - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - - // Otherwise, this capture must represent a highlight. - // If this exact range has already been highlighted by an earlier pattern, or by - // a different layer, then skip over this one. 
- if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { - if range.start == last_start && range.end == last_end && layer.depth < last_depth { - self.sort_layers(); - continue 'main; - } - } - - // If the current node was found to be a local variable, then skip over any - // highlighting patterns that are disabled for local variables. - if definition_highlight.is_some() || reference_highlight.is_some() { - while layer.config.non_local_variable_patterns[match_.pattern_index] { - match_.remove(); - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - } - - // Once a highlighting pattern is found for the current node, skip over - // any later highlighting patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. - while let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - captures.next(); - } else { - break; - } - } - - let current_highlight = layer.config.highlight_indices.load()[capture.index as usize]; - - // If this node represents a local definition, then store the current - // highlight value on the local scope entry representing this node. - if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; - } - - // Emit a scope start event and push the node's end position to the stack. - if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); - } - - self.sort_layers(); - } - } -} - -impl Syntax { - /// Iterate over the highlighted regions for a given slice of source code. - pub fn highlight_iter<'a>( - &'a self, - source: RopeSlice<'a>, - range: Option>, - cancellation_flag: Option<&'a AtomicUsize>, - ) -> impl Iterator> + 'a { - let mut layers = self - .layers - .iter() - .filter_map(|(_, layer)| { - // TODO: if range doesn't overlap layer range, skip it - - // Reuse a cursor from the pool if available. - let mut cursor = PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) - }); - - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. 
- let cursor_ref = - unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - - // if reusing cursors & no range this resets to whole range - cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); - cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let mut captures = cursor_ref - .captures( - &layer.config.query, - layer.tree().root_node(), - RopeProvider(source), - ) - .peekable(); - - // If there's no captures, skip the layer - captures.peek()?; - - Some(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - _tree: None, - captures: RefCell::new(captures), - config: layer.config.as_ref(), // TODO: just reuse `layer` - depth: layer.depth, // TODO: just reuse `layer` - }) - }) - .collect::>(); - - layers.sort_unstable_by_key(|layer| layer.sort_key()); - - let mut result = HighlightIter { - source, - byte_offset: range.map_or(0, |r| r.start), - cancellation_flag, - iter_count: 0, - layers, - next_event: None, - last_highlight_range: None, - }; - result.sort_layers(); - result - } -} +pub use super::highlighter2::*; + +// use std::borrow::Cow; +// use std::cell::RefCell; +// use std::sync::atomic::{self, AtomicUsize}; +// use std::{fmt, iter, mem, ops}; + +// use ropey::RopeSlice; +// use tree_sitter::{QueryCaptures, QueryCursor, Tree}; + +// use crate::{byte_range_to_str, Error, HighlightConfiguration, Syntax, TREE_SITTER_MATCH_LIMIT}; + +// const CANCELLATION_CHECK_INTERVAL: usize = 100; + +// /// Indicates which highlight should be applied to a region of source code. +// #[derive(Copy, Clone, Debug, PartialEq, Eq)] +// pub struct Highlight(pub usize); + +// /// Represents a single step in rendering a syntax-highlighted document. +// #[derive(Copy, Clone, Debug)] +// pub enum HighlightEvent { +// Source { start: usize, end: usize }, +// HighlightStart(Highlight), +// HighlightEnd, +// } + +// #[derive(Debug)] +// struct LocalDef<'a> { +// name: Cow<'a, str>, +// value_range: ops::Range, +// highlight: Option, +// } + +// #[derive(Debug)] +// struct LocalScope<'a> { +// inherits: bool, +// range: ops::Range, +// local_defs: Vec>, +// } + +// #[derive(Debug)] +// struct HighlightIter<'a> { +// source: RopeSlice<'a>, +// byte_offset: usize, +// cancellation_flag: Option<&'a AtomicUsize>, +// layers: Vec>, +// iter_count: usize, +// next_event: Option, +// last_highlight_range: Option<(usize, usize, u32)>, +// } + +// struct HighlightIterLayer<'a> { +// _tree: Option, +// cursor: QueryCursor, +// captures: RefCell, &'a [u8]>>>, +// config: &'a HighlightConfiguration, +// highlight_end_stack: Vec, +// scope_stack: Vec>, +// depth: u32, +// } + +// impl<'a> fmt::Debug for HighlightIterLayer<'a> { +// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +// f.debug_struct("HighlightIterLayer").finish() +// } +// } + +// impl<'a> HighlightIterLayer<'a> { +// // First, sort scope boundaries by their byte offset in the document. At a +// // given position, emit scope endings before scope beginnings. Finally, emit +// // scope boundaries from deeper layers first. 
+// fn sort_key(&self) -> Option<(usize, bool, isize)> { +// let depth = -(self.depth as isize); +// let next_start = self +// .captures +// .borrow_mut() +// .peek() +// .map(|(m, i)| m.captures[*i].node.start_byte()); +// let next_end = self.highlight_end_stack.last().cloned(); +// match (next_start, next_end) { +// (Some(start), Some(end)) => { +// if start < end { +// Some((start, true, depth)) +// } else { +// Some((end, false, depth)) +// } +// } +// (Some(i), None) => Some((i, true, depth)), +// (None, Some(j)) => Some((j, false, depth)), +// _ => None, +// } +// } +// } + +// impl<'a> HighlightIter<'a> { +// fn emit_event( +// &mut self, +// offset: usize, +// event: Option, +// ) -> Option> { +// let result; +// if self.byte_offset < offset { +// result = Some(Ok(HighlightEvent::Source { +// start: self.byte_offset, +// end: offset, +// })); +// self.byte_offset = offset; +// self.next_event = event; +// } else { +// result = event.map(Ok); +// } +// self.sort_layers(); +// result +// } + +// fn sort_layers(&mut self) { +// while !self.layers.is_empty() { +// if let Some(sort_key) = self.layers[0].sort_key() { +// let mut i = 0; +// while i + 1 < self.layers.len() { +// if let Some(next_offset) = self.layers[i + 1].sort_key() { +// if next_offset < sort_key { +// i += 1; +// continue; +// } +// } else { +// let layer = self.layers.remove(i + 1); +// PARSER.with(|ts_parser| { +// let highlighter = &mut ts_parser.borrow_mut(); +// highlighter.cursors.push(layer.cursor); +// }); +// } +// break; +// } +// if i > 0 { +// self.layers[0..(i + 1)].rotate_left(1); +// } +// break; +// } else { +// let layer = self.layers.remove(0); +// PARSER.with(|ts_parser| { +// let highlighter = &mut ts_parser.borrow_mut(); +// highlighter.cursors.push(layer.cursor); +// }); +// } +// } +// } +// } + +// impl<'a> Iterator for HighlightIter<'a> { +// type Item = Result; + +// fn next(&mut self) -> Option { +// 'main: loop { +// // If we've already determined the next highlight boundary, just return it. +// if let Some(e) = self.next_event.take() { +// return Some(Ok(e)); +// } + +// // Periodically check for cancellation, returning `Cancelled` error if the +// // cancellation flag was flipped. +// if let Some(cancellation_flag) = self.cancellation_flag { +// self.iter_count += 1; +// if self.iter_count >= CANCELLATION_CHECK_INTERVAL { +// self.iter_count = 0; +// if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 { +// return Some(Err(Error::Cancelled)); +// } +// } +// } + +// // If none of the layers have any more highlight boundaries, terminate. +// if self.layers.is_empty() { +// let len = self.source.len_bytes(); +// return if self.byte_offset < len { +// let result = Some(Ok(HighlightEvent::Source { +// start: self.byte_offset, +// end: len, +// })); +// self.byte_offset = len; +// result +// } else { +// None +// }; +// } + +// // Get the next capture from whichever layer has the earliest highlight boundary. +// let range; +// let layer = &mut self.layers[0]; +// let captures = layer.captures.get_mut(); +// if let Some((next_match, capture_index)) = captures.peek() { +// let next_capture = next_match.captures[*capture_index]; +// range = next_capture.node.byte_range(); + +// // If any previous highlight ends before this node starts, then before +// // processing this capture, emit the source code up until the end of the +// // previous highlight, and an end event for that highlight. 
+// if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { +// if end_byte <= range.start { +// layer.highlight_end_stack.pop(); +// return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); +// } +// } +// } +// // If there are no more captures, then emit any remaining highlight end events. +// // And if there are none of those, then just advance to the end of the document. +// else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { +// layer.highlight_end_stack.pop(); +// return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); +// } else { +// return self.emit_event(self.source.len_bytes(), None); +// }; + +// let (mut match_, capture_index) = captures.next().unwrap(); +// let mut capture = match_.captures[capture_index]; + +// // Remove from the local scope stack any local scopes that have already ended. +// while range.start > layer.scope_stack.last().unwrap().range.end { +// layer.scope_stack.pop(); +// } + +// // If this capture is for tracking local variables, then process the +// // local variable info. +// let mut reference_highlight = None; +// let mut definition_highlight = None; +// while match_.pattern_index < layer.config.highlights_pattern_index { +// // If the node represents a local scope, push a new local scope onto +// // the scope stack. +// if Some(capture.index) == layer.config.local_scope_capture_index { +// definition_highlight = None; +// let mut scope = LocalScope { +// inherits: true, +// range: range.clone(), +// local_defs: Vec::new(), +// }; +// for prop in layer.config.query.property_settings(match_.pattern_index) { +// if let "local.scope-inherits" = prop.key.as_ref() { +// scope.inherits = +// prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); +// } +// } +// layer.scope_stack.push(scope); +// } +// // If the node represents a definition, add a new definition to the +// // local scope at the top of the scope stack. +// else if Some(capture.index) == layer.config.local_def_capture_index { +// reference_highlight = None; +// let scope = layer.scope_stack.last_mut().unwrap(); + +// let mut value_range = 0..0; +// for capture in match_.captures { +// if Some(capture.index) == layer.config.local_def_value_capture_index { +// value_range = capture.node.byte_range(); +// } +// } + +// let name = byte_range_to_str(range.clone(), self.source); +// scope.local_defs.push(LocalDef { +// name, +// value_range, +// highlight: None, +// }); +// definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight); +// } +// // If the node represents a reference, then try to find the corresponding +// // definition in the scope stack. +// else if Some(capture.index) == layer.config.local_ref_capture_index +// && definition_highlight.is_none() +// { +// definition_highlight = None; +// let name = byte_range_to_str(range.clone(), self.source); +// for scope in layer.scope_stack.iter().rev() { +// if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { +// if def.name == name && range.start >= def.value_range.end { +// Some(def.highlight) +// } else { +// None +// } +// }) { +// reference_highlight = highlight; +// break; +// } +// if !scope.inherits { +// break; +// } +// } +// } + +// // Continue processing any additional matches for the same node. 
+// if let Some((next_match, next_capture_index)) = captures.peek() { +// let next_capture = next_match.captures[*next_capture_index]; +// if next_capture.node == capture.node { +// capture = next_capture; +// match_ = captures.next().unwrap().0; +// continue; +// } +// } + +// self.sort_layers(); +// continue 'main; +// } + +// // Otherwise, this capture must represent a highlight. +// // If this exact range has already been highlighted by an earlier pattern, or by +// // a different layer, then skip over this one. +// if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { +// if range.start == last_start && range.end == last_end && layer.depth < last_depth { +// self.sort_layers(); +// continue 'main; +// } +// } + +// // If the current node was found to be a local variable, then skip over any +// // highlighting patterns that are disabled for local variables. +// if definition_highlight.is_some() || reference_highlight.is_some() { +// while layer.config.non_local_variable_patterns[match_.pattern_index] { +// match_.remove(); +// if let Some((next_match, next_capture_index)) = captures.peek() { +// let next_capture = next_match.captures[*next_capture_index]; +// if next_capture.node == capture.node { +// capture = next_capture; +// match_ = captures.next().unwrap().0; +// continue; +// } +// } + +// self.sort_layers(); +// continue 'main; +// } +// } + +// // Once a highlighting pattern is found for the current node, skip over +// // any later highlighting patterns that also match this node. Captures +// // for a given node are ordered by pattern index, so these subsequent +// // captures are guaranteed to be for highlighting, not injections or +// // local variables. +// while let Some((next_match, next_capture_index)) = captures.peek() { +// let next_capture = next_match.captures[*next_capture_index]; +// if next_capture.node == capture.node { +// captures.next(); +// } else { +// break; +// } +// } + +// let current_highlight = layer.config.highlight_indices.load()[capture.index as usize]; + +// // If this node represents a local definition, then store the current +// // highlight value on the local scope entry representing this node. +// if let Some(definition_highlight) = definition_highlight { +// *definition_highlight = current_highlight; +// } + +// // Emit a scope start event and push the node's end position to the stack. +// if let Some(highlight) = reference_highlight.or(current_highlight) { +// self.last_highlight_range = Some((range.start, range.end, layer.depth)); +// layer.highlight_end_stack.push(range.end); +// return self +// .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); +// } + +// self.sort_layers(); +// } +// } +// } + +// impl Syntax { +// /// Iterate over the highlighted regions for a given slice of source code. +// pub fn highlight_iter<'a>( +// &'a self, +// source: RopeSlice<'a>, +// range: Option>, +// cancellation_flag: Option<&'a AtomicUsize>, +// ) -> impl Iterator> + 'a { +// let mut layers = self +// .layers +// .iter() +// .filter_map(|(_, layer)| { +// // TODO: if range doesn't overlap layer range, skip it + +// // Reuse a cursor from the pool if available. +// let mut cursor = PARSER.with(|ts_parser| { +// let highlighter = &mut ts_parser.borrow_mut(); +// highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) +// }); + +// // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which +// // prevents them from being moved. 
But both of these values are really just +// // pointers, so it's actually ok to move them. +// let cursor_ref = +// unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + +// // if reusing cursors & no range this resets to whole range +// cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); +// cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT); + +// let mut captures = cursor_ref +// .captures( +// &layer.config.query, +// layer.tree().root_node(), +// RopeProvider(source), +// ) +// .peekable(); + +// // If there's no captures, skip the layer +// captures.peek()?; + +// Some(HighlightIterLayer { +// highlight_end_stack: Vec::new(), +// scope_stack: vec![LocalScope { +// inherits: false, +// range: 0..usize::MAX, +// local_defs: Vec::new(), +// }], +// cursor, +// _tree: None, +// captures: RefCell::new(captures), +// config: layer.config.as_ref(), // TODO: just reuse `layer` +// depth: layer.depth, // TODO: just reuse `layer` +// }) +// }) +// .collect::>(); + +// layers.sort_unstable_by_key(|layer| layer.sort_key()); + +// let mut result = HighlightIter { +// source, +// byte_offset: range.map_or(0, |r| r.start), +// cancellation_flag, +// iter_count: 0, +// layers, +// next_event: None, +// last_highlight_range: None, +// }; +// result.sort_layers(); +// result +// } +// } diff --git a/helix-syntax/src/highlighter2.rs b/helix-syntax/src/highlighter2.rs new file mode 100644 index 000000000..9abe5726c --- /dev/null +++ b/helix-syntax/src/highlighter2.rs @@ -0,0 +1,206 @@ +use std::borrow::Cow; +use std::iter::{self, Peekable}; +use std::mem::{replace, take}; +use std::slice; + +use hashbrown::HashMap; + +use crate::query_iter::{MatchedNode, QueryIter, QueryIterEvent}; +use crate::{Injection, LayerId, Range, Syntax}; + +/// Indicates which highlight should be applied to a region of source code. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Highlight(pub u32);
+
+impl Highlight {
+    pub(crate) const NONE: Highlight = Highlight(u32::MAX);
+}
+
+#[derive(Debug)]
+struct LocalDef<'a> {
+    name: Cow<'a, str>,
+    value_range: Range,
+    highlight: Option<Highlight>,
+}
+
+#[derive(Debug)]
+struct LocalScope<'a> {
+    inherits: bool,
+    range: Range,
+    local_defs: Vec<LocalDef<'a>>,
+}
+
+#[derive(Debug)]
+struct HighlightedNode {
+    end: u32,
+    highlight: Highlight,
+}
+
+#[derive(Debug, Default)]
+struct LayerData<'a> {
+    parent_highlights: usize,
+    dormant_highlights: Vec<HighlightedNode>,
+    scope_stack: Vec<LocalScope<'a>>,
+}
+
+struct HighlighterConfig<'a> {
+    new_precedance: bool,
+    highlight_indices: &'a [Highlight],
+}
+
+pub struct Highligther<'a> {
+    query: QueryIter<'a, LayerData<'a>>,
+    next_query_event: Option<QueryIterEvent<'a, LayerData<'a>>>,
+    active_highlights: Vec<HighlightedNode>,
+    next_highlight_end: u32,
+    next_highlight_start: u32,
+    config: HighlighterConfig<'a>,
+}
+
+pub struct HighlightList<'a>(slice::Iter<'a, HighlightedNode>);
+
+impl<'a> Iterator for HighlightList<'a> {
+    type Item = Highlight;
+
+    fn next(&mut self) -> Option<Highlight> {
+        self.0.next().map(|node| node.highlight)
+    }
+}
+
+pub enum HighlighEvent<'a> {
+    RefreshHiglights(HighlightList<'a>),
+    PushHighlights(HighlightList<'a>),
+}
+
+impl<'a> Highligther<'a> {
+    pub fn active_highlights(&self) -> HighlightList<'_> {
+        HighlightList(self.active_highlights.iter())
+    }
+
+    pub fn next_event_offset(&self) -> u32 {
+        self.next_highlight_start.min(self.next_highlight_end)
+    }
+
+    pub fn advance(&mut self) -> HighlighEvent<'_> {
+        let mut refresh = false;
+        let prev_stack_size = self.active_highlights.len();
+
+        let pos = self.next_event_offset();
+        if self.next_highlight_end == pos {
+            self.process_injection_ends();
+            self.process_higlight_end();
+            refresh = true;
+        }
+
+        let mut first_highlight = true;
+        while self.next_highlight_start == pos {
+            let Some(query_event) = self.adance_query_iter() else {
+                break;
+            };
+            match query_event {
+                QueryIterEvent::EnterInjection(_) => self.enter_injection(),
+                QueryIterEvent::Match(node) => self.start_highlight(node, &mut first_highlight),
+                QueryIterEvent::ExitInjection { injection, state } => {
+                    // `state` is returned if the layer is finished; if it isn't, we have
+                    // a combined injection and need to deactivate its highlights
+                    if state.is_none() {
+                        self.deactive_layer(injection.layer);
+                        refresh = true;
+                    }
+                }
+            }
+        }
+        self.next_highlight_end = self
+            .active_highlights
+            .last()
+            .map_or(u32::MAX, |node| node.end);
+
+        if refresh {
+            HighlighEvent::RefreshHiglights(HighlightList(self.active_highlights.iter()))
+        } else {
+            HighlighEvent::PushHighlights(HighlightList(
+                self.active_highlights[prev_stack_size..].iter(),
+            ))
+        }
+    }
+
+    fn adance_query_iter(&mut self) -> Option<QueryIterEvent<'a, LayerData<'a>>> {
+        let event = replace(&mut self.next_query_event, self.query.next());
+        self.next_highlight_start = self
+            .next_query_event
+            .as_ref()
+            .map_or(u32::MAX, |event| event.start());
+        event
+    }
+
+    fn process_higlight_end(&mut self) {
+        let i = self
+            .active_highlights
+            .iter()
+            .rposition(|highlight| highlight.end != self.next_highlight_end)
+            .unwrap();
+        self.active_highlights.truncate(i);
+    }
+
+    /// Processes injections that end at the same position as highlights first.
+ fn process_injection_ends(&mut self) { + while self.next_highlight_end == self.next_highlight_start { + match self.next_query_event.as_ref() { + Some(QueryIterEvent::ExitInjection { injection, state }) => { + if state.is_none() { + self.deactive_layer(injection.layer); + } + } + Some(QueryIterEvent::Match(matched_node)) if matched_node.byte_range.is_empty() => { + } + _ => break, + } + } + } + + fn enter_injection(&mut self) { + self.query.current_layer_state().parent_highlights = self.active_highlights.len(); + } + + fn deactive_layer(&mut self, layer: LayerId) { + let LayerData { + parent_highlights, + ref mut dormant_highlights, + .. + } = *self.query.layer_state(layer); + let i = self.active_highlights[parent_highlights..] + .iter() + .rposition(|highlight| highlight.end != self.next_highlight_end) + .unwrap(); + self.active_highlights.truncate(parent_highlights + i); + dormant_highlights.extend(self.active_highlights.drain(parent_highlights..)) + } + + fn start_highlight(&mut self, node: MatchedNode, first_highlight: &mut bool) { + if node.byte_range.is_empty() { + return; + } + + // if there are multiple matches for the exact same node + // only use one of the (the last with new/nvim precedance) + if !*first_highlight + && self.active_highlights.last().map_or(false, |prev_node| { + prev_node.end == node.byte_range.end as u32 + }) + { + if self.config.new_precedance { + self.active_highlights.pop(); + } else { + return; + } + } + let highlight = self.config.highlight_indices[node.capture.idx()]; + if highlight.0 == u32::MAX { + return; + } + self.active_highlights.push(HighlightedNode { + end: node.byte_range.end as u32, + highlight, + }); + *first_highlight = false; + } +} diff --git a/helix-syntax/src/injections_tree.rs b/helix-syntax/src/injections_tree.rs deleted file mode 100644 index 2290a0e6d..000000000 --- a/helix-syntax/src/injections_tree.rs +++ /dev/null @@ -1,268 +0,0 @@ -use core::slice; -use std::cell::RefCell; -use std::iter::Peekable; -use std::mem::replace; -use std::sync::Arc; - -use hashbrown::HashMap; -use ropey::RopeSlice; -use slotmap::{new_key_type, SlotMap}; - -use crate::parse::LayerUpdateFlags; -use crate::tree_sitter::{ - self, Capture, InactiveQueryCursor, Parser, Query, QueryCursor, RopeTsInput, SyntaxTree, - SyntaxTreeNode, -}; -use crate::HighlightConfiguration; - -// TODO(perf): replace std::ops::Range with helix_stdx::Range once added -type Range = std::ops::Range; - -new_key_type! { - /// The default slot map key type. - pub struct LayerId; -} - -#[derive(Debug)] -pub struct LanguageLayer { - pub config: Arc, - pub(crate) parse_tree: Option, - /// internal flags used during parsing to track incremental invalidation - pub(crate) flags: LayerUpdateFlags, - ranges: Vec, - pub(crate) parent: Option, - /// a list of **sorted** non-overlapping injection ranges. Note that - /// injection ranges are not relative to the start of this layer but the - /// start of the root layer - pub(crate) injections: Box<[Injection]>, -} - -#[derive(Debug, Clone)] -pub(crate) struct Injection { - pub byte_range: Range, - pub layer: LayerId, -} - -impl LanguageLayer { - /// Returns the injection range **within this layers** that contains `idx`. 
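// Illustrative sketch (not part of the patch): one way a renderer could drive the
// `Highligther` API from highlighter2.rs above. Only `next_event_offset`, `advance`
// and `active_highlights` are taken from that file; the construction of the
// highlighter and the span collection are hypothetical.
fn collect_spans(
    mut highlighter: Highligther<'_>,
    doc_len: u32,
) -> Vec<(std::ops::Range<u32>, Vec<Highlight>)> {
    let mut spans = Vec::new();
    let mut pos = 0u32;
    while pos < doc_len {
        // everything up to the next event keeps the currently active highlights
        let next = highlighter.next_event_offset().min(doc_len);
        if next > pos {
            spans.push((pos..next, highlighter.active_highlights().collect()));
            pos = next;
        }
        if pos < doc_len {
            // both event kinds report the highlights that apply from here on:
            // a refresh replaces the whole style stack, a push only adds to it
            match highlighter.advance() {
                HighlighEvent::RefreshHiglights(_) | HighlighEvent::PushHighlights(_) => {}
            }
        }
    }
    spans
}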
- /// This function will not descend into nested injections - pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&Injection> { - let i = self - .injections - .partition_point(|range| range.byte_range.start <= idx); - self.injections - .get(i) - .filter(|injection| injection.byte_range.end > idx) - } -} - -struct InjectionTree { - layers: SlotMap, - root: LayerId, -} - -impl InjectionTree { - pub fn layer_for_byte_range(&self, start: usize, end: usize) -> LayerId { - let mut cursor = self.root; - loop { - let layer = &self.layers[cursor]; - let Some(start_injection) = layer.injection_at_byte_idx(start) else { - break; - }; - let Some(end_injection) = layer.injection_at_byte_idx(end) else { - break; - }; - if start_injection.layer == end_injection.layer { - cursor = start_injection.layer; - } else { - break; - } - } - cursor - } -} - -#[derive(Clone)] -pub struct MatchedNode { - pub capture: Capture, - pub byte_range: Range, -} - -struct LayerQueryIter<'a> { - cursor: QueryCursor<'a, 'a, RopeTsInput<'a>>, - peeked: Option, -} - -impl<'a> LayerQueryIter<'a> { - fn peek(&mut self) -> Option<&MatchedNode> { - if self.peeked.is_none() { - let (query_match, node_idx) = self.cursor.next_matched_node()?; - let matched_node = query_match.matched_node(node_idx); - self.peeked = Some(MatchedNode { - capture: matched_node.capture, - byte_range: matched_node.syntax_node.byte_range(), - }); - } - self.peeked.as_ref() - } - - fn consume(&mut self) -> MatchedNode { - self.peeked.take().unwrap() - } -} - -struct ActiveLayer<'a> { - query_iter: LayerQueryIter<'a>, - injections: Peekable>, -} - -struct QueryBuilder<'a, 'tree> { - query: &'a Query, - node: &'a SyntaxTreeNode<'tree>, - src: RopeSlice<'a>, - injection_tree: &'a InjectionTree, -} - -pub struct QueryIter<'a, 'tree> { - query_builder: Box>, - active_layers: HashMap>, - active_injections: Vec, - current_injection: Injection, -} - -impl<'a> QueryIter<'a, '_> { - fn enter_injection(&mut self, injection: Injection) -> bool { - self.active_layers - .entry(injection.layer) - .or_insert_with(|| { - let layer = &self.query_builder.injection_tree.layers[injection.layer]; - let injection_start = layer - .injections - .partition_point(|child| child.byte_range.start < injection.byte_range.start); - let cursor = get_cursor().execute_query( - self.query_builder.query, - self.query_builder.node, - RopeTsInput::new(self.query_builder.src), - ); - ActiveLayer { - query_iter: LayerQueryIter { - cursor, - peeked: None, - }, - injections: layer.injections[injection_start..].iter().peekable(), - } - }); - let old_injection = replace(&mut self.current_injection, injection); - self.active_injections.push(old_injection); - true - } - - fn exit_injection(&mut self) -> Option { - let injection = replace(&mut self.current_injection, self.active_injections.pop()?); - let finished_layer = self.active_layers[&injection.layer] - .query_iter - .peeked - .is_none(); - if finished_layer { - let layer = self.active_layers.remove(&injection.layer).unwrap(); - reuse_cursor(layer.query_iter.cursor.reuse()); - } - Some(injection) - } -} - -pub enum QueryIterEvent { - EnterInjection(Injection), - Match(MatchedNode), - ExitInjection(Injection), -} - -impl<'a> Iterator for QueryIter<'a, '_> { - type Item = QueryIterEvent; - - fn next(&mut self) -> Option { - loop { - let active_layer = self - .active_layers - .get_mut(&self.current_injection.layer) - .unwrap(); - let next_injection = active_layer.injections.peek().filter(|injection| { - injection.byte_range.start < 
self.current_injection.byte_range.end - }); - let next_match = active_layer.query_iter.peek().filter(|matched_node| { - matched_node.byte_range.start < self.current_injection.byte_range.end - }); - - match (next_match, next_injection) { - (None, None) => { - return self.exit_injection().map(QueryIterEvent::ExitInjection); - } - (Some(_), None) => { - // consume match - let matched_node = active_layer.query_iter.consume(); - return Some(QueryIterEvent::Match(matched_node)); - } - (Some(matched_node), Some(injection)) - if matched_node.byte_range.start <= injection.byte_range.end => - { - // consume match - let matched_node = active_layer.query_iter.consume(); - // ignore nodes that are overlapped by the injection - if matched_node.byte_range.start <= injection.byte_range.start { - return Some(QueryIterEvent::Match(matched_node)); - } - } - (Some(_), Some(_)) | (None, Some(_)) => { - // consume injection - let injection = active_layer.injections.next().unwrap(); - if self.enter_injection(injection.clone()) { - return Some(QueryIterEvent::EnterInjection(injection.clone())); - } - } - } - } - } -} - -struct TsParser { - parser: crate::tree_sitter::Parser, - pub cursors: Vec, -} - -// could also just use a pool, or a single instance? -thread_local! { - static PARSER: RefCell = RefCell::new(TsParser { - parser: Parser::new(), - cursors: Vec::new(), - }) -} - -pub fn with_cursor(f: impl FnOnce(&mut InactiveQueryCursor) -> T) -> T { - PARSER.with(|parser| { - let mut parser = parser.borrow_mut(); - let mut cursor = parser - .cursors - .pop() - .unwrap_or_else(InactiveQueryCursor::new); - let res = f(&mut cursor); - parser.cursors.push(cursor); - res - }) -} - -pub fn get_cursor() -> InactiveQueryCursor { - PARSER.with(|parser| { - let mut parser = parser.borrow_mut(); - parser - .cursors - .pop() - .unwrap_or_else(InactiveQueryCursor::new) - }) -} - -pub fn reuse_cursor(cursor: InactiveQueryCursor) { - PARSER.with(|parser| { - let mut parser = parser.borrow_mut(); - parser.cursors.push(cursor) - }) -} diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs index b7331a3aa..593fe3de6 100644 --- a/helix-syntax/src/lib.rs +++ b/helix-syntax/src/lib.rs @@ -1,33 +1,82 @@ use ::ropey::RopeSlice; -use ::tree_sitter::{Node, Parser, Point, Query, QueryCursor, Range, Tree}; -use slotmap::HopSlotMap; +use slotmap::{new_key_type, HopSlotMap}; use std::borrow::Cow; -use std::cell::RefCell; use std::hash::{Hash, Hasher}; use std::path::Path; use std::str; use std::sync::Arc; -use crate::injections_tree::LayerId; use crate::parse::LayerUpdateFlags; pub use crate::config::{read_query, HighlightConfiguration}; -pub use crate::ropey::RopeProvider; -pub use merge::merge; +use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode}; pub use pretty_print::pretty_print_tree; pub use tree_cursor::TreeCursor; mod config; pub mod highlighter; -mod injections_tree; -mod merge; +pub mod highlighter2; mod parse; mod pretty_print; -mod ropey; +mod query_iter; +pub mod text_object; mod tree_cursor; pub mod tree_sitter; +new_key_type! { + /// The default slot map key type. + pub struct LayerId; +} + +/// The maximum number of in-progress matches a TS cursor can consider at once. +/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`. +/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here. 
+///
+///
+/// Old tree-sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (it must now be set manually).
+/// However, having no limit causes performance issues for medium to large files.
+/// In helix, this problem caused tree-sitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).
+///
+///
+/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream.
+/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
+///
+///
+/// Neovim chose 64 for this value somewhat arbitrarily.
+/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
+/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
+pub const TREE_SITTER_MATCH_LIMIT: u32 = 256;
+
+// TODO(perf): replace std::ops::Range with helix_stdx::Range once added
+type Range = std::ops::Range;
+
+/// The Tree-sitter syntax tree for a single language.
+///
+/// This is really multiple nested syntax trees due to tree-sitter
+/// injections. A single syntax tree/parser is called a layer. Each layer
+/// is parsed as a single "file" by tree-sitter. There can be multiple layers
+/// for the same language. A layer corresponds to one of three things:
+/// * the root layer
+/// * a singular injection limited to a single node in its parent layer
+/// * multiple injections (multiple disjoint nodes in the parent layer) that are
+///   parsed as though they were a single uninterrupted file.
+///
+/// An injection always refers to a single node into which another layer is
+/// injected. As injections only correspond to syntax tree nodes, injections in
+/// the same layer do not intersect. However, the syntax tree in an injected
+/// layer can have nodes that intersect with nodes from the parent layer. For
+/// example:
+/// ```
+/// layer2: | Sibling A | Sibling B (layer3) | Sibling C |
+/// layer1: | Sibling A (layer2) | Sibling B | Sibling C (layer2) |
+/// ```
+/// In this case Sibling B really spans across a "GAP" in layer2. While the syntax
+/// node cannot be split up by tree-sitter directly, we can treat Sibling B as two
+/// separate injections. That is done while parsing/running the query capture. As
+/// a result, the injections form a tree. Note that other queries must account for
+/// such multi-injection nodes.
#[derive(Debug)] pub struct Syntax { layers: HopSlotMap, @@ -41,16 +90,20 @@ impl Syntax { injection_callback: impl Fn(&InjectionLanguageMarker) -> Option>, ) -> Option { let root_layer = LanguageLayer { - tree: None, + parse_tree: None, config, - depth: 0, flags: LayerUpdateFlags::empty(), - ranges: vec![Range { + ranges: vec![tree_sitter::Range { start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], + end_byte: u32::MAX, + start_point: tree_sitter::Point { row: 0, col: 0 }, + end_point: tree_sitter::Point { + row: u32::MAX, + col: u32::MAX, + }, + }] + .into_boxed_slice(), + injections: Box::new([]), parent: None, }; @@ -70,49 +123,75 @@ impl Syntax { Some(syntax) } - pub fn tree(&self) -> &Tree { + pub fn tree(&self) -> &SyntaxTree { self.layers[self.root].tree() } - pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree { - let mut container_id = self.root; - - for (layer_id, layer) in self.layers.iter() { - if layer.depth > self.layers[container_id].depth - && layer.contains_byte_range(start, end) - { - container_id = layer_id; - } - } - - self.layers[container_id].tree() + pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &SyntaxTree { + let layer = self.layer_for_byte_range(start, end); + self.layers[layer].tree() } - pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { + pub fn named_descendant_for_byte_range( + &self, + start: usize, + end: usize, + ) -> Option> { self.tree_for_byte_range(start, end) .root_node() .named_descendant_for_byte_range(start, end) } - pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { + pub fn descendant_for_byte_range( + &self, + start: usize, + end: usize, + ) -> Option> { self.tree_for_byte_range(start, end) .root_node() .descendant_for_byte_range(start, end) } + pub fn layer_for_byte_range(&self, start: usize, end: usize) -> LayerId { + let mut cursor = self.root; + loop { + let layer = &self.layers[cursor]; + let Some(start_injection) = layer.injection_at_byte_idx(start) else { + break; + }; + let Some(end_injection) = layer.injection_at_byte_idx(end) else { + break; + }; + if start_injection.layer == end_injection.layer { + cursor = start_injection.layer; + } else { + break; + } + } + cursor + } + pub fn walk(&self) -> TreeCursor<'_> { TreeCursor::new(&self.layers, self.root) } } +#[derive(Debug, Clone)] +pub(crate) struct Injection { + pub byte_range: Range, + pub layer: LayerId, +} + #[derive(Debug)] pub struct LanguageLayer { - // mode - // grammar pub config: Arc, - pub(crate) tree: Option, - pub ranges: Vec, - pub depth: u32, + parse_tree: Option, + ranges: Box<[tree_sitter::Range]>, + /// a list of **sorted** non-overlapping injection ranges. Note that + /// injection ranges are not relative to the start of this layer but the + /// start of the root layer + injections: Box<[Injection]>, + /// internal flags used during parsing to track incremental invalidation flags: LayerUpdateFlags, parent: Option, } @@ -123,8 +202,8 @@ pub struct LanguageLayer { /// state. impl PartialEq for LanguageLayer { fn eq(&self, other: &Self) -> bool { - self.depth == other.depth - && self.config.language == other.config.language + self.parent == other.parent + && self.config.grammar == other.config.grammar && self.ranges == other.ranges } } @@ -133,165 +212,27 @@ impl PartialEq for LanguageLayer { /// See its documentation for details. 
impl Hash for LanguageLayer { fn hash(&self, state: &mut H) { - self.depth.hash(state); - self.config.language.hash(state); + self.parent.hash(state); + self.config.grammar.hash(state); self.ranges.hash(state); } } impl LanguageLayer { - pub fn tree(&self) -> &Tree { + pub fn tree(&self) -> &SyntaxTree { // TODO: no unwrap - self.tree.as_ref().unwrap() - } - - /// Whether the layer contains the given byte range. - /// - /// If the layer has multiple ranges (i.e. combined injections), the - /// given range is considered contained if it is within the start and - /// end bytes of the first and last ranges **and** if the given range - /// starts or ends within any of the layer's ranges. - fn contains_byte_range(&self, start: usize, end: usize) -> bool { - let layer_start = self - .ranges - .first() - .expect("ranges should not be empty") - .start_byte; - let layer_end = self - .ranges - .last() - .expect("ranges should not be empty") - .end_byte; - - layer_start <= start - && layer_end >= end - && self.ranges.iter().any(|range| { - let byte_range = range.start_byte..range.end_byte; - byte_range.contains(&start) || byte_range.contains(&end) - }) - } -} - -#[derive(Debug, Clone)] -pub enum InjectionLanguageMarker<'a> { - Name(Cow<'a, str>), - Filename(Cow<'a, Path>), - Shebang(String), -} - -const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; - -#[derive(Debug)] -pub enum CapturedNode<'a> { - Single(Node<'a>), - /// Guaranteed to be not empty - Grouped(Vec>), -} - -impl<'a> CapturedNode<'a> { - pub fn start_byte(&self) -> usize { - match self { - Self::Single(n) => n.start_byte(), - Self::Grouped(ns) => ns[0].start_byte(), - } - } - - pub fn end_byte(&self) -> usize { - match self { - Self::Single(n) => n.end_byte(), - Self::Grouped(ns) => ns.last().unwrap().end_byte(), - } + self.parse_tree.as_ref().unwrap() } - pub fn byte_range(&self) -> std::ops::Range { - self.start_byte()..self.end_byte() - } -} - -/// The maximum number of in-progress matches a TS cursor can consider at once. -/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`. -/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here. -/// -/// -/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually). -/// However, this causes performance issues for medium to large files. -/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc). -/// -/// -/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream -/// (see and ). -/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance. -/// -/// -/// Neovim chose 64 for this value somewhat arbitrarily (). -/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions. -/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high. -const TREE_SITTER_MATCH_LIMIT: u32 = 256; - -#[derive(Debug)] -pub struct TextObjectQuery { - pub query: Query, -} - -impl TextObjectQuery { - /// Run the query on the given node and return sub nodes which match given - /// capture ("function.inside", "class.around", etc). 
- /// - /// Captures may contain multiple nodes by using quantifiers (+, *, etc), - /// and support for this is partial and could use improvement. - /// - /// ```query - /// (comment)+ @capture - /// - /// ; OR - /// ( - /// (comment)* - /// . - /// (function) - /// ) @capture - /// ``` - pub fn capture_nodes<'a>( - &'a self, - capture_name: &str, - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - self.capture_nodes_any(&[capture_name], node, slice, cursor) - } - - /// Find the first capture that exists out of all given `capture_names` - /// and return sub nodes that match this capture. - pub fn capture_nodes_any<'a>( - &'a self, - capture_names: &[&str], - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - let capture_idx = capture_names - .iter() - .find_map(|cap| self.query.capture_index_for_name(cap))?; - - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let nodes = cursor - .captures(&self.query, node, RopeProvider(slice)) - .filter_map(move |(mat, _)| { - let nodes: Vec<_> = mat - .captures - .iter() - .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node)) - .collect(); - - if nodes.len() > 1 { - Some(CapturedNode::Grouped(nodes)) - } else { - nodes.into_iter().map(CapturedNode::Single).next() - } - }); - - Some(nodes) + /// Returns the injection range **within this layers** that contains `idx`. + /// This function will not descend into nested injections + pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&Injection> { + let i = self + .injections + .partition_point(|range| range.byte_range.start < idx); + self.injections + .get(i) + .filter(|injection| injection.byte_range.end > idx) } } @@ -304,42 +245,6 @@ pub enum Error { Unknown, } -#[derive(Clone)] -enum IncludedChildren { - None, - All, - Unnamed, -} - -impl Default for IncludedChildren { - fn default() -> Self { - Self::None - } -} - fn byte_range_to_str(range: std::ops::Range, source: RopeSlice) -> Cow { Cow::from(source.byte_slice(range)) } - -struct TsParser { - parser: ::tree_sitter::Parser, - pub cursors: Vec, -} - -// could also just use a pool, or a single instance? -thread_local! 
{ - static PARSER: RefCell = RefCell::new(TsParser { - parser: Parser::new(), - cursors: Vec::new(), - }) -} - -pub fn with_cursor(f: impl FnOnce(&mut QueryCursor) -> T) -> T { - PARSER.with(|parser| { - let mut parser = parser.borrow_mut(); - let mut cursor = parser.cursors.pop().unwrap_or_default(); - let res = f(&mut cursor); - parser.cursors.push(cursor); - res - }) -} diff --git a/helix-syntax/src/parse.rs b/helix-syntax/src/parse.rs index de70f2a16..d2903b076 100644 --- a/helix-syntax/src/parse.rs +++ b/helix-syntax/src/parse.rs @@ -1,18 +1,18 @@ -use std::collections::VecDeque; -use std::mem::replace; -use std::sync::Arc; +// use std::collections::VecDeque; +// use std::mem::replace; +// use std::sync::Arc; -use ahash::RandomState; +// use ahash::RandomState; use bitflags::bitflags; -use hashbrown::raw::RawTable; -use ropey::RopeSlice; -use tree_sitter::{Node, Parser, Point, QueryCursor, Range}; +// use hashbrown::raw::RawTable; +// use ropey::RopeSlice; +// use tree_sitter::{Node, Parser, Point, QueryCursor, Range}; -use crate::ropey::RopeProvider; -use crate::{ - Error, HighlightConfiguration, IncludedChildren, InjectionLanguageMarker, LanguageLayer, - Syntax, PARSER, TREE_SITTER_MATCH_LIMIT, -}; +// use crate::ropey::RopeProvider; +// use crate::{ +// Error, HighlightConfiguration, IncludedChildren, InjectionLanguageMarker, LanguageLayer, +// Syntax, PARSER, TREE_SITTER_MATCH_LIMIT, +// }; bitflags! { /// Flags that track the status of a layer @@ -25,405 +25,405 @@ bitflags! { } } -impl Syntax { - pub fn update( - &mut self, - source: RopeSlice, - edits: Vec, - injection_callback: impl Fn(&InjectionLanguageMarker) -> Option>, - ) -> Result<(), Error> { - let mut queue = VecDeque::new(); - queue.push_back(self.root); - - // This table allows inverse indexing of `layers`. - // That is by hashing a `Layer` you can find - // the `LayerId` of an existing equivalent `Layer` in `layers`. - // - // It is used to determine if a new layer exists for an injection - // or if an existing layer needs to be updated. 
- let mut layers_table = RawTable::with_capacity(self.layers.len()); - let layers_hasher = RandomState::new(); - // Use the edits to update all layers markers - fn point_add(a: Point, b: Point) -> Point { - if b.row > 0 { - Point::new(a.row.saturating_add(b.row), b.column) - } else { - Point::new(0, a.column.saturating_add(b.column)) - } - } - fn point_sub(a: Point, b: Point) -> Point { - if a.row > b.row { - Point::new(a.row.saturating_sub(b.row), a.column) - } else { - Point::new(0, a.column.saturating_sub(b.column)) - } - } - - for (layer_id, layer) in self.layers.iter_mut() { - // The root layer always covers the whole range (0..usize::MAX) - if layer.depth == 0 { - layer.flags = LayerUpdateFlags::MODIFIED; - continue; - } - - if !edits.is_empty() { - for range in &mut layer.ranges { - // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 - for edit in edits.iter().rev() { - let is_pure_insertion = edit.old_end_byte == edit.start_byte; - - // if edit is after range, skip - if edit.start_byte > range.end_byte { - // TODO: || (is_noop && edit.start_byte == range.end_byte) - continue; - } - - // if edit is before range, shift entire range by len - if edit.old_end_byte < range.start_byte { - range.start_byte = - edit.new_end_byte + (range.start_byte - edit.old_end_byte); - range.start_point = point_add( - edit.new_end_position, - point_sub(range.start_point, edit.old_end_position), - ); - - range.end_byte = edit - .new_end_byte - .saturating_add(range.end_byte - edit.old_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - - layer.flags |= LayerUpdateFlags::MOVED; - } - // if the edit starts in the space before and extends into the range - else if edit.start_byte < range.start_byte { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - // If the edit is an insertion at the start of the tree, shift - else if edit.start_byte == range.start_byte && is_pure_insertion { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - layer.flags |= LayerUpdateFlags::MOVED; - } else { - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - } - } - } - - let hash = layers_hasher.hash_one(layer); - // Safety: insert_no_grow is unsafe because it assumes that the table - // has enough capacity to hold additional elements. - // This is always the case as we reserved enough capacity above. 
- unsafe { layers_table.insert_no_grow(hash, layer_id) }; - } - - PARSER.with(|ts_parser| { - let ts_parser = &mut ts_parser.borrow_mut(); - ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours - let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); - // TODO: might need to set cursor range - cursor.set_byte_range(0..usize::MAX); - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let source_slice = source.slice(..); - - while let Some(layer_id) = queue.pop_front() { - let layer = &mut self.layers[layer_id]; - - // Mark the layer as touched - layer.flags |= LayerUpdateFlags::TOUCHED; - - // If a tree already exists, notify it of changes. - if let Some(tree) = &mut layer.tree { - if layer - .flags - .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED) - { - for edit in edits.iter().rev() { - // Apply the edits in reverse. - // If we applied them in order then edit 1 would disrupt the positioning of edit 2. - tree.edit(edit); - } - } - - if layer.flags.contains(LayerUpdateFlags::MODIFIED) { - // Re-parse the tree. - layer.parse(&mut ts_parser.parser, source)?; - } - } else { - // always parse if this layer has never been parsed before - layer.parse(&mut ts_parser.parser, source)?; - } - - // Switch to an immutable borrow. - let layer = &self.layers[layer_id]; - - // Process injections. - let matches = cursor.matches( - &layer.config.injections_query, - layer.tree().root_node(), - RopeProvider(source_slice), - ); - let mut combined_injections = vec![ - (None, Vec::new(), IncludedChildren::default()); - layer.config.combined_injections_patterns.len() - ]; - let mut injections = Vec::new(); - let mut last_injection_end = 0; - for mat in matches { - let (injection_capture, content_node, included_children) = layer - .config - .injection_for_match(&layer.config.injections_query, &mat, source_slice); - - // in case this is a combined injection save it for more processing later - if let Some(combined_injection_idx) = layer - .config - .combined_injections_patterns - .iter() - .position(|&pattern| pattern == mat.pattern_index) - { - let entry = &mut combined_injections[combined_injection_idx]; - if injection_capture.is_some() { - entry.0 = injection_capture; - } - if let Some(content_node) = content_node { - if content_node.start_byte() >= last_injection_end { - entry.1.push(content_node); - last_injection_end = content_node.end_byte(); - } - } - entry.2 = included_children; - continue; - } - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. - mat.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. 
- if let (Some(injection_capture), Some(content_node)) = - (injection_capture, content_node) - { - if let Some(config) = (injection_callback)(&injection_capture) { - let ranges = - intersect_ranges(&layer.ranges, &[content_node], included_children); - - if !ranges.is_empty() { - if content_node.start_byte() < last_injection_end { - continue; - } - last_injection_end = content_node.end_byte(); - injections.push((config, ranges)); - } - } - } - } - - for (lang_name, content_nodes, included_children) in combined_injections { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { - if let Some(config) = (injection_callback)(&lang_name) { - let ranges = - intersect_ranges(&layer.ranges, &content_nodes, included_children); - if !ranges.is_empty() { - injections.push((config, ranges)); - } - } - } - } - - let depth = layer.depth + 1; - // TODO: can't inline this since matches borrows self.layers - for (config, ranges) in injections { - let parent = Some(layer_id); - let new_layer = LanguageLayer { - tree: None, - config, - depth, - ranges, - flags: LayerUpdateFlags::empty(), - parent: None, - }; - - // Find an identical existing layer - let layer = layers_table - .get(layers_hasher.hash_one(&new_layer), |&it| { - self.layers[it] == new_layer - }) - .copied(); - - // ...or insert a new one. - let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer)); - self.layers[layer_id].parent = parent; - - queue.push_back(layer_id); - } - - // TODO: pre-process local scopes at this time, rather than highlight? - // would solve problems with locals not working across boundaries - } - - // Return the cursor back in the pool. - ts_parser.cursors.push(cursor); - - // Reset all `LayerUpdateFlags` and remove all untouched layers - self.layers.retain(|_, layer| { - replace(&mut layer.flags, LayerUpdateFlags::empty()) - .contains(LayerUpdateFlags::TOUCHED) - }); - - Ok(()) - }) - } -} - -/// Compute the ranges that should be included when parsing an injection. -/// This takes into account three things: -/// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. -/// * `nodes` - Every injection takes place within a set of nodes. The injection ranges -/// are the ranges of those nodes. -/// * `includes_children` - For some injections, the content nodes' children should be -/// excluded from the nested document, so that only the content nodes' *own* content -/// is reparsed. For other injections, the content nodes' entire ranges should be -/// reparsed, including the ranges of their children. 
-fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - included_children: IncludedChildren, -) -> Vec { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), - }; - - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| match included_children { - IncludedChildren::None => Some(child.range()), - IncludedChildren::All => None, - IncludedChildren::Unnamed => { - if child.is_named() { - Some(child.range()) - } else { - None - } - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; - - if range.end_byte < parent_range.start_byte { - continue; - } - - while parent_range.start_byte <= range.end_byte { - if parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } - - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } - - if let Some(next_range) = parent_range_iter.next() { - parent_range = next_range; - } else { - return result; - } - } - } - } - result -} - -impl LanguageLayer { - fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> { - parser - .set_included_ranges(&self.ranges) - .map_err(|_| Error::InvalidRanges)?; - - parser - .set_language(&self.config.language) - .map_err(|_| Error::InvalidLanguage)?; - - // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; - let tree = parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - &chunk.as_bytes()[byte - start_byte..] - } else { - // out of range - &[] - } - }, - self.tree.as_ref(), - ) - .ok_or(Error::Cancelled)?; - // unsafe { ts_parser.parser.set_cancellation_flag(None) }; - self.tree = Some(tree); - Ok(()) - } -} +// impl Syntax { +// pub fn update( +// &mut self, +// source: RopeSlice, +// edits: Vec, +// injection_callback: impl Fn(&InjectionLanguageMarker) -> Option>, +// ) -> Result<(), Error> { +// let mut queue = VecDeque::new(); +// queue.push_back(self.root); + +// // This table allows inverse indexing of `layers`. +// // That is by hashing a `Layer` you can find +// // the `LayerId` of an existing equivalent `Layer` in `layers`. +// // +// // It is used to determine if a new layer exists for an injection +// // or if an existing layer needs to be updated. 
+// let mut layers_table = RawTable::with_capacity(self.layers.len()); +// let layers_hasher = RandomState::new(); +// // Use the edits to update all layers markers +// fn point_add(a: Point, b: Point) -> Point { +// if b.row > 0 { +// Point::new(a.row.saturating_add(b.row), b.column) +// } else { +// Point::new(0, a.column.saturating_add(b.column)) +// } +// } +// fn point_sub(a: Point, b: Point) -> Point { +// if a.row > b.row { +// Point::new(a.row.saturating_sub(b.row), a.column) +// } else { +// Point::new(0, a.column.saturating_sub(b.column)) +// } +// } + +// for (layer_id, layer) in self.layers.iter_mut() { +// // The root layer always covers the whole range (0..usize::MAX) +// if layer.depth == 0 { +// layer.flags = LayerUpdateFlags::MODIFIED; +// continue; +// } + +// if !edits.is_empty() { +// for range in &mut layer.ranges { +// // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 +// for edit in edits.iter().rev() { +// let is_pure_insertion = edit.old_end_byte == edit.start_byte; + +// // if edit is after range, skip +// if edit.start_byte > range.end_byte { +// // TODO: || (is_noop && edit.start_byte == range.end_byte) +// continue; +// } + +// // if edit is before range, shift entire range by len +// if edit.old_end_byte < range.start_byte { +// range.start_byte = +// edit.new_end_byte + (range.start_byte - edit.old_end_byte); +// range.start_point = point_add( +// edit.new_end_position, +// point_sub(range.start_point, edit.old_end_position), +// ); + +// range.end_byte = edit +// .new_end_byte +// .saturating_add(range.end_byte - edit.old_end_byte); +// range.end_point = point_add( +// edit.new_end_position, +// point_sub(range.end_point, edit.old_end_position), +// ); + +// layer.flags |= LayerUpdateFlags::MOVED; +// } +// // if the edit starts in the space before and extends into the range +// else if edit.start_byte < range.start_byte { +// range.start_byte = edit.new_end_byte; +// range.start_point = edit.new_end_position; + +// range.end_byte = range +// .end_byte +// .saturating_sub(edit.old_end_byte) +// .saturating_add(edit.new_end_byte); +// range.end_point = point_add( +// edit.new_end_position, +// point_sub(range.end_point, edit.old_end_position), +// ); +// layer.flags = LayerUpdateFlags::MODIFIED; +// } +// // If the edit is an insertion at the start of the tree, shift +// else if edit.start_byte == range.start_byte && is_pure_insertion { +// range.start_byte = edit.new_end_byte; +// range.start_point = edit.new_end_position; +// layer.flags |= LayerUpdateFlags::MOVED; +// } else { +// range.end_byte = range +// .end_byte +// .saturating_sub(edit.old_end_byte) +// .saturating_add(edit.new_end_byte); +// range.end_point = point_add( +// edit.new_end_position, +// point_sub(range.end_point, edit.old_end_position), +// ); +// layer.flags = LayerUpdateFlags::MODIFIED; +// } +// } +// } +// } + +// let hash = layers_hasher.hash_one(layer); +// // Safety: insert_no_grow is unsafe because it assumes that the table +// // has enough capacity to hold additional elements. +// // This is always the case as we reserved enough capacity above. 
+// unsafe { layers_table.insert_no_grow(hash, layer_id) }; +// } + +// PARSER.with(|ts_parser| { +// let ts_parser = &mut ts_parser.borrow_mut(); +// ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours +// let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); +// // TODO: might need to set cursor range +// cursor.set_byte_range(0..usize::MAX); +// cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + +// let source_slice = source.slice(..); + +// while let Some(layer_id) = queue.pop_front() { +// let layer = &mut self.layers[layer_id]; + +// // Mark the layer as touched +// layer.flags |= LayerUpdateFlags::TOUCHED; + +// // If a tree already exists, notify it of changes. +// if let Some(tree) = &mut layer.parse_tree { +// if layer +// .flags +// .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED) +// { +// for edit in edits.iter().rev() { +// // Apply the edits in reverse. +// // If we applied them in order then edit 1 would disrupt the positioning of edit 2. +// tree.edit(edit); +// } +// } + +// if layer.flags.contains(LayerUpdateFlags::MODIFIED) { +// // Re-parse the tree. +// layer.parse(&mut ts_parser.parser, source)?; +// } +// } else { +// // always parse if this layer has never been parsed before +// layer.parse(&mut ts_parser.parser, source)?; +// } + +// // Switch to an immutable borrow. +// let layer = &self.layers[layer_id]; + +// // Process injections. +// let matches = cursor.matches( +// &layer.config.injections_query, +// layer.tree().root_node(), +// RopeProvider(source_slice), +// ); +// let mut combined_injections = vec![ +// (None, Vec::new(), IncludedChildren::default()); +// layer.config.combined_injections_patterns.len() +// ]; +// let mut injections = Vec::new(); +// let mut last_injection_end = 0; +// for mat in matches { +// let (injection_capture, content_node, included_children) = layer +// .config +// .injection_for_match(&layer.config.injections_query, &mat, source_slice); + +// // in case this is a combined injection save it for more processing later +// if let Some(combined_injection_idx) = layer +// .config +// .combined_injections_patterns +// .iter() +// .position(|&pattern| pattern == mat.pattern_index) +// { +// let entry = &mut combined_injections[combined_injection_idx]; +// if injection_capture.is_some() { +// entry.0 = injection_capture; +// } +// if let Some(content_node) = content_node { +// if content_node.start_byte() >= last_injection_end { +// entry.1.push(content_node); +// last_injection_end = content_node.end_byte(); +// } +// } +// entry.2 = included_children; +// continue; +// } + +// // Explicitly remove this match so that none of its other captures will remain +// // in the stream of captures. +// mat.remove(); + +// // If a language is found with the given name, then add a new language layer +// // to the highlighted document. 
+// if let (Some(injection_capture), Some(content_node)) = +// (injection_capture, content_node) +// { +// if let Some(config) = (injection_callback)(&injection_capture) { +// let ranges = +// intersect_ranges(&layer.ranges, &[content_node], included_children); + +// if !ranges.is_empty() { +// if content_node.start_byte() < last_injection_end { +// continue; +// } +// last_injection_end = content_node.end_byte(); +// injections.push((config, ranges)); +// } +// } +// } +// } + +// for (lang_name, content_nodes, included_children) in combined_injections { +// if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { +// if let Some(config) = (injection_callback)(&lang_name) { +// let ranges = +// intersect_ranges(&layer.ranges, &content_nodes, included_children); +// if !ranges.is_empty() { +// injections.push((config, ranges)); +// } +// } +// } +// } + +// let depth = layer.depth + 1; +// // TODO: can't inline this since matches borrows self.layers +// for (config, ranges) in injections { +// let parent = Some(layer_id); +// let new_layer = LanguageLayer { +// parse_tree: None, +// config, +// depth, +// ranges, +// flags: LayerUpdateFlags::empty(), +// parent: None, +// }; + +// // Find an identical existing layer +// let layer = layers_table +// .get(layers_hasher.hash_one(&new_layer), |&it| { +// self.layers[it] == new_layer +// }) +// .copied(); + +// // ...or insert a new one. +// let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer)); +// self.layers[layer_id].parent = parent; + +// queue.push_back(layer_id); +// } + +// // TODO: pre-process local scopes at this time, rather than highlight? +// // would solve problems with locals not working across boundaries +// } + +// // Return the cursor back in the pool. +// ts_parser.cursors.push(cursor); + +// // Reset all `LayerUpdateFlags` and remove all untouched layers +// self.layers.retain(|_, layer| { +// replace(&mut layer.flags, LayerUpdateFlags::empty()) +// .contains(LayerUpdateFlags::TOUCHED) +// }); + +// Ok(()) +// }) +// } +// } + +// /// Compute the ranges that should be included when parsing an injection. +// /// This takes into account three things: +// /// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. +// /// * `nodes` - Every injection takes place within a set of nodes. The injection ranges +// /// are the ranges of those nodes. +// /// * `includes_children` - For some injections, the content nodes' children should be +// /// excluded from the nested document, so that only the content nodes' *own* content +// /// is reparsed. For other injections, the content nodes' entire ranges should be +// /// reparsed, including the ranges of their children. 
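Editor's note: the commented-out doc comment above describes the old range-intersection step. Its core operation is clipping each content-node range against the parent layer's ranges; a rough standalone sketch of just that clipping step (ignoring excluded child nodes and tree-sitter `Point` bookkeeping; `clip_to_parents` is a made-up helper name, not part of the patch) could look like:

```rust
/// Standalone sketch: clip one content range against a sorted list of parent
/// ranges, keeping only the overlapping pieces.
fn clip_to_parents(
    parent_ranges: &[std::ops::Range<usize>],
    content: std::ops::Range<usize>,
) -> Vec<std::ops::Range<usize>> {
    parent_ranges
        .iter()
        .filter_map(|parent| {
            let start = parent.start.max(content.start);
            let end = parent.end.min(content.end);
            (start < end).then(|| start..end)
        })
        .collect()
}

fn main() {
    let parents = [0..10, 20..30];
    // A content node spanning 5..25 yields two clipped pieces.
    assert_eq!(clip_to_parents(&parents, 5..25), vec![5..10, 20..25]);
}
```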
+// fn intersect_ranges( +// parent_ranges: &[Range], +// nodes: &[Node], +// included_children: IncludedChildren, +// ) -> Vec { +// let mut cursor = nodes[0].walk(); +// let mut result = Vec::new(); +// let mut parent_range_iter = parent_ranges.iter(); +// let mut parent_range = parent_range_iter +// .next() +// .expect("Layers should only be constructed with non-empty ranges vectors"); +// for node in nodes.iter() { +// let mut preceding_range = Range { +// start_byte: 0, +// start_point: Point::new(0, 0), +// end_byte: node.start_byte(), +// end_point: node.start_position(), +// }; +// let following_range = Range { +// start_byte: node.end_byte(), +// start_point: node.end_position(), +// end_byte: usize::MAX, +// end_point: Point::new(usize::MAX, usize::MAX), +// }; + +// for excluded_range in node +// .children(&mut cursor) +// .filter_map(|child| match included_children { +// IncludedChildren::None => Some(child.range()), +// IncludedChildren::All => None, +// IncludedChildren::Unnamed => { +// if child.is_named() { +// Some(child.range()) +// } else { +// None +// } +// } +// }) +// .chain([following_range].iter().cloned()) +// { +// let mut range = Range { +// start_byte: preceding_range.end_byte, +// start_point: preceding_range.end_point, +// end_byte: excluded_range.start_byte, +// end_point: excluded_range.start_point, +// }; +// preceding_range = excluded_range; + +// if range.end_byte < parent_range.start_byte { +// continue; +// } + +// while parent_range.start_byte <= range.end_byte { +// if parent_range.end_byte > range.start_byte { +// if range.start_byte < parent_range.start_byte { +// range.start_byte = parent_range.start_byte; +// range.start_point = parent_range.start_point; +// } + +// if parent_range.end_byte < range.end_byte { +// if range.start_byte < parent_range.end_byte { +// result.push(Range { +// start_byte: range.start_byte, +// start_point: range.start_point, +// end_byte: parent_range.end_byte, +// end_point: parent_range.end_point, +// }); +// } +// range.start_byte = parent_range.end_byte; +// range.start_point = parent_range.end_point; +// } else { +// if range.start_byte < range.end_byte { +// result.push(range); +// } +// break; +// } +// } + +// if let Some(next_range) = parent_range_iter.next() { +// parent_range = next_range; +// } else { +// return result; +// } +// } +// } +// } +// result +// } + +// impl LanguageLayer { +// fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> { +// parser +// .set_included_ranges(&self.ranges) +// .map_err(|_| Error::InvalidRanges)?; + +// parser +// .set_language(&self.config.language) +// .map_err(|_| Error::InvalidLanguage)?; + +// // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; +// let tree = parser +// .parse_with( +// &mut |byte, _| { +// if byte <= source.len_bytes() { +// let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); +// &chunk.as_bytes()[byte - start_byte..] 
+// } else { +// // out of range +// &[] +// } +// }, +// self.parse_tree.as_ref(), +// ) +// .ok_or(Error::Cancelled)?; +// // unsafe { ts_parser.parser.set_cancellation_flag(None) }; +// self.parse_tree = Some(tree); +// Ok(()) +// } +// } diff --git a/helix-syntax/src/query_iter.rs b/helix-syntax/src/query_iter.rs new file mode 100644 index 000000000..e672fe77b --- /dev/null +++ b/helix-syntax/src/query_iter.rs @@ -0,0 +1,236 @@ +use core::slice; +use std::iter::Peekable; +use std::mem::replace; + +use hashbrown::HashMap; +use ropey::RopeSlice; + +use crate::tree_sitter::{ + Capture, InactiveQueryCursor, Query, QueryCursor, RopeTsInput, SyntaxTreeNode, +}; +use crate::{Injection, LayerId, Range, Syntax}; + +#[derive(Clone)] +pub struct MatchedNode { + pub capture: Capture, + pub byte_range: Range, +} + +struct LayerQueryIter<'a> { + cursor: QueryCursor<'a, 'a, RopeTsInput<'a>>, + peeked: Option, +} + +impl<'a> LayerQueryIter<'a> { + fn peek(&mut self) -> Option<&MatchedNode> { + if self.peeked.is_none() { + let (query_match, node_idx) = self.cursor.next_matched_node()?; + let matched_node = query_match.matched_node(node_idx); + self.peeked = Some(MatchedNode { + capture: matched_node.capture, + byte_range: matched_node.syntax_node.byte_range(), + }); + } + self.peeked.as_ref() + } + + fn consume(&mut self) -> MatchedNode { + self.peeked.take().unwrap() + } +} + +struct ActiveLayer<'a, S> { + state: S, + query_iter: LayerQueryIter<'a>, + injections: Peekable>, +} + +// data only needed when entering and exiting injections +// seperate struck to keep the QueryIter reasonably small +struct QueryIterLayerManager<'a, S> { + query: &'a Query, + node: SyntaxTreeNode<'a>, + src: RopeSlice<'a>, + syntax: &'a Syntax, + active_layers: HashMap>>, + active_injections: Vec, +} + +impl<'a, S: Default> QueryIterLayerManager<'a, S> { + fn init_layer(&mut self, injection: Injection) -> Box> { + self.active_layers + .remove(&injection.layer) + .unwrap_or_else(|| { + let layer = &self.syntax.layers[injection.layer]; + let injection_start = layer + .injections + .partition_point(|child| child.byte_range.start < injection.byte_range.start); + let cursor = InactiveQueryCursor::new().execute_query( + self.query, + &self.node, + RopeTsInput::new(self.src), + ); + Box::new(ActiveLayer { + state: S::default(), + query_iter: LayerQueryIter { + cursor, + peeked: None, + }, + injections: layer.injections[injection_start..].iter().peekable(), + }) + }) + } +} + +pub struct QueryIter<'a, LayerState: Default = ()> { + layer_manager: Box>, + current_layer: Box>, + current_injection: Injection, +} + +impl<'a, LayerState: Default> QueryIter<'a, LayerState> { + pub fn new(syntax: &'a Syntax, src: RopeSlice<'a>, query: &'a Query) -> Self { + Self::at(syntax, src, query, syntax.tree().root_node(), syntax.root) + } + + pub fn at( + syntax: &'a Syntax, + src: RopeSlice<'a>, + query: &'a Query, + node: SyntaxTreeNode<'a>, + layer: LayerId, + ) -> Self { + // create fake injection for query root + let injection = Injection { + byte_range: node.byte_range(), + layer, + }; + let mut layer_manager = Box::new(QueryIterLayerManager { + query, + node, + src, + syntax, + // TODO: reuse allocations with an allocation pool + active_layers: HashMap::with_capacity(8), + active_injections: Vec::with_capacity(8), + }); + Self { + current_layer: layer_manager.init_layer(injection), + current_injection: injection, + layer_manager, + } + } + + pub fn current_layer_state(&mut self) -> &mut LayerState { + &mut self.current_layer.state + } + + pub 
fn layer_state(&mut self, layer: LayerId) -> &mut LayerState { + if layer == self.current_injection.layer { + self.current_layer_state() + } else { + &mut self + .layer_manager + .active_layers + .get_mut(&layer) + .unwrap() + .state + } + } + + fn enter_injection(&mut self, injection: Injection) { + let active_layer = self.layer_manager.init_layer(injection); + let old_injection = replace(&mut self.current_injection, injection); + let old_layer = replace(&mut self.current_layer, active_layer); + self.layer_manager + .active_layers + .insert(old_injection.layer, old_layer); + self.layer_manager.active_injections.push(old_injection); + } + + fn exit_injection(&mut self) -> Option<(Injection, Option)> { + let injection = replace( + &mut self.current_injection, + self.layer_manager.active_injections.pop()?, + ); + let layer = replace( + &mut self.current_layer, + self.layer_manager + .active_layers + .remove(&self.current_injection.layer)?, + ); + let layer_unfinished = layer.query_iter.peeked.is_some(); + if layer_unfinished { + self.layer_manager + .active_layers + .insert(injection.layer, layer) + .unwrap(); + Some((injection, None)) + } else { + Some((injection, Some(layer.state))) + } + } +} + +impl<'a, S: Default> Iterator for QueryIter<'a, S> { + type Item = QueryIterEvent; + + fn next(&mut self) -> Option> { + loop { + let next_injection = self.current_layer.injections.peek().filter(|injection| { + injection.byte_range.start < self.current_injection.byte_range.end + }); + let next_match = self.current_layer.query_iter.peek().filter(|matched_node| { + matched_node.byte_range.start < self.current_injection.byte_range.end + }); + + match (next_match, next_injection) { + (None, None) => { + return self.exit_injection().map(|(injection, state)| { + QueryIterEvent::ExitInjection { injection, state } + }); + } + (Some(_), None) => { + // consume match + let matched_node = self.current_layer.query_iter.consume(); + return Some(QueryIterEvent::Match(matched_node)); + } + (Some(matched_node), Some(injection)) + if matched_node.byte_range.start <= injection.byte_range.end => + { + // consume match + let matched_node = self.current_layer.query_iter.consume(); + // ignore nodes that are overlapped by the injection + if matched_node.byte_range.start <= injection.byte_range.start { + return Some(QueryIterEvent::Match(matched_node)); + } + } + (Some(_), Some(_)) | (None, Some(_)) => { + // consume injection + let injection = self.current_layer.injections.next().unwrap(); + self.enter_injection(injection.clone()); + return Some(QueryIterEvent::EnterInjection(injection.clone())); + } + } + } + } +} + +pub enum QueryIterEvent { + EnterInjection(Injection), + Match(MatchedNode), + ExitInjection { + injection: Injection, + state: Option, + }, +} + +impl QueryIterEvent { + pub fn start(&self) -> u32 { + match self { + QueryIterEvent::EnterInjection(injection) => injection.byte_range.start as u32, + QueryIterEvent::Match(mat) => mat.byte_range.start as u32, + QueryIterEvent::ExitInjection { injection, .. 
} => injection.byte_range.start as u32, + } + } +} diff --git a/helix-syntax/src/ropey.rs b/helix-syntax/src/ropey.rs index 650fcfb90..8b1378917 100644 --- a/helix-syntax/src/ropey.rs +++ b/helix-syntax/src/ropey.rs @@ -1,29 +1 @@ -// glue code for using TS with ropey, this should be put behind a feature flag -// in the future (and potentially be partially removed) -use ropey::RopeSlice; -use tree_sitter::{Node, TextProvider}; - -// Adapter to convert rope chunks to bytes -pub struct ChunksBytes<'a> { - chunks: ropey::iter::Chunks<'a>, -} -impl<'a> Iterator for ChunksBytes<'a> { - type Item = &'a [u8]; - fn next(&mut self) -> Option { - self.chunks.next().map(str::as_bytes) - } -} - -pub struct RopeProvider<'a>(pub RopeSlice<'a>); - -impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> { - type I = ChunksBytes<'a>; - - fn text(&mut self, node: Node) -> Self::I { - let fragment = self.0.byte_slice(node.start_byte()..node.end_byte()); - ChunksBytes { - chunks: fragment.chunks(), - } - } -} diff --git a/helix-syntax/src/text_object.rs b/helix-syntax/src/text_object.rs new file mode 100644 index 000000000..09cb8a10f --- /dev/null +++ b/helix-syntax/src/text_object.rs @@ -0,0 +1,93 @@ +// TODO: rework using query iter + +use std::iter; + +use ropey::RopeSlice; + +use crate::tree_sitter::{InactiveQueryCursor, Query, RopeTsInput, SyntaxTreeNode}; +use crate::TREE_SITTER_MATCH_LIMIT; + +#[derive(Debug)] +pub enum CapturedNode<'a> { + Single(SyntaxTreeNode<'a>), + /// Guaranteed to be not empty + Grouped(Vec>), +} + +impl<'a> CapturedNode<'a> { + pub fn start_byte(&self) -> usize { + match self { + Self::Single(n) => n.start_byte(), + Self::Grouped(ns) => ns[0].start_byte(), + } + } + + pub fn end_byte(&self) -> usize { + match self { + Self::Single(n) => n.end_byte(), + Self::Grouped(ns) => ns.last().unwrap().end_byte(), + } + } +} + +#[derive(Debug)] +pub struct TextObjectQuery { + pub query: Query, +} + +impl TextObjectQuery { + /// Run the query on the given node and return sub nodes which match given + /// capture ("function.inside", "class.around", etc). + /// + /// Captures may contain multiple nodes by using quantifiers (+, *, etc), + /// and support for this is partial and could use improvement. + /// + /// ```query + /// (comment)+ @capture + /// + /// ; OR + /// ( + /// (comment)* + /// . + /// (function) + /// ) @capture + /// ``` + pub fn capture_nodes<'a>( + &'a self, + capture_name: &str, + node: SyntaxTreeNode<'a>, + slice: RopeSlice<'a>, + cursor: InactiveQueryCursor, + ) -> Option>> { + self.capture_nodes_any(&[capture_name], node, slice, cursor) + } + + /// Find the first capture that exists out of all given `capture_names` + /// and return sub nodes that match this capture. 
+ pub fn capture_nodes_any<'a>( + &'a self, + capture_names: &[&str], + node: SyntaxTreeNode<'a>, + slice: RopeSlice<'a>, + mut cursor: InactiveQueryCursor, + ) -> Option>> { + let capture = capture_names + .iter() + .find_map(|cap| self.query.get_capture(cap))?; + + cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + let mut cursor = cursor.execute_query(&self.query, &node, RopeTsInput::new(slice)); + let capture_node = iter::from_fn(move || { + let (mat, _) = cursor.next_matched_node()?; + Some(mat.nodes_for_capture(capture).cloned().collect()) + }) + .filter_map(move |nodes: Vec<_>| { + if nodes.len() > 1 { + Some(CapturedNode::Grouped(nodes)) + } else { + nodes.into_iter().map(CapturedNode::Single).next() + } + }); + Some(capture_node) + } +} diff --git a/helix-syntax/src/tree_sitter.rs b/helix-syntax/src/tree_sitter.rs index bb188d120..78e3d4a06 100644 --- a/helix-syntax/src/tree_sitter.rs +++ b/helix-syntax/src/tree_sitter.rs @@ -1,6 +1,6 @@ mod grammar; mod parser; -mod query; +pub mod query; mod query_cursor; mod query_match; mod ropey; @@ -11,21 +11,21 @@ use std::ops; pub use grammar::Grammar; pub use parser::{Parser, ParserInputRaw}; -pub use query::{Capture, ParserErrorLocation, Pattern, Query, QueryStr}; +pub use query::{Capture, Pattern, Query, QueryStr}; pub use query_cursor::{InactiveQueryCursor, MatchedNode, MatchedNodeIdx, QueryCursor, QueryMatch}; pub use ropey::RopeTsInput; pub use syntax_tree::{InputEdit, SyntaxTree}; pub use syntax_tree_node::SyntaxTreeNode; #[repr(C)] -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Point { pub row: u32, - pub column: u32, + pub col: u32, } #[repr(C)] -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Range { pub start_point: Point, pub end_point: Point, diff --git a/helix-syntax/src/tree_sitter/query.rs b/helix-syntax/src/tree_sitter/query.rs index 3fb1fc188..69a394179 100644 --- a/helix-syntax/src/tree_sitter/query.rs +++ b/helix-syntax/src/tree_sitter/query.rs @@ -5,28 +5,64 @@ use std::ptr::NonNull; use std::{slice, str}; use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate, TextPredicate}; -use crate::tree_sitter::query::property::QueryProperty; use crate::tree_sitter::Grammar; mod predicate; mod property; +pub enum UserPredicate<'a> { + IsPropertySet { + negate: bool, + key: &'a str, + val: Option<&'a str>, + }, + SetProperty { + key: &'a str, + val: Option<&'a str>, + }, + Other(Predicate<'a>), +} + +impl Display for UserPredicate<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + UserPredicate::IsPropertySet { negate, key, val } => { + let predicate = if negate { "is-not?" } else { "is?" }; + write!(f, " ({predicate} {key} {})", val.unwrap_or("")) + } + UserPredicate::SetProperty { key, val } => { + write!(f, "(set! 
{key} {})", val.unwrap_or("")) + } + UserPredicate::Other(ref predicate) => { + write!(f, "{}", predicate.name()) + } + } + } +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Pattern(pub(crate) u32); +impl Pattern { + pub const SENTINEL: Pattern = Pattern(u32::MAX); + pub fn idx(&self) -> usize { + self.0 as usize + } +} + pub enum QueryData {} +#[derive(Debug)] pub(super) struct PatternData { text_predicates: Range, - properties: Range, } +#[derive(Debug)] pub struct Query { pub(crate) raw: NonNull, num_captures: u32, num_strings: u32, text_predicates: Vec, - properties: Vec, patterns: Box<[PatternData]>, } @@ -41,7 +77,7 @@ impl Query { grammar: Grammar, source: &str, path: impl AsRef, - mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>, + mut custom_predicate: impl FnMut(Pattern, UserPredicate) -> Result<(), InvalidPredicateError>, ) -> Result { assert!( source.len() <= i32::MAX as usize, @@ -136,7 +172,6 @@ impl Query { num_captures, num_strings, text_predicates: Vec::new(), - properties: Vec::new(), patterns: Box::default(), }; let patterns: Result<_, ParseError> = (0..num_patterns) @@ -190,15 +225,54 @@ impl Query { } } - pub fn pattern_properies(&self, pattern_idx: Pattern) -> &[QueryProperty] { - let range = self.patterns[pattern_idx.0 as usize].properties.clone(); - &self.properties[range.start as usize..range.end as usize] + #[inline] + pub fn captures(&self) -> impl ExactSizeIterator { + (0..self.num_captures).map(|cap| (Capture(cap), self.capture_name(Capture(cap)))) + } + + #[inline] + pub fn num_captures(&self) -> u32 { + self.num_captures + } + + #[inline] + pub fn get_capture(&self, capture_name: &str) -> Option { + for capture in 0..self.num_captures { + if capture_name == self.capture_name(Capture(capture)) { + return Some(Capture(capture)); + } + } + None } pub(crate) fn pattern_text_predicates(&self, pattern_idx: u16) -> &[TextPredicate] { let range = self.patterns[pattern_idx as usize].text_predicates.clone(); &self.text_predicates[range.start as usize..range.end as usize] } + + /// Get the byte offset where the given pattern starts in the query's + /// source. + #[doc(alias = "ts_query_start_byte_for_pattern")] + #[must_use] + pub fn start_byte_for_pattern(&self, pattern: Pattern) -> usize { + assert!( + pattern.0 < self.text_predicates.len() as u32, + "Pattern index is {pattern_index} but the pattern count is {}", + self.text_predicates.len(), + ); + unsafe { ts_query_start_byte_for_pattern(self.raw, pattern.0) as usize } + } + + /// Get the number of patterns in the query. + #[must_use] + pub fn pattern_count(&self) -> usize { + unsafe { ts_query_pattern_count(self.raw) as usize } + } + /// Get the number of patterns in the query. 
+ #[must_use] + pub fn patterns(&self) -> impl ExactSizeIterator { + (0..self.pattern_count() as u32).map(Pattern) + } } impl Drop for Query { @@ -215,6 +289,9 @@ impl Capture { pub fn name(self, query: &Query) -> &str { query.capture_name(self) } + pub fn idx(self) -> usize { + self.0 as usize + } } /// A reference to a string stroed in a query diff --git a/helix-syntax/src/tree_sitter/query/predicate.rs b/helix-syntax/src/tree_sitter/query/predicate.rs index 7a2f858ed..91f3dabeb 100644 --- a/helix-syntax/src/tree_sitter/query/predicate.rs +++ b/helix-syntax/src/tree_sitter/query/predicate.rs @@ -5,7 +5,9 @@ use std::ptr::NonNull; use std::{fmt, slice}; use crate::tree_sitter::query::property::QueryProperty; -use crate::tree_sitter::query::{Capture, Pattern, PatternData, Query, QueryData, QueryStr}; +use crate::tree_sitter::query::{ + Capture, Pattern, PatternData, Query, QueryData, QueryStr, UserPredicate, +}; use crate::tree_sitter::query_cursor::MatchedNode; use crate::tree_sitter::TsInput; @@ -34,6 +36,7 @@ pub(super) enum TextPredicateKind { AnyString(Box<[QueryStr]>), } +#[derive(Debug)] pub(crate) struct TextPredicate { capture: Capture, kind: TextPredicateKind, @@ -161,10 +164,9 @@ impl Query { pub(super) fn parse_pattern_predicates( &mut self, pattern: Pattern, - mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>, + mut custom_predicate: impl FnMut(Pattern, UserPredicate) -> Result<(), InvalidPredicateError>, ) -> Result { let text_predicate_start = self.text_predicates.len() as u32; - let property_start = self.properties.len() as u32; let predicate_steps = unsafe { let mut len = 0u32; @@ -203,7 +205,7 @@ impl Query { "match?" | "not-match?" | "any-match?" | "any-not-match?" => { predicate.check_arg_count(2)?; let capture_idx = predicate.capture_arg(0)?; - let regex = predicate.str_arg(1)?.get(self); + let regex = predicate.query_str_arg(1)?.get(self); let negated = matches!(predicate.name(), "not-match?" | "any-not-match?"); let match_all = matches!(predicate.name(), "match?" | "not-match?"); @@ -219,14 +221,34 @@ impl Query { }); } - "set!" => self.properties.push(QueryProperty::parse(&predicate)?), + "set!" => { + let property = QueryProperty::parse(&predicate)?; + custom_predicate( + pattern, + UserPredicate::SetProperty { + key: property.key.get(&self), + val: property.val.map(|val| val.get(&self)), + }, + )? + } + "is-not?" | "is?" => { + let property = QueryProperty::parse(&predicate)?; + custom_predicate( + pattern, + UserPredicate::IsPropertySet { + negate: predicate.name() == "is-not?", + key: property.key.get(&self), + val: property.val.map(|val| val.get(&self)), + }, + )? + } "any-of?" | "not-any-of?" => { predicate.check_min_arg_count(1)?; let capture = predicate.capture_arg(0)?; let negated = predicate.name() == "not-any-of?"; let values: Result<_, InvalidPredicateError> = (1..predicate.num_args()) - .map(|i| predicate.str_arg(i)) + .map(|i| predicate.query_str_arg(i)) .collect(); self.text_predicates.push(TextPredicate { capture, @@ -239,12 +261,11 @@ impl Query { // is and is-not are better handeled as custom predicates since interpreting is context dependent // "is?" => property_predicates.push((QueryProperty::parse(&predicate), false)), // "is-not?" 
=> property_predicates.push((QueryProperty::parse(&predicate), true)), - _ => custom_predicate(pattern, predicate)?, + _ => custom_predicate(pattern, UserPredicate::Other(predicate))?, } } Ok(PatternData { text_predicates: text_predicate_start..self.text_predicates.len() as u32, - properties: property_start..self.properties.len() as u32, }) } } @@ -312,7 +333,7 @@ impl<'a> Predicate<'a> { Ok(()) } - pub fn str_arg(&self, i: usize) -> Result { + pub fn query_str_arg(&self, i: usize) -> Result { match self.arg(i) { PredicateArg::String(str) => Ok(str), PredicateArg::Capture(capture) => bail!( @@ -323,6 +344,10 @@ impl<'a> Predicate<'a> { } } + pub fn str_arg(&self, i: usize) -> Result<&str, InvalidPredicateError> { + Ok(self.query_str_arg(i)?.get(self.query)) + } + pub fn num_args(&self) -> usize { self.args.len() } @@ -352,6 +377,20 @@ pub struct InvalidPredicateError { pub(super) msg: Box, } +impl From for InvalidPredicateError { + fn from(value: String) -> Self { + InvalidPredicateError { + msg: value.into_boxed_str(), + } + } +} + +impl<'a> From<&'a str> for InvalidPredicateError { + fn from(value: &'a str) -> Self { + InvalidPredicateError { msg: value.into() } + } +} + impl fmt::Display for InvalidPredicateError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(&self.msg) diff --git a/helix-syntax/src/tree_sitter/query/property.rs b/helix-syntax/src/tree_sitter/query/property.rs index 037644b91..53f162c58 100644 --- a/helix-syntax/src/tree_sitter/query/property.rs +++ b/helix-syntax/src/tree_sitter/query/property.rs @@ -1,6 +1,7 @@ use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate}; use crate::tree_sitter::query::QueryStr; +#[derive(Debug)] pub struct QueryProperty { pub key: QueryStr, pub val: Option, @@ -10,9 +11,9 @@ impl QueryProperty { pub fn parse(predicate: &Predicate) -> Result { predicate.check_min_arg_count(1)?; predicate.check_max_arg_count(2)?; - let key = predicate.str_arg(0)?; + let key = predicate.query_str_arg(0)?; let val = (predicate.num_args() == 1) - .then(|| predicate.str_arg(1)) + .then(|| predicate.query_str_arg(1)) .transpose()?; Ok(QueryProperty { key, val }) } diff --git a/helix-syntax/src/tree_sitter/query_cursor.rs b/helix-syntax/src/tree_sitter/query_cursor.rs index 368aeadfa..83df365dc 100644 --- a/helix-syntax/src/tree_sitter/query_cursor.rs +++ b/helix-syntax/src/tree_sitter/query_cursor.rs @@ -1,4 +1,5 @@ use core::slice; +use std::cell::UnsafeCell; use std::marker::PhantomData; use std::mem::replace; use std::ops::Range; @@ -10,6 +11,15 @@ use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode, TsInput}; enum QueryCursorData {} +thread_local! 
{ + static CURSOR_CACHE: UnsafeCell> = UnsafeCell::new(Vec::with_capacity(8)); +} + +/// SAFETY: must not call itself recuresively +unsafe fn with_cache(f: impl FnOnce(&mut Vec) -> T) -> T { + CURSOR_CACHE.with(|cache| f(&mut *cache.get())) +} + pub struct QueryCursor<'a, 'tree, I: TsInput> { query: &'a Query, ptr: *mut QueryCursorData, @@ -115,8 +125,8 @@ impl Drop for QueryCursor<'_, '_, I> { fn drop(&mut self) { // we allow moving the cursor data out so we need the null check here // would be cleaner with a subtype but doesn't really matter at the end of the day - if !self.ptr.is_null() { - unsafe { ts_query_cursor_delete(self.ptr) } + if let Some(ptr) = NonNull::new(self.ptr) { + unsafe { with_cache(|cache| cache.push(InactiveQueryCursor { ptr })) } } } } @@ -128,8 +138,12 @@ pub struct InactiveQueryCursor { impl InactiveQueryCursor { pub fn new() -> Self { - InactiveQueryCursor { - ptr: unsafe { NonNull::new_unchecked(ts_query_cursor_new()) }, + unsafe { + with_cache(|cache| { + cache.pop().unwrap_or_else(|| InactiveQueryCursor { + ptr: NonNull::new_unchecked(ts_query_cursor_new()), + }) + }) } } @@ -208,6 +222,16 @@ impl<'tree> QueryMatch<'_, 'tree> { self.matched_nodes.iter() } + pub fn nodes_for_capture( + &self, + capture: Capture, + ) -> impl Iterator> { + self.matched_nodes + .iter() + .filter(move |mat| mat.capture == capture) + .map(|mat| &mat.syntax_node) + } + pub fn matched_node(&self, i: MatchedNodeIdx) -> &MatchedNode { &self.matched_nodes[i as usize] }
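Editor's note: the `CURSOR_CACHE` above is a per-thread pool that recycles `InactiveQueryCursor`s instead of allocating a fresh cursor for every query. A simplified, safe sketch of the same pooling pattern (using `RefCell` rather than the patch's `UnsafeCell`, with a made-up `Cursor` type standing in for the real resource) looks like this:

```rust
use std::cell::RefCell;

// Stand-in for an expensive-to-create resource (hypothetical).
struct Cursor {
    _buf: Vec<u8>,
}

impl Cursor {
    fn new() -> Self {
        Cursor { _buf: Vec::with_capacity(4096) }
    }
}

thread_local! {
    // Per-thread pool of idle cursors; RefCell provides safe interior mutability.
    static CURSOR_POOL: RefCell<Vec<Cursor>> = RefCell::new(Vec::new());
}

fn acquire_cursor() -> Cursor {
    // Reuse a pooled cursor if one is available, otherwise allocate.
    CURSOR_POOL.with(|pool| pool.borrow_mut().pop().unwrap_or_else(Cursor::new))
}

fn release_cursor(cursor: Cursor) {
    // Return the cursor to the pool for later reuse on this thread.
    CURSOR_POOL.with(|pool| pool.borrow_mut().push(cursor));
}

fn main() {
    let c = acquire_cursor();
    // ... use the cursor ...
    release_cursor(c);
    // The next acquisition on this thread reuses the pooled allocation.
    let _c2 = acquire_cursor();
}
```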