diff --git a/Cargo.lock b/Cargo.lock index f1cd1632f..aa8b613cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1311,6 +1311,7 @@ dependencies = [ "hashbrown 0.14.5", "helix-loader", "helix-stdx", + "helix-syntax", "imara-diff", "indoc", "log", @@ -1425,6 +1426,23 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "helix-syntax" +version = "24.7.0" +dependencies = [ + "ahash", + "arc-swap", + "bitflags 2.6.0", + "hashbrown 0.14.5", + "helix-stdx", + "log", + "once_cell", + "regex", + "ropey", + "slotmap", + "tree-sitter", +] + [[package]] name = "helix-term" version = "24.7.0" diff --git a/Cargo.toml b/Cargo.toml index e7f784428..c04e4ca23 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "helix-vcs", "helix-parsec", "helix-stdx", + "helix-syntax", "xtask", ] diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index 392b4a4ca..d2eda7585 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -17,6 +17,7 @@ integration = [] [dependencies] helix-stdx = { path = "../helix-stdx" } +helix-syntax = { path = "../helix-syntax" } helix-loader = { path = "../helix-loader" } ropey = { version = "1.6.1", default-features = false, features = ["simd"] } diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index fd2b6c959..55685d0a5 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -738,18 +738,9 @@ fn init_indent_query<'a, 'b>( .map(|prec| prec.byte_range().end - 1..byte_pos + 1) .unwrap_or(byte_pos..byte_pos + 1); - crate::syntax::PARSER.with(|ts_parser| { - let mut ts_parser = ts_parser.borrow_mut(); - let mut cursor = ts_parser.cursors.pop().unwrap_or_default(); - let query_result = query_indents( - query, - syntax, - &mut cursor, - text, - query_range, - new_line_byte_pos, - ); - ts_parser.cursors.push(cursor); + crate::syntax::with_cursor(|cursor| { + let query_result = + query_indents(query, syntax, cursor, text, query_range, new_line_byte_pos); (query_result, deepest_preceding) }) }; diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 93f618c09..e9451ed43 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,41 +1,32 @@ -mod tree_cursor; - -use crate::{ - auto_pairs::AutoPairs, - chars::char_is_line_ending, - diagnostic::Severity, - regex::Regex, - transaction::{ChangeSet, Operation}, - RopeSlice, Tendril, -}; +use std::borrow::Cow; +use std::fmt::{self, Display}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::Arc; -use ahash::RandomState; use arc_swap::{ArcSwap, Guard}; -use bitflags::bitflags; use globset::GlobSet; -use hashbrown::raw::RawTable; -use helix_stdx::rope::{self, RopeSliceExt}; -use slotmap::{DefaultKey as LayerId, HopSlotMap}; - -use std::{ - borrow::Cow, - cell::RefCell, - collections::{HashMap, HashSet, VecDeque}, - fmt::{self, Display, Write}, - hash::{Hash, Hasher}, - mem::replace, - path::{Path, PathBuf}, - str::FromStr, - sync::Arc, +pub use helix_syntax::highlighter::{Highlight, HighlightEvent}; +pub use helix_syntax::{ + merge, pretty_print_tree, HighlightConfiguration, InjectionLanguageMarker, RopeProvider, + TextObjectQuery, TreeCursor, }; - +pub use helix_syntax::{with_cursor, Syntax}; use once_cell::sync::{Lazy, OnceCell}; -use serde::{ser::SerializeSeq, Deserialize, Serialize}; +use regex::Regex; +use ropey::RopeSlice; +use serde::ser::SerializeSeq; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use tree_sitter::{Point, Query}; + +use crate::auto_pairs::AutoPairs; +use 
crate::chars::char_is_line_ending; +use crate::diagnostic::Severity; +use crate::{ChangeSet, Operation, Tendril}; use helix_loader::grammar::{get_language, load_runtime_file}; -pub use tree_cursor::TreeCursor; - fn deserialize_regex<'de, D>(deserializer: D) -> Result, D::Error> where D: serde::Deserializer<'de>, @@ -171,6 +162,104 @@ pub struct LanguageConfiguration { pub persistent_diagnostic_sources: Vec, } +fn read_query(language: &str, filename: &str) -> String { + helix_syntax::read_query(language, filename, |lang, filename| { + load_runtime_file(lang, filename).unwrap_or_default() + }) +} +impl LanguageConfiguration { + fn initialize_highlight(&self, scopes: &[String]) -> Option> { + let highlights_query = read_query(&self.language_id, "highlights.scm"); + // always highlight syntax errors + // highlights_query += "\n(ERROR) @error"; + + let injections_query = read_query(&self.language_id, "injections.scm"); + let locals_query = read_query(&self.language_id, "locals.scm"); + + if highlights_query.is_empty() { + None + } else { + let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) + .map_err(|err| { + log::error!( + "Failed to load tree-sitter parser for language {:?}: {}", + self.language_id, + err + ) + }) + .ok()?; + let config = HighlightConfiguration::new( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err)) + .ok()?; + + config.configure(scopes); + Some(Arc::new(config)) + } + } + + pub fn reconfigure(&self, scopes: &[String]) { + if let Some(Some(config)) = self.highlight_config.get() { + config.configure(scopes); + } + } + + pub fn get_highlight_config(&self) -> Option> { + self.highlight_config.get().cloned().flatten() + } + + pub fn highlight_config(&self, scopes: &[String]) -> Option> { + self.highlight_config + .get_or_init(|| self.initialize_highlight(scopes)) + .clone() + } + + pub fn is_highlight_initialized(&self) -> bool { + self.highlight_config.get().is_some() + } + + pub fn indent_query(&self) -> Option<&Query> { + self.indent_query + .get_or_init(|| self.load_query("indents.scm")) + .as_ref() + } + + pub fn textobject_query(&self) -> Option<&TextObjectQuery> { + self.textobject_query + .get_or_init(|| { + self.load_query("textobjects.scm") + .map(|query| TextObjectQuery { query }) + }) + .as_ref() + } + + pub fn scope(&self) -> &str { + &self.scope + } + + fn load_query(&self, kind: &str) -> Option { + let query_text = read_query(&self.language_id, kind); + if query_text.is_empty() { + return None; + } + let lang = &self.highlight_config.get()?.as_ref()?.language; + Query::new(lang, &query_text) + .map_err(|e| { + log::error!( + "Failed to parse {} queries for {}: {}", + kind, + self.language_id, + e + ) + }) + .ok() + } +} + #[derive(Debug, PartialEq, Eq, Hash)] pub enum FileType { /// The extension of the file, either the `Path::extension` or the full @@ -604,228 +693,6 @@ impl FromStr for AutoPairConfig { } } -#[derive(Debug)] -pub struct TextObjectQuery { - pub query: Query, -} - -#[derive(Debug)] -pub enum CapturedNode<'a> { - Single(Node<'a>), - /// Guaranteed to be not empty - Grouped(Vec>), -} - -impl<'a> CapturedNode<'a> { - pub fn start_byte(&self) -> usize { - match self { - Self::Single(n) => n.start_byte(), - Self::Grouped(ns) => ns[0].start_byte(), - } - } - - pub fn 
end_byte(&self) -> usize { - match self { - Self::Single(n) => n.end_byte(), - Self::Grouped(ns) => ns.last().unwrap().end_byte(), - } - } - - pub fn byte_range(&self) -> std::ops::Range { - self.start_byte()..self.end_byte() - } -} - -/// The maximum number of in-progress matches a TS cursor can consider at once. -/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`. -/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here. -/// -/// -/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually). -/// However, this causes performance issues for medium to large files. -/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc). -/// -/// -/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream -/// (see and ). -/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance. -/// -/// -/// Neovim chose 64 for this value somewhat arbitrarily (). -/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions. -/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high. -const TREE_SITTER_MATCH_LIMIT: u32 = 256; - -impl TextObjectQuery { - /// Run the query on the given node and return sub nodes which match given - /// capture ("function.inside", "class.around", etc). - /// - /// Captures may contain multiple nodes by using quantifiers (+, *, etc), - /// and support for this is partial and could use improvement. - /// - /// ```query - /// (comment)+ @capture - /// - /// ; OR - /// ( - /// (comment)* - /// . - /// (function) - /// ) @capture - /// ``` - pub fn capture_nodes<'a>( - &'a self, - capture_name: &str, - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - self.capture_nodes_any(&[capture_name], node, slice, cursor) - } - - /// Find the first capture that exists out of all given `capture_names` - /// and return sub nodes that match this capture. - pub fn capture_nodes_any<'a>( - &'a self, - capture_names: &[&str], - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - let capture_idx = capture_names - .iter() - .find_map(|cap| self.query.capture_index_for_name(cap))?; - - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let nodes = cursor - .captures(&self.query, node, RopeProvider(slice)) - .filter_map(move |(mat, _)| { - let nodes: Vec<_> = mat - .captures - .iter() - .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node)) - .collect(); - - if nodes.len() > 1 { - Some(CapturedNode::Grouped(nodes)) - } else { - nodes.into_iter().map(CapturedNode::Single).next() - } - }); - - Some(nodes) - } -} - -pub fn read_query(language: &str, filename: &str) -> String { - static INHERITS_REGEX: Lazy = - Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap()); - - let query = load_runtime_file(language, filename).unwrap_or_default(); - - // replaces all "; inherits (,)*" with the queries of the given language(s) - INHERITS_REGEX - .replace_all(&query, |captures: ®ex::Captures| { - captures[1] - .split(',') - .fold(String::new(), |mut output, language| { - // `write!` to a String cannot fail. 
- write!(output, "\n{}\n", read_query(language, filename)).unwrap(); - output - }) - }) - .to_string() -} - -impl LanguageConfiguration { - fn initialize_highlight(&self, scopes: &[String]) -> Option> { - let highlights_query = read_query(&self.language_id, "highlights.scm"); - // always highlight syntax errors - // highlights_query += "\n(ERROR) @error"; - - let injections_query = read_query(&self.language_id, "injections.scm"); - let locals_query = read_query(&self.language_id, "locals.scm"); - - if highlights_query.is_empty() { - None - } else { - let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) - .map_err(|err| { - log::error!( - "Failed to load tree-sitter parser for language {:?}: {}", - self.language_id, - err - ) - }) - .ok()?; - let config = HighlightConfiguration::new( - language, - &highlights_query, - &injections_query, - &locals_query, - ) - .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err)) - .ok()?; - - config.configure(scopes); - Some(Arc::new(config)) - } - } - - pub fn reconfigure(&self, scopes: &[String]) { - if let Some(Some(config)) = self.highlight_config.get() { - config.configure(scopes); - } - } - - pub fn highlight_config(&self, scopes: &[String]) -> Option> { - self.highlight_config - .get_or_init(|| self.initialize_highlight(scopes)) - .clone() - } - - pub fn is_highlight_initialized(&self) -> bool { - self.highlight_config.get().is_some() - } - - pub fn indent_query(&self) -> Option<&Query> { - self.indent_query - .get_or_init(|| self.load_query("indents.scm")) - .as_ref() - } - - pub fn textobject_query(&self) -> Option<&TextObjectQuery> { - self.textobject_query - .get_or_init(|| { - self.load_query("textobjects.scm") - .map(|query| TextObjectQuery { query }) - }) - .as_ref() - } - - pub fn scope(&self) -> &str { - &self.scope - } - - fn load_query(&self, kind: &str) -> Option { - let query_text = read_query(&self.language_id, kind); - if query_text.is_empty() { - return None; - } - let lang = &self.highlight_config.get()?.as_ref()?.language; - Query::new(lang, &query_text) - .map_err(|e| { - log::error!( - "Failed to parse {} queries for {}: {}", - kind, - self.language_id, - e - ) - }) - .ok() - } -} #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(default, rename_all = "kebab-case", deny_unknown_fields)] pub struct SoftWrap { @@ -975,6 +842,8 @@ impl Loader { source: RopeSlice, ) -> Option> { let line = Cow::from(source.line(0)); + // TODO: resue detection from helix-syntax + const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; static SHEBANG_REGEX: Lazy = Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap()); let configuration_id = SHEBANG_REGEX @@ -1057,610 +926,34 @@ impl Loader { } } -pub struct TsParser { - parser: tree_sitter::Parser, - pub cursors: Vec, -} - -// could also just use a pool, or a single instance? -thread_local! 
{ - pub static PARSER: RefCell = RefCell::new(TsParser { - parser: Parser::new(), - cursors: Vec::new(), - }) -} - -#[derive(Debug)] -pub struct Syntax { - layers: HopSlotMap, - root: LayerId, - loader: Arc>, -} - -fn byte_range_to_str(range: std::ops::Range, source: RopeSlice) -> Cow { - Cow::from(source.byte_slice(range)) -} +pub fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec { + use Operation::*; + let mut old_pos = 0; -impl Syntax { - pub fn new( - source: RopeSlice, - config: Arc, - loader: Arc>, - ) -> Option { - let root_layer = LanguageLayer { - tree: None, - config, - depth: 0, - flags: LayerUpdateFlags::empty(), - ranges: vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - parent: None, - }; + let mut edits = Vec::new(); - // track scope_descriptor: a Vec of scopes for item in tree + if changeset.changes.is_empty() { + return edits; + } - let mut layers = HopSlotMap::default(); - let root = layers.insert(root_layer); + let mut iter = changeset.changes.iter().peekable(); - let mut syntax = Self { - root, - layers, - loader, - }; + // TODO; this is a lot easier with Change instead of Operation. - let res = syntax.update(source, source, &ChangeSet::new(source)); + fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { + let byte = text.char_to_byte(pos); // <- attempted to index past end + let line = text.char_to_line(pos); + let line_start_byte = text.line_to_byte(line); + let col = byte - line_start_byte; - if res.is_err() { - log::error!("TS parser failed, disabling TS for the current buffer: {res:?}"); - return None; - } - Some(syntax) + (byte, Point::new(line, col)) } - pub fn update( - &mut self, - old_source: RopeSlice, - source: RopeSlice, - changeset: &ChangeSet, - ) -> Result<(), Error> { - let mut queue = VecDeque::new(); - queue.push_back(self.root); - - let loader = self.loader.load(); - let scopes = loader.scopes.load(); - let injection_callback = |language: &InjectionLanguageMarker| { - loader - .language_configuration_for_injection_string(language) - .and_then(|language_config| language_config.highlight_config(&scopes)) - }; - - // Convert the changeset into tree sitter edits. - let edits = generate_edits(old_source, changeset); - - // This table allows inverse indexing of `layers`. - // That is by hashing a `Layer` you can find - // the `LayerId` of an existing equivalent `Layer` in `layers`. - // - // It is used to determine if a new layer exists for an injection - // or if an existing layer needs to be updated. 
- let mut layers_table = RawTable::with_capacity(self.layers.len()); - let layers_hasher = RandomState::new(); - // Use the edits to update all layers markers - fn point_add(a: Point, b: Point) -> Point { - if b.row > 0 { - Point::new(a.row.saturating_add(b.row), b.column) - } else { - Point::new(0, a.column.saturating_add(b.column)) - } - } - fn point_sub(a: Point, b: Point) -> Point { - if a.row > b.row { - Point::new(a.row.saturating_sub(b.row), a.column) - } else { - Point::new(0, a.column.saturating_sub(b.column)) - } - } - - for (layer_id, layer) in self.layers.iter_mut() { - // The root layer always covers the whole range (0..usize::MAX) - if layer.depth == 0 { - layer.flags = LayerUpdateFlags::MODIFIED; - continue; - } - - if !edits.is_empty() { - for range in &mut layer.ranges { - // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 - for edit in edits.iter().rev() { - let is_pure_insertion = edit.old_end_byte == edit.start_byte; - - // if edit is after range, skip - if edit.start_byte > range.end_byte { - // TODO: || (is_noop && edit.start_byte == range.end_byte) - continue; - } - - // if edit is before range, shift entire range by len - if edit.old_end_byte < range.start_byte { - range.start_byte = - edit.new_end_byte + (range.start_byte - edit.old_end_byte); - range.start_point = point_add( - edit.new_end_position, - point_sub(range.start_point, edit.old_end_position), - ); - - range.end_byte = edit - .new_end_byte - .saturating_add(range.end_byte - edit.old_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - - layer.flags |= LayerUpdateFlags::MOVED; - } - // if the edit starts in the space before and extends into the range - else if edit.start_byte < range.start_byte { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - // If the edit is an insertion at the start of the tree, shift - else if edit.start_byte == range.start_byte && is_pure_insertion { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - layer.flags |= LayerUpdateFlags::MOVED; - } else { - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - } - } - } - - let hash = layers_hasher.hash_one(layer); - // Safety: insert_no_grow is unsafe because it assumes that the table - // has enough capacity to hold additional elements. - // This is always the case as we reserved enough capacity above. 
- unsafe { layers_table.insert_no_grow(hash, layer_id) }; - } - - PARSER.with(|ts_parser| { - let ts_parser = &mut ts_parser.borrow_mut(); - ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours - let mut cursor = ts_parser.cursors.pop().unwrap_or_default(); - // TODO: might need to set cursor range - cursor.set_byte_range(0..usize::MAX); - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let source_slice = source.slice(..); - - while let Some(layer_id) = queue.pop_front() { - let layer = &mut self.layers[layer_id]; - - // Mark the layer as touched - layer.flags |= LayerUpdateFlags::TOUCHED; - - // If a tree already exists, notify it of changes. - if let Some(tree) = &mut layer.tree { - if layer - .flags - .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED) - { - for edit in edits.iter().rev() { - // Apply the edits in reverse. - // If we applied them in order then edit 1 would disrupt the positioning of edit 2. - tree.edit(edit); - } - } - - if layer.flags.contains(LayerUpdateFlags::MODIFIED) { - // Re-parse the tree. - layer.parse(&mut ts_parser.parser, source)?; - } - } else { - // always parse if this layer has never been parsed before - layer.parse(&mut ts_parser.parser, source)?; - } - - // Switch to an immutable borrow. - let layer = &self.layers[layer_id]; - - // Process injections. - let matches = cursor.matches( - &layer.config.injections_query, - layer.tree().root_node(), - RopeProvider(source_slice), - ); - let mut combined_injections = vec![ - (None, Vec::new(), IncludedChildren::default()); - layer.config.combined_injections_patterns.len() - ]; - let mut injections = Vec::new(); - let mut last_injection_end = 0; - for mat in matches { - let (injection_capture, content_node, included_children) = layer - .config - .injection_for_match(&layer.config.injections_query, &mat, source_slice); - - // in case this is a combined injection save it for more processing later - if let Some(combined_injection_idx) = layer - .config - .combined_injections_patterns - .iter() - .position(|&pattern| pattern == mat.pattern_index) - { - let entry = &mut combined_injections[combined_injection_idx]; - if injection_capture.is_some() { - entry.0 = injection_capture; - } - if let Some(content_node) = content_node { - if content_node.start_byte() >= last_injection_end { - entry.1.push(content_node); - last_injection_end = content_node.end_byte(); - } - } - entry.2 = included_children; - continue; - } - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. - mat.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. 
- if let (Some(injection_capture), Some(content_node)) = - (injection_capture, content_node) - { - if let Some(config) = (injection_callback)(&injection_capture) { - let ranges = - intersect_ranges(&layer.ranges, &[content_node], included_children); - - if !ranges.is_empty() { - if content_node.start_byte() < last_injection_end { - continue; - } - last_injection_end = content_node.end_byte(); - injections.push((config, ranges)); - } - } - } - } - - for (lang_name, content_nodes, included_children) in combined_injections { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { - if let Some(config) = (injection_callback)(&lang_name) { - let ranges = - intersect_ranges(&layer.ranges, &content_nodes, included_children); - if !ranges.is_empty() { - injections.push((config, ranges)); - } - } - } - } - - let depth = layer.depth + 1; - // TODO: can't inline this since matches borrows self.layers - for (config, ranges) in injections { - let parent = Some(layer_id); - let new_layer = LanguageLayer { - tree: None, - config, - depth, - ranges, - flags: LayerUpdateFlags::empty(), - parent: None, - }; - - // Find an identical existing layer - let layer = layers_table - .get(layers_hasher.hash_one(&new_layer), |&it| { - self.layers[it] == new_layer - }) - .copied(); - - // ...or insert a new one. - let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer)); - self.layers[layer_id].parent = parent; - - queue.push_back(layer_id); - } - - // TODO: pre-process local scopes at this time, rather than highlight? - // would solve problems with locals not working across boundaries - } - - // Return the cursor back in the pool. - ts_parser.cursors.push(cursor); - - // Reset all `LayerUpdateFlags` and remove all untouched layers - self.layers.retain(|_, layer| { - replace(&mut layer.flags, LayerUpdateFlags::empty()) - .contains(LayerUpdateFlags::TOUCHED) - }); - - Ok(()) - }) - } - - pub fn tree(&self) -> &Tree { - self.layers[self.root].tree() - } - - /// Iterate over the highlighted regions for a given slice of source code. - pub fn highlight_iter<'a>( - &'a self, - source: RopeSlice<'a>, - range: Option>, - cancellation_flag: Option<&'a AtomicUsize>, - ) -> impl Iterator> + 'a { - let mut layers = self - .layers - .iter() - .filter_map(|(_, layer)| { - // TODO: if range doesn't overlap layer range, skip it - - // Reuse a cursor from the pool if available. - let mut cursor = PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.pop().unwrap_or_default() - }); - - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. 
- let cursor_ref = - unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - - // if reusing cursors & no range this resets to whole range - cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); - cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let mut captures = cursor_ref - .captures( - &layer.config.query, - layer.tree().root_node(), - RopeProvider(source), - ) - .peekable(); - - // If there's no captures, skip the layer - captures.peek()?; - - Some(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - _tree: None, - captures: RefCell::new(captures), - config: layer.config.as_ref(), // TODO: just reuse `layer` - depth: layer.depth, // TODO: just reuse `layer` - }) - }) - .collect::>(); - - layers.sort_unstable_by_key(|layer| layer.sort_key()); - - let mut result = HighlightIter { - source, - byte_offset: range.map_or(0, |r| r.start), - cancellation_flag, - iter_count: 0, - layers, - next_event: None, - last_highlight_range: None, - }; - result.sort_layers(); - result - } - - pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree { - let mut container_id = self.root; - - for (layer_id, layer) in self.layers.iter() { - if layer.depth > self.layers[container_id].depth - && layer.contains_byte_range(start, end) - { - container_id = layer_id; - } - } - - self.layers[container_id].tree() - } - - pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { - self.tree_for_byte_range(start, end) - .root_node() - .named_descendant_for_byte_range(start, end) - } - - pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { - self.tree_for_byte_range(start, end) - .root_node() - .descendant_for_byte_range(start, end) - } - - pub fn walk(&self) -> TreeCursor<'_> { - // data structure to find the smallest range that contains a point - // when some of the ranges in the structure can overlap. - TreeCursor::new(&self.layers, self.root) - } - - // Commenting - // comment_strings_for_pos - // is_commented - - // Indentation - // suggested_indent_for_line_at_buffer_row - // suggested_indent_for_buffer_row - // indent_level_for_line - - // TODO: Folding -} - -bitflags! { - /// Flags that track the status of a layer - /// in the `Sytaxn::update` function - #[derive(Debug)] - struct LayerUpdateFlags : u32{ - const MODIFIED = 0b001; - const MOVED = 0b010; - const TOUCHED = 0b100; - } -} - -#[derive(Debug)] -pub struct LanguageLayer { - // mode - // grammar - pub config: Arc, - pub(crate) tree: Option, - pub ranges: Vec, - pub depth: u32, - flags: LayerUpdateFlags, - parent: Option, -} - -/// This PartialEq implementation only checks if that -/// two layers are theoretically identical (meaning they highlight the same text range with the same language). -/// It does not check whether the layers have the same internal treesitter -/// state. -impl PartialEq for LanguageLayer { - fn eq(&self, other: &Self) -> bool { - self.depth == other.depth - && self.config.language == other.config.language - && self.ranges == other.ranges - } -} - -/// Hash implementation belongs to PartialEq implementation above. -/// See its documentation for details. 
-impl Hash for LanguageLayer { - fn hash(&self, state: &mut H) { - self.depth.hash(state); - self.config.language.hash(state); - self.ranges.hash(state); - } -} - -impl LanguageLayer { - pub fn tree(&self) -> &Tree { - // TODO: no unwrap - self.tree.as_ref().unwrap() - } - - fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> { - parser - .set_included_ranges(&self.ranges) - .map_err(|_| Error::InvalidRanges)?; - - parser - .set_language(&self.config.language) - .map_err(|_| Error::InvalidLanguage)?; - - // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; - let tree = parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - &chunk.as_bytes()[byte - start_byte..] - } else { - // out of range - &[] - } - }, - self.tree.as_ref(), - ) - .ok_or(Error::Cancelled)?; - // unsafe { ts_parser.parser.set_cancellation_flag(None) }; - self.tree = Some(tree); - Ok(()) - } - - /// Whether the layer contains the given byte range. - /// - /// If the layer has multiple ranges (i.e. combined injections), the - /// given range is considered contained if it is within the start and - /// end bytes of the first and last ranges **and** if the given range - /// starts or ends within any of the layer's ranges. - fn contains_byte_range(&self, start: usize, end: usize) -> bool { - let layer_start = self - .ranges - .first() - .expect("ranges should not be empty") - .start_byte; - let layer_end = self - .ranges - .last() - .expect("ranges should not be empty") - .end_byte; - - layer_start <= start - && layer_end >= end - && self.ranges.iter().any(|range| { - let byte_range = range.start_byte..range.end_byte; - byte_range.contains(&start) || byte_range.contains(&end) - }) - } -} - -pub(crate) fn generate_edits( - old_text: RopeSlice, - changeset: &ChangeSet, -) -> Vec { - use Operation::*; - let mut old_pos = 0; - - let mut edits = Vec::new(); - - if changeset.changes.is_empty() { - return edits; - } - - let mut iter = changeset.changes.iter().peekable(); - - // TODO; this is a lot easier with Change instead of Operation. - - fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { - let byte = text.char_to_byte(pos); // <- attempted to index past end - let line = text.char_to_line(pos); - let line_start_byte = text.line_to_byte(line); - let col = byte - line_start_byte; - - (byte, Point::new(line, col)) - } - - fn traverse(point: Point, text: &Tendril) -> Point { - let Point { - mut row, - mut column, - } = point; + fn traverse(point: Point, text: &Tendril) -> Point { + let Point { + mut row, + mut column, + } = point; // TODO: there should be a better way here. let mut chars = text.chars().peekable(); @@ -1735,990 +1028,10 @@ pub(crate) fn generate_edits( edits } -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{iter, mem, ops, str, usize}; -use tree_sitter::{ - Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, - QueryMatch, Range, TextProvider, Tree, -}; - -const CANCELLATION_CHECK_INTERVAL: usize = 100; - -/// Indicates which highlight should be applied to a region of source code. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct Highlight(pub usize); - -/// Represents the reason why syntax highlighting failed. -#[derive(Debug, PartialEq, Eq)] -pub enum Error { - Cancelled, - InvalidLanguage, - InvalidRanges, - Unknown, -} - -/// Represents a single step in rendering a syntax-highlighted document. 
-#[derive(Copy, Clone, Debug)] -pub enum HighlightEvent { - Source { start: usize, end: usize }, - HighlightStart(Highlight), - HighlightEnd, -} - -/// Contains the data needed to highlight code written in a particular language. -/// -/// This struct is immutable and can be shared between threads. -#[derive(Debug)] -pub struct HighlightConfiguration { - pub language: Grammar, - pub query: Query, - injections_query: Query, - combined_injections_patterns: Vec, - highlights_pattern_index: usize, - highlight_indices: ArcSwap>>, - non_local_variable_patterns: Vec, - injection_content_capture_index: Option, - injection_language_capture_index: Option, - injection_filename_capture_index: Option, - injection_shebang_capture_index: Option, - local_scope_capture_index: Option, - local_def_capture_index: Option, - local_def_value_capture_index: Option, - local_ref_capture_index: Option, -} - -#[derive(Debug)] -struct LocalDef<'a> { - name: Cow<'a, str>, - value_range: ops::Range, - highlight: Option, -} - -#[derive(Debug)] -struct LocalScope<'a> { - inherits: bool, - range: ops::Range, - local_defs: Vec>, -} - -#[derive(Debug)] -struct HighlightIter<'a> { - source: RopeSlice<'a>, - byte_offset: usize, - cancellation_flag: Option<&'a AtomicUsize>, - layers: Vec>, - iter_count: usize, - next_event: Option, - last_highlight_range: Option<(usize, usize, u32)>, -} - -// Adapter to convert rope chunks to bytes -pub struct ChunksBytes<'a> { - chunks: ropey::iter::Chunks<'a>, -} -impl<'a> Iterator for ChunksBytes<'a> { - type Item = &'a [u8]; - fn next(&mut self) -> Option { - self.chunks.next().map(str::as_bytes) - } -} - -pub struct RopeProvider<'a>(pub RopeSlice<'a>); -impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> { - type I = ChunksBytes<'a>; - - fn text(&mut self, node: Node) -> Self::I { - let fragment = self.0.byte_slice(node.start_byte()..node.end_byte()); - ChunksBytes { - chunks: fragment.chunks(), - } - } -} - -struct HighlightIterLayer<'a> { - _tree: Option, - cursor: QueryCursor, - captures: RefCell, &'a [u8]>>>, - config: &'a HighlightConfiguration, - highlight_end_stack: Vec, - scope_stack: Vec>, - depth: u32, -} - -impl<'a> fmt::Debug for HighlightIterLayer<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("HighlightIterLayer").finish() - } -} - -impl HighlightConfiguration { - /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting - /// queries. - /// - /// # Parameters - /// - /// * `language` - The Tree-sitter `Grammar` that should be used for parsing. - /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This - /// should be non-empty, otherwise no syntax highlights will be added. - /// * `injections_query` - A string containing tree patterns for injecting other languages - /// into the document. This can be empty if no injections are desired. - /// * `locals_query` - A string containing tree patterns for tracking local variable - /// definitions and references. This can be empty if local variable tracking is not needed. - /// - /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. - pub fn new( - language: Grammar, - highlights_query: &str, - injection_query: &str, - locals_query: &str, - ) -> Result { - // Concatenate the query strings, keeping track of the start offset of each section. 
- let mut query_source = String::new(); - query_source.push_str(locals_query); - let highlights_query_offset = query_source.len(); - query_source.push_str(highlights_query); - - // Construct a single query by concatenating the three query strings, but record the - // range of pattern indices that belong to each individual string. - let query = Query::new(&language, &query_source)?; - let mut highlights_pattern_index = 0; - for i in 0..(query.pattern_count()) { - let pattern_offset = query.start_byte_for_pattern(i); - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; - } - } - - let injections_query = Query::new(&language, injection_query)?; - let combined_injections_patterns = (0..injections_query.pattern_count()) - .filter(|&i| { - injections_query - .property_settings(i) - .iter() - .any(|s| &*s.key == "injection.combined") - }) - .collect(); - - // Find all of the highlighting patterns that are disabled for nodes that - // have been identified as local variables. - let non_local_variable_patterns = (0..query.pattern_count()) - .map(|i| { - query - .property_predicates(i) - .iter() - .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") - }) - .collect(); - - // Store the numeric ids for all of the special captures. - let mut injection_content_capture_index = None; - let mut injection_language_capture_index = None; - let mut injection_filename_capture_index = None; - let mut injection_shebang_capture_index = None; - let mut local_def_capture_index = None; - let mut local_def_value_capture_index = None; - let mut local_ref_capture_index = None; - let mut local_scope_capture_index = None; - for (i, name) in query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "local.definition" => local_def_capture_index = i, - "local.definition-value" => local_def_value_capture_index = i, - "local.reference" => local_ref_capture_index = i, - "local.scope" => local_scope_capture_index = i, - _ => {} - } - } - - for (i, name) in injections_query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, - "injection.filename" => injection_filename_capture_index = i, - "injection.shebang" => injection_shebang_capture_index = i, - _ => {} - } - } - - let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); - Ok(Self { - language, - query, - injections_query, - combined_injections_patterns, - highlights_pattern_index, - highlight_indices, - non_local_variable_patterns, - injection_content_capture_index, - injection_language_capture_index, - injection_filename_capture_index, - injection_shebang_capture_index, - local_scope_capture_index, - local_def_capture_index, - local_def_value_capture_index, - local_ref_capture_index, - }) - } - - /// Get a slice containing all of the highlight names used in the configuration. - pub fn names(&self) -> &[&str] { - self.query.capture_names() - } - - /// Set the list of recognized highlight names. - /// - /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated - /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of - /// these queries can choose to recognize highlights with different levels of specificity. 
- /// For example, the string `function.builtin` will match against `function.builtin.constructor` - /// but will not match `function.method.builtin` and `function.method`. - /// - /// When highlighting, results are returned as `Highlight` values, which contain the index - /// of the matched highlight this list of highlight names. - pub fn configure(&self, recognized_names: &[String]) { - let mut capture_parts = Vec::new(); - let indices: Vec<_> = self - .query - .capture_names() - .iter() - .map(move |capture_name| { - capture_parts.clear(); - capture_parts.extend(capture_name.split('.')); - - let mut best_index = None; - let mut best_match_len = 0; - for (i, recognized_name) in recognized_names.iter().enumerate() { - let mut len = 0; - let mut matches = true; - for (i, part) in recognized_name.split('.').enumerate() { - match capture_parts.get(i) { - Some(capture_part) if *capture_part == part => len += 1, - _ => { - matches = false; - break; - } - } - } - if matches && len > best_match_len { - best_index = Some(i); - best_match_len = len; - } - } - best_index.map(Highlight) - }) - .collect(); - - self.highlight_indices.store(Arc::new(indices)); - } - - fn injection_pair<'a>( - &self, - query_match: &QueryMatch<'a, 'a>, - source: RopeSlice<'a>, - ) -> (Option>, Option>) { - let mut injection_capture = None; - let mut content_node = None; - - for capture in query_match.captures { - let index = Some(capture.index); - if index == self.injection_language_capture_index { - let name = byte_range_to_str(capture.node.byte_range(), source); - injection_capture = Some(InjectionLanguageMarker::Name(name)); - } else if index == self.injection_filename_capture_index { - let name = byte_range_to_str(capture.node.byte_range(), source); - let path = Path::new(name.as_ref()).to_path_buf(); - injection_capture = Some(InjectionLanguageMarker::Filename(path.into())); - } else if index == self.injection_shebang_capture_index { - let node_slice = source.byte_slice(capture.node.byte_range()); - - // some languages allow space and newlines before the actual string content - // so a shebang could be on either the first or second line - let lines = if let Ok(end) = node_slice.try_line_to_byte(2) { - node_slice.byte_slice(..end) - } else { - node_slice - }; - - static SHEBANG_REGEX: Lazy = - Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); - - injection_capture = SHEBANG_REGEX - .captures_iter(lines.regex_input()) - .map(|cap| { - let cap = lines.byte_slice(cap.get_group(1).unwrap().range()); - InjectionLanguageMarker::Shebang(cap.into()) - }) - .next() - } else if index == self.injection_content_capture_index { - content_node = Some(capture.node); - } - } - (injection_capture, content_node) - } - - fn injection_for_match<'a>( - &self, - query: &'a Query, - query_match: &QueryMatch<'a, 'a>, - source: RopeSlice<'a>, - ) -> ( - Option>, - Option>, - IncludedChildren, - ) { - let (mut injection_capture, content_node) = self.injection_pair(query_match, source); - - let mut included_children = IncludedChildren::default(); - for prop in query.property_settings(query_match.pattern_index) { - match prop.key.as_ref() { - // In addition to specifying the language name via the text of a - // captured node, it can also be hard-coded via a `#set!` predicate - // that sets the injection.language key. 
- "injection.language" if injection_capture.is_none() => { - injection_capture = prop - .value - .as_ref() - .map(|s| InjectionLanguageMarker::Name(s.as_ref().into())); - } - - // By default, injections do not include the *children* of an - // `injection.content` node - only the ranges that belong to the - // node itself. This can be changed using a `#set!` predicate that - // sets the `injection.include-children` key. - "injection.include-children" => included_children = IncludedChildren::All, - - // Some queries might only exclude named children but include unnamed - // children in their `injection.content` node. This can be enabled using - // a `#set!` predicate that sets the `injection.include-unnamed-children` key. - "injection.include-unnamed-children" => { - included_children = IncludedChildren::Unnamed - } - _ => {} - } - } - - (injection_capture, content_node, included_children) - } -} - -impl<'a> HighlightIterLayer<'a> { - // First, sort scope boundaries by their byte offset in the document. At a - // given position, emit scope endings before scope beginnings. Finally, emit - // scope boundaries from deeper layers first. - fn sort_key(&self) -> Option<(usize, bool, isize)> { - let depth = -(self.depth as isize); - let next_start = self - .captures - .borrow_mut() - .peek() - .map(|(m, i)| m.captures[*i].node.start_byte()); - let next_end = self.highlight_end_stack.last().cloned(); - match (next_start, next_end) { - (Some(start), Some(end)) => { - if start < end { - Some((start, true, depth)) - } else { - Some((end, false, depth)) - } - } - (Some(i), None) => Some((i, true, depth)), - (None, Some(j)) => Some((j, false, depth)), - _ => None, - } - } -} - -#[derive(Clone)] -enum IncludedChildren { - None, - All, - Unnamed, -} - -impl Default for IncludedChildren { - fn default() -> Self { - Self::None - } -} - -// Compute the ranges that should be included when parsing an injection. -// This takes into account three things: -// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. -// * `nodes` - Every injection takes place within a set of nodes. The injection ranges -// are the ranges of those nodes. -// * `includes_children` - For some injections, the content nodes' children should be -// excluded from the nested document, so that only the content nodes' *own* content -// is reparsed. For other injections, the content nodes' entire ranges should be -// reparsed, including the ranges of their children. 
-fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - included_children: IncludedChildren, -) -> Vec { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), - }; - - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| match included_children { - IncludedChildren::None => Some(child.range()), - IncludedChildren::All => None, - IncludedChildren::Unnamed => { - if child.is_named() { - Some(child.range()) - } else { - None - } - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; - - if range.end_byte < parent_range.start_byte { - continue; - } - - while parent_range.start_byte <= range.end_byte { - if parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } - - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } - - if let Some(next_range) = parent_range_iter.next() { - parent_range = next_range; - } else { - return result; - } - } - } - } - result -} - -impl<'a> HighlightIter<'a> { - fn emit_event( - &mut self, - offset: usize, - event: Option, - ) -> Option> { - let result; - if self.byte_offset < offset { - result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: offset, - })); - self.byte_offset = offset; - self.next_event = event; - } else { - result = event.map(Ok); - } - self.sort_layers(); - result - } - - fn sort_layers(&mut self) { - while !self.layers.is_empty() { - if let Some(sort_key) = self.layers[0].sort_key() { - let mut i = 0; - while i + 1 < self.layers.len() { - if let Some(next_offset) = self.layers[i + 1].sort_key() { - if next_offset < sort_key { - i += 1; - continue; - } - } else { - let layer = self.layers.remove(i + 1); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); - } - break; - } - if i > 0 { - self.layers[0..(i + 1)].rotate_left(1); - } - break; - } else { - let layer = self.layers.remove(0); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); - } - } - } -} - -impl<'a> Iterator for HighlightIter<'a> { - type Item = Result; - - fn next(&mut self) -> Option { - 'main: loop { - // If we've already determined the next highlight boundary, just return it. 
- if let Some(e) = self.next_event.take() { - return Some(Ok(e)); - } - - // Periodically check for cancellation, returning `Cancelled` error if the - // cancellation flag was flipped. - if let Some(cancellation_flag) = self.cancellation_flag { - self.iter_count += 1; - if self.iter_count >= CANCELLATION_CHECK_INTERVAL { - self.iter_count = 0; - if cancellation_flag.load(Ordering::Relaxed) != 0 { - return Some(Err(Error::Cancelled)); - } - } - } - - // If none of the layers have any more highlight boundaries, terminate. - if self.layers.is_empty() { - let len = self.source.len_bytes(); - return if self.byte_offset < len { - let result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: len, - })); - self.byte_offset = len; - result - } else { - None - }; - } - - // Get the next capture from whichever layer has the earliest highlight boundary. - let range; - let layer = &mut self.layers[0]; - let captures = layer.captures.get_mut(); - if let Some((next_match, capture_index)) = captures.peek() { - let next_capture = next_match.captures[*capture_index]; - range = next_capture.node.byte_range(); - - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - } - // If there are no more captures, then emit any remaining highlight end events. - // And if there are none of those, then just advance to the end of the document. - else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } else { - return self.emit_event(self.source.len_bytes(), None); - }; - - let (mut match_, capture_index) = captures.next().unwrap(); - let mut capture = match_.captures[capture_index]; - - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } - - // If this capture is for tracking local variables, then process the - // local variable info. - let mut reference_highlight = None; - let mut definition_highlight = None; - while match_.pattern_index < layer.config.highlights_pattern_index { - // If the node represents a local scope, push a new local scope onto - // the scope stack. - if Some(capture.index) == layer.config.local_scope_capture_index { - definition_highlight = None; - let mut scope = LocalScope { - inherits: true, - range: range.clone(), - local_defs: Vec::new(), - }; - for prop in layer.config.query.property_settings(match_.pattern_index) { - if let "local.scope-inherits" = prop.key.as_ref() { - scope.inherits = - prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); - } - } - layer.scope_stack.push(scope); - } - // If the node represents a definition, add a new definition to the - // local scope at the top of the scope stack. 
- else if Some(capture.index) == layer.config.local_def_capture_index { - reference_highlight = None; - let scope = layer.scope_stack.last_mut().unwrap(); - - let mut value_range = 0..0; - for capture in match_.captures { - if Some(capture.index) == layer.config.local_def_value_capture_index { - value_range = capture.node.byte_range(); - } - } - - let name = byte_range_to_str(range.clone(), self.source); - scope.local_defs.push(LocalDef { - name, - value_range, - highlight: None, - }); - definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight); - } - // If the node represents a reference, then try to find the corresponding - // definition in the scope stack. - else if Some(capture.index) == layer.config.local_ref_capture_index - && definition_highlight.is_none() - { - definition_highlight = None; - let name = byte_range_to_str(range.clone(), self.source); - for scope in layer.scope_stack.iter().rev() { - if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { - if def.name == name && range.start >= def.value_range.end { - Some(def.highlight) - } else { - None - } - }) { - reference_highlight = highlight; - break; - } - if !scope.inherits { - break; - } - } - } - - // Continue processing any additional matches for the same node. - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - - // Otherwise, this capture must represent a highlight. - // If this exact range has already been highlighted by an earlier pattern, or by - // a different layer, then skip over this one. - if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { - if range.start == last_start && range.end == last_end && layer.depth < last_depth { - self.sort_layers(); - continue 'main; - } - } - - // If the current node was found to be a local variable, then skip over any - // highlighting patterns that are disabled for local variables. - if definition_highlight.is_some() || reference_highlight.is_some() { - while layer.config.non_local_variable_patterns[match_.pattern_index] { - match_.remove(); - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - } - - // Once a highlighting pattern is found for the current node, skip over - // any later highlighting patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. - while let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - captures.next(); - } else { - break; - } - } - - let current_highlight = layer.config.highlight_indices.load()[capture.index as usize]; - - // If this node represents a local definition, then store the current - // highlight value on the local scope entry representing this node. 
- if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; - } - - // Emit a scope start event and push the node's end position to the stack. - if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); - } - - self.sort_layers(); - } - } -} - -#[derive(Debug, Clone)] -pub enum InjectionLanguageMarker<'a> { - Name(Cow<'a, str>), - Filename(Cow<'a, Path>), - Shebang(String), -} - -const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; - -pub struct Merge { - iter: I, - spans: Box)>>, - - next_event: Option, - next_span: Option<(usize, std::ops::Range)>, - - queue: Vec, -} - -/// Merge a list of spans into the highlight event stream. -pub fn merge>( - iter: I, - spans: Vec<(usize, std::ops::Range)>, -) -> Merge { - let spans = Box::new(spans.into_iter()); - let mut merge = Merge { - iter, - spans, - next_event: None, - next_span: None, - queue: Vec::new(), - }; - merge.next_event = merge.iter.next(); - merge.next_span = merge.spans.next(); - merge -} - -impl> Iterator for Merge { - type Item = HighlightEvent; - fn next(&mut self) -> Option { - use HighlightEvent::*; - if let Some(event) = self.queue.pop() { - return Some(event); - } - - loop { - match (self.next_event, &self.next_span) { - // this happens when range is partially or fully offscreen - (Some(Source { start, .. }), Some((span, range))) if start > range.start => { - if start > range.end { - self.next_span = self.spans.next(); - } else { - self.next_span = Some((*span, start..range.end)); - }; - } - _ => break, - } - } - - match (self.next_event, &self.next_span) { - (Some(HighlightStart(i)), _) => { - self.next_event = self.iter.next(); - Some(HighlightStart(i)) - } - (Some(HighlightEnd), _) => { - self.next_event = self.iter.next(); - Some(HighlightEnd) - } - (Some(Source { start, end }), Some((_, range))) if start < range.start => { - let intersect = range.start.min(end); - let event = Source { - start, - end: intersect, - }; - - if end == intersect { - // the event is complete - self.next_event = self.iter.next(); - } else { - // subslice the event - self.next_event = Some(Source { - start: intersect, - end, - }); - }; - - Some(event) - } - (Some(Source { start, end }), Some((span, range))) if start == range.start => { - let intersect = range.end.min(end); - let event = HighlightStart(Highlight(*span)); - - // enqueue in reverse order - self.queue.push(HighlightEnd); - self.queue.push(Source { - start, - end: intersect, - }); - - if end == intersect { - // the event is complete - self.next_event = self.iter.next(); - } else { - // subslice the event - self.next_event = Some(Source { - start: intersect, - end, - }); - }; - - if intersect == range.end { - self.next_span = self.spans.next(); - } else { - self.next_span = Some((*span, intersect..range.end)); - } - - Some(event) - } - (Some(event), None) => { - self.next_event = self.iter.next(); - Some(event) - } - // Can happen if cursor at EOF and/or diagnostic reaches past the end. - // We need to actually emit events for the cursor-at-EOF situation, - // even though the range is past the end of the text. This needs to be - // handled appropriately by the drawing code by not assuming that - // all `Source` events point to valid indices in the rope. 
- (None, Some((span, range))) => { - let event = HighlightStart(Highlight(*span)); - self.queue.push(HighlightEnd); - self.queue.push(Source { - start: range.start, - end: range.end, - }); - self.next_span = self.spans.next(); - Some(event) - } - (None, None) => None, - e => unreachable!("{:?}", e), - } - } -} - -fn node_is_visible(node: &Node) -> bool { - node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) -} - -pub fn pretty_print_tree(fmt: &mut W, node: Node) -> fmt::Result { - if node.child_count() == 0 { - if node_is_visible(&node) { - write!(fmt, "({})", node.kind()) - } else { - write!(fmt, "\"{}\"", node.kind()) - } - } else { - pretty_print_tree_impl(fmt, &mut node.walk(), 0) - } -} - -fn pretty_print_tree_impl( - fmt: &mut W, - cursor: &mut tree_sitter::TreeCursor, - depth: usize, -) -> fmt::Result { - let node = cursor.node(); - let visible = node_is_visible(&node); - - if visible { - let indentation_columns = depth * 2; - write!(fmt, "{:indentation_columns$}", "")?; - - if let Some(field_name) = cursor.field_name() { - write!(fmt, "{}: ", field_name)?; - } - - write!(fmt, "({}", node.kind())?; - } - - // Handle children. - if cursor.goto_first_child() { - loop { - if node_is_visible(&cursor.node()) { - fmt.write_char('\n')?; - } - - pretty_print_tree_impl(fmt, cursor, depth + 1)?; - - if !cursor.goto_next_sibling() { - break; - } - } - - let moved = cursor.goto_parent(); - // The parent of the first child must exist, and must be `node`. - debug_assert!(moved); - debug_assert!(cursor.node() == node); - } - - if visible { - fmt.write_char(')')?; - } - - Ok(()) -} - #[cfg(test)] mod test { + use tree_sitter::QueryCursor; + use super::*; use crate::{Rope, Transaction}; @@ -2736,11 +1049,6 @@ mod test { "#, ); - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); let language = get_language("rust").unwrap(); let query = Query::new(&language, query_str).unwrap(); @@ -2748,12 +1056,7 @@ mod test { let mut cursor = QueryCursor::new(); let config = HighlightConfiguration::new(language, "", "", "").unwrap(); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap(); let root = syntax.tree().root_node(); let mut test = |capture, range| { @@ -2804,12 +1107,6 @@ mod test { .map(String::from) .collect(); - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - let language = get_language("rust").unwrap(); let config = HighlightConfiguration::new( language, @@ -2828,12 +1125,7 @@ mod test { fn main() {} ", ); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap(); let tree = syntax.tree(); let root = tree.root_node(); assert_eq!(root.kind(), "source_file"); @@ -2916,20 +1208,10 @@ mod test { ) { let source = Rope::from_str(source); - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); let language = get_language(language_name).unwrap(); let config = HighlightConfiguration::new(language, "", "", "").unwrap(); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let syntax = 
Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap(); let root = syntax .tree() diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs index 56b4d2ba9..87a847a15 100644 --- a/helix-core/tests/indent.rs +++ b/helix-core/tests/indent.rs @@ -1,4 +1,3 @@ -use arc_swap::ArcSwap; use helix_core::{ indent::{indent_level_for_line, treesitter_indent_for_pos, IndentStyle}, syntax::{Configuration, Loader}, @@ -6,7 +5,7 @@ use helix_core::{ }; use helix_stdx::rope::RopeSliceExt; use ropey::Rope; -use std::{ops::Range, path::PathBuf, process::Command, sync::Arc}; +use std::{ops::Range, path::PathBuf, process::Command}; #[test] fn test_treesitter_indent_rust() { @@ -200,12 +199,7 @@ fn test_treesitter_indent( let indent_style = IndentStyle::from_str(&language_config.indent.as_ref().unwrap().unit); let highlight_config = language_config.highlight_config(&[]).unwrap(); let text = doc.slice(..); - let syntax = Syntax::new( - text, - highlight_config, - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let syntax = Syntax::new(text, highlight_config, |_| None).unwrap(); let indent_query = language_config.indent_query().unwrap(); for i in 0..doc.len_lines() { diff --git a/helix-syntax/Cargo.toml b/helix-syntax/Cargo.toml new file mode 100644 index 000000000..3ba12ddd1 --- /dev/null +++ b/helix-syntax/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "helix-syntax" +description = "Helix syntax highlighting " +include = ["src/**/*", "README.md"] +version.workspace = true +authors.workspace = true +edition.workspace = true +license.workspace = true +rust-version.workspace = true +categories.workspace = true +repository.workspace = true +homepage.workspace = true + +[features] + +[dependencies] +helix-stdx = { path = "../helix-stdx" } + +ropey = { version = "1.6.1", default-features = false, features = ["simd"] } +slotmap = "1.0" +tree-sitter.workspace = true +once_cell = "1.19" +arc-swap = "1" +regex = "1" +bitflags = "2.4" +ahash = "0.8.9" +hashbrown = { version = "0.14.3", features = ["raw"] } +log = "0.4" diff --git a/helix-syntax/src/config.rs b/helix-syntax/src/config.rs new file mode 100644 index 000000000..a983d2932 --- /dev/null +++ b/helix-syntax/src/config.rs @@ -0,0 +1,331 @@ +use std::path::Path; +use std::sync::Arc; + +use arc_swap::ArcSwap; +use helix_stdx::rope::{self, RopeSliceExt}; +use once_cell::sync::Lazy; +use regex::Regex; +use ropey::RopeSlice; +use tree_sitter::{Language as Grammar, Node, Query, QueryError, QueryMatch}; + +use crate::highlighter::Highlight; +use crate::{byte_range_to_str, IncludedChildren, InjectionLanguageMarker, SHEBANG}; + +/// Contains the data needed to highlight code written in a particular language. +/// +/// This struct is immutable and can be shared between threads. 
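// Hypothetical construction sketch: one `HighlightConfiguration` built up front and
// shared behind an `Arc`, as the doc comment above describes. The helper name and the
// idea of passing empty injection/locals queries are assumptions for illustration only.
use std::sync::Arc;

fn shared_config(
    grammar: tree_sitter::Language,
    highlights_scm: &str,
) -> Result<Arc<HighlightConfiguration>, tree_sitter::QueryError> {
    // Injection and locals queries may be empty; an empty highlights query simply
    // results in no highlights being emitted.
    let config = HighlightConfiguration::new(grammar, highlights_scm, "", "")?;
    Ok(Arc::new(config))
}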
+#[derive(Debug)] +pub struct HighlightConfiguration { + pub language: Grammar, + pub query: Query, + pub(crate) injections_query: Query, + pub(crate) combined_injections_patterns: Vec, + pub(crate) highlights_pattern_index: usize, + pub(crate) highlight_indices: ArcSwap>>, + pub(crate) non_local_variable_patterns: Vec, + pub(crate) injection_content_capture_index: Option, + pub(crate) injection_language_capture_index: Option, + pub(crate) injection_filename_capture_index: Option, + pub(crate) injection_shebang_capture_index: Option, + pub(crate) local_scope_capture_index: Option, + pub(crate) local_def_capture_index: Option, + pub(crate) local_def_value_capture_index: Option, + pub(crate) local_ref_capture_index: Option, +} + +impl HighlightConfiguration { + /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting + /// queries. + /// + /// # Parameters + /// + /// * `language` - The Tree-sitter `Grammar` that should be used for parsing. + /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This + /// should be non-empty, otherwise no syntax highlights will be added. + /// * `injections_query` - A string containing tree patterns for injecting other languages + /// into the document. This can be empty if no injections are desired. + /// * `locals_query` - A string containing tree patterns for tracking local variable + /// definitions and references. This can be empty if local variable tracking is not needed. + /// + /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. + pub fn new( + language: Grammar, + highlights_query: &str, + injection_query: &str, + locals_query: &str, + ) -> Result { + // Concatenate the query strings, keeping track of the start offset of each section. + let mut query_source = String::new(); + query_source.push_str(locals_query); + let highlights_query_offset = query_source.len(); + query_source.push_str(highlights_query); + + // Construct a single query by concatenating the three query strings, but record the + // range of pattern indices that belong to each individual string. + let query = Query::new(&language, &query_source)?; + let mut highlights_pattern_index = 0; + for i in 0..(query.pattern_count()) { + let pattern_offset = query.start_byte_for_pattern(i); + if pattern_offset < highlights_query_offset { + highlights_pattern_index += 1; + } + } + + let injections_query = Query::new(&language, injection_query)?; + let combined_injections_patterns = (0..injections_query.pattern_count()) + .filter(|&i| { + injections_query + .property_settings(i) + .iter() + .any(|s| &*s.key == "injection.combined") + }) + .collect(); + + // Find all of the highlighting patterns that are disabled for nodes that + // have been identified as local variables. + let non_local_variable_patterns = (0..query.pattern_count()) + .map(|i| { + query + .property_predicates(i) + .iter() + .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") + }) + .collect(); + + // Store the numeric ids for all of the special captures. 
+ let mut injection_content_capture_index = None; + let mut injection_language_capture_index = None; + let mut injection_filename_capture_index = None; + let mut injection_shebang_capture_index = None; + let mut local_def_capture_index = None; + let mut local_def_value_capture_index = None; + let mut local_ref_capture_index = None; + let mut local_scope_capture_index = None; + for (i, name) in query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match *name { + "local.definition" => local_def_capture_index = i, + "local.definition-value" => local_def_value_capture_index = i, + "local.reference" => local_ref_capture_index = i, + "local.scope" => local_scope_capture_index = i, + _ => {} + } + } + + for (i, name) in injections_query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match *name { + "injection.content" => injection_content_capture_index = i, + "injection.language" => injection_language_capture_index = i, + "injection.filename" => injection_filename_capture_index = i, + "injection.shebang" => injection_shebang_capture_index = i, + _ => {} + } + } + + let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); + Ok(Self { + language, + query, + injections_query, + combined_injections_patterns, + highlights_pattern_index, + highlight_indices, + non_local_variable_patterns, + injection_content_capture_index, + injection_language_capture_index, + injection_filename_capture_index, + injection_shebang_capture_index, + local_scope_capture_index, + local_def_capture_index, + local_def_value_capture_index, + local_ref_capture_index, + }) + } + + /// Get a slice containing all of the highlight names used in the configuration. + pub fn names(&self) -> &[&str] { + self.query.capture_names() + } + + /// Set the list of recognized highlight names. + /// + /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated + /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of + /// these queries can choose to recognize highlights with different levels of specificity. + /// For example, the string `function.builtin` will match against `function.builtin.constructor` + /// but will not match `function.method.builtin` and `function.method`. + /// + /// When highlighting, results are returned as `Highlight` values, which contain the index + /// of the matched highlight this list of highlight names. 
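// Hypothetical usage sketch for `configure` below; the theme scope list is invented
// for illustration.
fn apply_theme_scopes(config: &HighlightConfiguration) {
    let recognized: Vec<String> = ["function", "function.builtin", "punctuation.bracket"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    config.configure(&recognized);
    // A capture named `function.builtin.constructor` now resolves to Highlight(1),
    // since "function.builtin" is the longest recognized prefix, while
    // `function.method` falls back to Highlight(0) ("function").
}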
+ pub fn configure(&self, recognized_names: &[String]) { + let mut capture_parts = Vec::new(); + let indices: Vec<_> = self + .query + .capture_names() + .iter() + .map(move |capture_name| { + capture_parts.clear(); + capture_parts.extend(capture_name.split('.')); + + let mut best_index = None; + let mut best_match_len = 0; + for (i, recognized_name) in recognized_names.iter().enumerate() { + let mut len = 0; + let mut matches = true; + for (i, part) in recognized_name.split('.').enumerate() { + match capture_parts.get(i) { + Some(capture_part) if *capture_part == part => len += 1, + _ => { + matches = false; + break; + } + } + } + if matches && len > best_match_len { + best_index = Some(i); + best_match_len = len; + } + } + best_index.map(Highlight) + }) + .collect(); + + self.highlight_indices.store(Arc::new(indices)); + } + + fn injection_pair<'a>( + &self, + query_match: &QueryMatch<'a, 'a>, + source: RopeSlice<'a>, + ) -> (Option>, Option>) { + let mut injection_capture = None; + let mut content_node = None; + + for capture in query_match.captures { + let index = Some(capture.index); + if index == self.injection_language_capture_index { + let name = byte_range_to_str(capture.node.byte_range(), source); + injection_capture = Some(InjectionLanguageMarker::Name(name)); + } else if index == self.injection_filename_capture_index { + let name = byte_range_to_str(capture.node.byte_range(), source); + let path = Path::new(name.as_ref()).to_path_buf(); + injection_capture = Some(InjectionLanguageMarker::Filename(path.into())); + } else if index == self.injection_shebang_capture_index { + let node_slice = source.byte_slice(capture.node.byte_range()); + + // some languages allow space and newlines before the actual string content + // so a shebang could be on either the first or second line + let lines = if let Ok(end) = node_slice.try_line_to_byte(2) { + node_slice.byte_slice(..end) + } else { + node_slice + }; + + static SHEBANG_REGEX: Lazy = + Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); + + injection_capture = SHEBANG_REGEX + .captures_iter(lines.regex_input()) + .map(|cap| { + let cap = lines.byte_slice(cap.get_group(1).unwrap().range()); + InjectionLanguageMarker::Shebang(cap.into()) + }) + .next() + } else if index == self.injection_content_capture_index { + content_node = Some(capture.node); + } + } + (injection_capture, content_node) + } + + pub(super) fn injection_for_match<'a>( + &self, + query: &'a Query, + query_match: &QueryMatch<'a, 'a>, + source: RopeSlice<'a>, + ) -> ( + Option>, + Option>, + IncludedChildren, + ) { + let (mut injection_capture, content_node) = self.injection_pair(query_match, source); + + let mut included_children = IncludedChildren::default(); + for prop in query.property_settings(query_match.pattern_index) { + match prop.key.as_ref() { + // In addition to specifying the language name via the text of a + // captured node, it can also be hard-coded via a `#set!` predicate + // that sets the injection.language key. + "injection.language" if injection_capture.is_none() => { + injection_capture = prop + .value + .as_ref() + .map(|s| InjectionLanguageMarker::Name(s.as_ref().into())); + } + + // By default, injections do not include the *children* of an + // `injection.content` node - only the ranges that belong to the + // node itself. This can be changed using a `#set!` predicate that + // sets the `injection.include-children` key. 
+ "injection.include-children" => included_children = IncludedChildren::All, + + // Some queries might only exclude named children but include unnamed + // children in their `injection.content` node. This can be enabled using + // a `#set!` predicate that sets the `injection.include-unnamed-children` key. + "injection.include-unnamed-children" => { + included_children = IncludedChildren::Unnamed + } + _ => {} + } + } + + (injection_capture, content_node, included_children) + } + pub fn load_query( + &self, + language: &str, + filename: &str, + read_query_text: impl FnMut(&str, &str) -> String, + ) -> Result, QueryError> { + let query_text = read_query(language, filename, read_query_text); + if query_text.is_empty() { + return Ok(None); + } + Query::new(&self.language, &query_text).map(Some) + } +} + +/// reads a query by invoking `read_query_text`, handeles any `inherits` directives +pub fn read_query( + language: &str, + filename: &str, + mut read_query_text: impl FnMut(&str, &str) -> String, +) -> String { + fn read_query_impl( + language: &str, + filename: &str, + read_query_text: &mut impl FnMut(&str, &str) -> String, + ) -> String { + static INHERITS_REGEX: Lazy = + Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap()); + + let query = read_query_text(language, filename); + + // replaces all "; inherits (,)*" with the queries of the given language(s) + INHERITS_REGEX + .replace_all(&query, |captures: ®ex::Captures| { + captures[1] + .split(',') + .map(|language| { + format!( + "\n{}\n", + read_query_impl(language, filename, &mut *read_query_text) + ) + }) + .collect::() + }) + .to_string() + } + read_query_impl(language, filename, &mut read_query_text) +} diff --git a/helix-syntax/src/highlighter.rs b/helix-syntax/src/highlighter.rs new file mode 100644 index 000000000..1b53672f0 --- /dev/null +++ b/helix-syntax/src/highlighter.rs @@ -0,0 +1,439 @@ +use std::borrow::Cow; +use std::cell::RefCell; +use std::sync::atomic::{self, AtomicUsize}; +use std::{fmt, iter, mem, ops}; + +use ropey::RopeSlice; +use tree_sitter::{QueryCaptures, QueryCursor, Tree}; + +use crate::ropey::RopeProvider; +use crate::{ + byte_range_to_str, Error, HighlightConfiguration, Syntax, PARSER, TREE_SITTER_MATCH_LIMIT, +}; + +const CANCELLATION_CHECK_INTERVAL: usize = 100; + +/// Indicates which highlight should be applied to a region of source code. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct Highlight(pub usize); + +/// Represents a single step in rendering a syntax-highlighted document. 
+#[derive(Copy, Clone, Debug)] +pub enum HighlightEvent { + Source { start: usize, end: usize }, + HighlightStart(Highlight), + HighlightEnd, +} + +#[derive(Debug)] +struct LocalDef<'a> { + name: Cow<'a, str>, + value_range: ops::Range, + highlight: Option, +} + +#[derive(Debug)] +struct LocalScope<'a> { + inherits: bool, + range: ops::Range, + local_defs: Vec>, +} + +#[derive(Debug)] +struct HighlightIter<'a> { + source: RopeSlice<'a>, + byte_offset: usize, + cancellation_flag: Option<&'a AtomicUsize>, + layers: Vec>, + iter_count: usize, + next_event: Option, + last_highlight_range: Option<(usize, usize, u32)>, +} + +struct HighlightIterLayer<'a> { + _tree: Option, + cursor: QueryCursor, + captures: RefCell, &'a [u8]>>>, + config: &'a HighlightConfiguration, + highlight_end_stack: Vec, + scope_stack: Vec>, + depth: u32, +} + +impl<'a> fmt::Debug for HighlightIterLayer<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("HighlightIterLayer").finish() + } +} + +impl<'a> HighlightIterLayer<'a> { + // First, sort scope boundaries by their byte offset in the document. At a + // given position, emit scope endings before scope beginnings. Finally, emit + // scope boundaries from deeper layers first. + fn sort_key(&self) -> Option<(usize, bool, isize)> { + let depth = -(self.depth as isize); + let next_start = self + .captures + .borrow_mut() + .peek() + .map(|(m, i)| m.captures[*i].node.start_byte()); + let next_end = self.highlight_end_stack.last().cloned(); + match (next_start, next_end) { + (Some(start), Some(end)) => { + if start < end { + Some((start, true, depth)) + } else { + Some((end, false, depth)) + } + } + (Some(i), None) => Some((i, true, depth)), + (None, Some(j)) => Some((j, false, depth)), + _ => None, + } + } +} + +impl<'a> HighlightIter<'a> { + fn emit_event( + &mut self, + offset: usize, + event: Option, + ) -> Option> { + let result; + if self.byte_offset < offset { + result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: offset, + })); + self.byte_offset = offset; + self.next_event = event; + } else { + result = event.map(Ok); + } + self.sort_layers(); + result + } + + fn sort_layers(&mut self) { + while !self.layers.is_empty() { + if let Some(sort_key) = self.layers[0].sort_key() { + let mut i = 0; + while i + 1 < self.layers.len() { + if let Some(next_offset) = self.layers[i + 1].sort_key() { + if next_offset < sort_key { + i += 1; + continue; + } + } else { + let layer = self.layers.remove(i + 1); + PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.push(layer.cursor); + }); + } + break; + } + if i > 0 { + self.layers[0..(i + 1)].rotate_left(1); + } + break; + } else { + let layer = self.layers.remove(0); + PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.push(layer.cursor); + }); + } + } + } +} + +impl<'a> Iterator for HighlightIter<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + 'main: loop { + // If we've already determined the next highlight boundary, just return it. + if let Some(e) = self.next_event.take() { + return Some(Ok(e)); + } + + // Periodically check for cancellation, returning `Cancelled` error if the + // cancellation flag was flipped. 
+ if let Some(cancellation_flag) = self.cancellation_flag { + self.iter_count += 1; + if self.iter_count >= CANCELLATION_CHECK_INTERVAL { + self.iter_count = 0; + if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 { + return Some(Err(Error::Cancelled)); + } + } + } + + // If none of the layers have any more highlight boundaries, terminate. + if self.layers.is_empty() { + let len = self.source.len_bytes(); + return if self.byte_offset < len { + let result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: len, + })); + self.byte_offset = len; + result + } else { + None + }; + } + + // Get the next capture from whichever layer has the earliest highlight boundary. + let range; + let layer = &mut self.layers[0]; + let captures = layer.captures.get_mut(); + if let Some((next_match, capture_index)) = captures.peek() { + let next_capture = next_match.captures[*capture_index]; + range = next_capture.node.byte_range(); + + // If any previous highlight ends before this node starts, then before + // processing this capture, emit the source code up until the end of the + // previous highlight, and an end event for that highlight. + if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if end_byte <= range.start { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } + } + } + // If there are no more captures, then emit any remaining highlight end events. + // And if there are none of those, then just advance to the end of the document. + else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } else { + return self.emit_event(self.source.len_bytes(), None); + }; + + let (mut match_, capture_index) = captures.next().unwrap(); + let mut capture = match_.captures[capture_index]; + + // Remove from the local scope stack any local scopes that have already ended. + while range.start > layer.scope_stack.last().unwrap().range.end { + layer.scope_stack.pop(); + } + + // If this capture is for tracking local variables, then process the + // local variable info. + let mut reference_highlight = None; + let mut definition_highlight = None; + while match_.pattern_index < layer.config.highlights_pattern_index { + // If the node represents a local scope, push a new local scope onto + // the scope stack. + if Some(capture.index) == layer.config.local_scope_capture_index { + definition_highlight = None; + let mut scope = LocalScope { + inherits: true, + range: range.clone(), + local_defs: Vec::new(), + }; + for prop in layer.config.query.property_settings(match_.pattern_index) { + if let "local.scope-inherits" = prop.key.as_ref() { + scope.inherits = + prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); + } + } + layer.scope_stack.push(scope); + } + // If the node represents a definition, add a new definition to the + // local scope at the top of the scope stack. 
+ else if Some(capture.index) == layer.config.local_def_capture_index { + reference_highlight = None; + let scope = layer.scope_stack.last_mut().unwrap(); + + let mut value_range = 0..0; + for capture in match_.captures { + if Some(capture.index) == layer.config.local_def_value_capture_index { + value_range = capture.node.byte_range(); + } + } + + let name = byte_range_to_str(range.clone(), self.source); + scope.local_defs.push(LocalDef { + name, + value_range, + highlight: None, + }); + definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight); + } + // If the node represents a reference, then try to find the corresponding + // definition in the scope stack. + else if Some(capture.index) == layer.config.local_ref_capture_index + && definition_highlight.is_none() + { + definition_highlight = None; + let name = byte_range_to_str(range.clone(), self.source); + for scope in layer.scope_stack.iter().rev() { + if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { + if def.name == name && range.start >= def.value_range.end { + Some(def.highlight) + } else { + None + } + }) { + reference_highlight = highlight; + break; + } + if !scope.inherits { + break; + } + } + } + + // Continue processing any additional matches for the same node. + if let Some((next_match, next_capture_index)) = captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = captures.next().unwrap().0; + continue; + } + } + + self.sort_layers(); + continue 'main; + } + + // Otherwise, this capture must represent a highlight. + // If this exact range has already been highlighted by an earlier pattern, or by + // a different layer, then skip over this one. + if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { + if range.start == last_start && range.end == last_end && layer.depth < last_depth { + self.sort_layers(); + continue 'main; + } + } + + // If the current node was found to be a local variable, then skip over any + // highlighting patterns that are disabled for local variables. + if definition_highlight.is_some() || reference_highlight.is_some() { + while layer.config.non_local_variable_patterns[match_.pattern_index] { + match_.remove(); + if let Some((next_match, next_capture_index)) = captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = captures.next().unwrap().0; + continue; + } + } + + self.sort_layers(); + continue 'main; + } + } + + // Once a highlighting pattern is found for the current node, skip over + // any later highlighting patterns that also match this node. Captures + // for a given node are ordered by pattern index, so these subsequent + // captures are guaranteed to be for highlighting, not injections or + // local variables. + while let Some((next_match, next_capture_index)) = captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + captures.next(); + } else { + break; + } + } + + let current_highlight = layer.config.highlight_indices.load()[capture.index as usize]; + + // If this node represents a local definition, then store the current + // highlight value on the local scope entry representing this node. 
+ if let Some(definition_highlight) = definition_highlight { + *definition_highlight = current_highlight; + } + + // Emit a scope start event and push the node's end position to the stack. + if let Some(highlight) = reference_highlight.or(current_highlight) { + self.last_highlight_range = Some((range.start, range.end, layer.depth)); + layer.highlight_end_stack.push(range.end); + return self + .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); + } + + self.sort_layers(); + } + } +} + +impl Syntax { + /// Iterate over the highlighted regions for a given slice of source code. + pub fn highlight_iter<'a>( + &'a self, + source: RopeSlice<'a>, + range: Option>, + cancellation_flag: Option<&'a AtomicUsize>, + ) -> impl Iterator> + 'a { + let mut layers = self + .layers + .iter() + .filter_map(|(_, layer)| { + // TODO: if range doesn't overlap layer range, skip it + + // Reuse a cursor from the pool if available. + let mut cursor = PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) + }); + + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + let cursor_ref = + unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + + // if reusing cursors & no range this resets to whole range + cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); + cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT); + + let mut captures = cursor_ref + .captures( + &layer.config.query, + layer.tree().root_node(), + RopeProvider(source), + ) + .peekable(); + + // If there's no captures, skip the layer + captures.peek()?; + + Some(HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + _tree: None, + captures: RefCell::new(captures), + config: layer.config.as_ref(), // TODO: just reuse `layer` + depth: layer.depth, // TODO: just reuse `layer` + }) + }) + .collect::>(); + + layers.sort_unstable_by_key(|layer| layer.sort_key()); + + let mut result = HighlightIter { + source, + byte_offset: range.map_or(0, |r| r.start), + cancellation_flag, + iter_count: 0, + layers, + next_event: None, + last_highlight_range: None, + }; + result.sort_layers(); + result + } +} diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs new file mode 100644 index 000000000..04a2d27d5 --- /dev/null +++ b/helix-syntax/src/lib.rs @@ -0,0 +1,342 @@ +use ::ropey::RopeSlice; +use slotmap::{DefaultKey as LayerId, HopSlotMap}; +use tree_sitter::{Node, Parser, Point, Query, QueryCursor, Range, Tree}; + +use std::borrow::Cow; +use std::cell::RefCell; +use std::hash::{Hash, Hasher}; +use std::path::Path; +use std::str; +use std::sync::Arc; + +use crate::parse::LayerUpdateFlags; + +pub use crate::config::{read_query, HighlightConfiguration}; +pub use crate::ropey::RopeProvider; +pub use merge::merge; +pub use pretty_print::pretty_print_tree; +pub use tree_cursor::TreeCursor; + +mod config; +pub mod highlighter; +mod merge; +mod parse; +mod pretty_print; +mod ropey; +mod tree_cursor; + +#[derive(Debug)] +pub struct Syntax { + layers: HopSlotMap, + root: LayerId, +} + +impl Syntax { + pub fn new( + source: RopeSlice, + config: Arc, + injection_callback: impl Fn(&InjectionLanguageMarker) -> Option>, + ) -> Option { + let root_layer = LanguageLayer { + tree: 
None, + config, + depth: 0, + flags: LayerUpdateFlags::empty(), + ranges: vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + parent: None, + }; + + // track scope_descriptor: a Vec of scopes for item in tree + + let mut layers = HopSlotMap::default(); + let root = layers.insert(root_layer); + + let mut syntax = Self { root, layers }; + + let res = syntax.update(source, Vec::new(), injection_callback); + + if res.is_err() { + log::error!("TS parser failed, disabling TS for the current buffer: {res:?}"); + return None; + } + Some(syntax) + } + + pub fn tree(&self) -> &Tree { + self.layers[self.root].tree() + } + + pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree { + let mut container_id = self.root; + + for (layer_id, layer) in self.layers.iter() { + if layer.depth > self.layers[container_id].depth + && layer.contains_byte_range(start, end) + { + container_id = layer_id; + } + } + + self.layers[container_id].tree() + } + + pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { + self.tree_for_byte_range(start, end) + .root_node() + .named_descendant_for_byte_range(start, end) + } + + pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { + self.tree_for_byte_range(start, end) + .root_node() + .descendant_for_byte_range(start, end) + } + + pub fn walk(&self) -> TreeCursor<'_> { + TreeCursor::new(&self.layers, self.root) + } +} + +#[derive(Debug)] +pub struct LanguageLayer { + // mode + // grammar + pub config: Arc, + pub(crate) tree: Option, + pub ranges: Vec, + pub depth: u32, + flags: LayerUpdateFlags, + parent: Option, +} + +/// This PartialEq implementation only checks if that +/// two layers are theoretically identical (meaning they highlight the same text range with the same language). +/// It does not check whether the layers have the same internal treesitter +/// state. +impl PartialEq for LanguageLayer { + fn eq(&self, other: &Self) -> bool { + self.depth == other.depth + && self.config.language == other.config.language + && self.ranges == other.ranges + } +} + +/// Hash implementation belongs to PartialEq implementation above. +/// See its documentation for details. +impl Hash for LanguageLayer { + fn hash(&self, state: &mut H) { + self.depth.hash(state); + self.config.language.hash(state); + self.ranges.hash(state); + } +} + +impl LanguageLayer { + pub fn tree(&self) -> &Tree { + // TODO: no unwrap + self.tree.as_ref().unwrap() + } + + /// Whether the layer contains the given byte range. + /// + /// If the layer has multiple ranges (i.e. combined injections), the + /// given range is considered contained if it is within the start and + /// end bytes of the first and last ranges **and** if the given range + /// starts or ends within any of the layer's ranges. 
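// Worked illustration of the containment rule above, assuming a combined-injection
// layer with ranges 10..20 and 40..60 (byte ranges are half-open):
//   layer.contains_byte_range(12, 18) -> true   (entirely inside the first range)
//   layer.contains_byte_range(15, 45) -> true   (starts in one range, ends in another)
//   layer.contains_byte_range(25, 30) -> false  (between ranges: neither endpoint covered)
//   layer.contains_byte_range( 5, 15) -> false  (starts before the layer's first range)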
+ fn contains_byte_range(&self, start: usize, end: usize) -> bool { + let layer_start = self + .ranges + .first() + .expect("ranges should not be empty") + .start_byte; + let layer_end = self + .ranges + .last() + .expect("ranges should not be empty") + .end_byte; + + layer_start <= start + && layer_end >= end + && self.ranges.iter().any(|range| { + let byte_range = range.start_byte..range.end_byte; + byte_range.contains(&start) || byte_range.contains(&end) + }) + } +} + +#[derive(Debug, Clone)] +pub enum InjectionLanguageMarker<'a> { + Name(Cow<'a, str>), + Filename(Cow<'a, Path>), + Shebang(String), +} + +const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; + +#[derive(Debug)] +pub enum CapturedNode<'a> { + Single(Node<'a>), + /// Guaranteed to be not empty + Grouped(Vec>), +} + +impl<'a> CapturedNode<'a> { + pub fn start_byte(&self) -> usize { + match self { + Self::Single(n) => n.start_byte(), + Self::Grouped(ns) => ns[0].start_byte(), + } + } + + pub fn end_byte(&self) -> usize { + match self { + Self::Single(n) => n.end_byte(), + Self::Grouped(ns) => ns.last().unwrap().end_byte(), + } + } + + pub fn byte_range(&self) -> std::ops::Range { + self.start_byte()..self.end_byte() + } +} + +/// The maximum number of in-progress matches a TS cursor can consider at once. +/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`. +/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here. +/// +/// +/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually). +/// However, this causes performance issues for medium to large files. +/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc). +/// +/// +/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream +/// (see and ). +/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance. +/// +/// +/// Neovim chose 64 for this value somewhat arbitrarily (). +/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions. +/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high. +const TREE_SITTER_MATCH_LIMIT: u32 = 256; + +#[derive(Debug)] +pub struct TextObjectQuery { + pub query: Query, +} + +impl TextObjectQuery { + /// Run the query on the given node and return sub nodes which match given + /// capture ("function.inside", "class.around", etc). + /// + /// Captures may contain multiple nodes by using quantifiers (+, *, etc), + /// and support for this is partial and could use improvement. + /// + /// ```query + /// (comment)+ @capture + /// + /// ; OR + /// ( + /// (comment)* + /// . + /// (function) + /// ) @capture + /// ``` + pub fn capture_nodes<'a>( + &'a self, + capture_name: &str, + node: Node<'a>, + slice: RopeSlice<'a>, + cursor: &'a mut QueryCursor, + ) -> Option>> { + self.capture_nodes_any(&[capture_name], node, slice, cursor) + } + + /// Find the first capture that exists out of all given `capture_names` + /// and return sub nodes that match this capture. 
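// Hypothetical usage sketch: finding the byte range of a "function.inside" textobject
// at the document root, borrowing a pooled cursor via `with_cursor`. The capture name
// follows the textobjects.scm convention; the helper itself is an assumption for
// illustration.
fn function_body_range(
    textobjects: &TextObjectQuery,
    syntax: &Syntax,
    slice: RopeSlice<'_>,
) -> Option<std::ops::Range<usize>> {
    with_cursor(|cursor| {
        let root = syntax.tree().root_node();
        textobjects
            .capture_nodes("function.inside", root, slice, cursor)?
            .next()
            .map(|node| node.byte_range())
    })
}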
+ pub fn capture_nodes_any<'a>( + &'a self, + capture_names: &[&str], + node: Node<'a>, + slice: RopeSlice<'a>, + cursor: &'a mut QueryCursor, + ) -> Option>> { + let capture_idx = capture_names + .iter() + .find_map(|cap| self.query.capture_index_for_name(cap))?; + + cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + + let nodes = cursor + .captures(&self.query, node, RopeProvider(slice)) + .filter_map(move |(mat, _)| { + let nodes: Vec<_> = mat + .captures + .iter() + .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node)) + .collect(); + + if nodes.len() > 1 { + Some(CapturedNode::Grouped(nodes)) + } else { + nodes.into_iter().map(CapturedNode::Single).next() + } + }); + + Some(nodes) + } +} + +/// Represents the reason why syntax highlighting failed. +#[derive(Debug, PartialEq, Eq)] +pub enum Error { + Cancelled, + InvalidLanguage, + InvalidRanges, + Unknown, +} + +#[derive(Clone)] +enum IncludedChildren { + None, + All, + Unnamed, +} + +impl Default for IncludedChildren { + fn default() -> Self { + Self::None + } +} + +fn byte_range_to_str(range: std::ops::Range, source: RopeSlice) -> Cow { + Cow::from(source.byte_slice(range)) +} + +struct TsParser { + parser: tree_sitter::Parser, + pub cursors: Vec, +} + +// could also just use a pool, or a single instance? +thread_local! { + static PARSER: RefCell = RefCell::new(TsParser { + parser: Parser::new(), + cursors: Vec::new(), + }) +} + +pub fn with_cursor(f: impl FnOnce(&mut QueryCursor) -> T) -> T { + PARSER.with(|parser| { + let mut parser = parser.borrow_mut(); + let mut cursor = parser.cursors.pop().unwrap_or_else(QueryCursor::new); + let res = f(&mut cursor); + parser.cursors.push(cursor); + res + }) +} diff --git a/helix-syntax/src/merge.rs b/helix-syntax/src/merge.rs new file mode 100644 index 000000000..2b9063a06 --- /dev/null +++ b/helix-syntax/src/merge.rs @@ -0,0 +1,135 @@ +use crate::highlighter::{Highlight, HighlightEvent}; + +pub struct Merge { + iter: I, + spans: Box)>>, + + next_event: Option, + next_span: Option<(usize, std::ops::Range)>, + + queue: Vec, +} + +/// Merge a list of spans into the highlight event stream. +pub fn merge>( + iter: I, + spans: Vec<(usize, std::ops::Range)>, +) -> Merge { + let spans = Box::new(spans.into_iter()); + let mut merge = Merge { + iter, + spans, + next_event: None, + next_span: None, + queue: Vec::new(), + }; + merge.next_event = merge.iter.next(); + merge.next_span = merge.spans.next(); + merge +} + +impl> Iterator for Merge { + type Item = HighlightEvent; + fn next(&mut self) -> Option { + use HighlightEvent::*; + if let Some(event) = self.queue.pop() { + return Some(event); + } + + loop { + match (self.next_event, &self.next_span) { + // this happens when range is partially or fully offscreen + (Some(Source { start, .. 
}), Some((span, range))) if start > range.start => { + if start > range.end { + self.next_span = self.spans.next(); + } else { + self.next_span = Some((*span, start..range.end)); + }; + } + _ => break, + } + } + + match (self.next_event, &self.next_span) { + (Some(HighlightStart(i)), _) => { + self.next_event = self.iter.next(); + Some(HighlightStart(i)) + } + (Some(HighlightEnd), _) => { + self.next_event = self.iter.next(); + Some(HighlightEnd) + } + (Some(Source { start, end }), Some((_, range))) if start < range.start => { + let intersect = range.start.min(end); + let event = Source { + start, + end: intersect, + }; + + if end == intersect { + // the event is complete + self.next_event = self.iter.next(); + } else { + // subslice the event + self.next_event = Some(Source { + start: intersect, + end, + }); + }; + + Some(event) + } + (Some(Source { start, end }), Some((span, range))) if start == range.start => { + let intersect = range.end.min(end); + let event = HighlightStart(Highlight(*span)); + + // enqueue in reverse order + self.queue.push(HighlightEnd); + self.queue.push(Source { + start, + end: intersect, + }); + + if end == intersect { + // the event is complete + self.next_event = self.iter.next(); + } else { + // subslice the event + self.next_event = Some(Source { + start: intersect, + end, + }); + }; + + if intersect == range.end { + self.next_span = self.spans.next(); + } else { + self.next_span = Some((*span, intersect..range.end)); + } + + Some(event) + } + (Some(event), None) => { + self.next_event = self.iter.next(); + Some(event) + } + // Can happen if cursor at EOF and/or diagnostic reaches past the end. + // We need to actually emit events for the cursor-at-EOF situation, + // even though the range is past the end of the text. This needs to be + // handled appropriately by the drawing code by not assuming that + // all `Source` events point to valid indices in the rope. + (None, Some((span, range))) => { + let event = HighlightStart(Highlight(*span)); + self.queue.push(HighlightEnd); + self.queue.push(Source { + start: range.start, + end: range.end, + }); + self.next_span = self.spans.next(); + Some(event) + } + (None, None) => None, + e => unreachable!("{:?}", e), + } + } +} diff --git a/helix-syntax/src/parse.rs b/helix-syntax/src/parse.rs new file mode 100644 index 000000000..de70f2a16 --- /dev/null +++ b/helix-syntax/src/parse.rs @@ -0,0 +1,429 @@ +use std::collections::VecDeque; +use std::mem::replace; +use std::sync::Arc; + +use ahash::RandomState; +use bitflags::bitflags; +use hashbrown::raw::RawTable; +use ropey::RopeSlice; +use tree_sitter::{Node, Parser, Point, QueryCursor, Range}; + +use crate::ropey::RopeProvider; +use crate::{ + Error, HighlightConfiguration, IncludedChildren, InjectionLanguageMarker, LanguageLayer, + Syntax, PARSER, TREE_SITTER_MATCH_LIMIT, +}; + +bitflags! { + /// Flags that track the status of a layer + /// in the `Sytaxn::update` function + #[derive(Debug)] + pub(crate) struct LayerUpdateFlags : u32{ + const MODIFIED = 0b001; + const MOVED = 0b010; + const TOUCHED = 0b100; + } +} + +impl Syntax { + pub fn update( + &mut self, + source: RopeSlice, + edits: Vec, + injection_callback: impl Fn(&InjectionLanguageMarker) -> Option>, + ) -> Result<(), Error> { + let mut queue = VecDeque::new(); + queue.push_back(self.root); + + // This table allows inverse indexing of `layers`. + // That is by hashing a `Layer` you can find + // the `LayerId` of an existing equivalent `Layer` in `layers`. 
+ // + // It is used to determine if a new layer exists for an injection + // or if an existing layer needs to be updated. + let mut layers_table = RawTable::with_capacity(self.layers.len()); + let layers_hasher = RandomState::new(); + // Use the edits to update all layers markers + fn point_add(a: Point, b: Point) -> Point { + if b.row > 0 { + Point::new(a.row.saturating_add(b.row), b.column) + } else { + Point::new(0, a.column.saturating_add(b.column)) + } + } + fn point_sub(a: Point, b: Point) -> Point { + if a.row > b.row { + Point::new(a.row.saturating_sub(b.row), a.column) + } else { + Point::new(0, a.column.saturating_sub(b.column)) + } + } + + for (layer_id, layer) in self.layers.iter_mut() { + // The root layer always covers the whole range (0..usize::MAX) + if layer.depth == 0 { + layer.flags = LayerUpdateFlags::MODIFIED; + continue; + } + + if !edits.is_empty() { + for range in &mut layer.ranges { + // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 + for edit in edits.iter().rev() { + let is_pure_insertion = edit.old_end_byte == edit.start_byte; + + // if edit is after range, skip + if edit.start_byte > range.end_byte { + // TODO: || (is_noop && edit.start_byte == range.end_byte) + continue; + } + + // if edit is before range, shift entire range by len + if edit.old_end_byte < range.start_byte { + range.start_byte = + edit.new_end_byte + (range.start_byte - edit.old_end_byte); + range.start_point = point_add( + edit.new_end_position, + point_sub(range.start_point, edit.old_end_position), + ); + + range.end_byte = edit + .new_end_byte + .saturating_add(range.end_byte - edit.old_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + + layer.flags |= LayerUpdateFlags::MOVED; + } + // if the edit starts in the space before and extends into the range + else if edit.start_byte < range.start_byte { + range.start_byte = edit.new_end_byte; + range.start_point = edit.new_end_position; + + range.end_byte = range + .end_byte + .saturating_sub(edit.old_end_byte) + .saturating_add(edit.new_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + layer.flags = LayerUpdateFlags::MODIFIED; + } + // If the edit is an insertion at the start of the tree, shift + else if edit.start_byte == range.start_byte && is_pure_insertion { + range.start_byte = edit.new_end_byte; + range.start_point = edit.new_end_position; + layer.flags |= LayerUpdateFlags::MOVED; + } else { + range.end_byte = range + .end_byte + .saturating_sub(edit.old_end_byte) + .saturating_add(edit.new_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + layer.flags = LayerUpdateFlags::MODIFIED; + } + } + } + } + + let hash = layers_hasher.hash_one(layer); + // Safety: insert_no_grow is unsafe because it assumes that the table + // has enough capacity to hold additional elements. + // This is always the case as we reserved enough capacity above. 
+ unsafe { layers_table.insert_no_grow(hash, layer_id) }; + } + + PARSER.with(|ts_parser| { + let ts_parser = &mut ts_parser.borrow_mut(); + ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours + let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); + // TODO: might need to set cursor range + cursor.set_byte_range(0..usize::MAX); + cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + + let source_slice = source.slice(..); + + while let Some(layer_id) = queue.pop_front() { + let layer = &mut self.layers[layer_id]; + + // Mark the layer as touched + layer.flags |= LayerUpdateFlags::TOUCHED; + + // If a tree already exists, notify it of changes. + if let Some(tree) = &mut layer.tree { + if layer + .flags + .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED) + { + for edit in edits.iter().rev() { + // Apply the edits in reverse. + // If we applied them in order then edit 1 would disrupt the positioning of edit 2. + tree.edit(edit); + } + } + + if layer.flags.contains(LayerUpdateFlags::MODIFIED) { + // Re-parse the tree. + layer.parse(&mut ts_parser.parser, source)?; + } + } else { + // always parse if this layer has never been parsed before + layer.parse(&mut ts_parser.parser, source)?; + } + + // Switch to an immutable borrow. + let layer = &self.layers[layer_id]; + + // Process injections. + let matches = cursor.matches( + &layer.config.injections_query, + layer.tree().root_node(), + RopeProvider(source_slice), + ); + let mut combined_injections = vec![ + (None, Vec::new(), IncludedChildren::default()); + layer.config.combined_injections_patterns.len() + ]; + let mut injections = Vec::new(); + let mut last_injection_end = 0; + for mat in matches { + let (injection_capture, content_node, included_children) = layer + .config + .injection_for_match(&layer.config.injections_query, &mat, source_slice); + + // in case this is a combined injection save it for more processing later + if let Some(combined_injection_idx) = layer + .config + .combined_injections_patterns + .iter() + .position(|&pattern| pattern == mat.pattern_index) + { + let entry = &mut combined_injections[combined_injection_idx]; + if injection_capture.is_some() { + entry.0 = injection_capture; + } + if let Some(content_node) = content_node { + if content_node.start_byte() >= last_injection_end { + entry.1.push(content_node); + last_injection_end = content_node.end_byte(); + } + } + entry.2 = included_children; + continue; + } + + // Explicitly remove this match so that none of its other captures will remain + // in the stream of captures. + mat.remove(); + + // If a language is found with the given name, then add a new language layer + // to the highlighted document. 
+ if let (Some(injection_capture), Some(content_node)) = + (injection_capture, content_node) + { + if let Some(config) = (injection_callback)(&injection_capture) { + let ranges = + intersect_ranges(&layer.ranges, &[content_node], included_children); + + if !ranges.is_empty() { + if content_node.start_byte() < last_injection_end { + continue; + } + last_injection_end = content_node.end_byte(); + injections.push((config, ranges)); + } + } + } + } + + for (lang_name, content_nodes, included_children) in combined_injections { + if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { + if let Some(config) = (injection_callback)(&lang_name) { + let ranges = + intersect_ranges(&layer.ranges, &content_nodes, included_children); + if !ranges.is_empty() { + injections.push((config, ranges)); + } + } + } + } + + let depth = layer.depth + 1; + // TODO: can't inline this since matches borrows self.layers + for (config, ranges) in injections { + let parent = Some(layer_id); + let new_layer = LanguageLayer { + tree: None, + config, + depth, + ranges, + flags: LayerUpdateFlags::empty(), + parent: None, + }; + + // Find an identical existing layer + let layer = layers_table + .get(layers_hasher.hash_one(&new_layer), |&it| { + self.layers[it] == new_layer + }) + .copied(); + + // ...or insert a new one. + let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer)); + self.layers[layer_id].parent = parent; + + queue.push_back(layer_id); + } + + // TODO: pre-process local scopes at this time, rather than highlight? + // would solve problems with locals not working across boundaries + } + + // Return the cursor back in the pool. + ts_parser.cursors.push(cursor); + + // Reset all `LayerUpdateFlags` and remove all untouched layers + self.layers.retain(|_, layer| { + replace(&mut layer.flags, LayerUpdateFlags::empty()) + .contains(LayerUpdateFlags::TOUCHED) + }); + + Ok(()) + }) + } +} + +/// Compute the ranges that should be included when parsing an injection. +/// This takes into account three things: +/// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. +/// * `nodes` - Every injection takes place within a set of nodes. The injection ranges +/// are the ranges of those nodes. +/// * `includes_children` - For some injections, the content nodes' children should be +/// excluded from the nested document, so that only the content nodes' *own* content +/// is reparsed. For other injections, the content nodes' entire ranges should be +/// reparsed, including the ranges of their children. 
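// Worked illustration of the rules above, assuming one parent range 0..100 and a
// single `injection.content` node spanning bytes 10..50 with one named child at 20..30:
//   IncludedChildren::None    -> ranges [10..20, 30..50]  (the child is carved out)
//   IncludedChildren::All     -> ranges [10..50]          (children are reparsed too)
//   IncludedChildren::Unnamed -> ranges [10..20, 30..50]  (named children still carved out)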
+fn intersect_ranges( + parent_ranges: &[Range], + nodes: &[Node], + included_children: IncludedChildren, +) -> Vec { + let mut cursor = nodes[0].walk(); + let mut result = Vec::new(); + let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range = parent_range_iter + .next() + .expect("Layers should only be constructed with non-empty ranges vectors"); + for node in nodes.iter() { + let mut preceding_range = Range { + start_byte: 0, + start_point: Point::new(0, 0), + end_byte: node.start_byte(), + end_point: node.start_position(), + }; + let following_range = Range { + start_byte: node.end_byte(), + start_point: node.end_position(), + end_byte: usize::MAX, + end_point: Point::new(usize::MAX, usize::MAX), + }; + + for excluded_range in node + .children(&mut cursor) + .filter_map(|child| match included_children { + IncludedChildren::None => Some(child.range()), + IncludedChildren::All => None, + IncludedChildren::Unnamed => { + if child.is_named() { + Some(child.range()) + } else { + None + } + } + }) + .chain([following_range].iter().cloned()) + { + let mut range = Range { + start_byte: preceding_range.end_byte, + start_point: preceding_range.end_point, + end_byte: excluded_range.start_byte, + end_point: excluded_range.start_point, + }; + preceding_range = excluded_range; + + if range.end_byte < parent_range.start_byte { + continue; + } + + while parent_range.start_byte <= range.end_byte { + if parent_range.end_byte > range.start_byte { + if range.start_byte < parent_range.start_byte { + range.start_byte = parent_range.start_byte; + range.start_point = parent_range.start_point; + } + + if parent_range.end_byte < range.end_byte { + if range.start_byte < parent_range.end_byte { + result.push(Range { + start_byte: range.start_byte, + start_point: range.start_point, + end_byte: parent_range.end_byte, + end_point: parent_range.end_point, + }); + } + range.start_byte = parent_range.end_byte; + range.start_point = parent_range.end_point; + } else { + if range.start_byte < range.end_byte { + result.push(range); + } + break; + } + } + + if let Some(next_range) = parent_range_iter.next() { + parent_range = next_range; + } else { + return result; + } + } + } + } + result +} + +impl LanguageLayer { + fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> { + parser + .set_included_ranges(&self.ranges) + .map_err(|_| Error::InvalidRanges)?; + + parser + .set_language(&self.config.language) + .map_err(|_| Error::InvalidLanguage)?; + + // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; + let tree = parser + .parse_with( + &mut |byte, _| { + if byte <= source.len_bytes() { + let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); + &chunk.as_bytes()[byte - start_byte..] 
+ } else { + // out of range + &[] + } + }, + self.tree.as_ref(), + ) + .ok_or(Error::Cancelled)?; + // unsafe { ts_parser.parser.set_cancellation_flag(None) }; + self.tree = Some(tree); + Ok(()) + } +} diff --git a/helix-syntax/src/pretty_print.rs b/helix-syntax/src/pretty_print.rs new file mode 100644 index 000000000..bd1082294 --- /dev/null +++ b/helix-syntax/src/pretty_print.rs @@ -0,0 +1,65 @@ +use std::fmt; + +use tree_sitter::{Node, TreeCursor}; + +pub fn pretty_print_tree(fmt: &mut W, node: Node) -> fmt::Result { + if node.child_count() == 0 { + if node_is_visible(&node) { + write!(fmt, "({})", node.kind()) + } else { + write!(fmt, "\"{}\"", node.kind()) + } + } else { + pretty_print_tree_impl(fmt, &mut node.walk(), 0) + } +} + +fn pretty_print_tree_impl( + fmt: &mut W, + cursor: &mut TreeCursor, + depth: usize, +) -> fmt::Result { + let node = cursor.node(); + let visible = node_is_visible(&node); + + if visible { + let indentation_columns = depth * 2; + write!(fmt, "{:indentation_columns$}", "")?; + + if let Some(field_name) = cursor.field_name() { + write!(fmt, "{}: ", field_name)?; + } + + write!(fmt, "({}", node.kind())?; + } + + // Handle children. + if cursor.goto_first_child() { + loop { + if node_is_visible(&cursor.node()) { + fmt.write_char('\n')?; + } + + pretty_print_tree_impl(fmt, cursor, depth + 1)?; + + if !cursor.goto_next_sibling() { + break; + } + } + + let moved = cursor.goto_parent(); + // The parent of the first child must exist, and must be `node`. + debug_assert!(moved); + debug_assert!(cursor.node() == node); + } + + if visible { + fmt.write_char(')')?; + } + + Ok(()) +} + +fn node_is_visible(node: &Node) -> bool { + node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) +} diff --git a/helix-syntax/src/ropey.rs b/helix-syntax/src/ropey.rs new file mode 100644 index 000000000..650fcfb90 --- /dev/null +++ b/helix-syntax/src/ropey.rs @@ -0,0 +1,29 @@ +// glue code for using TS with ropey, this should be put behind a feature flag +// in the future (and potentially be partially removed) + +use ropey::RopeSlice; +use tree_sitter::{Node, TextProvider}; + +// Adapter to convert rope chunks to bytes +pub struct ChunksBytes<'a> { + chunks: ropey::iter::Chunks<'a>, +} +impl<'a> Iterator for ChunksBytes<'a> { + type Item = &'a [u8]; + fn next(&mut self) -> Option { + self.chunks.next().map(str::as_bytes) + } +} + +pub struct RopeProvider<'a>(pub RopeSlice<'a>); + +impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> { + type I = ChunksBytes<'a>; + + fn text(&mut self, node: Node) -> Self::I { + let fragment = self.0.byte_slice(node.start_byte()..node.end_byte()); + ChunksBytes { + chunks: fragment.chunks(), + } + } +} diff --git a/helix-syntax/src/tree_cursor.rs b/helix-syntax/src/tree_cursor.rs new file mode 100644 index 000000000..692d5890a --- /dev/null +++ b/helix-syntax/src/tree_cursor.rs @@ -0,0 +1,264 @@ +use std::{cmp::Reverse, ops::Range}; + +use super::{LanguageLayer, LayerId}; + +use slotmap::HopSlotMap; +use tree_sitter::Node; + +/// The byte range of an injection layer. +/// +/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges. +/// This allows us to sort the ranges ahead of time in order to efficiently find a range that +/// contains a point with maximum depth. 
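// Hypothetical usage sketch: walking from the node at a byte position up to the root,
// crossing injection layers where necessary. The helper name is an assumption for
// illustration; it relies only on the `TreeCursor` API defined below.
fn node_kinds_to_root(syntax: &crate::Syntax, pos: usize) -> Vec<String> {
    let mut cursor = syntax.walk();
    cursor.reset_to_byte_range(pos, pos);
    let mut kinds = vec![cursor.node().kind().to_string()];
    while cursor.goto_parent() {
        kinds.push(cursor.node().kind().to_string());
    }
    kinds.reverse();
    kinds // node kinds from the root layer's root node down to the node at `pos`
}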
diff --git a/helix-syntax/src/tree_cursor.rs b/helix-syntax/src/tree_cursor.rs
new file mode 100644
index 000000000..692d5890a
--- /dev/null
+++ b/helix-syntax/src/tree_cursor.rs
@@ -0,0 +1,264 @@
+use std::{cmp::Reverse, ops::Range};
+
+use super::{LanguageLayer, LayerId};
+
+use slotmap::HopSlotMap;
+use tree_sitter::Node;
+
+/// The byte range of an injection layer.
+///
+/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges.
+/// This allows us to sort the ranges ahead of time in order to efficiently find a range that
+/// contains a point with maximum depth.
+#[derive(Debug)]
+struct InjectionRange {
+    start: usize,
+    end: usize,
+    layer_id: LayerId,
+    depth: u32,
+}
+
+pub struct TreeCursor<'a> {
+    layers: &'a HopSlotMap<LayerId, LanguageLayer>,
+    root: LayerId,
+    current: LayerId,
+    injection_ranges: Vec<InjectionRange>,
+    // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
+    // that returns very surprising results in testing.
+    cursor: Node<'a>,
+}
+
+impl<'a> TreeCursor<'a> {
+    pub(super) fn new(layers: &'a HopSlotMap<LayerId, LanguageLayer>, root: LayerId) -> Self {
+        let mut injection_ranges = Vec::new();
+
+        for (layer_id, layer) in layers.iter() {
+            // Skip the root layer
+            if layer.parent.is_none() {
+                continue;
+            }
+            for byte_range in layer.ranges.iter() {
+                let range = InjectionRange {
+                    start: byte_range.start_byte,
+                    end: byte_range.end_byte,
+                    layer_id,
+                    depth: layer.depth,
+                };
+                injection_ranges.push(range);
+            }
+        }
+
+        injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth)));
+
+        let cursor = layers[root].tree().root_node();
+
+        Self {
+            layers,
+            root,
+            current: root,
+            injection_ranges,
+            cursor,
+        }
+    }
+
+    pub fn node(&self) -> Node<'a> {
+        self.cursor
+    }
+
+    pub fn goto_parent(&mut self) -> bool {
+        if let Some(parent) = self.node().parent() {
+            self.cursor = parent;
+            return true;
+        }
+
+        // If we are already on the root layer, we cannot ascend.
+        if self.current == self.root {
+            return false;
+        }
+
+        // Ascend to the parent layer.
+        let range = self.node().byte_range();
+        let parent_id = self.layers[self.current]
+            .parent
+            .expect("non-root layers have a parent");
+        self.current = parent_id;
+        let root = self.layers[self.current].tree().root_node();
+        self.cursor = root
+            .descendant_for_byte_range(range.start, range.end)
+            .unwrap_or(root);
+
+        true
+    }
+
+    pub fn goto_parent_with<P>(&mut self, predicate: P) -> bool
+    where
+        P: Fn(&Node) -> bool,
+    {
+        while self.goto_parent() {
+            if predicate(&self.node()) {
+                return true;
+            }
+        }
+
+        false
+    }
+
+    /// Finds the injection layer that has exactly the same range as the given `range`.
+    fn layer_id_of_byte_range(&self, search_range: Range<usize>) -> Option<LayerId> {
+        let start_idx = self
+            .injection_ranges
+            .partition_point(|range| range.end < search_range.end);
+
+        self.injection_ranges[start_idx..]
+            .iter()
+            .take_while(|range| range.end == search_range.end)
+            .find_map(|range| (range.start == search_range.start).then_some(range.layer_id))
+    }
+
+    fn goto_first_child_impl(&mut self, named: bool) -> bool {
+        // Check if the current node's range is an exact injection layer range.
+        if let Some(layer_id) = self
+            .layer_id_of_byte_range(self.node().byte_range())
+            .filter(|&layer_id| layer_id != self.current)
+        {
+            // Switch to the child layer.
+            self.current = layer_id;
+            self.cursor = self.layers[self.current].tree().root_node();
+            return true;
+        }
+
+        let child = if named {
+            self.cursor.named_child(0)
+        } else {
+            self.cursor.child(0)
+        };
+
+        if let Some(child) = child {
+            // Otherwise descend in the current tree.
+            self.cursor = child;
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn goto_first_child(&mut self) -> bool {
+        self.goto_first_child_impl(false)
+    }
+
+    pub fn goto_first_named_child(&mut self) -> bool {
+        self.goto_first_child_impl(true)
+    }
+
+    fn goto_next_sibling_impl(&mut self, named: bool) -> bool {
+        let sibling = if named {
+            self.cursor.next_named_sibling()
+        } else {
+            self.cursor.next_sibling()
+        };
+
+        if let Some(sibling) = sibling {
+            self.cursor = sibling;
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn goto_next_sibling(&mut self) -> bool {
+        self.goto_next_sibling_impl(false)
+    }
+
+    pub fn goto_next_named_sibling(&mut self) -> bool {
+        self.goto_next_sibling_impl(true)
+    }
+
+    fn goto_prev_sibling_impl(&mut self, named: bool) -> bool {
+        let sibling = if named {
+            self.cursor.prev_named_sibling()
+        } else {
+            self.cursor.prev_sibling()
+        };
+
+        if let Some(sibling) = sibling {
+            self.cursor = sibling;
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn goto_prev_sibling(&mut self) -> bool {
+        self.goto_prev_sibling_impl(false)
+    }
+
+    pub fn goto_prev_named_sibling(&mut self) -> bool {
+        self.goto_prev_sibling_impl(true)
+    }
+
+    /// Finds the injection layer that contains the given start-end range.
+    fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId {
+        let start_idx = self
+            .injection_ranges
+            .partition_point(|range| range.end < end);
+
+        self.injection_ranges[start_idx..]
+            .iter()
+            .take_while(|range| range.start < end)
+            .find_map(|range| (range.start <= start).then_some(range.layer_id))
+            .unwrap_or(self.root)
+    }
+
+    pub fn reset_to_byte_range(&mut self, start: usize, end: usize) {
+        self.current = self.layer_id_containing_byte_range(start, end);
+        let root = self.layers[self.current].tree().root_node();
+        self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root);
+    }
+
+    /// Returns an iterator over the children of the node the TreeCursor is on
+    /// at the time this is called.
+    pub fn children(&'a mut self) -> ChildIter {
+        let parent = self.node();
+
+        ChildIter {
+            cursor: self,
+            parent,
+            named: false,
+        }
+    }
+
+    /// Returns an iterator over the named children of the node the TreeCursor is on
+    /// at the time this is called.
+    pub fn named_children(&'a mut self) -> ChildIter {
+        let parent = self.node();
+
+        ChildIter {
+            cursor: self,
+            parent,
+            named: true,
+        }
+    }
+}
+
+pub struct ChildIter<'n> {
+    cursor: &'n mut TreeCursor<'n>,
+    parent: Node<'n>,
+    named: bool,
+}
+
+impl<'n> Iterator for ChildIter<'n> {
+    type Item = Node<'n>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // first iteration, just visit the first child
+        if self.cursor.node() == self.parent {
+            self.cursor
+                .goto_first_child_impl(self.named)
+                .then(|| self.cursor.node())
+        } else {
+            self.cursor
+                .goto_next_sibling_impl(self.named)
+                .then(|| self.cursor.node())
+        }
+    }
+}
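
Editor's note: the layer-aware TreeCursor above is what allows tree motions to climb out of an injected language (for example a code block inside markdown) into the surrounding document. A rough caller-side sketch, not part of this patch; `Syntax::walk` and the node-kind predicate are illustrative assumptions:

    use helix_core::syntax::Syntax;

    // Expand a byte range to the nearest enclosing function-like node,
    // even if the range starts inside an injection layer.
    fn enclosing_function_range(syntax: &Syntax, start: usize, end: usize) -> Option<std::ops::Range<usize>> {
        let mut cursor = syntax.walk();
        cursor.reset_to_byte_range(start, end);
        cursor
            .goto_parent_with(|node| node.kind().contains("function"))
            .then(|| cursor.node().byte_range())
    }
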
diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs
index 96614443f..d3fdec02f 100644
--- a/helix-term/src/ui/markdown.rs
+++ b/helix-term/src/ui/markdown.rs
@@ -54,7 +54,14 @@ pub fn highlighted_code_block<'a>(
         language.into(),
     ))
     .and_then(|config| config.highlight_config(theme.scopes()))
-    .and_then(|config| Syntax::new(ropeslice, config, Arc::clone(&config_loader)));
+    .and_then(|config| {
+        Syntax::new(ropeslice, config, |injection| {
+            config_loader
+                .load()
+                .language_configuration_for_injection_string(injection)
+                .and_then(|config| config.get_highlight_config())
+        })
+    });
 
     let syntax = match syntax {
         Some(s) => s,
diff --git a/helix-term/src/ui/picker/handlers.rs b/helix-term/src/ui/picker/handlers.rs
index 4896ccbc6..ac34e8931 100644
--- a/helix-term/src/ui/picker/handlers.rs
+++ b/helix-term/src/ui/picker/handlers.rs
@@ -82,7 +82,12 @@ impl AsyncHook
         let Some(syntax) = language_config
             .highlight_config(&loader.load().scopes())
             .and_then(|highlight_config| {
-                helix_core::Syntax::new(text.slice(..), highlight_config, loader)
+                helix_core::Syntax::new(text.slice(..), highlight_config, |injection| {
+                    loader
+                        .load()
+                        .language_configuration_for_injection_string(injection)
+                        .and_then(|config| config.get_highlight_config())
+                })
             })
         else {
             log::info!("highlighting picker item failed");
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 3314a243f..b45f80ca7 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -7,7 +7,7 @@ use helix_core::auto_pairs::AutoPairs;
 use helix_core::chars::char_is_word;
 use helix_core::doc_formatter::TextFormat;
 use helix_core::encoding::Encoding;
-use helix_core::syntax::{Highlight, LanguageServerFeature};
+use helix_core::syntax::{generate_edits, Highlight, LanguageServerFeature};
 use helix_core::text_annotations::{InlineAnnotation, Overlay};
 use helix_lsp::util::lsp_pos_to_pos;
 use helix_stdx::faccess::{copy_metadata, readonly};
@@ -156,6 +156,7 @@ pub struct Document {
     pub syntax: Option<Syntax>,
     /// Corresponding language scope name. Usually `source.<lang>`.
     pub language: Option<Arc<LanguageConfiguration>>,
+    loader: Option<Arc<ArcSwap<syntax::Loader>>>,
 
     /// Pending changes since last history commit.
     changes: ChangeSet,
@@ -678,6 +679,7 @@ impl Document {
             focused_at: std::time::Instant::now(),
             readonly: false,
            jump_labels: HashMap::new(),
+            loader: None,
         }
     }
 
@@ -1131,9 +1133,15 @@ impl Document {
             if let Some(highlight_config) =
                 language_config.highlight_config(&(*loader).load().scopes())
             {
-                self.syntax = Syntax::new(self.text.slice(..), highlight_config, loader);
+                let loader_ = loader.load_full();
+                self.syntax = Syntax::new(self.text.slice(..), highlight_config, |injection| {
+                    loader_
+                        .language_configuration_for_injection_string(injection)
+                        .and_then(|config| config.get_highlight_config())
+                });
             }
 
+            self.loader = Some(loader);
             self.language = Some(language_config);
         } else {
             self.syntax = None;
@@ -1275,11 +1283,16 @@ impl Document {
 
         // update tree-sitter syntax tree
         if let Some(syntax) = &mut self.syntax {
+            let loader = self.loader.as_ref().unwrap().load_full(); // TODO: no unwrap
             let res = syntax.update(
-                old_doc.slice(..),
                 self.text.slice(..),
-                transaction.changes(),
+                generate_edits(old_doc.slice(..), transaction.changes()),
+                |injection| {
+                    loader
+                        .language_configuration_for_injection_string(injection)
+                        .and_then(|config| config.get_highlight_config())
+                },
             );
             if res.is_err() {
                 log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");