Run textobject queries across injections

With this change, textobjects work even within injection layers. For
example, you can use `]f` to jump to a function definition inside a
JavaScript `<script>` tag within an HTML document.

This requires `Syntax::query_iter` - a utility function for running a
query from `HighlightConfiguration` across injection layers - which
comes from the rainbow brackets branch (merged into my driver).
We need to relocate the textobject query from the `LanguageConfiguration`
to the `HighlightConfiguration` in order to access it per injection
layer, as we do for the rainbow brackets query. With that in place, the
only other necessary changes are to port the contents of
`TextObjectQuery::capture_nodes_any` to a new function that uses
`query_iter` (`Syntax::textobject_nodes`) and to update the callers.

The callers end up being a bit cleaner: they now only need to take a
`Syntax`, rather than a `LanguageConfiguration` plus the root layer's
root `tree_sitter::Node`.
textobjects-across-injections
Michael Davis 10 months ago
parent 12e7d126b6
commit d3575dc0ff
No known key found for this signature in database

@ -1,7 +1,7 @@
use std::{cmp::Reverse, iter}; use std::{cmp::Reverse, iter};
use ropey::iter::Chars; use ropey::iter::Chars;
use tree_sitter::{Node, QueryCursor}; use tree_sitter::Node;
use crate::{ use crate::{
char_idx_at_visual_offset, char_idx_at_visual_offset,
@ -13,7 +13,6 @@ use crate::{
}, },
line_ending::rope_is_line_ending, line_ending::rope_is_line_ending,
position::char_idx_at_visual_block_offset, position::char_idx_at_visual_block_offset,
syntax::LanguageConfiguration,
text_annotations::TextAnnotations, text_annotations::TextAnnotations,
textobject::TextObject, textobject::TextObject,
visual_offset_from_block, Range, RopeSlice, Selection, Syntax, visual_offset_from_block, Range, RopeSlice, Selection, Syntax,
@ -500,29 +499,22 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo
/// Finds the range of the next or previous textobject in the syntax sub-tree of `node`. /// Finds the range of the next or previous textobject in the syntax sub-tree of `node`.
/// Returns the range in the forwards direction. /// Returns the range in the forwards direction.
pub fn goto_treesitter_object( pub fn goto_treesitter_object(
syntax: &Syntax,
slice: RopeSlice, slice: RopeSlice,
range: Range, range: Range,
object_name: &str, object_name: &str,
dir: Direction, dir: Direction,
slice_tree: Node,
lang_config: &LanguageConfiguration,
count: usize, count: usize,
) -> Range { ) -> Range {
let get_range = move |range: Range| -> Option<Range> { let get_range = move |range: Range| -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice)); let byte_pos = slice.char_to_byte(range.cursor(slice));
let cap_name = |t: TextObject| format!("{}.{}", object_name, t); let cap_name = |t: TextObject| format!("{}.{}", object_name, t);
let mut cursor = QueryCursor::new(); let movement = cap_name(TextObject::Movement);
let nodes = lang_config.textobject_query()?.capture_nodes_any( let around = cap_name(TextObject::Around);
&[ let inside = cap_name(TextObject::Inside);
&cap_name(TextObject::Movement), let capture_names = &[movement.as_str(), around.as_str(), inside.as_str()];
&cap_name(TextObject::Around), let nodes = syntax.textobject_nodes(capture_names, slice, None);
&cap_name(TextObject::Inside),
],
slice_tree,
slice,
&mut cursor,
)?;
let node = match dir { let node = match dir {
Direction::Forward => nodes Direction::Forward => nodes

@ -134,8 +134,6 @@ pub struct LanguageConfiguration {
#[serde(skip)] #[serde(skip)]
pub(crate) indent_query: OnceCell<Option<Query>>, pub(crate) indent_query: OnceCell<Option<Query>>,
#[serde(skip)]
pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub debugger: Option<DebugAdapterConfig>, pub debugger: Option<DebugAdapterConfig>,
@ -534,11 +532,6 @@ impl FromStr for AutoPairConfig {
} }
} }
#[derive(Debug)]
pub struct TextObjectQuery {
pub query: Query,
}
#[derive(Debug)] #[derive(Debug)]
pub enum CapturedNode<'a> { pub enum CapturedNode<'a> {
Single(Node<'a>), Single(Node<'a>),
@ -586,118 +579,57 @@ impl<'a> CapturedNode<'a> {
/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high. /// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
const TREE_SITTER_MATCH_LIMIT: u32 = 256; const TREE_SITTER_MATCH_LIMIT: u32 = 256;
impl TextObjectQuery { pub fn read_query(language: &str, filename: &str) -> Option<String> {
/// Run the query on the given node and return sub nodes which match given
/// capture ("function.inside", "class.around", etc).
///
/// Captures may contain multiple nodes by using quantifiers (+, *, etc),
/// and support for this is partial and could use improvement.
///
/// ```query
/// (comment)+ @capture
///
/// ; OR
/// (
/// (comment)*
/// .
/// (function)
/// ) @capture
/// ```
pub fn capture_nodes<'a>(
&'a self,
capture_name: &str,
node: Node<'a>,
slice: RopeSlice<'a>,
cursor: &'a mut QueryCursor,
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
self.capture_nodes_any(&[capture_name], node, slice, cursor)
}
/// Find the first capture that exists out of all given `capture_names`
/// and return sub nodes that match this capture.
pub fn capture_nodes_any<'a>(
&'a self,
capture_names: &[&str],
node: Node<'a>,
slice: RopeSlice<'a>,
cursor: &'a mut QueryCursor,
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
let capture_idx = capture_names
.iter()
.find_map(|cap| self.query.capture_index_for_name(cap))?;
cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
let nodes = cursor
.captures(&self.query, node, RopeProvider(slice))
.filter_map(move |(mat, _)| {
let nodes: Vec<_> = mat
.captures
.iter()
.filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
.collect();
if nodes.len() > 1 {
Some(CapturedNode::Grouped(nodes))
} else {
nodes.into_iter().map(CapturedNode::Single).next()
}
});
Some(nodes)
}
}
pub fn read_query(language: &str, filename: &str) -> String {
static INHERITS_REGEX: Lazy<Regex> = static INHERITS_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap()); Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());
let query = load_runtime_file(language, filename).unwrap_or_default(); let query = load_runtime_file(language, filename).ok()?;
// replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s) // replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s)
INHERITS_REGEX let contents = INHERITS_REGEX
.replace_all(&query, |captures: &regex::Captures| { .replace_all(&query, |captures: &regex::Captures| {
captures[1] captures[1]
.split(',') .split(',')
.map(|language| format!("\n{}\n", read_query(language, filename))) .filter_map(|language| Some(format!("\n{}\n", read_query(language, filename)?)))
.collect::<String>() .collect::<String>()
}) })
.to_string() .to_string();
Some(contents)
} }
impl LanguageConfiguration { impl LanguageConfiguration {
fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> { fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
let highlights_query = read_query(&self.language_id, "highlights.scm"); let highlights_query = read_query(&self.language_id, "highlights.scm")?;
// always highlight syntax errors // always highlight syntax errors
// highlights_query += "\n(ERROR) @error"; // highlights_query += "\n(ERROR) @error";
let textobjects_query = read_query(&self.language_id, "textobjects.scm");
let injections_query = read_query(&self.language_id, "injections.scm"); let injections_query = read_query(&self.language_id, "injections.scm");
let locals_query = read_query(&self.language_id, "locals.scm"); let locals_query = read_query(&self.language_id, "locals.scm");
if highlights_query.is_empty() { let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
None .map_err(|err| {
} else { log::error!(
let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) "Failed to load tree-sitter parser for language {:?}: {}",
.map_err(|err| { self.language_id,
log::error!( err
"Failed to load tree-sitter parser for language {:?}: {}", )
self.language_id, })
err
)
})
.ok()?;
let config = HighlightConfiguration::new(
language,
&highlights_query,
&injections_query,
&locals_query,
)
.map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
.ok()?; .ok()?;
let config = HighlightConfiguration::new(
language,
&highlights_query,
textobjects_query.as_deref(),
&injections_query.unwrap_or_default(),
&locals_query.unwrap_or_default(),
)
.map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
.ok()?;
config.configure(scopes); config.configure(scopes);
Some(Arc::new(config)) Some(Arc::new(config))
}
} }
pub fn reconfigure(&self, scopes: &[String]) { pub fn reconfigure(&self, scopes: &[String]) {
@ -722,24 +654,12 @@ impl LanguageConfiguration {
.as_ref() .as_ref()
} }
pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
self.textobject_query
.get_or_init(|| {
self.load_query("textobjects.scm")
.map(|query| TextObjectQuery { query })
})
.as_ref()
}
pub fn scope(&self) -> &str { pub fn scope(&self) -> &str {
&self.scope &self.scope
} }
fn load_query(&self, kind: &str) -> Option<Query> { fn load_query(&self, kind: &str) -> Option<Query> {
let query_text = read_query(&self.language_id, kind); let query_text = read_query(&self.language_id, kind)?;
if query_text.is_empty() {
return None;
}
let lang = self.highlight_config.get()?.as_ref()?.language; let lang = self.highlight_config.get()?.as_ref()?.language;
Query::new(lang, &query_text) Query::new(lang, &query_text)
.map_err(|e| { .map_err(|e| {
@ -1457,6 +1377,42 @@ impl Syntax {
} }
} }
/// Runs each layer's textobjects query over `source` via `query_iter` and
/// yields the nodes captured under one of `capture_names`.
///
/// For every match, the first name in `capture_names` for which the matching
/// layer's textobjects query has a capture index is selected, and only nodes
/// captured under that index are returned:
///
/// - more than one such node in the match yields `CapturedNode::Grouped`,
/// - exactly one yields `CapturedNode::Single`,
/// - a match with no such node, or whose layer's query defines none of the
///   names, is skipped.
///
/// `query_range` is forwarded unchanged to `query_iter`.
/// NOTE(review): presumably a byte range restricting where the query runs,
/// with `None` meaning the whole document — confirm against `query_iter`.
///
/// # Panics
///
/// Panics if a layer produces a match while its configuration has no
/// textobjects query (see the `expect` below).
pub fn textobject_nodes<'a>(
&'a self,
capture_names: &'a [&str],
source: RopeSlice<'a>,
query_range: Option<std::ops::Range<usize>>,
) -> impl Iterator<Item = CapturedNode<'a>> {
self.query_iter(
// Select the textobjects query of each layer's configuration.
|config| config.textobjects_query.as_ref(),
source,
query_range,
)
.filter_map(move |(layer, match_, _)| {
// Capture indices belong to a specific query, so the name is resolved
// against this layer's own textobjects query for every match.
// TODO: cache this per-language with a hashmap?
let capture_idx = capture_names.iter().find_map(|name| {
layer
.config
.textobjects_query
.as_ref()
.expect("layer must have textobjects query in order to match")
.capture_index_for_name(name)
})?;
// Keep only the nodes of this match captured under the chosen index.
let nodes: Vec<_> = match_
.captures
.iter()
.filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
.collect();
// Several nodes under one capture are reported together as a group.
if nodes.len() > 1 {
Some(CapturedNode::Grouped(nodes))
} else {
nodes.into_iter().map(CapturedNode::Single).next()
}
})
}
pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree { pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
let mut container_id = self.root; let mut container_id = self.root;
@ -1748,7 +1704,8 @@ pub enum HighlightEvent {
#[derive(Debug)] #[derive(Debug)]
pub struct HighlightConfiguration { pub struct HighlightConfiguration {
pub language: Grammar, pub language: Grammar,
pub query: Query, query: Query,
textobjects_query: Option<Query>,
injections_query: Query, injections_query: Query,
combined_injections_patterns: Vec<usize>, combined_injections_patterns: Vec<usize>,
highlights_pattern_index: usize, highlights_pattern_index: usize,
@ -1846,6 +1803,7 @@ impl HighlightConfiguration {
pub fn new( pub fn new(
language: Grammar, language: Grammar,
highlights_query: &str, highlights_query: &str,
textobjects_query: Option<&str>,
injection_query: &str, injection_query: &str,
locals_query: &str, locals_query: &str,
) -> Result<Self, QueryError> { ) -> Result<Self, QueryError> {
@ -1865,6 +1823,9 @@ impl HighlightConfiguration {
highlights_pattern_index += 1; highlights_pattern_index += 1;
} }
} }
let textobjects_query = textobjects_query
.map(|source| Query::new(language, source))
.transpose()?;
let injections_query = Query::new(language, injection_query)?; let injections_query = Query::new(language, injection_query)?;
let combined_injections_patterns = (0..injections_query.pattern_count()) let combined_injections_patterns = (0..injections_query.pattern_count())
@ -1922,6 +1883,7 @@ impl HighlightConfiguration {
Ok(Self { Ok(Self {
language, language,
query, query,
textobjects_query,
injections_query, injections_query,
combined_injections_patterns, combined_injections_patterns,
highlights_pattern_index, highlights_pattern_index,
@ -2809,11 +2771,7 @@ mod test {
.unwrap(); .unwrap();
let language = get_language("rust").unwrap(); let language = get_language("rust").unwrap();
let query = Query::new(language, query_str).unwrap(); let config = HighlightConfiguration::new(language, "", Some(query_str), "", "").unwrap();
let textobject = TextObjectQuery { query };
let mut cursor = QueryCursor::new();
let config = HighlightConfiguration::new(language, "", "", "").unwrap();
let syntax = Syntax::new( let syntax = Syntax::new(
source.slice(..), source.slice(..),
Arc::new(config), Arc::new(config),
@ -2821,11 +2779,10 @@ mod test {
) )
.unwrap(); .unwrap();
let root = syntax.tree().root_node(); let test = |capture, range| {
let mut test = |capture, range| { let capture_names = &[capture];
let matches: Vec<_> = textobject let matches: Vec<_> = syntax
.capture_nodes(capture, root, source.slice(..), &mut cursor) .textobject_nodes(capture_names, source.slice(..), None)
.unwrap()
.collect(); .collect();
assert_eq!( assert_eq!(
@ -2881,6 +2838,7 @@ mod test {
language, language,
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm") &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
.unwrap(), .unwrap(),
None, // textobjects.scm
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm") &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
.unwrap(), .unwrap(),
"", // locals.scm "", // locals.scm
@ -2989,7 +2947,7 @@ mod test {
.unwrap(); .unwrap();
let language = get_language(language_name).unwrap(); let language = get_language(language_name).unwrap();
let config = HighlightConfiguration::new(language, "", "", "").unwrap(); let config = HighlightConfiguration::new(language, "", None, "", "").unwrap();
let syntax = Syntax::new( let syntax = Syntax::new(
source.slice(..), source.slice(..),
Arc::new(config), Arc::new(config),

@ -1,14 +1,12 @@
use std::fmt::Display; use std::fmt::Display;
use ropey::RopeSlice; use ropey::RopeSlice;
use tree_sitter::{Node, QueryCursor};
use crate::chars::{categorize_char, char_is_whitespace, CharCategory}; use crate::chars::{categorize_char, char_is_whitespace, CharCategory};
use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary}; use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary};
use crate::line_ending::rope_is_line_ending; use crate::line_ending::rope_is_line_ending;
use crate::movement::Direction; use crate::movement::Direction;
use crate::surround; use crate::surround;
use crate::syntax::LanguageConfiguration;
use crate::Range; use crate::Range;
fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize { fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize {
@ -254,22 +252,20 @@ fn textobject_pair_surround_impl(
/// `object_name` is a query capture base name like "function", "class", etc. /// `object_name` is a query capture base name like "function", "class", etc.
/// `slice_tree` is the tree-sitter node corresponding to given text slice. /// `slice_tree` is the tree-sitter node corresponding to given text slice.
pub fn textobject_treesitter( pub fn textobject_treesitter(
syntax: &crate::Syntax,
slice: RopeSlice, slice: RopeSlice,
range: Range, range: Range,
textobject: TextObject, textobject: TextObject,
object_name: &str, object_name: &str,
slice_tree: Node,
lang_config: &LanguageConfiguration,
_count: usize, _count: usize,
) -> Range { ) -> Range {
let get_range = move || -> Option<Range> { let get_range = move || -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice)); let byte_pos = slice.char_to_byte(range.cursor(slice));
let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner
let mut cursor = QueryCursor::new(); let capture_names = &[capture_name.as_str()];
let node = lang_config let node = syntax
.textobject_query()? .textobject_nodes(capture_names, slice, None)
.capture_nodes(&capture_name, slice_tree, slice, &mut cursor)?
.filter(|node| node.byte_range().contains(&byte_pos)) .filter(|node| node.byte_range().contains(&byte_pos))
.min_by_key(|node| node.byte_range().len())?; .min_by_key(|node| node.byte_range().len())?;

@ -4905,20 +4905,12 @@ fn goto_ts_object_impl(cx: &mut Context, object: &'static str, direction: Direct
let count = cx.count(); let count = cx.count();
let motion = move |editor: &mut Editor| { let motion = move |editor: &mut Editor| {
let (view, doc) = current!(editor); let (view, doc) = current!(editor);
if let Some((lang_config, syntax)) = doc.language_config().zip(doc.syntax()) { if let Some(syntax) = doc.syntax() {
let text = doc.text().slice(..); let text = doc.text().slice(..);
let root = syntax.tree().root_node();
let selection = doc.selection(view.id).clone().transform(|range| { let selection = doc.selection(view.id).clone().transform(|range| {
let new_range = movement::goto_treesitter_object( let new_range =
text, movement::goto_treesitter_object(syntax, text, range, object, direction, count);
range,
object,
direction,
root,
lang_config,
count,
);
if editor.mode == Mode::Select { if editor.mode == Mode::Select {
let head = if new_range.head < range.anchor { let head = if new_range.head < range.anchor {
@ -5000,19 +4992,10 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
let text = doc.text().slice(..); let text = doc.text().slice(..);
let textobject_treesitter = |obj_name: &str, range: Range| -> Range { let textobject_treesitter = |obj_name: &str, range: Range| -> Range {
let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) { let Some(syntax) = doc.syntax() else {
Some(t) => t, return range;
None => return range,
}; };
textobject::textobject_treesitter( textobject::textobject_treesitter(syntax, text, range, objtype, obj_name, count)
text,
range,
objtype,
obj_name,
syntax.tree().root_node(),
lang_config,
count,
)
}; };
if ch == 'g' && doc.diff_handle().is_none() { if ch == 'g' && doc.diff_handle().is_none() {

@ -18,7 +18,7 @@ pub fn query_check() -> Result<(), DynError> {
let grammar_name = language.grammar.as_ref().unwrap_or(language_name); let grammar_name = language.grammar.as_ref().unwrap_or(language_name);
for query_file in query_files { for query_file in query_files {
let language = get_language(grammar_name); let language = get_language(grammar_name);
let query_text = read_query(language_name, query_file); let Some(query_text) = read_query(language_name, query_file) else { continue };
if let Ok(lang) = language { if let Ok(lang) = language {
if !query_text.is_empty() { if !query_text.is_empty() {
if let Err(reason) = Query::new(lang, &query_text) { if let Err(reason) = Query::new(lang, &query_text) {

Loading…
Cancel
Save