From 4ee92cad19cc94f0751f91fa9391d1899353d740 Mon Sep 17 00:00:00 2001 From: Gokul Soumya Date: Sat, 23 Oct 2021 08:11:19 +0530 Subject: [PATCH] Add treesitter textobjects (#728) * Add treesitter textobject queries Only for Go, Python and Rust for now. * Add tree-sitter textobjects Only has functions and class objects as of now. * Fix tests * Add docs for tree-sitter textobjects * Add guide for creating new textobject queries * Add parameter textobject Only parameter.inside is implemented now, parameter.around will probably require custom predicates akin to nvim' `make-range` since we want to select a trailing comma too (a comma will be an anonymous node and matching against them doesn't work similar to named nodes) * Simplify TextObject cell init --- book/src/SUMMARY.md | 2 + book/src/guides/README.md | 4 ++ book/src/guides/textobject.md | 30 +++++++++++++++ book/src/usage.md | 13 +++++-- helix-core/src/indent.rs | 1 + helix-core/src/syntax.rs | 43 +++++++++++++++++++++- helix-core/src/textobject.rs | 51 ++++++++++++++++++++++++++ helix-term/src/commands.rs | 19 ++++++++++ runtime/queries/go/textobjects.scm | 21 +++++++++++ runtime/queries/python/textobjects.scm | 14 +++++++ runtime/queries/rust/textobjects.scm | 26 +++++++++++++ 11 files changed, 219 insertions(+), 5 deletions(-) create mode 100644 book/src/guides/README.md create mode 100644 book/src/guides/textobject.md create mode 100644 runtime/queries/go/textobjects.scm create mode 100644 runtime/queries/python/textobjects.scm create mode 100644 runtime/queries/rust/textobjects.scm diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 3fa8e067..56f50e21 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -8,3 +8,5 @@ - [Keymap](./keymap.md) - [Key Remapping](./remapping.md) - [Hooks](./hooks.md) +- [Guides](./guides/README.md) + - [Adding Textobject Queries](./guides/textobject.md) diff --git a/book/src/guides/README.md b/book/src/guides/README.md new file mode 100644 index 00000000..96e62978 --- /dev/null +++ b/book/src/guides/README.md @@ -0,0 +1,4 @@ +# Guides + +This section contains guides for adding new language server configurations, +tree-sitter grammers, textobject queries, etc. diff --git a/book/src/guides/textobject.md b/book/src/guides/textobject.md new file mode 100644 index 00000000..50b3b574 --- /dev/null +++ b/book/src/guides/textobject.md @@ -0,0 +1,30 @@ +# Adding Textobject Queries + +Textobjects that are language specific ([like functions, classes, etc][textobjects]) +require an accompanying tree-sitter grammar and a `textobjects.scm` query file +to work properly. Tree-sitter allows us to query the source code syntax tree +and capture specific parts of it. The queries are written in a lisp dialect. +More information on how to write queries can be found in the [official tree-sitter +documentation](tree-sitter-queries). + +Query files should be placed in `runtime/queries/{language}/textobjects.scm` +when contributing. Note that to test the query files locally you should put +them under your local runtime directory (`~/.config/helix/runtime` on Linux +for example). + +The following [captures][tree-sitter-captures] are recognized: + +| Capture Name | +| --- | +| `function.inside` | +| `function.around` | +| `class.inside` | +| `class.around` | +| `parameter.inside` | + +[Example query files][textobject-examples] can be found in the helix GitHub repository. + +[textobjects]: ../usage.md#textobjects +[tree-sitter-queries]: https://tree-sitter.github.io/tree-sitter/using-parsers#query-syntax +[tree-sitter-captures]: https://tree-sitter.github.io/tree-sitter/using-parsers#capturing-nodes +[textobject-examples]: https://github.com/search?q=repo%3Ahelix-editor%2Fhelix+filename%3Atextobjects.scm&type=Code&ref=advsearch&l=&l= diff --git a/book/src/usage.md b/book/src/usage.md index 2de8d01a..d31e03a1 100644 --- a/book/src/usage.md +++ b/book/src/usage.md @@ -51,9 +51,10 @@ Multiple characters are currently not supported, but planned. ## Textobjects -Currently supported: `word`, `surround`. +Currently supported: `word`, `surround`, `function`, `class`, `parameter`. ![textobject-demo](https://user-images.githubusercontent.com/23398472/124231131-81a4bb00-db2d-11eb-9d10-8e577ca7b177.gif) +![textobject-treesitter-demo](https://user-images.githubusercontent.com/23398472/132537398-2a2e0a54-582b-44ab-a77f-eb818942203d.gif) - `ma` - Select around the object (`va` in vim, `` in kakoune) - `mi` - Select inside the object (`vi` in vim, `` in kakoune) @@ -62,5 +63,11 @@ Currently supported: `word`, `surround`. | --- | --- | | `w` | Word | | `(`, `[`, `'`, etc | Specified surround pairs | - -Textobjects based on treesitter, like `function`, `class`, etc are planned. +| `f` | Function | +| `c` | Class | +| `p` | Parameter | + +Note: `f`, `c`, etc need a tree-sitter grammar active for the current +document and a special tree-sitter query file to work properly. [Only +some grammars](https://github.com/search?q=repo%3Ahelix-editor%2Fhelix+filename%3Atextobjects.scm&type=Code&ref=advsearch&l=&l=) +currently have the query file implemented. Contributions are welcome ! diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index d9a0155f..20f034ea 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -464,6 +464,7 @@ where unit: String::from(" "), }), indent_query: OnceCell::new(), + textobject_query: OnceCell::new(), }], }); diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 0929e38f..f4b4535b 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -49,7 +49,7 @@ pub struct Configuration { #[serde(rename_all = "kebab-case")] pub struct LanguageConfiguration { #[serde(rename = "name")] - pub(crate) language_id: String, + pub language_id: String, pub scope: String, // source.rust pub file_types: Vec, // filename ends_with? pub roots: Vec, // these indicate project roots <.git, Cargo.toml> @@ -76,6 +76,8 @@ pub struct LanguageConfiguration { #[serde(skip)] pub(crate) indent_query: OnceCell>, + #[serde(skip)] + pub(crate) textobject_query: OnceCell>, } #[derive(Debug, Serialize, Deserialize)] @@ -105,6 +107,32 @@ pub struct IndentQuery { pub outdent: HashSet, } +#[derive(Debug)] +pub struct TextObjectQuery { + pub query: Query, +} + +impl TextObjectQuery { + /// Run the query on the given node and return sub nodes which match given + /// capture ("function.inside", "class.around", etc). + pub fn capture_nodes<'a>( + &'a self, + capture_name: &str, + node: Node<'a>, + slice: RopeSlice<'a>, + cursor: &'a mut QueryCursor, + ) -> Option>> { + let capture_idx = self.query.capture_index_for_name(capture_name)?; + let captures = cursor.captures(&self.query, node, RopeProvider(slice)); + + captures + .filter_map(move |(mat, idx)| { + (mat.captures[idx].index == capture_idx).then(|| mat.captures[idx].node) + }) + .into() + } +} + fn load_runtime_file(language: &str, filename: &str) -> Result { let path = crate::RUNTIME_DIR .join("queries") @@ -153,7 +181,6 @@ impl LanguageConfiguration { // highlights_query += "\n(ERROR) @error"; let injections_query = read_query(&language, "injections.scm"); - let locals_query = read_query(&language, "locals.scm"); if highlights_query.is_empty() { @@ -203,6 +230,18 @@ impl LanguageConfiguration { .as_ref() } + pub fn textobject_query(&self) -> Option<&TextObjectQuery> { + self.textobject_query + .get_or_init(|| -> Option { + let lang_name = self.language_id.to_ascii_lowercase(); + let query_text = read_query(&lang_name, "textobjects.scm"); + let lang = self.highlight_config.get()?.as_ref()?.language; + let query = Query::new(lang, &query_text).ok()?; + Some(TextObjectQuery { query }) + }) + .as_ref() + } + pub fn scope(&self) -> &str { &self.scope } diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs index b965f6df..975ed115 100644 --- a/helix-core/src/textobject.rs +++ b/helix-core/src/textobject.rs @@ -1,9 +1,13 @@ +use std::fmt::Display; + use ropey::RopeSlice; +use tree_sitter::{Node, QueryCursor}; use crate::chars::{categorize_char, char_is_whitespace, CharCategory}; use crate::graphemes::next_grapheme_boundary; use crate::movement::Direction; use crate::surround; +use crate::syntax::LanguageConfiguration; use crate::Range; fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize { @@ -51,6 +55,15 @@ pub enum TextObject { Inside, } +impl Display for TextObject { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + Self::Around => "around", + Self::Inside => "inside", + }) + } +} + // count doesn't do anything yet pub fn textobject_word( slice: RopeSlice, @@ -108,6 +121,44 @@ pub fn textobject_surround( .unwrap_or(range) } +/// Transform the given range to select text objects based on tree-sitter. +/// `object_name` is a query capture base name like "function", "class", etc. +/// `slice_tree` is the tree-sitter node corresponding to given text slice. +pub fn textobject_treesitter( + slice: RopeSlice, + range: Range, + textobject: TextObject, + object_name: &str, + slice_tree: Node, + lang_config: &LanguageConfiguration, + _count: usize, +) -> Range { + let get_range = move || -> Option { + let byte_pos = slice.char_to_byte(range.cursor(slice)); + + let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner + let mut cursor = QueryCursor::new(); + let node = lang_config + .textobject_query()? + .capture_nodes(&capture_name, slice_tree, slice, &mut cursor)? + .filter(|node| node.byte_range().contains(&byte_pos)) + .min_by_key(|node| node.byte_range().len())?; + + let len = slice.len_bytes(); + let start_byte = node.start_byte(); + let end_byte = node.end_byte(); + if start_byte >= len || end_byte >= len { + return None; + } + + let start_char = slice.byte_to_char(start_byte); + let end_char = slice.byte_to_char(end_byte); + + Some(Range::new(start_char, end_char)) + }; + get_range().unwrap_or(range) +} + #[cfg(test)] mod test { use super::TextObject::*; diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 9f54292d..272a9d9a 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -4465,9 +4465,28 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) { let (view, doc) = current!(cx.editor); let text = doc.text().slice(..); + let textobject_treesitter = |obj_name: &str, range: Range| -> Range { + let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) { + Some(t) => t, + None => return range, + }; + textobject::textobject_treesitter( + text, + range, + objtype, + obj_name, + syntax.tree().root_node(), + lang_config, + count, + ) + }; + let selection = doc.selection(view.id).clone().transform(|range| { match ch { 'w' => textobject::textobject_word(text, range, objtype, count), + 'c' => textobject_treesitter("class", range), + 'f' => textobject_treesitter("function", range), + 'p' => textobject_treesitter("parameter", range), // TODO: cancel new ranges if inconsistent surround matches across lines ch if !ch.is_ascii_alphanumeric() => { textobject::textobject_surround(text, range, objtype, ch, count) diff --git a/runtime/queries/go/textobjects.scm b/runtime/queries/go/textobjects.scm new file mode 100644 index 00000000..9bcfc690 --- /dev/null +++ b/runtime/queries/go/textobjects.scm @@ -0,0 +1,21 @@ +(function_declaration + body: (block)? @function.inside) @function.around + +(func_literal + (_)? @function.inside) @function.around + +(method_declaration + body: (block)? @function.inside) @function.around + +;; struct and interface declaration as class textobject? +(type_declaration + (type_spec (type_identifier) (struct_type (field_declaration_list (_)?) @class.inside))) @class.around + +(type_declaration + (type_spec (type_identifier) (interface_type (method_spec_list (_)?) @class.inside))) @class.around + +(parameter_list + (_) @parameter.inside) + +(argument_list + (_) @parameter.inside) diff --git a/runtime/queries/python/textobjects.scm b/runtime/queries/python/textobjects.scm new file mode 100644 index 00000000..a52538af --- /dev/null +++ b/runtime/queries/python/textobjects.scm @@ -0,0 +1,14 @@ +(function_definition + body: (block)? @function.inside) @function.around + +(class_definition + body: (block)? @class.inside) @class.around + +(parameters + (_) @parameter.inside) + +(lambda_parameters + (_) @parameter.inside) + +(argument_list + (_) @parameter.inside) diff --git a/runtime/queries/rust/textobjects.scm b/runtime/queries/rust/textobjects.scm new file mode 100644 index 00000000..e3132687 --- /dev/null +++ b/runtime/queries/rust/textobjects.scm @@ -0,0 +1,26 @@ +(function_item + body: (_) @function.inside) @function.around + +(struct_item + body: (_) @class.inside) @class.around + +(enum_item + body: (_) @class.inside) @class.around + +(union_item + body: (_) @class.inside) @class.around + +(trait_item + body: (_) @class.inside) @class.around + +(impl_item + body: (_) @class.inside) @class.around + +(parameters + (_) @parameter.inside) + +(closure_parameters + (_) @parameter.inside) + +(arguments + (_) @parameter.inside)