From 94346356e7657011adf0012651ec031b98d87411 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 16 Nov 2022 19:03:02 -0600 Subject: [PATCH] Use TreeCursor to pretty-print :tree-sitter-subtree (#4606) The current `:tree-sitter-subtree` has a bug with field names when the field name belongs to an unnamed child node. Take this Ruby example: def self.method_name true end The subtree given by tree-sitter-cli is: (singleton_method [2, 0] - [4, 3] object: (self [2, 4] - [2, 8]) name: (identifier [2, 9] - [2, 20]) body: (body_statement [3, 2] - [3, 6] (true [3, 2] - [3, 6]))) But the `:tree-sitter-subtree` output was: (singleton_method object: (self) body: (identifier) (body_statement (true))) The `singleton_method` rule defines the `name` and `body` fields in an unnamed helper rule `_method_rest` and the old implementation of `pretty_print_tree_impl` would pass the `field_name` down from the named `singleton_method` node. To fix it, we switch to the [TreeCursor] API, which is recommended by the tree-sitter docs for traversing the tree. `TreeCursor::field_name` accurately determines the field name for the current cursor position even when the node is unnamed. 
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html --- helix-core/src/syntax.rs | 101 +++++++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 30 deletions(-) diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index b320fcdab..93bd7fe90 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1283,7 +1283,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, - QueryMatch, Range, TextProvider, Tree, + QueryMatch, Range, TextProvider, Tree, TreeCursor, }; const CANCELLATION_CHECK_INTERVAL: usize = 100; @@ -2153,57 +2153,68 @@ impl> Iterator for Merge { } } +fn node_is_visible(node: &Node) -> bool { + node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) +} + pub fn pretty_print_tree(fmt: &mut W, node: Node) -> fmt::Result { - pretty_print_tree_impl(fmt, node, true, None, 0) + if node.child_count() == 0 { + if node_is_visible(&node) { + write!(fmt, "({})", node.kind()) + } else { + write!(fmt, "\"{}\"", node.kind()) + } + } else { + pretty_print_tree_impl(fmt, &mut node.walk(), 0) + } } fn pretty_print_tree_impl( fmt: &mut W, - node: Node, - is_root: bool, - field_name: Option<&str>, + cursor: &mut TreeCursor, depth: usize, ) -> fmt::Result { - fn is_visible(node: Node) -> bool { - node.is_missing() - || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) - } + let node = cursor.node(); + let visible = node_is_visible(&node); - if is_visible(node) { + if visible { let indentation_columns = depth * 2; write!(fmt, "{:indentation_columns$}", "")?; - if let Some(field_name) = field_name { + if let Some(field_name) = cursor.field_name() { write!(fmt, "{}: ", field_name)?; } write!(fmt, "({}", node.kind())?; - } else if is_root { - write!(fmt, "(\"{}\")", node.kind())?; } - for child_idx in 
0..node.child_count() { - if let Some(child) = node.child(child_idx) { - if is_visible(child) { + // Handle children. + if cursor.goto_first_child() { + loop { + if node_is_visible(&cursor.node()) { fmt.write_char('\n')?; } - pretty_print_tree_impl( - fmt, - child, - false, - node.field_name_for_child(child_idx as u32), - depth + 1, - )?; + pretty_print_tree_impl(fmt, cursor, depth + 1)?; + + if !cursor.goto_next_sibling() { + break; + } } + + let moved = cursor.goto_parent(); + // The parent of the first child must exist, and must be `node`. + debug_assert!(moved); + debug_assert!(cursor.node() == node); } - if is_visible(node) { - write!(fmt, ")")?; + if visible { + fmt.write_char(')')?; } Ok(()) } + #[cfg(test)] mod test { use super::*; @@ -2376,11 +2387,17 @@ mod test { } #[track_caller] - fn assert_pretty_print(source: &str, expected: &str, start: usize, end: usize) { + fn assert_pretty_print( + language_name: &str, + source: &str, + expected: &str, + start: usize, + end: usize, + ) { let source = Rope::from_str(source); let loader = Loader::new(Configuration { language: vec![] }); - let language = get_language("rust").unwrap(); + let language = get_language(language_name).unwrap(); let config = HighlightConfiguration::new(language, "", "", "").unwrap(); let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader)); @@ -2400,13 +2417,14 @@ mod test { #[test] fn test_pretty_print() { let source = r#"/// Hello"#; - assert_pretty_print(source, "(line_comment)", 0, source.len()); + assert_pretty_print("rust", source, "(line_comment)", 0, source.len()); // A large tree should be indented with fields: let source = r#"fn main() { println!("Hello, World!"); }"#; assert_pretty_print( + "rust", source, concat!( "(function_item\n", @@ -2425,11 +2443,34 @@ mod test { // Selecting a token should print just that token: let source = r#"fn main() {}"#; - assert_pretty_print(source, r#"("fn")"#, 0, 1); + assert_pretty_print("rust", source, r#""fn""#, 0, 1); // Error 
nodes are printed as errors: let source = r#"}{"#; - assert_pretty_print(source, "(ERROR)", 0, source.len()); + assert_pretty_print("rust", source, "(ERROR)", 0, source.len()); + + // Fields broken under unnamed nodes are determined correctly. + // In the following source, `object` belongs to the `singleton_method` + // rule but `name` and `body` belong to an unnamed helper `_method_rest`. + // This can cause a bug with a pretty-printing implementation that + // uses `Node::field_name_for_child` to determine field names but is + // fixed when using `TreeCursor::field_name`. + let source = "def self.method_name + true + end"; + assert_pretty_print( + "ruby", + source, + concat!( + "(singleton_method\n", + " object: (self)\n", + " name: (identifier)\n", + " body: (body_statement\n", + " (true)))" + ), + 0, + source.len(), + ); } #[test]