Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)

The current `:tree-sitter-subtree` has a bug with field names when the
field name belongs to an unnamed child node. Take this Ruby example:

    def self.method_name
      true
    end

The subtree given by tree-sitter-cli is:

    (singleton_method [2, 0] - [4, 3]
      object: (self [2, 4] - [2, 8])
      name: (identifier [2, 9] - [2, 20])
      body: (body_statement [3, 2] - [3, 6]
        (true [3, 2] - [3, 6])))

But the `:tree-sitter-subtree` output was:

    (singleton_method
      object: (self)
      body: (identifier)
      (body_statement (true)))

The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest`. The old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node, which produced the mislabeled output
shown above.

To fix it, we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.

[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
pull/4783/head
Michael Davis 2 years ago committed by GitHub
parent c6b83368b3
commit 94346356e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1283,7 +1283,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::{iter, mem, ops, str, usize}; use std::{iter, mem, ops, str, usize};
use tree_sitter::{ use tree_sitter::{
Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
QueryMatch, Range, TextProvider, Tree, QueryMatch, Range, TextProvider, Tree, TreeCursor,
}; };
const CANCELLATION_CHECK_INTERVAL: usize = 100; const CANCELLATION_CHECK_INTERVAL: usize = 100;
@ -2153,57 +2153,68 @@ impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> {
} }
} }
fn node_is_visible(node: &Node) -> bool {
node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id()))
}
pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result { pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result {
pretty_print_tree_impl(fmt, node, true, None, 0) if node.child_count() == 0 {
if node_is_visible(&node) {
write!(fmt, "({})", node.kind())
} else {
write!(fmt, "\"{}\"", node.kind())
}
} else {
pretty_print_tree_impl(fmt, &mut node.walk(), 0)
}
} }
fn pretty_print_tree_impl<W: fmt::Write>( fn pretty_print_tree_impl<W: fmt::Write>(
fmt: &mut W, fmt: &mut W,
node: Node, cursor: &mut TreeCursor,
is_root: bool,
field_name: Option<&str>,
depth: usize, depth: usize,
) -> fmt::Result { ) -> fmt::Result {
fn is_visible(node: Node) -> bool { let node = cursor.node();
node.is_missing() let visible = node_is_visible(&node);
|| (node.is_named() && node.language().node_kind_is_visible(node.kind_id()))
}
if is_visible(node) { if visible {
let indentation_columns = depth * 2; let indentation_columns = depth * 2;
write!(fmt, "{:indentation_columns$}", "")?; write!(fmt, "{:indentation_columns$}", "")?;
if let Some(field_name) = field_name { if let Some(field_name) = cursor.field_name() {
write!(fmt, "{}: ", field_name)?; write!(fmt, "{}: ", field_name)?;
} }
write!(fmt, "({}", node.kind())?; write!(fmt, "({}", node.kind())?;
} else if is_root {
write!(fmt, "(\"{}\")", node.kind())?;
} }
for child_idx in 0..node.child_count() { // Handle children.
if let Some(child) = node.child(child_idx) { if cursor.goto_first_child() {
if is_visible(child) { loop {
if node_is_visible(&cursor.node()) {
fmt.write_char('\n')?; fmt.write_char('\n')?;
} }
pretty_print_tree_impl( pretty_print_tree_impl(fmt, cursor, depth + 1)?;
fmt,
child, if !cursor.goto_next_sibling() {
false, break;
node.field_name_for_child(child_idx as u32), }
depth + 1,
)?;
} }
let moved = cursor.goto_parent();
// The parent of the first child must exist, and must be `node`.
debug_assert!(moved);
debug_assert!(cursor.node() == node);
} }
if is_visible(node) { if visible {
write!(fmt, ")")?; fmt.write_char(')')?;
} }
Ok(()) Ok(())
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
@ -2376,11 +2387,17 @@ mod test {
} }
#[track_caller] #[track_caller]
fn assert_pretty_print(source: &str, expected: &str, start: usize, end: usize) { fn assert_pretty_print(
language_name: &str,
source: &str,
expected: &str,
start: usize,
end: usize,
) {
let source = Rope::from_str(source); let source = Rope::from_str(source);
let loader = Loader::new(Configuration { language: vec![] }); let loader = Loader::new(Configuration { language: vec![] });
let language = get_language("rust").unwrap(); let language = get_language(language_name).unwrap();
let config = HighlightConfiguration::new(language, "", "", "").unwrap(); let config = HighlightConfiguration::new(language, "", "", "").unwrap();
let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader)); let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader));
@ -2400,13 +2417,14 @@ mod test {
#[test] #[test]
fn test_pretty_print() { fn test_pretty_print() {
let source = r#"/// Hello"#; let source = r#"/// Hello"#;
assert_pretty_print(source, "(line_comment)", 0, source.len()); assert_pretty_print("rust", source, "(line_comment)", 0, source.len());
// A large tree should be indented with fields: // A large tree should be indented with fields:
let source = r#"fn main() { let source = r#"fn main() {
println!("Hello, World!"); println!("Hello, World!");
}"#; }"#;
assert_pretty_print( assert_pretty_print(
"rust",
source, source,
concat!( concat!(
"(function_item\n", "(function_item\n",
@ -2425,11 +2443,34 @@ mod test {
// Selecting a token should print just that token: // Selecting a token should print just that token:
let source = r#"fn main() {}"#; let source = r#"fn main() {}"#;
assert_pretty_print(source, r#"("fn")"#, 0, 1); assert_pretty_print("rust", source, r#""fn""#, 0, 1);
// Error nodes are printed as errors: // Error nodes are printed as errors:
let source = r#"}{"#; let source = r#"}{"#;
assert_pretty_print(source, "(ERROR)", 0, source.len()); assert_pretty_print("rust", source, "(ERROR)", 0, source.len());
// Fields broken under unnamed nodes are determined correctly.
// In the following source, `object` belongs to the `singleton_method`
// rule but `name` and `body` belong to an unnamed helper `_method_rest`.
// This can cause a bug with a pretty-printing implementation that
// uses `Node::field_name_for_child` to determine field names but is
// fixed when using `TreeCursor::field_name`.
let source = "def self.method_name
true
end";
assert_pretty_print(
"ruby",
source,
concat!(
"(singleton_method\n",
" object: (self)\n",
" name: (identifier)\n",
" body: (body_statement\n",
" (true)))"
),
0,
source.len(),
);
} }
#[test] #[test]

Loading…
Cancel
Save