Implement @align (and @anchor) indent query.

main
Daniel Ebert 1 year ago committed by Blaž Hrastnik
parent 929eb0c39e
commit eab0d4fa4b

@ -1,13 +1,13 @@
use std::collections::HashMap; use std::{borrow::Cow, collections::HashMap};
use tree_sitter::{Query, QueryCursor, QueryPredicateArg}; use tree_sitter::{Query, QueryCursor, QueryPredicateArg};
use crate::{ use crate::{
chars::{char_is_line_ending, char_is_whitespace}, chars::{char_is_line_ending, char_is_whitespace},
graphemes::tab_width_at, graphemes::{grapheme_width, tab_width_at},
syntax::{LanguageConfiguration, RopeProvider, Syntax}, syntax::{LanguageConfiguration, RopeProvider, Syntax},
tree_sitter::Node, tree_sitter::Node,
Rope, RopeSlice, Rope, RopeGraphemes, RopeSlice,
}; };
/// Enum representing indentation style. /// Enum representing indentation style.
@ -237,19 +237,33 @@ fn get_first_in_line(mut node: Node, new_line_byte_pos: Option<usize>) -> Vec<bo
/// This is usually constructed in one of 2 ways: /// This is usually constructed in one of 2 ways:
/// - Successively add indent captures to get the (added) indent from a single line /// - Successively add indent captures to get the (added) indent from a single line
/// - Successively add the indent results for each line /// - Successively add the indent results for each line
/// The string that this indentation defines starts with the string contained in the align field (unless it is None), followed by:
/// - max(0, indent - outdent) tabs, if tabs are used for indentation
/// - max(0, indent - outdent)*indent_width spaces, if spaces are used for indentation
#[derive(Default, Debug, PartialEq, Eq, Clone)] #[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct Indentation { pub struct Indentation {
/// The total indent (the number of indent levels) is defined as max(0, indent-outdent).
/// The string that this results in depends on the indent style (spaces or tabs, etc.)
indent: usize, indent: usize,
indent_always: usize, indent_always: usize,
outdent: usize, outdent: usize,
outdent_always: usize, outdent_always: usize,
/// The alignment, as a string containing only tabs & spaces. Storing this as a string instead of e.g.
/// the (visual) width ensures that the alignment is preserved even if the tab width changes.
align: Option<String>,
} }
impl Indentation { impl Indentation {
/// Add some other [Indentation] to this. /// Add some other [Indentation] to this.
/// The added indent should be the total added indent from one line /// The added indent should be the total added indent from one line.
fn add_line(&mut self, added: &Indentation) { /// Indent should always be added starting from the bottom (or equivalently, the innermost tree-sitter node).
fn add_line(&mut self, added: Indentation) {
// Align overrides the indent from outer scopes.
if self.align.is_some() {
return;
}
if added.align.is_some() {
self.align = added.align;
return;
}
self.indent += added.indent; self.indent += added.indent;
self.indent_always += added.indent_always; self.indent_always += added.indent_always;
self.outdent += added.outdent; self.outdent += added.outdent;
@ -280,10 +294,12 @@ impl Indentation {
self.outdent_always += 1; self.outdent_always += 1;
self.outdent = 0; self.outdent = 0;
} }
IndentCaptureType::Align(align) => {
self.align = Some(align);
} }
} }
}
fn as_string(&self, indent_style: &IndentStyle) -> String { fn into_string(self, indent_style: &IndentStyle) -> String {
let indent = self.indent_always + self.indent; let indent = self.indent_always + self.indent;
let outdent = self.outdent_always + self.outdent; let outdent = self.outdent_always + self.outdent;
@ -293,7 +309,13 @@ impl Indentation {
log::warn!("Encountered more outdent than indent nodes while calculating indentation: {} outdent, {} indent", self.outdent, self.indent); log::warn!("Encountered more outdent than indent nodes while calculating indentation: {} outdent, {} indent", self.outdent, self.indent);
0 0
}; };
indent_style.as_str().repeat(indent_level) let mut indent_string = if let Some(align) = self.align {
align
} else {
String::new()
};
indent_string.push_str(&indent_style.as_str().repeat(indent_level));
indent_string
} }
} }
@ -303,13 +325,14 @@ struct IndentCapture {
capture_type: IndentCaptureType, capture_type: IndentCaptureType,
scope: IndentScope, scope: IndentScope,
} }
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, Copy, PartialEq)]
enum IndentCaptureType { enum IndentCaptureType {
Indent, Indent,
IndentAlways, IndentAlways,
Outdent, Outdent,
OutdentAlways, OutdentAlways,
/// Alignment given as a string of whitespace
Align(String),
} }
impl IndentCaptureType { impl IndentCaptureType {
@ -317,6 +340,7 @@ impl IndentCaptureType {
match self { match self {
IndentCaptureType::Indent | IndentCaptureType::IndentAlways => IndentScope::Tail, IndentCaptureType::Indent | IndentCaptureType::IndentAlways => IndentScope::Tail,
IndentCaptureType::Outdent | IndentCaptureType::OutdentAlways => IndentScope::All, IndentCaptureType::Outdent | IndentCaptureType::OutdentAlways => IndentScope::All,
IndentCaptureType::Align(_) => IndentScope::All,
} }
} }
} }
@ -348,46 +372,35 @@ struct IndentQueryResult {
extend_captures: HashMap<usize, Vec<ExtendCapture>>, extend_captures: HashMap<usize, Vec<ExtendCapture>>,
} }
fn get_node_start_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
let mut node_line = node.start_position().row;
// Adjust for the new line that will be inserted
if new_line_byte_pos.map_or(false, |pos| node.start_byte() >= pos) {
node_line += 1;
}
node_line
}
fn get_node_end_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
let mut node_line = node.end_position().row;
// Adjust for the new line that will be inserted (with a strict inequality since end_byte is exclusive)
if new_line_byte_pos.map_or(false, |pos| node.end_byte() > pos) {
node_line += 1;
}
node_line
}
fn query_indents( fn query_indents(
query: &Query, query: &Query,
syntax: &Syntax, syntax: &Syntax,
cursor: &mut QueryCursor, cursor: &mut QueryCursor,
text: RopeSlice, text: RopeSlice,
range: std::ops::Range<usize>, range: std::ops::Range<usize>,
// Position of the (optional) newly inserted line break. new_line_byte_pos: Option<usize>,
// Given as (line, byte_pos)
new_line_break: Option<(usize, usize)>,
) -> IndentQueryResult { ) -> IndentQueryResult {
let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new(); let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new();
let mut extend_captures: HashMap<usize, Vec<ExtendCapture>> = HashMap::new(); let mut extend_captures: HashMap<usize, Vec<ExtendCapture>> = HashMap::new();
cursor.set_byte_range(range); cursor.set_byte_range(range);
let get_node_start_line = |node: Node| {
let mut node_line = node.start_position().row;
// Adjust for the new line that will be inserted
if let Some((line, byte)) = new_line_break {
if node_line == line && node.start_byte() >= byte {
node_line += 1;
}
}
node_line
};
let get_node_end_line = |node: Node| {
let mut node_line = node.end_position().row;
// Adjust for the new line that will be inserted
if let Some((line, byte)) = new_line_break {
if node_line == line && node.end_byte() < byte {
node_line += 1;
}
}
node_line
};
// Iterate over all captures from the query // Iterate over all captures from the query
for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) { for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) {
// Skip matches where not all custom predicates are fulfilled // Skip matches where not all custom predicates are fulfilled
@ -418,8 +431,8 @@ fn query_indents(
let n2 = m.nodes_for_capture_index(*capt2).next(); let n2 = m.nodes_for_capture_index(*capt2).next();
match (n1, n2) { match (n1, n2) {
(Some(n1), Some(n2)) => { (Some(n1), Some(n2)) => {
let n1_line = get_node_start_line(n1); let n1_line = get_node_start_line(n1, new_line_byte_pos);
let n2_line = get_node_start_line(n2); let n2_line = get_node_start_line(n2, new_line_byte_pos);
let same_line = n1_line == n2_line; let same_line = n1_line == n2_line;
same_line==(pred.operator.as_ref()=="same-line?") same_line==(pred.operator.as_ref()=="same-line?")
} }
@ -437,7 +450,7 @@ fn query_indents(
match node { match node {
Some(node) => { Some(node) => {
let (start_line, end_line) = (get_node_start_line(node), get_node_end_line(node)); let (start_line, end_line) = (get_node_start_line(node,new_line_byte_pos), get_node_end_line(node, new_line_byte_pos));
let one_line = end_line == start_line; let one_line = end_line == start_line;
one_line != (pred.operator.as_ref() == "not-one-line?") one_line != (pred.operator.as_ref() == "not-one-line?")
}, },
@ -458,6 +471,11 @@ fn query_indents(
}) { }) {
continue; continue;
} }
// A list of pairs (node_id, indent_capture) that are added by this match.
// They cannot be added to indent_captures immediately since they may depend on other captures (such as an @anchor).
let mut added_indent_captures: Vec<(usize, IndentCapture)> = Vec::new();
// The row/column position of the optional anchor in this query
let mut anchor: Option<tree_sitter::Node> = None;
for capture in m.captures { for capture in m.captures {
let capture_name = query.capture_names()[capture.index as usize].as_str(); let capture_name = query.capture_names()[capture.index as usize].as_str();
let capture_type = match capture_name { let capture_type = match capture_name {
@ -465,6 +483,16 @@ fn query_indents(
"indent.always" => IndentCaptureType::IndentAlways, "indent.always" => IndentCaptureType::IndentAlways,
"outdent" => IndentCaptureType::Outdent, "outdent" => IndentCaptureType::Outdent,
"outdent.always" => IndentCaptureType::OutdentAlways, "outdent.always" => IndentCaptureType::OutdentAlways,
// The alignment will be updated to the correct value at the end, when the anchor is known.
"align" => IndentCaptureType::Align(String::from("")),
"anchor" => {
if anchor.is_some() {
log::error!("Invalid indent query: Encountered more than one @anchor in the same match.")
} else {
anchor = Some(capture.node);
}
continue;
}
"extend" => { "extend" => {
extend_captures extend_captures
.entry(capture.node.id()) .entry(capture.node.id())
@ -514,11 +542,41 @@ fn query_indents(
} }
} }
} }
added_indent_captures.push((capture.node.id(), indent_capture))
}
for (node_id, mut capture) in added_indent_captures {
// Set the anchor for all align queries.
if let IndentCaptureType::Align(_) = capture.capture_type {
let anchor = match anchor {
None => {
log::error!(
"Invalid indent query: @align requires an accompanying @anchor."
);
continue;
}
Some(anchor) => anchor,
};
// Create a string of tabs & spaces that should have the same width
// as the string that precedes the anchor (independent of the tab width).
let mut align = String::new();
for grapheme in RopeGraphemes::new(
text.line(anchor.start_position().row)
.byte_slice(0..anchor.start_position().column),
) {
if grapheme == "\t" {
align.push('\t');
} else {
align.extend(
std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))),
);
}
}
capture.capture_type = IndentCaptureType::Align(align);
}
indent_captures indent_captures
.entry(capture.node.id()) .entry(node_id)
// Most entries only need to contain a single IndentCapture
.or_insert_with(|| Vec::with_capacity(1)) .or_insert_with(|| Vec::with_capacity(1))
.push(indent_capture); .push(capture);
} }
} }
@ -648,6 +706,7 @@ pub fn treesitter_indent_for_pos(
new_line: bool, new_line: bool,
) -> Option<String> { ) -> Option<String> {
let byte_pos = text.char_to_byte(pos); let byte_pos = text.char_to_byte(pos);
let new_line_byte_pos = new_line.then_some(byte_pos);
// The innermost tree-sitter node which is considered for the indent // The innermost tree-sitter node which is considered for the indent
// computation. It may change if some predeceding node is extended // computation. It may change if some predeceding node is extended
let mut node = syntax let mut node = syntax
@ -685,13 +744,13 @@ pub fn treesitter_indent_for_pos(
&mut cursor, &mut cursor,
text, text,
query_range, query_range,
new_line.then_some((line, byte_pos)), new_line_byte_pos,
); );
ts_parser.cursors.push(cursor); ts_parser.cursors.push(cursor);
(query_result, deepest_preceding) (query_result, deepest_preceding)
}) })
}; };
let indent_captures = query_result.indent_captures; let mut indent_captures = query_result.indent_captures;
let extend_captures = query_result.extend_captures; let extend_captures = query_result.extend_captures;
// Check for extend captures, potentially changing the node that the indent calculation starts with // Check for extend captures, potentially changing the node that the indent calculation starts with
@ -719,8 +778,10 @@ pub fn treesitter_indent_for_pos(
// one entry for each ancestor of the node (which is what we iterate over) // one entry for each ancestor of the node (which is what we iterate over)
let is_first = *first_in_line.last().unwrap(); let is_first = *first_in_line.last().unwrap();
// Apply all indent definitions for this node // Apply all indent definitions for this node.
if let Some(definitions) = indent_captures.get(&node.id()) { // Since we only iterate over each node once, we can remove the
// corresponding captures from the HashMap to avoid cloning them.
if let Some(definitions) = indent_captures.remove(&node.id()) {
for definition in definitions { for definition in definitions {
match definition.scope { match definition.scope {
IndentScope::All => { IndentScope::All => {
@ -738,29 +799,19 @@ pub fn treesitter_indent_for_pos(
} }
if let Some(parent) = node.parent() { if let Some(parent) = node.parent() {
let mut node_line = node.start_position().row; let node_line = get_node_start_line(node, new_line_byte_pos);
let mut parent_line = parent.start_position().row; let parent_line = get_node_start_line(parent, new_line_byte_pos);
if node_line == line && new_line {
// Also consider the line that will be inserted
if node.start_byte() >= byte_pos {
node_line += 1;
}
if parent.start_byte() >= byte_pos {
parent_line += 1;
}
};
if node_line != parent_line { if node_line != parent_line {
// Don't add indent for the line below the line of the query // Don't add indent for the line below the line of the query
if node_line < line + (new_line as usize) { if node_line < line + (new_line as usize) {
result.add_line(&indent_for_line_below); result.add_line(indent_for_line_below);
} }
if node_line == parent_line + 1 { if node_line == parent_line + 1 {
indent_for_line_below = indent_for_line; indent_for_line_below = indent_for_line;
} else { } else {
result.add_line(&indent_for_line); result.add_line(indent_for_line);
indent_for_line_below = Indentation::default(); indent_for_line_below = Indentation::default();
} }
@ -775,14 +826,13 @@ pub fn treesitter_indent_for_pos(
if (node.start_position().row < line) if (node.start_position().row < line)
|| (new_line && node.start_position().row == line && node.start_byte() < byte_pos) || (new_line && node.start_position().row == line && node.start_byte() < byte_pos)
{ {
result.add_line(&indent_for_line_below); result.add_line(indent_for_line_below);
} }
result.add_line(indent_for_line);
result.add_line(&indent_for_line);
break; break;
} }
} }
Some(result.as_string(indent_style)) Some(result.into_string(indent_style))
} }
/// Returns the indentation for a new line. /// Returns the indentation for a new line.

Loading…
Cancel
Save