mirror of https://github.com/helix-editor/helix
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1247 lines
46 KiB
Rust
1247 lines
46 KiB
Rust
use std::{borrow::Cow, collections::HashMap};
|
|
|
|
use helix_stdx::rope::RopeSliceExt;
|
|
use tree_sitter::{Query, QueryCursor, QueryPredicateArg};
|
|
|
|
use crate::{
|
|
chars::{char_is_line_ending, char_is_whitespace},
|
|
graphemes::{grapheme_width, tab_width_at},
|
|
syntax::{IndentationHeuristic, LanguageConfiguration, RopeProvider, Syntax},
|
|
tree_sitter::Node,
|
|
Position, Rope, RopeGraphemes, RopeSlice,
|
|
};
|
|
|
|
/// Enum representing indentation style.
|
|
///
|
|
/// Only values 1-8 are valid for the `Spaces` variant.
|
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
|
pub enum IndentStyle {
|
|
Tabs,
|
|
Spaces(u8),
|
|
}
|
|
|
|
// 16 spaces
|
|
const INDENTS: &str = " ";
|
|
pub const MAX_INDENT: u8 = 16;
|
|
|
|
impl IndentStyle {
|
|
/// Creates an `IndentStyle` from an indentation string.
|
|
///
|
|
/// For example, passing `" "` (four spaces) will create `IndentStyle::Spaces(4)`.
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[inline]
|
|
pub fn from_str(indent: &str) -> Self {
|
|
// XXX: do we care about validating the input more than this? Probably not...?
|
|
debug_assert!(!indent.is_empty() && indent.len() <= MAX_INDENT as usize);
|
|
|
|
if indent.starts_with(' ') {
|
|
IndentStyle::Spaces(indent.len().clamp(1, MAX_INDENT as usize) as u8)
|
|
} else {
|
|
IndentStyle::Tabs
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub fn as_str(&self) -> &'static str {
|
|
match *self {
|
|
IndentStyle::Tabs => "\t",
|
|
IndentStyle::Spaces(n) => {
|
|
// Unsupported indentation style. This should never happen,
|
|
debug_assert!(n > 0 && n <= MAX_INDENT);
|
|
|
|
// Either way, clamp to the nearest supported value
|
|
let closest_n = n.clamp(1, MAX_INDENT) as usize;
|
|
&INDENTS[0..closest_n]
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub fn indent_width(&self, tab_width: usize) -> usize {
|
|
match *self {
|
|
IndentStyle::Tabs => tab_width,
|
|
IndentStyle::Spaces(width) => width as usize,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Attempts to detect the indentation style used in a document.
|
|
///
|
|
/// Returns the indentation style if the auto-detect confidence is
|
|
/// reasonably high, otherwise returns `None`.
|
|
pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> {
|
|
// Build a histogram of the indentation *increases* between
|
|
// subsequent lines, ignoring lines that are all whitespace.
|
|
//
|
|
// Index 0 is for tabs, the rest are 1-MAX_INDENT spaces.
|
|
let histogram: [usize; MAX_INDENT as usize + 1] = {
|
|
let mut histogram = [0; MAX_INDENT as usize + 1];
|
|
let mut prev_line_is_tabs = false;
|
|
let mut prev_line_leading_count = 0usize;
|
|
|
|
// Loop through the lines, checking for and recording indentation
|
|
// increases as we go.
|
|
'outer: for line in document_text.lines().take(1000) {
|
|
let mut c_iter = line.chars();
|
|
|
|
// Is first character a tab or space?
|
|
let is_tabs = match c_iter.next() {
|
|
Some('\t') => true,
|
|
Some(' ') => false,
|
|
|
|
// Ignore blank lines.
|
|
Some(c) if char_is_line_ending(c) => continue,
|
|
|
|
_ => {
|
|
prev_line_is_tabs = false;
|
|
prev_line_leading_count = 0;
|
|
continue;
|
|
}
|
|
};
|
|
|
|
// Count the line's total leading tab/space characters.
|
|
let mut leading_count = 1;
|
|
let mut count_is_done = false;
|
|
for c in c_iter {
|
|
match c {
|
|
'\t' if is_tabs && !count_is_done => leading_count += 1,
|
|
' ' if !is_tabs && !count_is_done => leading_count += 1,
|
|
|
|
// We stop counting if we hit whitespace that doesn't
|
|
// qualify as indent or doesn't match the leading
|
|
// whitespace, but we don't exit the loop yet because
|
|
// we still want to determine if the line is blank.
|
|
c if char_is_whitespace(c) => count_is_done = true,
|
|
|
|
// Ignore blank lines.
|
|
c if char_is_line_ending(c) => continue 'outer,
|
|
|
|
_ => break,
|
|
}
|
|
|
|
// Bound the worst-case execution time for weird text files.
|
|
if leading_count > 256 {
|
|
continue 'outer;
|
|
}
|
|
}
|
|
|
|
// If there was an increase in indentation over the previous
|
|
// line, update the histogram with that increase.
|
|
if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
|
|
&& prev_line_leading_count < leading_count
|
|
{
|
|
if is_tabs {
|
|
histogram[0] += 1;
|
|
} else {
|
|
let amount = leading_count - prev_line_leading_count;
|
|
if amount <= MAX_INDENT as usize {
|
|
histogram[amount] += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Store this line's leading whitespace info for use with
|
|
// the next line.
|
|
prev_line_is_tabs = is_tabs;
|
|
prev_line_leading_count = leading_count;
|
|
}
|
|
|
|
// Give more weight to tabs, because their presence is a very
|
|
// strong indicator.
|
|
histogram[0] *= 2;
|
|
|
|
histogram
|
|
};
|
|
|
|
// Find the most frequent indent, its frequency, and the frequency of
|
|
// the next-most frequent indent.
|
|
let indent = histogram
|
|
.iter()
|
|
.enumerate()
|
|
.max_by_key(|kv| kv.1)
|
|
.unwrap()
|
|
.0;
|
|
let indent_freq = histogram[indent];
|
|
let indent_freq_2 = *histogram
|
|
.iter()
|
|
.enumerate()
|
|
.filter(|kv| kv.0 != indent)
|
|
.map(|kv| kv.1)
|
|
.max()
|
|
.unwrap();
|
|
|
|
// Return the the auto-detected result if we're confident enough in its
|
|
// accuracy, based on some heuristics.
|
|
if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
|
|
Some(match indent {
|
|
0 => IndentStyle::Tabs,
|
|
_ => IndentStyle::Spaces(indent as u8),
|
|
})
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// To determine indentation of a newly inserted line, figure out the indentation at the last col
|
|
/// of the previous line.
|
|
pub fn indent_level_for_line(line: RopeSlice, tab_width: usize, indent_width: usize) -> usize {
|
|
let mut len = 0;
|
|
for ch in line.chars() {
|
|
match ch {
|
|
'\t' => len += tab_width_at(len, tab_width as u16),
|
|
' ' => len += 1,
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
len / indent_width
|
|
}
|
|
|
|
/// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width).
|
|
fn whitespace_with_same_width(text: RopeSlice) -> String {
|
|
let mut s = String::new();
|
|
for grapheme in RopeGraphemes::new(text) {
|
|
if grapheme == "\t" {
|
|
s.push('\t');
|
|
} else {
|
|
s.extend(std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))));
|
|
}
|
|
}
|
|
s
|
|
}
|
|
|
|
fn add_indent_level(
|
|
mut base_indent: String,
|
|
added_indent_level: isize,
|
|
indent_style: &IndentStyle,
|
|
tab_width: usize,
|
|
) -> String {
|
|
if added_indent_level >= 0 {
|
|
// Adding a non-negative indent is easy, we can simply append the indent string
|
|
base_indent.push_str(&indent_style.as_str().repeat(added_indent_level as usize));
|
|
base_indent
|
|
} else {
|
|
// In this case, we want to return a prefix of `base_indent`.
|
|
// Since the width of a tab depends on its offset, we cannot simply iterate over
|
|
// the chars of `base_indent` in reverse until we have the desired indent reduction,
|
|
// instead we iterate over them twice in forward direction.
|
|
let base_indent_rope = RopeSlice::from(base_indent.as_str());
|
|
#[allow(deprecated)]
|
|
let base_indent_width =
|
|
crate::visual_coords_at_pos(base_indent_rope, base_indent_rope.len_chars(), tab_width)
|
|
.col;
|
|
let target_indent_width = base_indent_width
|
|
.saturating_sub((-added_indent_level) as usize * indent_style.indent_width(tab_width));
|
|
#[allow(deprecated)]
|
|
let char_end_idx = crate::pos_at_visual_coords(
|
|
base_indent_rope,
|
|
Position {
|
|
row: 0,
|
|
col: target_indent_width,
|
|
},
|
|
tab_width,
|
|
);
|
|
let byte_end_idx = base_indent_rope.char_to_byte(char_end_idx);
|
|
base_indent.truncate(byte_end_idx);
|
|
base_indent
|
|
}
|
|
}
|
|
|
|
/// Return true if only whitespace comes before the node on its line.
|
|
/// If given, new_line_byte_pos is treated the same way as any existing newline.
|
|
fn is_first_in_line(node: Node, text: RopeSlice, new_line_byte_pos: Option<usize>) -> bool {
|
|
let mut line_start_byte_pos = text.line_to_byte(node.start_position().row);
|
|
if let Some(pos) = new_line_byte_pos {
|
|
if line_start_byte_pos < pos && pos <= node.start_byte() {
|
|
line_start_byte_pos = pos;
|
|
}
|
|
}
|
|
text.byte_slice(line_start_byte_pos..node.start_byte())
|
|
.chars()
|
|
.all(|c| c.is_whitespace())
|
|
}
|
|
|
|
/// The total indent for some line of code.
|
|
/// This is usually constructed in one of 2 ways:
|
|
/// - Successively add indent captures to get the (added) indent from a single line
|
|
/// - Successively add the indent results for each line
|
|
/// The string that this indentation defines starts with the string contained in the align field (unless it is None), followed by:
|
|
/// - max(0, indent - outdent) tabs, if tabs are used for indentation
|
|
/// - max(0, indent - outdent)*indent_width spaces, if spaces are used for indentation
|
|
#[derive(Default, Debug, PartialEq, Eq, Clone)]
|
|
pub struct Indentation<'a> {
|
|
indent: usize,
|
|
indent_always: usize,
|
|
outdent: usize,
|
|
outdent_always: usize,
|
|
/// The alignment, as a string containing only tabs & spaces. Storing this as a string instead of e.g.
|
|
/// the (visual) width ensures that the alignment is preserved even if the tab width changes.
|
|
align: Option<RopeSlice<'a>>,
|
|
}
|
|
|
|
impl<'a> Indentation<'a> {
|
|
/// Add some other [Indentation] to this.
|
|
/// The added indent should be the total added indent from one line.
|
|
/// Indent should always be added starting from the bottom (or equivalently, the innermost tree-sitter node).
|
|
fn add_line(&mut self, added: Indentation<'a>) {
|
|
// Align overrides the indent from outer scopes.
|
|
if self.align.is_some() {
|
|
return;
|
|
}
|
|
if added.align.is_some() {
|
|
self.align = added.align;
|
|
return;
|
|
}
|
|
self.indent += added.indent;
|
|
self.indent_always += added.indent_always;
|
|
self.outdent += added.outdent;
|
|
self.outdent_always += added.outdent_always;
|
|
}
|
|
|
|
/// Add an indent capture to this indent.
|
|
/// Only captures that apply to the same line should be added together in this way (otherwise use `add_line`)
|
|
/// and the captures should be added starting from the innermost tree-sitter node (currently this only matters
|
|
/// if multiple `@align` patterns occur on the same line).
|
|
fn add_capture(&mut self, added: IndentCaptureType<'a>) {
|
|
match added {
|
|
IndentCaptureType::Indent => {
|
|
if self.indent_always == 0 {
|
|
self.indent = 1;
|
|
}
|
|
}
|
|
IndentCaptureType::IndentAlways => {
|
|
// any time we encounter an `indent.always` on the same line, we
|
|
// want to cancel out all regular indents
|
|
self.indent_always += 1;
|
|
self.indent = 0;
|
|
}
|
|
IndentCaptureType::Outdent => {
|
|
if self.outdent_always == 0 {
|
|
self.outdent = 1;
|
|
}
|
|
}
|
|
IndentCaptureType::OutdentAlways => {
|
|
self.outdent_always += 1;
|
|
self.outdent = 0;
|
|
}
|
|
IndentCaptureType::Align(align) => {
|
|
if self.align.is_none() {
|
|
self.align = Some(align);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
fn net_indent(&self) -> isize {
|
|
(self.indent + self.indent_always) as isize
|
|
- ((self.outdent + self.outdent_always) as isize)
|
|
}
|
|
/// Convert `self` into a string, taking into account the computed and actual indentation of some other line.
|
|
fn relative_indent(
|
|
&self,
|
|
other_computed_indent: &Self,
|
|
other_leading_whitespace: RopeSlice,
|
|
indent_style: &IndentStyle,
|
|
tab_width: usize,
|
|
) -> Option<String> {
|
|
if self.align == other_computed_indent.align {
|
|
// If self and baseline are either not aligned to anything or both aligned the same way,
|
|
// we can simply take `other_leading_whitespace` and add some indent / outdent to it (in the second
|
|
// case, the alignment should already be accounted for in `other_leading_whitespace`).
|
|
let indent_diff = self.net_indent() - other_computed_indent.net_indent();
|
|
Some(add_indent_level(
|
|
String::from(other_leading_whitespace),
|
|
indent_diff,
|
|
indent_style,
|
|
tab_width,
|
|
))
|
|
} else {
|
|
// If the alignment of both lines is different, we cannot compare their indentation in any meaningful way
|
|
None
|
|
}
|
|
}
|
|
pub fn to_string(&self, indent_style: &IndentStyle, tab_width: usize) -> String {
|
|
add_indent_level(
|
|
self.align
|
|
.map_or_else(String::new, whitespace_with_same_width),
|
|
self.net_indent(),
|
|
indent_style,
|
|
tab_width,
|
|
)
|
|
}
|
|
}
|
|
|
|
/// An indent definition which corresponds to a capture from the indent query
|
|
#[derive(Debug)]
|
|
struct IndentCapture<'a> {
|
|
capture_type: IndentCaptureType<'a>,
|
|
scope: IndentScope,
|
|
}
|
|
#[derive(Debug, Clone, PartialEq)]
|
|
enum IndentCaptureType<'a> {
|
|
Indent,
|
|
IndentAlways,
|
|
Outdent,
|
|
OutdentAlways,
|
|
/// Alignment given as a string of whitespace
|
|
Align(RopeSlice<'a>),
|
|
}
|
|
|
|
impl<'a> IndentCaptureType<'a> {
|
|
fn default_scope(&self) -> IndentScope {
|
|
match self {
|
|
IndentCaptureType::Indent | IndentCaptureType::IndentAlways => IndentScope::Tail,
|
|
IndentCaptureType::Outdent | IndentCaptureType::OutdentAlways => IndentScope::All,
|
|
IndentCaptureType::Align(_) => IndentScope::All,
|
|
}
|
|
}
|
|
}
|
|
/// This defines which part of a node an [IndentCapture] applies to.
|
|
/// Each [IndentCaptureType] has a default scope, but the scope can be changed
|
|
/// with `#set!` property declarations.
|
|
#[derive(Debug, Clone, Copy)]
|
|
enum IndentScope {
|
|
/// The indent applies to the whole node
|
|
All,
|
|
/// The indent applies to everything except for the first line of the node
|
|
Tail,
|
|
}
|
|
|
|
/// A capture from the indent query which does not define an indent but extends
|
|
/// the range of a node. This is used before the indent is calculated.
|
|
#[derive(Debug)]
|
|
enum ExtendCapture {
|
|
Extend,
|
|
PreventOnce,
|
|
}
|
|
|
|
/// The result of running a tree-sitter indent query. This stores for
|
|
/// each node (identified by its ID) the relevant captures (already filtered
|
|
/// by predicates).
|
|
#[derive(Debug)]
|
|
struct IndentQueryResult<'a> {
|
|
indent_captures: HashMap<usize, Vec<IndentCapture<'a>>>,
|
|
extend_captures: HashMap<usize, Vec<ExtendCapture>>,
|
|
}
|
|
|
|
fn get_node_start_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
|
|
let mut node_line = node.start_position().row;
|
|
// Adjust for the new line that will be inserted
|
|
if new_line_byte_pos.map_or(false, |pos| node.start_byte() >= pos) {
|
|
node_line += 1;
|
|
}
|
|
node_line
|
|
}
|
|
fn get_node_end_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
|
|
let mut node_line = node.end_position().row;
|
|
// Adjust for the new line that will be inserted (with a strict inequality since end_byte is exclusive)
|
|
if new_line_byte_pos.map_or(false, |pos| node.end_byte() > pos) {
|
|
node_line += 1;
|
|
}
|
|
node_line
|
|
}
|
|
|
|
fn query_indents<'a>(
|
|
query: &Query,
|
|
syntax: &Syntax,
|
|
cursor: &mut QueryCursor,
|
|
text: RopeSlice<'a>,
|
|
range: std::ops::Range<usize>,
|
|
new_line_byte_pos: Option<usize>,
|
|
) -> IndentQueryResult<'a> {
|
|
let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new();
|
|
let mut extend_captures: HashMap<usize, Vec<ExtendCapture>> = HashMap::new();
|
|
cursor.set_byte_range(range);
|
|
|
|
// Iterate over all captures from the query
|
|
for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) {
|
|
// Skip matches where not all custom predicates are fulfilled
|
|
if !query.general_predicates(m.pattern_index).iter().all(|pred| {
|
|
match pred.operator.as_ref() {
|
|
"not-kind-eq?" => match (pred.args.first(), pred.args.get(1)) {
|
|
(
|
|
Some(QueryPredicateArg::Capture(capture_idx)),
|
|
Some(QueryPredicateArg::String(kind)),
|
|
) => {
|
|
let node = m.nodes_for_capture_index(*capture_idx).next();
|
|
match node {
|
|
Some(node) => node.kind()!=kind.as_ref(),
|
|
_ => true,
|
|
}
|
|
}
|
|
_ => {
|
|
panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string");
|
|
}
|
|
},
|
|
"same-line?" | "not-same-line?" => {
|
|
match (pred.args.first(), pred.args.get(1)) {
|
|
(
|
|
Some(QueryPredicateArg::Capture(capt1)),
|
|
Some(QueryPredicateArg::Capture(capt2))
|
|
) => {
|
|
let n1 = m.nodes_for_capture_index(*capt1).next();
|
|
let n2 = m.nodes_for_capture_index(*capt2).next();
|
|
match (n1, n2) {
|
|
(Some(n1), Some(n2)) => {
|
|
let n1_line = get_node_start_line(n1, new_line_byte_pos);
|
|
let n2_line = get_node_start_line(n2, new_line_byte_pos);
|
|
let same_line = n1_line == n2_line;
|
|
same_line==(pred.operator.as_ref()=="same-line?")
|
|
}
|
|
_ => true,
|
|
}
|
|
}
|
|
_ => {
|
|
panic!("Invalid indent query: Arguments to \"{}\" must be 2 captures", pred.operator);
|
|
}
|
|
}
|
|
}
|
|
"one-line?" | "not-one-line?" => match pred.args.first() {
|
|
Some(QueryPredicateArg::Capture(capture_idx)) => {
|
|
let node = m.nodes_for_capture_index(*capture_idx).next();
|
|
|
|
match node {
|
|
Some(node) => {
|
|
let (start_line, end_line) = (get_node_start_line(node,new_line_byte_pos), get_node_end_line(node, new_line_byte_pos));
|
|
let one_line = end_line == start_line;
|
|
one_line != (pred.operator.as_ref() == "not-one-line?")
|
|
},
|
|
_ => true,
|
|
}
|
|
}
|
|
_ => {
|
|
panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string");
|
|
}
|
|
},
|
|
_ => {
|
|
panic!(
|
|
"Invalid indent query: Unknown predicate (\"{}\")",
|
|
pred.operator
|
|
);
|
|
}
|
|
}
|
|
}) {
|
|
continue;
|
|
}
|
|
// A list of pairs (node_id, indent_capture) that are added by this match.
|
|
// They cannot be added to indent_captures immediately since they may depend on other captures (such as an @anchor).
|
|
let mut added_indent_captures: Vec<(usize, IndentCapture)> = Vec::new();
|
|
// The row/column position of the optional anchor in this query
|
|
let mut anchor: Option<tree_sitter::Node> = None;
|
|
for capture in m.captures {
|
|
let capture_name = query.capture_names()[capture.index as usize];
|
|
let capture_type = match capture_name {
|
|
"indent" => IndentCaptureType::Indent,
|
|
"indent.always" => IndentCaptureType::IndentAlways,
|
|
"outdent" => IndentCaptureType::Outdent,
|
|
"outdent.always" => IndentCaptureType::OutdentAlways,
|
|
// The alignment will be updated to the correct value at the end, when the anchor is known.
|
|
"align" => IndentCaptureType::Align(RopeSlice::from("")),
|
|
"anchor" => {
|
|
if anchor.is_some() {
|
|
log::error!("Invalid indent query: Encountered more than one @anchor in the same match.")
|
|
} else {
|
|
anchor = Some(capture.node);
|
|
}
|
|
continue;
|
|
}
|
|
"extend" => {
|
|
extend_captures
|
|
.entry(capture.node.id())
|
|
.or_insert_with(|| Vec::with_capacity(1))
|
|
.push(ExtendCapture::Extend);
|
|
continue;
|
|
}
|
|
"extend.prevent-once" => {
|
|
extend_captures
|
|
.entry(capture.node.id())
|
|
.or_insert_with(|| Vec::with_capacity(1))
|
|
.push(ExtendCapture::PreventOnce);
|
|
continue;
|
|
}
|
|
_ => {
|
|
// Ignore any unknown captures (these may be needed for predicates such as #match?)
|
|
continue;
|
|
}
|
|
};
|
|
let scope = capture_type.default_scope();
|
|
let mut indent_capture = IndentCapture {
|
|
capture_type,
|
|
scope,
|
|
};
|
|
// Apply additional settings for this capture
|
|
for property in query.property_settings(m.pattern_index) {
|
|
match property.key.as_ref() {
|
|
"scope" => {
|
|
indent_capture.scope = match property.value.as_deref() {
|
|
Some("all") => IndentScope::All,
|
|
Some("tail") => IndentScope::Tail,
|
|
Some(s) => {
|
|
panic!("Invalid indent query: Unknown value for \"scope\" property (\"{}\")", s);
|
|
}
|
|
None => {
|
|
panic!(
|
|
"Invalid indent query: Missing value for \"scope\" property"
|
|
);
|
|
}
|
|
}
|
|
}
|
|
_ => {
|
|
panic!(
|
|
"Invalid indent query: Unknown property \"{}\"",
|
|
property.key
|
|
);
|
|
}
|
|
}
|
|
}
|
|
added_indent_captures.push((capture.node.id(), indent_capture))
|
|
}
|
|
for (node_id, mut capture) in added_indent_captures {
|
|
// Set the anchor for all align queries.
|
|
if let IndentCaptureType::Align(_) = capture.capture_type {
|
|
let anchor = match anchor {
|
|
None => {
|
|
log::error!(
|
|
"Invalid indent query: @align requires an accompanying @anchor."
|
|
);
|
|
continue;
|
|
}
|
|
Some(anchor) => anchor,
|
|
};
|
|
capture.capture_type = IndentCaptureType::Align(
|
|
text.line(anchor.start_position().row)
|
|
.byte_slice(0..anchor.start_position().column),
|
|
);
|
|
}
|
|
indent_captures
|
|
.entry(node_id)
|
|
.or_insert_with(|| Vec::with_capacity(1))
|
|
.push(capture);
|
|
}
|
|
}
|
|
|
|
let result = IndentQueryResult {
|
|
indent_captures,
|
|
extend_captures,
|
|
};
|
|
|
|
log::trace!("indent result = {:?}", result);
|
|
|
|
result
|
|
}
|
|
|
|
/// Handle extend queries. deepest_preceding is the deepest descendant of node that directly precedes the cursor position.
|
|
/// Any ancestor of deepest_preceding which is also a descendant of node may be "extended". In that case, node will be updated,
|
|
/// so that the indent computation starts with the correct syntax node.
|
|
fn extend_nodes<'a>(
|
|
node: &mut Node<'a>,
|
|
mut deepest_preceding: Node<'a>,
|
|
extend_captures: &HashMap<usize, Vec<ExtendCapture>>,
|
|
text: RopeSlice,
|
|
line: usize,
|
|
tab_width: usize,
|
|
indent_width: usize,
|
|
) {
|
|
let mut stop_extend = false;
|
|
|
|
while deepest_preceding != *node {
|
|
let mut extend_node = false;
|
|
// This will be set to true if this node is captured, regardless of whether
|
|
// it actually will be extended (e.g. because the cursor isn't indented
|
|
// more than the node).
|
|
let mut node_captured = false;
|
|
if let Some(captures) = extend_captures.get(&deepest_preceding.id()) {
|
|
for capture in captures {
|
|
match capture {
|
|
ExtendCapture::PreventOnce => {
|
|
stop_extend = true;
|
|
}
|
|
ExtendCapture::Extend => {
|
|
node_captured = true;
|
|
// We extend the node if
|
|
// - the cursor is on the same line as the end of the node OR
|
|
// - the line that the cursor is on is more indented than the
|
|
// first line of the node
|
|
if deepest_preceding.end_position().row == line {
|
|
extend_node = true;
|
|
} else {
|
|
let cursor_indent =
|
|
indent_level_for_line(text.line(line), tab_width, indent_width);
|
|
let node_indent = indent_level_for_line(
|
|
text.line(deepest_preceding.start_position().row),
|
|
tab_width,
|
|
indent_width,
|
|
);
|
|
if cursor_indent > node_indent {
|
|
extend_node = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// If we encountered some `StopExtend` capture before, we don't
|
|
// extend the node even if we otherwise would
|
|
if node_captured && stop_extend {
|
|
stop_extend = false;
|
|
} else if extend_node && !stop_extend {
|
|
*node = deepest_preceding;
|
|
break;
|
|
}
|
|
// If the tree contains a syntax error, `deepest_preceding` may not
|
|
// have a parent despite being a descendant of `node`.
|
|
deepest_preceding = match deepest_preceding.parent() {
|
|
Some(parent) => parent,
|
|
None => return,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Prepare an indent query by computing:
|
|
/// - The node from which to start the query (this is non-trivial due to `@extend` captures)
|
|
/// - The indent captures for all relevant nodes.
|
|
#[allow(clippy::too_many_arguments)]
|
|
fn init_indent_query<'a, 'b>(
|
|
query: &Query,
|
|
syntax: &'a Syntax,
|
|
text: RopeSlice<'b>,
|
|
tab_width: usize,
|
|
indent_width: usize,
|
|
line: usize,
|
|
byte_pos: usize,
|
|
new_line_byte_pos: Option<usize>,
|
|
) -> Option<(Node<'a>, HashMap<usize, Vec<IndentCapture<'b>>>)> {
|
|
// The innermost tree-sitter node which is considered for the indent
|
|
// computation. It may change if some predeceding node is extended
|
|
let mut node = syntax
|
|
.tree()
|
|
.root_node()
|
|
.descendant_for_byte_range(byte_pos, byte_pos)?;
|
|
|
|
let (query_result, deepest_preceding) = {
|
|
// The query range should intersect with all nodes directly preceding
|
|
// the position of the indent query in case one of them is extended.
|
|
let mut deepest_preceding = None; // The deepest node preceding the indent query position
|
|
let mut tree_cursor = node.walk();
|
|
for child in node.children(&mut tree_cursor) {
|
|
if child.byte_range().end <= byte_pos {
|
|
deepest_preceding = Some(child);
|
|
}
|
|
}
|
|
deepest_preceding = deepest_preceding.map(|mut prec| {
|
|
// Get the deepest directly preceding node
|
|
while prec.child_count() > 0 {
|
|
prec = prec.child(prec.child_count() - 1).unwrap();
|
|
}
|
|
prec
|
|
});
|
|
let query_range = deepest_preceding
|
|
.map(|prec| prec.byte_range().end - 1..byte_pos + 1)
|
|
.unwrap_or(byte_pos..byte_pos + 1);
|
|
|
|
crate::syntax::PARSER.with(|ts_parser| {
|
|
let mut ts_parser = ts_parser.borrow_mut();
|
|
let mut cursor = ts_parser.cursors.pop().unwrap_or_default();
|
|
let query_result = query_indents(
|
|
query,
|
|
syntax,
|
|
&mut cursor,
|
|
text,
|
|
query_range,
|
|
new_line_byte_pos,
|
|
);
|
|
ts_parser.cursors.push(cursor);
|
|
(query_result, deepest_preceding)
|
|
})
|
|
};
|
|
let extend_captures = query_result.extend_captures;
|
|
|
|
// Check for extend captures, potentially changing the node that the indent calculation starts with
|
|
if let Some(deepest_preceding) = deepest_preceding {
|
|
extend_nodes(
|
|
&mut node,
|
|
deepest_preceding,
|
|
&extend_captures,
|
|
text,
|
|
line,
|
|
tab_width,
|
|
indent_width,
|
|
);
|
|
}
|
|
Some((node, query_result.indent_captures))
|
|
}
|
|
|
|
/// Use the syntax tree to determine the indentation for a given position.
|
|
/// This can be used in 2 ways:
|
|
///
|
|
/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
|
|
/// - In this case, pos should be inside the first tree-sitter node on that line.
|
|
/// In most cases, this can just be the first non-whitespace on that line.
|
|
/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line
|
|
/// after pos were moved to a new line.
|
|
///
|
|
/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
|
|
/// Each of these nodes produces some [Indentation] for:
|
|
///
|
|
/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
|
|
/// - The line after the node. This is defined by:
|
|
/// - The scope `tail`.
|
|
/// - The scope `all` if this node is not the first node on its line.
|
|
///
|
|
/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
|
|
/// The indents from different nodes for the same line are then combined.
|
|
/// The result [Indentation] is simply the sum of the [Indentation] for all lines.
|
|
///
|
|
/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines:
|
|
/// ```ignore
|
|
/// some_function(|| {
|
|
/// // Both the function parameters as well as the contained block should be indented.
|
|
/// // Because they are on the same line, this only yields one indent level
|
|
/// });
|
|
/// ```
|
|
///
|
|
/// ```ignore
|
|
/// some_function(
|
|
/// param1,
|
|
/// || {
|
|
/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
|
|
/// },
|
|
/// );
|
|
/// ```
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn treesitter_indent_for_pos<'a>(
|
|
query: &Query,
|
|
syntax: &Syntax,
|
|
tab_width: usize,
|
|
indent_width: usize,
|
|
text: RopeSlice<'a>,
|
|
line: usize,
|
|
pos: usize,
|
|
new_line: bool,
|
|
) -> Option<Indentation<'a>> {
|
|
let byte_pos = text.char_to_byte(pos);
|
|
let new_line_byte_pos = new_line.then_some(byte_pos);
|
|
let (mut node, mut indent_captures) = init_indent_query(
|
|
query,
|
|
syntax,
|
|
text,
|
|
tab_width,
|
|
indent_width,
|
|
line,
|
|
byte_pos,
|
|
new_line_byte_pos,
|
|
)?;
|
|
|
|
let mut result = Indentation::default();
|
|
// We always keep track of all the indent changes on one line, in order to only indent once
|
|
// even if there are multiple "indent" nodes on the same line
|
|
let mut indent_for_line = Indentation::default();
|
|
let mut indent_for_line_below = Indentation::default();
|
|
|
|
loop {
|
|
let is_first = is_first_in_line(node, text, new_line_byte_pos);
|
|
|
|
// Apply all indent definitions for this node.
|
|
// Since we only iterate over each node once, we can remove the
|
|
// corresponding captures from the HashMap to avoid cloning them.
|
|
if let Some(definitions) = indent_captures.remove(&node.id()) {
|
|
for definition in definitions {
|
|
match definition.scope {
|
|
IndentScope::All => {
|
|
if is_first {
|
|
indent_for_line.add_capture(definition.capture_type);
|
|
} else {
|
|
indent_for_line_below.add_capture(definition.capture_type);
|
|
}
|
|
}
|
|
IndentScope::Tail => {
|
|
indent_for_line_below.add_capture(definition.capture_type);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(parent) = node.parent() {
|
|
let node_line = get_node_start_line(node, new_line_byte_pos);
|
|
let parent_line = get_node_start_line(parent, new_line_byte_pos);
|
|
|
|
if node_line != parent_line {
|
|
// Don't add indent for the line below the line of the query
|
|
if node_line < line + (new_line as usize) {
|
|
result.add_line(indent_for_line_below);
|
|
}
|
|
|
|
if node_line == parent_line + 1 {
|
|
indent_for_line_below = indent_for_line;
|
|
} else {
|
|
result.add_line(indent_for_line);
|
|
indent_for_line_below = Indentation::default();
|
|
}
|
|
|
|
indent_for_line = Indentation::default();
|
|
}
|
|
|
|
node = parent;
|
|
} else {
|
|
// Only add the indentation for the line below if that line
|
|
// is not after the line that the indentation is calculated for.
|
|
if (node.start_position().row < line)
|
|
|| (new_line && node.start_position().row == line && node.start_byte() < byte_pos)
|
|
{
|
|
result.add_line(indent_for_line_below);
|
|
}
|
|
result.add_line(indent_for_line);
|
|
break;
|
|
}
|
|
}
|
|
Some(result)
|
|
}
|
|
|
|
/// Returns the indentation for a new line.
|
|
/// This is done either using treesitter, or if that's not available by copying the indentation from the current line
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn indent_for_newline(
|
|
language_config: Option<&LanguageConfiguration>,
|
|
syntax: Option<&Syntax>,
|
|
indent_heuristic: &IndentationHeuristic,
|
|
indent_style: &IndentStyle,
|
|
tab_width: usize,
|
|
text: RopeSlice,
|
|
line_before: usize,
|
|
line_before_end_pos: usize,
|
|
current_line: usize,
|
|
) -> String {
|
|
let indent_width = indent_style.indent_width(tab_width);
|
|
if let (
|
|
IndentationHeuristic::TreeSitter | IndentationHeuristic::Hybrid,
|
|
Some(query),
|
|
Some(syntax),
|
|
) = (
|
|
indent_heuristic,
|
|
language_config.and_then(|config| config.indent_query()),
|
|
syntax,
|
|
) {
|
|
if let Some(indent) = treesitter_indent_for_pos(
|
|
query,
|
|
syntax,
|
|
tab_width,
|
|
indent_width,
|
|
text,
|
|
line_before,
|
|
line_before_end_pos,
|
|
true,
|
|
) {
|
|
if *indent_heuristic == IndentationHeuristic::Hybrid {
|
|
// We want to compute the indentation not only based on the
|
|
// syntax tree but also on the actual indentation of a previous
|
|
// line. This makes indentation computation more resilient to
|
|
// incomplete queries, incomplete source code & differing indentation
|
|
// styles for the same language.
|
|
// However, using the indent of a previous line as a baseline may not
|
|
// make sense, e.g. if it has a different alignment than the new line.
|
|
// In order to prevent edge cases with long running times, we only try
|
|
// a constant number of (non-empty) lines.
|
|
const MAX_ATTEMPTS: usize = 4;
|
|
let mut num_attempts = 0;
|
|
for line_idx in (0..=line_before).rev() {
|
|
let line = text.line(line_idx);
|
|
let first_non_whitespace_char = match line.first_non_whitespace_char() {
|
|
Some(i) => i,
|
|
None => {
|
|
continue;
|
|
}
|
|
};
|
|
if let Some(indent) = (|| {
|
|
let computed_indent = treesitter_indent_for_pos(
|
|
query,
|
|
syntax,
|
|
tab_width,
|
|
indent_width,
|
|
text,
|
|
line_idx,
|
|
text.line_to_char(line_idx) + first_non_whitespace_char,
|
|
false,
|
|
)?;
|
|
let leading_whitespace = line.slice(0..first_non_whitespace_char);
|
|
indent.relative_indent(
|
|
&computed_indent,
|
|
leading_whitespace,
|
|
indent_style,
|
|
tab_width,
|
|
)
|
|
})() {
|
|
return indent;
|
|
}
|
|
num_attempts += 1;
|
|
if num_attempts == MAX_ATTEMPTS {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return indent.to_string(indent_style, tab_width);
|
|
};
|
|
}
|
|
// Fallback in case we either don't have indent queries or they failed for some reason
|
|
let indent_level = indent_level_for_line(text.line(current_line), tab_width, indent_width);
|
|
indent_style.as_str().repeat(indent_level)
|
|
}
|
|
|
|
pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> {
|
|
let mut scopes = Vec::new();
|
|
if let Some(syntax) = syntax {
|
|
let pos = text.char_to_byte(pos);
|
|
let mut node = match syntax
|
|
.tree()
|
|
.root_node()
|
|
.descendant_for_byte_range(pos, pos)
|
|
{
|
|
Some(node) => node,
|
|
None => return scopes,
|
|
};
|
|
|
|
scopes.push(node.kind());
|
|
|
|
while let Some(parent) = node.parent() {
|
|
scopes.push(parent.kind());
|
|
node = parent;
|
|
}
|
|
}
|
|
|
|
scopes.reverse();
|
|
scopes
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod test {
|
|
use super::*;
|
|
use crate::Rope;
|
|
|
|
#[test]
|
|
fn test_indent_level() {
|
|
let tab_width = 4;
|
|
let indent_width = 4;
|
|
let line = Rope::from(" fn new"); // 8 spaces
|
|
assert_eq!(
|
|
indent_level_for_line(line.slice(..), tab_width, indent_width),
|
|
2
|
|
);
|
|
let line = Rope::from("\t\t\tfn new"); // 3 tabs
|
|
assert_eq!(
|
|
indent_level_for_line(line.slice(..), tab_width, indent_width),
|
|
3
|
|
);
|
|
// mixed indentation
|
|
let line = Rope::from("\t \tfn new"); // 1 tab, 4 spaces, tab
|
|
assert_eq!(
|
|
indent_level_for_line(line.slice(..), tab_width, indent_width),
|
|
3
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_large_indent_level() {
|
|
let tab_width = 16;
|
|
let indent_width = 16;
|
|
let line = Rope::from(" fn new"); // 16 spaces
|
|
assert_eq!(
|
|
indent_level_for_line(line.slice(..), tab_width, indent_width),
|
|
1
|
|
);
|
|
let line = Rope::from(" fn new"); // 32 spaces
|
|
assert_eq!(
|
|
indent_level_for_line(line.slice(..), tab_width, indent_width),
|
|
2
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn add_capture() {
|
|
let indent = || Indentation {
|
|
indent: 1,
|
|
..Default::default()
|
|
};
|
|
let indent_always = || Indentation {
|
|
indent_always: 1,
|
|
..Default::default()
|
|
};
|
|
let outdent = || Indentation {
|
|
outdent: 1,
|
|
..Default::default()
|
|
};
|
|
let outdent_always = || Indentation {
|
|
outdent_always: 1,
|
|
..Default::default()
|
|
};
|
|
|
|
fn add_capture<'a>(
|
|
mut indent: Indentation<'a>,
|
|
capture: IndentCaptureType<'a>,
|
|
) -> Indentation<'a> {
|
|
indent.add_capture(capture);
|
|
indent
|
|
}
|
|
|
|
// adding an indent to no indent makes an indent
|
|
assert_eq!(
|
|
indent(),
|
|
add_capture(Indentation::default(), IndentCaptureType::Indent)
|
|
);
|
|
assert_eq!(
|
|
indent_always(),
|
|
add_capture(Indentation::default(), IndentCaptureType::IndentAlways)
|
|
);
|
|
assert_eq!(
|
|
outdent(),
|
|
add_capture(Indentation::default(), IndentCaptureType::Outdent)
|
|
);
|
|
assert_eq!(
|
|
outdent_always(),
|
|
add_capture(Indentation::default(), IndentCaptureType::OutdentAlways)
|
|
);
|
|
|
|
// adding an indent to an already indented has no effect
|
|
assert_eq!(indent(), add_capture(indent(), IndentCaptureType::Indent));
|
|
assert_eq!(
|
|
outdent(),
|
|
add_capture(outdent(), IndentCaptureType::Outdent)
|
|
);
|
|
|
|
// adding an always to a regular makes it always
|
|
assert_eq!(
|
|
indent_always(),
|
|
add_capture(indent(), IndentCaptureType::IndentAlways)
|
|
);
|
|
assert_eq!(
|
|
outdent_always(),
|
|
add_capture(outdent(), IndentCaptureType::OutdentAlways)
|
|
);
|
|
|
|
// adding an always to an always is additive
|
|
assert_eq!(
|
|
Indentation {
|
|
indent_always: 2,
|
|
..Default::default()
|
|
},
|
|
add_capture(indent_always(), IndentCaptureType::IndentAlways)
|
|
);
|
|
assert_eq!(
|
|
Indentation {
|
|
outdent_always: 2,
|
|
..Default::default()
|
|
},
|
|
add_capture(outdent_always(), IndentCaptureType::OutdentAlways)
|
|
);
|
|
|
|
// adding regular to always should be associative
|
|
assert_eq!(
|
|
Indentation {
|
|
indent_always: 1,
|
|
..Default::default()
|
|
},
|
|
add_capture(
|
|
add_capture(indent(), IndentCaptureType::Indent),
|
|
IndentCaptureType::IndentAlways
|
|
)
|
|
);
|
|
assert_eq!(
|
|
Indentation {
|
|
indent_always: 1,
|
|
..Default::default()
|
|
},
|
|
add_capture(
|
|
add_capture(indent(), IndentCaptureType::IndentAlways),
|
|
IndentCaptureType::Indent
|
|
)
|
|
);
|
|
assert_eq!(
|
|
Indentation {
|
|
outdent_always: 1,
|
|
..Default::default()
|
|
},
|
|
add_capture(
|
|
add_capture(outdent(), IndentCaptureType::Outdent),
|
|
IndentCaptureType::OutdentAlways
|
|
)
|
|
);
|
|
assert_eq!(
|
|
Indentation {
|
|
outdent_always: 1,
|
|
..Default::default()
|
|
},
|
|
add_capture(
|
|
add_capture(outdent(), IndentCaptureType::OutdentAlways),
|
|
IndentCaptureType::Outdent
|
|
)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_relative_indent() {
|
|
let indent_style = IndentStyle::Spaces(4);
|
|
let tab_width: usize = 4;
|
|
let no_align = [
|
|
Indentation::default(),
|
|
Indentation {
|
|
indent: 1,
|
|
..Default::default()
|
|
},
|
|
Indentation {
|
|
indent: 5,
|
|
outdent: 1,
|
|
..Default::default()
|
|
},
|
|
];
|
|
let align = no_align.clone().map(|indent| Indentation {
|
|
align: Some(RopeSlice::from("12345")),
|
|
..indent
|
|
});
|
|
let different_align = Indentation {
|
|
align: Some(RopeSlice::from("123456")),
|
|
..Default::default()
|
|
};
|
|
|
|
// Check that relative and absolute indentation computation are the same when the line we compare to is
|
|
// indented as we expect.
|
|
let check_consistency = |indent: &Indentation, other: &Indentation| {
|
|
assert_eq!(
|
|
indent.relative_indent(
|
|
other,
|
|
RopeSlice::from(other.to_string(&indent_style, tab_width).as_str()),
|
|
&indent_style,
|
|
tab_width
|
|
),
|
|
Some(indent.to_string(&indent_style, tab_width))
|
|
);
|
|
};
|
|
for a in &no_align {
|
|
for b in &no_align {
|
|
check_consistency(a, b);
|
|
}
|
|
}
|
|
for a in &align {
|
|
for b in &align {
|
|
check_consistency(a, b);
|
|
}
|
|
}
|
|
|
|
// Relative indent computation makes no sense if the alignment differs
|
|
assert_eq!(
|
|
align[0].relative_indent(
|
|
&no_align[0],
|
|
RopeSlice::from(" "),
|
|
&indent_style,
|
|
tab_width
|
|
),
|
|
None
|
|
);
|
|
assert_eq!(
|
|
align[0].relative_indent(
|
|
&different_align,
|
|
RopeSlice::from(" "),
|
|
&indent_style,
|
|
tab_width
|
|
),
|
|
None
|
|
);
|
|
}
|
|
}
|