mirror of https://github.com/helix-editor/helix
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
636 lines
23 KiB
Rust
636 lines
23 KiB
Rust
use std::collections::HashMap;
|
|
|
|
use tree_sitter::{Query, QueryCursor, QueryPredicateArg};
|
|
|
|
use crate::{
|
|
chars::{char_is_line_ending, char_is_whitespace},
|
|
syntax::{LanguageConfiguration, RopeProvider, Syntax},
|
|
tree_sitter::Node,
|
|
Rope, RopeSlice,
|
|
};
|
|
|
|
/// Enum representing indentation style.
///
/// Only values 1-8 are valid for the `Spaces` variant.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum IndentStyle {
    /// Indent with a single hard tab per level.
    Tabs,
    /// Indent with the given number of spaces (1-8) per level.
    Spaces(u8),
}

impl IndentStyle {
    /// Creates an `IndentStyle` from an indentation string.
    ///
    /// A string starting with a space yields `Spaces(len)`, where `len` is the
    /// byte length of the whole string; anything else yields `Tabs`.
    ///
    /// For example, passing `"    "` (four spaces) will create `IndentStyle::Spaces(4)`.
    ///
    /// In debug builds this asserts that `indent` is non-empty and at most
    /// 8 bytes long; no further validation is performed.
    #[allow(clippy::should_implement_trait)]
    #[inline]
    pub fn from_str(indent: &str) -> Self {
        // XXX: do we care about validating the input more than this?  Probably not...?
        debug_assert!(!indent.is_empty() && indent.len() <= 8);

        if indent.starts_with(' ') {
            IndentStyle::Spaces(indent.len() as u8)
        } else {
            IndentStyle::Tabs
        }
    }

    /// Returns the string that represents one indentation level in this style.
    ///
    /// `Tabs` yields `"\t"` and `Spaces(n)` yields exactly `n` spaces for
    /// `n` in 1-8. Any other width is unsupported: it trips a debug assertion
    /// and, in release builds, falls back to two spaces.
    #[inline]
    pub fn as_str(&self) -> &'static str {
        // The widest supported indent; narrower ones are prefixes of it.
        const EIGHT_SPACES: &str = "        ";

        match *self {
            IndentStyle::Tabs => "\t",
            IndentStyle::Spaces(n @ 1..=8) => &EIGHT_SPACES[..usize::from(n)],

            // Unsupported indentation style.  This should never happen,
            // but just in case fall back to two spaces.
            IndentStyle::Spaces(n) => {
                debug_assert!(n > 0 && n <= 8); // Always triggers. `debug_panic!()` wanted.
                "  "
            }
        }
    }
}
|
|
|
|
/// Attempts to detect the indentation style used in a document.
|
|
///
|
|
/// Returns the indentation style if the auto-detect confidence is
|
|
/// reasonably high, otherwise returns `None`.
|
|
pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> {
|
|
// Build a histogram of the indentation *increases* between
|
|
// subsequent lines, ignoring lines that are all whitespace.
|
|
//
|
|
// Index 0 is for tabs, the rest are 1-8 spaces.
|
|
let histogram: [usize; 9] = {
|
|
let mut histogram = [0; 9];
|
|
let mut prev_line_is_tabs = false;
|
|
let mut prev_line_leading_count = 0usize;
|
|
|
|
// Loop through the lines, checking for and recording indentation
|
|
// increases as we go.
|
|
'outer: for line in document_text.lines().take(1000) {
|
|
let mut c_iter = line.chars();
|
|
|
|
// Is first character a tab or space?
|
|
let is_tabs = match c_iter.next() {
|
|
Some('\t') => true,
|
|
Some(' ') => false,
|
|
|
|
// Ignore blank lines.
|
|
Some(c) if char_is_line_ending(c) => continue,
|
|
|
|
_ => {
|
|
prev_line_is_tabs = false;
|
|
prev_line_leading_count = 0;
|
|
continue;
|
|
}
|
|
};
|
|
|
|
// Count the line's total leading tab/space characters.
|
|
let mut leading_count = 1;
|
|
let mut count_is_done = false;
|
|
for c in c_iter {
|
|
match c {
|
|
'\t' if is_tabs && !count_is_done => leading_count += 1,
|
|
' ' if !is_tabs && !count_is_done => leading_count += 1,
|
|
|
|
// We stop counting if we hit whitespace that doesn't
|
|
// qualify as indent or doesn't match the leading
|
|
// whitespace, but we don't exit the loop yet because
|
|
// we still want to determine if the line is blank.
|
|
c if char_is_whitespace(c) => count_is_done = true,
|
|
|
|
// Ignore blank lines.
|
|
c if char_is_line_ending(c) => continue 'outer,
|
|
|
|
_ => break,
|
|
}
|
|
|
|
// Bound the worst-case execution time for weird text files.
|
|
if leading_count > 256 {
|
|
continue 'outer;
|
|
}
|
|
}
|
|
|
|
// If there was an increase in indentation over the previous
|
|
// line, update the histogram with that increase.
|
|
if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
|
|
&& prev_line_leading_count < leading_count
|
|
{
|
|
if is_tabs {
|
|
histogram[0] += 1;
|
|
} else {
|
|
let amount = leading_count - prev_line_leading_count;
|
|
if amount <= 8 {
|
|
histogram[amount] += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Store this line's leading whitespace info for use with
|
|
// the next line.
|
|
prev_line_is_tabs = is_tabs;
|
|
prev_line_leading_count = leading_count;
|
|
}
|
|
|
|
// Give more weight to tabs, because their presence is a very
|
|
// strong indicator.
|
|
histogram[0] *= 2;
|
|
|
|
histogram
|
|
};
|
|
|
|
// Find the most frequent indent, its frequency, and the frequency of
|
|
// the next-most frequent indent.
|
|
let indent = histogram
|
|
.iter()
|
|
.enumerate()
|
|
.max_by_key(|kv| kv.1)
|
|
.unwrap()
|
|
.0;
|
|
let indent_freq = histogram[indent];
|
|
let indent_freq_2 = *histogram
|
|
.iter()
|
|
.enumerate()
|
|
.filter(|kv| kv.0 != indent)
|
|
.map(|kv| kv.1)
|
|
.max()
|
|
.unwrap();
|
|
|
|
// Return the the auto-detected result if we're confident enough in its
|
|
// accuracy, based on some heuristics.
|
|
if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
|
|
Some(match indent {
|
|
0 => IndentStyle::Tabs,
|
|
_ => IndentStyle::Spaces(indent as u8),
|
|
})
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// To determine indentation of a newly inserted line, figure out the indentation at the last col
|
|
/// of the previous line.
|
|
pub fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize {
|
|
let mut len = 0;
|
|
for ch in line.chars() {
|
|
match ch {
|
|
'\t' => len += tab_width,
|
|
' ' => len += 1,
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
len / tab_width
|
|
}
|
|
|
|
/// Computes for node and all ancestors whether they are the first node on their line.
|
|
/// The first entry in the return value represents the root node, the last one the node itself
|
|
fn get_first_in_line(mut node: Node, byte_pos: usize, new_line: bool) -> Vec<bool> {
|
|
let mut first_in_line = Vec::new();
|
|
loop {
|
|
if let Some(prev) = node.prev_sibling() {
|
|
// If we insert a new line, the first node at/after the cursor is considered to be the first in its line
|
|
let first = prev.end_position().row != node.start_position().row
|
|
|| (new_line && node.start_byte() >= byte_pos && prev.start_byte() < byte_pos);
|
|
first_in_line.push(Some(first));
|
|
} else {
|
|
// Nodes that have no previous siblings are first in their line if and only if their parent is
|
|
// (which we don't know yet)
|
|
first_in_line.push(None);
|
|
}
|
|
if let Some(parent) = node.parent() {
|
|
node = parent;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
let mut result = Vec::with_capacity(first_in_line.len());
|
|
let mut parent_is_first = true; // The root node is by definition the first node in its line
|
|
for first in first_in_line.into_iter().rev() {
|
|
if let Some(first) = first {
|
|
result.push(first);
|
|
parent_is_first = first;
|
|
} else {
|
|
result.push(parent_is_first);
|
|
}
|
|
}
|
|
result
|
|
}
|
|
|
|
/// The total indent for some line of code.
|
|
/// This is usually constructed in one of 2 ways:
|
|
/// - Successively add indent captures to get the (added) indent from a single line
|
|
/// - Successively add the indent results for each line
|
|
#[derive(Default)]
|
|
struct Indentation {
|
|
/// The total indent (the number of indent levels) is defined as max(0, indent-outdent).
|
|
/// The string that this results in depends on the indent style (spaces or tabs, etc.)
|
|
indent: usize,
|
|
outdent: usize,
|
|
}
|
|
impl Indentation {
|
|
/// Add some other [IndentResult] to this.
|
|
/// The added indent should be the total added indent from one line
|
|
fn add_line(&mut self, added: &Indentation) {
|
|
if added.indent > 0 && added.outdent == 0 {
|
|
self.indent += 1;
|
|
} else if added.outdent > 0 && added.indent == 0 {
|
|
self.outdent += 1;
|
|
}
|
|
}
|
|
/// Add an indent capture to this indent.
|
|
/// All the captures that are added in this way should be on the same line.
|
|
fn add_capture(&mut self, added: IndentCaptureType) {
|
|
match added {
|
|
IndentCaptureType::Indent => {
|
|
self.indent = 1;
|
|
}
|
|
IndentCaptureType::Outdent => {
|
|
self.outdent = 1;
|
|
}
|
|
}
|
|
}
|
|
fn as_string(&self, indent_style: &IndentStyle) -> String {
|
|
let indent_level = if self.indent >= self.outdent {
|
|
self.indent - self.outdent
|
|
} else {
|
|
log::warn!("Encountered more outdent than indent nodes while calculating indentation: {} outdent, {} indent", self.outdent, self.indent);
|
|
0
|
|
};
|
|
indent_style.as_str().repeat(indent_level)
|
|
}
|
|
}
|
|
|
|
/// An indent definition which corresponds to a capture from the indent query
|
|
struct IndentCapture {
|
|
capture_type: IndentCaptureType,
|
|
scope: IndentScope,
|
|
}
|
|
#[derive(Clone, Copy)]
|
|
enum IndentCaptureType {
|
|
Indent,
|
|
Outdent,
|
|
}
|
|
impl IndentCaptureType {
|
|
fn default_scope(&self) -> IndentScope {
|
|
match self {
|
|
IndentCaptureType::Indent => IndentScope::Tail,
|
|
IndentCaptureType::Outdent => IndentScope::All,
|
|
}
|
|
}
|
|
}
|
|
/// The part of a node that an [IndentCapture] applies to.
/// Every [IndentCaptureType] comes with a default scope, which a query can override
/// with a `#set!` property declaration.
#[derive(Copy, Clone)]
enum IndentScope {
    /// The whole node is affected
    All,
    /// Everything but the first line of the node is affected
    Tail,
}
|
|
|
|
/// Execute the indent query.
|
|
/// Returns for each node (identified by its id) a list of indent captures for that node.
|
|
fn query_indents(
|
|
query: &Query,
|
|
syntax: &Syntax,
|
|
cursor: &mut QueryCursor,
|
|
text: RopeSlice,
|
|
range: std::ops::Range<usize>,
|
|
// Position of the (optional) newly inserted line break.
|
|
// Given as (line, byte_pos)
|
|
new_line_break: Option<(usize, usize)>,
|
|
) -> HashMap<usize, Vec<IndentCapture>> {
|
|
let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new();
|
|
cursor.set_byte_range(range);
|
|
// Iterate over all captures from the query
|
|
for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) {
|
|
// Skip matches where not all custom predicates are fulfilled
|
|
if !query.general_predicates(m.pattern_index).iter().all(|pred| {
|
|
match pred.operator.as_ref() {
|
|
"not-kind-eq?" => match (pred.args.get(0), pred.args.get(1)) {
|
|
(
|
|
Some(QueryPredicateArg::Capture(capture_idx)),
|
|
Some(QueryPredicateArg::String(kind)),
|
|
) => {
|
|
let node = m.nodes_for_capture_index(*capture_idx).next();
|
|
match node {
|
|
Some(node) => node.kind()!=kind.as_ref(),
|
|
_ => true,
|
|
}
|
|
}
|
|
_ => {
|
|
panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string");
|
|
}
|
|
},
|
|
"same-line?" | "not-same-line?" => {
|
|
match (pred.args.get(0), pred.args.get(1)) {
|
|
(
|
|
Some(QueryPredicateArg::Capture(capt1)),
|
|
Some(QueryPredicateArg::Capture(capt2))
|
|
) => {
|
|
let get_line_num = |node: Node| {
|
|
let mut node_line = node.start_position().row;
|
|
// Adjust for the new line that will be inserted
|
|
if let Some((line, byte)) = new_line_break {
|
|
if node_line==line && node.start_byte()>=byte {
|
|
node_line += 1;
|
|
}
|
|
}
|
|
node_line
|
|
};
|
|
let n1 = m.nodes_for_capture_index(*capt1).next();
|
|
let n2 = m.nodes_for_capture_index(*capt2).next();
|
|
match (n1, n2) {
|
|
(Some(n1), Some(n2)) => {
|
|
let same_line = get_line_num(n1)==get_line_num(n2);
|
|
same_line==(pred.operator.as_ref()=="same-line?")
|
|
}
|
|
_ => true,
|
|
}
|
|
}
|
|
_ => {
|
|
panic!("Invalid indent query: Arguments to \"{}\" must be 2 captures", pred.operator);
|
|
}
|
|
}
|
|
}
|
|
_ => {
|
|
panic!(
|
|
"Invalid indent query: Unknown predicate (\"{}\")",
|
|
pred.operator
|
|
);
|
|
}
|
|
}
|
|
}) {
|
|
continue;
|
|
}
|
|
for capture in m.captures {
|
|
let capture_type = query.capture_names()[capture.index as usize].as_str();
|
|
let capture_type = match capture_type {
|
|
"indent" => IndentCaptureType::Indent,
|
|
"outdent" => IndentCaptureType::Outdent,
|
|
_ => {
|
|
// Ignore any unknown captures (these may be needed for predicates such as #match?)
|
|
continue;
|
|
}
|
|
};
|
|
let scope = capture_type.default_scope();
|
|
let mut indent_capture = IndentCapture {
|
|
capture_type,
|
|
scope,
|
|
};
|
|
// Apply additional settings for this capture
|
|
for property in query.property_settings(m.pattern_index) {
|
|
match property.key.as_ref() {
|
|
"scope" => {
|
|
indent_capture.scope = match property.value.as_deref() {
|
|
Some("all") => IndentScope::All,
|
|
Some("tail") => IndentScope::Tail,
|
|
Some(s) => {
|
|
panic!("Invalid indent query: Unknown value for \"scope\" property (\"{}\")", s);
|
|
}
|
|
None => {
|
|
panic!(
|
|
"Invalid indent query: Missing value for \"scope\" property"
|
|
);
|
|
}
|
|
}
|
|
}
|
|
_ => {
|
|
panic!(
|
|
"Invalid indent query: Unknown property \"{}\"",
|
|
property.key
|
|
);
|
|
}
|
|
}
|
|
}
|
|
indent_captures
|
|
.entry(capture.node.id())
|
|
// Most entries only need to contain a single IndentCapture
|
|
.or_insert_with(|| Vec::with_capacity(1))
|
|
.push(indent_capture);
|
|
}
|
|
}
|
|
indent_captures
|
|
}
|
|
|
|
/// Use the syntax tree to determine the indentation for a given position.
|
|
/// This can be used in 2 ways:
|
|
///
|
|
/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
|
|
/// - In this case, pos should be inside the first tree-sitter node on that line.
|
|
/// In most cases, this can just be the first non-whitespace on that line.
|
|
/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line
|
|
/// after pos were moved to a new line.
|
|
///
|
|
/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
|
|
/// Each of these nodes produces some [AddedIndent] for:
|
|
///
|
|
/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
|
|
/// - The line after the node. This is defined by:
|
|
/// - The scope `tail`.
|
|
/// - The scope `all` if this node is not the first node on its line.
|
|
/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
|
|
/// The indents from different nodes for the same line are then combined.
|
|
/// The [IndentResult] is simply the sum of the [AddedIndent] for all lines.
|
|
///
|
|
/// Specifying which line exactly an [AddedIndent] applies to is important because indents on the same line combine differently than indents on different lines:
|
|
/// ```ignore
|
|
/// some_function(|| {
|
|
/// // Both the function parameters as well as the contained block should be indented.
|
|
/// // Because they are on the same line, this only yields one indent level
|
|
/// });
|
|
/// ```
|
|
///
|
|
/// ```ignore
|
|
/// some_function(
|
|
/// parm1,
|
|
/// || {
|
|
/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
|
|
/// },
|
|
/// );
|
|
/// ```
|
|
pub fn treesitter_indent_for_pos(
|
|
query: &Query,
|
|
syntax: &Syntax,
|
|
indent_style: &IndentStyle,
|
|
text: RopeSlice,
|
|
line: usize,
|
|
pos: usize,
|
|
new_line: bool,
|
|
) -> Option<String> {
|
|
let byte_pos = text.char_to_byte(pos);
|
|
let mut node = syntax
|
|
.tree()
|
|
.root_node()
|
|
.descendant_for_byte_range(byte_pos, byte_pos)?;
|
|
let mut first_in_line = get_first_in_line(node, byte_pos, new_line);
|
|
let new_line_break = if new_line {
|
|
Some((line, byte_pos))
|
|
} else {
|
|
None
|
|
};
|
|
let query_result = crate::syntax::PARSER.with(|ts_parser| {
|
|
let mut ts_parser = ts_parser.borrow_mut();
|
|
let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new);
|
|
let query_result = query_indents(
|
|
query,
|
|
syntax,
|
|
&mut cursor,
|
|
text,
|
|
byte_pos..byte_pos + 1,
|
|
new_line_break,
|
|
);
|
|
ts_parser.cursors.push(cursor);
|
|
query_result
|
|
});
|
|
|
|
let mut result = Indentation::default();
|
|
// We always keep track of all the indent changes on one line, in order to only indent once
|
|
// even if there are multiple "indent" nodes on the same line
|
|
let mut indent_for_line = Indentation::default();
|
|
let mut indent_for_line_below = Indentation::default();
|
|
loop {
|
|
// This can safely be unwrapped because `first_in_line` contains
|
|
// one entry for each ancestor of the node (which is what we iterate over)
|
|
let is_first = *first_in_line.last().unwrap();
|
|
// Apply all indent definitions for this node
|
|
if let Some(definitions) = query_result.get(&node.id()) {
|
|
for definition in definitions {
|
|
match definition.scope {
|
|
IndentScope::All => {
|
|
if is_first {
|
|
indent_for_line.add_capture(definition.capture_type);
|
|
} else {
|
|
indent_for_line_below.add_capture(definition.capture_type);
|
|
}
|
|
}
|
|
IndentScope::Tail => {
|
|
indent_for_line_below.add_capture(definition.capture_type);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(parent) = node.parent() {
|
|
let mut node_line = node.start_position().row;
|
|
let mut parent_line = parent.start_position().row;
|
|
if node_line == line && new_line {
|
|
// Also consider the line that will be inserted
|
|
if node.start_byte() >= byte_pos {
|
|
node_line += 1;
|
|
}
|
|
if parent.start_byte() >= byte_pos {
|
|
parent_line += 1;
|
|
}
|
|
};
|
|
if node_line != parent_line {
|
|
if node_line < line + (new_line as usize) {
|
|
// Don't add indent for the line below the line of the query
|
|
result.add_line(&indent_for_line_below);
|
|
}
|
|
if node_line == parent_line + 1 {
|
|
indent_for_line_below = indent_for_line;
|
|
} else {
|
|
result.add_line(&indent_for_line);
|
|
indent_for_line_below = Indentation::default();
|
|
}
|
|
indent_for_line = Indentation::default();
|
|
}
|
|
|
|
node = parent;
|
|
first_in_line.pop();
|
|
} else {
|
|
result.add_line(&indent_for_line_below);
|
|
result.add_line(&indent_for_line);
|
|
break;
|
|
}
|
|
}
|
|
Some(result.as_string(indent_style))
|
|
}
|
|
|
|
/// Returns the indentation for a new line.
|
|
/// This is done either using treesitter, or if that's not available by copying the indentation from the current line
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn indent_for_newline(
|
|
language_config: Option<&LanguageConfiguration>,
|
|
syntax: Option<&Syntax>,
|
|
indent_style: &IndentStyle,
|
|
tab_width: usize,
|
|
text: RopeSlice,
|
|
line_before: usize,
|
|
line_before_end_pos: usize,
|
|
current_line: usize,
|
|
) -> String {
|
|
if let (Some(query), Some(syntax)) = (
|
|
language_config.and_then(|config| config.indent_query()),
|
|
syntax,
|
|
) {
|
|
if let Some(indent) = treesitter_indent_for_pos(
|
|
query,
|
|
syntax,
|
|
indent_style,
|
|
text,
|
|
line_before,
|
|
line_before_end_pos,
|
|
true,
|
|
) {
|
|
return indent;
|
|
};
|
|
}
|
|
let indent_level = indent_level_for_line(text.line(current_line), tab_width);
|
|
indent_style.as_str().repeat(indent_level)
|
|
}
|
|
|
|
pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> {
|
|
let mut scopes = Vec::new();
|
|
if let Some(syntax) = syntax {
|
|
let pos = text.char_to_byte(pos);
|
|
let mut node = match syntax
|
|
.tree()
|
|
.root_node()
|
|
.descendant_for_byte_range(pos, pos)
|
|
{
|
|
Some(node) => node,
|
|
None => return scopes,
|
|
};
|
|
|
|
scopes.push(node.kind());
|
|
|
|
while let Some(parent) = node.parent() {
|
|
scopes.push(parent.kind());
|
|
node = parent;
|
|
}
|
|
}
|
|
|
|
scopes.reverse();
|
|
scopes
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;
    use crate::Rope;

    /// Checks `indent_level_for_line` against space-only, tab-only and mixed
    /// leading whitespace, using a tab width of 4.
    #[test]
    fn test_indent_level() {
        let tab_width = 4;
        let line = Rope::from("        fn new"); // 8 spaces
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 2);
        let line = Rope::from("\t\t\tfn new"); // 3 tabs
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
        // mixed indentation
        let line = Rope::from("\t    \tfn new"); // 1 tab, 4 spaces, tab
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
    }
}
|