significantly improve treesitter performance while editing large files (#4716)

* significantly improve treesitter performance while editing large files

* Apply stylistic suggestions from code review

Co-authored-by: Michael Davis <mcarsondavis@gmail.com>

* use PartialEq and Hash instead of a freestanding function

Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
pull/4843/head
Pascal Kuthe 2 years ago committed by GitHub
parent 9059c65a53
commit f538b69759
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

27
Cargo.lock generated

@ -13,6 +13,18 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "ahash"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
dependencies = [
"cfg-if",
"getrandom",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "0.7.18" version = "0.7.18"
@ -400,18 +412,29 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [ dependencies = [
"ahash", "ahash 0.7.6",
]
[[package]]
name = "hashbrown"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
dependencies = [
"ahash 0.8.2",
] ]
[[package]] [[package]]
name = "helix-core" name = "helix-core"
version = "0.6.0" version = "0.6.0"
dependencies = [ dependencies = [
"ahash 0.8.2",
"arc-swap", "arc-swap",
"bitflags", "bitflags",
"chrono", "chrono",
"encoding_rs", "encoding_rs",
"etcetera", "etcetera",
"hashbrown 0.13.1",
"helix-loader", "helix-loader",
"log", "log",
"once_cell", "once_cell",
@ -1288,7 +1311,7 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5faade31a542b8b35855fff6e8def199853b2da8da256da52f52f1316ee3137" checksum = "c5faade31a542b8b35855fff6e8def199853b2da8da256da52f52f1316ee3137"
dependencies = [ dependencies = [
"hashbrown", "hashbrown 0.12.3",
"regex", "regex",
] ]

@ -30,6 +30,8 @@ once_cell = "1.16"
arc-swap = "1" arc-swap = "1"
regex = "1" regex = "1"
bitflags = "1.3" bitflags = "1.3"
ahash = "0.8.2"
hashbrown = { version = "0.13.1", features = ["raw"] }
log = "0.4" log = "0.4"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

@ -7,8 +7,10 @@ use crate::{
Rope, RopeSlice, Tendril, Rope, RopeSlice, Tendril,
}; };
use ahash::RandomState;
use arc_swap::{ArcSwap, Guard}; use arc_swap::{ArcSwap, Guard};
use bitflags::bitflags; use bitflags::bitflags;
use hashbrown::raw::RawTable;
use slotmap::{DefaultKey as LayerId, HopSlotMap}; use slotmap::{DefaultKey as LayerId, HopSlotMap};
use std::{ use std::{
@ -16,7 +18,8 @@ use std::{
cell::RefCell, cell::RefCell,
collections::{HashMap, VecDeque}, collections::{HashMap, VecDeque},
fmt, fmt,
mem::replace, hash::{Hash, Hasher},
mem::{replace, transmute},
path::Path, path::Path,
str::FromStr, str::FromStr,
sync::Arc, sync::Arc,
@ -770,8 +773,15 @@ impl Syntax {
// Convert the changeset into tree sitter edits. // Convert the changeset into tree sitter edits.
let edits = generate_edits(old_source, changeset); let edits = generate_edits(old_source, changeset);
// This table allows inverse indexing of `layers`.
// That is, by hashing a `Layer`, you can find
// the `LayerId` of an existing equivalent `Layer` in `layers`.
//
// It is used to determine if a new layer exists for an injection
// or if an existing layer needs to be updated.
let mut layers_table = RawTable::with_capacity(self.layers.len());
let layers_hasher = RandomState::new();
// Use the edits to update all layers markers // Use the edits to update all layers markers
if !edits.is_empty() {
fn point_add(a: Point, b: Point) -> Point { fn point_add(a: Point, b: Point) -> Point {
if b.row > 0 { if b.row > 0 {
Point::new(a.row.saturating_add(b.row), b.column) Point::new(a.row.saturating_add(b.row), b.column)
@ -787,13 +797,14 @@ impl Syntax {
} }
} }
for layer in self.layers.values_mut() { for (layer_id, layer) in self.layers.iter_mut() {
// The root layer always covers the whole range (0..usize::MAX) // The root layer always covers the whole range (0..usize::MAX)
if layer.depth == 0 { if layer.depth == 0 {
layer.flags = LayerUpdateFlags::MODIFIED; layer.flags = LayerUpdateFlags::MODIFIED;
continue; continue;
} }
if !edits.is_empty() {
for range in &mut layer.ranges { for range in &mut layer.ranges {
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
for edit in edits.iter().rev() { for edit in edits.iter().rev() {
@ -858,6 +869,12 @@ impl Syntax {
} }
} }
} }
let hash = layers_hasher.hash_one(layer);
// Safety: insert_no_grow is unsafe because it assumes that the table
// has enough capacity to hold additional elements.
// This is always the case as we reserved enough capacity above.
unsafe { layers_table.insert_no_grow(hash, layer_id) };
} }
PARSER.with(|ts_parser| { PARSER.with(|ts_parser| {
@ -982,27 +999,23 @@ impl Syntax {
let depth = layer.depth + 1; let depth = layer.depth + 1;
// TODO: can't inline this since matches borrows self.layers // TODO: can't inline this since matches borrows self.layers
for (config, ranges) in injections { for (config, ranges) in injections {
// Find an existing layer let new_layer = LanguageLayer {
let layer = self
.layers
.iter_mut()
.find(|(_, layer)| {
layer.depth == depth && // TODO: track parent id instead
layer.config.language == config.language && layer.ranges == ranges
})
.map(|(id, _layer)| id);
// ...or insert a new one.
let layer_id = layer.unwrap_or_else(|| {
self.layers.insert(LanguageLayer {
tree: None, tree: None,
config, config,
depth, depth,
ranges, ranges,
// set the modified flag to ensure the layer is parsed
flags: LayerUpdateFlags::empty(), flags: LayerUpdateFlags::empty(),
};
// Find an identical existing layer
let layer = layers_table
.get(layers_hasher.hash_one(&new_layer), |&it| {
self.layers[it] == new_layer
}) })
}); .copied();
// ...or insert a new one.
let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
queue.push_back(layer_id); queue.push_back(layer_id);
} }
@ -1139,6 +1152,34 @@ pub struct LanguageLayer {
flags: LayerUpdateFlags, flags: LayerUpdateFlags,
} }
/// Structural equality for language layers.
///
/// Two layers compare equal when they have the same depth, the same language
/// and the same ranges (meaning they highlight the same text range with the
/// same language). The internal treesitter state (`tree`) and the update
/// `flags` are not part of the comparison.
impl PartialEq for LanguageLayer {
    fn eq(&self, other: &Self) -> bool {
        // Bail out on the first field that differs.
        if self.depth != other.depth {
            return false;
        }
        if self.config.language != other.config.language {
            return false;
        }
        self.ranges == other.ranges
    }
}
/// Hashes exactly the fields compared by the `PartialEq` implementation of
/// `LanguageLayer` (depth, language and ranges), so that layers which compare
/// equal also produce the same hash.
impl Hash for LanguageLayer {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // tree_sitter::Language does not derive Hash at the moment, so the
        // underlying pointer is hashed instead.
        //
        // SAFETY: `Language` (which `Grammar` is an alias for) is a
        // `#[repr(transparent)]` newtype wrapper around a (thin) pointer, so
        // reinterpreting it as a raw pointer is sound. Hashing the pointer is
        // also compatible with the PartialEq implementation of the language,
        // as that is just a pointer comparison.
        let grammar_ptr = unsafe { transmute::<_, *const ()>(self.config.language) };

        // Feed the fields in a fixed order: depth, language, ranges.
        Hash::hash(&self.depth, state);
        Hash::hash(&grammar_ptr, state);
        Hash::hash(&self.ranges, state);
    }
}
impl LanguageLayer { impl LanguageLayer {
pub fn tree(&self) -> &Tree { pub fn tree(&self) -> &Tree {
// TODO: no unwrap // TODO: no unwrap

Loading…
Cancel
Save