syntax: Highlight using ropes, avoiding dumping whole doc to string.

pull/8/head
Blaž Hrastnik 4 years ago
parent 80eca5c32f
commit 71f899cb5b

@ -2,6 +2,7 @@ use crate::{Change, Rope, RopeSlice, Transaction};
pub use helix_syntax::Lang; pub use helix_syntax::Lang;
pub use helix_syntax::{get_language, get_language_name}; pub use helix_syntax::{get_language, get_language_name};
use std::borrow::Cow;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::HashMap; use std::collections::HashMap;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
@ -177,6 +178,22 @@ pub struct Syntax {
pub(crate) root_layer: LanguageLayer, pub(crate) root_layer: LanguageLayer,
} }
/// Returns the text of `source` covered by the byte `range`.
///
/// Both ends of `range` are byte offsets into `source`; each is translated to a
/// char index with `byte_to_char`, the rope is sliced at those char indices, and
/// the resulting slice is converted into a `Cow<str>`.
// NOTE(review): whether the returned `Cow` borrows or allocates is decided by the
// rope library's `RopeSlice` -> `Cow<str>` conversion — confirm against its docs.
fn byte_range_to_str<'a>(range: std::ops::Range<usize>, source: RopeSlice<'a>) -> Cow<'a, str> {
    let std::ops::Range { start, end } = range;
    let first = source.byte_to_char(start);
    let last = source.byte_to_char(end);
    source.slice(first..last).into()
}
/// Returns the bytes of `source` spanned by `node`.
///
/// The node's start and end byte offsets are translated to char indices with
/// `byte_to_char` and used to slice the rope. If the slice can be viewed as a
/// single `&str` (`as_str` returns `Some`), its bytes are borrowed; otherwise the
/// slice is copied into a `String` and the owned bytes are returned.
fn node_to_bytes<'a>(node: Node, source: RopeSlice<'a>) -> Cow<'a, [u8]> {
    let first = source.byte_to_char(node.start_byte());
    let last = source.byte_to_char(node.end_byte());
    let text = source.slice(first..last);

    if let Some(direct) = text.as_str() {
        // Borrow straight out of the rope; no copy needed.
        return Cow::Borrowed(direct.as_bytes());
    }

    // `as_str` gave nothing to borrow, so materialize an owned copy.
    Cow::Owned(String::from(text).into_bytes())
}
impl Syntax { impl Syntax {
// buffer, grammar, config, grammars, sync_timeout? // buffer, grammar, config, grammars, sync_timeout?
pub fn new( pub fn new(
@ -248,7 +265,7 @@ impl Syntax {
/// Iterate over the highlighted regions for a given slice of source code. /// Iterate over the highlighted regions for a given slice of source code.
pub fn highlight_iter<'a>( pub fn highlight_iter<'a>(
&self, &self,
source: &'a [u8], source: RopeSlice<'a>,
range: Option<std::ops::Range<usize>>, range: Option<std::ops::Range<usize>>,
cancellation_flag: Option<&'a AtomicUsize>, cancellation_flag: Option<&'a AtomicUsize>,
mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
@ -271,7 +288,8 @@ impl Syntax {
let captures = cursor_ref let captures = cursor_ref
.captures(query_ref, tree_ref.root_node(), move |n: Node| { .captures(query_ref, tree_ref.root_node(), move |n: Node| {
&source[n.byte_range()] // &source[n.byte_range()]
node_to_bytes(n, source)
}) })
.peekable(); .peekable();
@ -390,7 +408,8 @@ impl LanguageLayer {
// vec![(None, Vec::new(), false); combined_injections_query.pattern_count()]; // vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
// let matches = // let matches =
// cursor.matches(combined_injections_query, tree.root_node(), |n: Node| { // cursor.matches(combined_injections_query, tree.root_node(), |n: Node| {
// &source[n.byte_range()] // // &source[n.byte_range()]
// node_to_bytes(n, source)
// }); // });
// for mat in matches { // for mat in matches {
// let entry = &mut injections_by_pattern_index[mat.pattern_index]; // let entry = &mut injections_by_pattern_index[mat.pattern_index];
@ -653,7 +672,7 @@ pub struct HighlightConfiguration {
#[derive(Debug)] #[derive(Debug)]
struct LocalDef<'a> { struct LocalDef<'a> {
name: &'a str, name: Cow<'a, str>,
value_range: ops::Range<usize>, value_range: ops::Range<usize>,
highlight: Option<Highlight>, highlight: Option<Highlight>,
} }
@ -669,7 +688,7 @@ struct HighlightIter<'a, F>
where where
F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{ {
source: &'a [u8], source: RopeSlice<'a>,
byte_offset: usize, byte_offset: usize,
injection_callback: F, injection_callback: F,
cancellation_flag: Option<&'a AtomicUsize>, cancellation_flag: Option<&'a AtomicUsize>,
@ -682,7 +701,7 @@ where
struct HighlightIterLayer<'a> { struct HighlightIterLayer<'a> {
_tree: Option<Tree>, _tree: Option<Tree>,
cursor: QueryCursor, cursor: QueryCursor,
captures: iter::Peekable<QueryCaptures<'a, &'a [u8]>>, captures: iter::Peekable<QueryCaptures<'a, Cow<'a, [u8]>>>,
config: &'a HighlightConfiguration, config: &'a HighlightConfiguration,
highlight_end_stack: Vec<usize>, highlight_end_stack: Vec<usize>,
scope_stack: Vec<LocalScope<'a>>, scope_stack: Vec<LocalScope<'a>>,
@ -856,7 +875,7 @@ impl<'a> HighlightIterLayer<'a> {
/// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
/// added to the returned vector. /// added to the returned vector.
fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>( fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
source: &'a [u8], source: RopeSlice<'a>,
cancellation_flag: Option<&'a AtomicUsize>, cancellation_flag: Option<&'a AtomicUsize>,
injection_callback: &mut F, injection_callback: &mut F,
mut config: &'a HighlightConfiguration, mut config: &'a HighlightConfiguration,
@ -880,7 +899,18 @@ impl<'a> HighlightIterLayer<'a> {
unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) };
let tree = highlighter let tree = highlighter
.parser .parser
.parse(source, None) .parse_with(
&mut |byte, _| {
if byte <= source.len_bytes() {
let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
chunk[byte - start_byte..].as_bytes()
} else {
// out of range
&[]
}
},
None,
)
.ok_or(Error::Cancelled)?; .ok_or(Error::Cancelled)?;
unsafe { highlighter.parser.set_cancellation_flag(None) }; unsafe { highlighter.parser.set_cancellation_flag(None) };
let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new); let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new);
@ -895,7 +925,10 @@ impl<'a> HighlightIterLayer<'a> {
let matches = cursor.matches( let matches = cursor.matches(
combined_injections_query, combined_injections_query,
tree.root_node(), tree.root_node(),
|n: Node| &source[n.byte_range()], |n: Node| {
// &source[n.byte_range()]
node_to_bytes(n, source)
},
); );
for mat in matches { for mat in matches {
let entry = &mut injections_by_pattern_index[mat.pattern_index]; let entry = &mut injections_by_pattern_index[mat.pattern_index];
@ -919,7 +952,7 @@ impl<'a> HighlightIterLayer<'a> {
{ {
if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty())
{ {
if let Some(next_config) = (injection_callback)(lang_name) { if let Some(next_config) = (injection_callback)(&lang_name) {
let ranges = Self::intersect_ranges( let ranges = Self::intersect_ranges(
&ranges, &ranges,
&content_nodes, &content_nodes,
@ -943,7 +976,8 @@ impl<'a> HighlightIterLayer<'a> {
unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
let captures = cursor_ref let captures = cursor_ref
.captures(&config.query, tree_ref.root_node(), move |n: Node| { .captures(&config.query, tree_ref.root_node(), move |n: Node| {
&source[n.byte_range()] // &source[n.byte_range()]
node_to_bytes(n, source)
}) })
.peekable(); .peekable();
@ -1195,12 +1229,13 @@ where
// If none of the layers have any more highlight boundaries, terminate. // If none of the layers have any more highlight boundaries, terminate.
if self.layers.is_empty() { if self.layers.is_empty() {
return if self.byte_offset < self.source.len() { let len = self.source.len_bytes();
return if self.byte_offset < len {
let result = Some(Ok(HighlightEvent::Source { let result = Some(Ok(HighlightEvent::Source {
start: self.byte_offset, start: self.byte_offset,
end: self.source.len(), end: len,
})); }));
self.byte_offset = self.source.len(); self.byte_offset = len;
result result
} else { } else {
None None
@ -1240,7 +1275,7 @@ where
// If this capture represents an injection, then process the injection. // If this capture represents an injection, then process the injection.
if match_.pattern_index < layer.config.locals_pattern_index { if match_.pattern_index < layer.config.locals_pattern_index {
let (language_name, content_node, include_children) = let (language_name, content_node, include_children) =
injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); injection_for_match(&layer.config, &layer.config.query, &match_, self.source);
// Explicitly remove this match so that none of its other captures will remain // Explicitly remove this match so that none of its other captures will remain
// in the stream of captures. // in the stream of captures.
@ -1249,7 +1284,7 @@ where
// If a language is found with the given name, then add a new language layer // If a language is found with the given name, then add a new language layer
// to the highlighted document. // to the highlighted document.
if let (Some(language_name), Some(content_node)) = (language_name, content_node) { if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
if let Some(config) = (self.injection_callback)(language_name) { if let Some(config) = (self.injection_callback)(&language_name) {
let ranges = HighlightIterLayer::intersect_ranges( let ranges = HighlightIterLayer::intersect_ranges(
&self.layers[0].ranges, &self.layers[0].ranges,
&[content_node], &[content_node],
@ -1320,15 +1355,13 @@ where
} }
} }
if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { let name = byte_range_to_str(range.clone(), self.source);
scope.local_defs.push(LocalDef { scope.local_defs.push(LocalDef {
name, name,
value_range, value_range,
highlight: None, highlight: None,
}); });
definition_highlight = definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
scope.local_defs.last_mut().map(|s| &mut s.highlight);
}
} }
// If the node represents a reference, then try to find the corresponding // If the node represents a reference, then try to find the corresponding
// definition in the scope stack. // definition in the scope stack.
@ -1336,7 +1369,7 @@ where
&& definition_highlight.is_none() && definition_highlight.is_none()
{ {
definition_highlight = None; definition_highlight = None;
if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { let name = byte_range_to_str(range.clone(), self.source);
for scope in layer.scope_stack.iter().rev() { for scope in layer.scope_stack.iter().rev() {
if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
if def.name == name && range.start >= def.value_range.end { if def.name == name && range.start >= def.value_range.end {
@ -1353,7 +1386,6 @@ where
} }
} }
} }
}
// Continue processing any additional matches for the same node. // Continue processing any additional matches for the same node.
if let Some((next_match, next_capture_index)) = layer.captures.peek() { if let Some((next_match, next_capture_index)) = layer.captures.peek() {
@ -1436,8 +1468,8 @@ fn injection_for_match<'a>(
config: &HighlightConfiguration, config: &HighlightConfiguration,
query: &'a Query, query: &'a Query,
query_match: &QueryMatch<'a>, query_match: &QueryMatch<'a>,
source: &'a [u8], source: RopeSlice<'a>,
) -> (Option<&'a str>, Option<Node<'a>>, bool) { ) -> (Option<Cow<'a, str>>, Option<Node<'a>>, bool) {
let content_capture_index = config.injection_content_capture_index; let content_capture_index = config.injection_content_capture_index;
let language_capture_index = config.injection_language_capture_index; let language_capture_index = config.injection_language_capture_index;
@ -1446,7 +1478,8 @@ fn injection_for_match<'a>(
for capture in query_match.captures { for capture in query_match.captures {
let index = Some(capture.index); let index = Some(capture.index);
if index == language_capture_index { if index == language_capture_index {
language_name = capture.node.utf8_text(source).ok(); let name = byte_range_to_str(capture.node.byte_range(), source);
language_name = Some(name);
} else if index == content_capture_index { } else if index == content_capture_index {
content_node = Some(capture.node); content_node = Some(capture.node);
} }
@ -1460,7 +1493,7 @@ fn injection_for_match<'a>(
// that sets the injection.language key. // that sets the injection.language key.
"injection.language" => { "injection.language" => {
if language_name.is_none() { if language_name.is_none() {
language_name = prop.value.as_ref().map(|s| s.as_ref()) language_name = prop.value.as_ref().map(|s| s.as_ref().into())
} }
} }

@ -75,9 +75,7 @@ impl EditorView {
theme: &Theme, theme: &Theme,
is_focused: bool, is_focused: bool,
) { ) {
// TODO: inefficient, should feed chunks.iter() to tree_sitter.parse_with(|offset, pos|)
let text = view.doc.text(); let text = view.doc.text();
let source_code = text.to_string();
let last_line = view.last_line(); let last_line = view.last_line();
@ -95,7 +93,7 @@ impl EditorView {
let highlights: Vec<_> = match &view.doc.syntax { let highlights: Vec<_> = match &view.doc.syntax {
Some(syntax) => { Some(syntax) => {
syntax syntax
.highlight_iter(source_code.as_bytes(), Some(range), None, |_| None) .highlight_iter(text.slice(..), Some(range), None, |_| None)
.unwrap() .unwrap()
.collect() // TODO: we collect here to avoid holding the lock, fix later .collect() // TODO: we collect here to avoid holding the lock, fix later
} }

Loading…
Cancel
Save