mod tree_cursor ;
use crate ::{
auto_pairs ::AutoPairs ,
chars ::char_is_line_ending ,
diagnostic ::Severity ,
regex ::Regex ,
transaction ::{ ChangeSet , Operation } ,
RopeSlice , Tendril ,
} ;
use ahash ::RandomState ;
use arc_swap ::{ ArcSwap , Guard } ;
use bitflags ::bitflags ;
use globset ::GlobSet ;
use hashbrown ::raw ::RawTable ;
use helix_stdx ::rope ::{ self , RopeSliceExt } ;
use slotmap ::{ DefaultKey as LayerId , HopSlotMap } ;
use std ::{
borrow ::Cow ,
cell ::RefCell ,
collections ::{ HashMap , HashSet , VecDeque } ,
fmt ::{ self , Display } ,
hash ::{ Hash , Hasher } ,
mem ::replace ,
path ::{ Path , PathBuf } ,
str ::FromStr ,
sync ::Arc ,
} ;
use once_cell ::sync ::{ Lazy , OnceCell } ;
use serde ::{ ser ::SerializeSeq , Deserialize , Serialize } ;
use helix_loader ::grammar ::{ get_language , load_runtime_file } ;
pub use tree_cursor ::TreeCursor ;
fn deserialize_regex < ' de , D > ( deserializer : D ) -> Result < Option < Regex > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Option ::< String > ::deserialize ( deserializer ) ?
. map ( | buf | Regex ::new ( & buf ) . map_err ( serde ::de ::Error ::custom ) )
. transpose ( )
}
fn deserialize_lsp_config < ' de , D > ( deserializer : D ) -> Result < Option < serde_json ::Value > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Option ::< toml ::Value > ::deserialize ( deserializer ) ?
. map ( | toml | toml . try_into ( ) . map_err ( serde ::de ::Error ::custom ) )
. transpose ( )
}
fn deserialize_tab_width < ' de , D > ( deserializer : D ) -> Result < usize , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
usize ::deserialize ( deserializer ) . and_then ( | n | {
if n > 0 & & n < = 16 {
Ok ( n )
} else {
Err ( serde ::de ::Error ::custom (
"tab width must be a value from 1 to 16 inclusive" ,
) )
}
} )
}
pub fn deserialize_auto_pairs < ' de , D > ( deserializer : D ) -> Result < Option < AutoPairs > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
Ok ( Option ::< AutoPairConfig > ::deserialize ( deserializer ) ? . and_then ( AutoPairConfig ::into ) )
}
/// Serde default for `LanguageServerConfiguration::timeout`.
// NOTE(review): the unit is not stated in this file (presumably seconds) — confirm.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT: u64 = 20;
    DEFAULT_TIMEOUT
}
/// Top-level language configuration: the list of language definitions plus
/// the language server definitions, keyed by a string (presumably the server
/// name referenced from `LanguageConfiguration::language_servers` — confirm).
#[ derive(Debug, Serialize, Deserialize) ]
#[ serde(rename_all = " kebab-case " ) ]
pub struct Configuration {
/// One entry per configured language.
pub language : Vec < LanguageConfiguration > ,
/// Language server definitions; empty when the key is absent.
#[ serde(default) ]
pub language_server : HashMap < String , LanguageServerConfiguration > ,
}
// largely based on tree-sitter/cli/src/loader.rs
/// Configuration of a single language.
///
/// Unknown keys are rejected on deserialization (`deny_unknown_fields`).
/// Fields marked `#[serde(skip)]` are lazily initialized caches and never
/// appear in the serialized form.
#[ derive(Debug, Serialize, Deserialize) ]
#[ serde(rename_all = " kebab-case " , deny_unknown_fields) ]
pub struct LanguageConfiguration {
#[ serde(rename = " name " ) ]
pub language_id : String , // c-sharp, rust, tsx
#[ serde(rename = " language-id " ) ]
// see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem
pub language_server_language_id : Option < String > , // csharp, rust, typescriptreact, for the language-server
pub scope : String , // source.rust
pub file_types : Vec < FileType > , // filename extension or ends_with? <Gemfile, rb, etc>
#[ serde(default) ]
pub shebangs : Vec < String > , // interpreter(s) associated with language
#[ serde(default) ]
pub roots : Vec < String > , // these indicate project roots <.git, Cargo.toml>
// Accepts either a single string or a list of strings (see
// `from_comment_tokens`); also readable under the `comment-token` alias.
#[ serde(
default ,
skip_serializing ,
deserialize_with = "from_comment_tokens" ,
alias = "comment-token"
) ]
pub comment_tokens : Option < Vec < String > > ,
// Accepts either a single start/end table or a list of them (see
// `from_block_comment_tokens`).
#[ serde(
default ,
skip_serializing ,
deserialize_with = "from_block_comment_tokens"
) ]
pub block_comment_tokens : Option < Vec < BlockCommentToken > > ,
pub text_width : Option < usize > ,
pub soft_wrap : Option < SoftWrap > ,
#[ serde(default) ]
pub auto_format : bool ,
#[ serde(skip_serializing_if = " Option::is_none " ) ]
pub formatter : Option < FormatterConfiguration > ,
#[ serde(default) ]
pub diagnostic_severity : Severity ,
pub grammar : Option < String > , // tree-sitter grammar name, defaults to language_id
// content_regex
// Matched against injection language names by
// `Loader::language_config_for_name`.
#[ serde(default, skip_serializing, deserialize_with = " deserialize_regex " ) ]
pub injection_regex : Option < Regex > ,
// first_line_regex
//
// Lazily initialized highlight configuration; the inner `None` records that
// initialization ran but produced no configuration.
#[ serde(skip) ]
pub ( crate ) highlight_config : OnceCell < Option < Arc < HighlightConfiguration > > > ,
// tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
#[ serde(
default ,
skip_serializing_if = "Vec::is_empty" ,
serialize_with = "serialize_lang_features" ,
deserialize_with = "deserialize_lang_features"
) ]
pub language_servers : Vec < LanguageServerFeatures > ,
#[ serde(skip_serializing_if = " Option::is_none " ) ]
pub indent : Option < IndentationConfiguration > ,
// Lazily loaded from `indents.scm` by `indent_query()`.
#[ serde(skip) ]
pub ( crate ) indent_query : OnceCell < Option < Query > > ,
// Lazily loaded from `textobjects.scm` by `textobject_query()`.
#[ serde(skip) ]
pub ( crate ) textobject_query : OnceCell < Option < TextObjectQuery > > ,
#[ serde(skip_serializing_if = " Option::is_none " ) ]
pub debugger : Option < DebugAdapterConfig > ,
/// Automatic insertion of pairs to parentheses, brackets,
/// etc. Defaults to true. Optionally, this can be a list of 2-tuples
/// to specify a list of characters to pair. This overrides the
/// global setting.
#[ serde(default, skip_serializing, deserialize_with = " deserialize_auto_pairs " ) ]
pub auto_pairs : Option < AutoPairs > ,
pub rulers : Option < Vec < u16 > > , // if set, override editor's rulers
/// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`.
/// Falling back to the current working directory if none are configured.
pub workspace_lsp_roots : Option < Vec < PathBuf > > ,
#[ serde(default) ]
pub persistent_diagnostic_sources : Vec < String > ,
}
/// A single `file-types` entry: either a plain extension string or a glob
/// table (`{ glob = "..." }`).
#[ derive(Debug, PartialEq, Eq, Hash) ]
pub enum FileType {
/// The extension of the file, either the `Path::extension` or the full
/// filename if the file does not have an extension.
Extension ( String ) ,
/// A Unix-style path glob. This is compared to the file's absolute path, so
/// it can be used to detect files based on their directories. If the glob
/// is not an absolute path and does not already start with a glob pattern,
/// a glob pattern will be prepended to it.
Glob ( globset ::Glob ) ,
}
impl Serialize for FileType {
fn serialize < S > ( & self , serializer : S ) -> Result < S ::Ok , S ::Error >
where
S : serde ::Serializer ,
{
use serde ::ser ::SerializeMap ;
match self {
FileType ::Extension ( extension ) = > serializer . serialize_str ( extension ) ,
FileType ::Glob ( glob ) = > {
let mut map = serializer . serialize_map ( Some ( 1 ) ) ? ;
map . serialize_entry ( "glob" , glob . glob ( ) ) ? ;
map . end ( )
}
}
}
}
impl < ' de > Deserialize < ' de > for FileType {
fn deserialize < D > ( deserializer : D ) -> Result < Self , D ::Error >
where
D : serde ::de ::Deserializer < ' de > ,
{
struct FileTypeVisitor ;
impl < ' de > serde ::de ::Visitor < ' de > for FileTypeVisitor {
type Value = FileType ;
fn expecting ( & self , formatter : & mut std ::fmt ::Formatter ) -> std ::fmt ::Result {
formatter . write_str ( "string or table" )
}
fn visit_str < E > ( self , value : & str ) -> Result < Self ::Value , E >
where
E : serde ::de ::Error ,
{
Ok ( FileType ::Extension ( value . to_string ( ) ) )
}
fn visit_map < M > ( self , mut map : M ) -> Result < Self ::Value , M ::Error >
where
M : serde ::de ::MapAccess < ' de > ,
{
match map . next_entry ::< String , String > ( ) ? {
Some ( ( key , mut glob ) ) if key = = "glob" = > {
// If the glob isn't an absolute path or already starts
// with a glob pattern, add a leading glob so we
// properly match relative paths.
if ! glob . starts_with ( '/' ) & & ! glob . starts_with ( "*/" ) {
glob . insert_str ( 0 , "*/" ) ;
}
globset ::Glob ::new ( glob . as_str ( ) )
. map ( FileType ::Glob )
. map_err ( | err | {
serde ::de ::Error ::custom ( format! ( "invalid `glob` pattern: {}" , err ) )
} )
}
Some ( ( key , _value ) ) = > Err ( serde ::de ::Error ::custom ( format! (
"unknown key in `file-types` list: {}" ,
key
) ) ) ,
None = > Err ( serde ::de ::Error ::custom (
"expected a `suffix` key in the `file-types` entry" ,
) ) ,
}
}
}
deserializer . deserialize_any ( FileTypeVisitor )
}
}
fn from_comment_tokens < ' de , D > ( deserializer : D ) -> Result < Option < Vec < String > > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
#[ derive(Deserialize) ]
#[ serde(untagged) ]
enum CommentTokens {
Multiple ( Vec < String > ) ,
Single ( String ) ,
}
Ok (
Option ::< CommentTokens > ::deserialize ( deserializer ) ? . map ( | tokens | match tokens {
CommentTokens ::Single ( val ) = > vec! [ val ] ,
CommentTokens ::Multiple ( vals ) = > vals ,
} ) ,
)
}
/// A pair of delimiters that open and close a block comment.
#[ derive(Clone, Debug, Serialize, Deserialize) ]
pub struct BlockCommentToken {
/// Opening delimiter (the `Default` impl uses `/*`).
pub start : String ,
/// Closing delimiter (the `Default` impl uses `*/`).
pub end : String ,
}
impl Default for BlockCommentToken {
fn default ( ) -> Self {
BlockCommentToken {
start : "/*" . to_string ( ) ,
end : "*/" . to_string ( ) ,
}
}
}
fn from_block_comment_tokens < ' de , D > (
deserializer : D ,
) -> Result < Option < Vec < BlockCommentToken > > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
#[ derive(Deserialize) ]
#[ serde(untagged) ]
enum BlockCommentTokens {
Multiple ( Vec < BlockCommentToken > ) ,
Single ( BlockCommentToken ) ,
}
Ok (
Option ::< BlockCommentTokens > ::deserialize ( deserializer ) ? . map ( | tokens | match tokens {
BlockCommentTokens ::Single ( val ) = > vec! [ val ] ,
BlockCommentTokens ::Multiple ( vals ) = > vals ,
} ) ,
)
}
/// A capability that can be enabled or disabled per language server through
/// `only-features` / `except-features`. (De)serialized in kebab-case, which
/// matches the strings produced by the `Display` impl.
#[ derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash) ]
#[ serde(rename_all = " kebab-case " ) ]
pub enum LanguageServerFeature {
Format ,
GotoDeclaration ,
GotoDefinition ,
GotoTypeDefinition ,
GotoReference ,
GotoImplementation ,
// Goto, use bitflags, combining previous Goto members?
SignatureHelp ,
Hover ,
DocumentHighlight ,
Completion ,
CodeAction ,
WorkspaceCommand ,
DocumentSymbols ,
WorkspaceSymbols ,
// Symbols, use bitflags, see above?
Diagnostics ,
RenameSymbol ,
InlayHints ,
}
impl Display for LanguageServerFeature {
fn fmt ( & self , f : & mut fmt ::Formatter < ' _ > ) -> fmt ::Result {
use LanguageServerFeature ::* ;
let feature = match self {
Format = > "format" ,
GotoDeclaration = > "goto-declaration" ,
GotoDefinition = > "goto-definition" ,
GotoTypeDefinition = > "goto-type-definition" ,
GotoReference = > "goto-reference" ,
GotoImplementation = > "goto-implementation" ,
SignatureHelp = > "signature-help" ,
Hover = > "hover" ,
DocumentHighlight = > "document-highlight" ,
Completion = > "completion" ,
CodeAction = > "code-action" ,
WorkspaceCommand = > "workspace-command" ,
DocumentSymbols = > "document-symbols" ,
WorkspaceSymbols = > "workspace-symbols" ,
Diagnostics = > "diagnostics" ,
RenameSymbol = > "rename-symbol" ,
InlayHints = > "inlay-hints" ,
} ;
write! ( f , "{feature}" , )
}
}
/// On-disk form of one `language-servers` entry. Untagged: either a table
/// with a `name` plus optional feature sets, or a bare server name.
#[ derive(Debug, Serialize, Deserialize) ]
#[ serde(untagged, rename_all = " kebab-case " , deny_unknown_fields) ]
enum LanguageServerFeatureConfiguration {
/// Table form with optional `only-features` / `except-features` sets.
#[ serde(rename_all = " kebab-case " ) ]
Features {
#[ serde(default, skip_serializing_if = " HashSet::is_empty " ) ]
only_features : HashSet < LanguageServerFeature > ,
#[ serde(default, skip_serializing_if = " HashSet::is_empty " ) ]
except_features : HashSet < LanguageServerFeature > ,
name : String ,
} ,
/// Bare server name with no feature restrictions.
Simple ( String ) ,
}
/// In-memory form of one `language-servers` entry: the server name plus the
/// feature sets consulted by `has_feature`.
#[ derive(Debug, Default) ]
pub struct LanguageServerFeatures {
pub name : String ,
/// When non-empty, only these features are considered enabled.
pub only : HashSet < LanguageServerFeature > ,
/// Features that are always considered disabled.
pub excluded : HashSet < LanguageServerFeature > ,
}
impl LanguageServerFeatures {
    /// Returns whether `feature` is enabled for this server.
    ///
    /// A feature is enabled when it is not in `excluded` and either `only`
    /// is empty or `only` contains it.
    pub fn has_feature(&self, feature: LanguageServerFeature) -> bool {
        if self.excluded.contains(&feature) {
            return false;
        }
        self.only.is_empty() || self.only.contains(&feature)
    }
}
fn deserialize_lang_features < ' de , D > (
deserializer : D ,
) -> Result < Vec < LanguageServerFeatures > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
let raw : Vec < LanguageServerFeatureConfiguration > = Deserialize ::deserialize ( deserializer ) ? ;
let res = raw
. into_iter ( )
. map ( | config | match config {
LanguageServerFeatureConfiguration ::Simple ( name ) = > LanguageServerFeatures {
name ,
.. Default ::default ( )
} ,
LanguageServerFeatureConfiguration ::Features {
only_features ,
except_features ,
name ,
} = > LanguageServerFeatures {
name ,
only : only_features ,
excluded : except_features ,
} ,
} )
. collect ( ) ;
Ok ( res )
}
/// Serializes the `language-servers` list.
///
/// Entries without any feature restrictions are written as a bare name; all
/// others are written in the table form.
fn serialize_lang_features<S>(
    map: &Vec<LanguageServerFeatures>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    let mut seq = serializer.serialize_seq(Some(map.len()))?;
    for entry in map {
        let unrestricted = entry.only.is_empty() && entry.excluded.is_empty();
        let name = entry.name.to_owned();
        let serialized = if unrestricted {
            LanguageServerFeatureConfiguration::Simple(name)
        } else {
            LanguageServerFeatureConfiguration::Features {
                only_features: entry.only.clone(),
                except_features: entry.excluded.clone(),
                name,
            }
        };
        seq.serialize_element(&serialized)?;
    }
    seq.end()
}
fn deserialize_required_root_patterns < ' de , D > ( deserializer : D ) -> Result < Option < GlobSet > , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
let patterns = Vec ::< String > ::deserialize ( deserializer ) ? ;
if patterns . is_empty ( ) {
return Ok ( None ) ;
}
let mut builder = globset ::GlobSetBuilder ::new ( ) ;
for pattern in patterns {
let glob = globset ::Glob ::new ( & pattern ) . map_err ( serde ::de ::Error ::custom ) ? ;
builder . add ( glob ) ;
}
builder . build ( ) . map ( Some ) . map_err ( serde ::de ::Error ::custom )
}
/// Definition of one language server: how it is launched plus its options.
#[ derive(Debug, Serialize, Deserialize) ]
#[ serde(rename_all = " kebab-case " ) ]
pub struct LanguageServerConfiguration {
pub command : String ,
#[ serde(default) ]
#[ serde(skip_serializing_if = " Vec::is_empty " ) ]
pub args : Vec < String > ,
#[ serde(default, skip_serializing_if = " HashMap::is_empty " ) ]
pub environment : HashMap < String , String > ,
// Read as TOML and converted to JSON by `deserialize_lsp_config`; never
// serialized.
#[ serde(default, skip_serializing, deserialize_with = " deserialize_lsp_config " ) ]
pub config : Option < serde_json ::Value > ,
// Defaults to `default_timeout()` (20) when absent.
#[ serde(default = " default_timeout " ) ]
pub timeout : u64 ,
// Compiled from a list of glob patterns; `None` when the list is empty or
// absent (see `deserialize_required_root_patterns`).
#[ serde(
default ,
skip_serializing ,
deserialize_with = "deserialize_required_root_patterns"
) ]
pub required_root_patterns : Option < GlobSet > ,
}
/// Command and arguments of the formatter configured for a language
/// (the `formatter` key of `LanguageConfiguration`).
#[ derive(Debug, Clone, Serialize, Deserialize) ]
#[ serde(rename_all = " kebab-case " ) ]
pub struct FormatterConfiguration {
pub command : String ,
// Empty when absent; omitted from serialized output when empty.
#[ serde(default) ]
#[ serde(skip_serializing_if = " Vec::is_empty " ) ]
pub args : Vec < String > ,
}
/// Table form of a debug template completion entry; every field is optional.
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
#[ serde(rename_all = " kebab-case " ) ]
pub struct AdvancedCompletion {
pub name : Option < String > ,
pub completion : Option < String > ,
pub default : Option < String > ,
}
/// A completion entry of a debug template. Untagged: either a plain string
/// or an [`AdvancedCompletion`] table.
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
#[ serde(rename_all = " kebab-case " , untagged) ]
pub enum DebugConfigCompletion {
Named ( String ) ,
Advanced ( AdvancedCompletion ) ,
}
/// Value of a debug template argument. Untagged: a string, a list of
/// strings, or a boolean.
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
#[ serde(untagged) ]
pub enum DebugArgumentValue {
String ( String ) ,
Array ( Vec < String > ) ,
Boolean ( bool ) ,
}
/// A named debug template: a request kind, its completion entries, and its
/// arguments keyed by argument name.
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
#[ serde(rename_all = " kebab-case " ) ]
pub struct DebugTemplate {
pub name : String ,
pub request : String ,
pub completion : Vec < DebugConfigCompletion > ,
pub args : HashMap < String , DebugArgumentValue > ,
}
/// Configuration of the debug adapter attached to a language (the `debugger`
/// key of `LanguageConfiguration`).
#[ derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize) ]
#[ serde(rename_all = " kebab-case " ) ]
pub struct DebugAdapterConfig {
pub name : String ,
pub transport : String ,
// Empty string when absent.
#[ serde(default) ]
pub command : String ,
#[ serde(default) ]
pub args : Vec < String > ,
pub port_arg : Option < String > ,
pub templates : Vec < DebugTemplate > ,
#[ serde(default) ]
pub quirks : DebuggerQuirks ,
}
// Different workarounds for adapters' differences
// NOTE(review): unlike the sibling config structs this one has no
// `rename_all` attribute, so the key is read as `absolute_paths` rather than
// `absolute-paths` — confirm this matches the shipped configuration files.
#[ derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize) ]
pub struct DebuggerQuirks {
// Defaults to `false` when absent.
#[ serde(default) ]
pub absolute_paths : bool ,
}
/// Indentation settings of a language (the `indent` key of
/// `LanguageConfiguration`).
#[ derive(Debug, Serialize, Deserialize) ]
#[ serde(rename_all = " kebab-case " ) ]
pub struct IndentationConfiguration {
// Validated on deserialization to lie in 1..=16 (see `deserialize_tab_width`).
#[ serde(deserialize_with = " deserialize_tab_width " ) ]
pub tab_width : usize ,
pub unit : String ,
}
/// How the indentation for a newly inserted line should be determined.
/// If the selected heuristic is not available (e.g. because the current
/// language has no tree-sitter indent queries), a simpler one will be used.
///
/// The default is [`IndentationHeuristic::Hybrid`].
#[ derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize) ]
#[ serde(rename_all = " kebab-case " ) ]
pub enum IndentationHeuristic {
/// Just copy the indentation of the line that the cursor is currently on.
Simple ,
/// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line.
TreeSitter ,
/// Use tree-sitter indent queries to compute the expected difference in indentation between the new line
/// and the line before. Add this to the actual indentation level of the line before.
#[ default ]
Hybrid ,
}
/// Configuration for auto pairs.
///
/// Untagged: either a boolean switch or a map from opening to closing
/// character. The `Default` impl is `Enable(true)`.
#[ derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize) ]
#[ serde(rename_all = " kebab-case " , deny_unknown_fields, untagged) ]
pub enum AutoPairConfig {
/// Enables or disables auto pairing. False means disabled. True means to use the default pairs.
Enable ( bool ) ,
/// The mappings of pairs.
Pairs ( HashMap < char , char > ) ,
}
impl Default for AutoPairConfig {
fn default ( ) -> Self {
AutoPairConfig ::Enable ( true )
}
}
impl From < & AutoPairConfig > for Option < AutoPairs > {
fn from ( auto_pair_config : & AutoPairConfig ) -> Self {
match auto_pair_config {
AutoPairConfig ::Enable ( false ) = > None ,
AutoPairConfig ::Enable ( true ) = > Some ( AutoPairs ::default ( ) ) ,
AutoPairConfig ::Pairs ( pairs ) = > Some ( AutoPairs ::new ( pairs . iter ( ) ) ) ,
}
}
}
impl From<AutoPairConfig> for Option<AutoPairs> {
    /// Owned variant of the conversion; delegates to the by-reference impl.
    fn from(auto_pairs_config: AutoPairConfig) -> Self {
        Self::from(&auto_pairs_config)
    }
}
impl FromStr for AutoPairConfig {
    type Err = std::str::ParseBoolError;

    /// Parses `"true"` / `"false"` into [`AutoPairConfig::Enable`].
    ///
    /// Only boolean parsing is supported here because this is used for the
    /// runtime setting; explicit pair maps cannot be given as a string.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        s.parse::<bool>().map(AutoPairConfig::Enable)
    }
}
/// Wrapper around the tree-sitter query loaded from `textobjects.scm`
/// (see `LanguageConfiguration::textobject_query`).
#[ derive(Debug) ]
pub struct TextObjectQuery {
pub query : Query ,
}
/// Result of a text object capture: one node, or several nodes captured
/// together by the same match.
#[ derive(Debug) ]
pub enum CapturedNode < ' a > {
Single ( Node < ' a > ) ,
/// Guaranteed to be not empty
Grouped ( Vec < Node < ' a > > ) ,
}
impl < ' a > CapturedNode < ' a > {
pub fn start_byte ( & self ) -> usize {
match self {
Self ::Single ( n ) = > n . start_byte ( ) ,
Self ::Grouped ( ns ) = > ns [ 0 ] . start_byte ( ) ,
}
}
pub fn end_byte ( & self ) -> usize {
match self {
Self ::Single ( n ) = > n . end_byte ( ) ,
Self ::Grouped ( ns ) = > ns . last ( ) . unwrap ( ) . end_byte ( ) ,
}
}
pub fn byte_range ( & self ) -> std ::ops ::Range < usize > {
self . start_byte ( ) .. self . end_byte ( )
}
}
/// The maximum number of in-progress matches a TS cursor can consider at once.
/// This is set to a constant in order to avoid performance problems for medium to large files.
/// It is applied to a cursor with `set_match_limit`.
/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
///
/// Old tree-sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (it must now be set manually).
/// However, having no limit causes performance issues for medium to large files.
/// In helix, this problem caused tree-sitter motions to take multiple seconds to complete in medium-sized Rust files (3k loc).
///
/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
///
/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
const TREE_SITTER_MATCH_LIMIT : u32 = 256 ;
impl TextObjectQuery {
/// Run the query on the given node and return sub nodes which match given
/// capture ("function.inside", "class.around", etc).
///
/// Captures may contain multiple nodes by using quantifiers (+, *, etc),
/// and support for this is partial and could use improvement.
///
/// ```query
/// (comment)+ @capture
///
/// ; OR
/// (
/// (comment)*
/// .
/// (function)
/// ) @capture
/// ```
pub fn capture_nodes < ' a > (
& ' a self ,
capture_name : & str ,
node : Node < ' a > ,
slice : RopeSlice < ' a > ,
cursor : & ' a mut QueryCursor ,
) -> Option < impl Iterator < Item = CapturedNode < ' a > > > {
self . capture_nodes_any ( & [ capture_name ] , node , slice , cursor )
}
/// Find the first capture that exists out of all given `capture_names`
/// and return sub nodes that match this capture.
pub fn capture_nodes_any < ' a > (
& ' a self ,
capture_names : & [ & str ] ,
node : Node < ' a > ,
slice : RopeSlice < ' a > ,
cursor : & ' a mut QueryCursor ,
) -> Option < impl Iterator < Item = CapturedNode < ' a > > > {
let capture_idx = capture_names
. iter ( )
. find_map ( | cap | self . query . capture_index_for_name ( cap ) ) ? ;
cursor . set_match_limit ( TREE_SITTER_MATCH_LIMIT ) ;
let nodes = cursor
. captures ( & self . query , node , RopeProvider ( slice ) )
. filter_map ( move | ( mat , _ ) | {
let nodes : Vec < _ > = mat
. captures
. iter ( )
. filter_map ( | cap | ( cap . index = = capture_idx ) . then_some ( cap . node ) )
. collect ( ) ;
if nodes . len ( ) > 1 {
Some ( CapturedNode ::Grouped ( nodes ) )
} else {
nodes . into_iter ( ) . map ( CapturedNode ::Single ) . next ( )
}
} ) ;
Some ( nodes )
}
}
/// Loads the runtime query file `filename` for `language` and expands its
/// `; inherits` directives.
///
/// Every `; inherits <language>(,<language>)*` directive is replaced with the
/// recursively loaded queries of the listed languages, each wrapped in
/// newlines. A file that cannot be loaded is treated as empty.
pub fn read_query(language: &str, filename: &str) -> String {
    static INHERITS_REGEX: Lazy<Regex> =
        Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());

    let query = load_runtime_file(language, filename).unwrap_or_default();

    let expanded = INHERITS_REGEX.replace_all(&query, |captures: &regex::Captures| {
        let mut inherited = String::new();
        for parent in captures[1].split(',') {
            inherited.push('\n');
            inherited.push_str(&read_query(parent, filename));
            inherited.push('\n');
        }
        inherited
    });
    expanded.into_owned()
}
impl LanguageConfiguration {
fn initialize_highlight ( & self , scopes : & [ String ] ) -> Option < Arc < HighlightConfiguration > > {
let highlights_query = read_query ( & self . language_id , "highlights.scm" ) ;
// always highlight syntax errors
// highlights_query += "\n(ERROR) @error";
let injections_query = read_query ( & self . language_id , "injections.scm" ) ;
let locals_query = read_query ( & self . language_id , "locals.scm" ) ;
if highlights_query . is_empty ( ) {
None
} else {
let language = get_language ( self . grammar . as_deref ( ) . unwrap_or ( & self . language_id ) )
. map_err ( | err | {
log ::error ! (
"Failed to load tree-sitter parser for language {:?}: {}" ,
self . language_id ,
err
)
} )
. ok ( ) ? ;
let config = HighlightConfiguration ::new (
language ,
& highlights_query ,
& injections_query ,
& locals_query ,
)
. map_err ( | err | log ::error ! ( "Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}" , self . language_id , err ) )
. ok ( ) ? ;
config . configure ( scopes ) ;
Some ( Arc ::new ( config ) )
}
}
pub fn reconfigure ( & self , scopes : & [ String ] ) {
if let Some ( Some ( config ) ) = self . highlight_config . get ( ) {
config . configure ( scopes ) ;
}
}
pub fn highlight_config ( & self , scopes : & [ String ] ) -> Option < Arc < HighlightConfiguration > > {
self . highlight_config
. get_or_init ( | | self . initialize_highlight ( scopes ) )
. clone ( )
}
pub fn is_highlight_initialized ( & self ) -> bool {
self . highlight_config . get ( ) . is_some ( )
}
pub fn indent_query ( & self ) -> Option < & Query > {
self . indent_query
. get_or_init ( | | self . load_query ( "indents.scm" ) )
. as_ref ( )
}
pub fn textobject_query ( & self ) -> Option < & TextObjectQuery > {
self . textobject_query
. get_or_init ( | | {
self . load_query ( "textobjects.scm" )
. map ( | query | TextObjectQuery { query } )
} )
. as_ref ( )
}
pub fn scope ( & self ) -> & str {
& self . scope
}
fn load_query ( & self , kind : & str ) -> Option < Query > {
let query_text = read_query ( & self . language_id , kind ) ;
if query_text . is_empty ( ) {
return None ;
}
let lang = & self . highlight_config . get ( ) ? . as_ref ( ) ? . language ;
Query ::new ( lang , & query_text )
. map_err ( | e | {
log ::error ! (
"Failed to parse {} queries for {}: {}" ,
kind ,
self . language_id ,
e
)
} )
. ok ( )
}
}
/// Soft wrap settings. Every field is optional so that unset values can fall
/// back to another configuration level (see the note on `enable`).
#[ derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize) ]
#[ serde(default, rename_all = " kebab-case " , deny_unknown_fields) ]
pub struct SoftWrap {
/// Soft wrap lines that exceed viewport width. Defaults to off.
// NOTE: Option on purpose because the struct is shared between language config and global config.
// By default the option is None so that the language config falls back to the global config unless explicitly set.
pub enable : Option < bool > ,
/// Maximum space left free at the end of the line.
/// This space is used to wrap text at word boundaries. If that is not possible within this limit
/// the word is simply split at the end of the line.
///
/// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
///
/// Defaults to 20.
pub max_wrap : Option < u16 > ,
/// Maximum amount of indentation that can be carried over from the previous line when soft wrapping.
/// If a line is indented further than this limit it is rendered at the start of the viewport instead.
///
/// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
///
/// Defaults to 40.
pub max_indent_retain : Option < u16 > ,
/// Indicator placed at the beginning of soft-wrapped lines.
///
/// Defaults to ↪
pub wrap_indicator : Option < String > ,
/// Soft wrap at `text_width` instead of viewport width if it is shorter.
pub wrap_at_text_width : Option < bool > ,
}
/// A file-type glob paired with the index of the language configuration that
/// declared it (an index into `Loader::language_configs`).
#[ derive(Debug) ]
struct FileTypeGlob {
glob : globset ::Glob ,
language_id : usize ,
}
impl FileTypeGlob {
fn new ( glob : globset ::Glob , language_id : usize ) -> Self {
Self { glob , language_id }
}
}
/// Matches paths against all configured file-type globs.
///
/// `matcher` is built from the globs of `file_types` in order, so a match
/// index reported by `matcher` is an index into `file_types`.
#[ derive(Debug) ]
struct FileTypeGlobMatcher {
matcher : globset ::GlobSet ,
file_types : Vec < FileTypeGlob > ,
}
impl FileTypeGlobMatcher {
fn new ( file_types : Vec < FileTypeGlob > ) -> Result < Self , globset ::Error > {
let mut builder = globset ::GlobSetBuilder ::new ( ) ;
for file_type in & file_types {
builder . add ( file_type . glob . clone ( ) ) ;
}
Ok ( Self {
matcher : builder . build ( ) ? ,
file_types ,
} )
}
fn language_id_for_path ( & self , path : & Path ) -> Option < & usize > {
self . matcher
. matches ( path )
. iter ( )
. filter_map ( | idx | self . file_types . get ( * idx ) )
. max_by_key ( | file_type | file_type . glob . glob ( ) . len ( ) )
. map ( | file_type | & file_type . language_id )
}
}
// Expose loader as Lazy<> global since it's always static?
/// Holds all language configurations together with the lookup tables used to
/// find a configuration by extension, glob, or shebang. The `usize` values
/// in those tables are indices into `language_configs`.
#[ derive(Debug) ]
pub struct Loader {
// highlight_names ?
language_configs : Vec < Arc < LanguageConfiguration > > ,
language_config_ids_by_extension : HashMap < String , usize > , // Vec<usize>
language_config_ids_glob_matcher : FileTypeGlobMatcher ,
language_config_ids_by_shebang : HashMap < String , usize > ,
language_server_configs : HashMap < String , LanguageServerConfiguration > ,
// Current highlight scopes; replaced as a whole by `set_scopes`.
scopes : ArcSwap < Vec < String > > ,
}
/// Error returned by `Loader::new`; building the file-type glob matcher is
/// its only fallible step.
pub type LoaderError = globset ::Error ;
impl Loader {
pub fn new ( config : Configuration ) -> Result < Self , LoaderError > {
let mut language_configs = Vec ::new ( ) ;
let mut language_config_ids_by_extension = HashMap ::new ( ) ;
let mut language_config_ids_by_shebang = HashMap ::new ( ) ;
let mut file_type_globs = Vec ::new ( ) ;
for config in config . language {
// get the next id
let language_id = language_configs . len ( ) ;
for file_type in & config . file_types {
// entry().or_insert(Vec::new).push(language_id);
match file_type {
FileType ::Extension ( extension ) = > {
language_config_ids_by_extension . insert ( extension . clone ( ) , language_id ) ;
}
FileType ::Glob ( glob ) = > {
file_type_globs . push ( FileTypeGlob ::new ( glob . to_owned ( ) , language_id ) ) ;
}
} ;
}
for shebang in & config . shebangs {
language_config_ids_by_shebang . insert ( shebang . clone ( ) , language_id ) ;
}
language_configs . push ( Arc ::new ( config ) ) ;
}
Ok ( Self {
language_configs ,
language_config_ids_by_extension ,
language_config_ids_glob_matcher : FileTypeGlobMatcher ::new ( file_type_globs ) ? ,
language_config_ids_by_shebang ,
language_server_configs : config . language_server ,
scopes : ArcSwap ::from_pointee ( Vec ::new ( ) ) ,
} )
}
pub fn language_config_for_file_name ( & self , path : & Path ) -> Option < Arc < LanguageConfiguration > > {
// Find all the language configurations that match this file name
// or a suffix of the file name.
let configuration_id = self
. language_config_ids_glob_matcher
. language_id_for_path ( path )
. or_else ( | | {
path . extension ( )
. and_then ( | extension | extension . to_str ( ) )
. and_then ( | extension | self . language_config_ids_by_extension . get ( extension ) )
} ) ;
configuration_id . and_then ( | & id | self . language_configs . get ( id ) . cloned ( ) )
// TODO: content_regex handling conflict resolution
}
pub fn language_config_for_shebang (
& self ,
source : RopeSlice ,
) -> Option < Arc < LanguageConfiguration > > {
let line = Cow ::from ( source . line ( 0 ) ) ;
static SHEBANG_REGEX : Lazy < Regex > =
Lazy ::new ( | | Regex ::new ( & [ "^" , SHEBANG ] . concat ( ) ) . unwrap ( ) ) ;
let configuration_id = SHEBANG_REGEX
. captures ( & line )
. and_then ( | cap | self . language_config_ids_by_shebang . get ( & cap [ 1 ] ) ) ;
configuration_id . and_then ( | & id | self . language_configs . get ( id ) . cloned ( ) )
}
pub fn language_config_for_scope ( & self , scope : & str ) -> Option < Arc < LanguageConfiguration > > {
self . language_configs
. iter ( )
. find ( | config | config . scope = = scope )
. cloned ( )
}
pub fn language_config_for_language_id ( & self , id : & str ) -> Option < Arc < LanguageConfiguration > > {
self . language_configs
. iter ( )
. find ( | config | config . language_id = = id )
. cloned ( )
}
/// Unlike language_config_for_language_id, which only returns Some for an exact id, this
/// function will perform a regex match on the given string to find the closest language match.
pub fn language_config_for_name ( & self , name : & str ) -> Option < Arc < LanguageConfiguration > > {
let mut best_match_length = 0 ;
let mut best_match_position = None ;
for ( i , configuration ) in self . language_configs . iter ( ) . enumerate ( ) {
if let Some ( injection_regex ) = & configuration . injection_regex {
if let Some ( mat ) = injection_regex . find ( name ) {
let length = mat . end ( ) - mat . start ( ) ;
if length > best_match_length {
best_match_position = Some ( i ) ;
best_match_length = length ;
}
}
}
}
best_match_position . map ( | i | self . language_configs [ i ] . clone ( ) )
}
pub fn language_configuration_for_injection_string (
& self ,
capture : & InjectionLanguageMarker ,
) -> Option < Arc < LanguageConfiguration > > {
match capture {
InjectionLanguageMarker ::Name ( string ) = > self . language_config_for_name ( string ) ,
InjectionLanguageMarker ::Filename ( file ) = > self . language_config_for_file_name ( file ) ,
InjectionLanguageMarker ::Shebang ( shebang ) = > {
self . language_config_for_language_id ( shebang )
}
}
}
pub fn language_configs ( & self ) -> impl Iterator < Item = & Arc < LanguageConfiguration > > {
self . language_configs . iter ( )
}
pub fn language_server_configs ( & self ) -> & HashMap < String , LanguageServerConfiguration > {
& self . language_server_configs
}
/// Replaces the stored highlight scopes and re-applies them to every language
/// configuration whose highlight configuration has already been initialized.
pub fn set_scopes(&self, scopes: Vec<String>) {
    self.scopes.store(Arc::new(scopes));

    // Load the stored scopes once; they are the same for every configuration,
    // so there is no need to reload them on each loop iteration.
    let scopes = self.scopes();

    // Reconfigure existing grammars
    for config in self
        .language_configs
        .iter()
        .filter(|cfg| cfg.is_highlight_initialized())
    {
        config.reconfigure(&scopes);
    }
}
/// Loads the currently stored highlight scopes and returns the resulting guard.
pub fn scopes(&self) -> Guard<Arc<Vec<String>>> {
    let scopes = &self.scopes;
    scopes.load()
}
}
/// Bundles a tree-sitter parser with a pool of reusable query cursors.
///
/// One instance lives in the thread-local `PARSER`.
pub struct TsParser {
// Parser used by `Syntax::update` to (re-)parse language layers.
parser : tree_sitter ::Parser ,
// Pool of query cursors: callers `pop` a cursor (creating a new one when the
// pool is empty) and may `push` it back once they are done with it.
pub cursors : Vec < QueryCursor > ,
}
// could also just use a pool, or a single instance?
// Thread-local parser state: a fresh `Parser` and an empty cursor pool,
// wrapped in a `RefCell` so it can be borrowed mutably through `PARSER.with`.
thread_local! {
pub static PARSER : RefCell < TsParser > = RefCell ::new ( TsParser {
parser : Parser ::new ( ) ,
cursors : Vec ::new ( ) ,
} )
}
/// The syntax trees of a document: one root language layer plus any injected
/// language layers discovered by `Syntax::update`.
#[ derive(Debug) ]
pub struct Syntax {
// All language layers (root and injections), addressed by `LayerId`.
layers : HopSlotMap < LayerId , LanguageLayer > ,
// Id of the root layer (depth 0) inside `layers`.
root : LayerId ,
// Loader used to resolve injected languages to highlight configurations.
loader : Arc < ArcSwap < Loader > > ,
}
/// Returns the text of `source` covered by the byte `range` as a `Cow<str>`.
fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
    let fragment = source.byte_slice(range);
    fragment.into()
}
impl Syntax {
pub fn new (
source : RopeSlice ,
config : Arc < HighlightConfiguration > ,
loader : Arc < ArcSwap < Loader > > ,
) -> Option < Self > {
let root_layer = LanguageLayer {
tree : None ,
config ,
depth : 0 ,
flags : LayerUpdateFlags ::empty ( ) ,
ranges : vec ! [ Range {
start_byte : 0 ,
end_byte : usize ::MAX ,
start_point : Point ::new ( 0 , 0 ) ,
end_point : Point ::new ( usize ::MAX , usize ::MAX ) ,
} ] ,
parent : None ,
} ;
// track scope_descriptor: a Vec of scopes for item in tree
let mut layers = HopSlotMap ::default ( ) ;
let root = layers . insert ( root_layer ) ;
let mut syntax = Self {
root ,
layers ,
loader ,
} ;
let res = syntax . update ( source , source , & ChangeSet ::new ( source ) ) ;
if res . is_err ( ) {
log ::error ! ( "TS parser failed, disabling TS for the current buffer: {res:?}" ) ;
return None ;
}
Some ( syntax )
}
pub fn update (
& mut self ,
old_source : RopeSlice ,
source : RopeSlice ,
changeset : & ChangeSet ,
) -> Result < ( ) , Error > {
let mut queue = VecDeque ::new ( ) ;
queue . push_back ( self . root ) ;
let loader = self . loader . load ( ) ;
let scopes = loader . scopes . load ( ) ;
let injection_callback = | language : & InjectionLanguageMarker | {
loader
. language_configuration_for_injection_string ( language )
. and_then ( | language_config | language_config . highlight_config ( & scopes ) )
} ;
// Convert the changeset into tree sitter edits.
let edits = generate_edits ( old_source , changeset ) ;
// This table allows inverse indexing of `layers`.
// That is by hashing a `Layer` you can find
// the `LayerId` of an existing equivalent `Layer` in `layers`.
//
// It is used to determine if a new layer exists for an injection
// or if an existing layer needs to be updated.
let mut layers_table = RawTable ::with_capacity ( self . layers . len ( ) ) ;
let layers_hasher = RandomState ::new ( ) ;
// Use the edits to update all layers markers
fn point_add ( a : Point , b : Point ) -> Point {
if b . row > 0 {
Point ::new ( a . row . saturating_add ( b . row ) , b . column )
} else {
Point ::new ( 0 , a . column . saturating_add ( b . column ) )
}
}
fn point_sub ( a : Point , b : Point ) -> Point {
if a . row > b . row {
Point ::new ( a . row . saturating_sub ( b . row ) , a . column )
} else {
Point ::new ( 0 , a . column . saturating_sub ( b . column ) )
}
}
for ( layer_id , layer ) in self . layers . iter_mut ( ) {
// The root layer always covers the whole range (0..usize::MAX)
if layer . depth = = 0 {
layer . flags = LayerUpdateFlags ::MODIFIED ;
continue ;
}
if ! edits . is_empty ( ) {
for range in & mut layer . ranges {
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
for edit in edits . iter ( ) . rev ( ) {
let is_pure_insertion = edit . old_end_byte = = edit . start_byte ;
// if edit is after range, skip
if edit . start_byte > range . end_byte {
// TODO: || (is_noop && edit.start_byte == range.end_byte)
continue ;
}
// if edit is before range, shift entire range by len
if edit . old_end_byte < range . start_byte {
range . start_byte =
edit . new_end_byte + ( range . start_byte - edit . old_end_byte ) ;
range . start_point = point_add (
edit . new_end_position ,
point_sub ( range . start_point , edit . old_end_position ) ,
) ;
range . end_byte = edit
. new_end_byte
. saturating_add ( range . end_byte - edit . old_end_byte ) ;
range . end_point = point_add (
edit . new_end_position ,
point_sub ( range . end_point , edit . old_end_position ) ,
) ;
layer . flags | = LayerUpdateFlags ::MOVED ;
}
// if the edit starts in the space before and extends into the range
else if edit . start_byte < range . start_byte {
range . start_byte = edit . new_end_byte ;
range . start_point = edit . new_end_position ;
range . end_byte = range
. end_byte
. saturating_sub ( edit . old_end_byte )
. saturating_add ( edit . new_end_byte ) ;
range . end_point = point_add (
edit . new_end_position ,
point_sub ( range . end_point , edit . old_end_position ) ,
) ;
layer . flags = LayerUpdateFlags ::MODIFIED ;
}
// If the edit is an insertion at the start of the tree, shift
else if edit . start_byte = = range . start_byte & & is_pure_insertion {
range . start_byte = edit . new_end_byte ;
range . start_point = edit . new_end_position ;
layer . flags | = LayerUpdateFlags ::MOVED ;
} else {
range . end_byte = range
. end_byte
. saturating_sub ( edit . old_end_byte )
. saturating_add ( edit . new_end_byte ) ;
range . end_point = point_add (
edit . new_end_position ,
point_sub ( range . end_point , edit . old_end_position ) ,
) ;
layer . flags = LayerUpdateFlags ::MODIFIED ;
}
}
}
}
let hash = layers_hasher . hash_one ( layer ) ;
// Safety: insert_no_grow is unsafe because it assumes that the table
// has enough capacity to hold additional elements.
// This is always the case as we reserved enough capacity above.
unsafe { layers_table . insert_no_grow ( hash , layer_id ) } ;
}
PARSER . with ( | ts_parser | {
let ts_parser = & mut ts_parser . borrow_mut ( ) ;
ts_parser . parser . set_timeout_micros ( 1000 * 500 ) ; // half a second is pretty generours
let mut cursor = ts_parser . cursors . pop ( ) . unwrap_or_else ( QueryCursor ::new ) ;
// TODO: might need to set cursor range
cursor . set_byte_range ( 0 .. usize ::MAX ) ;
cursor . set_match_limit ( TREE_SITTER_MATCH_LIMIT ) ;
let source_slice = source . slice ( .. ) ;
while let Some ( layer_id ) = queue . pop_front ( ) {
let layer = & mut self . layers [ layer_id ] ;
// Mark the layer as touched
layer . flags | = LayerUpdateFlags ::TOUCHED ;
// If a tree already exists, notify it of changes.
if let Some ( tree ) = & mut layer . tree {
if layer
. flags
. intersects ( LayerUpdateFlags ::MODIFIED | LayerUpdateFlags ::MOVED )
{
for edit in edits . iter ( ) . rev ( ) {
// Apply the edits in reverse.
// If we applied them in order then edit 1 would disrupt the positioning of edit 2.
tree . edit ( edit ) ;
}
}
if layer . flags . contains ( LayerUpdateFlags ::MODIFIED ) {
// Re-parse the tree.
layer . parse ( & mut ts_parser . parser , source ) ? ;
}
} else {
// always parse if this layer has never been parsed before
layer . parse ( & mut ts_parser . parser , source ) ? ;
}
// Switch to an immutable borrow.
let layer = & self . layers [ layer_id ] ;
// Process injections.
let matches = cursor . matches (
& layer . config . injections_query ,
layer . tree ( ) . root_node ( ) ,
RopeProvider ( source_slice ) ,
) ;
let mut combined_injections = vec! [
( None , Vec ::new ( ) , IncludedChildren ::default ( ) ) ;
layer . config . combined_injections_patterns . len ( )
] ;
let mut injections = Vec ::new ( ) ;
let mut last_injection_end = 0 ;
for mat in matches {
let ( injection_capture , content_node , included_children ) = layer
. config
. injection_for_match ( & layer . config . injections_query , & mat , source_slice ) ;
// in case this is a combined injection save it for more processing later
if let Some ( combined_injection_idx ) = layer
. config
. combined_injections_patterns
. iter ( )
. position ( | & pattern | pattern = = mat . pattern_index )
{
let entry = & mut combined_injections [ combined_injection_idx ] ;
if injection_capture . is_some ( ) {
entry . 0 = injection_capture ;
}
if let Some ( content_node ) = content_node {
if content_node . start_byte ( ) > = last_injection_end {
entry . 1. push ( content_node ) ;
last_injection_end = content_node . end_byte ( ) ;
}
}
entry . 2 = included_children ;
continue ;
}
// Explicitly remove this match so that none of its other captures will remain
// in the stream of captures.
mat . remove ( ) ;
// If a language is found with the given name, then add a new language layer
// to the highlighted document.
if let ( Some ( injection_capture ) , Some ( content_node ) ) =
( injection_capture , content_node )
{
if let Some ( config ) = ( injection_callback ) ( & injection_capture ) {
let ranges =
intersect_ranges ( & layer . ranges , & [ content_node ] , included_children ) ;
if ! ranges . is_empty ( ) {
if content_node . start_byte ( ) < last_injection_end {
continue ;
}
last_injection_end = content_node . end_byte ( ) ;
injections . push ( ( config , ranges ) ) ;
}
}
}
}
for ( lang_name , content_nodes , included_children ) in combined_injections {
if let ( Some ( lang_name ) , false ) = ( lang_name , content_nodes . is_empty ( ) ) {
if let Some ( config ) = ( injection_callback ) ( & lang_name ) {
let ranges =
intersect_ranges ( & layer . ranges , & content_nodes , included_children ) ;
if ! ranges . is_empty ( ) {
injections . push ( ( config , ranges ) ) ;
}
}
}
}
let depth = layer . depth + 1 ;
// TODO: can't inline this since matches borrows self.layers
for ( config , ranges ) in injections {
let new_layer = LanguageLayer {
tree : None ,
config ,
depth ,
ranges ,
flags : LayerUpdateFlags ::empty ( ) ,
parent : Some ( layer_id ) ,
} ;
// Find an identical existing layer
let layer = layers_table
. get ( layers_hasher . hash_one ( & new_layer ) , | & it | {
self . layers [ it ] = = new_layer
} )
. copied ( ) ;
// ...or insert a new one.
let layer_id = layer . unwrap_or_else ( | | self . layers . insert ( new_layer ) ) ;
queue . push_back ( layer_id ) ;
}
// TODO: pre-process local scopes at this time, rather than highlight?
// would solve problems with locals not working across boundaries
}
// Return the cursor back in the pool.
ts_parser . cursors . push ( cursor ) ;
// Reset all `LayerUpdateFlags` and remove all untouched layers
self . layers . retain ( | _ , layer | {
replace ( & mut layer . flags , LayerUpdateFlags ::empty ( ) )
. contains ( LayerUpdateFlags ::TOUCHED )
} ) ;
Ok ( ( ) )
} )
}
/// Returns the syntax tree of the root layer.
pub fn tree(&self) -> &Tree {
    let root_layer = &self.layers[self.root];
    root_layer.tree()
}
/// Iterate over the highlighted regions for a given slice of source code.
///
/// For every language layer a query cursor is taken from the thread-local pool
/// (or created), restricted to `range` (the whole document when `range` is
/// `None`) and used to run the layer's highlight query. Layers that yield no
/// captures are skipped. The remaining layers are sorted by their `sort_key`
/// and wrapped in a `HighlightIter` that starts at the beginning of `range`
/// (or at byte 0).
pub fn highlight_iter < ' a > (
& ' a self ,
source : RopeSlice < ' a > ,
range : Option < std ::ops ::Range < usize > > ,
cancellation_flag : Option < & ' a AtomicUsize > ,
) -> impl Iterator < Item = Result < HighlightEvent , Error > > + ' a {
let mut layers = self
. layers
. iter ( )
. filter_map ( | ( _ , layer ) | {
// TODO: if range doesn't overlap layer range, skip it
// Reuse a cursor from the pool if available.
let mut cursor = PARSER . with ( | ts_parser | {
let highlighter = & mut ts_parser . borrow_mut ( ) ;
highlighter . cursors . pop ( ) . unwrap_or_else ( QueryCursor ::new )
} ) ;
// The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
// prevents them from being moved. But both of these values are really just
// pointers, so it's actually ok to move them.
// NOTE(review): the transmute below extends the borrow of `cursor` to `'static`;
// `cursor` is then moved into the same `HighlightIterLayer` as `captures`.
let cursor_ref =
unsafe { mem ::transmute ::< _ , & ' static mut QueryCursor > ( & mut cursor ) } ;
// if reusing cursors & no range this resets to whole range
cursor_ref . set_byte_range ( range . clone ( ) . unwrap_or ( 0 .. usize ::MAX ) ) ;
cursor_ref . set_match_limit ( TREE_SITTER_MATCH_LIMIT ) ;
let mut captures = cursor_ref
. captures (
& layer . config . query ,
layer . tree ( ) . root_node ( ) ,
RopeProvider ( source ) ,
)
. peekable ( ) ;
// If there's no captures, skip the layer
// (on this path the cursor taken above is not pushed back to the pool here)
captures . peek ( ) ? ;
Some ( HighlightIterLayer {
highlight_end_stack : Vec ::new ( ) ,
// Start with a single scope that spans the whole document.
scope_stack : vec ! [ LocalScope {
inherits : false ,
range : 0 .. usize ::MAX ,
local_defs : Vec ::new ( ) ,
} ] ,
cursor ,
_tree : None ,
captures : RefCell ::new ( captures ) ,
config : layer . config . as_ref ( ) , // TODO: just reuse `layer`
depth : layer . depth , // TODO: just reuse `layer`
} )
} )
. collect ::< Vec < _ > > ( ) ;
layers . sort_unstable_by_key ( | layer | layer . sort_key ( ) ) ;
let mut result = HighlightIter {
source ,
// Begin at the start of the requested range, or at byte 0 without a range.
byte_offset : range . map_or ( 0 , | r | r . start ) ,
cancellation_flag ,
iter_count : 0 ,
layers ,
next_event : None ,
last_highlight_range : None ,
} ;
result . sort_layers ( ) ;
result
}
/// Returns the tree of the deepest layer that contains the byte range
/// `start..end` according to `LanguageLayer::contains_byte_range`.
///
/// Falls back to the root layer's tree when no deeper layer contains the range.
pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
    let mut best_id = self.root;
    let mut best_depth = self.layers[best_id].depth;

    for (candidate_id, candidate) in self.layers.iter() {
        // Only strictly deeper layers can replace the current container.
        if candidate.depth <= best_depth {
            continue;
        }
        if candidate.contains_byte_range(start, end) {
            best_id = candidate_id;
            best_depth = candidate.depth;
        }
    }

    self.layers[best_id].tree()
}
pub fn named_descendant_for_byte_range ( & self , start : usize , end : usize ) -> Option < Node < ' _ > > {
self . tree_for_byte_range ( start , end )
. root_node ( )
. named_descendant_for_byte_range ( start , end )
}
pub fn descendant_for_byte_range ( & self , start : usize , end : usize ) -> Option < Node < ' _ > > {
self . tree_for_byte_range ( start , end )
. root_node ( )
. descendant_for_byte_range ( start , end )
}
pub fn walk ( & self ) -> TreeCursor < ' _ > {
// data structure to find the smallest range that contains a point
// when some of the ranges in the structure can overlap.
TreeCursor ::new ( & self . layers , self . root )
}
// Commenting
// comment_strings_for_pos
// is_commented
// Indentation
// suggested_indent_for_line_at_buffer_row
// suggested_indent_for_buffer_row
// indent_level_for_line
// TODO: Folding
}
bitflags ! {
/// Flags that track the status of a layer
/// in the `Syntax::update` function
#[ derive(Debug) ]
struct LayerUpdateFlags : u32 {
// An edit overlaps the layer's ranges (always set for the root layer):
// an existing tree is re-parsed.
const MODIFIED = 0b001 ;
// The layer's ranges were shifted by an edit: an existing tree is
// informed of the edits but not necessarily re-parsed.
const MOVED = 0b010 ;
// The layer was visited during the update; untouched layers are removed.
const TOUCHED = 0b100 ;
}
}
/// A single parsed language within a document: either the root language
/// (depth 0) or a language injected into a parent layer.
#[ derive(Debug) ]
pub struct LanguageLayer {
// mode
// grammar
// Highlight configuration (grammar and queries) used by this layer.
pub config : Arc < HighlightConfiguration > ,
// Parsed tree; `None` until the layer has been parsed for the first time.
pub ( crate ) tree : Option < Tree > ,
// Document ranges that are parsed by this layer.
pub ranges : Vec < Range > ,
// Nesting depth: 0 for the root layer, parent depth + 1 for injections.
pub depth : u32 ,
// Bookkeeping used by `Syntax::update`; reset to empty at the end of each update.
flags : LayerUpdateFlags ,
// Layer this one was injected from; `None` for the root layer.
parent : Option < LayerId > ,
}
/// This PartialEq implementation only checks if that
/// two layers are theoretically identical (meaning they highlight the same text range with the same language).
/// It does not check whether the layers have the same internal treesitter
/// state.
impl PartialEq for LanguageLayer {
fn eq ( & self , other : & Self ) -> bool {
self . depth = = other . depth
& & self . config . language = = other . config . language
& & self . ranges = = other . ranges
}
}
/// Hashes exactly the fields compared by the `PartialEq` implementation
/// (depth, language, ranges), in that order, so that equal layers hash equally.
impl Hash for LanguageLayer {
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.depth, state);
        Hash::hash(&self.config.language, state);
        Hash::hash(&self.ranges, state);
    }
}
impl LanguageLayer {
/// Returns the parsed tree of this layer.
///
/// # Panics
///
/// Panics if the layer has not been parsed yet (`tree` is still `None`).
pub fn tree(&self) -> &Tree {
    // TODO: return an `Option`/`Result` instead of panicking
    self.tree
        .as_ref()
        .expect("language layer must be parsed before its tree is accessed")
}
fn parse ( & mut self , parser : & mut Parser , source : RopeSlice ) -> Result < ( ) , Error > {
parser
. set_included_ranges ( & self . ranges )
. map_err ( | _ | Error ::InvalidRanges ) ? ;
parser
. set_language ( & self . config . language )
. map_err ( | _ | Error ::InvalidLanguage ) ? ;
// unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
let tree = parser
. parse_with (
& mut | byte , _ | {
if byte < = source . len_bytes ( ) {
let ( chunk , start_byte , _ , _ ) = source . chunk_at_byte ( byte ) ;
& chunk . as_bytes ( ) [ byte - start_byte .. ]
} else {
// out of range
& [ ]
}
} ,
self . tree . as_ref ( ) ,
)
. ok_or ( Error ::Cancelled ) ? ;
// unsafe { ts_parser.parser.set_cancellation_flag(None) };
self . tree = Some ( tree ) ;
Ok ( ( ) )
}
/// Whether the layer contains the given byte range.
///
/// If the layer has multiple ranges (i.e. combined injections), the
/// given range is considered contained if it is within the start and
/// end bytes of the first and last ranges **and** if the given range
/// starts or ends within any of the layer's ranges.
fn contains_byte_range ( & self , start : usize , end : usize ) -> bool {
let layer_start = self
. ranges
. first ( )
. expect ( "ranges should not be empty" )
. start_byte ;
let layer_end = self
. ranges
. last ( )
. expect ( "ranges should not be empty" )
. end_byte ;
layer_start < = start
& & layer_end > = end
& & self . ranges . iter ( ) . any ( | range | {
let byte_range = range . start_byte .. range . end_byte ;
byte_range . contains ( & start ) | | byte_range . contains ( & end )
} )
}
}
pub ( crate ) fn generate_edits (
old_text : RopeSlice ,
changeset : & ChangeSet ,
) -> Vec < tree_sitter ::InputEdit > {
use Operation ::* ;
let mut old_pos = 0 ;
let mut edits = Vec ::new ( ) ;
if changeset . changes . is_empty ( ) {
return edits ;
}
let mut iter = changeset . changes . iter ( ) . peekable ( ) ;
// TODO; this is a lot easier with Change instead of Operation.
fn point_at_pos ( text : RopeSlice , pos : usize ) -> ( usize , Point ) {
let byte = text . char_to_byte ( pos ) ; // <- attempted to index past end
let line = text . char_to_line ( pos ) ;
let line_start_byte = text . line_to_byte ( line ) ;
let col = byte - line_start_byte ;
( byte , Point ::new ( line , col ) )
}
fn traverse ( point : Point , text : & Tendril ) -> Point {
let Point {
mut row ,
mut column ,
} = point ;
// TODO: there should be a better way here.
let mut chars = text . chars ( ) . peekable ( ) ;
while let Some ( ch ) = chars . next ( ) {
if char_is_line_ending ( ch ) & & ! ( ch = = '\r' & & chars . peek ( ) = = Some ( & '\n' ) ) {
row + = 1 ;
column = 0 ;
} else {
column + = 1 ;
}
}
Point { row , column }
}
while let Some ( change ) = iter . next ( ) {
let len = match change {
Delete ( i ) | Retain ( i ) = > * i ,
Insert ( _ ) = > 0 ,
} ;
let mut old_end = old_pos + len ;
match change {
Retain ( _ ) = > { }
Delete ( _ ) = > {
let ( start_byte , start_position ) = point_at_pos ( old_text , old_pos ) ;
let ( old_end_byte , old_end_position ) = point_at_pos ( old_text , old_end ) ;
// deletion
edits . push ( tree_sitter ::InputEdit {
start_byte , // old_pos to byte
old_end_byte , // old_end to byte
new_end_byte : start_byte , // old_pos to byte
start_position , // old pos to coords
old_end_position , // old_end to coords
new_end_position : start_position , // old pos to coords
} ) ;
}
Insert ( s ) = > {
let ( start_byte , start_position ) = point_at_pos ( old_text , old_pos ) ;
// a subsequent delete means a replace, consume it
if let Some ( Delete ( len ) ) = iter . peek ( ) {
old_end = old_pos + len ;
let ( old_end_byte , old_end_position ) = point_at_pos ( old_text , old_end ) ;
iter . next ( ) ;
// replacement
edits . push ( tree_sitter ::InputEdit {
start_byte , // old_pos to byte
old_end_byte , // old_end to byte
new_end_byte : start_byte + s . len ( ) , // old_pos to byte + s.len()
start_position , // old pos to coords
old_end_position , // old_end to coords
new_end_position : traverse ( start_position , s ) , // old pos + chars, newlines matter too (iter over)
} ) ;
} else {
// insert
edits . push ( tree_sitter ::InputEdit {
start_byte , // old_pos to byte
old_end_byte : start_byte , // same
new_end_byte : start_byte + s . len ( ) , // old_pos + s.len()
start_position , // old pos to coords
old_end_position : start_position , // same
new_end_position : traverse ( start_position , s ) , // old pos + chars, newlines matter too (iter over)
} ) ;
}
}
}
old_pos = old_end ;
}
edits
}
use std ::sync ::atomic ::{ AtomicUsize , Ordering } ;
use std ::{ iter , mem , ops , str , usize } ;
use tree_sitter ::{
Language as Grammar , Node , Parser , Point , Query , QueryCaptures , QueryCursor , QueryError ,
QueryMatch , Range , TextProvider , Tree ,
} ;
// NOTE(review): presumably the number of iterations between two checks of a
// highlight iterator's cancellation flag; the use site is not visible here, confirm.
const CANCELLATION_CHECK_INTERVAL : usize = 100 ;
/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is an index into the list of recognized highlight names
/// passed to `HighlightConfiguration::configure`.
#[ derive(Copy, Clone, Debug, PartialEq, Eq) ]
pub struct Highlight ( pub usize ) ;
/// Represents the reason why syntax highlighting failed.
#[ derive(Debug, PartialEq, Eq) ]
pub enum Error {
// The parser did not produce a tree (see `LanguageLayer::parse`).
Cancelled ,
// The parser rejected the layer's language.
InvalidLanguage ,
// The parser rejected the layer's included ranges.
InvalidRanges ,
Unknown ,
}
/// Represents a single step in rendering a syntax-highlighted document.
#[ derive(Copy, Clone, Debug) ]
pub enum HighlightEvent {
// A span of source text, given by its `start` and `end` offsets.
Source { start : usize , end : usize } ,
// The given highlight begins.
HighlightStart ( Highlight ) ,
// A previously started highlight ends.
HighlightEnd ,
}
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
#[ derive(Debug) ]
pub struct HighlightConfiguration {
// Grammar used for parsing.
pub language : Grammar ,
// Combined query: the locals query followed by the highlights query.
pub query : Query ,
// Query used to discover language injections.
injections_query : Query ,
// Indices of injection patterns that set `injection.combined`.
combined_injections_patterns : Vec < usize > ,
// Number of patterns in `query` that come from the locals query, i.e. the
// index of the first highlights pattern.
highlights_pattern_index : usize ,
// For every capture of `query`, the highlight it maps to (if any); replaced
// as a whole by `configure`.
highlight_indices : ArcSwap < Vec < Option < Highlight > > > ,
// Per pattern of `query`: whether it carries a negated `local` property predicate.
non_local_variable_patterns : Vec < bool > ,
// Capture indices of the special `injection.*` captures in `injections_query`.
injection_content_capture_index : Option < u32 > ,
injection_language_capture_index : Option < u32 > ,
injection_filename_capture_index : Option < u32 > ,
injection_shebang_capture_index : Option < u32 > ,
// Capture indices of the special `local.*` captures in `query`.
local_scope_capture_index : Option < u32 > ,
local_def_capture_index : Option < u32 > ,
local_def_value_capture_index : Option < u32 > ,
local_ref_capture_index : Option < u32 > ,
}
/// A local definition recorded in a `LocalScope`.
#[ derive(Debug) ]
struct LocalDef < ' a > {
// Name of the definition.
name : Cow < ' a , str > ,
// NOTE(review): presumably the byte range of the definition's value
// (cf. the `local.definition-value` capture); confirm at the use site.
value_range : ops ::Range < usize > ,
// Highlight associated with the definition, if any.
highlight : Option < Highlight > ,
}
/// A scope for local definitions; `highlight_iter` seeds each layer with one
/// non-inheriting scope covering `0..usize::MAX`.
#[ derive(Debug) ]
struct LocalScope < ' a > {
// NOTE(review): presumably whether lookups continue in enclosing scopes; confirm.
inherits : bool ,
// Byte range covered by the scope.
range : ops ::Range < usize > ,
// Definitions recorded in this scope.
local_defs : Vec < LocalDef < ' a > > ,
}
/// Iterator state created by `Syntax::highlight_iter`.
#[ derive(Debug) ]
struct HighlightIter < ' a > {
// Text being highlighted.
source : RopeSlice < ' a > ,
// Current byte position; starts at the beginning of the requested range.
byte_offset : usize ,
// Optional flag supplied by the caller of `highlight_iter`.
cancellation_flag : Option < & ' a AtomicUsize > ,
// Per-layer capture state, initially sorted by `HighlightIterLayer::sort_key`.
layers : Vec < HighlightIterLayer < ' a > > ,
// Starts at 0.
iter_count : usize ,
// Starts as `None`.
next_event : Option < HighlightEvent > ,
// Starts as `None`.
last_highlight_range : Option < ( usize , usize , u32 ) > ,
}
// Adapter to convert rope chunks to bytes
// (wraps a rope chunk iterator and yields each chunk as a byte slice).
pub struct ChunksBytes < ' a > {
chunks : ropey ::iter ::Chunks < ' a > ,
}
impl < ' a > Iterator for ChunksBytes < ' a > {
type Item = & ' a [ u8 ] ;
fn next ( & mut self ) -> Option < Self ::Item > {
self . chunks . next ( ) . map ( str ::as_bytes )
}
}
// Text provider handed to tree-sitter query cursors; wraps the rope slice
// that node text is read from.
pub struct RopeProvider < ' a > ( pub RopeSlice < ' a > ) ;
impl < ' a > TextProvider < & ' a [ u8 ] > for RopeProvider < ' a > {
type I = ChunksBytes < ' a > ;
fn text ( & mut self , node : Node ) -> Self ::I {
let fragment = self . 0. byte_slice ( node . start_byte ( ) .. node . end_byte ( ) ) ;
ChunksBytes {
chunks : fragment . chunks ( ) ,
}
}
}
/// Per-layer state of a highlight iteration, created by `Syntax::highlight_iter`.
struct HighlightIterLayer < ' a > {
// Set to `None` by `highlight_iter`.
_tree : Option < Tree > ,
// Query cursor backing `captures`; owned here so it lives as long as the captures.
cursor : QueryCursor ,
// Peekable stream of the layer's highlight query captures.
captures : RefCell < iter ::Peekable < QueryCaptures < ' a , ' a , RopeProvider < ' a > , & ' a [ u8 ] > > > ,
// Highlight configuration of the layer.
config : & ' a HighlightConfiguration ,
// End offsets of the highlights that are currently open; the last entry is
// consulted by `sort_key`.
highlight_end_stack : Vec < usize > ,
// Stack of local scopes; starts with one scope covering the whole document.
scope_stack : Vec < LocalScope < ' a > > ,
// Depth of the layer (0 for the root layer).
depth : u32 ,
}
impl < ' a > fmt ::Debug for HighlightIterLayer < ' a > {
fn fmt ( & self , f : & mut fmt ::Formatter < ' _ > ) -> fmt ::Result {
f . debug_struct ( "HighlightIterLayer" ) . finish ( )
}
}
impl HighlightConfiguration {
/// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
/// queries.
///
/// # Parameters
///
/// * `language` - The Tree-sitter `Grammar` that should be used for parsing.
/// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
///   should be non-empty, otherwise no syntax highlights will be added.
/// * `injection_query` - A string containing tree patterns for injecting other languages
///   into the document. This can be empty if no injections are desired.
/// * `locals_query` - A string containing tree patterns for tracking local variable
///   definitions and references. This can be empty if local variable tracking is not needed.
///
/// # Errors
///
/// Returns the `QueryError` of the first query that fails to compile (the combined
/// locals + highlights query is compiled before the injection query).
///
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
pub fn new(
    language: Grammar,
    highlights_query: &str,
    injection_query: &str,
    locals_query: &str,
) -> Result<Self, QueryError> {
    // The locals query comes first, directly followed by the highlights query;
    // the highlights section therefore starts at the length of the locals query.
    let highlights_query_offset = locals_query.len();
    let query_source = [locals_query, highlights_query].concat();

    // Compile the combined query and count how many of its patterns start inside
    // the locals section, i.e. before the highlights section begins.
    let query = Query::new(&language, &query_source)?;
    let highlights_pattern_index = (0..query.pattern_count())
        .filter(|&pattern| query.start_byte_for_pattern(pattern) < highlights_query_offset)
        .count();

    let injections_query = Query::new(&language, injection_query)?;
    let combined_injections_patterns = (0..injections_query.pattern_count())
        .filter(|&pattern| {
            injections_query
                .property_settings(pattern)
                .iter()
                .any(|setting| &*setting.key == "injection.combined")
        })
        .collect();

    // Find all of the highlighting patterns that are disabled for nodes that
    // have been identified as local variables.
    let non_local_variable_patterns = (0..query.pattern_count())
        .map(|pattern| {
            query
                .property_predicates(pattern)
                .iter()
                .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
        })
        .collect();

    // Store the numeric ids for all of the special captures. The last capture
    // carrying the wanted name wins.
    let capture_index = |names: &[&str], wanted: &str| -> Option<u32> {
        names
            .iter()
            .rposition(|name| *name == wanted)
            .map(|index| index as u32)
    };

    let highlight_captures = query.capture_names();
    let local_def_capture_index = capture_index(highlight_captures, "local.definition");
    let local_def_value_capture_index =
        capture_index(highlight_captures, "local.definition-value");
    let local_ref_capture_index = capture_index(highlight_captures, "local.reference");
    let local_scope_capture_index = capture_index(highlight_captures, "local.scope");

    let injection_captures = injections_query.capture_names();
    let injection_content_capture_index =
        capture_index(injection_captures, "injection.content");
    let injection_language_capture_index =
        capture_index(injection_captures, "injection.language");
    let injection_filename_capture_index =
        capture_index(injection_captures, "injection.filename");
    let injection_shebang_capture_index =
        capture_index(injection_captures, "injection.shebang");

    // No highlight is assigned to any capture until `configure` is called.
    let highlight_indices = ArcSwap::from_pointee(vec![None; highlight_captures.len()]);

    Ok(Self {
        language,
        query,
        injections_query,
        combined_injections_patterns,
        highlights_pattern_index,
        highlight_indices,
        non_local_variable_patterns,
        injection_content_capture_index,
        injection_language_capture_index,
        injection_filename_capture_index,
        injection_shebang_capture_index,
        local_scope_capture_index,
        local_def_capture_index,
        local_def_value_capture_index,
        local_ref_capture_index,
    })
}
/// Get a slice containing all of the capture names of the combined
/// (locals + highlights) query used in the configuration.
pub fn names(&self) -> &[&str] {
    let query = &self.query;
    query.capture_names()
}
/// Set the list of recognized highlight names.
///
/// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
/// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
/// these queries can choose to recognize highlights with different levels of specificity.
/// For example, the string `function.builtin` will match against `function.builtin.constructor`
/// but will not match `function.method.builtin` and `function.method`.
///
/// When highlighting, results are returned as `Highlight` values, which contain the index
/// of the matched highlight this list of highlight names.
pub fn configure ( & self , recognized_names : & [ String ] ) {
let mut capture_parts = Vec ::new ( ) ;
let indices : Vec < _ > = self
. query
. capture_names ( )
. iter ( )
. map ( move | capture_name | {
capture_parts . clear ( ) ;
capture_parts . extend ( capture_name . split ( '.' ) ) ;
let mut best_index = None ;
let mut best_match_len = 0 ;
for ( i , recognized_name ) in recognized_names . iter ( ) . enumerate ( ) {
let mut len = 0 ;
let mut matches = true ;
for ( i , part ) in recognized_name . split ( '.' ) . enumerate ( ) {
match capture_parts . get ( i ) {
Some ( capture_part ) if * capture_part = = part = > len + = 1 ,
_ = > {
matches = false ;
break ;
}
}
}
if matches & & len > best_match_len {
best_index = Some ( i ) ;
best_match_len = len ;
}
}
best_index . map ( Highlight )
} )
. collect ( ) ;
self . highlight_indices . store ( Arc ::new ( indices ) ) ;
}
/// Extracts the injection language marker and the content node from the
/// captures of an injection query match.
///
/// The marker is taken from the `injection.language`, `injection.filename` or
/// `injection.shebang` capture and the content node from the
/// `injection.content` capture. When several marker captures are present, the
/// last one processed wins (a shebang capture without a regex match resets the
/// marker to `None`).
fn injection_pair < ' a > (
& self ,
query_match : & QueryMatch < ' a , ' a > ,
source : RopeSlice < ' a > ,
) -> ( Option < InjectionLanguageMarker < ' a > > , Option < Node < ' a > > ) {
let mut injection_capture = None ;
let mut content_node = None ;
for capture in query_match . captures {
let index = Some ( capture . index ) ;
if index = = self . injection_language_capture_index {
// The captured text is the name of the language.
let name = byte_range_to_str ( capture . node . byte_range ( ) , source ) ;
injection_capture = Some ( InjectionLanguageMarker ::Name ( name ) ) ;
} else if index = = self . injection_filename_capture_index {
// The captured text is a file name.
let name = byte_range_to_str ( capture . node . byte_range ( ) , source ) ;
let path = Path ::new ( name . as_ref ( ) ) . to_path_buf ( ) ;
injection_capture = Some ( InjectionLanguageMarker ::Filename ( path . into ( ) ) ) ;
} else if index = = self . injection_shebang_capture_index {
let node_slice = source . byte_slice ( capture . node . byte_range ( ) ) ;
// some languages allow space and newlines before the actual string content
// so a shebang could be on either the first or second line
let lines = if let Ok ( end ) = node_slice . try_line_to_byte ( 2 ) {
node_slice . byte_slice ( .. end )
} else {
node_slice
} ;
// Compiled once on first use.
static SHEBANG_REGEX : Lazy < rope ::Regex > =
Lazy ::new ( | | rope ::Regex ::new ( SHEBANG ) . unwrap ( ) ) ;
// Use the first capture group of the first regex match as the marker.
injection_capture = SHEBANG_REGEX
. captures_iter ( lines . regex_input ( ) )
. map ( | cap | {
let cap = lines . byte_slice ( cap . get_group ( 1 ) . unwrap ( ) . range ( ) ) ;
InjectionLanguageMarker ::Shebang ( cap . into ( ) )
} )
. next ( )
} else if index = = self . injection_content_capture_index {
content_node = Some ( capture . node ) ;
}
}
( injection_capture , content_node )
}
fn injection_for_match < ' a > (
& self ,
query : & ' a Query ,
query_match : & QueryMatch < ' a , ' a > ,
source : RopeSlice < ' a > ,
) -> (
Option < InjectionLanguageMarker < ' a > > ,
Option < Node < ' a > > ,
IncludedChildren ,
) {
let ( mut injection_capture , content_node ) = self . injection_pair ( query_match , source ) ;
let mut included_children = IncludedChildren ::default ( ) ;
for prop in query . property_settings ( query_match . pattern_index ) {
match prop . key . as_ref ( ) {
// In addition to specifying the language name via the text of a
// captured node, it can also be hard-coded via a `#set!` predicate
// that sets the injection.language key.
"injection.language" if injection_capture . is_none ( ) = > {
injection_capture = prop
. value
. as_ref ( )
. map ( | s | InjectionLanguageMarker ::Name ( s . as_ref ( ) . into ( ) ) ) ;
}
// By default, injections do not include the *children* of an
// `injection.content` node - only the ranges that belong to the
// node itself. This can be changed using a `#set!` predicate that
// sets the `injection.include-children` key.
"injection.include-children" = > included_children = IncludedChildren ::All ,
// Some queries might only exclude named children but include unnamed
// children in their `injection.content` node. This can be enabled using
// a `#set!` predicate that sets the `injection.include-unnamed-children` key.
"injection.include-unnamed-children" = > {
included_children = IncludedChildren ::Unnamed
}
_ = > { }
}
}
( injection_capture , content_node , included_children )
}
}
impl < ' a > HighlightIterLayer < ' a > {
// First, sort scope boundaries by their byte offset in the document. At a
// given position, emit scope endings before scope beginnings. Finally, emit
// scope boundaries from deeper layers first.
fn sort_key ( & self ) -> Option < ( usize , bool , isize ) > {
let depth = - ( self . depth as isize ) ;
let next_start = self
. captures
. borrow_mut ( )
. peek ( )
. map ( | ( m , i ) | m . captures [ * i ] . node . start_byte ( ) ) ;
let next_end = self . highlight_end_stack . last ( ) . cloned ( ) ;
match ( next_start , next_end ) {
( Some ( start ) , Some ( end ) ) = > {
if start < end {
Some ( ( start , true , depth ) )
} else {
Some ( ( end , false , depth ) )
}
}
( Some ( i ) , None ) = > Some ( ( i , true , depth ) ) ,
( None , Some ( j ) ) = > Some ( ( j , false , depth ) ) ,
_ = > None ,
}
}
}
/// Which children of an `injection.content` node stay part of the injected
/// ranges.
#[derive(Clone)]
enum IncludedChildren {
    /// Every child range is excluded; only the node's own content is injected.
    None,
    /// No child range is excluded; the node's entire range is injected.
    All,
    /// Named children are excluded, unnamed children are kept.
    Unnamed,
}

impl Default for IncludedChildren {
    /// Injections exclude all children unless a query says otherwise.
    fn default() -> Self {
        IncludedChildren::None
    }
}
// Compute the ranges that should be included when parsing an injection.
// This takes into account three things:
// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
// are the ranges of those nodes.
// * `includes_children` - For some injections, the content nodes' children should be
// excluded from the nested document, so that only the content nodes' *own* content
// is reparsed. For other injections, the content nodes' entire ranges should be
// reparsed, including the ranges of their children.
fn intersect_ranges (
parent_ranges : & [ Range ] ,
nodes : & [ Node ] ,
included_children : IncludedChildren ,
) -> Vec < Range > {
let mut cursor = nodes [ 0 ] . walk ( ) ;
let mut result = Vec ::new ( ) ;
let mut parent_range_iter = parent_ranges . iter ( ) ;
let mut parent_range = parent_range_iter
. next ( )
. expect ( "Layers should only be constructed with non-empty ranges vectors" ) ;
for node in nodes . iter ( ) {
let mut preceding_range = Range {
start_byte : 0 ,
start_point : Point ::new ( 0 , 0 ) ,
end_byte : node . start_byte ( ) ,
end_point : node . start_position ( ) ,
} ;
let following_range = Range {
start_byte : node . end_byte ( ) ,
start_point : node . end_position ( ) ,
end_byte : usize ::MAX ,
end_point : Point ::new ( usize ::MAX , usize ::MAX ) ,
} ;
for excluded_range in node
. children ( & mut cursor )
. filter_map ( | child | match included_children {
IncludedChildren ::None = > Some ( child . range ( ) ) ,
IncludedChildren ::All = > None ,
IncludedChildren ::Unnamed = > {
if child . is_named ( ) {
Some ( child . range ( ) )
} else {
None
}
}
} )
. chain ( [ following_range ] . iter ( ) . cloned ( ) )
{
let mut range = Range {
start_byte : preceding_range . end_byte ,
start_point : preceding_range . end_point ,
end_byte : excluded_range . start_byte ,
end_point : excluded_range . start_point ,
} ;
preceding_range = excluded_range ;
if range . end_byte < parent_range . start_byte {
continue ;
}
while parent_range . start_byte < = range . end_byte {
if parent_range . end_byte > range . start_byte {
if range . start_byte < parent_range . start_byte {
range . start_byte = parent_range . start_byte ;
range . start_point = parent_range . start_point ;
}
if parent_range . end_byte < range . end_byte {
if range . start_byte < parent_range . end_byte {
result . push ( Range {
start_byte : range . start_byte ,
start_point : range . start_point ,
end_byte : parent_range . end_byte ,
end_point : parent_range . end_point ,
} ) ;
}
range . start_byte = parent_range . end_byte ;
range . start_point = parent_range . end_point ;
} else {
if range . start_byte < range . end_byte {
result . push ( range ) ;
}
break ;
}
}
if let Some ( next_range ) = parent_range_iter . next ( ) {
parent_range = next_range ;
} else {
return result ;
}
}
}
}
result
}
impl < ' a > HighlightIter < ' a > {
fn emit_event (
& mut self ,
offset : usize ,
event : Option < HighlightEvent > ,
) -> Option < Result < HighlightEvent , Error > > {
let result ;
if self . byte_offset < offset {
result = Some ( Ok ( HighlightEvent ::Source {
start : self . byte_offset ,
end : offset ,
} ) ) ;
self . byte_offset = offset ;
self . next_event = event ;
} else {
result = event . map ( Ok ) ;
}
self . sort_layers ( ) ;
result
}
fn sort_layers ( & mut self ) {
while ! self . layers . is_empty ( ) {
if let Some ( sort_key ) = self . layers [ 0 ] . sort_key ( ) {
let mut i = 0 ;
while i + 1 < self . layers . len ( ) {
if let Some ( next_offset ) = self . layers [ i + 1 ] . sort_key ( ) {
if next_offset < sort_key {
i + = 1 ;
continue ;
}
} else {
let layer = self . layers . remove ( i + 1 ) ;
PARSER . with ( | ts_parser | {
let highlighter = & mut ts_parser . borrow_mut ( ) ;
highlighter . cursors . push ( layer . cursor ) ;
} ) ;
}
break ;
}
if i > 0 {
self . layers [ 0 .. ( i + 1 ) ] . rotate_left ( 1 ) ;
}
break ;
} else {
let layer = self . layers . remove ( 0 ) ;
PARSER . with ( | ts_parser | {
let highlighter = & mut ts_parser . borrow_mut ( ) ;
highlighter . cursors . push ( layer . cursor ) ;
} ) ;
}
}
}
}
// Iterator over highlight events. Each call to `next` yields either a `Source`
// range, a `HighlightStart`/`HighlightEnd` boundary, or `Error::Cancelled`.
// Only `self.layers[0]` is inspected on each pass; `sort_layers` is called
// after every step that consumes captures or emits an event.
impl < ' a > Iterator for HighlightIter < ' a > {
type Item = Result < HighlightEvent , Error > ;
// Returns `None` once no layers remain and the source has been emitted up to
// `self.source.len_bytes()`.
fn next ( & mut self ) -> Option < Self ::Item > {
' main : loop {
// If we've already determined the next highlight boundary, just return it.
if let Some ( e ) = self . next_event . take ( ) {
return Some ( Ok ( e ) ) ;
}
// Periodically check for cancellation, returning `Cancelled` error if the
// cancellation flag was flipped.
if let Some ( cancellation_flag ) = self . cancellation_flag {
self . iter_count + = 1 ;
if self . iter_count > = CANCELLATION_CHECK_INTERVAL {
self . iter_count = 0 ;
if cancellation_flag . load ( Ordering ::Relaxed ) ! = 0 {
return Some ( Err ( Error ::Cancelled ) ) ;
}
}
}
// If none of the layers have any more highlight boundaries, terminate.
if self . layers . is_empty ( ) {
let len = self . source . len_bytes ( ) ;
// Emit whatever source text is left as one final `Source` event.
return if self . byte_offset < len {
let result = Some ( Ok ( HighlightEvent ::Source {
start : self . byte_offset ,
end : len ,
} ) ) ;
self . byte_offset = len ;
result
} else {
None
} ;
}
// Get the next capture from whichever layer has the earliest highlight boundary.
let range ;
let layer = & mut self . layers [ 0 ] ;
let captures = layer . captures . get_mut ( ) ;
if let Some ( ( next_match , capture_index ) ) = captures . peek ( ) {
let next_capture = next_match . captures [ * capture_index ] ;
range = next_capture . node . byte_range ( ) ;
// If any previous highlight ends before this node starts, then before
// processing this capture, emit the source code up until the end of the
// previous highlight, and an end event for that highlight.
if let Some ( end_byte ) = layer . highlight_end_stack . last ( ) . cloned ( ) {
if end_byte < = range . start {
layer . highlight_end_stack . pop ( ) ;
return self . emit_event ( end_byte , Some ( HighlightEvent ::HighlightEnd ) ) ;
}
}
}
// If there are no more captures, then emit any remaining highlight end events.
// And if there are none of those, then just advance to the end of the document.
else if let Some ( end_byte ) = layer . highlight_end_stack . last ( ) . cloned ( ) {
layer . highlight_end_stack . pop ( ) ;
return self . emit_event ( end_byte , Some ( HighlightEvent ::HighlightEnd ) ) ;
} else {
return self . emit_event ( self . source . len_bytes ( ) , None ) ;
} ;
// The peek above succeeded, so consuming the capture cannot fail.
let ( mut match_ , capture_index ) = captures . next ( ) . unwrap ( ) ;
let mut capture = match_ . captures [ capture_index ] ;
// Remove from the local scope stack any local scopes that have already ended.
// NOTE(review): the `unwrap` assumes the scope stack is never empty here —
// presumably a root scope is pushed when the layer is built; confirm.
while range . start > layer . scope_stack . last ( ) . unwrap ( ) . range . end {
layer . scope_stack . pop ( ) ;
}
// If this capture is for tracking local variables, then process the
// local variable info.
let mut reference_highlight = None ;
let mut definition_highlight = None ;
// Patterns with an index below `highlights_pattern_index` are handled here
// as locals patterns rather than as highlights.
while match_ . pattern_index < layer . config . highlights_pattern_index {
// If the node represents a local scope, push a new local scope onto
// the scope stack.
if Some ( capture . index ) = = layer . config . local_scope_capture_index {
definition_highlight = None ;
let mut scope = LocalScope {
inherits : true ,
range : range . clone ( ) ,
local_defs : Vec ::new ( ) ,
} ;
// `local.scope-inherits` defaults to true when the property has no value;
// otherwise only the literal "true" enables inheritance.
for prop in layer . config . query . property_settings ( match_ . pattern_index ) {
if let "local.scope-inherits" = prop . key . as_ref ( ) {
scope . inherits =
prop . value . as_ref ( ) . map_or ( true , | r | r . as_ref ( ) = = "true" ) ;
}
}
layer . scope_stack . push ( scope ) ;
}
// If the node represents a definition, add a new definition to the
// local scope at the top of the scope stack.
else if Some ( capture . index ) = = layer . config . local_def_capture_index {
reference_highlight = None ;
let scope = layer . scope_stack . last_mut ( ) . unwrap ( ) ;
// The value range stays empty (0..0) unless the match also captured the
// definition's value node.
let mut value_range = 0 .. 0 ;
for capture in match_ . captures {
if Some ( capture . index ) = = layer . config . local_def_value_capture_index {
value_range = capture . node . byte_range ( ) ;
}
}
let name = byte_range_to_str ( range . clone ( ) , self . source ) ;
scope . local_defs . push ( LocalDef {
name ,
value_range ,
highlight : None ,
} ) ;
// Keep a handle to the new definition's highlight slot so it can be
// filled in once the node's highlight is known (see below).
definition_highlight = scope . local_defs . last_mut ( ) . map ( | s | & mut s . highlight ) ;
}
// If the node represents a reference, then try to find the corresponding
// definition in the scope stack.
else if Some ( capture . index ) = = layer . config . local_ref_capture_index
& & definition_highlight . is_none ( )
{
// NOTE(review): this assignment is redundant — the branch condition
// already requires `definition_highlight` to be `None`.
definition_highlight = None ;
let name = byte_range_to_str ( range . clone ( ) , self . source ) ;
// Search innermost scope first; within a scope the most recent definition
// wins, and only definitions whose value ends at or before this
// reference's start are considered.
for scope in layer . scope_stack . iter ( ) . rev ( ) {
if let Some ( highlight ) = scope . local_defs . iter ( ) . rev ( ) . find_map ( | def | {
if def . name = = name & & range . start > = def . value_range . end {
Some ( def . highlight )
} else {
None
}
} ) {
reference_highlight = highlight ;
break ;
}
// A scope that does not inherit hides all enclosing scopes.
if ! scope . inherits {
break ;
}
}
}
// Continue processing any additional matches for the same node.
if let Some ( ( next_match , next_capture_index ) ) = captures . peek ( ) {
let next_capture = next_match . captures [ * next_capture_index ] ;
if next_capture . node = = capture . node {
capture = next_capture ;
match_ = captures . next ( ) . unwrap ( ) . 0 ;
continue ;
}
}
// No further capture for this node: captures were consumed, so re-sort the
// layers and start over.
self . sort_layers ( ) ;
continue 'main ;
}
// Otherwise, this capture must represent a highlight.
// If this exact range has already been highlighted by an earlier pattern, or by
// a different layer, then skip over this one.
if let Some ( ( last_start , last_end , last_depth ) ) = self . last_highlight_range {
if range . start = = last_start & & range . end = = last_end & & layer . depth < last_depth {
self . sort_layers ( ) ;
continue 'main ;
}
}
// If the current node was found to be a local variable, then skip over any
// highlighting patterns that are disabled for local variables.
if definition_highlight . is_some ( ) | | reference_highlight . is_some ( ) {
while layer . config . non_local_variable_patterns [ match_ . pattern_index ] {
// presumably drops this match from the query cursor so its remaining
// captures are not yielded — confirm against `QueryMatch::remove`.
match_ . remove ( ) ;
if let Some ( ( next_match , next_capture_index ) ) = captures . peek ( ) {
let next_capture = next_match . captures [ * next_capture_index ] ;
if next_capture . node = = capture . node {
capture = next_capture ;
match_ = captures . next ( ) . unwrap ( ) . 0 ;
continue ;
}
}
self . sort_layers ( ) ;
continue 'main ;
}
}
// Once a highlighting pattern is found for the current node, skip over
// any later highlighting patterns that also match this node. Captures
// for a given node are ordered by pattern index, so these subsequent
// captures are guaranteed to be for highlighting, not injections or
// local variables.
while let Some ( ( next_match , next_capture_index ) ) = captures . peek ( ) {
let next_capture = next_match . captures [ * next_capture_index ] ;
if next_capture . node = = capture . node {
captures . next ( ) ;
} else {
break ;
}
}
// Look up the highlight configured for this capture; the `if let` below
// shows the entry can be `None`.
let current_highlight = layer . config . highlight_indices . load ( ) [ capture . index as usize ] ;
// If this node represents a local definition, then store the current
// highlight value on the local scope entry representing this node.
if let Some ( definition_highlight ) = definition_highlight {
* definition_highlight = current_highlight ;
}
// Emit a scope start event and push the node's end position to the stack.
// A highlight inherited from a local definition takes precedence over the
// capture's own highlight.
if let Some ( highlight ) = reference_highlight . or ( current_highlight ) {
self . last_highlight_range = Some ( ( range . start , range . end , layer . depth ) ) ;
layer . highlight_end_stack . push ( range . end ) ;
return self
. emit_event ( range . start , Some ( HighlightEvent ::HighlightStart ( highlight ) ) ) ;
}
// Nothing to emit for this capture; re-sort and try the next one.
self . sort_layers ( ) ;
}
}
}
/// The piece of information an injection uses to identify which language to
/// inject.
#[ derive(Debug, Clone) ]
pub enum InjectionLanguageMarker < ' a > {
/// A language name, e.g. the value of an `injection.language` `#set!`
/// property.
Name ( Cow < ' a , str > ) ,
/// A path-valued marker.
/// NOTE(review): presumably the file name the injected content is associated
/// with — confirm where this variant is constructed.
Filename ( Cow < ' a , Path > ) ,
/// The interpreter name extracted from a shebang line with the `SHEBANG`
/// regex (its first capture group).
Shebang ( String ) ,
}
// Regex matching a shebang and capturing the interpreter name in group 1.
//
// After `#!` and optional whitespace it skips an optional path prefix ending
// in `/` or `\`, optionally followed by `env` and any `-flag` arguments. The
// captured name is the following run of characters that are not whitespace,
// `.` or a digit, so a trailing version suffix (as in `python3.11`) is not
// part of the capture.
const SHEBANG : & str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)" ;
/// Iterator adapter that overlays a list of highlight spans onto an existing
/// stream of `HighlightEvent`s. Built with `merge`.
pub struct Merge < I > {
// The underlying highlight event stream.
iter : I ,
// Remaining spans to overlay, as `(index, range)` pairs; the index is wrapped
// in `Highlight(..)` when a span is emitted and the range is compared against
// the bounds of `Source` events.
spans : Box < dyn Iterator < Item = ( usize , std ::ops ::Range < usize > ) > > ,
// One-item lookahead into `iter`; may hold a trimmed `Source` event when only
// part of the original event has been emitted so far.
next_event : Option < HighlightEvent > ,
// One-item lookahead into `spans`; may hold a span whose start was trimmed.
next_span : Option < ( usize , std ::ops ::Range < usize > ) > ,
// Events already decided but not yet returned. Popped from the back, so
// events are pushed in reverse order.
queue : Vec < HighlightEvent > ,
}
/// Overlay a list of `(highlight index, range)` spans on top of a highlight
/// event stream.
///
/// The returned iterator is primed immediately: the first event of `iter` and
/// the first span are pulled before this function returns.
pub fn merge<I: Iterator<Item = HighlightEvent>>(
    iter: I,
    spans: Vec<(usize, std::ops::Range<usize>)>,
) -> Merge<I> {
    let mut events = iter;
    let mut remaining_spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>> =
        Box::new(spans.into_iter());

    // Fill both lookahead slots, events first.
    let next_event = events.next();
    let next_span = remaining_spans.next();

    Merge {
        iter: events,
        spans: remaining_spans,
        next_event,
        next_span,
        queue: Vec::new(),
    }
}
impl < I : Iterator < Item = HighlightEvent > > Iterator for Merge < I > {
type Item = HighlightEvent ;
fn next ( & mut self ) -> Option < Self ::Item > {
use HighlightEvent ::* ;
if let Some ( event ) = self . queue . pop ( ) {
return Some ( event ) ;
}
loop {
match ( self . next_event , & self . next_span ) {
// this happens when range is partially or fully offscreen
( Some ( Source { start , .. } ) , Some ( ( span , range ) ) ) if start > range . start = > {
if start > range . end {
self . next_span = self . spans . next ( ) ;
} else {
self . next_span = Some ( ( * span , start .. range . end ) ) ;
} ;
}
_ = > break ,
}
}
match ( self . next_event , & self . next_span ) {
( Some ( HighlightStart ( i ) ) , _ ) = > {
self . next_event = self . iter . next ( ) ;
Some ( HighlightStart ( i ) )
}
( Some ( HighlightEnd ) , _ ) = > {
self . next_event = self . iter . next ( ) ;
Some ( HighlightEnd )
}
( Some ( Source { start , end } ) , Some ( ( _ , range ) ) ) if start < range . start = > {
let intersect = range . start . min ( end ) ;
let event = Source {
start ,
end : intersect ,
} ;
if end = = intersect {
// the event is complete
self . next_event = self . iter . next ( ) ;
} else {
// subslice the event
self . next_event = Some ( Source {
start : intersect ,
end ,
} ) ;
} ;
Some ( event )
}
( Some ( Source { start , end } ) , Some ( ( span , range ) ) ) if start = = range . start = > {
let intersect = range . end . min ( end ) ;
let event = HighlightStart ( Highlight ( * span ) ) ;
// enqueue in reverse order
self . queue . push ( HighlightEnd ) ;
self . queue . push ( Source {
start ,
end : intersect ,
} ) ;
if end = = intersect {
// the event is complete
self . next_event = self . iter . next ( ) ;
} else {
// subslice the event
self . next_event = Some ( Source {
start : intersect ,
end ,
} ) ;
} ;
if intersect = = range . end {
self . next_span = self . spans . next ( ) ;
} else {
self . next_span = Some ( ( * span , intersect .. range . end ) ) ;
}
Some ( event )
}
( Some ( event ) , None ) = > {
self . next_event = self . iter . next ( ) ;
Some ( event )
}
// Can happen if cursor at EOF and/or diagnostic reaches past the end.
// We need to actually emit events for the cursor-at-EOF situation,
// even though the range is past the end of the text. This needs to be
// handled appropriately by the drawing code by not assuming that
// all `Source` events point to valid indices in the rope.
( None , Some ( ( span , range ) ) ) = > {
let event = HighlightStart ( Highlight ( * span ) ) ;
self . queue . push ( HighlightEnd ) ;
self . queue . push ( Source {
start : range . start ,
end : range . end ,
} ) ;
self . next_span = self . spans . next ( ) ;
Some ( event )
}
( None , None ) = > None ,
e = > unreachable! ( "{:?}" , e ) ,
}
}
}
// Use `TreeCursor` to pretty-print `:tree-sitter-subtree` (#4606)
//
// The previous `:tree-sitter-subtree` implementation had a bug with field
// names when the field name belongs to an unnamed child node. Take this Ruby
// example:
//
//     def self.method_name
//       true
//     end
//
// The subtree given by tree-sitter-cli is:
//
//     (singleton_method [2, 0] - [4, 3]
//       object: (self [2, 4] - [2, 8])
//       name: (identifier [2, 9] - [2, 20])
//       body: (body_statement [3, 2] - [3, 6]
//         (true [3, 2] - [3, 6])))
//
// But the `:tree-sitter-subtree` output was:
//
//     (singleton_method
//       object: (self)
//       body: (identifier)
//       (body_statement (true)))
//
// The `singleton_method` rule defines the `name` and `body` fields in an
// unnamed helper rule `_method_rest`, and the old implementation of
// `pretty_print_tree_impl` would pass the `field_name` down from the named
// `singleton_method` node.
//
// To fix it, the pretty-printer uses the [TreeCursor] API, which is
// recommended by the tree-sitter docs for traversing the tree.
// `TreeCursor::field_name` accurately determines the field name for the
// current cursor position even when the node is unnamed.
//
// [TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
fn node_is_visible ( node : & Node ) -> bool {
node . is_missing ( ) | | ( node . is_named ( ) & & node . language ( ) . node_kind_is_visible ( node . kind_id ( ) ) )
}
pub fn pretty_print_tree < W : fmt ::Write > ( fmt : & mut W , node : Node ) -> fmt ::Result {
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
if node . child_count ( ) = = 0 {
if node_is_visible ( & node ) {
write! ( fmt , "({})" , node . kind ( ) )
} else {
write! ( fmt , "\"{}\"" , node . kind ( ) )
}
} else {
pretty_print_tree_impl ( fmt , & mut node . walk ( ) , 0 )
}
}
fn pretty_print_tree_impl < W : fmt ::Write > (
fmt : & mut W ,
cursor : & mut tree_sitter ::TreeCursor ,
depth : usize ,
) -> fmt ::Result {
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
let node = cursor . node ( ) ;
let visible = node_is_visible ( & node ) ;
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
if visible {
let indentation_columns = depth * 2 ;
write! ( fmt , "{:indentation_columns$}" , "" ) ? ;
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
if let Some ( field_name ) = cursor . field_name ( ) {
write! ( fmt , "{}: " , field_name ) ? ;
}
write! ( fmt , "({}" , node . kind ( ) ) ? ;
}
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
// Handle children.
if cursor . goto_first_child ( ) {
loop {
if node_is_visible ( & cursor . node ( ) ) {
fmt . write_char ( '\n' ) ? ;
}
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
pretty_print_tree_impl ( fmt , cursor , depth + 1 ) ? ;
if ! cursor . goto_next_sibling ( ) {
break ;
}
}
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
let moved = cursor . goto_parent ( ) ;
// The parent of the first child must exist, and must be `node`.
debug_assert! ( moved ) ;
debug_assert! ( cursor . node ( ) = = node ) ;
}
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
if visible {
fmt . write_char ( ')' ) ? ;
}
Ok ( ( ) )
}
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
#[ cfg(test) ]
mod test {
use super ::* ;
use crate ::{ Rope , Transaction } ;
#[ test ]
fn test_textobject_queries ( ) {
let query_str = r #"
( line_comment ) + @ quantified_nodes
( ( line_comment ) + ) @ quantified_nodes_grouped
( ( line_comment ) ( line_comment ) ) @ multiple_nodes_grouped
" #;
let source = Rope ::from_str (
r #"
/// a comment on
/// multiple lines
" #,
) ;
let loader = Loader ::new ( Configuration {
language : vec ! [ ] ,
language_server : HashMap ::new ( ) ,
} )
. unwrap ( ) ;
let language = get_language ( "rust" ) . unwrap ( ) ;
let query = Query ::new ( & language , query_str ) . unwrap ( ) ;
let textobject = TextObjectQuery { query } ;
let mut cursor = QueryCursor ::new ( ) ;
let config = HighlightConfiguration ::new ( language , "" , "" , "" ) . unwrap ( ) ;
let syntax = Syntax ::new (
source . slice ( .. ) ,
Arc ::new ( config ) ,
Arc ::new ( ArcSwap ::from_pointee ( loader ) ) ,
)
. unwrap ( ) ;
let root = syntax . tree ( ) . root_node ( ) ;
let mut test = | capture , range | {
let matches : Vec < _ > = textobject
. capture_nodes ( capture , root , source . slice ( .. ) , & mut cursor )
. unwrap ( )
. collect ( ) ;
assert_eq! (
matches [ 0 ] . byte_range ( ) ,
range ,
"@{} expected {:?}" ,
capture ,
range
)
} ;
test ( "quantified_nodes" , 1 .. 36 ) ;
// NOTE: Enable after implementing proper node group capturing
// test("quantified_nodes_grouped", 1..36);
// test("multiple_nodes_grouped", 1..36);
}
/// Parses a tiny Rust snippet with the real highlight and injection queries
/// and checks the shape of the resulting syntax tree.
#[test]
fn test_parser() {
    // Highlight scopes the configuration is asked to recognise.
    let highlight_names = [
        "attribute",
        "constant",
        "function.builtin",
        "function",
        "keyword",
        "operator",
        "property",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "string",
        "string.special",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.parameter",
    ]
    .iter()
    .map(|&name| String::from(name))
    .collect::<Vec<String>>();

    // An empty loader is enough: no language definitions are looked up by name here.
    let empty_configuration = Configuration {
        language: vec![],
        language_server: HashMap::new(),
    };
    let loader = Loader::new(empty_configuration).unwrap();

    let language = get_language("rust").unwrap();
    let highlights_query =
        std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
            .unwrap();
    let injections_query =
        std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
            .unwrap();
    // The third query (locals.scm) is intentionally left empty.
    let config =
        HighlightConfiguration::new(language, &highlights_query, &injections_query, "").unwrap();
    config.configure(&highlight_names);

    let source = Rope::from_str(
        "
struct Stuff { }
fn main ( ) { }
",
    );

    let syntax = Syntax::new(
        source.slice(..),
        Arc::new(config),
        Arc::new(ArcSwap::from_pointee(loader)),
    )
    .unwrap();

    let root = syntax.tree().root_node();
    assert_eq!(root.kind(), "source_file");

    let expected_sexp = concat!(
        "(source_file ",
        "(struct_item name: (type_identifier) body: (field_declaration_list)) ",
        "(function_item name: (identifier) parameters: (parameters) body: (block)))"
    );
    assert_eq!(root.to_sexp(), expected_sexp);

    // The struct declaration is the first child of the root.
    let first_child = root.child(0).unwrap();
    assert_eq!(first_child.kind(), "struct_item");
}
/// Checks that `generate_edits` turns a transaction's change set into the
/// `InputEdit` values (byte offsets plus row/column points) expected here.
#[test]
fn test_input_edits() {
    use tree_sitter::InputEdit;

    // Case 1: a replacement on the first line followed by a deletion that
    // crosses the line break.
    let text = Rope::from("hello world!\ntest 123");
    let changes = vec![(6, 11, Some("test".into())), (12, 17, None)];
    let transaction = Transaction::change(&text, changes.into_iter());
    let edits = generate_edits(text.slice(..), transaction.changes());

    let replace_word = InputEdit {
        start_byte: 6,
        old_end_byte: 11,
        new_end_byte: 10,
        start_position: Point { row: 0, column: 6 },
        old_end_position: Point { row: 0, column: 11 },
        new_end_position: Point { row: 0, column: 10 },
    };
    let delete_across_newline = InputEdit {
        start_byte: 12,
        old_end_byte: 17,
        new_end_byte: 12,
        start_position: Point { row: 0, column: 12 },
        old_end_position: Point { row: 1, column: 4 },
        new_end_position: Point { row: 0, column: 12 },
    };
    assert_eq!(edits, &[replace_word, delete_across_newline]);

    // Case 2: the official example from tree-sitter, a pure insertion.
    let mut text = Rope::from("fn test() {}");
    let insertion = vec![(8, 8, Some("a: u32".into()))];
    let transaction = Transaction::change(&text, insertion.into_iter());
    // Edits are computed against the document as it was before the change is applied.
    let edits = generate_edits(text.slice(..), transaction.changes());
    transaction.apply(&mut text);
    assert_eq!(text, "fn test(a: u32) {}");

    let insert_parameter = InputEdit {
        start_byte: 8,
        old_end_byte: 8,
        new_end_byte: 14,
        start_position: Point { row: 0, column: 8 },
        old_end_position: Point { row: 0, column: 8 },
        new_end_position: Point { row: 0, column: 14 },
    };
    assert_eq!(edits, &[insert_parameter]);
}
#[ track_caller ]
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
fn assert_pretty_print (
language_name : & str ,
source : & str ,
expected : & str ,
start : usize ,
end : usize ,
) {
let source = Rope ::from_str ( source ) ;
let loader = Loader ::new ( Configuration {
language : vec ! [ ] ,
language_server : HashMap ::new ( ) ,
} )
. unwrap ( ) ;
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
2 years ago
let language = get_language ( language_name ) . unwrap ( ) ;
let config = HighlightConfiguration ::new ( language , "" , "" , "" ) . unwrap ( ) ;
let syntax = Syntax ::new (
source . slice ( .. ) ,
Arc ::new ( config ) ,
Arc ::new ( ArcSwap ::from_pointee ( loader ) ) ,
)
. unwrap ( ) ;
let root = syntax
. tree ( )
. root_node ( )
. descendant_for_byte_range ( start , end )
. unwrap ( ) ;
let mut output = String ::new ( ) ;
pretty_print_tree ( & mut output , root ) . unwrap ( ) ;
assert_eq! ( expected , output ) ;
}
/// Exercises `pretty_print_tree` through `assert_pretty_print` on a single
/// node, a nested tree with fields, a single token, an error node, and a Ruby
/// method whose fields are defined by an unnamed helper rule.
///
/// Every fixture starts and ends exactly on the node under test: the helper
/// selects by the byte range `0..source.len()`, so padding around the source
/// would select an enclosing node instead.
// NOTE(review): the expected strings assume the printer indents children by
// two spaces per depth level — confirm against `pretty_print_tree`.
#[test]
fn test_pretty_print() {
    let source = r#"/// Hello"#;
    assert_pretty_print("rust", source, "(line_comment)", 0, source.len());

    // A large tree should be indented with fields:
    let source = r#"fn main() {
    println!("Hello, World!");
}"#;
    assert_pretty_print(
        "rust",
        source,
        concat!(
            "(function_item\n",
            "  name: (identifier)\n",
            "  parameters: (parameters)\n",
            "  body: (block\n",
            "    (expression_statement\n",
            "      (macro_invocation\n",
            "        macro: (identifier)\n",
            "        (token_tree\n",
            "          (string_literal))))))",
        ),
        0,
        source.len(),
    );

    // Selecting a token should print just that token:
    let source = r#"fn main() {}"#;
    assert_pretty_print("rust", source, r#""fn""#, 0, 1);

    // Error nodes are printed as errors:
    let source = r#"}{"#;
    assert_pretty_print("rust", source, "(ERROR)", 0, source.len());

    // Fields broken under unnamed nodes are determined correctly.
    // In the following source, `object` belongs to the `singleton_method`
    // rule but `name` and `body` belong to an unnamed helper `_method_rest`.
    // This can cause a bug with a pretty-printing implementation that
    // uses `Node::field_name_for_child` to determine field names but is
    // fixed when using `tree_sitter::TreeCursor::field_name`.
    let source = "def self.method_name
  true
end";
    assert_pretty_print(
        "ruby",
        source,
        concat!(
            "(singleton_method\n",
            "  object: (self)\n",
            "  name: (identifier)\n",
            "  body: (body_statement\n",
            "    (true)))"
        ),
        0,
        source.len(),
    );
}
/// Checks that `load_runtime_file` returns content for a query file that
/// exists and an error for one that does not.
#[test]
fn test_load_runtime_file() {
    // A query file present in the runtime directory must load with content.
    let indents_query = load_runtime_file("rust", "indents.scm").unwrap();
    assert!(!indents_query.is_empty());

    // A file name that does not exist must be reported as an error.
    let missing = load_runtime_file("rust", "does-not-exist");
    assert!(missing.is_err());
}
}