From 9123d3fbb843d778d19569dfee48555584014ee8 Mon Sep 17 00:00:00 2001 From: "Lucas @ StarkWare" <70894690+LucasLvy@users.noreply.github.com> Date: Wed, 12 Jun 2024 02:20:13 +0200 Subject: [PATCH] feat(cairo): update tree-sitter grammar and queries (#10919) * feat(cairo): update tree-sitter grammar and queries * fix suggestions --- languages.toml | 5 +- runtime/queries/cairo/highlights.scm | 363 +++++++++++++++++++++++++- runtime/queries/cairo/indents.scm | 119 ++++++++- runtime/queries/cairo/injections.scm | 2 +- runtime/queries/cairo/locals.scm | 26 +- runtime/queries/cairo/textobjects.scm | 74 +++++- 6 files changed, 583 insertions(+), 6 deletions(-) diff --git a/languages.toml b/languages.toml index 1888e8f4e..436937cd8 100644 --- a/languages.toml +++ b/languages.toml @@ -2074,9 +2074,12 @@ file-types = ["cairo"] comment-token = "//" indent = { tab-width = 4, unit = " " } # auto-format = true -grammar = "rust" language-servers = [ "cairo-language-server" ] +[[grammar]] +name = "cairo" +source = { git = "https://github.com/starkware-libs/tree-sitter-cairo", rev = "0596baab741ffacdc65c761d5d5ffbbeae97f033" } + [[language]] name = "cpon" scope = "scope.cpon" diff --git a/runtime/queries/cairo/highlights.scm b/runtime/queries/cairo/highlights.scm index ae55c7faf..d2cabd1c5 100644 --- a/runtime/queries/cairo/highlights.scm +++ b/runtime/queries/cairo/highlights.scm @@ -1 +1,362 @@ -; inherits: rust +; ------- +; Tree-Sitter doesn't allow overrides in regards to captures, +; though it is possible to affect the child node of a captured +; node. Thus, the approach here is to flip the order so that +; overrides are unnecessary. 
+; ------- + +; ------- +; Types +; ------- + +(type_parameters + (type_identifier) @type.parameter) +(constrained_type_parameter + left: (type_identifier) @type.parameter) + +; --- +; Primitives +; --- + +(primitive_type) @type.builtin +(boolean_literal) @constant.builtin.boolean +(numeric_literal) @constant.numeric.integer +[ + (string_literal) + (shortstring_literal) +] @string +[ + (line_comment) +] @comment + +; --- +; Extraneous +; --- + +(enum_variant (identifier) @type.enum.variant) + +(field_initializer + (field_identifier) @variable.other.member) +(shorthand_field_initializer + (identifier) @variable.other.member) +(shorthand_field_identifier) @variable.other.member + + +; --- +; Punctuation +; --- + +[ + "::" + "." + ";" + "," +] @punctuation.delimiter + +[ + "(" + ")" + "[" + "]" + "{" + "}" +] @punctuation.bracket +(type_arguments + [ + "<" + ">" + ] @punctuation.bracket) +(type_parameters + [ + "<" + ">" + ] @punctuation.bracket) + +; --- +; Variables +; --- + +(let_declaration + pattern: [ + ((identifier) @variable) + ((tuple_pattern + (identifier) @variable)) + ]) + +; It needs to be anonymous to not conflict with `call_expression` further below. +(_ + value: (field_expression + value: (identifier)? 
@variable + field: (field_identifier) @variable.other.member)) + +(parameter + pattern: (identifier) @variable.parameter) + +; ------- +; Keywords +; ------- +[ + "match" + "if" + "else" +] @keyword.control.conditional + +[ + "while" + "loop" +] @keyword.control.repeat + +[ + "break" + "continue" + "return" +] @keyword.control.return + +"use" @keyword.control.import +(mod_item "mod" @keyword.control.import !body) +(use_as_clause "as" @keyword.control.import) + + +[ + (crate) + (super) + "as" + "pub" + "mod" + (extern) + (nopanic) + + "impl" + "trait" + "of" + + "default" +] @keyword + +[ + "struct" + "enum" + "type" +] @keyword.storage.type + +"let" @keyword.storage +"fn" @keyword.function + +(mutable_specifier) @keyword.storage.modifier.mut +(ref_specifier) @keyword.storage.modifier.ref + +(snapshot_type "@" @keyword.storage.modifier.ref) + +[ + "const" + "ref" +] @keyword.storage.modifier + +; TODO: variable.mut to highlight mutable identifiers via locals.scm + +; ------- +; Constructors +; ------- +; TODO: this is largely guesswork, remove it once we get actual info from locals.scm or r-a + +(struct_expression + name: (type_identifier) @constructor) + +(tuple_enum_pattern + type: [ + (identifier) @constructor + (scoped_identifier + name: (identifier) @constructor) + ]) +(struct_pattern + type: [ + ((type_identifier) @constructor) + (scoped_type_identifier + name: (type_identifier) @constructor) + ]) +(match_pattern + ((identifier) @constructor) (#match? @constructor "^[A-Z]")) +(or_pattern + ((identifier) @constructor) + ((identifier) @constructor) + (#match? @constructor "^[A-Z]")) + +; ------- +; Guess Other Types +; ------- + +((identifier) @constant + (#match? @constant "^[A-Z][A-Z\\d_]*$")) + +; --- +; PascalCase identifiers in call_expressions (e.g. `Ok()`) +; are assumed to be enum constructors. +; --- + +(call_expression + function: [ + ((identifier) @constructor + (#match? 
@constructor "^[A-Z]")) + (scoped_identifier + name: ((identifier) @constructor + (#match? @constructor "^[A-Z]"))) + ]) + +; --- +; PascalCase identifiers under a path which is also PascalCase +; are assumed to be constructors if they have methods or fields. +; --- + +(field_expression + value: (scoped_identifier + path: [ + (identifier) @type + (scoped_identifier + name: (identifier) @type) + ] + name: (identifier) @constructor + (#match? @type "^[A-Z]") + (#match? @constructor "^[A-Z]"))) + +; --- +; Other PascalCase identifiers are assumed to be structs. +; --- + +((identifier) @type + (#match? @type "^[A-Z]")) + +; ------- +; Functions +; ------- + +(call_expression + function: [ + ((identifier) @function) + (scoped_identifier + name: (identifier) @function) + (field_expression + field: (field_identifier) @function) + ]) +(generic_function + function: [ + ((identifier) @function) + (scoped_identifier + name: (identifier) @function) + (field_expression + field: (field_identifier) @function.method) + ]) +(function_item + (function + name: (identifier) @function)) + +(function_signature_item + (function + name: (identifier) @function)) + +(external_function_item + (function + name: (identifier) @function)) + +; --- +; Macros +; --- + +(attribute + (identifier) @special + arguments: (token_tree (identifier) @type) + (#eq? @special "derive") +) + +(attribute + (identifier) @function.macro) +(attribute + [ + (identifier) @function.macro + (scoped_identifier + name: (identifier) @function.macro) + ] + (token_tree (identifier) @function.macro)?) + +(inner_attribute_item) @attribute + +(macro_invocation + macro: [ + ((identifier) @function.macro) + (scoped_identifier + name: (identifier) @function.macro) + ] + "!" @function.macro) + + +; ------- +; Operators +; ------- + +[ + "*" + "->" + "=>" + "<=" + "=" + "==" + "!" 
+ "!=" + "%" + "%=" + "@" + "&&" + "|" + "||" + "^" + "*" + "*=" + "-" + "-=" + "+" + "+=" + "/" + "/=" + ">" + "<" + ">=" + ">>" + "<<" +] @operator + +; ------- +; Paths +; ------- + +(use_declaration + argument: (identifier) @namespace) +(use_wildcard + (identifier) @namespace) +(mod_item + name: (identifier) @namespace) +(scoped_use_list + path: (identifier)? @namespace) +(use_list + (identifier) @namespace) +(use_as_clause + path: (identifier)? @namespace + alias: (identifier) @namespace) + +; --- +; Remaining Paths +; --- + +(scoped_identifier + path: (identifier)? @namespace + name: (identifier) @namespace) +(scoped_type_identifier + path: (identifier) @namespace) + +; ------- +; Remaining Identifiers +; ------- + +"?" @special + +(type_identifier) @type +(identifier) @variable +(field_identifier) @variable.other.member diff --git a/runtime/queries/cairo/indents.scm b/runtime/queries/cairo/indents.scm index ae55c7faf..35c162429 100644 --- a/runtime/queries/cairo/indents.scm +++ b/runtime/queries/cairo/indents.scm @@ -1 +1,118 @@ -; inherits: rust +[ + (use_list) + (block) + (match_block) + (arguments) + (parameters) + (declaration_list) + (field_declaration_list) + (field_initializer_list) + (struct_pattern) + (tuple_pattern) + (unit_expression) + (enum_variant_list) + (call_expression) + (binary_expression) + (field_expression) + (tuple_expression) + (array_expression) + + (token_tree) +] @indent + +[ + "}" + "]" + ")" +] @outdent + +; Indent the right side of assignments. +; The #not-same-line? predicate is required to prevent an extra indent for e.g. +; an else-clause where the previous if-clause starts on the same line as the assignment. +(assignment_expression + . + (_) @expr-start + right: (_) @indent + (#not-same-line? @indent @expr-start) + (#set! "scope" "all") +) +(compound_assignment_expr + . + (_) @expr-start + right: (_) @indent + (#not-same-line? @indent @expr-start) + (#set! 
"scope" "all") +) +(let_declaration + "let" @expr-start + value: (_) @indent + alternative: (_)? @indent + (#not-same-line? @indent @expr-start) + (#set! "scope" "all") +) +(let_condition + . + (_) @expr-start + value: (_) @indent + (#not-same-line? @indent @expr-start) + (#set! "scope" "all") +) +(if_expression + . + (_) @expr-start + condition: (_) @indent + (#not-same-line? @indent @expr-start) + (#set! "scope" "all") +) +(field_pattern + . + (_) @expr-start + pattern: (_) @indent + (#not-same-line? @indent @expr-start) + (#set! "scope" "all") +) +; Indent type aliases that span multiple lines, similar to +; regular assignment expressions +(type_item + . + (_) @expr-start + type: (_) @indent + (#not-same-line? @indent @expr-start) + (#set! "scope" "all") +) + +; Some field expressions where the left part is a multiline expression are not +; indented by cargo fmt. +; Because this multiline expression might be nested in an arbitrary number of +; field expressions, this can only be matched using a Regex. +(field_expression + value: (_) @val + "." @outdent + ; Check whether the first line ends with `(`, `{` or `[` (up to whitespace). + (#match? @val "(\\A[^\\n\\r]+(\\(|\\{|\\[)[\\t ]*(\\n|\\r))") +) +; Same as above, but with an additional `call_expression`. This is required since otherwise +; the arguments of the function call won't be outdented. +(call_expression + function: (field_expression + value: (_) @val + "." @outdent + (#match? @val "(\\A[^\\n\\r]+(\\(|\\{|\\[)[\\t ]*(\\n|\\r))") + ) + arguments: (_) @outdent +) + + +; Indent if guards in patterns. +; Since the tree-sitter grammar doesn't create a node for the if expression, +; it's not possible to do this correctly in all cases. Indenting the tail of the +; whole pattern whenever it contains an `if` only fails if the `if` appears after +; the second line of the pattern (which should only rarely be the case) +(match_pattern + . + (_) @expr-start + "if" @pattern-guard + (#not-same-line? 
@expr-start @pattern-guard) +) @indent + + diff --git a/runtime/queries/cairo/injections.scm b/runtime/queries/cairo/injections.scm index a2358b1ca..e07c83b4d 100644 --- a/runtime/queries/cairo/injections.scm +++ b/runtime/queries/cairo/injections.scm @@ -1,3 +1,3 @@ -([(line_comment) (block_comment)] @injection.content +([(line_comment)] @injection.content (#set! injection.language "comment")) diff --git a/runtime/queries/cairo/locals.scm b/runtime/queries/cairo/locals.scm index ae55c7faf..35acb55c6 100644 --- a/runtime/queries/cairo/locals.scm +++ b/runtime/queries/cairo/locals.scm @@ -1 +1,25 @@ -; inherits: rust +; Scopes + +[ + (function_item) + (struct_item) + (enum_item) + (type_item) + (trait_item) + (impl_item) + (block) +] @local.scope + +; Definitions + +(parameter + (identifier) @local.definition) + +(type_parameters + (type_identifier) @local.definition) +(constrained_type_parameter + left: (type_identifier) @local.definition) + +; References +(identifier) @local.reference +(type_identifier) @local.reference diff --git a/runtime/queries/cairo/textobjects.scm b/runtime/queries/cairo/textobjects.scm index ae55c7faf..4031873de 100644 --- a/runtime/queries/cairo/textobjects.scm +++ b/runtime/queries/cairo/textobjects.scm @@ -1 +1,73 @@ -; inherits: rust +(function_item + body: (_) @function.inside) @function.around + +(struct_item + body: (_) @class.inside) @class.around + +(enum_item + body: (_) @class.inside) @class.around + +(trait_item + body: (_) @class.inside) @class.around + +(impl_item + body: (_) @class.inside) @class.around + +(parameters + ((_) @parameter.inside . ","? @parameter.around) @parameter.around) + +(type_parameters + ((_) @parameter.inside . ","? @parameter.around) @parameter.around) + +(type_arguments + ((_) @parameter.inside . ","? @parameter.around) @parameter.around) + +(arguments + ((_) @parameter.inside . ","? @parameter.around) @parameter.around) + +(field_initializer_list + ((_) @parameter.inside . ","? 
@parameter.around) @parameter.around) + +[ + (line_comment) +] @comment.inside + +(line_comment)+ @comment.around + +(; #[test] + (attribute_item + (attribute + (identifier) @_test_attribute)) + ; allow other attributes like #[should_panic] and comments + [ + (attribute_item) + (line_comment) + ]* + ; the test function + (function_item + body: (_) @test.inside) @test.around + (#eq? @_test_attribute "test")) + +(array_expression + (_) @entry.around) + +(tuple_expression + (_) @entry.around) + +(tuple_pattern + (_) @entry.around) + +; Commonly used array macro initializer is special cased +(macro_invocation + (identifier) @_id (token_tree (_) @entry.around) + (#eq? @_id "array")) + +(enum_variant) @entry.around + +(field_declaration + (_) @entry.inside) @entry.around + +(field_initializer + (_) @entry.inside) @entry.around + +(shorthand_field_initializer) @entry.around