From 64bfd3066e0ec52d699af60e2ec57b00c419d25a Mon Sep 17 00:00:00 2001 From: vikingowl Date: Fri, 27 Mar 2026 09:42:45 +0100 Subject: [PATCH] chore: scaffold tree-sitter-rune project --- .editorconfig | 15 + .gitignore | 40 + Cargo.toml | 32 + binding.gyp | 30 + bindings/node/binding.cc | 20 + bindings/node/index.d.ts | 32 + bindings/node/index.js | 29 + bindings/python/tree_sitter_rune/__init__.py | 26 + bindings/rust/build.rs | 19 + bindings/rust/lib.rs | 30 + .../plans/2026-03-27-tree-sitter-rune.md | 2702 +++++++++++++++++ .../2026-03-27-tree-sitter-rune-design.md | 252 ++ grammar.js | 1695 +++++++++++ package-lock.json | 63 + package.json | 53 + queries/highlights.scm | 2 + queries/tags.scm | 2 + src/scanner.c | 393 +++ tree-sitter.json | 31 + 19 files changed, 5466 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 binding.gyp create mode 100644 bindings/node/binding.cc create mode 100644 bindings/node/index.d.ts create mode 100644 bindings/node/index.js create mode 100644 bindings/python/tree_sitter_rune/__init__.py create mode 100644 bindings/rust/build.rs create mode 100644 bindings/rust/lib.rs create mode 100644 docs/superpowers/plans/2026-03-27-tree-sitter-rune.md create mode 100644 docs/superpowers/specs/2026-03-27-tree-sitter-rune-design.md create mode 100644 grammar.js create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 queries/highlights.scm create mode 100644 queries/tags.scm create mode 100644 src/scanner.c create mode 100644 tree-sitter.json diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..e84f7ac --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 2 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true + +[*.scm] +indent_size = 2 + +[Makefile] +indent_style = tab diff --git a/.gitignore b/.gitignore new file 
mode 100644 index 0000000..308fcab --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# Rust artifacts +target/ + +# Node artifacts +build/ +prebuilds/ +node_modules/ + +# Swift artifacts +.build/ + +# Go artifacts +_obj/ + +# Python artifacts +.venv/ +dist/ +*.egg-info +*.whl + +# C artifacts +*.a +*.so +*.so.* +*.dylib +*.dll +*.pc + +# Example dirs +/examples/*/ + +# Grammar volatiles +*.wasm +*.obj +*.o + +# Archives +*.tar.gz +*.tgz +*.zip diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..01bfdb8 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "tree-sitter-rune" +version = "0.1.0" +description = "Rune grammar for tree-sitter" +authors = [] +license = "MIT" +edition = "2021" +rust-version = "1.65" +keywords = ["parser", "tree-sitter", "rune"] +include = [ + "bindings/rust/*", + "queries/*", + "src/*", + "grammar.js", + "tree-sitter.json", +] + +[lib] +path = "bindings/rust/lib.rs" + +[[example]] +name = "parse" +path = "bindings/rust/examples/parse.rs" + +[dependencies] +tree-sitter-language = "0.1" + +[build-dependencies] +cc = "1.1" + +[dev-dependencies] +tree-sitter = "0.25" diff --git a/binding.gyp b/binding.gyp new file mode 100644 index 0000000..5d61e37 --- /dev/null +++ b/binding.gyp @@ -0,0 +1,30 @@ +{ + "targets": [ + { + "target_name": "tree_sitter_rune_binding", + "dependencies": [ + " + +typedef struct TSLanguage TSLanguage; + +extern "C" TSLanguage *tree_sitter_rune(); + +// "tree-sitter", "language" hashed with BLAKE2 +const napi_type_tag LANGUAGE_TYPE_TAG = { + 0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16 +}; + +Napi::Object Init(Napi::Env env, Napi::Object exports) { + exports["name"] = Napi::String::New(env, "rune"); + auto language = Napi::External::New(env, tree_sitter_rune()); + language.TypeTag(&LANGUAGE_TYPE_TAG); + exports["language"] = language; + return exports; +} + +NODE_API_MODULE(tree_sitter_rune_binding, Init) diff --git a/bindings/node/index.d.ts b/bindings/node/index.d.ts new file mode 100644 
index 0000000..2ce6ac8 --- /dev/null +++ b/bindings/node/index.d.ts @@ -0,0 +1,32 @@ +type BaseNode = { + type: string; + named: boolean; +}; + +type ChildNode = { + multiple: boolean; + required: boolean; + types: BaseNode[]; +}; + +type NodeInfo = + | { + type: string; + named: boolean; + subtypes: BaseNode[]; + } + | { + type: string; + named: boolean; + fields: { [name: string]: ChildNode }; + children: ChildNode[]; + }; + +type Language = { + name: string; + language: unknown; + nodeTypeInfo: NodeInfo[]; +}; + +declare const language: Language; +export = language; diff --git a/bindings/node/index.js b/bindings/node/index.js new file mode 100644 index 0000000..a4402e9 --- /dev/null +++ b/bindings/node/index.js @@ -0,0 +1,29 @@ +const root = require("path").join(__dirname, "..", ".."); + +if (process.versions?.bun) { + const fs = require("fs"); + const os = require("os"); + const path = require("path"); + + const bindingName = "tree_sitter_rune_binding"; + const platformFolder = `${os.platform()}-${os.arch()}`; + const prebuildsDir = path.join(root, "prebuilds", platformFolder); + + if (fs.existsSync(prebuildsDir)) { + const files = fs.readdirSync(prebuildsDir); + const nodeFile = files.find((f) => f.endsWith(".node")); + if (nodeFile) { + module.exports = require(path.join(prebuildsDir, nodeFile)); + } + } + + if (!module.exports) { + module.exports = require(`${root}/build/Release/${bindingName}.node`); + } +} else { + module.exports = require("node-gyp-build")(root); +} + +try { + module.exports.nodeTypeInfo = require("../../src/node-types.json"); +} catch (_) {} diff --git a/bindings/python/tree_sitter_rune/__init__.py b/bindings/python/tree_sitter_rune/__init__.py new file mode 100644 index 0000000..6601c69 --- /dev/null +++ b/bindings/python/tree_sitter_rune/__init__.py @@ -0,0 +1,26 @@ +from importlib.resources import files + +from ._binding import language + + +def _get_query(name, filename): + if name in globals(): + return globals()[name] + query = 
files(__name__).joinpath("queries").joinpath(filename).read_text() + globals()[name] = query + return query + + +def __getattr__(name): + if name == "HIGHLIGHTS_QUERY": + return _get_query("HIGHLIGHTS_QUERY", "highlights.scm") + if name == "TAGS_QUERY": + return _get_query("TAGS_QUERY", "tags.scm") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__(): + return [*globals(), "HIGHLIGHTS_QUERY", "TAGS_QUERY"] + + +__all__ = ["language", "HIGHLIGHTS_QUERY", "TAGS_QUERY"] diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs new file mode 100644 index 0000000..a3025c6 --- /dev/null +++ b/bindings/rust/build.rs @@ -0,0 +1,19 @@ +fn main() { + let src_dir = std::path::Path::new("src"); + + let mut c_config = cc::Build::new(); + c_config.std("c11").include(src_dir); + + #[cfg(target_env = "msvc")] + c_config.flag("-utf-8"); + + let parser_path = src_dir.join("parser.c"); + c_config.file(&parser_path); + println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); + + let scanner_path = src_dir.join("scanner.c"); + c_config.file(&scanner_path); + println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); + + c_config.compile("tree-sitter-rune"); +} diff --git a/bindings/rust/lib.rs b/bindings/rust/lib.rs new file mode 100644 index 0000000..a163003 --- /dev/null +++ b/bindings/rust/lib.rs @@ -0,0 +1,30 @@ +use tree_sitter_language::LanguageFn; + +extern "C" { + fn tree_sitter_rune() -> *const (); +} + +/// The tree-sitter [`LanguageFn`] for this grammar. +pub static LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_rune) }; + +/// The content of the [`node-types.json`] file for this grammar. +/// +/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types +pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); + +/// The syntax highlighting query for this language. 
+pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); + +/// The tagging query for this language. +pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm"); + +#[cfg(test)] +mod tests { + #[test] + fn test_can_load_grammar() { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&super::LANGUAGE.into()) + .expect("Error loading Rune grammar"); + } +} diff --git a/docs/superpowers/plans/2026-03-27-tree-sitter-rune.md b/docs/superpowers/plans/2026-03-27-tree-sitter-rune.md new file mode 100644 index 0000000..cba325e --- /dev/null +++ b/docs/superpowers/plans/2026-03-27-tree-sitter-rune.md @@ -0,0 +1,2702 @@ +# tree-sitter-rune Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build a tree-sitter grammar for the Rune programming language by forking tree-sitter-rust, stripping type system rules, and adding Rune-specific constructs. + +**Architecture:** Fork tree-sitter-rust's grammar.js (~1695 lines, 182 rules), remove ~60 type/ownership/trait/macro rules, modify ~20 declaration/expression rules for dynamic typing, add ~10 new rules for template literals, select blocks, object literals, and `is`/`is not`. An external scanner handles template literal interpolation. 
+ +**Tech Stack:** tree-sitter CLI (v0.26), Node.js (grammar DSL), C (external scanner), Scheme (query files) + +**Reference files (read these before starting):** +- Design spec: `docs/superpowers/specs/2026-03-27-tree-sitter-rune-design.md` +- Rune test file: `~/.config/owlry/plugins/hyprshutdown/main.rn` +- tree-sitter-rust grammar: `https://github.com/tree-sitter/tree-sitter-rust` (v0.24.1) +- tree-sitter-javascript scanner (template literal reference): `https://github.com/tree-sitter/tree-sitter-javascript/blob/master/src/scanner.c` +- Rune language book: `https://rune-rs.github.io/book/` +- Rune parser source (authoritative grammar): `https://github.com/rune-rs/rune/tree/main/crates/rune/src/grammar/` + +--- + +## Task 1: Project Scaffolding + +**Files:** +- Create: `grammar.js`, `package.json`, `tree-sitter.json`, `Cargo.toml`, `binding.gyp`, `bindings/`, `.gitignore`, `.editorconfig` + +- [ ] **Step 1: Install tree-sitter CLI** + +Run: `cargo install tree-sitter-cli --locked` +Expected: Binary installed at `~/.cargo/bin/tree-sitter` + +- [ ] **Step 2: Initialize tree-sitter project** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +tree-sitter init +``` +Expected: Project scaffolding created with `grammar.js`, `package.json`, `tree-sitter.json`, `Cargo.toml`, `binding.gyp`, `bindings/`, etc. 
+ +- [ ] **Step 3: Install npm dependencies** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npm install +``` +Expected: `node_modules/` created with `tree-sitter-cli`, `node-addon-api`, `node-gyp-build` + +- [ ] **Step 4: Update tree-sitter.json for Rune** + +Replace the generated `tree-sitter.json` with: + +```json +{ + "grammars": [ + { + "name": "rune", + "camelcase": "Rune", + "scope": "source.rune", + "path": ".", + "file-types": ["rn"], + "highlights": ["queries/highlights.scm"], + "tags": ["queries/tags.scm"], + "injection-regex": "rune" + } + ], + "metadata": { + "version": "0.1.0", + "license": "MIT", + "description": "Rune grammar for tree-sitter", + "authors": [], + "links": { + "repository": "https://github.com/TODO/tree-sitter-rune" + } + }, + "bindings": { + "c": true, + "go": true, + "node": true, + "python": true, + "rust": true, + "swift": false + } +} +``` + +- [ ] **Step 5: Initialize git repo** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git init +git add -A +git commit -m "chore: scaffold tree-sitter-rune project" +``` + +- [ ] **Step 6: Copy tree-sitter-rust grammar.js as starting point** + +Fetch tree-sitter-rust's `grammar.js` from `https://raw.githubusercontent.com/tree-sitter/tree-sitter-rust/refs/heads/master/grammar.js` and save it to `grammar.js`, replacing the generated template. + +Verify: +Run: `head -5 grammar.js` +Expected: Should show the tree-sitter-rust grammar header + +- [ ] **Step 7: Copy tree-sitter-rust scanner.c as starting point** + +Fetch tree-sitter-rust's `src/scanner.c` from `https://raw.githubusercontent.com/tree-sitter/tree-sitter-rust/refs/heads/master/src/scanner.c` and save it to `src/scanner.c`. 
+ +- [ ] **Step 8: Commit starting point** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add grammar.js src/scanner.c +git commit -m "chore: copy tree-sitter-rust grammar and scanner as starting point" +``` + +--- + +## Task 2: Strip Type System from grammar.js + +**Files:** +- Modify: `grammar.js` + +This task removes all type-related rules from the Rust grammar. Rune is dynamically typed — none of these exist in Rune. + +- [ ] **Step 1: Rename grammar from 'rust' to 'rune'** + +Change the grammar name: +```js +// Change: +name: 'rust', +// To: +name: 'rune', +``` + +- [ ] **Step 2: Remove the `primitiveTypes` and `numericTypes` constants** + +Delete the `numericTypes` array (lines ~33-48), `primitiveTypes` definition (line ~58), and `TOKEN_TREE_NON_SPECIAL_PUNCTUATION` array (lines ~51-56). These are Rust-specific. + +- [ ] **Step 3: Remove `_type` supertype and all type rules** + +Remove from `supertypes`: `$._type`, `$._literal_pattern` + +Delete these rules entirely from the `rules` object: +- `_type` +- `bracketed_type` +- `qualified_type` +- `lifetime` +- `array_type` +- `for_lifetimes` +- `function_type` +- `tuple_type` +- `unit_type` +- `generic_type` +- `generic_type_with_turbofish` +- `bounded_type` +- `use_bounds` +- `type_arguments` +- `type_binding` +- `reference_type` +- `pointer_type` +- `never_type` +- `abstract_type` +- `dynamic_type` + +- [ ] **Step 4: Remove generic/trait/impl rules** + +Delete these rules: +- `type_parameters` +- `type_parameter` +- `lifetime_parameter` +- `const_parameter` +- `trait_bounds` +- `higher_ranked_trait_bound` +- `removed_trait_bound` +- `where_clause` +- `where_predicate` +- `impl_item` +- `trait_item` +- `associated_type` + +- [ ] **Step 5: Remove type-related expressions** + +Delete these rules: +- `type_cast_expression` (the `as` type cast) +- `reference_expression` (the `&` and `&mut` prefix) +- `generic_function` (turbofish `foo::<T>()`) +- `scoped_type_identifier` +- 
`scoped_type_identifier_in_expression_position` + +- [ ] **Step 6: Remove type-related patterns** + +Delete these rules: +- `generic_pattern` +- `ref_pattern` +- `mut_pattern` +- `reference_pattern` + +- [ ] **Step 7: Remove `mutable_specifier` rule** + +Delete the `mutable_specifier` rule. Rune has no `mut` keyword. + +- [ ] **Step 8: Update `_expression_except_range` to remove deleted alternatives** + +Remove these alternatives from the `_expression_except_range` choice: +- `$.type_cast_expression` +- `$.reference_expression` +- `$.generic_function` +- `$.try_expression` +- `$.metavariable` +- `$.unit_expression` + +- [ ] **Step 9: Update `_expression_ending_with_block` to remove deleted alternatives** + +Remove these alternatives: +- `$.unsafe_block` +- `$.const_block` +- `$.gen_block` +- `$.try_block` + +- [ ] **Step 10: Update `_pattern` to remove deleted alternatives** + +Remove these alternatives from `_pattern`: +- `$.generic_pattern` +- `$.ref_pattern` +- `$.mut_pattern` +- `$.reference_pattern` +- `$.const_block` +- `$.metavariable` +- All `primitive_type` aliases (the `alias(choice(...primitiveTypes...), $.identifier)`) + +- [ ] **Step 11: Update `_literal_pattern` to remove type-specific patterns** + +Remove the `negative_literal` pattern if it references types. Keep integer, string, char, boolean, float literals. + +- [ ] **Step 12: Remove type-related conflicts** + +Remove these from the `conflicts` array: +- `[$._type, $._pattern]` +- `[$.unit_type, $.tuple_pattern]` +- `[$.parameters, $._pattern]` (may need to keep — test after) +- `[$.foreign_mod_item, $.function_modifiers]` + +- [ ] **Step 13: Remove `_type` from `supertypes`** + +Update the `supertypes` array to only include: +```js +supertypes: $ => [ + $._expression, + $._literal, + $._declaration_statement, + $._pattern, +], +``` + +- [ ] **Step 14: Remove type-related items from `inline`** + +Remove `$._reserved_identifier` from inline if it only referenced primitive types. 
+ +- [ ] **Step 15: Attempt generation to find remaining broken references** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter generate +``` +Expected: Errors about undefined rules. Note each one — these are references we missed. Fix them by removing the referencing code. + +- [ ] **Step 16: Iterate until generation succeeds** + +Fix each broken reference found in Step 15. Common issues: +- Rules that referenced `$._type` (return types, parameter types) +- Rules that referenced `$.type_parameters` +- Rules that referenced `$.where_clause` +- Rules in `_declaration_statement` that referenced deleted items + +Run `npx tree-sitter generate` after each fix until it succeeds. + +- [ ] **Step 17: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add grammar.js +git commit -m "refactor: strip type system rules from grammar" +``` + +--- + +## Task 3: Strip Ownership, Unsafe, Extern, Macro Definition Rules + +**Files:** +- Modify: `grammar.js` + +- [ ] **Step 1: Remove ownership/borrow rules** + +Delete these rules: +- `self_parameter` (the `self`, `&self`, `&mut self` in method params) +- `variadic_parameter` (C-style `...`) + +- [ ] **Step 2: Remove unsafe/extern rules** + +Delete these rules: +- `unsafe_block` +- `extern_modifier` +- `extern_crate_declaration` +- `foreign_mod_item` +- `function_signature_item` (forward declarations, not in Rune) + +- [ ] **Step 3: Remove const/static/type alias/union rules** + +Delete these rules: +- `const_item` +- `static_item` +- `type_item` +- `union_item` + +- [ ] **Step 4: Remove macro definition rules** + +Delete these rules (Rune macros are native Rust-side only, not definable in `.rn` source): +- `macro_definition` +- `macro_rule` +- `token_tree_pattern` +- `token_binding_pattern` +- `token_repetition_pattern` +- `fragment_specifier` +- `token_tree` +- `token_repetition` +- `delim_token_tree` +- `_delim_tokens` +- `_non_delim_token` +- `_non_special_token` +- `_tokens` +- `_token_pattern` 
+- `metavariable` + +- [ ] **Step 5: Simplify the macro invocation rule** + +Do not delete `macro_invocation` outright. Rune macros cannot be defined in `.rn` source (see Step 4), but built-in macros such as `println!()` are still invoked with the Rust-style `name!(...)` syntax — check the Rune docs to confirm that `!` macro syntax appears in Rune source. If it does, keep `macro_invocation` but simplify it. If not, delete it. + +**Decision point:** Read `https://rune-rs.github.io/book/` to check. Rune does use `println!("hello")` syntax — it has built-in macros. Keep `macro_invocation` but simplify: remove the `token_tree` body and replace it with a simple `arguments`-style body (`delim_token_tree` is not an option because Step 4 deletes it). + +Simplified `macro_invocation`: +```js +macro_invocation: $ => seq( + field('macro', choice( + $.identifier, + $.scoped_identifier, + )), + '!', + field('arguments', $.arguments), +), +``` + +This handles `println!("hello", value)` without needing full token tree parsing. + +- [ ] **Step 6: Simplify `function_modifiers`** + +Rune only supports `async` as a function modifier (no `const`, `default`, `unsafe`, `extern`). + +The minimal replacement would be: +```js +function_modifiers: $ => repeat1('async'), +``` + +Preferred, because it is simpler still: inline `optional('async')` directly into `function_item` and remove the `function_modifiers` rule entirely. 
+ +- [ ] **Step 7: Update `_declaration_statement` to remove deleted items** + +Remove references to deleted rules from `_declaration_statement`: +- `$.const_item` +- `$.static_item` +- `$.type_item` +- `$.union_item` +- `$.extern_crate_declaration` +- `$.foreign_mod_item` +- `$.impl_item` +- `$.trait_item` +- `$.macro_definition` +- `$.function_signature_item` + +The remaining declaration statements should be: +- `$.function_item` +- `$.struct_item` +- `$.enum_item` +- `$.mod_item` +- `$.use_declaration` +- `$.let_declaration` +- `$.attribute_item` +- `$.inner_attribute_item` +- `$.macro_invocation` (simplified) +- `$.empty_statement` + +- [ ] **Step 8: Remove `shebang` rule if present** + +Rune files don't use shebangs. Remove the `shebang` rule and the `optional($.shebang)` from `source_file`. + +Update `source_file`: +```js +source_file: $ => repeat($._statement), +``` + +- [ ] **Step 9: Generate and fix remaining references** + +Run: `npx tree-sitter generate` +Fix any remaining broken references iteratively. 
+ +- [ ] **Step 10: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add grammar.js +git commit -m "refactor: strip ownership, unsafe, extern, and macro definition rules" +``` + +--- + +## Task 4: Adapt Core Declarations for Rune + +**Files:** +- Modify: `grammar.js` + +- [ ] **Step 1: Adapt `function_item` for untyped Rune functions** + +Rune functions have no type annotations on parameters or return values: +```rune +pub fn hello(name) { } +pub async fn fetch(url) { } +``` + +Replace `function_item` with: +```js +function_item: $ => seq( + optional($.visibility_modifier), + optional('async'), + 'fn', + field('name', $.identifier), + field('parameters', $.parameters), + field('body', $.block), +), +``` + +- [ ] **Step 2: Simplify `parameters` rule** + +Rune parameters are just identifiers, no types: + +```js +parameters: $ => seq( + '(', + sepBy(',', seq( + optional($.attribute_item), + $.identifier, + )), + optional(','), + ')', +), +``` + +Remove the `parameter` and `self_parameter` and `variadic_parameter` rules if not already deleted. The `parameters` rule now directly contains identifiers. + +- [ ] **Step 3: Adapt `struct_item` for untyped Rune structs** + +Rune structs have three forms: +```rune +struct Unit; +struct Tuple(a, b); +struct Named { x, y } +``` + +Replace `struct_item` with: +```js +struct_item: $ => seq( + optional($.visibility_modifier), + 'struct', + field('name', $._type_identifier), + choice( + field('body', $.field_declaration_list), + seq(field('body', $.ordered_field_declaration_list), ';'), + ';', + ), +), +``` + +- [ ] **Step 4: Simplify `field_declaration_list` and `field_declaration`** + +Rune struct fields are just names: +```rune +struct Foo { x, y } +``` + +```js +field_declaration_list: $ => seq( + '{', + sepBy(',', seq( + optional($.visibility_modifier), + $._field_identifier, + )), + optional(','), + '}', +), +``` + +Remove the old `field_declaration` rule that included types. 
The field is now just a visibility modifier + identifier. + +- [ ] **Step 5: Simplify `ordered_field_declaration_list`** + +Rune tuple struct fields are just identifiers: +```rune +struct Pair(a, b); +``` + +```js +ordered_field_declaration_list: $ => seq( + '(', + sepBy(',', seq( + optional($.visibility_modifier), + $.identifier, + )), + optional(','), + ')', +), +``` + +- [ ] **Step 6: Adapt `enum_item` and `enum_variant`** + +Rune enums: +```rune +enum Result { Ok(value), Err(msg) } +enum Color { Red, Green, Blue } +``` + +```js +enum_item: $ => seq( + optional($.visibility_modifier), + 'enum', + field('name', $._type_identifier), + field('body', $.enum_variant_list), +), + +enum_variant_list: $ => seq( + '{', + sepBy(',', seq( + optional($.attribute_item), + $.enum_variant, + )), + optional(','), + '}', +), + +enum_variant: $ => seq( + optional($.visibility_modifier), + field('name', $.identifier), + optional(choice( + field('body', $.ordered_field_declaration_list), + field('body', $.field_declaration_list), + )), +), +``` + +- [ ] **Step 7: Simplify `let_declaration`** + +Rune `let` has no type annotation: +```rune +let x = 42; +let items = []; +``` + +```js +let_declaration: $ => seq( + 'let', + field('pattern', $._pattern), + optional(seq( + '=', + field('value', $._expression), + )), + ';', +), +``` + +- [ ] **Step 8: Simplify `closure_expression`** + +Rune closures have no type annotations and no `move` keyword: +```rune +let add = |a, b| a + b; +let greet = || println!("hello"); +``` + +```js +closure_expression: $ => prec(PREC.closure, seq( + optional('async'), + field('parameters', $.closure_parameters), + choice( + field('body', $._expression), + field('body', $.block), + ), +)), + +closure_parameters: $ => seq( + '|', + sepBy(',', $.identifier), + '|', +), +``` + +- [ ] **Step 9: Simplify `struct_expression`** + +Rune struct instantiation: +```rune +let p = Point { x: 1, y: 2 }; +``` + +Keep `struct_expression`, `field_initializer_list`, 
`field_initializer`, `shorthand_field_initializer`, `base_field_initializer` as-is since the syntax is the same as Rust minus turbofish. + +Remove any `type_arguments` references from `struct_expression`. + +- [ ] **Step 10: Remove `try_expression`** + +**Verify first:** this step assumes Rune has no `?` operator, but the Rune book appears to document a try operator — confirm against the Rune parser source listed under "Reference files". Only if `?` is unsupported, delete the `try_expression` rule and remove it from `_expression_except_range`; otherwise keep both. + +- [ ] **Step 11: Verify `use_declaration` works** + +Rune uses the same `use` syntax as Rust: +```rune +use owlry::Item; +use std::collections::{HashMap, HashSet}; +``` + +The existing `use_declaration`, `scoped_use_list`, `use_list`, `use_as_clause`, `use_wildcard` rules should work unchanged. Verify no deleted rules are referenced. + +- [ ] **Step 12: Generate and test** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter generate +``` +Expected: Successful generation + +- [ ] **Step 13: Test parsing the owlry plugin** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter parse ~/.config/owlry/plugins/hyprshutdown/main.rn +``` +Expected: Should produce a parse tree (may have ERROR nodes — that's OK at this stage, as long as it doesn't crash). Note any ERROR nodes for later fixing. 
+ +- [ ] **Step 14: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add grammar.js +git commit -m "refactor: adapt declarations for Rune's untyped syntax" +``` + +--- + +## Task 5: Clean Up Expressions and Patterns + +**Files:** +- Modify: `grammar.js` + +- [ ] **Step 1: Clean up `_expression_except_range`** + +The final set of expression alternatives for Rune should be: + +```js +_expression_except_range: $ => choice( + $.unary_expression, + $.binary_expression, + $.assignment_expression, + $.compound_assignment_expr, + $.return_expression, + $.yield_expression, + $.call_expression, + $.arguments, + $.array_expression, + $.parenthesized_expression, + $.tuple_expression, + $.struct_expression, + $.if_expression, + $.match_expression, + $.while_expression, + $.loop_expression, + $.for_expression, + $.closure_expression, + $.break_expression, + $.continue_expression, + $.index_expression, + $.await_expression, + $.field_expression, + $.async_block, + $.block, + $._literal, + prec.left($.identifier), + alias(choice('self'), $.self), + $.scoped_identifier, + $.macro_invocation, +), +``` + +Note: `yield_expression` already exists in tree-sitter-rust. `try_expression`, `reference_expression`, `type_cast_expression`, `generic_function`, `unit_expression`, `unsafe_block`, `const_block`, `gen_block`, `try_block` are removed. 
+ +- [ ] **Step 2: Clean up `_expression_ending_with_block`** + +```js +_expression_ending_with_block: $ => choice( + $.async_block, + $.block, + $.if_expression, + $.match_expression, + $.while_expression, + $.loop_expression, + $.for_expression, +), +``` + +- [ ] **Step 3: Clean up `_pattern`** + +The final set of pattern alternatives for Rune: + +```js +_pattern: $ => choice( + $._literal_pattern, + $.identifier, + $.scoped_identifier, + $.tuple_pattern, + $.tuple_struct_pattern, + $.struct_pattern, + $.slice_pattern, + $.captured_pattern, + $.remaining_field_pattern, + $.range_pattern, + $.or_pattern, + '_', +), +``` + +Removed: `generic_pattern`, `ref_pattern`, `mut_pattern`, `reference_pattern`, `const_block`, `metavariable`, all `primitive_type` aliases. + +- [ ] **Step 4: Clean up `binary_expression` operators** + +Keep all standard operators. The `as` keyword cast is removed (that was `type_cast_expression`, already deleted). + +Verify the `PREC` table makes sense for Rune. Remove any Rust-specific precedence levels that are no longer used. + +- [ ] **Step 5: Clean up `unary_expression`** + +Rune unary operators: `-` (negation), `!` (logical not), `*` (dereference — check if Rune has this). + +Rune likely does NOT have `*` dereference or `&` reference. Simplify: +```js +unary_expression: $ => prec(PREC.unary, seq( + choice('-', '!'), + $._expression, +)), +``` + +- [ ] **Step 6: Remove `label` rule if Rune doesn't have labeled loops** + +Check the Rune docs. If Rune supports `'label: loop { break 'label; }` syntax, keep it. Otherwise, remove `label` and its references in `loop_expression`, `while_expression`, `for_expression`, `break_expression`, `continue_expression`. + +**Decision point:** Check `https://rune-rs.github.io/book/loops.html`. If no labeled loops, remove. + +- [ ] **Step 7: Verify `for_expression` syntax** + +Rune `for`: +```rune +for item in collection { + // ... +} +``` + +The existing Rust `for_expression` should work. 
Verify it doesn't reference any deleted types. + +```js +for_expression: $ => seq( + optional('async'), // Rune may support async for — check + 'for', + field('pattern', $._pattern), + 'in', + field('value', $._expression), + field('body', $.block), +), +``` + +- [ ] **Step 8: Verify `match_expression` works** + +Rune match is identical to Rust match: +```rune +match value { + n if n < 5 => "small", + _ => "large", +} +``` + +The existing `match_expression`, `match_block`, `match_arm`, `last_match_arm`, `match_pattern` should work unchanged. + +- [ ] **Step 9: Update `visibility_modifier`** + +Rune supports `pub`, `pub(crate)`, `pub(super)`, `pub(self)` — same as Rust. Keep unchanged. + +- [ ] **Step 10: Generate and test** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter generate && npx tree-sitter parse ~/.config/owlry/plugins/hyprshutdown/main.rn +``` +Expected: Successful generation and improved parsing of the test file. + +- [ ] **Step 11: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add grammar.js +git commit -m "refactor: clean up expressions and patterns for Rune" +``` + +--- + +## Task 6: Adapt External Scanner + +**Files:** +- Modify: `src/scanner.c` +- Modify: `grammar.js` (externals field) + +- [ ] **Step 1: Plan the scanner tokens** + +The Rune scanner needs: +- `STRING_CONTENT` — keep from Rust (scans until `"` or `\`) +- `FLOAT_LITERAL` — keep from Rust (disambiguates `1.method()` from `1.0`) +- `BLOCK_COMMENT_CONTENT` — keep (nested `/* */` comments) +- `BLOCK_OUTER_DOC_COMMENT_MARKER` — keep (for `/** */`) +- `BLOCK_INNER_DOC_COMMENT_MARKER` — keep (for `/*! 
*/`) +- `LINE_DOC_CONTENT` — keep (for `///` and `//!`) +- `TEMPLATE_CONTENT` — **NEW** (scans template literal content until `` ` ``, `${`, or `\`) +- `ERROR_SENTINEL` — keep (error recovery detection) + +Remove: +- `RAW_STRING_LITERAL_START` — Rune has no raw strings +- `RAW_STRING_LITERAL_CONTENT` — Rune has no raw strings +- `RAW_STRING_LITERAL_END` — Rune has no raw strings + +- [ ] **Step 2: Update externals in grammar.js** + +```js +externals: $ => [ + $.string_content, + $.float_literal, + $._outer_block_doc_comment_marker, + $._inner_block_doc_comment_marker, + $._block_comment_content, + $._line_doc_content, + $.template_content, + $._error_sentinel, +], +``` + +- [ ] **Step 3: Modify scanner.c — remove raw string handling** + +Remove from scanner.c: +- The `Scanner` struct and its `opening_hash_count` field (only used for raw strings) +- `serialize` / `deserialize` functions can return 0 / be no-ops (scanner becomes stateless) +- `scan_raw_string_start`, `scan_raw_string_content`, `scan_raw_string_end` functions +- All `RAW_STRING_LITERAL_*` token handling in the `scan` function + +Update the token enum to match the new `externals` order: +```c +enum TokenType { + STRING_CONTENT, + FLOAT_LITERAL, + BLOCK_OUTER_DOC_COMMENT_MARKER, + BLOCK_INNER_DOC_COMMENT_MARKER, + BLOCK_COMMENT_CONTENT, + LINE_DOC_CONTENT, + TEMPLATE_CONTENT, + ERROR_SENTINEL, +}; +``` + +- [ ] **Step 4: Add `scan_template_content` function** + +Reference: tree-sitter-javascript's `scan_template_chars`. 
+ +```c +static bool scan_template_content(TSLexer *lexer) { + lexer->result_symbol = TEMPLATE_CONTENT; + bool has_content = false; + while (true) { + lexer->mark_end(lexer); + switch (lexer->lookahead) { + case '`': + return has_content; + case '\0': + return false; + case '$': + advance(lexer); + if (lexer->lookahead == '{') { + return has_content; + } + has_content = true; + break; + case '\\': + return has_content; + default: + advance(lexer); + has_content = true; + } + } +} +``` + +- [ ] **Step 5: Wire `scan_template_content` into the scan function** + +In the main `scan` function, add a check for `TEMPLATE_CONTENT` ahead of the other token checks, but after any `ERROR_SENTINEL` early-return: during error recovery tree-sitter marks every external token as valid, so an unguarded `TEMPLATE_CONTENT` check would consume arbitrary input as template content: + +```c +if (valid_symbols[TEMPLATE_CONTENT]) { + return scan_template_content(lexer); +} +``` + +- [ ] **Step 6: Make scanner stateless** + +Since raw strings are removed, the scanner needs no persistent state: + +```c +void *tree_sitter_rune_external_scanner_create() { return NULL; } +void tree_sitter_rune_external_scanner_destroy(void *payload) {} +unsigned tree_sitter_rune_external_scanner_serialize(void *payload, char *buffer) { return 0; } +void tree_sitter_rune_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {} +``` + +- [ ] **Step 7: Rename all scanner functions from `tree_sitter_rust_*` to `tree_sitter_rune_*`** + +Find and replace all occurrences of `tree_sitter_rust_external_scanner` with `tree_sitter_rune_external_scanner`. + +- [ ] **Step 8: Generate and verify** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter generate +``` +Expected: Successful generation with updated externals. 
+ +- [ ] **Step 9: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add grammar.js src/scanner.c +git commit -m "refactor: adapt external scanner for Rune, add template_content token" +``` + +--- + +## Task 7: Add Template Literals + +**Files:** +- Modify: `grammar.js` +- Create: `test/corpus/literals.txt` (partial — template literal tests) + +- [ ] **Step 1: Add template literal grammar rules** + +Add these rules to `grammar.js`: + +```js +template_literal: $ => seq( + '`', + repeat(choice( + $.template_content, + $.escape_sequence, + $.interpolation, + )), + '`', +), + +interpolation: $ => seq( + '${', + field('expression', $._expression), + '}', +), +``` + +- [ ] **Step 2: Add template_literal to `_literal`** + +Add `$.template_literal` to the `_literal` choice rule: + +```js +_literal: $ => choice( + $.string_literal, + $.raw_string_literal, // remove if raw strings were deleted + $.char_literal, + $.boolean_literal, + $.integer_literal, + $.float_literal, + $.template_literal, // ADD THIS +), +``` + +In the same edit, delete the `$.raw_string_literal` line from `_literal` (and the `raw_string_literal` rule itself, if it is still defined): Rune doesn't have raw strings, and Task 6 already removed the `RAW_STRING_LITERAL_*` external tokens that rule depends on. 
+ +- [ ] **Step 3: Write template literal test cases** + +Create `test/corpus/literals.txt`: + +``` +================== +Simple template literal +================== + +fn main() { + let x = `hello world`; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (template_literal + (template_content)))))) + +================== +Template literal with interpolation +================== + +fn main() { + let name = "world"; + let msg = `hello ${name}`; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (string_literal + (string_content))) + (let_declaration + pattern: (identifier) + value: (template_literal + (template_content) + (interpolation + expression: (identifier))))))) + +================== +Template literal with expression interpolation +================== + +fn main() { + let x = `result: ${1 + 2}`; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (template_literal + (template_content) + (interpolation + expression: (binary_expression + left: (integer_literal) + right: (integer_literal)))))))) + +================== +Template literal with method call interpolation +================== + +fn main() { + let msg = `Hello ${name.to_upper()}!`; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (template_literal + (template_content) + (interpolation + expression: (call_expression + function: (field_expression + value: (identifier) + field: (field_identifier)) + arguments: (arguments))) + (template_content)))))) + +================== +Empty template literal +================== + +fn main() { + let x = ``; +} + +--- + +(source_file + (function_item + 
name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (template_literal))))) +``` + +- [ ] **Step 4: Generate and run tests** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter generate && npx tree-sitter test +``` +Expected: Template literal tests pass. Fix any failures. + +- [ ] **Step 5: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add grammar.js test/corpus/literals.txt +git commit -m "feat: add template literal support with interpolation" +``` + +--- + +## Task 8: Add Rune-Specific Constructs + +**Files:** +- Modify: `grammar.js` +- Create: `test/corpus/async.txt`, `test/corpus/generators.txt`, `test/corpus/objects.txt` + +- [ ] **Step 1: Add `select` expression** + +Rune's `select` block for async concurrency: +```rune +select { + result = future1 => { handle(result) }, + _ = future2 => { println!("done") }, +} +``` + +Add to grammar.js: +```js +select_expression: $ => seq( + 'select', + '{', + sepBy(',', $.select_arm), + optional(','), + '}', +), + +select_arm: $ => seq( + field('pattern', $._pattern), + '=', + field('value', $._expression_except_range), + '=>', + field('body', choice($._expression, $.block)), +), +``` + +Add `$.select_expression` to `_expression_ending_with_block`. + +- [ ] **Step 2: Add object literal** + +Rune's object syntax: +```rune +let obj = #{ name: "Alice", age: 30 }; +``` + +```js +object_literal: $ => seq( + '#', + '{', + sepBy(',', $.object_entry), + optional(','), + '}', +), + +object_entry: $ => seq( + field('key', $.identifier), + ':', + field('value', $._expression), +), +``` + +Add `$.object_literal` to `_expression_except_range`. + +- [ ] **Step 3: Add `is` and `is not` operators** + +Rune's runtime type checking: +```rune +if value is String { } +if value is not Vec { } +``` + +Add `is` and `is not` as binary operator variants. The right-hand side is a type name (identifier or scoped identifier), not an expression. 
+ +```js +is_expression: $ => prec.left(PREC.comparative, seq( + field('left', $._expression), + 'is', + field('right', choice($.identifier, $.scoped_identifier)), +)), + +is_not_expression: $ => prec.left(PREC.comparative, seq( + field('left', $._expression), + 'is', + 'not', + field('right', choice($.identifier, $.scoped_identifier)), +)), +``` + +Add both to `_expression_except_range`. + +- [ ] **Step 4: Verify yield_expression exists** + +tree-sitter-rust already has `yield_expression`. Verify it's still present and works: +```js +yield_expression: $ => prec.right(seq( + 'yield', + optional($._expression), +)), +``` + +This should work as-is for Rune generators. + +- [ ] **Step 5: Add byte literals** + +Rune supports `b'x'` byte chars and `b"hello"` byte strings. Check if tree-sitter-rust already handles these (it likely does via `char_literal` and `string_literal` with `b` prefix). If not, add: + +```js +byte_literal: $ => seq('b', $.char_literal), +byte_string_literal: $ => seq('b', $.string_literal), +``` + +Verify against the existing Rust grammar — byte literals may already be handled by the char/string rules. 
+ +- [ ] **Step 6: Write select expression tests** + +Create `test/corpus/async.txt`: + +``` +================== +Async function +================== + +async fn fetch(url) { + http::get(url).await +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (expression_statement + (await_expression + (call_expression + function: (scoped_identifier + path: (identifier) + name: (identifier)) + arguments: (arguments + (identifier)))))))) + +================== +Select expression +================== + +async fn race() { + select { + result = future_a => result, + _ = future_b => "timeout", + } +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (expression_statement + (select_expression + (select_arm + pattern: (identifier) + value: (identifier) + body: (identifier)) + (select_arm + pattern: (_) + value: (identifier) + body: (string_literal + (string_content)))))))) +``` + +- [ ] **Step 7: Write generator tests** + +Create `test/corpus/generators.txt`: + +``` +================== +Generator function with yield +================== + +fn fibonacci() { + let a = 0; + let b = 1; + loop { + yield a; + let c = a + b; + a = b; + b = c; + } +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (integer_literal)) + (let_declaration + pattern: (identifier) + value: (integer_literal)) + (expression_statement + (loop_expression + body: (block + (expression_statement + (yield_expression + (identifier))) + (let_declaration + pattern: (identifier) + value: (binary_expression + left: (identifier) + right: (identifier))) + (expression_statement + (assignment_expression + left: (identifier) + right: (identifier))) + (expression_statement + (assignment_expression + left: (identifier) + right: (identifier))))))))) +``` + +- [ ] **Step 8: Write object literal tests** + 
+Create `test/corpus/objects.txt`: + +``` +================== +Object literal +================== + +fn main() { + let obj = #{ name: "Alice", age: 30 }; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (object_literal + (object_entry + key: (identifier) + value: (string_literal + (string_content))) + (object_entry + key: (identifier) + value: (integer_literal))))))) + +================== +Empty object literal +================== + +fn main() { + let obj = #{}; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (object_literal))))) +``` + +- [ ] **Step 9: Generate and run all tests** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter generate && npx tree-sitter test +``` +Expected: All tests pass. Fix any failures iteratively. + +- [ ] **Step 10: Test against real file** + +Run: +```bash +npx tree-sitter parse ~/.config/owlry/plugins/hyprshutdown/main.rn +``` +Expected: Clean parse tree with no ERROR nodes. 
+ +- [ ] **Step 11: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add grammar.js test/corpus/ +git commit -m "feat: add select, object literals, is/is not, and Rune-specific constructs" +``` + +--- + +## Task 9: Core Test Corpus + +**Files:** +- Create: `test/corpus/declarations.txt` +- Create: `test/corpus/expressions.txt` +- Create: `test/corpus/control_flow.txt` +- Create: `test/corpus/patterns.txt` + +- [ ] **Step 1: Write declaration tests** + +Create `test/corpus/declarations.txt`: + +``` +================== +Simple function +================== + +fn hello() { + 42 +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (integer_literal)))) + +================== +Public function with parameters +================== + +pub fn add(a, b) { + a + b +} + +--- + +(source_file + (function_item + (visibility_modifier) + name: (identifier) + parameters: (parameters + (identifier) + (identifier)) + body: (block + (binary_expression + left: (identifier) + right: (identifier))))) + +================== +Use declaration +================== + +use owlry::Item; + +--- + +(source_file + (use_declaration + argument: (scoped_identifier + path: (identifier) + name: (identifier)))) + +================== +Struct with named fields +================== + +struct Point { + x, + y, +} + +--- + +(source_file + (struct_item + name: (type_identifier) + body: (field_declaration_list + (field_identifier) + (field_identifier)))) + +================== +Tuple struct +================== + +struct Pair(a, b); + +--- + +(source_file + (struct_item + name: (type_identifier) + body: (ordered_field_declaration_list + (identifier) + (identifier)))) + +================== +Unit struct +================== + +struct Empty; + +--- + +(source_file + (struct_item + name: (type_identifier))) + +================== +Enum +================== + +enum Color { + Red, + Green, + Blue, +} + +--- + +(source_file + (enum_item + name: 
(type_identifier) + body: (enum_variant_list + (enum_variant + name: (identifier)) + (enum_variant + name: (identifier)) + (enum_variant + name: (identifier))))) + +================== +Enum with tuple variants +================== + +enum Result { + Ok(value), + Err(msg), +} + +--- + +(source_file + (enum_item + name: (type_identifier) + body: (enum_variant_list + (enum_variant + name: (identifier) + body: (ordered_field_declaration_list + (identifier))) + (enum_variant + name: (identifier) + body: (ordered_field_declaration_list + (identifier)))))) + +================== +Module declaration +================== + +mod utils { + pub fn helper() { + true + } +} + +--- + +(source_file + (mod_item + name: (identifier) + body: (declaration_list + (function_item + (visibility_modifier) + name: (identifier) + parameters: (parameters) + body: (block + (boolean_literal)))))) + +================== +Let declaration +================== + +fn main() { + let x = 42; + let items = []; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (integer_literal)) + (let_declaration + pattern: (identifier) + value: (array_expression))))) +``` + +- [ ] **Step 2: Write expression tests** + +Create `test/corpus/expressions.txt`: + +``` +================== +Method chain call +================== + +fn main() { + items.push(Item::new("id", "name", "cmd") + .description("desc") + .icon("icon")); +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (expression_statement + (call_expression + function: (field_expression + value: (identifier) + field: (field_identifier)) + arguments: (arguments + (call_expression + function: (field_expression + value: (call_expression + function: (field_expression + value: (call_expression + function: (scoped_identifier + path: (identifier) + name: (identifier)) + arguments: (arguments + (string_literal 
(string_content)) + (string_literal (string_content)) + (string_literal (string_content)))) + field: (field_identifier)) + arguments: (arguments + (string_literal (string_content)))) + field: (field_identifier)) + arguments: (arguments + (string_literal (string_content)))))))))) + +================== +Closure expression +================== + +fn main() { + let add = |a, b| a + b; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (closure_expression + parameters: (closure_parameters + (identifier) + (identifier)) + body: (binary_expression + left: (identifier) + right: (identifier))))))) + +================== +Array operations +================== + +fn main() { + let items = [1, 2, 3]; + let first = items[0]; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (array_expression + (integer_literal) + (integer_literal) + (integer_literal))) + (let_declaration + pattern: (identifier) + value: (index_expression + (identifier) + (integer_literal)))))) + +================== +Await expression +================== + +async fn fetch() { + let data = http::get(url).await; +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (await_expression + (call_expression + function: (scoped_identifier + path: (identifier) + name: (identifier)) + arguments: (arguments + (identifier)))))))) + +================== +Macro invocation +================== + +fn main() { + println!("hello"); +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (expression_statement + (macro_invocation + macro: (identifier) + arguments: (arguments + (string_literal + (string_content)))))))) +``` + +- [ ] **Step 3: Write control flow tests** + 
+Create `test/corpus/control_flow.txt`: + +``` +================== +If-else expression +================== + +fn check(x) { + if x > 0 { + "positive" + } else { + "non-positive" + } +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (expression_statement + (if_expression + condition: (binary_expression + left: (identifier) + right: (integer_literal)) + consequence: (block + (string_literal (string_content))) + alternative: (else_clause + (block + (string_literal (string_content))))))))) + +================== +Match expression +================== + +fn classify(n) { + match n { + 0 => "zero", + n if n < 0 => "negative", + _ => "positive", + } +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (expression_statement + (match_expression + value: (identifier) + body: (match_block + (match_arm + pattern: (match_pattern + (integer_literal)) + value: (string_literal (string_content))) + (match_arm + pattern: (match_pattern + (identifier) + condition: (binary_expression + left: (identifier) + right: (integer_literal))) + value: (string_literal (string_content))) + (last_match_arm + pattern: (match_pattern (_)) + value: (string_literal (string_content))))))))) + +================== +Loop with break +================== + +fn countdown() { + let n = 10; + loop { + if n == 0 { + break; + } + n = n - 1; + } +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (identifier) + value: (integer_literal)) + (expression_statement + (loop_expression + body: (block + (expression_statement + (if_expression + condition: (binary_expression + left: (identifier) + right: (integer_literal)) + consequence: (block + (expression_statement + (break_expression))))) + (expression_statement + (assignment_expression + left: (identifier) + right: (binary_expression + left: 
(identifier) + right: (integer_literal)))))))))) + +================== +While loop +================== + +fn drain(items) { + while items.len() > 0 { + items.pop(); + } +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (expression_statement + (while_expression + condition: (binary_expression + left: (call_expression + function: (field_expression + value: (identifier) + field: (field_identifier)) + arguments: (arguments)) + right: (integer_literal)) + body: (block + (expression_statement + (call_expression + function: (field_expression + value: (identifier) + field: (field_identifier)) + arguments: (arguments))))))))) + +================== +For loop +================== + +fn sum(items) { + let total = 0; + for item in items { + total = total + item; + } + total +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (let_declaration + pattern: (identifier) + value: (integer_literal)) + (expression_statement + (for_expression + pattern: (identifier) + value: (identifier) + body: (block + (expression_statement + (assignment_expression + left: (identifier) + right: (binary_expression + left: (identifier) + right: (identifier))))))) + (identifier)))) + +================== +Return expression +================== + +fn early_return(x) { + if x < 0 { + return 0; + } + x +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (expression_statement + (if_expression + condition: (binary_expression + left: (identifier) + right: (integer_literal)) + consequence: (block + (expression_statement + (return_expression + (integer_literal)))))) + (identifier)))) +``` + +- [ ] **Step 4: Write pattern tests** + +Create `test/corpus/patterns.txt`: + +``` +================== +Tuple destructuring +================== + +fn main() { + let (a, b) = get_pair(); +} + +--- + +(source_file + 
(function_item + name: (identifier) + parameters: (parameters) + body: (block + (let_declaration + pattern: (tuple_pattern + (identifier) + (identifier)) + value: (call_expression + function: (identifier) + arguments: (arguments)))))) + +================== +Struct pattern in match +================== + +fn describe(point) { + match point { + Point { x, y } => `${x}, ${y}`, + _ => "unknown", + } +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (expression_statement + (match_expression + value: (identifier) + body: (match_block + (match_arm + pattern: (match_pattern + (struct_pattern + type: (type_identifier) + (field_pattern + name: (shorthand_field_identifier)) + (field_pattern + name: (shorthand_field_identifier)))) + value: (template_literal + (interpolation + expression: (identifier)) + (template_content) + (interpolation + expression: (identifier)))) + (last_match_arm + pattern: (match_pattern (_)) + value: (string_literal (string_content))))))))) + +================== +Or pattern +================== + +fn check(color) { + match color { + Color::Red | Color::Blue => "primary", + _ => "other", + } +} + +--- + +(source_file + (function_item + name: (identifier) + parameters: (parameters + (identifier)) + body: (block + (expression_statement + (match_expression + value: (identifier) + body: (match_block + (match_arm + pattern: (match_pattern + (or_pattern + (scoped_identifier + path: (identifier) + name: (identifier)) + (scoped_identifier + path: (identifier) + name: (identifier)))) + value: (string_literal (string_content))) + (last_match_arm + pattern: (match_pattern (_)) + value: (string_literal (string_content))))))))) +``` + +- [ ] **Step 5: Generate and run all tests** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter generate && npx tree-sitter test +``` +Expected: All tests pass. Fix any failures. 
Use `npx tree-sitter test -u` to update expected trees if node names differ from what was predicted. + +- [ ] **Step 6: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add test/corpus/ +git commit -m "test: add core test corpus for declarations, expressions, control flow, patterns" +``` + +--- + +## Task 10: Highlight Queries + +**Files:** +- Create: `queries/highlights.scm` + +- [ ] **Step 1: Write highlights.scm** + +Adapted from tree-sitter-rust, removing Rust-specific captures and adding Rune-specific ones: + +```scheme +; Identifiers + +(type_identifier) @type + +(field_identifier) @property + +; Identifier conventions + +; Assume all-caps names are constants +((identifier) @constant + (#match? @constant "^[A-Z][A-Z\\d_]+$")) + +; Assume uppercase names are enum constructors +((identifier) @constructor + (#match? @constructor "^[A-Z]")) + +; Assume that uppercase names in paths are types +((scoped_identifier + path: (identifier) @type) + (#match? @type "^[A-Z]")) +((scoped_identifier + path: (scoped_identifier + name: (identifier) @type)) + (#match? @type "^[A-Z]")) + +; Function calls + +(call_expression + function: (identifier) @function.call) +(call_expression + function: (field_expression + field: (field_identifier) @function.method.call)) +(call_expression + function: (scoped_identifier + "::" + name: (identifier) @function.call)) + +(macro_invocation + macro: (identifier) @function.macro + "!" @function.macro) + +; Function definitions + +(function_item + name: (identifier) @function) + +; Parameters + +(parameters (identifier) @variable.parameter) +(closure_parameters (identifier) @variable.parameter) + +; Comments + +(line_comment) @comment +(block_comment) @comment + +; Punctuation + +"(" @punctuation.bracket +")" @punctuation.bracket +"[" @punctuation.bracket +"]" @punctuation.bracket +"{" @punctuation.bracket +"}" @punctuation.bracket + +"::" @punctuation.delimiter +":" @punctuation.delimiter +"." 
@punctuation.delimiter +"," @punctuation.delimiter +";" @punctuation.delimiter + +; Keywords + +"async" @keyword +"await" @keyword +"break" @keyword +"continue" @keyword +"else" @keyword +"enum" @keyword +"fn" @keyword +"for" @keyword +"if" @keyword +"in" @keyword +"is" @keyword +"let" @keyword +"loop" @keyword +"match" @keyword +"mod" @keyword +"not" @keyword +"pub" @keyword +"return" @keyword +"select" @keyword +"struct" @keyword +"use" @keyword +"while" @keyword +"yield" @keyword +(crate) @keyword +(use_list (self) @keyword) +(scoped_use_list (self) @keyword) +(scoped_identifier (self) @keyword) +(super) @keyword + +(self) @variable.builtin + +; Literals + +(char_literal) @character +(string_literal) @string +(string_content) @string +(template_literal) @string +(template_content) @string +(escape_sequence) @string.escape +(interpolation + "${" @punctuation.special + "}" @punctuation.special) + +(boolean_literal) @constant.builtin +(integer_literal) @number +(float_literal) @number.float + +; Operators + +"*" @operator +"!" @operator +"=" @operator +"==" @operator +"!=" @operator +"<" @operator +">" @operator +"<=" @operator +">=" @operator +"+" @operator +"-" @operator +"/" @operator +"%" @operator +"+=" @operator +"-=" @operator +"*=" @operator +"/=" @operator +"%=" @operator +"&&" @operator +"||" @operator +"=>" @operator + +; Attributes + +(attribute_item) @attribute +(inner_attribute_item) @attribute +``` + +- [ ] **Step 2: Verify highlights work** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter highlight ~/.config/owlry/plugins/hyprshutdown/main.rn +``` +Expected: Colored output showing keywords, strings, functions, etc. highlighted correctly. 
+ +- [ ] **Step 3: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add queries/highlights.scm +git commit -m "feat: add syntax highlighting queries" +``` + +--- + +## Task 11: Indents, Folds, Locals, and Tags Queries + +**Files:** +- Create: `queries/indents.scm` +- Create: `queries/folds.scm` +- Create: `queries/locals.scm` +- Create: `queries/tags.scm` + +- [ ] **Step 1: Write indents.scm** + +```scheme +; Indent on opening braces/brackets +[ + (block) + (field_declaration_list) + (enum_variant_list) + (declaration_list) + (match_block) + (arguments) + (parameters) + (array_expression) + (object_literal) + (select_expression) +] @indent + +[ + "}" + "]" + ")" +] @outdent +``` + +- [ ] **Step 2: Write folds.scm** + +```scheme +[ + (block) + (field_declaration_list) + (enum_variant_list) + (declaration_list) + (match_block) + (select_expression) + (block_comment) +] @fold +``` + +- [ ] **Step 3: Write locals.scm** + +```scheme +; Scopes + +(block) @local.scope +(function_item) @local.scope +(closure_expression) @local.scope +(for_expression) @local.scope +(while_expression) @local.scope +(loop_expression) @local.scope +(if_expression) @local.scope +(match_arm) @local.scope + +; Definitions + +(let_declaration + pattern: (identifier) @local.definition) + +(parameters + (identifier) @local.definition) + +(closure_parameters + (identifier) @local.definition) + +(for_expression + pattern: (identifier) @local.definition) + +; References + +(identifier) @local.reference +``` + +- [ ] **Step 4: Write tags.scm** + +```scheme +; Struct definitions +(struct_item + name: (type_identifier) @name) @definition.class + +; Enum definitions +(enum_item + name: (type_identifier) @name) @definition.class + +; Function definitions +(function_item + name: (identifier) @name) @definition.function + +; Module definitions +(mod_item + name: (identifier) @name) @definition.module + +; Function calls +(call_expression + function: (identifier) @name) @reference.call + 
+(call_expression + function: (field_expression + field: (field_identifier) @name)) @reference.call + +; Macro calls +(macro_invocation + macro: (identifier) @name) @reference.call +``` + +- [ ] **Step 5: Update tree-sitter.json to reference the highlights and tags queries** + +Make sure the grammar config declares the highlights and tags queries (the other query files are intentionally left out — see the note below): +```json +{ + "grammars": [ + { + "name": "rune", + "camelcase": "Rune", + "scope": "source.rune", + "path": ".", + "file-types": ["rn"], + "highlights": ["queries/highlights.scm"], + "tags": ["queries/tags.scm"], + "injection-regex": "rune" + } + ] +} +``` + +Note: `indents.scm`, `folds.scm`, and `locals.scm` are consumed by nvim-treesitter directly from the `queries/` directory, not declared in `tree-sitter.json`. + +- [ ] **Step 6: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add queries/ tree-sitter.json +git commit -m "feat: add indent, fold, locals, and tags queries" +``` + +--- + +## Task 12: Real-World Validation and Examples + +**Files:** +- Create: `examples/owlry_plugin.rn` +- Create: `examples/comprehensive.rn` + +- [ ] **Step 1: Copy real-world test file** + +Copy the owlry plugin as an example: +```bash +cp ~/.config/owlry/plugins/hyprshutdown/main.rn ~/ssd/git/active/tree-sitter-rune/examples/owlry_plugin.rn +``` + +- [ ] **Step 2: Parse and verify clean output** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter parse examples/owlry_plugin.rn +``` +Expected: Clean parse tree with NO `(ERROR)` or `(MISSING)` nodes. If any exist, fix the grammar rules causing them and re-run tests. 
+ +- [ ] **Step 3: Create a comprehensive example file** + +Create `examples/comprehensive.rn` that exercises all Rune constructs: + +```rune +use std::collections; + +struct Point { x, y } + +struct Wrapper(inner); + +enum Shape { + Circle(radius), + Rectangle { width, height }, + Unknown, +} + +pub fn describe(shape) { + match shape { + Shape::Circle(r) => `circle with radius ${r}`, + Shape::Rectangle { width, height } => { + let area = width * height; + `rectangle with area ${area}` + }, + _ => "unknown shape", + } +} + +pub async fn fetch_data(url) { + let response = http::get(url).await; + response +} + +fn fibonacci() { + let a = 0; + let b = 1; + loop { + yield a; + let c = a + b; + a = b; + b = c; + } +} + +pub fn make_counter() { + let count = 0; + let increment = || { + count = count + 1; + count + }; + increment +} + +fn check_type(value) { + if value is String { + println!("it's a string"); + } + + if value is not Vec { + println!("not a vector"); + } +} + +fn objects() { + let config = #{ + name: "test", + debug: true, + count: 42, + }; +} + +async fn race_futures(a, b) { + select { + result = a => result, + result = b => result, + } +} + +pub fn refresh() { + let items = []; + + items.push(Item::new("id", "name", "cmd") + .description("A description") + .icon("icon-name") + .keywords(["one", "two"])); + + items +} + +pub fn query(q) { + [] +} +``` + +- [ ] **Step 4: Parse comprehensive example** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter parse examples/comprehensive.rn +``` +Expected: Clean parse with no errors. + +- [ ] **Step 5: Highlight comprehensive example** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter highlight examples/comprehensive.rn +``` +Expected: Sensible colored output. 
+ +- [ ] **Step 6: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add examples/ +git commit -m "test: add real-world and comprehensive example files" +``` + +--- + +## Task 13: Polish — README, LICENSE, Package Metadata + +**Files:** +- Create: `README.md` +- Create: `LICENSE` +- Modify: `package.json` +- Modify: `Cargo.toml` + +- [ ] **Step 1: Create LICENSE** + +Create `LICENSE` with MIT license text. Use current year (2026) and the user's name/handle. + +- [ ] **Step 2: Create README.md** + +Create `README.md` with: +- Project title and description +- Installation instructions (nvim-treesitter custom parser, manual) +- Supported syntax overview +- Formatter integration (conform.nvim + `rune fmt`) +- Development instructions (build, test, generate) +- License + +- [ ] **Step 3: Update package.json metadata** + +Ensure `package.json` has correct: +- `name`: `tree-sitter-rune` +- `version`: `0.1.0` +- `description`: `Rune grammar for tree-sitter` +- `license`: `MIT` +- `keywords`: include `rune`, `tree-sitter`, `parser` + +- [ ] **Step 4: Update Cargo.toml metadata** + +Ensure `Cargo.toml` has: +- `name`: `tree-sitter-rune` +- `version`: `0.1.0` +- `description`: `Rune grammar for tree-sitter` +- `license`: `MIT` + +- [ ] **Step 5: Final test run** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +npx tree-sitter generate && npx tree-sitter test && npx tree-sitter parse examples/comprehensive.rn +``` +Expected: All pass cleanly. 
+ +- [ ] **Step 6: Commit** + +Run: +```bash +cd ~/ssd/git/active/tree-sitter-rune +git add README.md LICENSE package.json Cargo.toml +git commit -m "docs: add README, LICENSE, and update package metadata" +``` + +--- + +## Dependency Graph + +``` +Task 1 (Scaffold) + └─> Task 2 (Strip types) + └─> Task 3 (Strip ownership/unsafe/macros) + └─> Task 4 (Adapt declarations) + └─> Task 5 (Clean expressions/patterns) + └─> Task 6 (External scanner) + ├─> Task 7 (Template literals) + └─> Task 8 (Rune constructs) + └─> Task 9 (Core test corpus) + ├─> Task 10 (Highlights query) + ├─> Task 11 (Indents/folds/locals/tags) + └─> Task 12 (Validation) + └─> Task 13 (Polish) +``` + +Tasks 10, 11, and 12 can run in parallel after Task 9. diff --git a/docs/superpowers/specs/2026-03-27-tree-sitter-rune-design.md b/docs/superpowers/specs/2026-03-27-tree-sitter-rune-design.md new file mode 100644 index 0000000..12b74a2 --- /dev/null +++ b/docs/superpowers/specs/2026-03-27-tree-sitter-rune-design.md @@ -0,0 +1,252 @@ +# tree-sitter-rune Design Spec + +## Overview + +A tree-sitter grammar for the [Rune](https://rune-rs.github.io/) programming language, targeting integration with nvim-treesitter for syntax highlighting, indentation, code folding, and scope tracking in Neovim. + +Rune is a dynamically-typed, Rust-inspired scripting language designed for embedding in Rust applications. It uses the `.rn` file extension. + +## Approach + +Fork [tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) and modify: + +- **Remove** ~30-40% of rules related to Rust's type system, ownership model, and static typing +- **Add** Rune-specific constructs (template literals, generators, select blocks, dynamic typing) +- **Adapt** shared constructs (functions, control flow, patterns) to Rune's untyped signatures + +This leverages tree-sitter-rust's battle-tested expression parsing, operator precedence, and string handling while producing a clean grammar that maps 1:1 to Rune's actual syntax. 
+ +## Repository + +- **Location:** `~/ssd/git/active/tree-sitter-rune` +- **License:** MIT +- **Target:** nvim-treesitter submission, npm and crates.io publishing + +## Project Structure + +``` +tree-sitter-rune/ +├── grammar.js # Grammar definition +├── package.json # Node project config with tree-sitter field +├── Cargo.toml # Rust bindings +├── binding.gyp # Node native addon build +├── bindings/ # Language bindings (node, rust) +├── src/ # Generated parser (not hand-edited) +│ ├── parser.c +│ ├── scanner.c # External scanner for template literals +│ └── ... +├── queries/ +│ ├── highlights.scm # Syntax highlighting captures +│ ├── indents.scm # Auto-indentation rules +│ ├── folds.scm # Code folding regions +│ └── locals.scm # Scope/variable tracking +├── test/ +│ └── corpus/ # Test cases (input -> expected tree) +│ ├── declarations.txt +│ ├── expressions.txt +│ ├── literals.txt +│ ├── patterns.txt +│ ├── control_flow.txt +│ ├── async.txt +│ ├── generators.txt +│ └── objects.txt +├── examples/ # Real .rn files for parse validation +├── LICENSE +└── README.md +``` + +## Grammar: Removals from tree-sitter-rust + +These Rust constructs do not exist in Rune and are removed entirely: + +| Construct | Reason | +|---|---| +| Lifetime annotations (`'a`, `'static`) | No borrow checker | +| Type annotations on params/returns | Dynamically typed | +| Generic type parameters (``) | No generics | +| Trait definitions and `impl Trait` | No trait system | +| `impl` blocks | No type-level methods | +| Borrow/reference syntax (`&`, `&mut`) | No ownership model | +| `where` clauses | No type constraints | +| `unsafe` blocks | No unsafe | +| `extern` blocks / FFI | Embedding is Rust-side | +| `const` / `static` items | Not in Rune | +| Type aliases (`type Foo = ...`) | No type system | +| Union types | Not in Rune | +| `dyn` / `Box` / smart pointer syntax | Runtime-managed | +| Turbofish (`::`) | No generics | + +## Grammar: Additions for Rune + +| Construct | Details | +|---|---| 
+| Template literals | Backtick strings with `${expr}` interpolation. Requires an external scanner for nested balanced expressions. Reference: tree-sitter-javascript's template literal scanner. | +| `select` blocks | `select { branch => expr, ... }` for async concurrency | +| `yield` expressions | `yield value` for generators | +| Untyped function params | `fn foo(a, b)` — parameter names only, no type annotations | +| Untyped struct fields | `struct Foo { x, y }` — field names only | +| Untyped enum variants | `enum E { Ok(value), Err(msg) }` — positional names, no types | +| Async closures | `async \|\| { ... }` and `async \|x\| { ... }` | +| Object literals | `#{ key: value }` syntax | +| `is` / `is not` operators | Runtime type checking | +| Byte literals | `b'x'` and `b"string"` | + +## Grammar: Shared Constructs (Keep, Adapt) + +These exist in both Rust and Rune with minor differences: + +- **Functions:** Keep `fn`, `pub fn`, `async fn`. Remove return type annotations. +- **Control flow:** `if`/`else`, `match`, `loop`, `while`, `for`, `break`, `continue`, `return` — identical syntax. +- **Pattern matching:** Literal, tuple, struct, enum, wildcard, rest (`..`), guard clauses — same as Rust minus type patterns. +- **Closures:** `|args| expr` — same syntax, no type annotations on params. +- **Modules:** `mod`, `use`, `self`, `crate`, `super` — same as Rust. +- **Visibility:** `pub`, `pub(crate)`, `pub(super)`, `pub(self)` — same as Rust. +- **Operators:** Arithmetic, comparison, logical, assignment — same as Rust plus `is`/`is not`. +- **Comments:** `//` line and `/* */` block — same as Rust. +- **String literals:** Double-quoted strings with escape sequences — same as Rust. + +## External Scanner + +An external scanner (`src/scanner.c`) is needed for template literal parsing. The scanner must: + +1. Recognize backtick (`` ` ``) as the start/end of a template literal +2. Track `${` as the start of an interpolation +3. 
Handle nested braces within interpolations (balanced `{}` counting) +4. Return to template literal content after `}` closes the interpolation + +Reference implementation: [tree-sitter-javascript's scanner.c](https://github.com/tree-sitter/tree-sitter-javascript/blob/master/src/scanner.c) handles the same construct. + +## Query Files + +### highlights.scm + +Key capture groups: + +- `@keyword` — `fn`, `let`, `pub`, `use`, `mod`, `if`, `else`, `match`, `loop`, `while`, `for`, `return`, `async`, `await`, `yield`, `select`, `break`, `continue`, `in`, `is` +- `@function` — function definitions and calls +- `@function.method` — method calls (chained `.method()`) +- `@string` — regular strings and template literal content +- `@string.special` — `${...}` interpolation delimiters +- `@variable` — identifiers +- `@variable.parameter` — function parameters +- `@operator` — binary/unary operators +- `@type` — struct/enum names (PascalCase convention) +- `@module` — module paths +- `@comment` — line and block comments +- `@punctuation.bracket` — `()`, `[]`, `{}` +- `@punctuation.delimiter` — `,`, `;`, `::` +- `@constant.builtin` — `true`, `false`, `None` +- `@number` — integer and float literals + +### indents.scm + +Block-based indentation for `{}` bodies: functions, control flow, match arms, struct/enum definitions, closures, block expressions. + +### folds.scm + +Foldable regions: function bodies, struct/enum bodies, match blocks, block expressions, multi-line comment blocks. + +### locals.scm + +Scope definitions: function bodies, block expressions, closures, for/while/loop bodies. +Variable definitions: `let` bindings, function parameters, for-loop variables. +References: identifier usage. + +## Formatter Integration + +No custom formatter — Rune ships `rune fmt` since v0.13. 
Document integration with conform.nvim: + +```lua +require("conform").setup({ + formatters_by_ft = { + rune = { "rune_fmt" }, + }, + formatters = { + rune_fmt = { + command = "rune", + args = { "fmt", "$FILENAME" }, + stdin = false, + }, + }, +}) +``` + +This is documented in the README, not shipped as part of the grammar repo. + +## Testing Strategy + +### Test Corpus + +Each `.txt` file in `test/corpus/` covers a category of syntax using tree-sitter's standard format: + +``` +================== +Test name +================== + +source code here + +--- + +(expected_syntax_tree) +``` + +**Categories:** + +- `declarations.txt` — fn, struct, enum, mod, use, visibility modifiers +- `expressions.txt` — binary, unary, call, method chain, field access, index, range +- `literals.txt` — strings, template literals, numbers, booleans, byte literals, object literals +- `control_flow.txt` — if/else, match with guards, loops (loop/while/for), break/continue/return +- `patterns.txt` — match patterns (literal, tuple, struct, enum, wildcard, rest, or-patterns) +- `async.txt` — async fn, .await, select blocks, async closures +- `generators.txt` — yield expressions, generator functions, streams +- `objects.txt` — object literals (`#{}`), vector literals + +### Real-World Validation + +Parse existing `.rn` files to catch gaps: +- `~/.config/owlry/plugins/hyprshutdown/main.rn` +- Future owlry plugin files as they're written + +### Validation Criteria + +- All test corpus files pass `tree-sitter test` +- Real-world `.rn` files parse without errors +- Highlight queries produce sensible coloring in Neovim +- No parser crashes on malformed input (graceful error recovery) + +## Development Phases + +### Phase 1: Core Grammar + +Fork tree-sitter-rust, strip type system rules, adapt function/struct/enum definitions to Rune's untyped signatures. Target: parse `main.rn` successfully. 
+ +### Phase 2: Rune-Specific Constructs + +Add template literals (external scanner), select blocks, yield expressions, async closures, object literals, `is`/`is not`. Build test corpus alongside. + +### Phase 3: Query Files + +Write `highlights.scm`, `indents.scm`, `folds.scm`, `locals.scm`. Validate in Neovim with real files. + +### Phase 4: Polish & Publish + +- Complete test corpus coverage +- README with installation and usage instructions +- MIT LICENSE +- Submit PR to nvim-treesitter to register parser +- Publish to npm (`tree-sitter-rune`) and crates.io + +## nvim-treesitter Submission Requirements + +- Grammar hosted on public Git repository +- `highlights.scm` included at minimum +- Parser handles real-world code without crashing +- `package.json` contains `tree-sitter` field with grammar metadata +- Maintained and responsive to issues + +## Dependencies + +- **Build time:** Node.js (for `tree-sitter generate`), tree-sitter CLI, C compiler +- **Runtime:** None (compiled to a `.so` shared library) +- **Formatter:** `rune` CLI (installed separately by the user) diff --git a/grammar.js b/grammar.js new file mode 100644 index 0000000..d91bc03 --- /dev/null +++ b/grammar.js @@ -0,0 +1,1695 @@ +/** + * @file Rust grammar for tree-sitter + * @author Maxim Sokolov + * @author Max Brunsfeld + * @author Amaan Qureshi + * @license MIT + */ + +/// +// @ts-check + +// https://doc.rust-lang.org/reference/expressions.html#expression-precedence +const PREC = { + call: 15, + field: 14, + try: 13, + unary: 12, + cast: 11, + multiplicative: 10, + additive: 9, + shift: 8, + bitand: 7, + bitxor: 6, + bitor: 5, + comparative: 4, + and: 3, + or: 2, + range: 1, + assign: 0, + closure: -1, +}; + +const numericTypes = [ + 'u8', + 'i8', + 'u16', + 'i16', + 'u32', + 'i32', + 'u64', + 'i64', + 'u128', + 'i128', + 'isize', + 'usize', + 'f32', + 'f64', +]; + +// https://doc.rust-lang.org/reference/tokens.html#punctuation +const TOKEN_TREE_NON_SPECIAL_PUNCTUATION = [ + '+', '-', '*', 
'/', '%', '^', '!', '&', '|', '&&', '||', '<<', + '>>', '+=', '-=', '*=', '/=', '%=', '^=', '&=', '|=', '<<=', + '>>=', '=', '==', '!=', '>', '<', '>=', '<=', '@', '_', '.', + '..', '...', '..=', ',', ';', ':', '::', '->', '=>', '#', '?', +]; + +const primitiveTypes = numericTypes.concat(['bool', 'str', 'char']); + +module.exports = grammar({ + name: 'rust', + + extras: $ => [ + /\s/, + $.line_comment, + $.block_comment, + ], + + externals: $ => [ + $.string_content, + $._raw_string_literal_start, + $.raw_string_literal_content, + $._raw_string_literal_end, + $.float_literal, + $._outer_block_doc_comment_marker, + $._inner_block_doc_comment_marker, + $._block_comment_content, + $._line_doc_content, + $._error_sentinel, + ], + + supertypes: $ => [ + $._expression, + $._type, + $._literal, + $._literal_pattern, + $._declaration_statement, + $._pattern, + ], + + inline: $ => [ + $._path, + $._type_identifier, + $._tokens, + $._field_identifier, + $._non_special_token, + $._declaration_statement, + $._reserved_identifier, + $._expression_ending_with_block, + ], + + conflicts: $ => [ + // Local ambiguity due to anonymous types: + // See https://internals.rust-lang.org/t/pre-rfc-deprecating-anonymous-parameters/3710 + [$._type, $._pattern], + [$.unit_type, $.tuple_pattern], + [$.scoped_identifier, $.scoped_type_identifier], + [$.parameters, $._pattern], + [$.parameters, $.tuple_struct_pattern], + [$.array_expression], + [$.visibility_modifier], + [$.visibility_modifier, $.scoped_identifier, $.scoped_type_identifier], + [$.foreign_mod_item, $.function_modifiers], + ], + + word: $ => $.identifier, + + rules: { + source_file: $ => seq( + optional($.shebang), + repeat($._statement), + ), + + _statement: $ => choice( + $.expression_statement, + $._declaration_statement, + ), + + empty_statement: _ => ';', + + expression_statement: $ => choice( + seq($._expression, ';'), + prec(1, $._expression_ending_with_block), + ), + + _declaration_statement: $ => choice( + $.const_item, + 
$.macro_invocation, + $.macro_definition, + $.empty_statement, + $.attribute_item, + $.inner_attribute_item, + $.mod_item, + $.foreign_mod_item, + $.struct_item, + $.union_item, + $.enum_item, + $.type_item, + $.function_item, + $.function_signature_item, + $.impl_item, + $.trait_item, + $.associated_type, + $.let_declaration, + $.use_declaration, + $.extern_crate_declaration, + $.static_item, + ), + + // Section - Macro definitions + + macro_definition: $ => { + const rules = seq( + repeat(seq($.macro_rule, ';')), + optional($.macro_rule), + ); + + return seq( + 'macro_rules!', + field('name', choice( + $.identifier, + $._reserved_identifier, + )), + choice( + seq('(', rules, ')', ';'), + seq('[', rules, ']', ';'), + seq('{', rules, '}'), + ), + ); + }, + + macro_rule: $ => seq( + field('left', $.token_tree_pattern), + '=>', + field('right', $.token_tree), + ), + + _token_pattern: $ => choice( + $.token_tree_pattern, + $.token_repetition_pattern, + $.token_binding_pattern, + $.metavariable, + $._non_special_token, + ), + + token_tree_pattern: $ => choice( + seq('(', repeat($._token_pattern), ')'), + seq('[', repeat($._token_pattern), ']'), + seq('{', repeat($._token_pattern), '}'), + ), + + token_binding_pattern: $ => prec(1, seq( + field('name', $.metavariable), + ':', + field('type', $.fragment_specifier), + )), + + token_repetition_pattern: $ => seq( + '$', '(', repeat($._token_pattern), ')', optional(/[^+*?]+/), choice('+', '*', '?'), + ), + + fragment_specifier: _ => choice( + 'block', 'expr', 'expr_2021', 'ident', 'item', 'lifetime', 'literal', 'meta', 'pat', + 'pat_param', 'path', 'stmt', 'tt', 'ty', 'vis', + ), + + _tokens: $ => choice( + $.token_tree, + $.token_repetition, + $.metavariable, + $._non_special_token, + ), + + token_tree: $ => choice( + seq('(', repeat($._tokens), ')'), + seq('[', repeat($._tokens), ']'), + seq('{', repeat($._tokens), '}'), + ), + + token_repetition: $ => seq( + '$', '(', repeat($._tokens), ')', optional(/[^+*?]+/), 
choice('+', '*', '?'), + ), + + // Matches non-delimiter tokens common to both macro invocations and + // definitions. This is everything except $ and metavariables (which begin + // with $). + _non_special_token: $ => choice( + $._literal, $.identifier, $.mutable_specifier, $.self, $.super, $.crate, + alias(choice(...primitiveTypes), $.primitive_type), + prec.right(repeat1(choice(...TOKEN_TREE_NON_SPECIAL_PUNCTUATION))), + '\'', + 'as', 'async', 'await', 'break', 'const', 'continue', 'default', 'enum', 'fn', 'for', 'gen', + 'if', 'impl', 'let', 'loop', 'match', 'mod', 'pub', 'return', 'static', 'struct', 'trait', + 'type', 'union', 'unsafe', 'use', 'where', 'while', + ), + + // Section - Declarations + + attribute_item: $ => seq( + '#', + '[', + $.attribute, + ']', + ), + + inner_attribute_item: $ => seq( + '#', + '!', + '[', + $.attribute, + ']', + ), + + attribute: $ => seq( + $._path, + optional(choice( + seq('=', field('value', $._expression)), + field('arguments', alias($.delim_token_tree, $.token_tree)), + )), + ), + + mod_item: $ => seq( + optional($.visibility_modifier), + 'mod', + field('name', $.identifier), + choice( + ';', + field('body', $.declaration_list), + ), + ), + + foreign_mod_item: $ => seq( + optional('unsafe'), + $.extern_modifier, + choice( + ';', + field('body', $.declaration_list), + ), + ), + + declaration_list: $ => seq( + '{', + repeat($._declaration_statement), + '}', + ), + + struct_item: $ => seq( + optional($.visibility_modifier), + 'struct', + field('name', $._type_identifier), + field('type_parameters', optional($.type_parameters)), + choice( + seq( + optional($.where_clause), + field('body', $.field_declaration_list), + ), + seq( + field('body', $.ordered_field_declaration_list), + optional($.where_clause), + ';', + ), + ';', + ), + ), + + union_item: $ => seq( + optional($.visibility_modifier), + 'union', + field('name', $._type_identifier), + field('type_parameters', optional($.type_parameters)), + optional($.where_clause), + 
field('body', $.field_declaration_list), + ), + + enum_item: $ => seq( + optional($.visibility_modifier), + 'enum', + field('name', $._type_identifier), + field('type_parameters', optional($.type_parameters)), + optional($.where_clause), + field('body', $.enum_variant_list), + ), + + enum_variant_list: $ => seq( + '{', + sepBy(',', seq(repeat($.attribute_item), $.enum_variant)), + optional(','), + '}', + ), + + enum_variant: $ => seq( + optional($.visibility_modifier), + field('name', $.identifier), + field('body', optional(choice( + $.field_declaration_list, + $.ordered_field_declaration_list, + ))), + optional(seq( + '=', + field('value', $._expression), + )), + ), + + field_declaration_list: $ => seq( + '{', + sepBy(',', seq(repeat($.attribute_item), $.field_declaration)), + optional(','), + '}', + ), + + field_declaration: $ => seq( + optional($.visibility_modifier), + field('name', $._field_identifier), + ':', + field('type', $._type), + ), + + ordered_field_declaration_list: $ => seq( + '(', + sepBy(',', seq( + repeat($.attribute_item), + optional($.visibility_modifier), + field('type', $._type), + )), + optional(','), + ')', + ), + + extern_crate_declaration: $ => seq( + optional($.visibility_modifier), + 'extern', + $.crate, + field('name', $.identifier), + optional(seq( + 'as', + field('alias', $.identifier), + )), + ';', + ), + + const_item: $ => seq( + optional($.visibility_modifier), + 'const', + field('name', $.identifier), + ':', + field('type', $._type), + optional( + seq( + '=', + field('value', $._expression), + ), + ), + ';', + ), + + static_item: $ => seq( + optional($.visibility_modifier), + 'static', + + // Not actual rust syntax, but made popular by the lazy_static crate. 
+ optional('ref'), + + optional($.mutable_specifier), + field('name', $.identifier), + ':', + field('type', $._type), + optional(seq( + '=', + field('value', $._expression), + )), + ';', + ), + + type_item: $ => seq( + optional($.visibility_modifier), + 'type', + field('name', $._type_identifier), + field('type_parameters', optional($.type_parameters)), + optional($.where_clause), + '=', + field('type', $._type), + optional($.where_clause), + ';', + ), + + function_item: $ => seq( + optional($.visibility_modifier), + optional($.function_modifiers), + 'fn', + field('name', choice($.identifier, $.metavariable)), + field('type_parameters', optional($.type_parameters)), + field('parameters', $.parameters), + optional(seq('->', field('return_type', $._type))), + optional($.where_clause), + field('body', $.block), + ), + + function_signature_item: $ => seq( + optional($.visibility_modifier), + optional($.function_modifiers), + 'fn', + field('name', choice($.identifier, $.metavariable)), + field('type_parameters', optional($.type_parameters)), + field('parameters', $.parameters), + optional(seq('->', field('return_type', $._type))), + optional($.where_clause), + ';', + ), + + function_modifiers: $ => repeat1(choice( + 'async', + 'default', + 'const', + 'unsafe', + $.extern_modifier, + )), + + where_clause: $ => prec.right(seq( + 'where', + optional(seq( + sepBy1(',', $.where_predicate), + optional(','), + )), + )), + + where_predicate: $ => seq( + field('left', choice( + $.lifetime, + $._type_identifier, + $.scoped_type_identifier, + $.generic_type, + $.reference_type, + $.pointer_type, + $.tuple_type, + $.array_type, + $.higher_ranked_trait_bound, + alias(choice(...primitiveTypes), $.primitive_type), + )), + field('bounds', $.trait_bounds), + ), + + impl_item: $ => seq( + optional('unsafe'), + 'impl', + field('type_parameters', optional($.type_parameters)), + optional(seq( + optional('!'), + field('trait', choice( + $._type_identifier, + $.scoped_type_identifier, + 
$.generic_type, + )), + 'for', + )), + field('type', $._type), + optional($.where_clause), + choice(field('body', $.declaration_list), ';'), + ), + + trait_item: $ => seq( + optional($.visibility_modifier), + optional('unsafe'), + 'trait', + field('name', $._type_identifier), + field('type_parameters', optional($.type_parameters)), + field('bounds', optional($.trait_bounds)), + optional($.where_clause), + field('body', $.declaration_list), + ), + + associated_type: $ => seq( + 'type', + field('name', $._type_identifier), + field('type_parameters', optional($.type_parameters)), + field('bounds', optional($.trait_bounds)), + optional($.where_clause), + ';', + ), + + trait_bounds: $ => seq( + ':', + sepBy1('+', choice( + $._type, + $.lifetime, + $.higher_ranked_trait_bound, + )), + ), + + higher_ranked_trait_bound: $ => seq( + 'for', + field('type_parameters', $.type_parameters), + field('type', $._type), + ), + + removed_trait_bound: $ => seq( + '?', + $._type, + ), + + type_parameters: $ => prec(1, seq( + '<', + sepBy1(',', seq( + repeat($.attribute_item), + choice( + $.metavariable, + $.type_parameter, + $.lifetime_parameter, + $.const_parameter, + ), + )), + optional(','), + '>', + )), + + const_parameter: $ => seq( + 'const', + field('name', $.identifier), + ':', + field('type', $._type), + optional( + seq( + '=', + field('value', + choice( + $.block, + $.identifier, + $._literal, + $.negative_literal, + ), + ), + ), + ), + ), + + type_parameter: $ => prec(1, seq( + field('name', $._type_identifier), + optional(field('bounds', $.trait_bounds)), + optional( + seq( + '=', + field('default_type', $._type), + ), + ), + )), + + lifetime_parameter: $ => prec(1, seq( + field('name', $.lifetime), + optional(field('bounds', $.trait_bounds)), + )), + + let_declaration: $ => seq( + 'let', + optional($.mutable_specifier), + field('pattern', $._pattern), + optional(seq( + ':', + field('type', $._type), + )), + optional(seq( + '=', + field('value', $._expression), + )), + 
optional(seq( + 'else', + field('alternative', $.block), + )), + ';', + ), + + use_declaration: $ => seq( + optional($.visibility_modifier), + 'use', + field('argument', $._use_clause), + ';', + ), + + _use_clause: $ => choice( + $._path, + $.use_as_clause, + $.use_list, + $.scoped_use_list, + $.use_wildcard, + ), + + scoped_use_list: $ => seq( + field('path', optional($._path)), + '::', + field('list', $.use_list), + ), + + use_list: $ => seq( + '{', + sepBy(',', choice( + $._use_clause, + )), + optional(','), + '}', + ), + + use_as_clause: $ => seq( + field('path', $._path), + 'as', + field('alias', $.identifier), + ), + + use_wildcard: $ => seq( + optional(seq(optional($._path), '::')), + '*', + ), + + parameters: $ => seq( + '(', + sepBy(',', seq( + optional($.attribute_item), + choice( + $.parameter, + $.self_parameter, + $.variadic_parameter, + '_', + $._type, + ))), + optional(','), + ')', + ), + + self_parameter: $ => seq( + optional('&'), + optional($.lifetime), + optional($.mutable_specifier), + $.self, + ), + + variadic_parameter: $ => seq( + optional($.mutable_specifier), + optional(seq( + field('pattern', $._pattern), + ':', + )), + '...', + ), + + parameter: $ => seq( + optional($.mutable_specifier), + field('pattern', choice( + $._pattern, + $.self, + )), + ':', + field('type', $._type), + ), + + extern_modifier: $ => seq( + 'extern', + optional($.string_literal), + ), + + visibility_modifier: $ => choice( + $.crate, + seq( + 'pub', + optional(seq( + '(', + choice( + $.self, + $.super, + $.crate, + seq('in', $._path), + ), + ')', + )), + ), + ), + + // Section - Types + + _type: $ => choice( + $.abstract_type, + $.reference_type, + $.metavariable, + $.pointer_type, + $.generic_type, + $.scoped_type_identifier, + $.tuple_type, + $.unit_type, + $.array_type, + $.function_type, + $._type_identifier, + $.macro_invocation, + $.never_type, + $.dynamic_type, + $.bounded_type, + $.removed_trait_bound, + alias(choice(...primitiveTypes), $.primitive_type), + 
), + + bracketed_type: $ => seq( + '<', + choice( + $._type, + $.qualified_type, + ), + '>', + ), + + qualified_type: $ => seq( + field('type', $._type), + 'as', + field('alias', $._type), + ), + + lifetime: $ => prec(1, seq('\'', $.identifier)), + + array_type: $ => seq( + '[', + field('element', $._type), + optional(seq( + ';', + field('length', $._expression), + )), + ']', + ), + + for_lifetimes: $ => seq( + 'for', + '<', + sepBy1(',', $.lifetime), + optional(','), + '>', + ), + + function_type: $ => seq( + optional($.for_lifetimes), + prec(PREC.call, seq( + choice( + field('trait', choice( + $._type_identifier, + $.scoped_type_identifier, + )), + seq( + optional($.function_modifiers), + 'fn', + ), + ), + field('parameters', $.parameters), + )), + optional(seq('->', field('return_type', $._type))), + ), + + tuple_type: $ => seq( + '(', + sepBy1(',', $._type), + optional(','), + ')', + ), + + unit_type: _ => seq('(', ')'), + + generic_function: $ => prec(1, seq( + field('function', choice( + $.identifier, + $.scoped_identifier, + $.field_expression, + )), + '::', + field('type_arguments', $.type_arguments), + )), + + generic_type: $ => prec(1, seq( + field('type', choice( + $._type_identifier, + $._reserved_identifier, + $.scoped_type_identifier, + )), + field('type_arguments', $.type_arguments), + )), + + generic_type_with_turbofish: $ => seq( + field('type', choice( + $._type_identifier, + $.scoped_identifier, + )), + '::', + field('type_arguments', $.type_arguments), + ), + + bounded_type: $ => prec.left(-1, seq( + choice($.lifetime, $._type, $.use_bounds), + '+', + choice($.lifetime, $._type, $.use_bounds), + )), + + use_bounds: $ => seq( + 'use', + token(prec(1, '<')), + sepBy( + ',', + choice( + $.lifetime, + $._type_identifier, + ), + ), + optional(','), + '>', + ), + + type_arguments: $ => seq( + token(prec(1, '<')), + sepBy1(',', seq( + choice( + $._type, + $.type_binding, + $.lifetime, + $._literal, + $.block, + ), + optional($.trait_bounds), + )), + 
optional(','), + '>', + ), + + type_binding: $ => seq( + field('name', $._type_identifier), + field('type_arguments', optional($.type_arguments)), + '=', + field('type', $._type), + ), + + reference_type: $ => seq( + '&', + optional($.lifetime), + optional($.mutable_specifier), + field('type', $._type), + ), + + pointer_type: $ => seq( + '*', + choice('const', $.mutable_specifier), + field('type', $._type), + ), + + never_type: _ => '!', + + abstract_type: $ => seq( + 'impl', + optional(seq('for', $.type_parameters)), + field('trait', prec(1, choice( + $._type_identifier, + $.scoped_type_identifier, + $.removed_trait_bound, + $.generic_type, + $.function_type, + $.tuple_type, + $.bounded_type, + ))), + ), + + dynamic_type: $ => seq( + 'dyn', + field('trait', choice( + $.higher_ranked_trait_bound, + $._type_identifier, + $.scoped_type_identifier, + $.generic_type, + $.function_type, + $.tuple_type, + )), + ), + + mutable_specifier: _ => 'mut', + + // Section - Expressions + + _expression_except_range: $ => choice( + $.unary_expression, + $.reference_expression, + $.try_expression, + $.binary_expression, + $.assignment_expression, + $.compound_assignment_expr, + $.type_cast_expression, + $.call_expression, + $.return_expression, + $.yield_expression, + $._literal, + prec.left($.identifier), + alias(choice(...primitiveTypes), $.identifier), + prec.left($._reserved_identifier), + $.self, + $.scoped_identifier, + $.generic_function, + $.await_expression, + $.field_expression, + $.array_expression, + $.tuple_expression, + prec(1, $.macro_invocation), + $.unit_expression, + $.break_expression, + $.continue_expression, + $.index_expression, + $.metavariable, + $.closure_expression, + $.parenthesized_expression, + $.struct_expression, + $._expression_ending_with_block, + ), + + _expression: $ => choice( + $._expression_except_range, + $.range_expression, + ), + + _expression_ending_with_block: $ => choice( + $.unsafe_block, + $.async_block, + $.gen_block, + $.try_block, + 
$.block, + $.if_expression, + $.match_expression, + $.while_expression, + $.loop_expression, + $.for_expression, + $.const_block, + ), + + macro_invocation: $ => seq( + field('macro', choice( + $.scoped_identifier, + $.identifier, + $._reserved_identifier, + )), + '!', + alias($.delim_token_tree, $.token_tree), + ), + + delim_token_tree: $ => choice( + seq('(', repeat($._delim_tokens), ')'), + seq('[', repeat($._delim_tokens), ']'), + seq('{', repeat($._delim_tokens), '}'), + ), + + _delim_tokens: $ => choice( + $._non_delim_token, + alias($.delim_token_tree, $.token_tree), + ), + + // Should match any token other than a delimiter. + _non_delim_token: $ => choice( + $._non_special_token, + '$', + ), + + scoped_identifier: $ => seq( + field('path', optional(choice( + $._path, + $.bracketed_type, + alias($.generic_type_with_turbofish, $.generic_type), + ))), + '::', + field('name', choice($.identifier, $.super)), + ), + + scoped_type_identifier_in_expression_position: $ => prec(-2, seq( + field('path', optional(choice( + $._path, + alias($.generic_type_with_turbofish, $.generic_type), + ))), + '::', + field('name', $._type_identifier), + )), + + scoped_type_identifier: $ => seq( + field('path', optional(choice( + $._path, + alias($.generic_type_with_turbofish, $.generic_type), + $.bracketed_type, + $.generic_type, + ))), + '::', + field('name', $._type_identifier), + ), + + range_expression: $ => prec.left(PREC.range, choice( + seq($._expression, choice('..', '...', '..='), $._expression), + seq($._expression, '..'), + seq('..', $._expression), + '..', + )), + + unary_expression: $ => prec(PREC.unary, seq( + choice('-', '*', '!'), + $._expression, + )), + + try_expression: $ => prec(PREC.try, seq( + $._expression, + '?', + )), + + reference_expression: $ => prec(PREC.unary, seq( + '&', + choice( + seq('raw', choice('const', $.mutable_specifier)), + optional($.mutable_specifier), + ), + field('value', $._expression), + )), + + binary_expression: $ => { + const table = 
[ + [PREC.and, '&&'], + [PREC.or, '||'], + [PREC.bitand, '&'], + [PREC.bitor, '|'], + [PREC.bitxor, '^'], + [PREC.comparative, choice('==', '!=', '<', '<=', '>', '>=')], + [PREC.shift, choice('<<', '>>')], + [PREC.additive, choice('+', '-')], + [PREC.multiplicative, choice('*', '/', '%')], + ]; + + // @ts-ignore + return choice(...table.map(([precedence, operator]) => prec.left(precedence, seq( + field('left', $._expression), + // @ts-ignore + field('operator', operator), + field('right', $._expression), + )))); + }, + + assignment_expression: $ => prec.left(PREC.assign, seq( + field('left', $._expression), + '=', + field('right', $._expression), + )), + + compound_assignment_expr: $ => prec.left(PREC.assign, seq( + field('left', $._expression), + field('operator', choice('+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=')), + field('right', $._expression), + )), + + type_cast_expression: $ => prec.left(PREC.cast, seq( + field('value', $._expression), + 'as', + field('type', $._type), + )), + + return_expression: $ => choice( + prec.left(seq('return', $._expression)), + prec(-1, 'return'), + ), + + yield_expression: $ => choice( + prec.left(seq('yield', $._expression)), + prec(-1, 'yield'), + ), + + call_expression: $ => prec(PREC.call, seq( + field('function', $._expression_except_range), + field('arguments', $.arguments), + )), + + arguments: $ => seq( + '(', + sepBy(',', seq(repeat($.attribute_item), $._expression)), + optional(','), + ')', + ), + + array_expression: $ => seq( + '[', + repeat($.attribute_item), + choice( + seq( + $._expression, + ';', + field('length', $._expression), + ), + seq( + sepBy(',', seq(repeat($.attribute_item), $._expression)), + optional(','), + ), + ), + ']', + ), + + parenthesized_expression: $ => seq( + '(', + $._expression, + ')', + ), + + tuple_expression: $ => seq( + '(', + repeat($.attribute_item), + seq($._expression, ','), + repeat(seq($._expression, ',')), + optional($._expression), + ')', + ), + + 
unit_expression: _ => seq('(', ')'), + + struct_expression: $ => seq( + field('name', choice( + $._type_identifier, + alias($.scoped_type_identifier_in_expression_position, $.scoped_type_identifier), + $.generic_type_with_turbofish, + )), + field('body', $.field_initializer_list), + ), + + field_initializer_list: $ => seq( + '{', + sepBy(',', choice( + $.shorthand_field_initializer, + $.field_initializer, + $.base_field_initializer, + )), + optional(','), + '}', + ), + + shorthand_field_initializer: $ => seq( + repeat($.attribute_item), + $.identifier, + ), + + field_initializer: $ => seq( + repeat($.attribute_item), + field('field', choice($._field_identifier, $.integer_literal)), + ':', + field('value', $._expression), + ), + + base_field_initializer: $ => seq( + '..', + $._expression, + ), + + if_expression: $ => prec.right(seq( + 'if', + field('condition', $._condition), + field('consequence', $.block), + optional(field('alternative', $.else_clause)), + )), + + let_condition: $ => seq( + 'let', + field('pattern', $._pattern), + '=', + field('value', prec.left(PREC.and, $._expression)), + ), + + _let_chain: $ => prec.left(PREC.and, choice( + seq($._let_chain, '&&', $.let_condition), + seq($._let_chain, '&&', $._expression), + seq($.let_condition, '&&', $._expression), + seq($.let_condition, '&&', $.let_condition), + seq($._expression, '&&', $.let_condition), + )), + + _condition: $ => choice( + $._expression, + $.let_condition, + alias($._let_chain, $.let_chain), + ), + + else_clause: $ => seq( + 'else', + choice( + $.block, + $.if_expression, + ), + ), + + match_expression: $ => seq( + 'match', + field('value', $._expression), + field('body', $.match_block), + ), + + match_block: $ => seq( + '{', + optional(seq( + repeat($.match_arm), + alias($.last_match_arm, $.match_arm), + )), + '}', + ), + + match_arm: $ => prec.right(seq( + repeat(choice($.attribute_item, $.inner_attribute_item)), + field('pattern', $.match_pattern), + '=>', + choice( + seq(field('value', 
$._expression), ','), + field('value', prec(1, $._expression_ending_with_block)), + ), + )), + + last_match_arm: $ => seq( + repeat(choice($.attribute_item, $.inner_attribute_item)), + field('pattern', $.match_pattern), + '=>', + field('value', $._expression), + optional(','), + ), + + match_pattern: $ => seq( + $._pattern, + optional(seq('if', field('condition', $._condition))), + ), + + while_expression: $ => seq( + optional(seq($.label, ':')), + 'while', + field('condition', $._condition), + field('body', $.block), + ), + + loop_expression: $ => seq( + optional(seq($.label, ':')), + 'loop', + field('body', $.block), + ), + + for_expression: $ => seq( + optional(seq($.label, ':')), + 'for', + field('pattern', $._pattern), + 'in', + field('value', $._expression), + field('body', $.block), + ), + + const_block: $ => seq( + 'const', + field('body', $.block), + ), + + closure_expression: $ => prec(PREC.closure, seq( + optional('static'), + optional('async'), + optional('move'), + field('parameters', $.closure_parameters), + choice( + seq( + optional(seq('->', field('return_type', $._type))), + field('body', $.block), + ), + field('body', choice($._expression, '_')), + ), + )), + + closure_parameters: $ => seq( + '|', + sepBy(',', choice( + $._pattern, + $.parameter, + )), + '|', + ), + + label: $ => seq('\'', $.identifier), + + break_expression: $ => prec.left(seq('break', optional($.label), optional($._expression))), + + continue_expression: $ => prec.left(seq('continue', optional($.label))), + + index_expression: $ => prec(PREC.call, seq($._expression, '[', $._expression, ']')), + + await_expression: $ => prec(PREC.field, seq( + $._expression, + '.', + 'await', + )), + + field_expression: $ => prec(PREC.field, seq( + field('value', $._expression), + '.', + field('field', choice( + $._field_identifier, + $.integer_literal, + )), + )), + + unsafe_block: $ => seq( + 'unsafe', + $.block, + ), + + async_block: $ => seq( + 'async', + optional('move'), + $.block, + ), + + 
gen_block: $ => seq( + 'gen', + optional('move'), + $.block, + ), + + try_block: $ => seq( + 'try', + $.block, + ), + + block: $ => seq( + optional(seq($.label, ':')), + '{', + repeat($._statement), + optional($._expression), + '}', + ), + + // Section - Patterns + + _pattern: $ => choice( + $._literal_pattern, + alias(choice(...primitiveTypes), $.identifier), + $.identifier, + $.scoped_identifier, + $.generic_pattern, + $.tuple_pattern, + $.tuple_struct_pattern, + $.struct_pattern, + $._reserved_identifier, + $.ref_pattern, + $.slice_pattern, + $.captured_pattern, + $.reference_pattern, + $.remaining_field_pattern, + $.mut_pattern, + $.range_pattern, + $.or_pattern, + $.const_block, + $.macro_invocation, + '_', + ), + + generic_pattern: $ => seq( + choice( + $.identifier, + $.scoped_identifier, + ), + '::', + field('type_arguments', $.type_arguments), + ), + + tuple_pattern: $ => seq( + '(', + sepBy(',', choice($._pattern, $.closure_expression)), + optional(','), + ')', + ), + + slice_pattern: $ => seq( + '[', + sepBy(',', $._pattern), + optional(','), + ']', + ), + + tuple_struct_pattern: $ => seq( + field('type', choice( + $.identifier, + $.scoped_identifier, + alias($.generic_type_with_turbofish, $.generic_type), + )), + '(', + sepBy(',', $._pattern), + optional(','), + ')', + ), + + struct_pattern: $ => seq( + field('type', choice( + $._type_identifier, + $.scoped_type_identifier, + )), + '{', + sepBy(',', choice($.field_pattern, $.remaining_field_pattern)), + optional(','), + '}', + ), + + field_pattern: $ => seq( + optional('ref'), + optional($.mutable_specifier), + choice( + field('name', alias($.identifier, $.shorthand_field_identifier)), + seq( + field('name', $._field_identifier), + ':', + field('pattern', $._pattern), + ), + ), + ), + + remaining_field_pattern: _ => '..', + + mut_pattern: $ => prec(-1, seq( + $.mutable_specifier, + $._pattern, + )), + + range_pattern: $ => choice( + seq( + field('left', choice( + $._literal_pattern, + $._path, + )), + 
choice( + seq( + choice('...', '..=', '..'), + field('right', choice( + $._literal_pattern, + $._path, + )), + ), + '..', + ), + ), + seq( + choice('..=', '..'), + field('right', choice( + $._literal_pattern, + $._path, + )), + ), + ), + + ref_pattern: $ => seq( + 'ref', + $._pattern, + ), + + captured_pattern: $ => seq( + $.identifier, + '@', + $._pattern, + ), + + reference_pattern: $ => seq( + '&', + optional($.mutable_specifier), + $._pattern, + ), + + or_pattern: $ => prec.left(-2, choice( + seq($._pattern, '|', $._pattern), + seq('|', $._pattern), + )), + + // Section - Literals + + _literal: $ => choice( + $.string_literal, + $.raw_string_literal, + $.char_literal, + $.boolean_literal, + $.integer_literal, + $.float_literal, + ), + + _literal_pattern: $ => choice( + $.string_literal, + $.raw_string_literal, + $.char_literal, + $.boolean_literal, + $.integer_literal, + $.float_literal, + $.negative_literal, + ), + + negative_literal: $ => seq('-', choice($.integer_literal, $.float_literal)), + + integer_literal: _ => token(seq( + choice( + /[0-9][0-9_]*/, + /0x[0-9a-fA-F_]+/, + /0b[01_]+/, + /0o[0-7_]+/, + ), + optional(choice(...numericTypes)), + )), + + string_literal: $ => seq( + choice( + '"', + alias(/[bc]"/, '"'), + ), + repeat(choice( + $.escape_sequence, + $.string_content, + )), + '"', + ), + + raw_string_literal: $ => seq( + $._raw_string_literal_start, + alias($.raw_string_literal_content, $.string_content), + $._raw_string_literal_end, + ), + + char_literal: _ => token(seq( + optional('b'), + '\'', + optional(choice( + seq('\\', choice( + /[^xu]/, + /u[0-9a-fA-F]{4}/, + /u\{[0-9a-fA-F]+\}/, + /x[0-9a-fA-F]{2}/, + )), + /[^\\']/, + )), + '\'', + )), + + escape_sequence: _ => token.immediate( + seq('\\', + choice( + /[^xu]/, + /u[0-9a-fA-F]{4}/, + /u\{[0-9a-fA-F]+\}/, + /x[0-9a-fA-F]{2}/, + ), + )), + + boolean_literal: _ => choice('true', 'false'), + + comment: $ => choice( + $.line_comment, + $.block_comment, + ), + + line_comment: $ => seq( + // 
All line comments start with two // + '//', + // Then are followed by: + // - 2 or more slashes making it a regular comment + // - 1 slash or 1 or more bang operators making it a doc comment + // - or just content for the comment + choice( + // A tricky edge case where what looks like a doc comment is not + seq(token.immediate(prec(2, /\/\//)), /.*/), + // A regular doc comment + seq($._line_doc_comment_marker, field('doc', alias($._line_doc_content, $.doc_comment))), + token.immediate(prec(1, /.*/)), + ), + ), + + _line_doc_comment_marker: $ => choice( + // An outer line doc comment applies to the element that it is outside of + field('outer', alias($._outer_line_doc_comment_marker, $.outer_doc_comment_marker)), + // An inner line doc comment applies to the element it is inside of + field('inner', alias($._inner_line_doc_comment_marker, $.inner_doc_comment_marker)), + ), + + _inner_line_doc_comment_marker: _ => token.immediate(prec(2, '!')), + _outer_line_doc_comment_marker: _ => token.immediate(prec(2, '/')), + + block_comment: $ => seq( + '/*', + optional( + choice( + // Documentation block comments: /** docs */ or /*! 
docs */ + seq( + $._block_doc_comment_marker, + optional(field('doc', alias($._block_comment_content, $.doc_comment))), + ), + // Non-doc block comments + $._block_comment_content, + ), + ), + '*/', + ), + + _block_doc_comment_marker: $ => choice( + field('outer', alias($._outer_block_doc_comment_marker, $.outer_doc_comment_marker)), + field('inner', alias($._inner_block_doc_comment_marker, $.inner_doc_comment_marker)), + ), + + _path: $ => choice( + $.self, + alias(choice(...primitiveTypes), $.identifier), + $.metavariable, + $.super, + $.crate, + $.identifier, + $.scoped_identifier, + $._reserved_identifier, + ), + + identifier: _ => /(r#)?[_\p{XID_Start}][_\p{XID_Continue}]*/, + + shebang: _ => /#![\r\f\t\v ]*([^\[\n].*)?\n/, + + _reserved_identifier: $ => alias(choice( + 'default', + 'union', + 'gen', + 'raw', + ), $.identifier), + + _type_identifier: $ => alias($.identifier, $.type_identifier), + _field_identifier: $ => alias($.identifier, $.field_identifier), + + self: _ => 'self', + super: _ => 'super', + crate: _ => 'crate', + + metavariable: _ => /\$[a-zA-Z_]\w*/, + }, +}); + +/** + * Creates a rule to match one or more of the rules separated by the separator. + * + * @param {RuleOrLiteral} sep - The separator to use. + * @param {RuleOrLiteral} rule + * + * @returns {SeqRule} + */ +function sepBy1(sep, rule) { + return seq(rule, repeat(seq(sep, rule))); +} + + +/** + * Creates a rule to optionally match one or more of the rules separated by the separator. + * + * @param {RuleOrLiteral} sep - The separator to use. 
+ * @param {RuleOrLiteral} rule + * + * @returns {ChoiceRule} + */ +function sepBy(sep, rule) { + return optional(sepBy1(sep, rule)); +} diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..69282d4 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,63 @@ +{ + "name": "tree-sitter-rune", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "tree-sitter-rune", + "version": "0.1.0", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.4" + }, + "devDependencies": { + "tree-sitter-cli": "^0.26.7" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/node-addon-api": { + "version": "8.7.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.7.0.tgz", + "integrity": "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA==", + "license": "MIT", + "engines": { + "node": "^18 || ^20 || >= 21" + } + }, + "node_modules/node-gyp-build": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", + "integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==", + "license": "MIT", + "bin": { + "node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, + "node_modules/tree-sitter-cli": { + "version": "0.26.7", + "resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.26.7.tgz", + "integrity": "sha512-fOg/DokJr/gW7suy9IypT1MQon28+JxOmtjudrT04rHRyqVJzrvakrojwyU8r0U6UNRsZKilb8VIhyarv2XUkQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "tree-sitter": "cli.js" + }, + "engines": { + "node": ">=12.0.0" + } + } + } +} diff --git a/package.json b/package.json new 
file mode 100644 index 0000000..462da31 --- /dev/null +++ b/package.json @@ -0,0 +1,53 @@ +{ + "name": "tree-sitter-rune", + "version": "0.1.0", + "description": "Rune grammar for tree-sitter", + "main": "bindings/node", + "types": "bindings/node", + "keywords": [ + "parser", + "tree-sitter", + "rune" + ], + "files": [ + "grammar.js", + "binding.gyp", + "prebuilds/", + "queries/", + "src/", + "bindings/node" + ], + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + }, + "devDependencies": { + "tree-sitter-cli": "^0.26.7" + }, + "scripts": { + "install": "node-gyp-build", + "test": "node --test bindings/node/test", + "parse": "tree-sitter parse", + "generate": "tree-sitter generate" + }, + "tree-sitter": [ + { + "scope": "source.rune", + "file-types": [ + "rn" + ], + "highlights": "queries/highlights.scm", + "tags": "queries/tags.scm", + "injection-regex": "rune" + } + ], + "license": "MIT" +} diff --git a/queries/highlights.scm b/queries/highlights.scm new file mode 100644 index 0000000..927d14d --- /dev/null +++ b/queries/highlights.scm @@ -0,0 +1,2 @@ +; Highlights for Rune +; (populated in Task 10) diff --git a/queries/tags.scm b/queries/tags.scm new file mode 100644 index 0000000..c3d7470 --- /dev/null +++ b/queries/tags.scm @@ -0,0 +1,2 @@ +; Tags for Rune +; (populated in Task 11) diff --git a/src/scanner.c b/src/scanner.c new file mode 100644 index 0000000..269f6b2 --- /dev/null +++ b/src/scanner.c @@ -0,0 +1,393 @@ +#include "tree_sitter/alloc.h" +#include "tree_sitter/parser.h" + +#include + +enum TokenType { + STRING_CONTENT, + RAW_STRING_LITERAL_START, + RAW_STRING_LITERAL_CONTENT, + RAW_STRING_LITERAL_END, + FLOAT_LITERAL, + BLOCK_OUTER_DOC_MARKER, + BLOCK_INNER_DOC_MARKER, + BLOCK_COMMENT_CONTENT, + LINE_DOC_CONTENT, + ERROR_SENTINEL +}; + +typedef struct { + uint8_t opening_hash_count; +} 
Scanner; + +void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); } + +void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner *)payload); } + +unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) { + Scanner *scanner = (Scanner *)payload; + buffer[0] = (char)scanner->opening_hash_count; + return 1; +} + +void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { + Scanner *scanner = (Scanner *)payload; + scanner->opening_hash_count = 0; + if (length == 1) { + Scanner *scanner = (Scanner *)payload; + scanner->opening_hash_count = buffer[0]; + } +} + +static inline bool is_num_char(int32_t c) { return c == '_' || iswdigit(c); } + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static inline bool process_string(TSLexer *lexer) { + bool has_content = false; + for (;;) { + if (lexer->lookahead == '\"' || lexer->lookahead == '\\') { + break; + } + if (lexer->eof(lexer)) { + return false; + } + has_content = true; + advance(lexer); + } + lexer->result_symbol = STRING_CONTENT; + lexer->mark_end(lexer); + return has_content; +} + +static inline bool scan_raw_string_start(Scanner *scanner, TSLexer *lexer) { + if (lexer->lookahead == 'b' || lexer->lookahead == 'c') { + advance(lexer); + } + if (lexer->lookahead != 'r') { + return false; + } + advance(lexer); + + uint8_t opening_hash_count = 0; + while (lexer->lookahead == '#') { + advance(lexer); + opening_hash_count++; + } + + if (lexer->lookahead != '"') { + return false; + } + advance(lexer); + scanner->opening_hash_count = opening_hash_count; + + lexer->result_symbol = RAW_STRING_LITERAL_START; + return true; +} + +static inline bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) { + for (;;) { + if (lexer->eof(lexer)) { + return false; + } + if (lexer->lookahead == '"') 
{ + lexer->mark_end(lexer); + advance(lexer); + unsigned hash_count = 0; + while (lexer->lookahead == '#' && hash_count < scanner->opening_hash_count) { + advance(lexer); + hash_count++; + } + if (hash_count == scanner->opening_hash_count) { + lexer->result_symbol = RAW_STRING_LITERAL_CONTENT; + return true; + } + } else { + advance(lexer); + } + } +} + +static inline bool scan_raw_string_end(Scanner *scanner, TSLexer *lexer) { + advance(lexer); + for (unsigned i = 0; i < scanner->opening_hash_count; i++) { + advance(lexer); + } + lexer->result_symbol = RAW_STRING_LITERAL_END; + return true; +} + +static inline bool process_float_literal(TSLexer *lexer) { + lexer->result_symbol = FLOAT_LITERAL; + + advance(lexer); + while (is_num_char(lexer->lookahead)) { + advance(lexer); + } + + bool has_fraction = false, has_exponent = false; + + if (lexer->lookahead == '.') { + has_fraction = true; + advance(lexer); + if (iswalpha(lexer->lookahead)) { + // The dot is followed by a letter: 1.max(2) => not a float but an integer + return false; + } + + if (lexer->lookahead == '.') { + return false; + } + while (is_num_char(lexer->lookahead)) { + advance(lexer); + } + } + + lexer->mark_end(lexer); + + if (lexer->lookahead == 'e' || lexer->lookahead == 'E') { + has_exponent = true; + advance(lexer); + if (lexer->lookahead == '+' || lexer->lookahead == '-') { + advance(lexer); + } + if (!is_num_char(lexer->lookahead)) { + return true; + } + advance(lexer); + while (is_num_char(lexer->lookahead)) { + advance(lexer); + } + + lexer->mark_end(lexer); + } + + if (!has_exponent && !has_fraction) { + return false; + } + + if (lexer->lookahead != 'u' && lexer->lookahead != 'i' && lexer->lookahead != 'f') { + return true; + } + advance(lexer); + if (!iswdigit(lexer->lookahead)) { + return true; + } + + while (iswdigit(lexer->lookahead)) { + advance(lexer); + } + + lexer->mark_end(lexer); + return true; +} + +static inline bool process_line_doc_content(TSLexer *lexer) { + lexer->result_symbol 
= LINE_DOC_CONTENT; + for (;;) { + if (lexer->eof(lexer)) { + return true; + } + if (lexer->lookahead == '\n') { + // Include the newline in the doc content node. + // Line endings are useful for markdown injection. + advance(lexer); + return true; + } + advance(lexer); + } +} + +typedef enum { + LeftForwardSlash, + LeftAsterisk, + Continuing, +} BlockCommentState; + +typedef struct { + BlockCommentState state; + unsigned nestingDepth; +} BlockCommentProcessing; + +static inline void process_left_forward_slash(BlockCommentProcessing *processing, char current) { + if (current == '*') { + processing->nestingDepth += 1; + } + processing->state = Continuing; +}; + +static inline void process_left_asterisk(BlockCommentProcessing *processing, char current, TSLexer *lexer) { + if (current == '*') { + lexer->mark_end(lexer); + processing->state = LeftAsterisk; + return; + } + + if (current == '/') { + processing->nestingDepth -= 1; + } + + processing->state = Continuing; +} + +static inline void process_continuing(BlockCommentProcessing *processing, char current) { + switch (current) { + case '/': + processing->state = LeftForwardSlash; + break; + case '*': + processing->state = LeftAsterisk; + break; + } +} + +static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbols) { + char first = (char)lexer->lookahead; + // The first character is stored so we can safely advance inside + // these if blocks. However, because we only store one, we can only + // safely advance 1 time. Since there's a chance that an advance could + // happen in one state, we must advance in all states to ensure that + // the program ends up in a sane state prior to processing the block + // comment if need be. 
+ if (valid_symbols[BLOCK_INNER_DOC_MARKER] && first == '!') { + lexer->result_symbol = BLOCK_INNER_DOC_MARKER; + advance(lexer); + return true; + } + if (valid_symbols[BLOCK_OUTER_DOC_MARKER] && first == '*') { + advance(lexer); + lexer->mark_end(lexer); + // If the next token is a / that means that it's an empty block comment. + if (lexer->lookahead == '/') { + return false; + } + // If the next token is a * that means that this isn't a BLOCK_OUTER_DOC_MARKER + // as BLOCK_OUTER_DOC_MARKER's only have 2 * not 3 or more. + if (lexer->lookahead != '*') { + lexer->result_symbol = BLOCK_OUTER_DOC_MARKER; + return true; + } + } else { + advance(lexer); + } + + if (valid_symbols[BLOCK_COMMENT_CONTENT]) { + BlockCommentProcessing processing = {Continuing, 1}; + // Manually set the current state based on the first character + switch (first) { + case '*': + processing.state = LeftAsterisk; + if (lexer->lookahead == '/') { + // This case can happen in an empty doc block comment + // like /*!*/. The comment has no contents, so bail. + return false; + } + break; + case '/': + processing.state = LeftForwardSlash; + break; + default: + processing.state = Continuing; + break; + } + + // For the purposes of actually parsing rust code, this + // is incorrect as it considers an unterminated block comment + // to be an error. 
However, for the purposes of syntax highlighting + // this should be considered successful as otherwise you are not able + // to syntax highlight a block of code prior to closing the + // block comment + while (!lexer->eof(lexer) && processing.nestingDepth != 0) { + // Set first to the current lookahead as that is the second character + // as we force an advance in the above code when we are checking if we + // need to handle a block comment inner or outer doc comment signifier + // node + first = (char)lexer->lookahead; + switch (processing.state) { + case LeftForwardSlash: + process_left_forward_slash(&processing, first); + break; + case LeftAsterisk: + process_left_asterisk(&processing, first, lexer); + break; + case Continuing: + lexer->mark_end(lexer); + process_continuing(&processing, first); + break; + default: + break; + } + advance(lexer); + if (first == '/' && processing.nestingDepth != 0) { + lexer->mark_end(lexer); + } + } + lexer->result_symbol = BLOCK_COMMENT_CONTENT; + return true; + } + + return false; +} + +bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + // The documentation states that if the lexical analysis fails for some reason + // they will mark every state as valid and pass it to the external scanner + // However, we can't do anything to help them recover in that case so we + // should just fail. + /* + link: https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + If a syntax error is encountered during regular parsing, Tree-sitter’s + first action during error recovery will be to call the external scanner’s + scan function with all tokens marked valid. The scanner should detect this + case and handle it appropriately. 
One simple method of detection is to add + an unused token to the end of the externals array, for example + + externals: $ => [$.token1, $.token2, $.error_sentinel], + + then check whether that token is marked valid to determine whether + Tree-sitter is in error correction mode. + */ + if (valid_symbols[ERROR_SENTINEL]) { + return false; + } + + Scanner *scanner = (Scanner *)payload; + + if (valid_symbols[BLOCK_COMMENT_CONTENT] || valid_symbols[BLOCK_INNER_DOC_MARKER] || + valid_symbols[BLOCK_OUTER_DOC_MARKER]) { + return process_block_comment(lexer, valid_symbols); + } + + if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) { + return process_string(lexer); + } + + if (valid_symbols[LINE_DOC_CONTENT]) { + return process_line_doc_content(lexer); + } + + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (valid_symbols[RAW_STRING_LITERAL_START] && + (lexer->lookahead == 'r' || lexer->lookahead == 'b' || lexer->lookahead == 'c')) { + return scan_raw_string_start(scanner, lexer); + } + + if (valid_symbols[RAW_STRING_LITERAL_CONTENT]) { + return scan_raw_string_content(scanner, lexer); + } + + if (valid_symbols[RAW_STRING_LITERAL_END] && lexer->lookahead == '"') { + return scan_raw_string_end(scanner, lexer); + } + + if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) { + return process_float_literal(lexer); + } + + return false; +} diff --git a/tree-sitter.json b/tree-sitter.json new file mode 100644 index 0000000..09ec5a2 --- /dev/null +++ b/tree-sitter.json @@ -0,0 +1,31 @@ +{ + "grammars": [ + { + "name": "rune", + "camelcase": "Rune", + "scope": "source.rune", + "path": ".", + "file-types": ["rn"], + "highlights": ["queries/highlights.scm"], + "tags": ["queries/tags.scm"], + "injection-regex": "rune" + } + ], + "metadata": { + "version": "0.1.0", + "license": "MIT", + "description": "Rune grammar for tree-sitter", + "authors": [], + "links": { + "repository": "https://github.com/TODO/tree-sitter-rune" + } + }, + "bindings": 
{ + "c": true, + "go": true, + "node": true, + "python": true, + "rust": true, + "swift": false + } +}