Skip to content

Commit acea3e0

Browse files
Add upstream-pinned Tree-sitter query sync with overrides
1 parent 46acbec commit acea3e0

7 files changed

Lines changed: 237 additions & 18 deletions

File tree

.github/workflows/validate.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ jobs:
1616
with:
1717
bun-version: latest
1818

19+
- name: Check upstream-pinned highlight queries
20+
run: bun run scripts/sync-upstream-queries.ts --check
21+
1922
- name: Validate extension manifests
2023
run: bun run scripts/validate.ts
2124

CONTRIBUTING.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ scripts/ # Validation and generation scripts
2626
- `extension.json` defines the extension manifest (category, capabilities, tool references)
2727
- `tooling.json` (optional) defines pre-built platform-specific binaries distributed as tarballs
2828
- Not every extension has a `tooling.json`. Extensions without one rely on runtime-installed tools
29+
- `query-sources.json` pins upstream Tree-sitter highlight query sources for opt-in languages
2930

3031
## Adding a New Extension
3132

@@ -73,6 +74,30 @@ scripts/ # Validation and generation scripts
7374
bun run scripts/validate.ts
7475
```
7576

77+
## Upstream Tree-sitter Queries
78+
79+
For language extensions, prefer pinned upstream queries instead of hand-editing
80+
`highlights.scm` directly.
81+
82+
1. Add an entry in `query-sources.json` with:
83+
- `repository` (e.g. `tree-sitter/tree-sitter-rust`)
84+
- `revision` (tag or commit SHA)
85+
- `queryPath` (usually `queries/highlights.scm`)
86+
- `targetPath` (the extension's generated `highlights.scm`, e.g. `extensions/rust/highlights.scm`)
87+
- optional `overridePath` (e.g. `highlights.override.scm`)
88+
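- optional `replacements` (exact `find`/`replace` string pairs) for tiny deterministic patches; the sync fails if a `find` string is not present in the upstream query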
89+
2. Add or edit `extensions/<extension>/highlights.override.scm` for local Athas-specific rules.
90+
3. Run:
91+
```bash
92+
bun run scripts/sync-upstream-queries.ts
93+
```
94+
4. Verify:
95+
```bash
96+
bun run scripts/sync-upstream-queries.ts --check
97+
```
98+
99+
`highlights.scm` is treated as generated output for entries in `query-sources.json`.
100+
76101
## Extension Manifest Format
77102

78103
### Required Fields

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,21 @@ Root-level files:
2626
```bash
2727
bun run scripts/validate.ts
2828
bun run scripts/generate-manifests.ts
29+
bun run scripts/sync-upstream-queries.ts
2930
```
3031

32+
## Upstream Query Sync
33+
34+
Tree-sitter highlight queries can be pinned to upstream grammar repositories via
35+
`query-sources.json`.
36+
37+
- `highlights.scm` is generated from pinned upstream sources.
38+
- Use `highlights.override.scm` for local Athas-specific fixes.
39+
- To verify everything is in sync:
40+
```bash
41+
bun run scripts/sync-upstream-queries.ts --check
42+
```
43+
3144
## Contributing
3245

3346
See [CONTRIBUTING.md](CONTRIBUTING.md).
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
; Highlight Rust doc comments on parser versions that do not expose `doc_comment`.
2+
((line_comment) @comment.documentation
3+
(#match? @comment.documentation "^///|^//!"))
4+
((block_comment) @comment.documentation
5+
(#match? @comment.documentation "^/\\*\\*|^/\\*!"))

extensions/rust/highlights.scm

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,13 @@
1-
; Identifiers
2-
3-
(type_identifier) @type
4-
(primitive_type) @type.builtin
5-
(field_identifier) @property
6-
1+
; AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY.
2+
; Source: https://github.com/tree-sitter/tree-sitter-rust/blob/v0.20.4/queries/highlights.scm
3+
; Generator: scripts/sync-upstream-queries.ts (rust)
4+
; Local customizations belong in highlights.override.scm.
75
; Identifier conventions
86

97
; Assume all-caps names are constants
108
((identifier) @constant
119
(#match? @constant "^[A-Z][A-Z\\d_]+$"))
1210

13-
; Assume uppercase names are enum constructors
14-
((identifier) @constructor
15-
(#match? @constructor "^[A-Z]"))
16-
1711
; Assume that uppercase names in paths are types
1812
((scoped_identifier
1913
path: (identifier) @type)
@@ -30,6 +24,10 @@
3024
name: (identifier) @type))
3125
(#match? @type "^[A-Z]"))
3226

27+
; Assume other uppercase names are enum constructors
28+
((identifier) @constructor
29+
(#match? @constructor "^[A-Z]"))
30+
3331
; Assume all qualified names in struct patterns are enum constructors. (They're
3432
; either that, or struct names; highlighting both as constructors seems to be
3533
; the less glaring choice of error, visually.)
@@ -67,14 +65,15 @@
6765
(function_item (identifier) @function)
6866
(function_signature_item (identifier) @function)
6967

68+
; Other identifiers
69+
70+
(type_identifier) @type
71+
(primitive_type) @type.builtin
72+
(field_identifier) @property
73+
7074
(line_comment) @comment
7175
(block_comment) @comment
7276

73-
((line_comment) @comment.documentation
74-
(#match? @comment.documentation "^///|^//!"))
75-
((block_comment) @comment.documentation
76-
(#match? @comment.documentation "^/\\*\\*|^/\\*!"))
77-
7877
"(" @punctuation.bracket
7978
")" @punctuation.bracket
8079
"[" @punctuation.bracket
@@ -112,7 +111,6 @@
112111
"extern" @keyword
113112
"fn" @keyword
114113
"for" @keyword
115-
"gen" @keyword
116114
"if" @keyword
117115
"impl" @keyword
118116
"in" @keyword
@@ -123,7 +121,6 @@
123121
"mod" @keyword
124122
"move" @keyword
125123
"pub" @keyword
126-
"raw" @keyword
127124
"ref" @keyword
128125
"return" @keyword
129126
"static" @keyword
@@ -135,7 +132,6 @@
135132
"use" @keyword
136133
"where" @keyword
137134
"while" @keyword
138-
"yield" @keyword
139135
(crate) @keyword
140136
(mutable_specifier) @keyword
141137
(use_list (self) @keyword)
@@ -161,3 +157,10 @@
161157
"*" @operator
162158
"&" @operator
163159
"'" @operator
160+
161+
; --- Athas overrides ---
162+
; Highlight Rust doc comments on parser versions that do not expose `doc_comment`.
163+
((line_comment) @comment.documentation
164+
(#match? @comment.documentation "^///|^//!"))
165+
((block_comment) @comment.documentation
166+
(#match? @comment.documentation "^/\\*\\*|^/\\*!"))

query-sources.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"rust": {
3+
"repository": "tree-sitter/tree-sitter-rust",
4+
"revision": "v0.20.4",
5+
"queryPath": "queries/highlights.scm",
6+
"targetPath": "extensions/rust/highlights.scm",
7+
"overridePath": "extensions/rust/highlights.override.scm",
8+
"replacements": [
9+
{
10+
"find": "(#match? @constant \"^[A-Z][A-Z\\\\d_]+$'\"))",
11+
"replace": "(#match? @constant \"^[A-Z][A-Z\\\\d_]+$\"))"
12+
}
13+
]
14+
}
15+
}

scripts/sync-upstream-queries.ts

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/**
2+
* Sync highlight queries from pinned upstream tree-sitter repos.
3+
*
4+
* Usage:
5+
* bun run scripts/sync-upstream-queries.ts
6+
* bun run scripts/sync-upstream-queries.ts --check
7+
*/
8+
9+
import { readFile, writeFile } from "node:fs/promises";
10+
import { existsSync } from "node:fs";
11+
import { join, resolve } from "node:path";
12+
13+
/** One literal find/replace patch applied to an upstream query before it is written out. */
interface Replacement {
  /** Exact text to look for in the upstream query (matched literally, not as a regex). */
  find: string;
  /** Text substituted for every occurrence of `find`. */
  replace: string;
}
17+
18+
/** A single pinned upstream query source, as declared in `query-sources.json`. */
interface QuerySourceEntry {
  /** GitHub `owner/name` slug of the upstream grammar repository. */
  repository: string;
  /** Git revision (tag or commit SHA) the query is pinned to. */
  revision: string;
  /** Path of the query file inside the upstream repository. */
  queryPath: string;
  /** Repo-relative path of the generated query file. */
  targetPath: string;
  /** Repo-relative path of an optional local override file appended to the output. */
  overridePath?: string;
  /** Optional literal patches applied to the upstream text. */
  replacements?: Replacement[];
}
26+
27+
/** Shape of `query-sources.json`: entry name mapped to its pinned source. */
type QuerySources = { [name: string]: QuerySourceEntry };
28+
29+
// Repository root: the parent of the directory that contains this script.
const ROOT = join(import.meta.dirname, "..");
// Location of the config file listing the pinned upstream query sources.
const SOURCES_PATH = resolve(ROOT, "query-sources.json");
// With --check the script only verifies generated files and never writes.
const CHECK_MODE = process.argv.some((arg) => arg === "--check");
32+
33+
/**
 * Convert every line ending in `input` to a single LF.
 *
 * Handles both Windows (CRLF) and bare CR line endings so that content
 * compares equal regardless of how it was checked out or served.
 *
 * @param input Text with arbitrary line endings.
 * @returns The same text using only `\n` as line terminator.
 */
function normalizeNewlines(input: string): string {
  // `\r\n?` matches CRLF as one unit first, then any remaining lone CR.
  return input.replace(/\r\n?/g, "\n");
}
36+
37+
/**
 * Guarantee that `input` ends with a newline.
 *
 * @param input Text that may or may not end with `\n`.
 * @returns `input` unchanged when it already ends with `\n`, otherwise `input` plus `\n`.
 */
function ensureTrailingNewline(input: string): string {
  if (input.endsWith("\n")) {
    return input;
  }
  return `${input}\n`;
}
40+
41+
/**
 * Apply the configured literal find/replace patches to `content`, in order.
 *
 * Each patch replaces every occurrence of its `find` text. A patch whose
 * target is missing is an error rather than a no-op, so a patch that stops
 * matching is noticed instead of silently doing nothing.
 *
 * @param content Text to patch.
 * @param replacements Patches to apply; `undefined` or an empty list leaves `content` untouched.
 * @returns The patched text.
 * @throws Error if a patch has an empty `find`, or its `find` text does not occur in the current text.
 */
function applyReplacements(content: string, replacements: Replacement[] | undefined): string {
  let next = content;
  for (const { find, replace } of replacements ?? []) {
    // An empty needle is "found" in every string and splitting on it would
    // insert the replacement between every character, corrupting the query.
    if (find === "") {
      throw new Error('Replacement "find" must be a non-empty string');
    }
    if (!next.includes(find)) {
      throw new Error(`Replacement target not found: ${find}`);
    }
    // split/join keeps both strings literal: unlike String.prototype.replace,
    // sequences such as `$&` or `$'` in `replace` are not interpreted.
    next = next.split(find).join(replace);
  }

  return next;
}
56+
57+
/**
 * Build the raw.githubusercontent.com URL for an entry's pinned query file.
 *
 * @param entry Source entry providing repository, revision and query path.
 * @returns URL of the raw upstream query at the pinned revision.
 */
function buildRawUrl(entry: QuerySourceEntry): string {
  const { repository, revision, queryPath } = entry;
  return `https://raw.githubusercontent.com/${repository}/${revision}/${queryPath}`;
}
60+
61+
/**
 * Build the comment banner placed at the top of a generated query file.
 *
 * @param name Entry name from the sources config, shown next to the generator path.
 * @param entry Source entry used to link to the upstream file on GitHub.
 * @returns Four `;` comment lines, each terminated by `\n`.
 */
function buildGeneratedHeader(name: string, entry: QuerySourceEntry): string {
  const { repository, revision, queryPath } = entry;
  const bannerLines = [
    "; AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY.",
    `; Source: https://github.com/${repository}/blob/${revision}/${queryPath}`,
    `; Generator: scripts/sync-upstream-queries.ts (${name})`,
    "; Local customizations belong in highlights.override.scm.",
  ];
  return bannerLines.map((line) => `${line}\n`).join("");
}
70+
71+
/**
 * Assemble the full text of a generated query file.
 *
 * Layout: generated banner, then the upstream query, then (only when an
 * override with non-whitespace content is supplied) a blank line, the
 * `; --- Athas overrides ---` marker and the override text. Upstream and
 * override text get LF line endings and lose trailing whitespace; the result
 * always ends with exactly one newline.
 *
 * @param name Entry name, forwarded to the banner.
 * @param entry Source entry, forwarded to the banner.
 * @param upstreamContent Upstream query text (already patched by the caller).
 * @param overrideContent Local override text, or `null` when there is none.
 * @returns The complete generated file content.
 */
function buildGeneratedQuery(
  name: string,
  entry: QuerySourceEntry,
  upstreamContent: string,
  overrideContent: string | null,
): string {
  const tidy = (text: string): string => ensureTrailingNewline(normalizeNewlines(text)).trimEnd();

  const banner = buildGeneratedHeader(name, entry);
  const upstreamSection = `${banner}${tidy(upstreamContent)}\n`;

  // A missing, empty, or whitespace-only override adds nothing to the output.
  const overrideText = overrideContent ? overrideContent : "";
  if (overrideText.trim().length === 0) {
    return upstreamSection;
  }

  return `${upstreamSection}\n; --- Athas overrides ---\n${tidy(overrideText)}\n`;
}
87+
88+
/**
 * Download `url` and return the response body as text.
 *
 * @param url Address to fetch.
 * @returns The response body decoded as text.
 * @throws Error when the response status is not OK; the message includes the URL, status code and status text.
 */
async function fetchText(url: string): Promise<string> {
  const response = await fetch(url);
  if (response.ok) {
    return await response.text();
  }
  throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
}
95+
96+
/**
 * Regenerate (or, in check mode, verify) the query file for one configured entry.
 *
 * Fetches the pinned upstream query, applies the entry's replacements, appends
 * the override file when it is configured and exists on disk, and compares the
 * result with the current target file, ignoring CRLF/LF differences.
 *
 * @param name Entry name from the sources config.
 * @param entry The entry's source configuration.
 * @returns The entry name and whether the target file was rewritten (always `false` in check mode).
 * @throws Error in check mode when the target file differs from the generated content;
 *         also propagates fetch, replacement and file-system errors.
 */
async function syncEntry(name: string, entry: QuerySourceEntry): Promise<{
  name: string;
  changed: boolean;
}> {
  const targetFile = join(ROOT, entry.targetPath);
  const overrideFile = entry.overridePath ? join(ROOT, entry.overridePath) : null;

  const upstreamText = await fetchText(buildRawUrl(entry));
  const patchedText = applyReplacements(upstreamText, entry.replacements);

  // The override is optional twice over: it may be unconfigured or absent on disk.
  let overrideText: string | null = null;
  if (overrideFile && existsSync(overrideFile)) {
    overrideText = await readFile(overrideFile, "utf8");
  }

  const generated = buildGeneratedQuery(name, entry, patchedText, overrideText);

  // A missing target counts as empty, so it is reported as out of date.
  let existing = "";
  if (existsSync(targetFile)) {
    existing = await readFile(targetFile, "utf8");
  }

  const upToDate = normalizeNewlines(existing) === normalizeNewlines(generated);
  if (upToDate) {
    return { name, changed: false };
  }

  if (CHECK_MODE) {
    throw new Error(
      `${name}: ${entry.targetPath} is out of date. Run: bun run scripts/sync-upstream-queries.ts`,
    );
  }

  await writeFile(targetFile, generated, "utf8");
  return { name, changed: true };
}
128+
129+
/**
 * Entry point: load the sources config and sync (or check) every entry.
 *
 * Entries are processed one at a time in sorted name order, logging one status
 * line per entry followed by a summary. Any error from an entry propagates and
 * stops processing of the remaining entries.
 */
async function main() {
  const sources = JSON.parse(await readFile(SOURCES_PATH, "utf8")) as QuerySources;

  const orderedNames = Object.keys(sources).sort();
  if (orderedNames.length === 0) {
    console.log("No query sources configured.");
    return;
  }

  let processedCount = 0;
  let updatedCount = 0;
  for (const name of orderedNames) {
    const { changed } = await syncEntry(name, sources[name]);
    processedCount += 1;

    let label = "unchanged";
    if (CHECK_MODE) {
      label = "checked";
    } else if (changed) {
      label = "updated";
      updatedCount += 1;
    }
    console.log(`${name}: ${label}`);
  }

  if (CHECK_MODE) {
    console.log(`\nQuery sources check passed (${processedCount} entries).`);
    return;
  }
  console.log(`\nQuery sync complete (${updatedCount}/${processedCount} updated).`);
}
154+
155+
await main();

0 commit comments

Comments
 (0)