Skip to content

Commit 00b096d

Browse files
author
Your Name
committed
feat(extraction): CommonJS require() import extraction for JS/TS
The ES module import walker (walk_es_imports) only handled 'import' statements, not CommonJS 'require()' calls, so JS codebases using require() had zero imports extracted.

Adds require() detection in walk_es_imports:
- Detects variable_declarator/assignment_expression whose value is a require() call
- Handles: const X = require('Y') (default import)
- Handles: const { A, B } = require('Y') (destructured import via object_pattern)
- Handles: const [A, B] = require('Y') (array destructured)
- Supports shorthand_property_identifier_pattern and pair_pattern variants
- Falls back to path_last() for unnamed requires

Also adds variable_declaration and expression_statement to js_import_types in lang_specs.c, catching 'var X = require()' patterns (older JS codebases).

Tested: JS service went from 0 to 335 IMPORTS with both ESM and CJS detected.
1 parent d98f3a0 commit 00b096d

2 files changed

Lines changed: 83 additions & 0 deletions

File tree

internal/cbm/extract_imports.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,88 @@ static void walk_es_imports(CBMExtractCtx *ctx, TSNode node) {
340340
return;
341341
}
342342

343+
/* CommonJS: const X = require("Y"), const { A, B } = require("Y")
344+
* Tree-sitter structure: variable_declarator → name + value(call_expression)
345+
* We detect require() calls inside lexical_declaration/variable_declaration. */
346+
if (strcmp(kind, "variable_declarator") == 0 || strcmp(kind, "assignment_expression") == 0) {
347+
TSNode value = ts_node_child_by_field_name(node, "value", 5);
348+
if (ts_node_is_null(value)) {
349+
value = ts_node_child_by_field_name(node, "right", 5);
350+
}
351+
if (!ts_node_is_null(value) && strcmp(ts_node_type(value), "call_expression") == 0) {
352+
TSNode func = ts_node_child_by_field_name(value, "function", 8);
353+
if (!ts_node_is_null(func) && strcmp(ts_node_type(func), "identifier") == 0) {
354+
char *fname = cbm_node_text(a, func, ctx->source);
355+
if (fname && strcmp(fname, "require") == 0) {
356+
/* Extract the require() argument */
357+
TSNode args = ts_node_child_by_field_name(value, "arguments", 9);
358+
if (!ts_node_is_null(args) && ts_node_named_child_count(args) > 0) {
359+
TSNode arg0 = ts_node_named_child(args, 0);
360+
const char *at = ts_node_type(arg0);
361+
if (strcmp(at, "string") == 0 || strcmp(at, "string_literal") == 0 ||
362+
strcmp(at, "template_string") == 0) {
363+
char *path = strip_quotes(a, cbm_node_text(a, arg0, ctx->source));
364+
if (path && path[0]) {
365+
/* Get the variable name(s) being assigned */
366+
TSNode lhs = ts_node_child_by_field_name(node, "name", 4);
367+
if (ts_node_is_null(lhs)) {
368+
lhs = ts_node_child_by_field_name(node, "left", 4);
369+
}
370+
if (!ts_node_is_null(lhs)) {
371+
const char *lk = ts_node_type(lhs);
372+
if (strcmp(lk, "identifier") == 0) {
373+
char *name = cbm_node_text(a, lhs, ctx->source);
374+
CBMImport imp = {.local_name = name, .module_path = path};
375+
cbm_imports_push(&ctx->result->imports, a, imp);
376+
} else if (strcmp(lk, "object_pattern") == 0) {
377+
/* Destructured: const { A, B } = require("Y") */
378+
uint32_t nc = ts_node_named_child_count(lhs);
379+
for (uint32_t k = 0; k < nc; k++) {
380+
TSNode prop = ts_node_named_child(lhs, k);
381+
const char *pk = ts_node_type(prop);
382+
if (strcmp(pk, "shorthand_property_identifier_pattern") == 0 ||
383+
strcmp(pk, "shorthand_property_identifier") == 0 ||
384+
strcmp(pk, "identifier") == 0) {
385+
char *name = cbm_node_text(a, prop, ctx->source);
386+
CBMImport imp = {.local_name = name, .module_path = path};
387+
cbm_imports_push(&ctx->result->imports, a, imp);
388+
} else if (strcmp(pk, "pair_pattern") == 0 ||
389+
strcmp(pk, "pair") == 0) {
390+
TSNode val = ts_node_child_by_field_name(prop, "value", 5);
391+
if (!ts_node_is_null(val)) {
392+
char *name = cbm_node_text(a, val, ctx->source);
393+
CBMImport imp = {.local_name = name, .module_path = path};
394+
cbm_imports_push(&ctx->result->imports, a, imp);
395+
}
396+
}
397+
}
398+
} else if (strcmp(lk, "array_pattern") == 0) {
399+
/* Array destructured: const [A, B] = require("Y") */
400+
uint32_t nc = ts_node_named_child_count(lhs);
401+
for (uint32_t k = 0; k < nc; k++) {
402+
TSNode elem = ts_node_named_child(lhs, k);
403+
if (strcmp(ts_node_type(elem), "identifier") == 0) {
404+
char *name = cbm_node_text(a, elem, ctx->source);
405+
CBMImport imp = {.local_name = name, .module_path = path};
406+
cbm_imports_push(&ctx->result->imports, a, imp);
407+
}
408+
}
409+
}
410+
} else {
411+
/* Fallback: use last path segment as name */
412+
CBMImport imp = {.local_name = path_last(a, path),
413+
.module_path = path};
414+
cbm_imports_push(&ctx->result->imports, a, imp);
415+
}
416+
}
417+
}
418+
}
419+
}
420+
}
421+
}
422+
/* Don't return — let it recurse to catch nested requires */
423+
}
424+
343425
recurse:;
344426
uint32_t count = ts_node_child_count(node);
345427
for (uint32_t i = 0; i < count; i++) {

internal/cbm/lang_specs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ static const char *js_class_types[] = {"class_declaration", "class", NULL};
114114
static const char *js_module_types[] = {"program", NULL};
115115
static const char *js_call_types[] = {"call_expression", NULL};
116116
static const char *js_import_types[] = {"import_statement", "lexical_declaration",
117+
"variable_declaration", "expression_statement",
117118
"export_statement", NULL};
118119
static const char *js_branch_types[] = {"if_statement", "for_statement", "for_in_statement",
119120
"while_statement", "switch_statement", "case_clause",

0 commit comments

Comments
 (0)