Skip to content

Commit 1c7b0a4

Browse files
committed
Pure AST decorator route extraction + end-to-end DATA_FLOWS
Extract route_path and route_method directly from decorator AST nodes during tree-sitter walk. Route + HANDLES edges created during definition node creation. ensure_decorator_routes pass recreates Route+HANDLES for existing nodes in incremental mode. Prefix Routes bridged to handler Functions by service directory matching.
1 parent 148d951 commit 1c7b0a4

7 files changed

Lines changed: 347 additions & 10 deletions

File tree

internal/cbm/cbm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ typedef struct {
9999
const char **param_names; // NULL-terminated array (NULL if none)
100100
const char **param_types; // NULL-terminated array (NULL if none)
101101
const char **return_types; // NULL-terminated array (NULL if none)
102+
const char *route_path; // HTTP route path from decorator (e.g., "/api/users") or NULL
103+
const char *route_method; // HTTP method from decorator (e.g., "POST") or NULL
102104
int complexity; // cyclomatic complexity
103105
int lines; // body line count
104106
bool is_exported;

internal/cbm/extract_defs.c

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,126 @@ static const char *extract_docstring(CBMArena *a, TSNode node, const char *sourc
463463
return NULL;
464464
}
465465

466+
/* Map the callee text of a decorator call to a canonical HTTP method name.
 *
 * Only the identifier after the last '.' is considered, so "router.post",
 * "app.api.post" and a bare "post" are all treated the same way. Matching is
 * exact: the all-lowercase spelling and the capitalised spelling of each verb
 * are accepted, nothing else (e.g. "GET" or "getter" do not match).
 *
 * attr_text: NUL-terminated callee text, or NULL.
 *
 * Returns a pointer to a static string ("GET", "POST", "PUT", "DELETE",
 * "PATCH", "HEAD", "OPTIONS", or "ANY" for the generic "route"/"api_route"
 * decorators), or NULL when attr_text is NULL or its last segment is not a
 * recognised route decorator name. The caller must not free the result. */
static const char *decorator_method_name(const char *attr_text) {
    static const struct {
        const char *name; /* decorator identifier as written in source */
        const char *verb; /* canonical HTTP method reported to callers */
    } known[] = {
        {"get", "GET"},         {"Get", "GET"},
        {"post", "POST"},       {"Post", "POST"},
        {"put", "PUT"},         {"Put", "PUT"},
        {"delete", "DELETE"},   {"Delete", "DELETE"},
        {"patch", "PATCH"},     {"Patch", "PATCH"},
        /* HEAD/OPTIONS are offered by the same decorator families as the
         * verbs above and were previously dropped silently. */
        {"head", "HEAD"},       {"Head", "HEAD"},
        {"options", "OPTIONS"}, {"Options", "OPTIONS"},
        /* Method-agnostic registration decorators. */
        {"route", "ANY"},       {"api_route", "ANY"},
    };

    if (attr_text == NULL) {
        return NULL;
    }

    /* Keep only the last dotted segment: "router.post" -> "post". */
    const char *last_dot = strrchr(attr_text, '.');
    const char *segment = (last_dot != NULL) ? last_dot + 1 : attr_text;

    for (size_t i = 0; i < sizeof known / sizeof known[0]; i++) {
        if (strcmp(segment, known[i].name) == 0) {
            return known[i].verb;
        }
    }
    return NULL;
}
494+
495+
/* Extract route path + method from a decorator's AST nodes.
496+
* Works for: @app.route("/path"), @router.post("/path"), @GetMapping("/path"),
497+
* @app.get("/path", ...), etc.
498+
*
499+
* Pure AST approach: walks the decorator node's call children to find:
500+
* 1. The function/attribute name → infer HTTP method
501+
* 2. The first string argument → route path */
502+
static void extract_route_from_decorators(CBMArena *a, TSNode func_node, const char *source,
503+
const CBMLangSpec *spec, const char **out_path,
504+
const char **out_method) {
505+
*out_path = NULL;
506+
*out_method = NULL;
507+
508+
if (!spec->decorator_node_types || !spec->decorator_node_types[0]) {
509+
return;
510+
}
511+
512+
TSNode prev = ts_node_prev_sibling(func_node);
513+
while (!ts_node_is_null(prev)) {
514+
if (!cbm_kind_in_set(prev, spec->decorator_node_types)) {
515+
break;
516+
}
517+
518+
/* Walk into the decorator to find a call expression with a path argument.
519+
* Python decorator node structure: decorator → (call → attribute + argument_list)
520+
* Java annotation: annotation → (name + arguments) */
521+
uint32_t dc = ts_node_named_child_count(prev);
522+
for (uint32_t di = 0; di < dc; di++) {
523+
TSNode dchild = ts_node_named_child(prev, di);
524+
const char *dk = ts_node_type(dchild);
525+
526+
/* Python/JS: decorator contains a call node */
527+
if (strcmp(dk, "call") == 0) {
528+
/* Get the function/attribute being called */
529+
TSNode fn = ts_node_child_by_field_name(dchild, "function", 8);
530+
if (ts_node_is_null(fn)) {
531+
fn = ts_node_named_child(dchild, 0);
532+
}
533+
if (!ts_node_is_null(fn)) {
534+
char *fn_text = cbm_node_text(a, fn, source);
535+
const char *method = decorator_method_name(fn_text);
536+
if (method) {
537+
/* Found a route decorator — extract path from arguments */
538+
TSNode args = ts_node_child_by_field_name(dchild, "arguments", 9);
539+
if (ts_node_is_null(args)) {
540+
/* Try argument_list as child */
541+
for (uint32_t ai = 0; ai < ts_node_named_child_count(dchild); ai++) {
542+
TSNode ac = ts_node_named_child(dchild, ai);
543+
if (strcmp(ts_node_type(ac), "argument_list") == 0) {
544+
args = ac;
545+
break;
546+
}
547+
}
548+
}
549+
if (!ts_node_is_null(args)) {
550+
/* First string argument is the path */
551+
uint32_t nc = ts_node_named_child_count(args);
552+
for (uint32_t ai = 0; ai < nc && ai < 3; ai++) {
553+
TSNode arg = ts_node_named_child(args, ai);
554+
const char *ak = ts_node_type(arg);
555+
if (strcmp(ak, "string") == 0 ||
556+
strcmp(ak, "string_literal") == 0 ||
557+
strcmp(ak, "interpreted_string_literal") == 0) {
558+
char *path = cbm_node_text(a, arg, source);
559+
if (path) {
560+
int plen = (int)strlen(path);
561+
if (plen >= 2 && (path[0] == '"' || path[0] == '\'')) {
562+
path =
563+
cbm_arena_strndup(a, path + 1, (size_t)(plen - 2));
564+
}
565+
if (path && path[0] == '/') {
566+
*out_path = path;
567+
*out_method = method;
568+
return;
569+
}
570+
}
571+
}
572+
}
573+
}
574+
/* Route decorator but no path arg → path is "/" */
575+
*out_path = "/";
576+
*out_method = method;
577+
return;
578+
}
579+
}
580+
}
581+
}
582+
prev = ts_node_prev_sibling(prev);
583+
}
584+
}
585+
466586
// Extract decorator names from preceding decorator/annotation nodes
467587
static const char **extract_decorators(CBMArena *a, TSNode node, const char *source,
468588
CBMLanguage lang, const CBMLangSpec *spec) {
@@ -1123,8 +1243,9 @@ static void extract_func_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec
11231243
def.label = "Method";
11241244
}
11251245

1126-
// Decorators
1246+
// Decorators + route extraction from decorator AST
11271247
def.decorators = extract_decorators(a, node, ctx->source, ctx->language, spec);
1248+
extract_route_from_decorators(a, node, ctx->source, spec, &def.route_path, &def.route_method);
11281249

11291250
// Docstring
11301251
def.docstring = extract_docstring(a, node, ctx->source, ctx->language);
@@ -1610,6 +1731,7 @@ static void push_method_def(CBMExtractCtx *ctx, TSNode child, const char *class_
16101731
}
16111732

16121733
def.decorators = extract_decorators(a, child, ctx->source, ctx->language, spec);
1734+
extract_route_from_decorators(a, child, ctx->source, spec, &def.route_path, &def.route_method);
16131735
def.docstring = extract_docstring(a, child, ctx->source, ctx->language);
16141736

16151737
if (spec->branching_node_types && spec->branching_node_types[0]) {

src/pipeline/pass_definitions.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ static void build_def_props(char *buf, size_t bufsize, const CBMDefinition *def)
188188
append_json_str_array(buf, bufsize, &pos, "base_classes", def->base_classes);
189189
append_json_str_array(buf, bufsize, &pos, "param_names", def->param_names);
190190
append_json_str_array(buf, bufsize, &pos, "param_types", def->param_types);
191+
append_json_string(buf, bufsize, &pos, "route_path", def->route_path);
192+
append_json_string(buf, bufsize, &pos, "route_method", def->route_method);
191193

192194
if (pos < bufsize - 1) {
193195
buf[pos] = '}';

src/pipeline/pass_httplinks.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,17 @@ static char **extract_decorators(const char *json, int *out_count) {
128128
while ((item = yyjson_arr_iter_next(&iter))) {
129129
if (yyjson_is_str(item)) {
130130
// NOLINTNEXTLINE(misc-include-cleaner) — strdup provided by standard header
131-
out[idx++] = strdup(yyjson_get_str(item));
131+
char *s = strdup(yyjson_get_str(item));
132+
/* Collapse newlines to spaces so regex matches multiline decorators.
133+
* POSIX regex [[:space:]] may not match \n on all platforms. */
134+
if (s) {
135+
for (char *p = s; *p; p++) {
136+
if (*p == '\n' || *p == '\r') {
137+
*p = ' ';
138+
}
139+
}
140+
}
141+
out[idx++] = s;
132142
}
133143
}
134144
out[idx] = NULL;

src/pipeline/pass_parallel.c

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ static void build_def_props(char *buf, size_t bufsize, const CBMDefinition *def)
199199
append_json_str_array(buf, bufsize, &pos, "base_classes", def->base_classes);
200200
append_json_str_array(buf, bufsize, &pos, "param_names", def->param_names);
201201
append_json_str_array(buf, bufsize, &pos, "param_types", def->param_types);
202+
append_json_string(buf, bufsize, &pos, "route_path", def->route_path);
203+
append_json_string(buf, bufsize, &pos, "route_method", def->route_method);
202204
if (pos < bufsize - 1) {
203205
buf[pos] = '}';
204206
buf[pos + 1] = '\0';
@@ -440,11 +442,29 @@ static void extract_worker(int worker_id, void *ctx_ptr) {
440442
char props[2048];
441443
build_def_props(props, sizeof(props), def);
442444

443-
cbm_gbuf_upsert_node(ws->local_gbuf, def->label ? def->label : "Function", def->name,
444-
def->qualified_name,
445-
def->file_path ? def->file_path : fi->rel_path,
446-
(int)def->start_line, (int)def->end_line, props);
445+
int64_t func_id = cbm_gbuf_upsert_node(
446+
ws->local_gbuf, def->label ? def->label : "Function", def->name,
447+
def->qualified_name, def->file_path ? def->file_path : fi->rel_path,
448+
(int)def->start_line, (int)def->end_line, props);
447449
ws->nodes_created++;
450+
451+
/* AST-extracted route: create Route node + HANDLES edge directly.
452+
* Pure AST approach: route_path/route_method extracted from decorator
453+
* tree-sitter nodes during extraction, no regex needed. */
454+
if (def->route_path && def->route_path[0]) {
455+
const char *rm = def->route_method ? def->route_method : "ANY";
456+
char route_qn[CBM_ROUTE_QN_SIZE];
457+
snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", rm, def->route_path);
458+
char rprops[256];
459+
snprintf(rprops, sizeof(rprops), "{\"method\":\"%s\",\"source\":\"decorator\"}",
460+
rm);
461+
int64_t route_id = cbm_gbuf_upsert_node(
462+
ws->local_gbuf, "Route", def->route_path, route_qn,
463+
def->file_path ? def->file_path : fi->rel_path, 0, 0, rprops);
464+
char hprops[512];
465+
snprintf(hprops, sizeof(hprops), "{\"handler\":\"%s\"}", def->qualified_name);
466+
cbm_gbuf_insert_edge(ws->local_gbuf, func_id, route_id, "HANDLES", hprops);
467+
}
448468
}
449469

450470
/* Free TSTree immediately — arena strings survive for registry+resolve.

0 commit comments

Comments
 (0)