Skip to content

Commit b5e288a

Browse files
committed
Argument-to-parameter mapping + field access chains (Phase B2+B3)
CBMCallArg struct captures up to 8 arguments per call with expression text, resolved constant values, keyword names, and positional index. Member expressions produce dotted chain expressions automatically. Serialized as "args" array on CALLS, HTTP_CALLS, ASYNC_CALLS, and CONFIGURES edge properties. Enables data flow tracing across function and service boundaries.
1 parent ec70bfd commit b5e288a

4 files changed

Lines changed: 190 additions & 30 deletions

File tree

internal/cbm/cbm.h

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,23 @@ typedef struct {
107107
bool is_entry_point;
108108
} CBMDefinition;
109109

110+
/* One argument captured from a call expression.
 * Filled in by extract_call_args(); all string fields may be NULL. */
110111
typedef struct {
111-
const char *callee_name; // raw callee text ("pkg.Func", "foo")
112-
const char *enclosing_func_qn; // QN of enclosing function (or module QN)
113-
const char *first_string_arg; // first string literal argument (URL, topic, key) or NULL
114-
const char *second_arg_name; // second argument identifier (handler ref) or NULL
112+
const char *expr; // raw source text of the argument ("payload.info", "MY_URL", "'hello'"); for keyword args, the text of the value only
113+
const char *value; // resolved string: unquoted literal, or looked-up string constant for an identifier; NULL if not resolved
114+
const char *keyword; // keyword name if keyword arg ("url", "topic_id"), NULL if positional
115+
int index; // 0-based position in the argument list; keyword args and skipped splat args also consume a position
116+
} CBMCallArg;
117+
118+
#define CBM_MAX_CALL_ARGS 8
119+
120+
/* A call expression captured during extraction. */
typedef struct {
121+
const char *callee_name; // raw callee text ("pkg.Func", "foo")
122+
const char *enclosing_func_qn; // QN of enclosing function (or module QN)
123+
const char *first_string_arg; // first string literal argument (URL, topic, key) or NULL
124+
const char *second_arg_name; // second argument identifier (handler ref) or NULL
125+
CBMCallArg args[CBM_MAX_CALL_ARGS]; // captured arguments; capture stops once CBM_MAX_CALL_ARGS entries are stored
126+
int arg_count; // number of valid entries in args[] (0..CBM_MAX_CALL_ARGS)
115127
} CBMCall;
116128

117129
typedef struct {

internal/cbm/extract_calls.c

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,25 @@ static const char *lookup_string_constant(const CBMExtractCtx *ctx, const char *
2222
return NULL;
2323
}
2424

25+
/* Report whether a tree-sitter node type names a string literal.
 *
 * kind: node type name as returned by ts_node_type(); may be NULL.
 *
 * Returns 1 when kind is one of the string literal node types listed below,
 * 0 otherwise (including for NULL, which strcmp() must never see). */
static int is_string_like(const char *kind) {
    static const char *const string_kinds[] = {
        "string",
        "string_literal",
        "interpreted_string_literal",
        "raw_string_literal",
        "string_content",
    };

    if (!kind) {
        return 0;
    }
    for (size_t i = 0; i < sizeof(string_kinds) / sizeof(string_kinds[0]); i++) {
        if (strcmp(kind, string_kinds[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
31+
32+
/* Strip surrounding quotes from a string, return arena-allocated copy */
33+
static const char *strip_quotes(CBMArena *a, const char *text) {
34+
if (!text || !text[0]) {
35+
return NULL;
36+
}
37+
int len = (int)strlen(text);
38+
if (len >= 2 && (text[0] == '"' || text[0] == '\'')) {
39+
return cbm_arena_strndup(a, text + 1, (size_t)(len - 2));
40+
}
41+
return text;
42+
}
43+
2544
// Forward declarations
2645
static void walk_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec);
2746
static char *extract_callee_name(CBMArena *a, TSNode node, const char *source, CBMLanguage lang);
@@ -365,6 +384,53 @@ void cbm_extract_calls(CBMExtractCtx *ctx) {
365384

366385
// --- Unified handler: called once per node by the cursor walk ---
367386

387+
/* Extract all arguments from a call expression into call->args[].
388+
* Captures expression text, resolved constants, keyword names, and
389+
* dotted field chains (member_expression → "payload.info.url"). */
390+
static void extract_call_args(CBMExtractCtx *ctx, TSNode args, CBMCall *call) {
391+
uint32_t argc = ts_node_named_child_count(args);
392+
int positional_idx = 0;
393+
for (uint32_t ai = 0; ai < argc && call->arg_count < CBM_MAX_CALL_ARGS; ai++) {
394+
TSNode arg_node = ts_node_named_child(args, ai);
395+
const char *ak = ts_node_type(arg_node);
396+
CBMCallArg *ca = &call->args[call->arg_count];
397+
memset(ca, 0, sizeof(*ca));
398+
399+
if (strcmp(ak, "keyword_argument") == 0 || strcmp(ak, "pair") == 0) {
400+
TSNode key_n = ts_node_child_by_field_name(arg_node, "name", 4);
401+
TSNode val_n = ts_node_child_by_field_name(arg_node, "value", 5);
402+
if (ts_node_is_null(key_n)) {
403+
key_n = ts_node_child_by_field_name(arg_node, "key", 3);
404+
}
405+
if (!ts_node_is_null(key_n)) {
406+
ca->keyword = cbm_node_text(ctx->arena, key_n, ctx->source);
407+
}
408+
if (!ts_node_is_null(val_n)) {
409+
ca->expr = cbm_node_text(ctx->arena, val_n, ctx->source);
410+
if (strcmp(ts_node_type(val_n), "identifier") == 0 && ca->expr) {
411+
ca->value = lookup_string_constant(ctx, ca->expr);
412+
} else if (is_string_like(ts_node_type(val_n)) && ca->expr) {
413+
ca->value = strip_quotes(ctx->arena, ca->expr);
414+
}
415+
}
416+
ca->index = positional_idx++;
417+
call->arg_count++;
418+
} else if (strcmp(ak, "list_splat") == 0 || strcmp(ak, "dictionary_splat") == 0 ||
419+
strcmp(ak, "spread_element") == 0) {
420+
positional_idx++;
421+
} else {
422+
ca->expr = cbm_node_text(ctx->arena, arg_node, ctx->source);
423+
ca->index = positional_idx++;
424+
if (is_string_like(ak) && ca->expr) {
425+
ca->value = strip_quotes(ctx->arena, ca->expr);
426+
} else if (strcmp(ak, "identifier") == 0 && ca->expr) {
427+
ca->value = lookup_string_constant(ctx, ca->expr);
428+
}
429+
call->arg_count++;
430+
}
431+
}
432+
}
433+
368434
void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, WalkState *state) {
369435
if (!spec->call_node_types || !spec->call_node_types[0]) {
370436
return;
@@ -504,26 +570,26 @@ void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, Walk
504570
}
505571
}
506572

507-
/* Extract second argument name (handler ref for route registrations).
508-
* Pattern: router.GET("/path", handlerFunc) → second_arg_name = "handlerFunc"
509-
* Only extracted when first_string_arg looks like a path. */
573+
/* Extract second argument name (handler ref for route registrations). */
510574
if (call.first_string_arg != NULL && call.first_string_arg[0] == '/' &&
511575
!ts_node_is_null(args)) {
512576
uint32_t nc2 = ts_node_named_child_count(args);
513577
for (uint32_t ai = 1; ai < nc2 && ai < 4 && !call.second_arg_name; ai++) {
514578
TSNode arg2 = ts_node_named_child(args, ai);
515579
const char *ak2 = ts_node_type(arg2);
516-
if (strcmp(ak2, "identifier") == 0) {
517-
call.second_arg_name = cbm_node_text(ctx->arena, arg2, ctx->source);
518-
} else if (strcmp(ak2, "member_expression") == 0 ||
519-
strcmp(ak2, "selector_expression") == 0 ||
520-
strcmp(ak2, "attribute") == 0 ||
521-
strcmp(ak2, "field_expression") == 0) {
580+
if (strcmp(ak2, "identifier") == 0 || strcmp(ak2, "member_expression") == 0 ||
581+
strcmp(ak2, "selector_expression") == 0 || strcmp(ak2, "attribute") == 0 ||
582+
strcmp(ak2, "field_expression") == 0) {
522583
call.second_arg_name = cbm_node_text(ctx->arena, arg2, ctx->source);
523584
}
524585
}
525586
}
526587

588+
/* B2+B3: Capture all arguments with expressions + field chains. */
589+
if (!ts_node_is_null(args)) {
590+
extract_call_args(ctx, args, &call);
591+
}
592+
527593
cbm_calls_push(&ctx->result->calls, ctx->arena, call);
528594
}
529595
}

src/pipeline/pass_parallel.c

Lines changed: 95 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,65 @@ typedef struct {
692692
_Atomic int next_file_idx;
693693
} resolve_ctx_t;
694694

695+
/* Minimum buffer space needed per arg JSON object */
696+
#define CBM_ARG_JSON_GUARD 32
697+
698+
/* Append arg data as JSON to edge properties: ,"args":[{"i":0,"e":"x","v":"val"},...]
699+
* Returns new position in buffer. */
700+
static size_t append_args_json(char *buf, size_t bufsize, size_t pos, const CBMCall *call) {
701+
if (call->arg_count == 0 || pos >= bufsize - 20) {
702+
return pos;
703+
}
704+
int n = snprintf(buf + pos, bufsize - pos, ",\"args\":[");
705+
if (n <= 0) {
706+
return pos;
707+
}
708+
pos += (size_t)n;
709+
for (int i = 0; i < call->arg_count && pos < bufsize - CBM_ARG_JSON_GUARD; i++) {
710+
const CBMCallArg *a = &call->args[i];
711+
if (i > 0 && pos < bufsize - 1) {
712+
buf[pos++] = ',';
713+
}
714+
/* Truncate long expressions to keep edge properties compact */
715+
char expr_buf[128];
716+
if (a->expr) {
717+
snprintf(expr_buf, sizeof(expr_buf), "%.*s", 120, a->expr);
718+
/* Escape quotes for JSON safety */
719+
for (char *p = expr_buf; *p; p++) {
720+
if (*p == '"') {
721+
*p = '\'';
722+
}
723+
if (*p == '\n' || *p == '\r') {
724+
*p = ' ';
725+
}
726+
}
727+
} else {
728+
expr_buf[0] = '\0';
729+
}
730+
if (a->keyword && a->value) {
731+
n = snprintf(buf + pos, bufsize - pos,
732+
"{\"i\":%d,\"k\":\"%s\",\"e\":\"%s\",\"v\":\"%s\"}", a->index, a->keyword,
733+
expr_buf, a->value);
734+
} else if (a->keyword) {
735+
n = snprintf(buf + pos, bufsize - pos, "{\"i\":%d,\"k\":\"%s\",\"e\":\"%s\"}", a->index,
736+
a->keyword, expr_buf);
737+
} else if (a->value) {
738+
n = snprintf(buf + pos, bufsize - pos, "{\"i\":%d,\"e\":\"%s\",\"v\":\"%s\"}", a->index,
739+
expr_buf, a->value);
740+
} else {
741+
n = snprintf(buf + pos, bufsize - pos, "{\"i\":%d,\"e\":\"%s\"}", a->index, expr_buf);
742+
}
743+
if (n > 0) {
744+
pos += (size_t)n;
745+
}
746+
}
747+
if (pos < bufsize - 1) {
748+
buf[pos++] = ']';
749+
}
750+
buf[pos] = '\0';
751+
return pos;
752+
}
753+
695754
/* Classify a resolved call by library identity and emit the appropriate edge.
696755
* Extracted from resolve_worker to keep cognitive complexity under threshold. */
697756
static void emit_service_edge(cbm_gbuf_t *gbuf, const cbm_gbuf_node_t *source,
@@ -759,23 +818,46 @@ static void emit_service_edge(cbm_gbuf_t *gbuf, const cbm_gbuf_node_t *source,
759818
int64_t route_id =
760819
cbm_gbuf_upsert_node(gbuf, "Route", arg, route_qn, "", 0, 0, route_props);
761820

762-
char props[512];
763-
snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"url_path\":\"%s\"%s%s%s%s%s%s}",
764-
call->callee_name, arg, method ? ",\"method\":\"" : "", method ? method : "",
765-
method ? "\"" : "", broker ? ",\"broker\":\"" : "", broker ? broker : "",
766-
broker ? "\"" : "");
821+
char props[2048];
822+
int n = snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"url_path\":\"%s\"%s%s%s%s%s%s",
823+
call->callee_name, arg, method ? ",\"method\":\"" : "",
824+
method ? method : "", method ? "\"" : "", broker ? ",\"broker\":\"" : "",
825+
broker ? broker : "", broker ? "\"" : "");
826+
if (n > 0 && (size_t)n < sizeof(props) - 2) {
827+
size_t pos = append_args_json(props, sizeof(props), (size_t)n, call);
828+
if (pos < sizeof(props) - 1) {
829+
props[pos] = '}';
830+
props[pos + 1] = '\0';
831+
}
832+
}
767833
cbm_gbuf_insert_edge(gbuf, source->id, route_id, edge_type, props);
768834
} else if (svc == CBM_SVC_CONFIG) {
769-
char props[512];
770-
snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"key\":\"%s\",\"confidence\":%.2f}",
771-
call->callee_name, arg != NULL ? arg : "", res->confidence);
835+
char props[2048];
836+
int n =
837+
snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"key\":\"%s\",\"confidence\":%.2f",
838+
call->callee_name, arg != NULL ? arg : "", res->confidence);
839+
if (n > 0 && (size_t)n < sizeof(props) - 2) {
840+
size_t pos = append_args_json(props, sizeof(props), (size_t)n, call);
841+
if (pos < sizeof(props) - 1) {
842+
props[pos] = '}';
843+
props[pos + 1] = '\0';
844+
}
845+
}
772846
cbm_gbuf_insert_edge(gbuf, source->id, target->id, "CONFIGURES", props);
773847
} else {
774-
char props[512];
775-
snprintf(props, sizeof(props),
776-
"{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}",
777-
call->callee_name, res->confidence, res->strategy ? res->strategy : "unknown",
778-
res->candidate_count);
848+
char props[2048];
849+
int n = snprintf(props, sizeof(props),
850+
"{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\","
851+
"\"candidates\":%d",
852+
call->callee_name, res->confidence,
853+
res->strategy ? res->strategy : "unknown", res->candidate_count);
854+
if (n > 0 && (size_t)n < sizeof(props) - 2) {
855+
size_t pos = append_args_json(props, sizeof(props), (size_t)n, call);
856+
if (pos < sizeof(props) - 1) {
857+
props[pos] = '}';
858+
props[pos + 1] = '\0';
859+
}
860+
}
779861
cbm_gbuf_insert_edge(gbuf, source->id, target->id, "CALLS", props);
780862
}
781863
}

tests/test_pipeline.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3675,15 +3675,15 @@ TEST(infra_parse_terraform_full) {
36753675
" }\n"
36763676
" }\n"
36773677
" backend \"gcs\" {\n"
3678-
" bucket = \"hoepke-tf\"\n"
3678+
" bucket = \"example-tf\"\n"
36793679
" prefix = \"state\"\n"
36803680
" }\n"
36813681
"}\n"
36823682
"\n"
36833683
"variable \"project_id\" {\n"
36843684
" description = \"The GCP project ID\"\n"
36853685
" type = string\n"
3686-
" default = \"hoepke-cloud\"\n"
3686+
" default = \"example-cloud\"\n"
36873687
"}\n"
36883688
"\n"
36893689
"variable \"region\" {\n"
@@ -3736,7 +3736,7 @@ TEST(infra_parse_terraform_full) {
37363736
bool found_project_id = false;
37373737
for (int i = 0; i < r.variable_count; i++) {
37383738
if (strcmp(r.variables[i].name, "project_id") == 0) {
3739-
ASSERT_STR_EQ(r.variables[i].default_val, "hoepke-cloud");
3739+
ASSERT_STR_EQ(r.variables[i].default_val, "example-cloud");
37403740
ASSERT_STR_EQ(r.variables[i].type, "string");
37413741
ASSERT_STR_EQ(r.variables[i].description, "The GCP project ID");
37423742
found_project_id = true;
@@ -3770,7 +3770,7 @@ TEST(infra_parse_terraform_variables_only) {
37703770
"variable \"project_id\" {\n"
37713771
" description = \"The GCP project ID\"\n"
37723772
" type = string\n"
3773-
" default = \"hoepke-cloud\"\n"
3773+
" default = \"example-cloud\"\n"
37743774
"}\n"
37753775
"\n"
37763776
"variable \"secret_key\" {\n"

0 commit comments

Comments
 (0)