Skip to content

Commit 949d663

Browse files
author
Your Name
committed
feat(store): Route→Function resolution + relaxed process detection
Two fixes to dramatically increase detected execution flows:

1. Route→Function resolution (step 1b): Route nodes have 0 outgoing edges (only incoming HANDLES from Module nodes), so BFS from Routes went nowhere. Now resolves each Route entry point through the HANDLES edge to find the Module, then looks up Functions in the same file — those become the real BFS starting points. This connects HTTP API routes to their handler logic.

2. Relaxed cross-community requirement: previously, flows were only created when BFS crossed a Louvain community boundary. Now flows with ≥3 steps are kept even within a single community, picking the deepest non-generic node as terminal. This catches Express-style flat patterns (route → controller → storage → db) that stay within one community.

Results:
- Express monorepo: 4 → 61 flows (route handlers now visible)
- C# service: 69 → 78 flows (+9 intra-community flows)
- JS service: 65 → 70 flows (+5 intra-community flows)
- TS monolith: 300 (capped, no change)
1 parent 309780d commit 949d663

1 file changed

Lines changed: 79 additions & 2 deletions

File tree

src/store/store.c

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4647,6 +4647,60 @@ int cbm_store_detect_processes(cbm_store_t *s, const char *project, int max_proc
46474647
return 0;
46484648
}
46494649

4650+
/* 1b. Resolve Route entry points to handler Functions.
4651+
* Route nodes have 0 outgoing edges (only incoming HANDLES from Modules).
4652+
* For each Route, find the Module that HANDLES it, then find the Functions
4653+
* and Methods in the same file as that Module. Those are appended as extra
4654+
* entry points (the Route stays in the list) — they're the real BFS starting points. */
4655+
{
4656+
const char *resolve_sql =
4657+
"SELECT DISTINCT fn.id, fn.name FROM edges e "
4658+
"JOIN nodes m ON m.id = e.source_id AND m.label = 'Module' "
4659+
"JOIN nodes fn ON fn.file_path = m.file_path "
4660+
"AND fn.label IN ('Function','Method') AND fn.project = ?2 "
4661+
"WHERE e.target_id = ?1 AND e.type = 'HANDLES' AND e.project = ?2";
4662+
sqlite3_stmt *res_stmt = NULL;
4663+
sqlite3_prepare_v2(s->db, resolve_sql, -1, &res_stmt, NULL);
4664+
4665+
if (res_stmt) {
4666+
int orig_count = ep_count;
4667+
for (int i = 0; i < orig_count; i++) {
4668+
/* Check if this entry point is a Route node */
4669+
const char *check_sql = "SELECT label FROM nodes WHERE id = ?1";
4670+
sqlite3_stmt *chk = NULL;
4671+
sqlite3_prepare_v2(s->db, check_sql, -1, &chk, NULL);
4672+
if (!chk) continue;
4673+
sqlite3_bind_int64(chk, 1, ep_ids[i]);
4674+
const char *label = NULL;
4675+
if (sqlite3_step(chk) == SQLITE_ROW) {
4676+
label = (const char *)sqlite3_column_text(chk, 0);
4677+
}
4678+
bool is_route = (label && strcmp(label, "Route") == 0);
4679+
sqlite3_finalize(chk);
4680+
4681+
if (!is_route) continue;
4682+
4683+
/* Resolve Route → Module → Functions */
4684+
sqlite3_reset(res_stmt);
4685+
sqlite3_bind_int64(res_stmt, 1, ep_ids[i]);
4686+
bind_text(res_stmt, 2, project);
4687+
4688+
while (sqlite3_step(res_stmt) == SQLITE_ROW) {
4689+
if (ep_count >= ep_cap) {
4690+
ep_cap *= 2;
4691+
ep_ids = safe_realloc(ep_ids, (size_t)ep_cap * sizeof(int64_t));
4692+
ep_names = safe_realloc(ep_names, (size_t)ep_cap * sizeof(char *));
4693+
}
4694+
ep_ids[ep_count] = sqlite3_column_int64(res_stmt, 0);
4695+
const char *fn_name = (const char *)sqlite3_column_text(res_stmt, 1);
4696+
ep_names[ep_count] = heap_strdup(fn_name ? fn_name : "?");
4697+
ep_count++;
4698+
}
4699+
}
4700+
sqlite3_finalize(res_stmt);
4701+
}
4702+
}
4703+
46504704
/* 2. Load nodes + CALLS edges for Louvain */
46514705
const char *nsql = "SELECT id FROM nodes WHERE project=?1 "
46524706
"AND label IN ('Function','Method','Class','Interface')";
@@ -4815,9 +4869,32 @@ int cbm_store_detect_processes(cbm_store_t *s, const char *project, int max_proc
48154869
}
48164870
}
48174871

4872+
/* If no cross-community terminal was found, still accept flows with ≥3 steps.
4873+
* This prevents filtering out legitimate API flows (route → controller → storage)
4874+
* that happen to stay within one Louvain community due to flat call patterns.
4875+
* Pick the best-scoring non-generic node (name length, hop depth, capitalized name) as terminal for the label. */
48184876
if (!is_cross) {
4819-
cbm_store_traverse_free(&tr);
4820-
continue;
4877+
if (tr.visited_count < 3) {
4878+
cbm_store_traverse_free(&tr);
4879+
continue;
4880+
}
4881+
/* Find best terminal by hop depth + name quality */
4882+
for (int v = 0; v < tr.visited_count; v++) {
4883+
const char *nm = tr.visited[v].node.name;
4884+
if (!nm) continue;
4885+
bool is_generic = false;
4886+
for (int g = 0; generic_names[g]; g++) {
4887+
if (strcmp(nm, generic_names[g]) == 0) { is_generic = true; break; }
4888+
}
4889+
if (is_generic) continue;
4890+
int score = (int)strlen(nm) * 10 + tr.visited[v].hop * 5;
4891+
if (nm[0] >= 'A' && nm[0] <= 'Z') score += 50;
4892+
if (score > best_score) {
4893+
best_score = score;
4894+
terminal_id = tr.visited[v].node.id;
4895+
terminal_name = nm;
4896+
}
4897+
}
48214898
}
48224899

48234900
/* Label: "EntryPoint → Terminal" (UTF-8 arrow) */

0 commit comments

Comments
 (0)