Skip to content

Commit 57b89e0

Browse files
author
Your Name
committed
feat(quality): semantic cluster labels + process participation in trace
Gap 1 — Semantic cluster labels: Replace the auto-numbered 'Cluster_N' labels with directory-derived semantic labels. For each cluster, sample up to 50 member file paths and extract the most common non-generic directory segment (generic names such as src, lib, dist, build, test, tests, node_modules, shared, utils, internal and generated are skipped), then capitalize the first letter and convert kebab-case to TitleCase. Falls back to 'Cluster_N' when no directory has >= 3 occurrences. Result: 'Services', 'Components', 'Controllers', 'Storage', 'Models', 'Stores', 'Scenarios', 'Courses' — matching competing tool quality.

Gap 2 — Process participation in trace_call_path: After BFS traversal, query the processes table to find all execution flows the traced function participates in (as entry point, as terminal, or by a substring match of the function name in the flow label). Up to 20 flows are included, each with label, process_type, and step_count, directly in the trace response — no separate tool call needed.
1 parent 4416642 commit 57b89e0

2 files changed

Lines changed: 163 additions & 5 deletions

File tree

src/mcp/mcp.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1672,6 +1672,62 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
16721672
yyjson_mut_obj_add_val(doc, root, "callers", callers);
16731673
}
16741674

1675+
/* Add process participation: which execution flows does the traced node appear in? */
1676+
{
1677+
cbm_process_info_t *procs = NULL;
1678+
int pcount = 0;
1679+
cbm_store_list_processes(store, project, &procs, &pcount);
1680+
1681+
if (pcount > 0) {
1682+
yyjson_mut_val *flows = yyjson_mut_arr(doc);
1683+
int flow_count = 0;
1684+
1685+
/* Check each process for participation by the traced node.
1686+
* Match by ID first, then fall back to a case-sensitive name substring match, since the process may store
1687+
* a different node ID for the same logical function. */
1688+
for (int pi = 0; pi < pcount && flow_count < 20; pi++) {
1689+
bool participates = false;
1690+
/* Check original matched node by ID */
1691+
if (procs[pi].entry_point_id == nodes[best_idx].id ||
1692+
procs[pi].terminal_id == nodes[best_idx].id) {
1693+
participates = true;
1694+
}
1695+
/* Check start_ids (method IDs for class resolution) */
1696+
if (!participates) {
1697+
for (int si = 0; si < start_id_count; si++) {
1698+
if (procs[pi].entry_point_id == start_ids[si] ||
1699+
procs[pi].terminal_id == start_ids[si]) {
1700+
participates = true;
1701+
break;
1702+
}
1703+
}
1704+
}
1705+
/* Fallback: match by function name in the process label */
1706+
if (!participates && func_name && procs[pi].label) {
1707+
/* Process labels are "EntryName → TerminalName" */
1708+
if (strstr(procs[pi].label, func_name) != NULL) {
1709+
participates = true;
1710+
}
1711+
}
1712+
if (participates) {
1713+
yyjson_mut_val *fi = yyjson_mut_obj(doc);
1714+
yyjson_mut_obj_add_strcpy(doc, fi, "label",
1715+
procs[pi].label ? procs[pi].label : "");
1716+
yyjson_mut_obj_add_strcpy(doc, fi, "process_type",
1717+
procs[pi].process_type ? procs[pi].process_type : "");
1718+
yyjson_mut_obj_add_int(doc, fi, "step_count", procs[pi].step_count);
1719+
yyjson_mut_arr_add_val(flows, fi);
1720+
flow_count++;
1721+
}
1722+
}
1723+
1724+
if (flow_count > 0) {
1725+
yyjson_mut_obj_add_val(doc, root, "processes", flows);
1726+
}
1727+
}
1728+
cbm_store_free_processes(procs, pcount);
1729+
}
1730+
16751731
/* Serialize BEFORE freeing traversal results (yyjson borrows strings) */
16761732
char *json = yy_doc_to_str(doc);
16771733
yyjson_mut_doc_free(doc);

src/store/store.c

Lines changed: 107 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4328,11 +4328,113 @@ static int arch_clusters(cbm_store_t *s, const char *project, cbm_architecture_i
43284328
clusters[ci].top_nodes = top_names;
43294329
clusters[ci].top_node_count = tn;
43304330

4331-
/* Label: use the most common node name prefix as a heuristic.
4332-
* For now, just use "Cluster_N" — semantic naming requires LLM. */
4333-
char label_buf[64];
4334-
snprintf(label_buf, sizeof(label_buf), "Cluster_%d", comm_id);
4335-
clusters[ci].label = heap_strdup(label_buf);
4331+
/* Derive semantic label from most common directory in member file paths.
4332+
* E.g. members in controllers/ → "Controllers", components/ → "Components" */
4333+
{
4334+
/* Query file paths for a sample of cluster members */
4335+
char dir_counts[64][64]; /* directory names */
4336+
int dir_freqs[64]; /* frequency counts */
4337+
int dir_n = 0;
4338+
memset(dir_freqs, 0, sizeof(dir_freqs));
4339+
4340+
int sample_limit = cn < 50 ? cn : 50;
4341+
for (int k = 0; k < sample_limit; k++) {
4342+
cbm_node_t ni;
4343+
if (cbm_store_find_node_by_id(s, comm_nodes[k], &ni) == CBM_STORE_OK) {
4344+
if (ni.file_path && ni.file_path[0]) {
4345+
/* Extract the deepest meaningful directory segment.
4346+
* E.g. "src/controllers/users-controller.ts" → "controllers" */
4347+
const char *fp = ni.file_path;
4348+
const char *best_dir = NULL;
4349+
const char *p2 = fp;
4350+
const char *prev_slash = NULL;
4351+
while (*p2) {
4352+
if (*p2 == '/') {
4353+
if (prev_slash) {
4354+
/* Extract segment between prev_slash+1 and p2 */
4355+
int slen = (int)(p2 - prev_slash - 1);
4356+
if (slen > 0 && slen < 60) {
4357+
/* Skip generic dirs: src, lib, dist, build, node_modules, test, tests, shared, utils, internal, generated */
4358+
char seg[64];
4359+
memcpy(seg, prev_slash + 1, (size_t)slen);
4360+
seg[slen] = '\0';
4361+
if (strcmp(seg, "src") != 0 && strcmp(seg, "lib") != 0 &&
4362+
strcmp(seg, "dist") != 0 && strcmp(seg, "build") != 0 &&
4363+
strcmp(seg, "node_modules") != 0 &&
4364+
strcmp(seg, "test") != 0 && strcmp(seg, "tests") != 0 &&
4365+
strcmp(seg, "shared") != 0 && strcmp(seg, "utils") != 0 &&
4366+
strcmp(seg, "internal") != 0 && strcmp(seg, "generated") != 0) {
4367+
best_dir = prev_slash + 1;
4368+
}
4369+
}
4370+
}
4371+
prev_slash = p2;
4372+
}
4373+
p2++;
4374+
}
4375+
if (best_dir) {
4376+
const char *end = strchr(best_dir, '/');
4377+
int dlen = end ? (int)(end - best_dir) : (int)strlen(best_dir);
4378+
if (dlen > 0 && dlen < 60) {
4379+
char dname[64];
4380+
memcpy(dname, best_dir, (size_t)dlen);
4381+
dname[dlen] = '\0';
4382+
/* Find or add to dir_counts */
4383+
bool found_dir = false;
4384+
for (int d = 0; d < dir_n; d++) {
4385+
if (strcmp(dir_counts[d], dname) == 0) {
4386+
dir_freqs[d]++;
4387+
found_dir = true;
4388+
break;
4389+
}
4390+
}
4391+
if (!found_dir && dir_n < 64) {
4392+
strncpy(dir_counts[dir_n], dname, 63);
4393+
dir_counts[dir_n][63] = '\0';
4394+
dir_freqs[dir_n] = 1;
4395+
dir_n++;
4396+
}
4397+
}
4398+
}
4399+
}
4400+
cbm_node_free_fields(&ni);
4401+
}
4402+
}
4403+
4404+
/* Pick the most frequent directory name */
4405+
char label_buf[64];
4406+
int best_freq = 0;
4407+
int best_di = -1;
4408+
for (int d = 0; d < dir_n; d++) {
4409+
if (dir_freqs[d] > best_freq) {
4410+
best_freq = dir_freqs[d];
4411+
best_di = d;
4412+
}
4413+
}
4414+
if (best_di >= 0 && best_freq >= 3) {
4415+
/* Capitalize first letter */
4416+
char cap_name[64];
4417+
strncpy(cap_name, dir_counts[best_di], sizeof(cap_name) - 1);
4418+
cap_name[sizeof(cap_name) - 1] = '\0';
4419+
if (cap_name[0] >= 'a' && cap_name[0] <= 'z') {
4420+
cap_name[0] = cap_name[0] - 'a' + 'A';
4421+
}
4422+
/* Convert kebab-case to TitleCase: "users-controller" → "UsersController" */
4423+
for (int j = 0; cap_name[j]; j++) {
4424+
if (cap_name[j] == '-' && cap_name[j + 1]) {
4425+
/* Remove dash and capitalize next */
4426+
memmove(&cap_name[j], &cap_name[j + 1], strlen(&cap_name[j + 1]) + 1);
4427+
if (cap_name[j] >= 'a' && cap_name[j] <= 'z') {
4428+
cap_name[j] = cap_name[j] - 'a' + 'A';
4429+
}
4430+
}
4431+
}
4432+
snprintf(label_buf, sizeof(label_buf), "%s", cap_name);
4433+
} else {
4434+
snprintf(label_buf, sizeof(label_buf), "Cluster_%d", comm_id);
4435+
}
4436+
clusters[ci].label = heap_strdup(label_buf);
4437+
}
43364438

43374439
/* packages and edge_types are optional, leave as NULL/0 for now */
43384440
clusters[ci].packages = NULL;

0 commit comments

Comments
 (0)