From 9332593b26ee569e60ed493b1c43afcd5018e60c Mon Sep 17 00:00:00 2001 From: Berserk Agent Date: Wed, 24 Jun 2026 12:38:10 +0000 Subject: [PATCH 1/4] Store path scoping for get_architecture Signed-off-by: Berserk Agent --- src/mcp/mcp.c | 14 +- src/store/store.c | 509 ++++++++++++++++++++++++++++++++++------ src/store/store.h | 12 +- tests/test_store_arch.c | 104 ++++++-- 4 files changed, 545 insertions(+), 94 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 7016a0d2..0bb5efa3 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1917,12 +1917,14 @@ static void append_cross_repo_summary(yyjson_mut_doc *doc, yyjson_mut_val *root, static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { char *project = cbm_mcp_get_string_arg(args, "project"); + char *scope_path = cbm_mcp_get_string_arg(args, "path"); cbm_store_t *store = resolve_store(srv, project); REQUIRE_STORE(store, project); char *not_indexed = verify_project_indexed(store, project); if (not_indexed) { free(project); + free(scope_path); return not_indexed; } @@ -1962,14 +1964,15 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { /* Counts-only: this handler renders label/type counts but never property * keys, and full key discovery json_each-scans every row (seconds-to- * minutes on multi-million-node graphs). */ - cbm_store_get_schema_counts(store, project, &schema); + cbm_store_get_schema_counts_scoped(store, project, scope_path, &schema); cbm_architecture_info_t arch = {0}; - cbm_store_get_architecture(store, project, aspects_strs_count > 0 ? aspects_strs : NULL, - aspects_strs_count, &arch); + cbm_store_get_architecture(store, project, scope_path, + aspects_strs_count > 0 ? aspects_strs : NULL, aspects_strs_count, + &arch); - int node_count = cbm_store_count_nodes(store, project); - int edge_count = cbm_store_count_edges(store, project); + int node_count = cbm_store_count_nodes_scoped(store, project, scope_path); + int edge_count = cbm_store_count_edges_scoped(store, project, scope_path); yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); @@ -2193,6 +2196,7 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { yyjson_doc_free(aspects_doc); } free(project); + free(scope_path); char *result = cbm_mcp_text_result(json, false); free(json); diff --git a/src/store/store.c b/src/store/store.c index 995f6e85..152c8c61 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -2908,6 +2908,120 @@ static void schema_discover_props(sqlite3 *db, const char *sql, const char *proj *out_count = pn; } +/* Path scoping for architecture / schema (shared). */ +static bool arch_path_is_set(const char *path) { + if (!path) { + return false; + } + while (*path == ' ' || *path == '\t' || *path == '\n' || *path == '\r') { + path++; + } + return path[0] != '\0'; +} + +static bool arch_path_prepare(const char *path, char *norm_out, size_t norm_sz, char *like_out, + size_t like_sz) { + if (!arch_path_is_set(path)) { + return false; + } + while (*path == ' ' || *path == '\t' || *path == '\n' || *path == '\r') { + path++; + } + if (path[0] == '\0') { + return false; + } + if (strncmp(path, "./", 2) == 0) { + path += 2; + } + while (*path == '/') { + path++; + } + if (path[0] == '\0') { + return false; + } + strncpy(norm_out, path, norm_sz - 1); + norm_out[norm_sz - 1] = '\0'; + size_t len = strlen(norm_out); + while (len > 0 && (norm_out[len - 1] == ' ' || norm_out[len - 1] == '\t')) { + norm_out[--len] = '\0'; + } + if (norm_out[0] == '\0') { + return false; + } + snprintf(like_out, like_sz, "%s/%%", norm_out); + return true; +} + +static const char *arch_path_scope_sql(void) { + return " AND (file_path = ? OR file_path LIKE ?)"; +} + +static void arch_bind_path_scope(sqlite3_stmt *stmt, int exact_idx, int like_idx, const char *norm, + const char *like_pat) { + bind_text(stmt, exact_idx, norm); + bind_text(stmt, like_idx, like_pat); +} + +int cbm_store_count_nodes_scoped(cbm_store_t *s, const char *project, const char *path) { + if (!s || !s->db || !project) { + return 0; + } + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + if (!arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like))) { + return cbm_store_count_nodes(s, project); + } + const char *sql = "SELECT COUNT(*) FROM nodes WHERE project = ?1 " + "AND (file_path = ?2 OR file_path LIKE ?3);"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) { + if (stmt) { + sqlite3_finalize(stmt); + } + return CBM_STORE_ERR; + } + bind_text(stmt, ST_COL_1, project); + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + int n = 0; + if (sqlite3_step(stmt) == SQLITE_ROW) { + n = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + return n; +} + +int cbm_store_count_edges_scoped(cbm_store_t *s, const char *project, const char *path) { + if (!s || !s->db || !project) { + return 0; + } + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + if (!arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like))) { + return cbm_store_count_edges(s, project); + } + const char *sql = + "SELECT COUNT(*) FROM edges e WHERE e.project = ?1 " + "AND EXISTS (SELECT 1 FROM nodes ns WHERE ns.id = e.source_id AND ns.project = ?1 " + "AND (ns.file_path = ?2 OR ns.file_path LIKE ?3)) " + "AND EXISTS (SELECT 1 FROM nodes nt WHERE nt.id = e.target_id AND nt.project = ?1 " + "AND (nt.file_path = ?2 OR nt.file_path LIKE ?3));"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) { + if (stmt) { + sqlite3_finalize(stmt); + } + return CBM_STORE_ERR; + } + bind_text(stmt, ST_COL_1, project); + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + int n = 0; + if (sqlite3_step(stmt) == SQLITE_ROW) { + n = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + return n; +} + /* with_props=false skips the per-label/per-type JSON property-key discovery: * those json_each() scans walk EVERY row of each label/type (minutes-scale on * multi-million-node graphs) and get_architecture only needs the counts. */ @@ -3061,6 +3175,124 @@ int cbm_store_get_schema_counts(cbm_store_t *s, const char *project, cbm_schema_ return get_schema_impl(s, project, out, false); } +int cbm_store_get_schema_counts_scoped(cbm_store_t *s, const char *project, const char *path, + cbm_schema_info_t *out) { + memset(out, 0, sizeof(*out)); + if (!s || !s->db) { + return CBM_NOT_FOUND; + } + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + if (!scoped) { + return get_schema_impl(s, project, out, false); + } + + char sqlbuf[ST_SQL_BUF]; + { + const char *base = "SELECT label, COUNT(*) FROM nodes WHERE project = ?1"; + snprintf(sqlbuf, sizeof(sqlbuf), "%s%s GROUP BY label ORDER BY COUNT(*) DESC;", base, + arch_path_scope_sql()); + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) { + if (stmt) { + sqlite3_finalize(stmt); + } + return CBM_NOT_FOUND; + } + bind_text(stmt, SKIP_ONE, project); + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + + int cap = ST_INIT_CAP_8; + int n = 0; + cbm_label_count_t *arr = malloc(cap * sizeof(cbm_label_count_t)); + if (!arr) { + sqlite3_finalize(stmt); + return CBM_NOT_FOUND; + } + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + int new_cap = cap * ST_GROWTH; + void *tmp = realloc(arr, new_cap * sizeof(cbm_label_count_t)); + if (!tmp) { + for (int i = 0; i < n; i++) { + safe_str_free(&arr[i].label); + } + free(arr); + sqlite3_finalize(stmt); + return CBM_NOT_FOUND; + } + arr = tmp; + cap = new_cap; + } + arr[n].label = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].count = sqlite3_column_int(stmt, SKIP_ONE); + arr[n].properties = NULL; + arr[n].property_count = 0; + n++; + } + sqlite3_finalize(stmt); + out->node_labels = arr; + out->node_label_count = n; + } + + { + const char *esql = + "SELECT e.type, COUNT(*) FROM edges e WHERE e.project = ?1 " + "AND EXISTS (SELECT 1 FROM nodes ns WHERE ns.id = e.source_id AND ns.project = ?1 " + "AND (ns.file_path = ?2 OR ns.file_path LIKE ?3)) " + "AND EXISTS (SELECT 1 FROM nodes nt WHERE nt.id = e.target_id AND nt.project = ?1 " + "AND (nt.file_path = ?2 OR nt.file_path LIKE ?3)) " + "GROUP BY e.type ORDER BY COUNT(*) DESC;"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, esql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) { + if (stmt) { + sqlite3_finalize(stmt); + } + cbm_store_schema_free(out); + return CBM_NOT_FOUND; + } + bind_text(stmt, SKIP_ONE, project); + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + + int cap = ST_INIT_CAP_8; + int n = 0; + cbm_type_count_t *arr = malloc(cap * sizeof(cbm_type_count_t)); + if (!arr) { + sqlite3_finalize(stmt); + cbm_store_schema_free(out); + return CBM_NOT_FOUND; + } + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + int new_cap = cap * ST_GROWTH; + void *tmp = realloc(arr, new_cap * sizeof(cbm_type_count_t)); + if (!tmp) { + for (int i = 0; i < n; i++) { + safe_str_free(&arr[i].type); + } + free(arr); + sqlite3_finalize(stmt); + cbm_store_schema_free(out); + return CBM_NOT_FOUND; + } + arr = tmp; + cap = new_cap; + } + arr[n].type = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].count = sqlite3_column_int(stmt, SKIP_ONE); + arr[n].properties = NULL; + arr[n].property_count = 0; + n++; + } + sqlite3_finalize(stmt); + out->edge_types = arr; + out->edge_type_count = n; + } + + return CBM_STORE_OK; +} + void cbm_store_schema_free(cbm_schema_info_t *out) { if (!out) { return; @@ -3231,14 +3463,27 @@ static const char *file_ext(const char *path) { /* ── Architecture aspect implementations ───────────────────────── */ -static int arch_languages(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT file_path FROM nodes WHERE project=?1 AND label='File'"; +static int arch_languages(cbm_store_t *s, const char *project, const char *path, + cbm_architecture_info_t *out) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char sqlbuf[ST_SQL_BUF]; + const char *base = "SELECT file_path FROM nodes WHERE project=?1 AND label='File'"; + if (scoped) { + snprintf(sqlbuf, sizeof(sqlbuf), "%s%s", base, arch_path_scope_sql()); + } else { + snprintf(sqlbuf, sizeof(sqlbuf), "%s", base); + } sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, sqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "arch_languages"); return CBM_STORE_ERR; } bind_text(stmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + } /* Count per language using a simple parallel array */ const char *lang_names[CBM_SZ_64]; @@ -3295,18 +3540,31 @@ static int arch_languages(cbm_store_t *s, const char *project, cbm_architecture_ return CBM_STORE_OK; } -static int arch_entry_points(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT name, qualified_name, file_path FROM nodes " - "WHERE project=?1 AND json_extract(properties, '$.is_entry_point') = 1 " - "AND (json_extract(properties, '$.is_test') IS NULL OR " - "json_extract(properties, '$.is_test') != 1) " - "AND file_path NOT LIKE '%test%' LIMIT 20"; +static int arch_entry_points(cbm_store_t *s, const char *project, const char *path, + cbm_architecture_info_t *out) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char sqlbuf[ST_SQL_BUF]; + const char *base = "SELECT name, qualified_name, file_path FROM nodes " + "WHERE project=?1 AND json_extract(properties, '$.is_entry_point') = 1 " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND file_path NOT LIKE '%test%'"; + if (scoped) { + snprintf(sqlbuf, sizeof(sqlbuf), "%s%s LIMIT 20", base, arch_path_scope_sql()); + } else { + snprintf(sqlbuf, sizeof(sqlbuf), "%s LIMIT 20", base); + } sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, sqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "arch_entry_points"); return CBM_STORE_ERR; } bind_text(stmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + } int cap = ST_INIT_CAP_8; int n = 0; @@ -3351,18 +3609,30 @@ static char *extract_json_string_prop(const char *json, const char *key, int key return heap_strdup(vbuf); } -static int arch_routes(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT name, properties, COALESCE(file_path, '') FROM nodes " - "WHERE project=?1 AND label='Route' " - "AND (json_extract(properties, '$.is_test') IS NULL OR " - "json_extract(properties, '$.is_test') != 1) " - "LIMIT 20"; +static int arch_routes(cbm_store_t *s, const char *project, const char *path, + cbm_architecture_info_t *out) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char sqlbuf[ST_SQL_BUF]; + const char *base = "SELECT name, properties, COALESCE(file_path, '') FROM nodes " + "WHERE project=?1 AND label='Route' " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1)"; + if (scoped) { + snprintf(sqlbuf, sizeof(sqlbuf), "%s%s LIMIT 20", base, arch_path_scope_sql()); + } else { + snprintf(sqlbuf, sizeof(sqlbuf), "%s LIMIT 20", base); + } sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, sqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "arch_routes"); return CBM_STORE_ERR; } bind_text(stmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + } int cap = ST_INIT_CAP_8; int n = 0; @@ -3407,20 +3677,35 @@ static int arch_routes(cbm_store_t *s, const char *project, cbm_architecture_inf return CBM_STORE_OK; } -static int arch_hotspots(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT n.name, n.qualified_name, COUNT(*) as fan_in " - "FROM nodes n JOIN edges e ON e.target_id = n.id AND e.type = 'CALLS' " - "WHERE n.project=?1 AND n.label IN ('Function', 'Method') " - "AND (json_extract(n.properties, '$.is_test') IS NULL OR " - "json_extract(n.properties, '$.is_test') != 1) " - "AND n.file_path NOT LIKE '%test%' " - "GROUP BY n.id ORDER BY fan_in DESC LIMIT 10"; +static int arch_hotspots(cbm_store_t *s, const char *project, const char *path, + cbm_architecture_info_t *out) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char sqlbuf[ST_SQL_BUF]; + const char *base = "SELECT n.name, n.qualified_name, COUNT(*) as fan_in " + "FROM nodes n JOIN edges e ON e.target_id = n.id AND e.type = 'CALLS' " + "WHERE n.project=?1 AND n.label IN ('Function', 'Method') " + "AND (json_extract(n.properties, '$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "AND n.file_path NOT LIKE '%test%'"; + if (scoped) { + snprintf(sqlbuf, sizeof(sqlbuf), + "%s AND (n.file_path = ?2 OR n.file_path LIKE ?3) " + "GROUP BY n.id ORDER BY fan_in DESC LIMIT 10", + base); + } else { + snprintf(sqlbuf, sizeof(sqlbuf), "%s GROUP BY n.id ORDER BY fan_in DESC LIMIT 10", base); + } sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, sqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "arch_hotspots"); return CBM_STORE_ERR; } bind_text(stmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + } int cap = ST_INIT_CAP_8; int n = 0; @@ -3482,17 +3767,28 @@ static void accum_boundary(const char *src_pkg, const char *tgt_pkg, char **bfro } } -static int arch_boundaries(cbm_store_t *s, const char *project, cbm_cross_pkg_boundary_t **out_arr, - int *out_count) { - /* Build nodeID → package map. ORDER BY id so lookup_pkg can binary-search. */ - const char *nsql = "SELECT id, qualified_name FROM nodes WHERE project=?1 AND label IN " - "('Function','Method','Class') ORDER BY id"; +static int arch_boundaries(cbm_store_t *s, const char *project, const char *path, + cbm_cross_pkg_boundary_t **out_arr, int *out_count) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char nsqlbuf[ST_SQL_BUF]; + const char *nbase = "SELECT id, qualified_name, file_path FROM nodes WHERE project=?1 AND label IN " + "('Function','Method','Class')"; + if (scoped) { + snprintf(nsqlbuf, sizeof(nsqlbuf), "%s%s ORDER BY id", nbase, arch_path_scope_sql()); + } else { + snprintf(nsqlbuf, sizeof(nsqlbuf), "%s ORDER BY id", nbase); + } sqlite3_stmt *nstmt = NULL; - if (sqlite3_prepare_v2(s->db, nsql, CBM_NOT_FOUND, &nstmt, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, nsqlbuf, CBM_NOT_FOUND, &nstmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "arch_boundaries_nodes"); return CBM_STORE_ERR; } bind_text(nstmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(nstmt, ST_COL_2, ST_COL_3, norm, like); + } int ncap = CBM_SZ_256; int nn = 0; @@ -3505,7 +3801,8 @@ static int arch_boundaries(cbm_store_t *s, const char *project, cbm_cross_pkg_bo nids = safe_realloc(nids, ncap * sizeof(int64_t)); npkgs = safe_realloc(npkgs, ncap * sizeof(char *)); } - nids[nn] = sqlite3_column_int64(nstmt, 0); + int64_t nid = sqlite3_column_int64(nstmt, 0); + nids[nn] = nid; const char *qn = (const char *)sqlite3_column_text(nstmt, SKIP_ONE); npkgs[nn] = heap_strdup(cbm_qn_to_package(qn)); nn++; @@ -3591,16 +3888,28 @@ static int arch_boundaries(cbm_store_t *s, const char *project, cbm_cross_pkg_bo #define MAX_PREVIEW_NAMES 15 /* Fallback: derive packages from QN segments when no Package nodes exist. */ -static int arch_packages_from_qn(cbm_store_t *s, const char *project, +static int arch_packages_from_qn(cbm_store_t *s, const char *project, const char *path, cbm_package_summary_t **out_arr, int *out_count) { - const char *qsql = "SELECT qualified_name FROM nodes WHERE project=?1 AND label IN " - "('Function','Method','Class')"; + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char qsqlbuf[ST_SQL_BUF]; + const char *qbase = "SELECT qualified_name FROM nodes WHERE project=?1 AND label IN " + "('Function','Method','Class')"; + if (scoped) { + snprintf(qsqlbuf, sizeof(qsqlbuf), "%s%s", qbase, arch_path_scope_sql()); + } else { + snprintf(qsqlbuf, sizeof(qsqlbuf), "%s", qbase); + } sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, qsql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, qsqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "arch_packages_qn"); return CBM_STORE_ERR; } bind_text(stmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + } char *pnames[CBM_SZ_64]; int pcounts[CBM_SZ_64]; @@ -3658,17 +3967,31 @@ static int arch_packages_from_qn(cbm_store_t *s, const char *project, return CBM_STORE_OK; } -static int arch_packages(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - /* Try Package nodes first */ - const char *sql = - "SELECT n.name, COUNT(*) as cnt FROM nodes n " - "WHERE n.project=?1 AND n.label='Package' GROUP BY n.name ORDER BY cnt DESC LIMIT 15"; +static int arch_packages(cbm_store_t *s, const char *project, const char *path, + cbm_architecture_info_t *out) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char sqlbuf[ST_SQL_BUF]; + const char *base = "SELECT n.name, COUNT(*) as cnt FROM nodes n " + "WHERE n.project=?1 AND n.label='Package'"; + if (scoped) { + snprintf(sqlbuf, sizeof(sqlbuf), + "%s AND (n.file_path = ?2 OR n.file_path LIKE ?3) " + "GROUP BY n.name ORDER BY cnt DESC LIMIT 15", + base); + } else { + snprintf(sqlbuf, sizeof(sqlbuf), "%s GROUP BY n.name ORDER BY cnt DESC LIMIT 15", base); + } sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, sqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "arch_packages"); return CBM_STORE_ERR; } bind_text(stmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + } int cap = ST_INIT_CAP_16; int n = 0; @@ -3687,7 +4010,7 @@ static int arch_packages(cbm_store_t *s, const char *project, cbm_architecture_i /* Fallback: group by QN segment if no Package nodes */ if (n == 0) { free(arr); - int rc = arch_packages_from_qn(s, project, &arr, &n); + int rc = arch_packages_from_qn(s, project, path, &arr, &n); if (rc != CBM_STORE_OK) { return rc; } @@ -3759,17 +4082,29 @@ static bool pkg_in_list(const char *pkg, char **list, int count) { return false; } -/* Collect package names from nodes matching a SQL query. */ -static int collect_pkg_names(cbm_store_t *s, const char *sql, const char *project, char **pkgs, - int max_pkgs) { +/* Collect package names from nodes matching a SQL query (must use ?1 = project). */ +static int collect_pkg_names(cbm_store_t *s, const char *sql, const char *project, const char *path, + char **pkgs, int max_pkgs) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char sqlbuf[ST_SQL_BUF]; + if (scoped) { + snprintf(sqlbuf, sizeof(sqlbuf), "%s%s", sql, arch_path_scope_sql()); + } else { + snprintf(sqlbuf, sizeof(sqlbuf), "%s", sql); + } sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) { + if (sqlite3_prepare_v2(s->db, sqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) { if (stmt) { sqlite3_finalize(stmt); } return CBM_NOT_FOUND; } bind_text(stmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + } int count = 0; while (sqlite3_step(stmt) == SQLITE_ROW && count < max_pkgs) { const char *qn = (const char *)sqlite3_column_text(stmt, 0); @@ -3779,11 +4114,12 @@ static int collect_pkg_names(cbm_store_t *s, const char *sql, const char *projec return count; } -static int arch_layers(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { +static int arch_layers(cbm_store_t *s, const char *project, const char *path, + cbm_architecture_info_t *out) { /* Get boundaries for fan analysis */ cbm_cross_pkg_boundary_t *boundaries = NULL; int bcount = 0; - int rc = arch_boundaries(s, project, &boundaries, &bcount); + int rc = arch_boundaries(s, project, path, &boundaries, &bcount); if (rc != CBM_STORE_OK) { return rc; } @@ -3792,13 +4128,13 @@ static int arch_layers(cbm_store_t *s, const char *project, cbm_architecture_inf char *route_pkgs[CBM_SZ_32]; int nrpkgs = collect_pkg_names(s, "SELECT qualified_name FROM nodes WHERE project=?1 AND label='Route'", - project, route_pkgs, CBM_SZ_32); + project, path, route_pkgs, CBM_SZ_32); char *entry_pkgs[CBM_SZ_32]; int nepkgs = collect_pkg_names(s, "SELECT qualified_name FROM nodes WHERE project=?1 AND " "json_extract(properties, '$.is_entry_point') = 1", - project, entry_pkgs, CBM_SZ_32); + project, path, entry_pkgs, CBM_SZ_32); /* Compute fan-in/out per package */ char *all_pkgs[CBM_SZ_64]; @@ -4065,14 +4401,27 @@ static void arch_free_dirs(char **dir_paths, int *dir_child_counts, char ***dir_ free(files); } -static int arch_file_tree(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT file_path FROM nodes WHERE project=?1 AND label='File'"; +static int arch_file_tree(cbm_store_t *s, const char *project, const char *path, + cbm_architecture_info_t *out) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char sqlbuf[ST_SQL_BUF]; + const char *base = "SELECT file_path FROM nodes WHERE project=?1 AND label='File'"; + if (scoped) { + snprintf(sqlbuf, sizeof(sqlbuf), "%s%s", base, arch_path_scope_sql()); + } else { + snprintf(sqlbuf, sizeof(sqlbuf), "%s", base); + } sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, sqlbuf, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK) { store_set_error_sqlite(s, "arch_file_tree"); return CBM_STORE_ERR; } bind_text(stmt, SKIP_ONE, project); + if (scoped) { + arch_bind_path_scope(stmt, ST_COL_2, ST_COL_3, norm, like); + } int fcap = CBM_SZ_32; int fn = 0; @@ -4782,17 +5131,30 @@ static int cluster_rank_cmp(const void *a, const void *b) { return cb->members - ca->members; } -static int arch_clusters(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - /* 1. Load Function/Method/Class nodes, ordered by id for bsearch. */ - const char *nsql = "SELECT id, name, qualified_name FROM nodes " - "WHERE project=?1 AND label IN ('Function','Method','Class') " - "ORDER BY id LIMIT ?2"; +static int arch_clusters(cbm_store_t *s, const char *project, const char *path, + cbm_architecture_info_t *out) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512]; + bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); + char nsqlbuf[ST_SQL_BUF]; + const char *nbase = "SELECT id, name, qualified_name, file_path FROM nodes " + "WHERE project=?1 AND label IN ('Function','Method','Class')"; + if (scoped) { + snprintf(nsqlbuf, sizeof(nsqlbuf), "%s%s ORDER BY id LIMIT ?4", nbase, arch_path_scope_sql()); + } else { + snprintf(nsqlbuf, sizeof(nsqlbuf), "%s ORDER BY id LIMIT ?2", nbase); + } sqlite3_stmt *st = NULL; - if (sqlite3_prepare_v2(s->db, nsql, CBM_NOT_FOUND, &st, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(s->db, nsqlbuf, CBM_NOT_FOUND, &st, NULL) != SQLITE_OK) { return CBM_STORE_OK; /* clusters are best-effort */ } bind_text(st, SKIP_ONE, project); - sqlite3_bind_int(st, CBM_SZ_2, CBM_CLUSTER_NODE_CAP); + if (scoped) { + arch_bind_path_scope(st, ST_COL_2, ST_COL_3, norm, like); + sqlite3_bind_int(st, ST_COL_4, CBM_CLUSTER_NODE_CAP); + } else { + sqlite3_bind_int(st, CBM_SZ_2, CBM_CLUSTER_NODE_CAP); + } int cap = ST_INIT_CAP_8; int n = 0; int64_t *ids = malloc((size_t)cap * sizeof(int64_t)); @@ -4951,37 +5313,38 @@ static bool want_aspect(const char **aspects, int aspect_count, const char *name return false; } -int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char **aspects, - int aspect_count, cbm_architecture_info_t *out) { +int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char *path, + const char **aspects, int aspect_count, + cbm_architecture_info_t *out) { memset(out, 0, sizeof(*out)); int rc; if (want_aspect(aspects, aspect_count, "languages")) { - rc = arch_languages(s, project, out); + rc = arch_languages(s, project, path, out); if (rc != CBM_STORE_OK) { return rc; } } if (want_aspect(aspects, aspect_count, "packages")) { - rc = arch_packages(s, project, out); + rc = arch_packages(s, project, path, out); if (rc != CBM_STORE_OK) { return rc; } } if (want_aspect(aspects, aspect_count, "entry_points")) { - rc = arch_entry_points(s, project, out); + rc = arch_entry_points(s, project, path, out); if (rc != CBM_STORE_OK) { return rc; } } if (want_aspect(aspects, aspect_count, "routes")) { - rc = arch_routes(s, project, out); + rc = arch_routes(s, project, path, out); if (rc != CBM_STORE_OK) { return rc; } } if (want_aspect(aspects, aspect_count, "hotspots")) { - rc = arch_hotspots(s, project, out); + rc = arch_hotspots(s, project, path, out); if (rc != CBM_STORE_OK) { return rc; } @@ -4989,7 +5352,7 @@ int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char * if (want_aspect(aspects, aspect_count, "boundaries")) { cbm_cross_pkg_boundary_t *barr = NULL; int bcount = 0; - rc = arch_boundaries(s, project, &barr, &bcount); + rc = arch_boundaries(s, project, path, &barr, &bcount); if (rc != CBM_STORE_OK) { return rc; } @@ -4997,19 +5360,19 @@ int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char * out->boundary_count = bcount; } if (want_aspect(aspects, aspect_count, "layers")) { - rc = arch_layers(s, project, out); + rc = arch_layers(s, project, path, out); if (rc != CBM_STORE_OK) { return rc; } } if (want_aspect(aspects, aspect_count, "file_tree")) { - rc = arch_file_tree(s, project, out); + rc = arch_file_tree(s, project, path, out); if (rc != CBM_STORE_OK) { return rc; } } if (want_aspect(aspects, aspect_count, "clusters")) { - rc = arch_clusters(s, project, out); + rc = arch_clusters(s, project, path, out); if (rc != CBM_STORE_OK) { return rc; } diff --git a/src/store/store.h b/src/store/store.h index 26b09a5c..b52e889a 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -314,6 +314,10 @@ int cbm_store_find_node_ids_by_qns(cbm_store_t *s, const char *project, const ch /* Count nodes in project. Returns count or CBM_STORE_ERR. */ int cbm_store_count_nodes(cbm_store_t *s, const char *project); +int cbm_store_count_nodes_scoped(cbm_store_t *s, const char *project, const char *path); + +int cbm_store_count_edges_scoped(cbm_store_t *s, const char *project, const char *path); + /* Delete all nodes for a project (cascade deletes edges). */ int cbm_store_delete_nodes_by_project(cbm_store_t *s, const char *project); @@ -430,6 +434,9 @@ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t * label/type counts, e.g. get_architecture. */ int cbm_store_get_schema_counts(cbm_store_t *s, const char *project, cbm_schema_info_t *out); +int cbm_store_get_schema_counts_scoped(cbm_store_t *s, const char *project, const char *path, + cbm_schema_info_t *out); + /* Free a schema info's allocated memory. */ void cbm_store_schema_free(cbm_schema_info_t *out); @@ -528,8 +535,9 @@ typedef struct { int file_tree_count; } cbm_architecture_info_t; -int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char **aspects, - int aspect_count, cbm_architecture_info_t *out); +int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char *path, + const char **aspects, int aspect_count, + cbm_architecture_info_t *out); void cbm_store_architecture_free(cbm_architecture_info_t *out); /* ── ADR (Architecture Decision Record) ────────────────────────── */ diff --git a/tests/test_store_arch.c b/tests/test_store_arch.c index eb64d8c5..110ec655 100644 --- a/tests/test_store_arch.c +++ b/tests/test_store_arch.c @@ -143,7 +143,7 @@ static cbm_store_t *setup_arch_test_store(void) { TEST(arch_get_all) { cbm_store_t *s = setup_arch_test_store(); cbm_architecture_info_t info; - ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, 0, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, NULL, 0, &info), CBM_STORE_OK); ASSERT_TRUE(info.language_count > 0); ASSERT_TRUE(info.package_count > 0); @@ -162,7 +162,7 @@ TEST(arch_entry_points_exclude_tests) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"entry_points"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); for (int i = 0; i < info.entry_point_count; i++) { ASSERT_TRUE(strstr(info.entry_points[i].file, "test") == NULL); @@ -179,7 +179,7 @@ TEST(arch_hotspots_exclude_tests) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"hotspots"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); for (int i = 0; i < info.hotspot_count; i++) { ASSERT_TRUE(strstr(info.hotspots[i].name, "Test") == NULL); @@ -194,7 +194,7 @@ TEST(arch_specific_aspects) { cbm_store_t *s = setup_arch_test_store(); cbm_architecture_info_t info; const char *aspects[] = {"languages", "hotspots"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 2, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 2, &info), CBM_STORE_OK); ASSERT_TRUE(info.language_count > 0); ASSERT_TRUE(info.hotspot_count > 0); @@ -208,6 +208,81 @@ TEST(arch_specific_aspects) { PASS(); } +TEST(arch_path_scoping) { + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + ASSERT_EQ(cbm_store_upsert_project(s, "pscope", "/tmp/pscope"), CBM_STORE_OK); + + cbm_node_t f1 = {.project = "pscope", + .label = "File", + .name = "a.go", + .qualified_name = "pscope.apps.foo.a.go", + .file_path = "apps/foo/a.go"}; + cbm_node_t f2 = {.project = "pscope", + .label = "File", + .name = "b.go", + .qualified_name = "pscope.other.b.go", + .file_path = "other/b.go"}; + cbm_store_upsert_node(s, &f1); + cbm_store_upsert_node(s, &f2); + + cbm_node_t fn_foo = {.project = "pscope", + .label = "Function", + .name = "Foo", + .qualified_name = "pscope.apps.foo.Foo", + .file_path = "apps/foo/a.go"}; + cbm_node_t fn_other = {.project = "pscope", + .label = "Function", + .name = "Bar", + .qualified_name = "pscope.other.Bar", + .file_path = "other/b.go"}; + cbm_store_upsert_node(s, &fn_foo); + cbm_store_upsert_node(s, &fn_other); + + const char *aspects[] = {"languages", "packages"}; + cbm_architecture_info_t whole; + memset(&whole, 0, sizeof(whole)); + ASSERT_EQ(cbm_store_get_architecture(s, "pscope", NULL, aspects, 2, &whole), CBM_STORE_OK); + + cbm_architecture_info_t scoped; + memset(&scoped, 0, sizeof(scoped)); + ASSERT_EQ(cbm_store_get_architecture(s, "pscope", "apps/foo", aspects, 2, &scoped), + CBM_STORE_OK); + + int whole_go = 0; + int scoped_go = 0; + for (int i = 0; i < whole.language_count; i++) { + if (strcmp(whole.languages[i].language, "Go") == 0) { + whole_go = whole.languages[i].file_count; + } + } + for (int i = 0; i < scoped.language_count; i++) { + if (strcmp(scoped.languages[i].language, "Go") == 0) { + scoped_go = scoped.languages[i].file_count; + } + } + ASSERT_TRUE(whole_go > scoped_go); + ASSERT_EQ(scoped_go, 1); + + int whole_pkg_nodes = 0; + for (int i = 0; i < whole.package_count; i++) { + whole_pkg_nodes += whole.packages[i].node_count; + } + int scoped_pkg_nodes = 0; + for (int i = 0; i < scoped.package_count; i++) { + scoped_pkg_nodes += scoped.packages[i].node_count; + } + ASSERT_TRUE(whole_pkg_nodes > scoped_pkg_nodes); + ASSERT_EQ(scoped_pkg_nodes, 1); + + ASSERT_TRUE(cbm_store_count_nodes(s, "pscope") > cbm_store_count_nodes_scoped(s, "pscope", "apps/foo")); + + cbm_store_architecture_free(&whole); + cbm_store_architecture_free(&scoped); + cbm_store_close(s); + PASS(); +} + TEST(arch_empty_project) { cbm_store_t *s = cbm_store_open_memory(); ASSERT_NOT_NULL(s); @@ -215,7 +290,7 @@ TEST(arch_empty_project) { cbm_architecture_info_t info; const char *aspects[] = {"all"}; - ASSERT_EQ(cbm_store_get_architecture(s, "empty", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "empty", NULL, aspects, 1, &info), CBM_STORE_OK); /* All should be empty but no errors */ cbm_store_architecture_free(&info); @@ -228,7 +303,7 @@ TEST(arch_languages) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"languages"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); /* Check Go=3, Python=1, JavaScript=1 */ int go_count = 0, py_count = 0, js_count = 0; @@ -254,7 +329,7 @@ TEST(arch_routes) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"routes"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); ASSERT_EQ(info.route_count, 1); ASSERT_STR_EQ(info.routes[0].method, "POST"); @@ -271,7 +346,7 @@ TEST(arch_hotspots) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"hotspots"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); ASSERT_TRUE(info.hotspot_count > 0); /* ProcessOrder should be a hotspot (called by HandleRequest) */ @@ -295,7 +370,7 @@ TEST(arch_boundaries) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"boundaries"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); ASSERT_TRUE(info.boundary_count > 0); /* server → handler and handler → service should be present */ @@ -361,7 +436,7 @@ static double timed_boundaries_ms(int n_nodes, int n_edges, int n_pkgs) { const char *aspects[] = {"boundaries"}; struct timespec t0, t1; clock_gettime(CLOCK_MONOTONIC, &t0); - int rc = cbm_store_get_architecture(s, "perf", aspects, 1, &info); + int rc = cbm_store_get_architecture(s, "perf", NULL, aspects, 1, &info); clock_gettime(CLOCK_MONOTONIC, &t1); double ms = (double)(t1.tv_sec - t0.tv_sec) * 1000.0 + (double)(t1.tv_nsec - t0.tv_nsec) / 1000000.0; @@ -409,7 +484,7 @@ TEST(arch_layers) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"layers"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); ASSERT_TRUE(info.layer_count > 0); /* Handler package has routes, should be "api" */ @@ -429,7 +504,7 @@ TEST(arch_file_tree) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"file_tree"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); ASSERT_TRUE(info.file_tree_count > 0); /* Check that entries have valid types */ @@ -448,7 +523,7 @@ TEST(arch_clusters) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"clusters"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); /* With 5 functions and 4 edges, Louvain should find at least 1 cluster */ if (info.cluster_count == 0) { @@ -1106,7 +1181,7 @@ TEST(arch_clusters_basic) { cbm_architecture_info_t info; memset(&info, 0, sizeof(info)); const char *aspects[] = {"clusters"}; - ASSERT_EQ(cbm_store_get_architecture(s, "test", aspects, 1, &info), CBM_STORE_OK); + ASSERT_EQ(cbm_store_get_architecture(s, "test", NULL, aspects, 1, &info), CBM_STORE_OK); ASSERT_TRUE(info.cluster_count >= 2); /* two dense communities */ for (int i = 0; i < info.cluster_count; i++) { ASSERT_TRUE(info.clusters[i].members >= 2); @@ -1279,6 +1354,7 @@ SUITE(store_arch) { RUN_TEST(arch_entry_points_exclude_tests); RUN_TEST(arch_hotspots_exclude_tests); RUN_TEST(arch_specific_aspects); + RUN_TEST(arch_path_scoping); RUN_TEST(arch_empty_project); RUN_TEST(arch_languages); RUN_TEST(arch_routes); From adf602ec94b286bb732fe06c630a919d0df24c7b Mon Sep 17 00:00:00 2001 From: Berserk Agent Date: Wed, 24 Jun 2026 12:50:49 +0000 Subject: [PATCH 2/4] fix(mcp): document path on get_architecture and emit root/scoped totals (#604) Signed-off-by: Berserk Agent --- src/mcp/mcp.c | 18 ++++++++-- tests/test_mcp.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 3 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 0bb5efa3..b7456c77 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -391,9 +391,11 @@ static const tool_def_t TOOLS[] = { "structure at a glance. Includes 'clusters': Leiden community detection over the call/import " "graph, surfacing the de-facto modules (each with a label, member count, cohesion score, " "representative top_nodes, and the packages/edge_types that bind it) — use these to grasp " - "the real architectural seams, which often cut across the folder layout.", - "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"aspects\":{\"type\":" - "\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"project\"]}"}, + "the real architectural seams, which often cut across the folder layout. Optional path scopes " + "analysis to nodes under that directory prefix (file_path).", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"path\":{\"type\":" + "\"string\",\"description\":\"Optional directory prefix to scope architecture (e.g. apps/hoa)\"}," + "\"aspects\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"project\"]}"}, {"search_code", "Graph-augmented code search. Finds text patterns via grep, then enriches results with " @@ -1973,6 +1975,7 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { int node_count = cbm_store_count_nodes_scoped(store, project, scope_path); int edge_count = cbm_store_count_edges_scoped(store, project, scope_path); + bool path_scoped = scope_path && scope_path[0]; yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); @@ -1981,6 +1984,15 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { if (project) { yyjson_mut_obj_add_str(doc, root, "project", project); } + if (path_scoped) { + yyjson_mut_obj_add_str(doc, root, "path", scope_path); + int root_nodes = cbm_store_count_nodes(store, project); + int root_edges = cbm_store_count_edges(store, project); + yyjson_mut_obj_add_int(doc, root, "root_total_nodes", root_nodes); + yyjson_mut_obj_add_int(doc, root, "root_total_edges", root_edges); + yyjson_mut_obj_add_int(doc, root, "scoped_total_nodes", node_count); + yyjson_mut_obj_add_int(doc, root, "scoped_total_edges", edge_count); + } yyjson_mut_obj_add_int(doc, root, "total_nodes", node_count); yyjson_mut_obj_add_int(doc, root, "total_edges", edge_count); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 68edc0e9..7dc7c114 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -12,6 +12,7 @@ #include #include #include +#include /* ══════════════════════════════════════════════════════════════════ * JSON-RPC PARSING @@ -791,6 +792,91 @@ TEST(tool_get_architecture_emits_populated_sections) { PASS(); } +/* Regression for #604: path scopes architecture totals and content. */ +TEST(tool_get_architecture_path_scoping) { + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + cbm_store_t *st = cbm_mcp_server_store(srv); + ASSERT_NOT_NULL(st); + + const char *proj = "arch-path"; + cbm_mcp_server_set_project(srv, proj); + cbm_store_upsert_project(st, proj, "/tmp/arch-path"); + + cbm_node_t pkg_global = {.project = proj, + .label = "Package", + .name = "Django", + .qualified_name = "arch-path.Django", + .file_path = "vendor/django/__init__.py"}; + cbm_store_upsert_node(st, &pkg_global); + + cbm_node_t pkg_local = {.project = proj, + .label = "Package", + .name = "hoa", + .qualified_name = "arch-path.hoa", + .file_path = "apps/hoa/main.go"}; + cbm_store_upsert_node(st, &pkg_local); + + cbm_node_t f_hoa = {.project = proj, + .label = "File", + .name = "main.go", + .qualified_name = "arch-path.apps.hoa.main.go", + .file_path = "apps/hoa/main.go"}; + cbm_store_upsert_node(st, &f_hoa); + + cbm_node_t f_other = {.project = proj, + .label = "File", + .name = "other.go", + .qualified_name = "arch-path.other.go", + .file_path = "lib/other.go"}; + cbm_store_upsert_node(st, &f_other); + + char *resp_root = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":92,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture\"," + "\"arguments\":{\"project\":\"arch-path\",\"aspects\":[\"packages\"]}}}"); + ASSERT_NOT_NULL(resp_root); + char *inner_root = extract_text_content(resp_root); + ASSERT_NOT_NULL(inner_root); + ASSERT_NOT_NULL(strstr(inner_root, "Django")); + + char *resp_scoped = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":93,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"get_architecture\"," + "\"arguments\":{\"project\":\"arch-path\",\"path\":\"apps/hoa\"," + "\"aspects\":[\"packages\"]}}}"); + ASSERT_NOT_NULL(resp_scoped); + char *inner_scoped = extract_text_content(resp_scoped); + ASSERT_NOT_NULL(inner_scoped); + + ASSERT_NOT_NULL(strstr(inner_scoped, "root_total_nodes")); + ASSERT_NOT_NULL(strstr(inner_scoped, "scoped_total_nodes")); + ASSERT_NOT_NULL(strstr(inner_scoped, "\"path\"")); + ASSERT_NOT_NULL(strstr(inner_scoped, "hoa")); + ASSERT_NULL(strstr(inner_scoped, "Django")); + + int root_nodes = 0; + int scoped_nodes = 0; + const char *rt = strstr(inner_scoped, "\"root_total_nodes\":"); + const char *stn = strstr(inner_scoped, "\"scoped_total_nodes\":"); + if (rt) { + sscanf(rt, "\"root_total_nodes\":%d", &root_nodes); + } + if (stn) { + sscanf(stn, "\"scoped_total_nodes\":%d", &scoped_nodes); + } + ASSERT_TRUE(root_nodes > scoped_nodes); + ASSERT_TRUE(scoped_nodes > 0); + + free(inner_scoped); + free(resp_scoped); + free(inner_root); + free(resp_root); + cbm_mcp_server_free(srv); + PASS(); +} + TEST(tool_query_graph_missing_query) { cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); @@ -2294,6 +2380,7 @@ SUITE(mcp) { RUN_TEST(tool_delete_project_not_found); RUN_TEST(tool_get_architecture_empty); RUN_TEST(tool_get_architecture_emits_populated_sections); + RUN_TEST(tool_get_architecture_path_scoping); RUN_TEST(tool_query_graph_missing_query); /* Pipeline-dependent tool handlers */ From 55058c05f0be343b7cbe07fd70bbcf0ea9eb639a Mon Sep 17 00:00:00 2001 From: Berserk Agent Date: Wed, 24 Jun 2026 12:54:45 +0000 Subject: [PATCH 3/4] fix(arch): normalize path trailing slashes; align MCP path_scoped with store (#604) Signed-off-by: Berserk Agent --- src/mcp/mcp.c | 5 +++-- src/store/store.c | 23 ++++++++++++++++++++++- src/store/store.h | 6 ++++++ tests/test_store_arch.c | 13 +++++++++++++ 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index b7456c77..45be91cc 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1975,7 +1975,8 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { int node_count = cbm_store_count_nodes_scoped(store, project, scope_path); int edge_count = cbm_store_count_edges_scoped(store, project, scope_path); - bool path_scoped = scope_path && scope_path[0]; + char norm_path[CBM_SZ_512]; + bool path_scoped = cbm_store_normalize_arch_path(scope_path, norm_path, sizeof(norm_path)); yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); @@ -1985,7 +1986,7 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_obj_add_str(doc, root, "project", project); } if (path_scoped) { - yyjson_mut_obj_add_str(doc, root, "path", scope_path); + yyjson_mut_obj_add_str(doc, root, "path", norm_path); int root_nodes = cbm_store_count_nodes(store, project); int root_edges = cbm_store_count_edges(store, project); yyjson_mut_obj_add_int(doc, root, "root_total_nodes", root_nodes); diff --git a/src/store/store.c b/src/store/store.c index 152c8c61..116dbd14 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -2942,9 +2942,19 @@ static bool arch_path_prepare(const char *path, char *norm_out, size_t norm_sz, strncpy(norm_out, path, norm_sz - 1); norm_out[norm_sz - 1] = '\0'; size_t len = strlen(norm_out); - while (len > 0 && (norm_out[len - 1] == ' ' || norm_out[len - 1] == '\t')) { + while (len > 0 && (norm_out[len - 1] == ' ' || norm_out[len - 1] == '\t' || + norm_out[len - 1] == '/')) { norm_out[--len] = '\0'; } + /* Collapse duplicate slashes */ + size_t w = 0; + for (size_t r = 0; norm_out[r] != '\0'; r++) { + if (norm_out[r] == '/' && w > 0 && norm_out[w - 1] == '/') { + continue; + } + norm_out[w++] = norm_out[r]; + } + norm_out[w] = '\0'; if (norm_out[0] == '\0') { return false; } @@ -2962,6 +2972,17 @@ static void arch_bind_path_scope(sqlite3_stmt *stmt, int exact_idx, int like_idx bind_text(stmt, like_idx, like_pat); } +bool cbm_store_arch_path_scoped(const char *path) { + char norm[CBM_SZ_512]; + char like[CBM_SZ_512 + 4]; + return arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); +} + +bool cbm_store_normalize_arch_path(const char *path, char *norm_out, size_t norm_sz) { + char like[CBM_SZ_512 + 4]; + return arch_path_prepare(path, norm_out, norm_sz, like, sizeof(like)); +} + int cbm_store_count_nodes_scoped(cbm_store_t *s, const char *project, const char *path) { if (!s || !s->db || !project) { return 0; diff --git a/src/store/store.h b/src/store/store.h index b52e889a..43c87f57 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -318,6 +318,12 @@ int cbm_store_count_nodes_scoped(cbm_store_t *s, const char *project, const char int cbm_store_count_edges_scoped(cbm_store_t *s, const char *project, const char *path); +/* True when path is a non-empty scope after normalization (issue #604). */ +bool cbm_store_arch_path_scoped(const char *path); + +/* When scoped, writes normalized directory prefix into norm_out. Returns false if unscoped. */ +bool cbm_store_normalize_arch_path(const char *path, char *norm_out, size_t norm_sz); + /* Delete all nodes for a project (cascade deletes edges). */ int cbm_store_delete_nodes_by_project(cbm_store_t *s, const char *project); diff --git a/tests/test_store_arch.c b/tests/test_store_arch.c index 110ec655..777140aa 100644 --- a/tests/test_store_arch.c +++ b/tests/test_store_arch.c @@ -277,6 +277,19 @@ TEST(arch_path_scoping) { ASSERT_TRUE(cbm_store_count_nodes(s, "pscope") > cbm_store_count_nodes_scoped(s, "pscope", "apps/foo")); + cbm_architecture_info_t scoped_slash; + memset(&scoped_slash, 0, sizeof(scoped_slash)); + ASSERT_EQ(cbm_store_get_architecture(s, "pscope", "apps/foo/", aspects, 2, &scoped_slash), + CBM_STORE_OK); + int slash_go = 0; + for (int i = 0; i < scoped_slash.language_count; i++) { + if (strcmp(scoped_slash.languages[i].language, "Go") == 0) { + slash_go = scoped_slash.languages[i].file_count; + } + } + ASSERT_EQ(slash_go, scoped_go); + + cbm_store_architecture_free(&scoped_slash); cbm_store_architecture_free(&whole); cbm_store_architecture_free(&scoped); cbm_store_close(s); From e270ceb8a42d3eb1dae5e7cd5120ab88a69f89d6 Mon Sep 17 00:00:00 2001 From: Berserk Agent Date: Wed, 24 Jun 2026 13:54:33 +0000 Subject: [PATCH 4/4] style(store,mcp): clang-format-20 for CI lint (#604) Signed-off-by: Berserk Agent --- src/mcp/mcp.c | 6 ++++-- src/store/store.c | 12 +++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 45be91cc..368d73f3 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -394,8 +394,10 @@ static const tool_def_t TOOLS[] = { "the real architectural seams, which often cut across the folder layout. Optional path scopes " "analysis to nodes under that directory prefix (file_path).", "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"path\":{\"type\":" - "\"string\",\"description\":\"Optional directory prefix to scope architecture (e.g. apps/hoa)\"}," - "\"aspects\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"project\"]}"}, + "\"string\",\"description\":\"Optional directory prefix to scope architecture (e.g. " + "apps/hoa)\"}," + "\"aspects\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"project\"]" + "}"}, {"search_code", "Graph-augmented code search. Finds text patterns via grep, then enriches results with " diff --git a/src/store/store.c b/src/store/store.c index 116dbd14..263ea93f 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -2942,8 +2942,8 @@ static bool arch_path_prepare(const char *path, char *norm_out, size_t norm_sz, strncpy(norm_out, path, norm_sz - 1); norm_out[norm_sz - 1] = '\0'; size_t len = strlen(norm_out); - while (len > 0 && (norm_out[len - 1] == ' ' || norm_out[len - 1] == '\t' || - norm_out[len - 1] == '/')) { + while (len > 0 && + (norm_out[len - 1] == ' ' || norm_out[len - 1] == '\t' || norm_out[len - 1] == '/')) { norm_out[--len] = '\0'; } /* Collapse duplicate slashes */ @@ -3794,8 +3794,9 @@ static int arch_boundaries(cbm_store_t *s, const char *project, const char *path char like[CBM_SZ_512]; bool scoped = arch_path_prepare(path, norm, sizeof(norm), like, sizeof(like)); char nsqlbuf[ST_SQL_BUF]; - const char *nbase = "SELECT id, qualified_name, file_path FROM nodes WHERE project=?1 AND label IN " - "('Function','Method','Class')"; + const char *nbase = + "SELECT id, qualified_name, file_path FROM nodes WHERE project=?1 AND label IN " + "('Function','Method','Class')"; if (scoped) { snprintf(nsqlbuf, sizeof(nsqlbuf), "%s%s ORDER BY id", nbase, arch_path_scope_sql()); } else { @@ -5161,7 +5162,8 @@ static int arch_clusters(cbm_store_t *s, const char *project, const char *path, const char *nbase = "SELECT id, name, qualified_name, file_path FROM nodes " "WHERE project=?1 AND label IN ('Function','Method','Class')"; if (scoped) { - snprintf(nsqlbuf, sizeof(nsqlbuf), "%s%s ORDER BY id LIMIT ?4", nbase, arch_path_scope_sql()); + snprintf(nsqlbuf, sizeof(nsqlbuf), "%s%s ORDER BY id LIMIT ?4", nbase, + arch_path_scope_sql()); } else { snprintf(nsqlbuf, sizeof(nsqlbuf), "%s ORDER BY id LIMIT ?2", nbase); }