From 3450263be5a798c5b3595bbc7b7b41564ecb9881 Mon Sep 17 00:00:00 2001 From: Andy11-cpu Date: Thu, 25 Jun 2026 05:23:40 -0400 Subject: [PATCH] Fix contained indexing: no background re-index on connect Stop registering the git watcher on every MCP initialize unless auto_watch is explicitly enabled (default false). Re-index only via index_repository or when auto_watch is turned on after a manual index. Also deduplicate project DBs by canonical/git identity, tier RAM budget to 25% on 16GB machines, and make install hooks opt-in via --hooks. Co-authored-by: Cursor Signed-off-by: Andy11-cpu Co-authored-by: Cursor Signed-off-by: Andy11-cpu Co-authored-by: Cursor Signed-off-by: Andy11-cpu Co-authored-by: Cursor --- Makefile.cbm | 1 + src/cli/cli.c | 73 +++++++++------- src/cli/cli.h | 3 +- src/main.c | 28 +++++-- src/mcp/mcp.c | 52 +++++++++--- src/pipeline/pipeline.c | 4 +- src/pipeline/project_resolve.c | 148 +++++++++++++++++++++++++++++++++ src/pipeline/project_resolve.h | 17 ++++ tests/test_cli.c | 2 +- 9 files changed, 278 insertions(+), 50 deletions(-) create mode 100644 src/pipeline/project_resolve.c create mode 100644 src/pipeline/project_resolve.h diff --git a/Makefile.cbm b/Makefile.cbm index 2bcf7b4d..b05d1662 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -175,6 +175,7 @@ GRAPH_BUFFER_SRCS = src/graph_buffer/graph_buffer.c # Pipeline module (new) PIPELINE_SRCS = \ src/pipeline/fqn.c \ + src/pipeline/project_resolve.c \ src/pipeline/path_alias.c \ src/pipeline/registry.c \ src/pipeline/pipeline.c \ diff --git a/src/cli/cli.c b/src/cli/cli.c index f159f591..6f0c9a2e 100644 --- a/src/cli/cli.c +++ b/src/cli/cli.c @@ -2625,6 +2625,8 @@ int cbm_cmd_config(int argc, char **argv) { "Enable auto-indexing on MCP session start"); printf(" %-25s default=%-10s %s\n", CBM_CONFIG_AUTO_INDEX_LIMIT, "50000", "Max files for auto-indexing new projects"); + printf(" %-25s default=%-10s %s\n", CBM_CONFIG_AUTO_WATCH, "false", + "Enable git watcher background re-indexing (off by 
default)"); return 0; } @@ -2650,6 +2652,8 @@ int cbm_cmd_config(int argc, char **argv) { cbm_config_get(cfg, CBM_CONFIG_AUTO_INDEX, "false")); printf(" %-25s = %-10s\n", CBM_CONFIG_AUTO_INDEX_LIMIT, cbm_config_get(cfg, CBM_CONFIG_AUTO_INDEX_LIMIT, "50000")); + printf(" %-25s = %-10s\n", CBM_CONFIG_AUTO_WATCH, + cbm_config_get(cfg, CBM_CONFIG_AUTO_WATCH, "false")); } else if (strcmp(argv[0], "get") == 0) { if (argc < MIN_ARGC_GET) { (void)fprintf(stderr, "Usage: config get \n"); @@ -2990,7 +2994,7 @@ static void plan_record(const char *agent, const char *kind, const char *path) { } static void install_claude_code_config(const char *home, const char *binary_path, bool force, - bool dry_run) { + bool dry_run, bool install_hooks) { char config_dir[CLI_BUF_1K]; cbm_claude_config_dir(home, config_dir, sizeof(config_dir)); char user_root[CLI_BUF_1K]; @@ -3010,9 +3014,13 @@ static void install_claude_code_config(const char *home, const char *binary_path snprintf(p, sizeof(p), "%s/settings.json", config_dir); plan_record("Claude Code", "mcp_config", p); snprintf(p, sizeof(p), "%s/hooks/%s", config_dir, CMM_HOOK_GATE_SCRIPT); - plan_record("Claude Code", "hook", p); + if (install_hooks) { + plan_record("Claude Code", "hook", p); + } snprintf(p, sizeof(p), "%s/hooks/%s", config_dir, CMM_SESSION_REMINDER_SCRIPT); - plan_record("Claude Code", "hook", p); + if (install_hooks) { + plan_record("Claude Code", "hook", p); + } return; } @@ -3041,14 +3049,16 @@ static void install_claude_code_config(const char *home, const char *binary_path char settings_path[CLI_BUF_1K]; snprintf(settings_path, sizeof(settings_path), "%s/settings.json", config_dir); - if (!dry_run) { + if (!dry_run && install_hooks) { cbm_upsert_claude_hooks(settings_path); cbm_install_hook_gate_script(home, binary_path); cbm_install_session_reminder_script(home); cbm_upsert_session_hooks(settings_path); + printf(" hooks: PreToolUse (Grep/Glob search-graph augmenter, non-blocking)\n"); + printf(" hooks: SessionStart 
(MCP usage reminder on startup/resume/clear/compact)\n"); + } else if (!dry_run) { + printf(" hooks: skipped (pass --hooks to install search augment hooks)\n"); } - printf(" hooks: PreToolUse (Grep/Glob search-graph augmenter, non-blocking)\n"); - printf(" hooks: SessionStart (MCP usage reminder on startup/resume/clear/compact)\n"); /* Migration nudge: when CLAUDE_CONFIG_DIR is set and a legacy ~/.claude tree * still exists, mention it so users can clean up stale artifacts. */ @@ -3093,7 +3103,8 @@ static void install_generic_agent_config(const char *label, const char *binary_p /* Install MCP configs for CLI-based agents (Codex, Gemini, OpenCode, Antigravity, Aider). */ /* Install Gemini CLI config with hooks. */ -static void install_gemini_config(const char *home, const char *binary_path, bool dry_run) { +static void install_gemini_config(const char *home, const char *binary_path, bool dry_run, + bool install_hooks) { char cp[CLI_BUF_1K]; char ip[CLI_BUF_1K]; snprintf(cp, sizeof(cp), "%s/.gemini/settings.json", home); @@ -3101,18 +3112,20 @@ static void install_gemini_config(const char *home, const char *binary_path, boo install_generic_agent_config("Gemini CLI", binary_path, cp, ip, dry_run, cbm_install_editor_mcp); if (g_install_plan) { - plan_record("Gemini CLI", "hook", cp); /* BeforeTool + SessionStart in settings.json */ + if (install_hooks) { + plan_record("Gemini CLI", "hook", cp); /* BeforeTool + SessionStart in settings.json */ + } return; } - if (!dry_run) { + if (!dry_run && install_hooks) { cbm_upsert_gemini_hooks(cp); cbm_upsert_gemini_session_hooks(cp); + printf(" hooks: BeforeTool + SessionStart (codebase-memory-mcp reminder)\n"); } - printf(" hooks: BeforeTool + SessionStart (codebase-memory-mcp reminder)\n"); } static void install_cli_agent_configs(const cbm_detected_agents_t *agents, const char *home, - const char *binary_path, bool dry_run) { + const char *binary_path, bool dry_run, bool install_hooks) { if (agents->codex) { char 
cp[CLI_BUF_1K]; char ip[CLI_BUF_1K]; @@ -3121,16 +3134,16 @@ static void install_cli_agent_configs(const cbm_detected_agents_t *agents, const install_generic_agent_config("Codex CLI", binary_path, cp, ip, dry_run, cbm_upsert_codex_mcp); if (g_install_plan) { - plan_record("Codex CLI", "hook", cp); - } else { - if (!dry_run) { - cbm_upsert_codex_hooks(cp); + if (install_hooks) { + plan_record("Codex CLI", "hook", cp); } + } else if (!dry_run && install_hooks) { + cbm_upsert_codex_hooks(cp); printf(" hooks: SessionStart (codebase-memory-mcp reminder)\n"); } } if (agents->gemini) { - install_gemini_config(home, binary_path, dry_run); + install_gemini_config(home, binary_path, dry_run, install_hooks); } if (agents->opencode) { char cp[CLI_BUF_1K]; @@ -3160,11 +3173,11 @@ static void install_cli_agent_configs(const cbm_detected_agents_t *agents, const char sp[CLI_BUF_1K]; snprintf(sp, sizeof(sp), "%s/.gemini/antigravity-cli/settings.json", home); if (g_install_plan) { - plan_record("Antigravity", "hook", sp); - } else { - if (!dry_run) { - cbm_upsert_gemini_session_hooks(sp); + if (install_hooks) { + plan_record("Antigravity", "hook", sp); } + } else if (!dry_run && install_hooks) { + cbm_upsert_gemini_session_hooks(sp); printf(" hooks: SessionStart (codebase-memory-mcp reminder)\n"); } } @@ -3250,16 +3263,16 @@ static void install_editor_agent_configs(const cbm_detected_agents_t *agents, co } static void cbm_install_agent_configs(const char *home, const char *binary_path, bool force, - bool dry_run) { + bool dry_run, bool install_hooks) { cbm_detected_agents_t agents = cbm_detect_agents(home); if (!g_install_plan) { print_detected_agents(&agents); } if (agents.claude_code) { - install_claude_code_config(home, binary_path, force, dry_run); + install_claude_code_config(home, binary_path, force, dry_run, install_hooks); } - install_cli_agent_configs(&agents, home, binary_path, dry_run); + install_cli_agent_configs(&agents, home, binary_path, dry_run, install_hooks); 
install_editor_agent_configs(&agents, home, binary_path, dry_run); } @@ -3317,7 +3330,7 @@ static void cbm_detect_self_path(char *buf, size_t buf_sz, const char *home) { * the config / instruction / hook files `install` WOULD write, produced by * running the real install dispatch in record-only mode (no mutation, no * network). Returns a heap JSON string (caller frees) or NULL. */ -char *cbm_build_install_plan_json(const char *home, const char *binary_path) { +char *cbm_build_install_plan_json(const char *home, const char *binary_path, bool install_hooks) { if (!home || !binary_path) { return NULL; } @@ -3326,7 +3339,7 @@ char *cbm_build_install_plan_json(const char *home, const char *binary_path) { * site records into `plan` — so the receipt cannot drift from behavior. */ cbm_install_plan_t plan = {0}; g_install_plan = &plan; - cbm_install_agent_configs(home, binary_path, false, true); + cbm_install_agent_configs(home, binary_path, false, true, install_hooks); g_install_plan = NULL; cbm_detected_agents_t det = cbm_detect_agents(home); @@ -3395,6 +3408,7 @@ int cbm_cmd_install(int argc, char **argv) { bool dry_run = false; bool force = false; bool plan = false; + bool install_hooks = false; for (int i = 0; i < argc; i++) { if (strcmp(argv[i], "--dry-run") == 0) { dry_run = true; @@ -3405,6 +3419,9 @@ int cbm_cmd_install(int argc, char **argv) { if (strcmp(argv[i], "--plan") == 0) { plan = true; } + if (strcmp(argv[i], "--hooks") == 0) { + install_hooks = true; + } } const char *home = cbm_get_home_dir(); @@ -3419,7 +3436,7 @@ int cbm_cmd_install(int argc, char **argv) { if (plan) { char self_path[CLI_BUF_1K] = {0}; cbm_detect_self_path(self_path, sizeof(self_path), home); - char *json = cbm_build_install_plan_json(home, self_path); + char *json = cbm_build_install_plan_json(home, self_path, install_hooks); if (!json) { (void)fprintf(stderr, "error: failed to build install plan\n"); return CLI_TRUE; @@ -3515,7 +3532,7 @@ int cbm_cmd_install(int argc, char **argv) { 
#endif /* Step 3: Install/refresh all agent configs, pointing at the install target. */ - cbm_install_agent_configs(home, bin_target, force, dry_run); + cbm_install_agent_configs(home, bin_target, force, dry_run, install_hooks); /* Step 4: Ensure PATH */ char bin_dir[CLI_BUF_1K]; @@ -4110,7 +4127,7 @@ int cbm_cmd_update(int argc, char **argv) { /* Step 6: Refresh all agent configs (skills, MCP entries, hooks) */ printf("Refreshing agent configurations...\n"); - cbm_install_agent_configs(home, bin_dest, true, false); + cbm_install_agent_configs(home, bin_dest, true, false, false); /* Step 7: Verify new version (exec directly, no shell interpretation) */ printf("\nUpdate complete. Verifying:\n"); diff --git a/src/cli/cli.h b/src/cli/cli.h index 9efe6789..760ea591 100644 --- a/src/cli/cli.h +++ b/src/cli/cli.h @@ -264,6 +264,7 @@ int cbm_config_delete(cbm_config_t *cfg, const char *key); /* Well-known config keys */ #define CBM_CONFIG_AUTO_INDEX "auto_index" #define CBM_CONFIG_AUTO_INDEX_LIMIT "auto_index_limit" +#define CBM_CONFIG_AUTO_WATCH "auto_watch" /* ── Subcommands (wired from main.c) ─────────────────────────── */ @@ -291,6 +292,6 @@ int cbm_cmd_hook_augment(void); * a machine-readable JSON list of the config/instruction/hook files `install` * would write, produced WITHOUT mutating anything. Returns a heap JSON string * (caller frees) or NULL on error. Exposed for `install --plan` and testing. 
*/
-char *cbm_build_install_plan_json(const char *home, const char *binary_path);
+char *cbm_build_install_plan_json(const char *home, const char *binary_path, bool install_hooks);
 
 #endif /* CBM_CLI_H */
diff --git a/src/main.c b/src/main.c
index f2b72cba..f6db980f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -30,7 +30,10 @@ enum {
     MAIN_MAX_PORT = 65536,
     PARENT_WATCHDOG_STACK_SIZE = 64 * CBM_SZ_1K, /* watchdog only polls — tiny stack suffices */
 };
-#define MAIN_RAM_FRACTION 0.5
+#define MAIN_RAM_FRACTION_DEFAULT 0.5
+#define MAIN_RAM_FRACTION_16GB 0.25
+#define MAIN_RAM_FRACTION_32GB 0.35
+#define MAIN_RAM_BYTES_PER_GB (1024ULL * 1024 * 1024)
 #define SLEN(s) (sizeof(s) - 1)
 
 #include "foundation/log.h"
@@ -45,6 +48,17 @@ enum {
 #include "ui/embedded_assets.h"
 #include
 
+/* RAM fraction handed to cbm_mem_init(): 25% up to 16 GiB of total RAM, 35% up to
+ * 32 GiB, 50% above that. */
+static double main_ram_fraction(void) {
+    const cbm_system_info_t sys = cbm_system_info();
+    if (sys.total_ram > 32ULL * MAIN_RAM_BYTES_PER_GB) {
+        return MAIN_RAM_FRACTION_DEFAULT;
+    }
+    return sys.total_ram > 16ULL * MAIN_RAM_BYTES_PER_GB ? MAIN_RAM_FRACTION_32GB
+                                                         : MAIN_RAM_FRACTION_16GB;
+}
+
 #include
 #include
 #include
@@ -284,7 +298,7 @@ static void print_help(void) {
     printf("Usage:\n");
     printf(" codebase-memory-mcp Run MCP server on stdio\n");
     printf(" codebase-memory-mcp cli [json] Run a single tool\n");
-    printf(" codebase-memory-mcp install [-y|-n] [--force] [--dry-run]\n");
+    printf(" codebase-memory-mcp install [-y|-n] [--force] [--hooks] [--dry-run]\n");
     printf(" codebase-memory-mcp uninstall [-y|-n] [--dry-run]\n");
     printf(" codebase-memory-mcp update [-y|-n]\n");
     printf(" codebase-memory-mcp config \n");
@@ -324,11 +338,11 @@ static int handle_subcommand(int argc, char **argv) {
             return 0;
         }
         if (strcmp(argv[i], "cli") == 0) {
-            cbm_mem_init(MAIN_RAM_FRACTION);
+            cbm_mem_init(main_ram_fraction());
             return run_cli(argc - i - SKIP_ONE, argv + i + SKIP_ONE);
         }
         if (strcmp(argv[i], "hook-augment") == 0) {
-            cbm_mem_init(MAIN_RAM_FRACTION);
+            cbm_mem_init(main_ram_fraction());
return cbm_cmd_hook_augment(); } if (strcmp(argv[i], "install") == 0) { @@ -424,9 +438,9 @@ int main(int argc, char **argv) { #endif /* Default: MCP server on stdio */ - cbm_mem_init(MAIN_RAM_FRACTION); /* 50% of RAM — safe now because mimalloc tracks ALL - * memory (C + C++ allocations) via global override. - * No more untracked heap blind spots. */ + /* tiered RAM budget — mimalloc tracks ALL memory (C + C++ allocations) via global + * override. No more untracked heap blind spots. */ + cbm_mem_init(main_ram_fraction()); /* Store binary path for subprocess spawning + hook log sink */ cbm_http_server_set_binary_path(argv[0]); cbm_log_set_sink(cbm_ui_log_append); diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 7016a0d2..094248f9 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -42,6 +42,7 @@ enum { #include #include "cypher/cypher.h" #include "pipeline/pipeline.h" +#include "pipeline/project_resolve.h" #include "pipeline/pass_cross_repo.h" #include "git/git_context.h" #include "cli/cli.h" @@ -2781,6 +2782,9 @@ static bool build_index_success_response(cbm_mcp_server_t *srv, yyjson_mut_doc * return degraded; } +static bool auto_watch_enabled(cbm_mcp_server_t *srv); +static void register_watcher_if_enabled(cbm_mcp_server_t *srv); + static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) { char *repo_path = cbm_mcp_get_string_arg(args, "repo_path"); char *mode_str = cbm_mcp_get_string_arg(args, "mode"); @@ -2864,6 +2868,9 @@ static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) { bool degraded = build_index_success_response(srv, doc, root, project_name, repo_path, persistence, p, excluded_dirs, excluded_count); yyjson_mut_obj_add_str(doc, root, "status", degraded ? 
"degraded" : "indexed");
+        if (srv->watcher && auto_watch_enabled(srv)) {
+            cbm_watcher_watch(srv->watcher, project_name, repo_path);
+        }
     } else {
         yyjson_mut_obj_add_str(doc, root, "status", "error");
         yyjson_mut_obj_add_str(doc, root, "hint",
@@ -4568,15 +4575,41 @@ static void detect_session(cbm_mcp_server_t *srv) {
      * used by the pipeline, otherwise session queries look for a .db file
      * that doesn't match the indexed project name. */
     if (srv->session_root[0]) {
-        char *pname = cbm_project_name_from_path(srv->session_root);
-        if (pname) {
-            snprintf(srv->session_project, sizeof(srv->session_project), "%s", pname);
-            free(pname);
+        char *existing = cbm_find_existing_project_name(srv->session_root);
+        if (existing) {
+            snprintf(srv->session_project, sizeof(srv->session_project), "%s", existing);
+            cbm_log_info("session.project.reuse", "project", existing, "path", srv->session_root);
+            free(existing);
+        } else {
+            char *pname = cbm_project_name_from_path(srv->session_root);
+            if (pname) {
+                snprintf(srv->session_project, sizeof(srv->session_project), "%s", pname);
+                free(pname);
+            }
         }
     }
 }
 
+/* auto_watch config gate (default false); false as well when srv or its config is missing. */
+static bool auto_watch_enabled(cbm_mcp_server_t *srv) {
+    return srv && srv->config && cbm_config_get_bool(srv->config, CBM_CONFIG_AUTO_WATCH, false);
+}
+
+/* Register the session project with the git watcher, but only when auto_watch is on.
+ * No-op when the server, watcher, session project or session root is unset. */
+static void register_watcher_if_enabled(cbm_mcp_server_t *srv) {
+    if (!srv || !srv->watcher || srv->session_project[0] == '\0' || srv->session_root[0] == '\0') {
+        return;
+    }
+    if (!auto_watch_enabled(srv)) {
+        cbm_log_info("watcher.skip", "reason", "auto_watch_disabled", "hint",
+                     "run: codebase-memory-mcp config set auto_watch true");
+        return;
+    }
+    cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root);
+}
+
 /* Background auto-index thread function */
 static void *autoindex_thread(void *arg) {
     cbm_mcp_server_t *srv = (cbm_mcp_server_t *)arg;
 
@@ -4598,10 +4631,7 @@ static void *autoindex_thread(void *arg) {
 
     if (rc == 0) {
         cbm_log_info("autoindex.done", "project",
srv->session_project); - /* Register with watcher for ongoing change detection */ - if (srv->watcher) { - cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root); - } + register_watcher_if_enabled(srv); } else { cbm_log_warn("autoindex.err", "msg", "pipeline_run_failed"); } @@ -4621,12 +4651,10 @@ static void maybe_auto_index(cbm_mcp_server_t *srv) { snprintf(db_check, sizeof(db_check), "%s/%s.db", cbm_resolve_cache_dir(), srv->session_project); if (cbm_file_size(db_check) >= 0) { - /* Already indexed → register watcher for change detection */ + /* Already indexed — use existing graph; never auto re-index on connect. */ cbm_log_info("autoindex.skip", "reason", "already_indexed", "project", srv->session_project); - if (srv->watcher) { - cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root); - } + register_watcher_if_enabled(srv); return; } } diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 8e370f7c..554a0e3e 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -15,6 +15,7 @@ enum { CBM_DIR_PERMS = 0755, PL_RING = 4, PL_RING_MASK = 3, PL_SEQ_PASSES = 6, PL_WAL_BUF = 1040 }; #define PL_NSEC_PER_SEC 1000000000LL #include "pipeline/pipeline.h" +#include "pipeline/project_resolve.h" #include "pipeline/artifact.h" #include "pipeline/pipeline_internal.h" #include "pipeline/pass_lsp_cross.h" @@ -153,7 +154,8 @@ cbm_pipeline_t *cbm_pipeline_new(const char *repo_path, const char *db_path, p->repo_path = strdup(repo_path); p->db_path = db_path ? strdup(db_path) : NULL; - p->project_name = cbm_project_name_from_path(repo_path); + char *existing = cbm_find_existing_project_name(repo_path); + p->project_name = existing ? 
existing : cbm_project_name_from_path(repo_path);
     (void)cbm_git_context_resolve(repo_path, &p->git_ctx);
     p->branch_qn = cbm_git_context_branch_qn(p->project_name, &p->git_ctx);
     p->mode = mode;
diff --git a/src/pipeline/project_resolve.c b/src/pipeline/project_resolve.c
new file mode 100644
index 00000000..2c9ac22f
--- /dev/null
+++ b/src/pipeline/project_resolve.c
@@ -0,0 +1,173 @@
+/*
+ * project_resolve.c — Canonical path identity and duplicate-index prevention.
+ */
+#include "pipeline/project_resolve.h"
+#include "pipeline/pipeline.h"
+#include "foundation/platform.h"
+#include "foundation/compat_fs.h"
+#include "git/git_context.h"
+#include "store/store.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Resolve `path` to an absolute path in `out` (realpath on POSIX, _fullpath on Windows).
+ * Returns false on NULL/zero-sized arguments, when the path cannot be resolved, or
+ * (POSIX) when the resolved path does not fit in `out_sz` bytes. */
+bool cbm_path_canonicalize(const char *path, char *out, size_t out_sz) {
+    if (!path || !out || out_sz == 0) {
+        return false;
+    }
+    out[0] = '\0';
+#ifdef _WIN32
+    if (!_fullpath(out, path, out_sz)) {
+        return false;
+    }
+    cbm_normalize_path_sep(out);
+#else
+    /* realpath() into a caller buffer assumes PATH_MAX bytes and cannot be told
+     * out_sz, so let it allocate and copy the result only when it fits. */
+    char *resolved = realpath(path, NULL);
+    if (!resolved) {
+        return false;
+    }
+    size_t need = strlen(resolved) + 1;
+    bool fits = need <= out_sz;
+    if (fits) {
+        memcpy(out, resolved, need);
+    }
+    free(resolved);
+    if (!fits) {
+        return false;
+    }
+#endif
+    return out[0] != '\0';
+}
+
+/* Write the dedup identity of `repo_path` into `out`: the git canonical root when
+ * cbm_git_context_resolve() reports one, otherwise the canonicalized path.
+ * Returns false on bad arguments, when no identity can be produced, or when the
+ * git root would be truncated (a clipped key could alias another project). */
+bool cbm_project_identity_key(const char *repo_path, char *out, size_t out_sz) {
+    if (!repo_path || !out || out_sz == 0) {
+        return false;
+    }
+
+    cbm_git_context_t ctx = {0};
+    if (cbm_git_context_resolve(repo_path, &ctx) == 0 && ctx.canonical_root &&
+        ctx.canonical_root[0]) {
+        int written = snprintf(out, out_sz, "%s", ctx.canonical_root);
+        cbm_git_context_free(&ctx);
+        if (written < 0 || (size_t)written >= out_sz) {
+            out[0] = '\0';
+            return false;
+        }
+        cbm_normalize_path_sep(out);
+        return true;
+    }
+    cbm_git_context_free(&ctx);
+    return cbm_path_canonicalize(repo_path, out, out_sz);
+}
+
+/* True when `child` equals `parent` or lies below it on a '/' boundary, so "/a/bc"
+ * is not nested under "/a/b". Empty strings never match. */
+static bool identity_nested(const char *child, const char *parent) {
+    if (!child[0] || !parent[0]) {
+        return false;
+    }
+    size_t plen = strlen(parent);
+    if (strncmp(child, parent, plen) != 0) {
+        return false;
+    }
+    /* Same path, or the prefix stops at a separator; a parent that already ends
+     * in '/' (e.g. the filesystem root) carries the boundary itself. */
+    return child[plen] == '\0' || child[plen] == '/' || parent[plen - 1] == '/';
+}
+
+/* True for cache entries of at least five characters ending in ".db" whose name
+ * does not start with '_'. */
+static bool is_project_db_file(const char *name, size_t len) {
+    if (len < 5 || strcmp(name + len - 3, ".db") != 0) {
+        return false;
+    }
+    return name[0] != '_';
+}
+
+/* Scan the cache directory for a project DB whose stored root has the same identity
+ * as `repo_path` or encloses it; the longest (most specific) such root wins.
+ * Returns a heap-allocated project name the caller frees, or NULL when none matches. */
+char *cbm_find_existing_project_name(const char *repo_path) {
+    if (!repo_path || !repo_path[0]) {
+        return NULL;
+    }
+
+    char query_key[4096];
+    if (!cbm_project_identity_key(repo_path, query_key, sizeof(query_key))) {
+        return NULL;
+    }
+
+    char cache_dir[1024];
+    snprintf(cache_dir, sizeof(cache_dir), "%s", cbm_resolve_cache_dir());
+
+    cbm_dir_t *d = cbm_opendir(cache_dir);
+    if (!d) {
+        return NULL;
+    }
+
+    char *best_name = NULL;
+    size_t best_root_len = 0;
+
+    cbm_dirent_t *entry;
+    while ((entry = cbm_readdir(d)) != NULL) {
+        const char *name = entry->name;
+        size_t len = strlen(name);
+        if (!is_project_db_file(name, len)) {
+            continue;
+        }
+
+        char db_path[2048];
+        snprintf(db_path, sizeof(db_path), "%s/%s", cache_dir, name);
+
+        cbm_store_t *store = cbm_store_open_path(db_path);
+        if (!store) {
+            continue;
+        }
+
+        /* The project name is the file name without its ".db" suffix. */
+        char project_name[1024];
+        snprintf(project_name, sizeof(project_name), "%.*s", (int)(len - 3), name);
+
+        cbm_project_t proj = {0};
+        char indexed_key[4096];
+        bool has_key =
+            cbm_store_get_project(store, project_name, &proj) == CBM_STORE_OK && proj.root_path &&
+            cbm_project_identity_key(proj.root_path, indexed_key, sizeof(indexed_key));
+
+        /* The key is a copy, so the row and the store can be released right away. */
+        safe_str_free(&proj.name);
+        safe_str_free(&proj.indexed_at);
+        safe_str_free(&proj.root_path);
+        cbm_store_close(store);
+
+        /* Reuse only an index rooted at repo_path or at one of its ancestors. An index
+         * rooted below repo_path covers just part of the tree and must not lend its
+         * name to the enclosing directory. */
+        if (!has_key || !identity_nested(query_key, indexed_key)) {
+            continue;
+        }
+
+        size_t root_len = strlen(indexed_key);
+        if (!best_name || root_len > best_root_len) {
+            char *copy = strdup(project_name);
+            if (copy) { /* on allocation failure keep the previous best match */
+                free(best_name);
+                best_name = copy;
+                best_root_len = root_len;
+            }
+        }
+    }
+
+    cbm_closedir(d);
+    return best_name;
+}
diff --git a/src/pipeline/project_resolve.h b/src/pipeline/project_resolve.h
new
file mode 100644
index 00000000..d36b724f
--- /dev/null
+++ b/src/pipeline/project_resolve.h
@@ -0,0 +1,19 @@
+#ifndef CBM_PROJECT_RESOLVE_H
+#define CBM_PROJECT_RESOLVE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* Canonicalize path (realpath / _fullpath) into out, a buffer of out_sz bytes.
+ * Returns false if an argument is NULL/zero-sized or the path cannot be resolved. */
+bool cbm_path_canonicalize(const char *path, char *out, size_t out_sz);
+
+/* Stable identity for dedup: git canonical_root when available, else canonical path.
+ * Written to out (out_sz bytes); returns false when no identity can be produced. */
+bool cbm_project_identity_key(const char *repo_path, char *out, size_t out_sz);
+
+/* Return heap-allocated existing project name when repo_path matches a cached index
+ * (same identity or nested under an indexed root). Caller frees; NULL if no match. */
+char *cbm_find_existing_project_name(const char *repo_path);
+
+#endif /* CBM_PROJECT_RESOLVE_H */
diff --git a/tests/test_cli.c b/tests/test_cli.c
index 0b78537c..8ce20b00 100644
--- a/tests/test_cli.c
+++ b/tests/test_cli.c
@@ -1592,7 +1592,7 @@ TEST(cli_install_plan_receipt_no_mutation_issue388) {
     snprintf(dir, sizeof(dir), "%s/.codex", tmpdir);
     test_mkdirp(dir);
 
-    char *json = cbm_build_install_plan_json(tmpdir, "/usr/local/bin/codebase-memory-mcp");
+    char *json = cbm_build_install_plan_json(tmpdir, "/usr/local/bin/codebase-memory-mcp", false);
     ASSERT_NOT_NULL(json);
     ASSERT(strstr(json, "agent.install.plan.v1") != NULL);
     ASSERT(strstr(json, "writes_started") != NULL);