diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 7016a0d2..ea595ce8 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -789,20 +789,42 @@ static cbm_store_t *resolve_store(cbm_mcp_server_t *srv, const char *project) { project_db_path(project, path, sizeof(path)); srv->store = cbm_store_open_path_query(path); if (srv->store) { - /* Check DB integrity — auto-clean corrupt databases */ + /* Check DB integrity — preserve corrupt databases instead of + * silently deleting them so the user can inspect the evidence and + * recover or report the issue (#557). Rename to .corrupt. so + * the path is obvious in the cache directory. */ if (!cbm_store_check_integrity(srv->store)) { - cbm_log_error("store.auto_clean", "project", project, "path", path, "action", - "deleting corrupt db — re-index required"); cbm_store_close(srv->store); srv->store = NULL; - /* Delete the corrupt DB + WAL/SHM files */ - cbm_unlink(path); + /* Rename the corrupt DB + WAL/SHM to preserve evidence */ char wal_path[MCP_FIELD_SIZE]; char shm_path[MCP_FIELD_SIZE]; + char corrupt_path[MCP_FIELD_SIZE]; + char corrupt_wal[MCP_FIELD_SIZE]; + char corrupt_shm[MCP_FIELD_SIZE]; + long long now_s = (long long)time(NULL); snprintf(wal_path, sizeof(wal_path), "%s-wal", path); snprintf(shm_path, sizeof(shm_path), "%s-shm", path); - cbm_unlink(wal_path); - cbm_unlink(shm_path); + snprintf(corrupt_path, sizeof(corrupt_path), "%s.corrupt.%lld", path, now_s); + snprintf(corrupt_wal, sizeof(corrupt_wal), "%s-wal.corrupt.%lld", path, now_s); + snprintf(corrupt_shm, sizeof(corrupt_shm), "%s-shm.corrupt.%lld", path, now_s); + if (rename(path, corrupt_path) != 0) { + cbm_unlink(path); /* fallback: delete if rename fails */ + } + /* best-effort: sidecars may not exist */ + if (rename(wal_path, corrupt_wal) != 0) { + cbm_unlink(wal_path); + } + if (rename(shm_path, corrupt_shm) != 0) { + cbm_unlink(shm_path); + } + cbm_log_error("store.auto_clean", "project", project, "path", path, "action", + "corrupt db preserved", "preserved_as", corrupt_path); + (void)fprintf(stderr, + "ERROR corrupt project database: %s\n" + "ERROR preserved as: %s\n" + "ERROR inspect the .corrupt file, delete it when done, and re-index.\n", + path, corrupt_path); return NULL; } diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 8e370f7c..309cfb87 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -97,6 +97,11 @@ struct cbm_pipeline { /* Committed graph size at dump time (-1 = dump did not run). #334 gate axis. */ int committed_nodes; int committed_edges; + + /* ADR (project_summaries) content read from the old DB before a rewrite, + * re-inserted after the dump so manage_adr survives re-indexing (#516). + * NULL when no ADR was stored. Owned by the pipeline; freed in _free. */ + char *carried_adr; }; /* ── Global pkgmap (one active pipeline at a time) ─────────────── */ @@ -182,6 +187,7 @@ void cbm_pipeline_free(cbm_pipeline_t *p) { p->excluded_dirs = NULL; p->excluded_count = 0; free(p->branch_qn); + free(p->carried_adr); cbm_git_context_free(&p->git_ctx); /* gbuf, store, registry freed during/after run */ /* Defensively free userconfig in case run() was never called or panicked */ @@ -207,6 +213,10 @@ const char *cbm_pipeline_repo_path(const cbm_pipeline_t *p) { return p ? p->repo_path : NULL; } +const char *cbm_pipeline_carried_adr(const cbm_pipeline_t *p) { + return p ? p->carried_adr : NULL; +} + atomic_int *cbm_pipeline_cancelled_ptr(cbm_pipeline_t *p) { return p ? &p->cancelled : NULL; } @@ -783,13 +793,31 @@ static int try_incremental_or_delete_db(cbm_pipeline_t *p, cbm_file_info_t *file if (!db_path) { return CBM_NOT_FOUND; } - struct stat db_st; - if (stat(db_path, &db_st) != 0) { + /* Open the existing DB directly in query mode (never creates the file). + * A NULL result means it is absent or unreadable — nothing to reindex + * from, so we return without touching the path. Deriving existence from + * the open instead of a prior stat() removes the check-then-use race on + * db_path that CodeQL flags as a TOCTOU, and matches resolve_store() in + * mcp.c, which already uses this pattern. */ + bool was_corrupt = false; + cbm_store_t *check_store = cbm_store_open_path_query(db_path); + if (!check_store) { free(db_path); return CBM_NOT_FOUND; } - cbm_store_t *check_store = cbm_store_open_path(db_path); - if (check_store && cbm_store_check_integrity(check_store)) { + /* Capture the stored ADR (project_summaries) before any rewrite. Both the + * full dump (dump_and_persist_hashes) and the incremental dump + * (dump_and_persist) serialize a fresh DB with an empty project_summaries, + * so the ADR must be carried across and re-inserted afterwards (#516). + * This is the single point that opens the old DB for both paths. */ + cbm_adr_t carried = {0}; + if (cbm_store_adr_get(check_store, p->project_name, &carried) == CBM_STORE_OK && + carried.content) { + free(p->carried_adr); + p->carried_adr = strdup(carried.content); + } + cbm_store_adr_free(&carried); + if (cbm_store_check_integrity(check_store)) { cbm_file_hash_t *hashes = NULL; int hash_count = 0; cbm_store_get_file_hashes(check_store, p->project_name, &hashes, &hash_count); @@ -806,17 +834,45 @@ static int try_incremental_or_delete_db(cbm_pipeline_t *p, cbm_file_info_t *file cbm_log_info("pipeline.route", "path", "mode_change_reindex", "stored_hashes", itoa_buf(hash_count), "discovered", itoa_buf(file_count)); } - } else if (check_store) { + } else { + /* Integrity check failed — this is the data-loss path of #557. */ + was_corrupt = true; cbm_store_close(check_store); } - cbm_log_info("pipeline.route", "path", "reindex", "action", "deleting old db"); - cbm_unlink(db_path); - char wal[PL_WAL_BUF]; - char shm[PL_WAL_BUF]; - snprintf(wal, sizeof(wal), "%s-wal", db_path); - snprintf(shm, sizeof(shm), "%s-shm", db_path); - cbm_unlink(wal); - cbm_unlink(shm); + char wal_path[PL_WAL_BUF]; + char shm_path[PL_WAL_BUF]; + snprintf(wal_path, sizeof(wal_path), "%s-wal", db_path); + snprintf(shm_path, sizeof(shm_path), "%s-shm", db_path); + if (was_corrupt) { + /* Preserve the corrupt DB as .corrupt. instead of deleting, + * so the user can inspect/recover before the full reindex (#557). + * Only on real corruption — a healthy DB rebuilt due to a mode change + * is expected churn and must not accumulate .corrupt files. */ + long long now_s = (long long)time(NULL); + char corrupt_db[PL_WAL_BUF]; + char corrupt_wal[PL_WAL_BUF]; + char corrupt_shm[PL_WAL_BUF]; + snprintf(corrupt_db, sizeof(corrupt_db), "%s.corrupt.%lld", db_path, now_s); + snprintf(corrupt_wal, sizeof(corrupt_wal), "%s-wal.corrupt.%lld", db_path, now_s); + snprintf(corrupt_shm, sizeof(corrupt_shm), "%s-shm.corrupt.%lld", db_path, now_s); + if (rename(db_path, corrupt_db) != 0) { + cbm_unlink(db_path); + } + if (rename(wal_path, corrupt_wal) != 0) { + cbm_unlink(wal_path); + } + if (rename(shm_path, corrupt_shm) != 0) { + cbm_unlink(shm_path); + } + cbm_log_info("pipeline.route", "path", "reindex", "action", "corrupt db preserved", + "preserved_as", corrupt_db); + } else { + /* Healthy DB being rebuilt (mode change / file-count drift) — delete. */ + cbm_log_info("pipeline.route", "path", "reindex", "action", "deleting old db"); + cbm_unlink(db_path); + cbm_unlink(wal_path); + cbm_unlink(shm_path); + } free(db_path); return CBM_NOT_FOUND; } @@ -877,6 +933,12 @@ static int dump_and_persist_hashes(cbm_pipeline_t *p, const cbm_file_info_t *fil } } + /* Re-insert the ADR carried across the rewrite (#516). The fresh dump + * wrote an empty project_summaries, mirroring the file_hashes restore. */ + if (p->carried_adr) { + cbm_store_adr_store(hash_store, p->project_name, p->carried_adr); + } + /* FTS5 backfill: populate nodes_fts with camelCase-split names. * Contentless FTS5 requires the special 'delete-all' command instead of * DELETE FROM to wipe prior rows (there's no underlying content table). diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index a1cc4482..f0b3c94a 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -627,7 +627,7 @@ static void run_postpasses(cbm_pipeline_ctx_t *ctx, cbm_file_info_t *changed_fil static void dump_and_persist(cbm_gbuf_t *gbuf, const char *db_path, const char *project, cbm_file_info_t *files, int file_count, const cbm_file_hash_t *mode_skipped, int mode_skipped_count, - const char *repo_path) { + const char *repo_path, const char *carried_adr) { struct timespec t; cbm_clock_gettime(CLOCK_MONOTONIC, &t); @@ -647,6 +647,12 @@ static void dump_and_persist(cbm_gbuf_t *gbuf, const char *db_path, const char * if (hash_store) { persist_hashes(hash_store, project, files, file_count, mode_skipped, mode_skipped_count); + /* Re-insert the ADR carried across the rewrite (#516). Like the full + * dump in pipeline.c, the btree dump wrote an empty project_summaries. */ + if (carried_adr) { + cbm_store_adr_store(hash_store, project, carried_adr); + } + /* FTS5 rebuild after incremental dump. The btree dump path bypasses * any triggers that could have kept nodes_fts synchronized, so we * rebuild from the nodes table here. See the full-dump path in @@ -851,7 +857,7 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil cbm_pipeline_set_committed_counts(p, cbm_gbuf_node_count(existing), cbm_gbuf_edge_count(existing)); dump_and_persist(existing, db_path, project, files, file_count, mode_skipped, - mode_skipped_count, cbm_pipeline_repo_path(p)); + mode_skipped_count, cbm_pipeline_repo_path(p), cbm_pipeline_carried_adr(p)); free_mode_skipped(mode_skipped, mode_skipped_count); cbm_gbuf_free(existing); diff --git a/src/pipeline/pipeline_internal.h b/src/pipeline/pipeline_internal.h index af1a8de1..b6000e0d 100644 --- a/src/pipeline/pipeline_internal.h +++ b/src/pipeline/pipeline_internal.h @@ -543,6 +543,11 @@ atomic_int *cbm_pipeline_cancelled_ptr(cbm_pipeline_t *p); /* Record committed graph size (#334 gate axis) from the incremental path, * which cannot see the opaque cbm_pipeline struct. Call before the dump. */ void cbm_pipeline_set_committed_counts(cbm_pipeline_t *p, int nodes, int edges); +/* ADR (project_summaries) content captured from the old DB before a rewrite, + * so the incremental dump path can re-insert it and manage_adr survives + * re-indexing (#516). Returns NULL when no ADR was stored. Borrowed — do not + * free. */ +const char *cbm_pipeline_carried_adr(const cbm_pipeline_t *p); /* Parse a gRPC stub call "." into the canonical proto * service name + method. Returns true ONLY when a recognized gRPC stub/client diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 68edc0e9..3d4a0740 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -9,6 +9,9 @@ #include #include #include +#include /* sqlite3_exec — corrupt-DB regression #557 */ +#include /* opendir/readdir — corrupt-DB regression #557 */ +#include #include #include #include @@ -2209,6 +2212,86 @@ TEST(tool_bad_project_name_no_overflow_issue235) { } #undef ISSUE235_DBNAME +/* Issue #557: a corrupt project DB must be PRESERVED (renamed to + * .db.corrupt.), not silently unlink()'d. resolve_store() + * runs the integrity check on first query; a numeric root_path trips + * it (same trigger as store_integrity_corrupt_bad_path). After the + * query, the original .db must be gone but a .corrupt sidecar must + * exist so the user can inspect/recover. */ +TEST(resolve_store_preserves_corrupt_db_issue557) { + char cache[256]; + snprintf(cache, sizeof(cache), "/tmp/cbm-557-XXXXXX"); + if (!cbm_mkdtemp(cache)) { + PASS(); /* skip if mkdtemp fails */ + } + + const char *saved = getenv("CBM_CACHE_DIR"); + char *saved_copy = saved ? strdup(saved) : NULL; + cbm_setenv("CBM_CACHE_DIR", cache, 1); + + /* Build a structurally-valid DB whose projects row is corrupt + * (numeric root_path), so cbm_store_check_integrity() returns false. */ + char db_path[512]; + snprintf(db_path, sizeof(db_path), "%s/regr557.db", cache); + cbm_store_t *seed = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(seed); + sqlite3_exec(cbm_store_get_db(seed), + "INSERT INTO projects (name, indexed_at, root_path) " + "VALUES ('regr557', '2024-01-01', '826');", + NULL, NULL, NULL); + cbm_store_close(seed); + + /* A query against the project triggers resolve_store → integrity check. */ + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + char *resp = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"tools/call\",\"params\":{\"name\":" + "\"search_graph\",\"arguments\":{\"label\":\"Function\"," + "\"project\":\"regr557\"}}}"); + ASSERT_NOT_NULL(resp); + free(resp); + cbm_mcp_server_free(srv); + + /* The original DB must be gone (renamed, not left in place). */ + struct stat st; + ASSERT_TRUE(stat(db_path, &st) != 0); + + /* A .db.corrupt. sidecar must have been created. */ + bool found_corrupt = false; + DIR *d = opendir(cache); + ASSERT_NOT_NULL(d); + struct dirent *ent; + while ((ent = readdir(d)) != NULL) { + if (strncmp(ent->d_name, "regr557.db.corrupt.", 19) == 0) { + found_corrupt = true; + } + } + closedir(d); + ASSERT_TRUE(found_corrupt); + + if (saved_copy) { + cbm_setenv("CBM_CACHE_DIR", saved_copy, 1); + free(saved_copy); + } else { + cbm_unsetenv("CBM_CACHE_DIR"); + } + /* Clean up any regr557.db* files left in the temp cache dir. */ + DIR *cd = opendir(cache); + if (cd) { + struct dirent *e; + while ((e = readdir(cd)) != NULL) { + if (strncmp(e->d_name, "regr557.db", 10) == 0) { + char p[768]; + snprintf(p, sizeof(p), "%s/%s", cache, e->d_name); + cbm_unlink(p); + } + } + closedir(cd); + } + cbm_rmdir(cache); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * SUITE * ══════════════════════════════════════════════════════════════════ */ @@ -2353,4 +2436,5 @@ SUITE(mcp) { RUN_TEST(snippet_include_neighbors_enabled); RUN_TEST(snippet_source_invalid_utf8); RUN_TEST(tool_bad_project_name_no_overflow_issue235); + RUN_TEST(resolve_store_preserves_corrupt_db_issue557); } diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 98b4204f..dd106a8e 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -21,6 +21,8 @@ #include #include #include +#include /* opendir/readdir — corrupt-DB preservation regression #557 */ +#include /* sqlite3_exec — corrupt-DB preservation regression #557 */ #include "graph_buffer/graph_buffer.h" #include "yyjson/yyjson.h" @@ -273,6 +275,152 @@ TEST(pipeline_structure_nodes) { PASS(); } +/* #557: on re-index, a project DB that fails the integrity check must be + * PRESERVED (renamed to .corrupt.), not silently deleted, so the + * user can recover. Also guards the TOCTOU refactor of + * try_incremental_or_delete_db (existence derived from a query-mode open + * instead of a prior stat()) — the preserve behavior must survive it. */ +TEST(pipeline_reindex_preserves_corrupt_db_issue557) { + if (setup_test_repo() != 0) { + FAIL("failed to create temp dir"); + } + + char db_path[512]; + snprintf(db_path, sizeof(db_path), "%s/test.db", g_tmpdir); + + /* 1. Index once to produce a valid DB. */ + cbm_pipeline_t *p1 = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FAST); + ASSERT_NOT_NULL(p1); + ASSERT_EQ(cbm_pipeline_run(p1), 0); + cbm_pipeline_free(p1); + + /* 2. Corrupt it: a numeric root_path trips cbm_store_check_integrity + * (same trigger as the store_integrity_corrupt_bad_path unit test). */ + cbm_store_t *s = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(s); + sqlite3_exec(cbm_store_get_db(s), "UPDATE projects SET root_path = '826';", NULL, NULL, NULL); + ASSERT_FALSE(cbm_store_check_integrity(s)); + cbm_store_close(s); + + /* 3. Re-index: the corrupt DB must be preserved, not unlink()'d. */ + cbm_pipeline_t *p2 = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FAST); + ASSERT_NOT_NULL(p2); + ASSERT_EQ(cbm_pipeline_run(p2), 0); + cbm_pipeline_free(p2); + + /* 4. A test.db.corrupt. sidecar must have been created. */ + bool found_corrupt = false; + DIR *d = opendir(g_tmpdir); + ASSERT_NOT_NULL(d); + struct dirent *ent; + while ((ent = readdir(d)) != NULL) { + if (strncmp(ent->d_name, "test.db.corrupt.", 16) == 0) { + found_corrupt = true; + } + } + closedir(d); + ASSERT_TRUE(found_corrupt); + + teardown_test_repo(); + PASS(); +} + +/* #516 helpers: store an ADR into project_summaries, and check it survived. */ +static void store_test_adr_516(const char *db_path, const char *project) { + cbm_store_t *s = cbm_store_open_path(db_path); + if (s) { + cbm_store_adr_store(s, project, "# ADR\nDecision: keep me."); + cbm_store_close(s); + } +} +static bool adr_survives_516(const char *db_path, const char *project) { + cbm_store_t *s = cbm_store_open_path(db_path); + if (!s) { + return false; + } + cbm_adr_t adr = {0}; + bool ok = (cbm_store_adr_get(s, project, &adr) == CBM_STORE_OK) && adr.content && + strstr(adr.content, "keep me") != NULL; + cbm_store_adr_free(&adr); + cbm_store_close(s); + return ok; +} + +/* #516: an ADR stored via manage_adr must survive a FULL re-index. The dump + * writes an empty project_summaries, so the ADR has to be carried across. */ +TEST(pipeline_full_reindex_preserves_adr_issue516) { + if (setup_test_repo() != 0) { + FAIL("failed to create temp dir"); + } + char db_path[512]; + snprintf(db_path, sizeof(db_path), "%s/test.db", g_tmpdir); + + cbm_pipeline_t *p1 = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FAST); + ASSERT_NOT_NULL(p1); + ASSERT_EQ(cbm_pipeline_run(p1), 0); + char project[256]; + snprintf(project, sizeof(project), "%s", cbm_pipeline_project_name(p1)); + store_test_adr_516(db_path, project); + + /* Add files so file_count exceeds the incremental threshold → full dump. */ + for (int i = 0; i < 4; i++) { + char fp[600]; + snprintf(fp, sizeof(fp), "%s/extra%d.go", g_tmpdir, i); + FILE *f = fopen(fp, "w"); + if (f) { + fprintf(f, "package main\nfunc Extra%d() {}\n", i); + fclose(f); + } + } + cbm_pipeline_t *p2 = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FAST); + ASSERT_NOT_NULL(p2); + ASSERT_EQ(cbm_pipeline_run(p2), 0); + + ASSERT_TRUE(adr_survives_516(db_path, project)); + + cbm_pipeline_free(p2); + cbm_pipeline_free(p1); + teardown_test_repo(); + PASS(); +} + +/* #516: the same must hold on the INCREMENTAL re-index path (a changed file, + * same file count). The original report missed this second dump site. */ +TEST(pipeline_incremental_reindex_preserves_adr_issue516) { + if (setup_test_repo() != 0) { + FAIL("failed to create temp dir"); + } + char db_path[512]; + snprintf(db_path, sizeof(db_path), "%s/test.db", g_tmpdir); + + cbm_pipeline_t *p1 = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FAST); + ASSERT_NOT_NULL(p1); + ASSERT_EQ(cbm_pipeline_run(p1), 0); + char project[256]; + snprintf(project, sizeof(project), "%s", cbm_pipeline_project_name(p1)); + store_test_adr_516(db_path, project); + + /* Modify one existing file (file count unchanged) → incremental dump. */ + char fp[600]; + snprintf(fp, sizeof(fp), "%s/main.go", g_tmpdir); + FILE *f = fopen(fp, "w"); + ASSERT_NOT_NULL(f); + fprintf(f, "package main\n\nimport \"pkg\"\n\n" + "func main() {\n\tpkg.Serve()\n\tpkg.Serve()\n}\n"); + fclose(f); + + cbm_pipeline_t *p2 = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FAST); + ASSERT_NOT_NULL(p2); + ASSERT_EQ(cbm_pipeline_run(p2), 0); + + ASSERT_TRUE(adr_survives_516(db_path, project)); + + cbm_pipeline_free(p2); + cbm_pipeline_free(p1); + teardown_test_repo(); + PASS(); +} + TEST(pipeline_structure_edges) { if (setup_test_repo() != 0) { FAIL("failed to create temp dir"); @@ -1274,7 +1422,7 @@ TEST(usages_creates_edges) { /* Check for USAGE edges */ cbm_edge_t *edges = NULL; int edge_count = 0; - rc = cbm_store_find_edges_by_type(s, project, "USAGE", &edges, &edge_count); + cbm_store_find_edges_by_type(s, project, "USAGE", &edges, &edge_count); bool found_usage = false; for (int i = 0; i < edge_count; i++) { @@ -6091,6 +6239,9 @@ SUITE(pipeline) { RUN_TEST(store_bulk_persistence); /* Integration: structure pass */ RUN_TEST(pipeline_structure_nodes); + RUN_TEST(pipeline_reindex_preserves_corrupt_db_issue557); + RUN_TEST(pipeline_full_reindex_preserves_adr_issue516); + RUN_TEST(pipeline_incremental_reindex_preserves_adr_issue516); RUN_TEST(pipeline_committed_counts_match_persisted); RUN_TEST(pipeline_structure_edges); RUN_TEST(pipeline_branch_root_structure);