diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 7016a0d2..ea595ce8 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -789,20 +789,42 @@ static cbm_store_t *resolve_store(cbm_mcp_server_t *srv, const char *project) { project_db_path(project, path, sizeof(path)); srv->store = cbm_store_open_path_query(path); if (srv->store) { - /* Check DB integrity — auto-clean corrupt databases */ + /* Check DB integrity — preserve corrupt databases instead of + * silently deleting them so the user can inspect the evidence and + * recover or report the issue (#557). Rename to .corrupt. so + * the path is obvious in the cache directory. */ if (!cbm_store_check_integrity(srv->store)) { - cbm_log_error("store.auto_clean", "project", project, "path", path, "action", - "deleting corrupt db — re-index required"); cbm_store_close(srv->store); srv->store = NULL; - /* Delete the corrupt DB + WAL/SHM files */ - cbm_unlink(path); + /* Rename the corrupt DB + WAL/SHM to preserve evidence */ char wal_path[MCP_FIELD_SIZE]; char shm_path[MCP_FIELD_SIZE]; + char corrupt_path[MCP_FIELD_SIZE]; + char corrupt_wal[MCP_FIELD_SIZE]; + char corrupt_shm[MCP_FIELD_SIZE]; + long long now_s = (long long)time(NULL); snprintf(wal_path, sizeof(wal_path), "%s-wal", path); snprintf(shm_path, sizeof(shm_path), "%s-shm", path); - cbm_unlink(wal_path); - cbm_unlink(shm_path); + snprintf(corrupt_path, sizeof(corrupt_path), "%s.corrupt.%lld", path, now_s); + snprintf(corrupt_wal, sizeof(corrupt_wal), "%s-wal.corrupt.%lld", path, now_s); + snprintf(corrupt_shm, sizeof(corrupt_shm), "%s-shm.corrupt.%lld", path, now_s); + if (rename(path, corrupt_path) != 0) { + cbm_unlink(path); /* fallback: delete if rename fails */ + } + /* best-effort: sidecars may not exist */ + if (rename(wal_path, corrupt_wal) != 0) { + cbm_unlink(wal_path); + } + if (rename(shm_path, corrupt_shm) != 0) { + cbm_unlink(shm_path); + } + cbm_log_error("store.auto_clean", "project", project, "path", path, "action", + "corrupt db preserved", "preserved_as", corrupt_path); + (void)fprintf(stderr, + "ERROR corrupt project database: %s\n" + "ERROR preserved as: %s\n" + "ERROR inspect the .corrupt file, delete it when done, and re-index.\n", + path, corrupt_path); return NULL; } diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 8e370f7c..24d39477 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -783,13 +783,19 @@ static int try_incremental_or_delete_db(cbm_pipeline_t *p, cbm_file_info_t *file if (!db_path) { return CBM_NOT_FOUND; } - struct stat db_st; - if (stat(db_path, &db_st) != 0) { + /* Open the existing DB directly in query mode (never creates the file). + * A NULL result means it is absent or unreadable — nothing to reindex + * from, so we return without touching the path. Deriving existence from + * the open instead of a prior stat() removes the check-then-use race on + * db_path that CodeQL flags as a TOCTOU, and matches resolve_store() in + * mcp.c, which already uses this pattern. */ + bool was_corrupt = false; + cbm_store_t *check_store = cbm_store_open_path_query(db_path); + if (!check_store) { free(db_path); return CBM_NOT_FOUND; } - cbm_store_t *check_store = cbm_store_open_path(db_path); - if (check_store && cbm_store_check_integrity(check_store)) { + if (cbm_store_check_integrity(check_store)) { cbm_file_hash_t *hashes = NULL; int hash_count = 0; cbm_store_get_file_hashes(check_store, p->project_name, &hashes, &hash_count); @@ -806,17 +812,45 @@ static int try_incremental_or_delete_db(cbm_pipeline_t *p, cbm_file_info_t *file cbm_log_info("pipeline.route", "path", "mode_change_reindex", "stored_hashes", itoa_buf(hash_count), "discovered", itoa_buf(file_count)); } - } else if (check_store) { + } else { + /* Integrity check failed — this is the data-loss path of #557. */ + was_corrupt = true; cbm_store_close(check_store); } - cbm_log_info("pipeline.route", "path", "reindex", "action", "deleting old db"); - cbm_unlink(db_path); - char wal[PL_WAL_BUF]; - char shm[PL_WAL_BUF]; - snprintf(wal, sizeof(wal), "%s-wal", db_path); - snprintf(shm, sizeof(shm), "%s-shm", db_path); - cbm_unlink(wal); - cbm_unlink(shm); + char wal_path[PL_WAL_BUF]; + char shm_path[PL_WAL_BUF]; + snprintf(wal_path, sizeof(wal_path), "%s-wal", db_path); + snprintf(shm_path, sizeof(shm_path), "%s-shm", db_path); + if (was_corrupt) { + /* Preserve the corrupt DB as .corrupt. instead of deleting, + * so the user can inspect/recover before the full reindex (#557). + * Only on real corruption — a healthy DB rebuilt due to a mode change + * is expected churn and must not accumulate .corrupt files. */ + long long now_s = (long long)time(NULL); + char corrupt_db[PL_WAL_BUF]; + char corrupt_wal[PL_WAL_BUF]; + char corrupt_shm[PL_WAL_BUF]; + snprintf(corrupt_db, sizeof(corrupt_db), "%s.corrupt.%lld", db_path, now_s); + snprintf(corrupt_wal, sizeof(corrupt_wal), "%s-wal.corrupt.%lld", db_path, now_s); + snprintf(corrupt_shm, sizeof(corrupt_shm), "%s-shm.corrupt.%lld", db_path, now_s); + if (rename(db_path, corrupt_db) != 0) { + cbm_unlink(db_path); + } + if (rename(wal_path, corrupt_wal) != 0) { + cbm_unlink(wal_path); + } + if (rename(shm_path, corrupt_shm) != 0) { + cbm_unlink(shm_path); + } + cbm_log_info("pipeline.route", "path", "reindex", "action", "corrupt db preserved", + "preserved_as", corrupt_db); + } else { + /* Healthy DB being rebuilt (mode change / file-count drift) — delete. */ + cbm_log_info("pipeline.route", "path", "reindex", "action", "deleting old db"); + cbm_unlink(db_path); + cbm_unlink(wal_path); + cbm_unlink(shm_path); + } free(db_path); return CBM_NOT_FOUND; } diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 68edc0e9..3d4a0740 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -9,6 +9,9 @@ #include #include #include +#include /* sqlite3_exec — corrupt-DB regression #557 */ +#include /* opendir/readdir — corrupt-DB regression #557 */ +#include #include #include #include @@ -2209,6 +2212,86 @@ TEST(tool_bad_project_name_no_overflow_issue235) { } #undef ISSUE235_DBNAME +/* Issue #557: a corrupt project DB must be PRESERVED (renamed to + * .db.corrupt.), not silently unlink()'d. resolve_store() + * runs the integrity check on first query; a numeric root_path trips + * it (same trigger as store_integrity_corrupt_bad_path). After the + * query, the original .db must be gone but a .corrupt sidecar must + * exist so the user can inspect/recover. */ +TEST(resolve_store_preserves_corrupt_db_issue557) { + char cache[256]; + snprintf(cache, sizeof(cache), "/tmp/cbm-557-XXXXXX"); + if (!cbm_mkdtemp(cache)) { + PASS(); /* skip if mkdtemp fails */ + } + + const char *saved = getenv("CBM_CACHE_DIR"); + char *saved_copy = saved ? strdup(saved) : NULL; + cbm_setenv("CBM_CACHE_DIR", cache, 1); + + /* Build a structurally-valid DB whose projects row is corrupt + * (numeric root_path), so cbm_store_check_integrity() returns false. */ + char db_path[512]; + snprintf(db_path, sizeof(db_path), "%s/regr557.db", cache); + cbm_store_t *seed = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(seed); + sqlite3_exec(cbm_store_get_db(seed), + "INSERT INTO projects (name, indexed_at, root_path) " + "VALUES ('regr557', '2024-01-01', '826');", + NULL, NULL, NULL); + cbm_store_close(seed); + + /* A query against the project triggers resolve_store → integrity check. */ + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + char *resp = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"tools/call\",\"params\":{\"name\":" + "\"search_graph\",\"arguments\":{\"label\":\"Function\"," + "\"project\":\"regr557\"}}}"); + ASSERT_NOT_NULL(resp); + free(resp); + cbm_mcp_server_free(srv); + + /* The original DB must be gone (renamed, not left in place). */ + struct stat st; + ASSERT_TRUE(stat(db_path, &st) != 0); + + /* A .db.corrupt. sidecar must have been created. */ + bool found_corrupt = false; + DIR *d = opendir(cache); + ASSERT_NOT_NULL(d); + struct dirent *ent; + while ((ent = readdir(d)) != NULL) { + if (strncmp(ent->d_name, "regr557.db.corrupt.", 19) == 0) { + found_corrupt = true; + } + } + closedir(d); + ASSERT_TRUE(found_corrupt); + + if (saved_copy) { + cbm_setenv("CBM_CACHE_DIR", saved_copy, 1); + free(saved_copy); + } else { + cbm_unsetenv("CBM_CACHE_DIR"); + } + /* Clean up any regr557.db* files left in the temp cache dir. */ + DIR *cd = opendir(cache); + if (cd) { + struct dirent *e; + while ((e = readdir(cd)) != NULL) { + if (strncmp(e->d_name, "regr557.db", 10) == 0) { + char p[768]; + snprintf(p, sizeof(p), "%s/%s", cache, e->d_name); + cbm_unlink(p); + } + } + closedir(cd); + } + cbm_rmdir(cache); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * SUITE * ══════════════════════════════════════════════════════════════════ */ @@ -2353,4 +2436,5 @@ SUITE(mcp) { RUN_TEST(snippet_include_neighbors_enabled); RUN_TEST(snippet_source_invalid_utf8); RUN_TEST(tool_bad_project_name_no_overflow_issue235); + RUN_TEST(resolve_store_preserves_corrupt_db_issue557); }