Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 29 additions & 7 deletions src/mcp/mcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -789,20 +789,42 @@ static cbm_store_t *resolve_store(cbm_mcp_server_t *srv, const char *project) {
project_db_path(project, path, sizeof(path));
srv->store = cbm_store_open_path_query(path);
if (srv->store) {
/* Check DB integrity — auto-clean corrupt databases */
/* Check DB integrity — preserve corrupt databases instead of
* silently deleting them so the user can inspect the evidence and
* recover or report the issue (#557). Rename to .corrupt.<ts> so
* the path is obvious in the cache directory. */
if (!cbm_store_check_integrity(srv->store)) {
cbm_log_error("store.auto_clean", "project", project, "path", path, "action",
"deleting corrupt db — re-index required");
cbm_store_close(srv->store);
srv->store = NULL;
/* Delete the corrupt DB + WAL/SHM files */
cbm_unlink(path);
/* Rename the corrupt DB + WAL/SHM to preserve evidence */
char wal_path[MCP_FIELD_SIZE];
char shm_path[MCP_FIELD_SIZE];
char corrupt_path[MCP_FIELD_SIZE];
char corrupt_wal[MCP_FIELD_SIZE];
char corrupt_shm[MCP_FIELD_SIZE];
long long now_s = (long long)time(NULL);
snprintf(wal_path, sizeof(wal_path), "%s-wal", path);
snprintf(shm_path, sizeof(shm_path), "%s-shm", path);
cbm_unlink(wal_path);
cbm_unlink(shm_path);
snprintf(corrupt_path, sizeof(corrupt_path), "%s.corrupt.%lld", path, now_s);
snprintf(corrupt_wal, sizeof(corrupt_wal), "%s-wal.corrupt.%lld", path, now_s);
snprintf(corrupt_shm, sizeof(corrupt_shm), "%s-shm.corrupt.%lld", path, now_s);
if (rename(path, corrupt_path) != 0) {
cbm_unlink(path); /* fallback: delete if rename fails */
}
/* best-effort: sidecars may not exist */
if (rename(wal_path, corrupt_wal) != 0) {
cbm_unlink(wal_path);
}
if (rename(shm_path, corrupt_shm) != 0) {
cbm_unlink(shm_path);
}
cbm_log_error("store.auto_clean", "project", project, "path", path, "action",
"corrupt db preserved", "preserved_as", corrupt_path);
(void)fprintf(stderr,
"ERROR corrupt project database: %s\n"
"ERROR preserved as: %s\n"
"ERROR inspect the .corrupt file, delete it when done, and re-index.\n",
path, corrupt_path);
return NULL;
}

Expand Down
60 changes: 47 additions & 13 deletions src/pipeline/pipeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -783,13 +783,19 @@ static int try_incremental_or_delete_db(cbm_pipeline_t *p, cbm_file_info_t *file
if (!db_path) {
return CBM_NOT_FOUND;
}
struct stat db_st;
if (stat(db_path, &db_st) != 0) {
/* Open the existing DB directly in query mode (never creates the file).
* A NULL result means it is absent or unreadable — nothing to reindex
* from, so we return without touching the path. Deriving existence from
* the open instead of a prior stat() removes the check-then-use race on
* db_path that CodeQL flags as a TOCTOU, and matches resolve_store() in
* mcp.c, which already uses this pattern. */
bool was_corrupt = false;
cbm_store_t *check_store = cbm_store_open_path_query(db_path);
if (!check_store) {
free(db_path);
return CBM_NOT_FOUND;
}
cbm_store_t *check_store = cbm_store_open_path(db_path);
if (check_store && cbm_store_check_integrity(check_store)) {
if (cbm_store_check_integrity(check_store)) {
cbm_file_hash_t *hashes = NULL;
int hash_count = 0;
cbm_store_get_file_hashes(check_store, p->project_name, &hashes, &hash_count);
Expand All @@ -806,17 +812,45 @@ static int try_incremental_or_delete_db(cbm_pipeline_t *p, cbm_file_info_t *file
cbm_log_info("pipeline.route", "path", "mode_change_reindex", "stored_hashes",
itoa_buf(hash_count), "discovered", itoa_buf(file_count));
}
} else if (check_store) {
} else {
/* Integrity check failed — this is the data-loss path of #557. */
was_corrupt = true;
cbm_store_close(check_store);
}
cbm_log_info("pipeline.route", "path", "reindex", "action", "deleting old db");
cbm_unlink(db_path);
char wal[PL_WAL_BUF];
char shm[PL_WAL_BUF];
snprintf(wal, sizeof(wal), "%s-wal", db_path);
snprintf(shm, sizeof(shm), "%s-shm", db_path);
cbm_unlink(wal);
cbm_unlink(shm);
char wal_path[PL_WAL_BUF];
char shm_path[PL_WAL_BUF];
snprintf(wal_path, sizeof(wal_path), "%s-wal", db_path);
snprintf(shm_path, sizeof(shm_path), "%s-shm", db_path);
if (was_corrupt) {
/* Preserve the corrupt DB as .corrupt.<timestamp> instead of deleting,
* so the user can inspect/recover before the full reindex (#557).
* Only on real corruption — a healthy DB rebuilt due to a mode change
* is expected churn and must not accumulate .corrupt files. */
long long now_s = (long long)time(NULL);
char corrupt_db[PL_WAL_BUF];
char corrupt_wal[PL_WAL_BUF];
char corrupt_shm[PL_WAL_BUF];
snprintf(corrupt_db, sizeof(corrupt_db), "%s.corrupt.%lld", db_path, now_s);
snprintf(corrupt_wal, sizeof(corrupt_wal), "%s-wal.corrupt.%lld", db_path, now_s);
snprintf(corrupt_shm, sizeof(corrupt_shm), "%s-shm.corrupt.%lld", db_path, now_s);
if (rename(db_path, corrupt_db) != 0) {
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed
cbm_unlink(db_path);
}
if (rename(wal_path, corrupt_wal) != 0) {
cbm_unlink(wal_path);
}
if (rename(shm_path, corrupt_shm) != 0) {
cbm_unlink(shm_path);
}
cbm_log_info("pipeline.route", "path", "reindex", "action", "corrupt db preserved",
"preserved_as", corrupt_db);
} else {
/* Healthy DB being rebuilt (mode change / file-count drift) — delete. */
cbm_log_info("pipeline.route", "path", "reindex", "action", "deleting old db");
cbm_unlink(db_path);
cbm_unlink(wal_path);
cbm_unlink(shm_path);
}
free(db_path);
return CBM_NOT_FOUND;
}
Expand Down
84 changes: 84 additions & 0 deletions tests/test_mcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include <mcp/mcp.h>
#include <store/store.h>
#include <yyjson/yyjson.h>
#include <sqlite3.h> /* sqlite3_exec — corrupt-DB regression #557 */
#include <dirent.h> /* opendir/readdir — corrupt-DB regression #557 */
#include <sys/stat.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
Expand Down Expand Up @@ -2209,6 +2212,86 @@ TEST(tool_bad_project_name_no_overflow_issue235) {
}
#undef ISSUE235_DBNAME

/* Issue #557: a corrupt project DB must be PRESERVED (renamed to
* <name>.db.corrupt.<ts>), not silently unlink()'d. resolve_store()
* runs the integrity check on first query; a numeric root_path trips
* it (same trigger as store_integrity_corrupt_bad_path). After the
* query, the original .db must be gone but a .corrupt sidecar must
* exist so the user can inspect/recover. */
TEST(resolve_store_preserves_corrupt_db_issue557) {
char cache[256];
snprintf(cache, sizeof(cache), "/tmp/cbm-557-XXXXXX");
if (!cbm_mkdtemp(cache)) {
PASS(); /* skip if mkdtemp fails */
}

const char *saved = getenv("CBM_CACHE_DIR");
char *saved_copy = saved ? strdup(saved) : NULL;
cbm_setenv("CBM_CACHE_DIR", cache, 1);

/* Build a structurally-valid DB whose projects row is corrupt
* (numeric root_path), so cbm_store_check_integrity() returns false. */
char db_path[512];
snprintf(db_path, sizeof(db_path), "%s/regr557.db", cache);
cbm_store_t *seed = cbm_store_open_path(db_path);
ASSERT_NOT_NULL(seed);
sqlite3_exec(cbm_store_get_db(seed),
"INSERT INTO projects (name, indexed_at, root_path) "
"VALUES ('regr557', '2024-01-01', '826');",
NULL, NULL, NULL);
cbm_store_close(seed);

/* A query against the project triggers resolve_store → integrity check. */
cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL);
ASSERT_NOT_NULL(srv);
char *resp = cbm_mcp_server_handle(
srv, "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"tools/call\",\"params\":{\"name\":"
"\"search_graph\",\"arguments\":{\"label\":\"Function\","
"\"project\":\"regr557\"}}}");
ASSERT_NOT_NULL(resp);
free(resp);
cbm_mcp_server_free(srv);

/* The original DB must be gone (renamed, not left in place). */
struct stat st;
ASSERT_TRUE(stat(db_path, &st) != 0);

/* A <name>.db.corrupt.<ts> sidecar must have been created. */
bool found_corrupt = false;
DIR *d = opendir(cache);
ASSERT_NOT_NULL(d);
struct dirent *ent;
while ((ent = readdir(d)) != NULL) {
if (strncmp(ent->d_name, "regr557.db.corrupt.", 19) == 0) {
found_corrupt = true;
}
}
closedir(d);
ASSERT_TRUE(found_corrupt);

if (saved_copy) {
cbm_setenv("CBM_CACHE_DIR", saved_copy, 1);
free(saved_copy);
} else {
cbm_unsetenv("CBM_CACHE_DIR");
}
/* Clean up any regr557.db* files left in the temp cache dir. */
DIR *cd = opendir(cache);
if (cd) {
struct dirent *e;
while ((e = readdir(cd)) != NULL) {
if (strncmp(e->d_name, "regr557.db", 10) == 0) {
char p[768];
snprintf(p, sizeof(p), "%s/%s", cache, e->d_name);
cbm_unlink(p);
}
}
closedir(cd);
}
cbm_rmdir(cache);
PASS();
}

/* ══════════════════════════════════════════════════════════════════
* SUITE
* ══════════════════════════════════════════════════════════════════ */
Expand Down Expand Up @@ -2353,4 +2436,5 @@ SUITE(mcp) {
RUN_TEST(snippet_include_neighbors_enabled);
RUN_TEST(snippet_source_invalid_utf8);
RUN_TEST(tool_bad_project_name_no_overflow_issue235);
RUN_TEST(resolve_store_preserves_corrupt_db_issue557);
}
Loading