Skip to content

Commit 8babe67

Browse files
author
test
committed
feat: add persistent artifact storage for team sharing
Add .codebase-memory/graph.db.zst — a zstd-compressed knowledge graph artifact that can be committed to the repo. Teammates bootstrap from the artifact instead of running a full reindex from scratch.

- Vendor zstd 1.5.7 (amalgamated build) for 8-13:1 compression
- Two-tier export: zstd -9 + index stripping for explicit index, zstd -3 for watcher/incremental auto-updates
- Import: decompress → integrity check → auto-recreate indexes
- Bootstrap in handle_index_repository: when no local DB exists but artifact is present, import first then run incremental
- Auto-create .gitattributes with merge=ours to prevent conflicts
- Fix: add missing idx_edges_url_path to create_user_indexes and url_path_gen generated column to init_schema
- 13 new tests (5 zstd wrapper + 8 artifact round-trip/edge cases)
1 parent 81385d0 commit 8babe67

File tree

18 files changed

+56740
-78
lines changed

18 files changed

+56740
-78
lines changed

.gitleaksignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# False positives: AVX-512 intrinsic variable names in vendored zstd (xxhash)
2+
internal/cbm/vendored/zstd/zstd.c:generic-api-key:13192
3+
internal/cbm/vendored/zstd/zstd.c:generic-api-key:13241

Makefile.cbm

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ GRAMMAR_SRCS = $(wildcard $(CBM_DIR)/grammar_*.c)
140140
# LZ4 + Aho-Corasick
141141
AC_LZ4_SRCS = $(CBM_DIR)/ac.c $(CBM_DIR)/lz4_store.c
142142

143+
# Zstd compression (for persistent artifacts)
144+
ZSTD_SRCS = $(CBM_DIR)/zstd_store.c
145+
143146
# Preprocessor (C++)
144147
PREPROCESSOR_SRC = $(CBM_DIR)/preprocessor.cpp
145148

@@ -190,7 +193,8 @@ PIPELINE_SRCS = \
190193
src/pipeline/pass_k8s.c \
191194
src/pipeline/pass_similarity.c \
192195
src/pipeline/pass_semantic_edges.c \
193-
src/pipeline/pass_cross_repo.c
196+
src/pipeline/pass_cross_repo.c \
197+
src/pipeline/artifact.c
194198

195199
# SimHash / MinHash module
196200
SIMHASH_SRCS = src/simhash/minhash.c
@@ -251,7 +255,7 @@ YYJSON_SRC = vendored/yyjson/yyjson.c
251255
# All production sources
252256
PROD_SRCS = $(FOUNDATION_SRCS) $(STORE_SRCS) $(CYPHER_SRCS) $(MCP_SRCS) $(DISCOVER_SRCS) $(GRAPH_BUFFER_SRCS) $(PIPELINE_SRCS) $(SIMHASH_SRCS) $(SEMANTIC_SRCS) $(TRACES_SRCS) $(WATCHER_SRCS) $(CLI_SRCS) $(UI_SRCS) $(YYJSON_SRC)
253257
EXISTING_C_SRCS = $(EXTRACTION_SRCS) $(LSP_SRCS) $(TS_RUNTIME_SRC) \
254-
$(GRAMMAR_SRCS) $(AC_LZ4_SRCS) $(SQLITE_WRITER_SRC)
258+
$(GRAMMAR_SRCS) $(AC_LZ4_SRCS) $(ZSTD_SRCS) $(SQLITE_WRITER_SRC)
255259

256260
# ── Test sources ─────────────────────────────────────────────────
257261

@@ -295,6 +299,8 @@ TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.
295299
TEST_WATCHER_SRCS = tests/test_watcher.c
296300

297301
TEST_LZ4_SRCS = tests/test_lz4.c
302+
TEST_ZSTD_SRCS = tests/test_zstd.c
303+
TEST_ARTIFACT_SRCS = tests/test_artifact.c
298304

299305
TEST_SQLITE_WRITER_SRCS = tests/test_sqlite_writer.c
300306

@@ -319,7 +325,7 @@ TEST_YAML_SRCS = tests/test_yaml.c
319325

320326
TEST_SIMHASH_SRCS = tests/test_simhash.c
321327

322-
ALL_TEST_SRCS = $(TEST_FOUNDATION_SRCS) $(TEST_EXTRACTION_SRCS) $(TEST_STORE_SRCS) $(TEST_CYPHER_SRCS) $(TEST_MCP_SRCS) $(TEST_DISCOVER_SRCS) $(TEST_GRAPH_BUFFER_SRCS) $(TEST_PIPELINE_SRCS) $(TEST_WATCHER_SRCS) $(TEST_LZ4_SRCS) $(TEST_SQLITE_WRITER_SRCS) $(TEST_GO_LSP_SRCS) $(TEST_C_LSP_SRCS) $(TEST_TRACES_SRCS) $(TEST_CLI_SRCS) $(TEST_MEM_SRCS) $(TEST_UI_SRCS) $(TEST_SECURITY_SRCS) $(TEST_YAML_SRCS) $(TEST_SIMHASH_SRCS) $(TEST_INTEGRATION_SRCS)
328+
ALL_TEST_SRCS = $(TEST_FOUNDATION_SRCS) $(TEST_EXTRACTION_SRCS) $(TEST_STORE_SRCS) $(TEST_CYPHER_SRCS) $(TEST_MCP_SRCS) $(TEST_DISCOVER_SRCS) $(TEST_GRAPH_BUFFER_SRCS) $(TEST_PIPELINE_SRCS) $(TEST_WATCHER_SRCS) $(TEST_LZ4_SRCS) $(TEST_ZSTD_SRCS) $(TEST_ARTIFACT_SRCS) $(TEST_SQLITE_WRITER_SRCS) $(TEST_GO_LSP_SRCS) $(TEST_C_LSP_SRCS) $(TEST_TRACES_SRCS) $(TEST_CLI_SRCS) $(TEST_MEM_SRCS) $(TEST_UI_SRCS) $(TEST_SECURITY_SRCS) $(TEST_YAML_SRCS) $(TEST_SIMHASH_SRCS) $(TEST_INTEGRATION_SRCS)
323329

324330

325331
# ── Build directories ────────────────────────────────────────────
@@ -423,17 +429,22 @@ $(BUILD_DIR)/test_lz4.o: $(CBM_DIR)/vendored/lz4/lz4.c | $(BUILD_DIR)
423429
$(BUILD_DIR)/test_lz4hc.o: $(CBM_DIR)/vendored/lz4/lz4hc.c | $(BUILD_DIR)
424430
$(CC) -std=c11 -D_DEFAULT_SOURCE -g -O1 $(SANITIZE) -w -I$(CBM_DIR)/vendored/lz4 -c -o $@ $<
425431

432+
# Vendored zstd (test build)
433+
ZSTD_OBJ_TEST = $(BUILD_DIR)/test_zstd.o
434+
$(BUILD_DIR)/test_zstd.o: $(CBM_DIR)/vendored/zstd/zstd.c | $(BUILD_DIR)
435+
$(CC) -std=c11 -D_DEFAULT_SOURCE -g -O1 $(SANITIZE) -w -I$(CBM_DIR)/vendored/zstd -c -o $@ $<
436+
426437
# nomic-embed-code pretrained vector blob
427438
UNIXCODER_OBJ = $(BUILD_DIR)/unixcoder_blob.o
428439
$(UNIXCODER_OBJ): $(UNIXCODER_BLOB_SRC) vendored/nomic/code_vectors.bin | $(BUILD_DIR)
429440
$(CC) -c -o $@ $<
430441

431-
OBJS_VENDORED_TEST = $(MIMALLOC_OBJ_TEST) $(SQLITE3_OBJ_TEST) $(TRE_OBJ_TEST) $(GRAMMAR_OBJS_TEST) $(TS_RUNTIME_OBJ_TEST) $(LSP_OBJ_TEST) $(PP_OBJ_TEST) $(MONGOOSE_OBJ_TEST) $(LZ4_OBJ_TEST) $(UNIXCODER_OBJ)
442+
OBJS_VENDORED_TEST = $(MIMALLOC_OBJ_TEST) $(SQLITE3_OBJ_TEST) $(TRE_OBJ_TEST) $(GRAMMAR_OBJS_TEST) $(TS_RUNTIME_OBJ_TEST) $(LSP_OBJ_TEST) $(PP_OBJ_TEST) $(MONGOOSE_OBJ_TEST) $(LZ4_OBJ_TEST) $(ZSTD_OBJ_TEST) $(UNIXCODER_OBJ)
432443

433-
$(BUILD_DIR)/test-runner: $(ALL_TEST_SRCS) $(PROD_SRCS) $(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(SQLITE_WRITER_SRC) $(OBJS_VENDORED_TEST) | $(BUILD_DIR)
444+
$(BUILD_DIR)/test-runner: $(ALL_TEST_SRCS) $(PROD_SRCS) $(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(ZSTD_SRCS) $(SQLITE_WRITER_SRC) $(OBJS_VENDORED_TEST) | $(BUILD_DIR)
434445
$(CC) $(CFLAGS_TEST) -o $@ \
435446
$(ALL_TEST_SRCS) $(PROD_SRCS) \
436-
$(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(SQLITE_WRITER_SRC) \
447+
$(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(ZSTD_SRCS) $(SQLITE_WRITER_SRC) \
437448
$(OBJS_VENDORED_TEST) \
438449
$(LDFLAGS_TEST)
439450

@@ -472,14 +483,19 @@ $(BUILD_DIR)/prod_lz4.o: $(CBM_DIR)/vendored/lz4/lz4.c | $(BUILD_DIR)
472483
$(BUILD_DIR)/prod_lz4hc.o: $(CBM_DIR)/vendored/lz4/lz4hc.c | $(BUILD_DIR)
473484
$(CC) -std=c11 -D_DEFAULT_SOURCE -O2 -w -I$(CBM_DIR)/vendored/lz4 -c -o $@ $<
474485

475-
OBJS_VENDORED_PROD = $(MIMALLOC_OBJ_PROD) $(SQLITE3_OBJ_PROD) $(TRE_OBJ_PROD) $(GRAMMAR_OBJS_PROD) $(TS_RUNTIME_OBJ_PROD) $(LSP_OBJ_PROD) $(PP_OBJ_PROD) $(MONGOOSE_OBJ_PROD) $(LZ4_OBJ_PROD) $(UNIXCODER_OBJ)
486+
# Vendored zstd (compiled separately, not unity-built via zstd_store.c)
487+
ZSTD_OBJ_PROD = $(BUILD_DIR)/prod_zstd.o
488+
$(BUILD_DIR)/prod_zstd.o: $(CBM_DIR)/vendored/zstd/zstd.c | $(BUILD_DIR)
489+
$(CC) -std=c11 -D_DEFAULT_SOURCE -O2 -w -I$(CBM_DIR)/vendored/zstd -c -o $@ $<
490+
491+
OBJS_VENDORED_PROD = $(MIMALLOC_OBJ_PROD) $(SQLITE3_OBJ_PROD) $(TRE_OBJ_PROD) $(GRAMMAR_OBJS_PROD) $(TS_RUNTIME_OBJ_PROD) $(LSP_OBJ_PROD) $(PP_OBJ_PROD) $(MONGOOSE_OBJ_PROD) $(LZ4_OBJ_PROD) $(ZSTD_OBJ_PROD) $(UNIXCODER_OBJ)
476492

477493
MAIN_SRC = src/main.c
478494

479-
$(BUILD_DIR)/codebase-memory-mcp: $(MAIN_SRC) $(PROD_SRCS) $(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(SQLITE_WRITER_SRC) $(OBJS_VENDORED_PROD) | $(BUILD_DIR)
495+
$(BUILD_DIR)/codebase-memory-mcp: $(MAIN_SRC) $(PROD_SRCS) $(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(ZSTD_SRCS) $(SQLITE_WRITER_SRC) $(OBJS_VENDORED_PROD) | $(BUILD_DIR)
480496
$(CC) $(CFLAGS_PROD) -o $@ \
481497
$(MAIN_SRC) $(PROD_SRCS) \
482-
$(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(SQLITE_WRITER_SRC) \
498+
$(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(ZSTD_SRCS) $(SQLITE_WRITER_SRC) \
483499
$(OBJS_VENDORED_PROD) \
484500
$(LDFLAGS)
485501

@@ -504,7 +520,7 @@ embed: frontend
504520
cbm-with-ui: embed $(OBJS_VENDORED_PROD)
505521
$(CC) $(CFLAGS_PROD) -o $(BUILD_DIR)/codebase-memory-mcp \
506522
$(MAIN_SRC) $(PROD_SRCS_WITH_ASSETS) \
507-
$(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(SQLITE_WRITER_SRC) \
523+
$(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(ZSTD_SRCS) $(SQLITE_WRITER_SRC) \
508524
$(OBJS_VENDORED_PROD) \
509525
$(wildcard $(BUILD_DIR)/embedded/embed_*.o) \
510526
$(LDFLAGS)
@@ -530,7 +546,7 @@ SYSROOT_FLAG = $(if $(SYSROOT),-isysroot $(SYSROOT),)
530546
LINT_SRCS = $(FOUNDATION_SRCS) $(STORE_SRCS) $(CYPHER_SRCS) $(MCP_SRCS) \
531547
$(DISCOVER_SRCS) $(GRAPH_BUFFER_SRCS) $(PIPELINE_SRCS) $(SIMHASH_SRCS) $(SEMANTIC_SRCS) \
532548
$(TRACES_SRCS) $(WATCHER_SRCS) $(CLI_SRCS) $(EXTRACTION_SRCS) $(AC_LZ4_SRCS) \
533-
$(SQLITE_WRITER_SRC) $(MAIN_SRC)
549+
$(ZSTD_SRCS) $(SQLITE_WRITER_SRC) $(MAIN_SRC)
534550
LINT_HDRS = $(wildcard src/**/*.h src/*.h $(CBM_DIR)/*.h)
535551
LINT_TEST_SRCS = $(ALL_TEST_SRCS)
536552

0 commit comments

Comments
 (0)