@@ -107,6 +107,9 @@ enum {
107107// SQLite text serial type offset: serial_type = len*2 + TEXT_SERIAL_BASE.
108108#define TEXT_SERIAL_BASE 13
109109
110+ // SQLite blob serial type offset: serial_type = len*2 + BLOB_SERIAL_BASE.
111+ #define BLOB_SERIAL_BASE 12
112+
110113// SQLite integer storage range limits.
111114#define INT8_MAX_VAL 127
112115#define INT16_MAX_VAL 32767
@@ -360,6 +363,16 @@ static void rec_add_text(RecordBuilder *r, const char *s) {
360363 }
361364}
362365
366+ static void rec_add_blob (RecordBuilder * r , const uint8_t * data , int len ) {
367+ int64_t st = len > 0 ? (int64_t )len * 2 + BLOB_SERIAL_BASE : 0 ;
368+ uint8_t vbuf [VARINT_MAX_BYTES ];
369+ int vlen = put_varint (vbuf , st );
370+ dynbuf_append (& r -> header , vbuf , vlen );
371+ if (len > 0 && data ) {
372+ dynbuf_append (& r -> body , data , len );
373+ }
374+ }
375+
363376// Finalize: returns the complete record bytes (header_len + header + body).
364377// Caller must free the returned buffer.
365378static uint8_t * rec_finalize (RecordBuilder * r , int * out_len ) {
@@ -718,6 +731,21 @@ static uint8_t *build_edge_record(const CBMDumpEdge *e, int *out_len) {
718731 return data ;
719732}
720733
734+ // Build a node_vectors table record: (node_id, project, vector)
735+ // Includes node_id in the record body (same pattern as build_node_record).
736+ static uint8_t * build_vector_record (const CBMDumpVector * v , int * out_len ) {
737+ RecordBuilder r ;
738+ rec_init (& r );
739+
740+ rec_add_int (& r , v -> node_id );
741+ rec_add_text (& r , v -> project );
742+ rec_add_blob (& r , v -> vector , v -> vector_len );
743+
744+ uint8_t * data = rec_finalize (& r , out_len );
745+ rec_free (& r );
746+ return data ;
747+ }
748+
721749// Build a projects table record: (name, indexed_at, root_path)
722750static uint8_t * build_project_record (const char * name , const char * indexed_at ,
723751 const char * root_path , int * out_len ) {
@@ -1462,10 +1490,13 @@ typedef struct {
14621490 int node_count ;
14631491 CBMDumpEdge * edges ;
14641492 int edge_count ;
1493+ CBMDumpVector * vectors ;
1494+ int vector_count ;
14651495} write_db_ctx_t ;
14661496
1467- /* Phase 1: Write node + edge data tables (streaming). */
1468- static int write_data_tables (write_db_ctx_t * w , uint32_t * nodes_root , uint32_t * edges_root ) {
1497+ /* Phase 1: Write node + edge + vector data tables (streaming). */
1498+ static int write_data_tables (write_db_ctx_t * w , uint32_t * nodes_root , uint32_t * edges_root ,
1499+ uint32_t * vectors_root ) {
14691500 if (w -> node_count > 0 ) {
14701501 PageBuilder pb ;
14711502 pb_init (& pb , w -> fp , w -> next_page , false);
@@ -1501,6 +1532,26 @@ static int write_data_tables(write_db_ctx_t *w, uint32_t *nodes_root, uint32_t *
15011532 } else {
15021533 * edges_root = write_table_btree (w -> fp , & w -> next_page , NULL , NULL , NULL , 0 , false);
15031534 }
1535+
1536+ /* node_vectors table — uses node_id as rowid (not AUTOINCREMENT) */
1537+ if (w -> vector_count > 0 && w -> vectors ) {
1538+ PageBuilder pb ;
1539+ pb_init (& pb , w -> fp , w -> next_page , false);
1540+ for (int i = 0 ; i < w -> vector_count ; i ++ ) {
1541+ int rec_len ;
1542+ uint8_t * rec = build_vector_record (& w -> vectors [i ], & rec_len );
1543+ if (!rec ) {
1544+ return ERR_WRITE_FAILED ;
1545+ }
1546+ pb_add_table_cell_with_flush (& pb , w -> vectors [i ].node_id , rec , rec_len ,
1547+ i > 0 ? w -> vectors [i - SKIP_ONE ].node_id : 0 );
1548+ free (rec );
1549+ }
1550+ * vectors_root =
1551+ pb_finalize_table (& pb , & w -> next_page , w -> vectors [w -> vector_count - SKIP_ONE ].node_id );
1552+ } else {
1553+ * vectors_root = write_table_btree (w -> fp , & w -> next_page , NULL , NULL , NULL , 0 , false);
1554+ }
15041555 return 0 ;
15051556}
15061557
@@ -1548,7 +1599,7 @@ static void write_metadata_tables(write_db_ctx_t *w, uint32_t *projects_root,
15481599
15491600int cbm_write_db (const char * path , const char * project , const char * root_path ,
15501601 const char * indexed_at , CBMDumpNode * nodes , int node_count , CBMDumpEdge * edges ,
1551- int edge_count ) {
1602+ int edge_count , CBMDumpVector * vectors , int vector_count ) {
15521603 FILE * fp = fopen (path , "wb" );
15531604 if (!fp ) {
15541605 return CBM_NOT_FOUND ;
@@ -1562,12 +1613,15 @@ int cbm_write_db(const char *path, const char *project, const char *root_path,
15621613 .nodes = nodes ,
15631614 .node_count = node_count ,
15641615 .edges = edges ,
1565- .edge_count = edge_count };
1616+ .edge_count = edge_count ,
1617+ .vectors = vectors ,
1618+ .vector_count = vector_count };
15661619
1567- // Phase 1: Data tables (streaming node + edge records)
1620+ // Phase 1: Data tables (streaming node + edge + vector records)
15681621 uint32_t nodes_root ;
15691622 uint32_t edges_root ;
1570- int rc = write_data_tables (& w , & nodes_root , & edges_root );
1623+ uint32_t vectors_root ;
1624+ int rc = write_data_tables (& w , & nodes_root , & edges_root , & vectors_root );
15711625 if (rc != 0 ) {
15721626 (void )fclose (fp );
15731627 return rc ;
@@ -1753,6 +1807,9 @@ int cbm_write_db(const char *path, const char *project, const char *root_path,
17531807 "NULL,\n\t\t\tupdated_at TEXT NOT NULL\n\t\t)" },
17541808 {"index" , "sqlite_autoindex_project_summaries_1" , "project_summaries" ,
17551809 autoindex_summaries_root , NULL },
1810+ {"table" , "node_vectors" , "node_vectors" , vectors_root ,
1811+ "CREATE TABLE node_vectors (\n\t\tnode_id INTEGER PRIMARY KEY,\n\t\tproject TEXT NOT "
1812+ "NULL,\n\t\tvector BLOB NOT NULL\n\t)" },
17561813 {"table" , "sqlite_sequence" , "sqlite_sequence" , sqlite_seq_root ,
17571814 "CREATE TABLE sqlite_sequence(name,seq)" },
17581815 };
0 commit comments