@@ -6,7 +6,7 @@ use anyhow::{Context, Result};
 use ignore::WalkBuilder;
 use rayon::prelude::*;
 use sha2::{Digest, Sha256};
-use tracing::{info, warn};
+use tracing::info;
 
 use crate::chunker::{chunk_markdown, split_oversized_chunks};
 use crate::config::Config;
@@ -141,14 +141,6 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result<In
     let store = Store::open(&db_path)?;
 
     let hnsw_dir = data_dir.join("hnsw");
-    let mut hnsw = if rebuild || !hnsw_dir.join("engraph.hnsw.data").exists() {
-        HnswIndex::new(100_000)
-    } else {
-        HnswIndex::load(&hnsw_dir).unwrap_or_else(|e| {
-            warn!("failed to load HNSW index, creating new: {e:#}");
-            HnswIndex::new(100_000)
-        })
-    };
 
     // If rebuild, treat everything as new.
     let files = walk_vault(vault_path, &config.exclude)?;
@@ -286,14 +278,23 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result<In
         });
     }
 
-    // Step 8: Serial write — insert files + chunks into store, vectors into HNSW.
+    // Step 8: Serial write — insert files + chunks into store with vectors.
+    let mut next_vector_id: u64 = {
+        // Get the max existing vector_id to avoid collisions.
+        let all_existing = store.get_all_vectors().unwrap_or_default();
+        all_existing.iter().map(|(id, _)| *id).max().map_or(0, |m| m + 1)
+    };
+
     for result in &results {
         let file_id =
             store.insert_file(&result.rel_path, &result.hash, result.mtime, &result.tags)?;
 
         for (heading, snippet, vector, token_count) in &result.chunks {
-            let vector_id = hnsw.insert(vector);
-            store.insert_chunk(file_id, heading, snippet, vector_id, *token_count as i64)?;
+            let vector_id = next_vector_id;
+            next_vector_id += 1;
+            store.insert_chunk_with_vector(
+                file_id, heading, snippet, vector_id, *token_count as i64, vector,
+            )?;
         }
     }
 
@@ -310,16 +311,19 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result<In
         ),
     )?;
 
-    // Step 10: Check tombstone ratio, auto-rebuild if >20%.
-    let stats = store.stats()?;
-    let total_vectors = stats.chunk_count + stats.tombstone_count;
-    if total_vectors > 0 && stats.tombstone_count * 100 / total_vectors > 20 {
-        info!(
-            tombstone_ratio = format!("{}%", stats.tombstone_count * 100 / total_vectors),
-            "tombstone ratio exceeds 20%, consider running with --rebuild"
-        );
+    // Step 10: Rebuild HNSW index from all vectors in SQLite.
+    // hnsw_rs doesn't support appending after load, so we always rebuild.
+    let all_vectors = store.get_all_vectors()?;
+    let mut hnsw = HnswIndex::new(all_vectors.len().max(1000));
+    for (vid, vector) in &all_vectors {
+        hnsw.insert_with_id(vector, *vid);
     }
 
+    info!(
+        vectors = all_vectors.len(),
+        "rebuilt HNSW index from stored vectors"
+    );
+
     // Step 11: Save HNSW index to disk.
     hnsw.save(&hnsw_dir)?;
 
0 commit comments