@@ -35,6 +35,16 @@ pub struct FtsResult {
3535 pub snippet : String ,
3636}
3737
/// Statistics about edges in the graph.
///
/// Produced by `Store::get_edge_stats`. All values are row counts taken from
/// the `edges` and `files` tables at the time of the call.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct EdgeStats {
    /// Number of rows in the `edges` table, regardless of edge type.
    pub total_edges: usize,
    /// Number of edges whose `edge_type` is `'wikilink'`.
    pub wikilink_count: usize,
    /// Number of edges whose `edge_type` is `'mention'`.
    pub mention_count: usize,
    /// Number of files that appear as the source or the target of at least one edge.
    pub connected_file_count: usize,
    /// Number of files that appear in no edge at all
    /// (total file count minus `connected_file_count`).
    pub isolated_file_count: usize,
}
47+
3848/// Summary statistics for the store.
3949#[ derive( Debug ) ]
4050pub struct StoreStats {
@@ -660,6 +670,113 @@ impl Store {
660670 }
661671 Ok ( ids)
662672 }
673+
674+ // ── Graph helpers ────────────────────────────────────────────
675+
676+ /// Get neighbor file IDs within N hops via wikilinks.
677+ /// Uses Rust-side BFS, not recursive SQL CTE.
678+ pub fn get_neighbors ( & self , file_id : i64 , depth : usize ) -> Result < Vec < ( i64 , usize ) > > {
679+ use std:: collections:: VecDeque ;
680+ let mut visited = HashSet :: new ( ) ;
681+ visited. insert ( file_id) ;
682+ let mut queue = VecDeque :: new ( ) ;
683+ let mut results = Vec :: new ( ) ;
684+ queue. push_back ( ( file_id, 0usize ) ) ;
685+ while let Some ( ( current, current_depth) ) = queue. pop_front ( ) {
686+ if current_depth >= depth {
687+ continue ;
688+ }
689+ let outgoing = self . get_outgoing ( current, Some ( "wikilink" ) ) ?;
690+ for ( neighbor_id, _) in outgoing {
691+ if visited. insert ( neighbor_id) {
692+ let hop = current_depth + 1 ;
693+ results. push ( ( neighbor_id, hop) ) ;
694+ queue. push_back ( ( neighbor_id, hop) ) ;
695+ }
696+ }
697+ }
698+ Ok ( results)
699+ }
700+
701+ /// Find files that share at least one tag with the given file.
702+ pub fn get_shared_tags_files ( & self , file_id : i64 , limit : usize ) -> Result < Vec < i64 > > {
703+ let mut stmt = self . conn . prepare (
704+ "SELECT DISTINCT f2.id
705+ FROM files f1
706+ JOIN files f2 ON f2.id != f1.id
707+ WHERE f1.id = ?1
708+ AND EXISTS (
709+ SELECT 1 FROM json_each(f1.tags) t1
710+ JOIN json_each(f2.tags) t2 ON t1.value = t2.value
711+ )
712+ LIMIT ?2" ,
713+ ) ?;
714+ let rows = stmt. query_map ( params ! [ file_id, limit as i64 ] , |row| row. get :: < _ , i64 > ( 0 ) ) ?;
715+ let mut results = Vec :: new ( ) ;
716+ for row in rows {
717+ results. push ( row?) ;
718+ }
719+ Ok ( results)
720+ }
721+
722+ /// Check if a file's FTS5 content contains a term. Escapes for FTS5.
723+ pub fn file_contains_term ( & self , file_id : i64 , term : & str ) -> Result < bool > {
724+ let escaped = term. replace ( '"' , "\" \" " ) ;
725+ let query = format ! ( "\" {}\" " , escaped) ;
726+ let result: Result < i64 , _ > = self . conn . query_row (
727+ "SELECT 1 FROM chunks_fts WHERE chunks_fts MATCH ?1 AND file_id = ?2 LIMIT 1" ,
728+ params ! [ query, file_id] ,
729+ |row| row. get ( 0 ) ,
730+ ) ;
731+ Ok ( result. is_ok ( ) )
732+ }
733+
734+ /// Get the best (highest token_count) chunk for a file.
735+ pub fn get_best_chunk_for_file ( & self , file_id : i64 ) -> Result < Option < ( String , String ) > > {
736+ let mut stmt = self . conn . prepare (
737+ "SELECT heading, snippet FROM chunks WHERE file_id = ?1 ORDER BY token_count DESC LIMIT 1" ,
738+ ) ?;
739+ let mut rows = stmt. query_map ( params ! [ file_id] , |row| {
740+ Ok ( ( row. get :: < _ , String > ( 0 ) ?, row. get :: < _ , String > ( 1 ) ?) )
741+ } ) ?;
742+ match rows. next ( ) {
743+ Some ( r) => Ok ( Some ( r?) ) ,
744+ None => Ok ( None ) ,
745+ }
746+ }
747+
748+ /// Get statistics about edges in the graph.
749+ pub fn get_edge_stats ( & self ) -> Result < EdgeStats > {
750+ let total: i64 = self
751+ . conn
752+ . query_row ( "SELECT COUNT(*) FROM edges" , [ ] , |r| r. get ( 0 ) ) ?;
753+ let wikilinks: i64 = self . conn . query_row (
754+ "SELECT COUNT(*) FROM edges WHERE edge_type = 'wikilink'" ,
755+ [ ] ,
756+ |r| r. get ( 0 ) ,
757+ ) ?;
758+ let mentions: i64 = self . conn . query_row (
759+ "SELECT COUNT(*) FROM edges WHERE edge_type = 'mention'" ,
760+ [ ] ,
761+ |r| r. get ( 0 ) ,
762+ ) ?;
763+ let connected: i64 = self . conn . query_row (
764+ "SELECT COUNT(DISTINCT id) FROM files WHERE id IN \
765+ (SELECT from_file FROM edges UNION SELECT to_file FROM edges)",
766+ [ ] ,
767+ |r| r. get ( 0 ) ,
768+ ) ?;
769+ let total_files: i64 = self
770+ . conn
771+ . query_row ( "SELECT COUNT(*) FROM files" , [ ] , |r| r. get ( 0 ) ) ?;
772+ Ok ( EdgeStats {
773+ total_edges : total as usize ,
774+ wikilink_count : wikilinks as usize ,
775+ mention_count : mentions as usize ,
776+ connected_file_count : connected as usize ,
777+ isolated_file_count : ( total_files - connected) as usize ,
778+ } )
779+ }
663780}
664781
665782fn parse_tags ( json : & str ) -> Vec < String > {
@@ -1009,4 +1126,166 @@ mod tests {
10091126 let inc = store. get_incoming ( b, Some ( "mention" ) ) . unwrap ( ) ;
10101127 assert ! ( inc. is_empty( ) ) ;
10111128 }
1129+
1130+ // ── Graph helper tests ─────────────────────────────────────
1131+
/// With depth 1, only the direct wikilink targets of the start file are
/// returned, each tagged with hop count 1.
#[test]
fn test_get_neighbors_depth_1() {
    let store = Store::open_memory().unwrap();
    let add_file = |path: &str, hash: &str| {
        store
            .insert_file(path, hash, 100, &[], &generate_docid(path))
            .unwrap()
    };
    let f1 = add_file("n/f1.md", "h1");
    let f2 = add_file("n/f2.md", "h2");
    let f3 = add_file("n/f3.md", "h3");

    // f1 links directly to both f2 and f3.
    for target in &[f2, f3] {
        store.insert_edge(f1, *target, "wikilink").unwrap();
    }

    let hops = store.get_neighbors(f1, 1).unwrap();
    assert_eq!(hops.len(), 2);

    // Both targets are present...
    for wanted in &[f2, f3] {
        assert!(hops.iter().any(|(id, _)| id == wanted));
    }
    // ...and every entry is exactly one hop away.
    assert!(hops.iter().all(|&(_, hop)| hop == 1));
}
1160+
/// With depth 2 on a chain f1 -> f2 -> f3 -> f4, the traversal reports f2 at
/// hop 1 and f3 at hop 2, and stops before f4.
#[test]
fn test_get_neighbors_depth_2() {
    let store = Store::open_memory().unwrap();
    let add_file = |path: &str, hash: &str| {
        store
            .insert_file(path, hash, 100, &[], &generate_docid(path))
            .unwrap()
    };
    let f1 = add_file("n/f1.md", "h1");
    let f2 = add_file("n/f2.md", "h2");
    let f3 = add_file("n/f3.md", "h3");
    let f4 = add_file("n/f4.md", "h4");

    // Build the chain f1 -> f2 -> f3 -> f4.
    for &(from, to) in &[(f1, f2), (f2, f3), (f3, f4)] {
        store.insert_edge(from, to, "wikilink").unwrap();
    }

    let mut reached = store.get_neighbors(f1, 2).unwrap();
    assert_eq!(reached.len(), 2);

    // Compare order-independently: exactly f2 at hop 1 and f3 at hop 2.
    reached.sort_unstable();
    let mut expected = vec![(f2, 1usize), (f3, 2usize)];
    expected.sort_unstable();
    assert_eq!(reached, expected);

    // f4 is three hops away and must not be reported.
    assert!(reached.iter().all(|&(id, _)| id != f4));
}
1191+
/// Only the file sharing the "rust" tag with f1 is returned; a file with
/// unrelated tags is not.
#[test]
fn test_get_shared_tags_files() {
    let store = Store::open_memory().unwrap();
    let add_tagged = |path: &str, hash: &str, tags: &[&str]| {
        let owned: Vec<String> = tags.iter().map(|tag| tag.to_string()).collect();
        store
            .insert_file(path, hash, 100, &owned[..], &generate_docid(path))
            .unwrap()
    };

    let f1 = add_tagged("n/f1.md", "h1", &["rust", "cli"]);
    let f2 = add_tagged("n/f2.md", "h2", &["rust", "web"]);
    // Shares no tag with f1.
    let _f3 = add_tagged("n/f3.md", "h3", &["python"]);

    let shared = store.get_shared_tags_files(f1, 10).unwrap();
    assert_eq!(shared, vec![f2]);
}
1227+
/// Terms present in the indexed chunk are found; an absent term is not.
#[test]
fn test_file_contains_term() {
    let store = Store::open_memory().unwrap();
    let path = "n/fts.md";
    let f1 = store
        .insert_file(path, "h1", 100, &[], &generate_docid(path))
        .unwrap();

    store
        .insert_fts_chunk(f1, 0, "BRE-2579 delivery date extension")
        .unwrap();

    // (term, whether the chunk above should match it)
    let cases = [("delivery", true), ("extension", true), ("checkout", false)];
    for &(term, expected) in cases.iter() {
        let found = store.file_contains_term(f1, term).unwrap();
        assert_eq!(found, expected);
    }
}
1243+
/// Of two chunks, the one with the larger token count (100 vs 10) is chosen.
#[test]
fn test_get_best_chunk_for_file() {
    let store = Store::open_memory().unwrap();
    let path = "n/best.md";
    let f1 = store
        .insert_file(path, "h1", 100, &[], &generate_docid(path))
        .unwrap();

    store
        .insert_chunk(f1, "Small heading", "small snippet", 1, 10)
        .unwrap();
    store
        .insert_chunk(f1, "Big heading", "big snippet", 2, 100)
        .unwrap();

    let best = store.get_best_chunk_for_file(f1).unwrap();
    let expected = ("Big heading".to_string(), "big snippet".to_string());
    assert_eq!(best, Some(expected));
}
1262+
/// Three files joined by two wikilinks and one mention, plus one file with no
/// edges, produce the expected edge and connectivity counts.
#[test]
fn test_get_edge_stats() {
    let store = Store::open_memory().unwrap();
    let add_file = |path: &str, hash: &str| {
        store
            .insert_file(path, hash, 100, &[], &generate_docid(path))
            .unwrap()
    };
    let a = add_file("n/a.md", "ha");
    let b = add_file("n/b.md", "hb");
    let c = add_file("n/c.md", "hc");
    // d takes part in no edge, so it is the single isolated file.
    let _d = add_file("n/d.md", "hd");

    let edges = [(a, b, "wikilink"), (a, c, "wikilink"), (b, c, "mention")];
    for &(from, to, kind) in edges.iter() {
        store.insert_edge(from, to, kind).unwrap();
    }

    let stats = store.get_edge_stats().unwrap();

    // Edge counts: total, wikilinks, mentions.
    assert_eq!(
        (stats.total_edges, stats.wikilink_count, stats.mention_count),
        (3, 2, 1)
    );
    // Connected: a, b, c. Isolated: d.
    assert_eq!(stats.connected_file_count, 3);
    assert_eq!(stats.isolated_file_count, 1);
}
10121291}
0 commit comments