Skip to content

Commit 6493347

Browse files
devwhodevs and claude committed
feat: graph helper methods — neighbors, shared tags, FTS term check, edge stats
BFS-based neighbor traversal (Rust loop, not recursive CTE). Shared tags via JSON query. FTS5 term presence check with escaping. Best chunk lookup for snippet resolution. Edge statistics. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7a71cdb commit 6493347

1 file changed

Lines changed: 279 additions & 0 deletions

File tree

src/store.rs

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ pub struct FtsResult {
3535
pub snippet: String,
3636
}
3737

38+
/// Aggregate counts describing the link graph, as produced by `Store::get_edge_stats`.
///
/// Besides `Debug`, the struct derives `Clone`, `Default`, `PartialEq` and `Eq`
/// so that callers and tests can copy a snapshot, start from all-zero counts,
/// and compare two snapshots directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct EdgeStats {
    /// Number of rows in the `edges` table, regardless of edge type.
    pub total_edges: usize,
    /// Number of edges whose `edge_type` is `'wikilink'`.
    pub wikilink_count: usize,
    /// Number of edges whose `edge_type` is `'mention'`.
    pub mention_count: usize,
    /// Number of files that appear as the source or target of at least one edge.
    pub connected_file_count: usize,
    /// Number of files that do not appear in any edge
    /// (total file count minus `connected_file_count`).
    pub isolated_file_count: usize,
}
47+
3848
/// Summary statistics for the store.
3949
#[derive(Debug)]
4050
pub struct StoreStats {
@@ -660,6 +670,113 @@ impl Store {
660670
}
661671
Ok(ids)
662672
}
673+
674+
// ── Graph helpers ────────────────────────────────────────────
675+
676+
/// Get neighbor file IDs within N hops via wikilinks.
677+
/// Uses Rust-side BFS, not recursive SQL CTE.
678+
pub fn get_neighbors(&self, file_id: i64, depth: usize) -> Result<Vec<(i64, usize)>> {
679+
use std::collections::VecDeque;
680+
let mut visited = HashSet::new();
681+
visited.insert(file_id);
682+
let mut queue = VecDeque::new();
683+
let mut results = Vec::new();
684+
queue.push_back((file_id, 0usize));
685+
while let Some((current, current_depth)) = queue.pop_front() {
686+
if current_depth >= depth {
687+
continue;
688+
}
689+
let outgoing = self.get_outgoing(current, Some("wikilink"))?;
690+
for (neighbor_id, _) in outgoing {
691+
if visited.insert(neighbor_id) {
692+
let hop = current_depth + 1;
693+
results.push((neighbor_id, hop));
694+
queue.push_back((neighbor_id, hop));
695+
}
696+
}
697+
}
698+
Ok(results)
699+
}
700+
701+
/// Find files that share at least one tag with the given file.
702+
pub fn get_shared_tags_files(&self, file_id: i64, limit: usize) -> Result<Vec<i64>> {
703+
let mut stmt = self.conn.prepare(
704+
"SELECT DISTINCT f2.id
705+
FROM files f1
706+
JOIN files f2 ON f2.id != f1.id
707+
WHERE f1.id = ?1
708+
AND EXISTS (
709+
SELECT 1 FROM json_each(f1.tags) t1
710+
JOIN json_each(f2.tags) t2 ON t1.value = t2.value
711+
)
712+
LIMIT ?2",
713+
)?;
714+
let rows = stmt.query_map(params![file_id, limit as i64], |row| row.get::<_, i64>(0))?;
715+
let mut results = Vec::new();
716+
for row in rows {
717+
results.push(row?);
718+
}
719+
Ok(results)
720+
}
721+
722+
/// Check if a file's FTS5 content contains a term. Escapes for FTS5.
723+
pub fn file_contains_term(&self, file_id: i64, term: &str) -> Result<bool> {
724+
let escaped = term.replace('"', "\"\"");
725+
let query = format!("\"{}\"", escaped);
726+
let result: Result<i64, _> = self.conn.query_row(
727+
"SELECT 1 FROM chunks_fts WHERE chunks_fts MATCH ?1 AND file_id = ?2 LIMIT 1",
728+
params![query, file_id],
729+
|row| row.get(0),
730+
);
731+
Ok(result.is_ok())
732+
}
733+
734+
/// Get the best (highest token_count) chunk for a file.
735+
pub fn get_best_chunk_for_file(&self, file_id: i64) -> Result<Option<(String, String)>> {
736+
let mut stmt = self.conn.prepare(
737+
"SELECT heading, snippet FROM chunks WHERE file_id = ?1 ORDER BY token_count DESC LIMIT 1",
738+
)?;
739+
let mut rows = stmt.query_map(params![file_id], |row| {
740+
Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
741+
})?;
742+
match rows.next() {
743+
Some(r) => Ok(Some(r?)),
744+
None => Ok(None),
745+
}
746+
}
747+
748+
/// Get statistics about edges in the graph.
749+
pub fn get_edge_stats(&self) -> Result<EdgeStats> {
750+
let total: i64 = self
751+
.conn
752+
.query_row("SELECT COUNT(*) FROM edges", [], |r| r.get(0))?;
753+
let wikilinks: i64 = self.conn.query_row(
754+
"SELECT COUNT(*) FROM edges WHERE edge_type = 'wikilink'",
755+
[],
756+
|r| r.get(0),
757+
)?;
758+
let mentions: i64 = self.conn.query_row(
759+
"SELECT COUNT(*) FROM edges WHERE edge_type = 'mention'",
760+
[],
761+
|r| r.get(0),
762+
)?;
763+
let connected: i64 = self.conn.query_row(
764+
"SELECT COUNT(DISTINCT id) FROM files WHERE id IN \
765+
(SELECT from_file FROM edges UNION SELECT to_file FROM edges)",
766+
[],
767+
|r| r.get(0),
768+
)?;
769+
let total_files: i64 = self
770+
.conn
771+
.query_row("SELECT COUNT(*) FROM files", [], |r| r.get(0))?;
772+
Ok(EdgeStats {
773+
total_edges: total as usize,
774+
wikilink_count: wikilinks as usize,
775+
mention_count: mentions as usize,
776+
connected_file_count: connected as usize,
777+
isolated_file_count: (total_files - connected) as usize,
778+
})
779+
}
663780
}
664781

665782
fn parse_tags(json: &str) -> Vec<String> {
@@ -1009,4 +1126,166 @@ mod tests {
10091126
let inc = store.get_incoming(b, Some("mention")).unwrap();
10101127
assert!(inc.is_empty());
10111128
}
1129+
1130+
// ── Graph helper tests ─────────────────────────────────────
1131+
1132+
/// With a depth of 1, only the direct wikilink targets of the start file are returned.
#[test]
fn test_get_neighbors_depth_1() {
    let store = Store::open_memory().unwrap();
    // Register a file with no tags and return its id.
    let add = |path: &str, hash: &str| {
        store
            .insert_file(path, hash, 100, &[], &generate_docid(path))
            .unwrap()
    };
    let f1 = add("n/f1.md", "h1");
    let f2 = add("n/f2.md", "h2");
    let f3 = add("n/f3.md", "h3");

    // f1 links directly to both f2 and f3.
    for &target in &[f2, f3] {
        store.insert_edge(f1, target, "wikilink").unwrap();
    }

    let neighbors = store.get_neighbors(f1, 1).unwrap();
    assert_eq!(neighbors.len(), 2);

    assert!(neighbors.iter().any(|&(id, _)| id == f2));
    assert!(neighbors.iter().any(|&(id, _)| id == f3));

    // Every neighbor was reached in exactly one hop.
    for &(_, hop) in &neighbors {
        assert_eq!(hop, 1);
    }
}
1160+
1161+
/// With a depth of 2 on a four-file chain, the walk stops after the second hop.
#[test]
fn test_get_neighbors_depth_2() {
    let store = Store::open_memory().unwrap();
    // Register a file with no tags and return its id.
    let add = |path: &str, hash: &str| {
        store
            .insert_file(path, hash, 100, &[], &generate_docid(path))
            .unwrap()
    };
    let f1 = add("n/f1.md", "h1");
    let f2 = add("n/f2.md", "h2");
    let f3 = add("n/f3.md", "h3");
    let f4 = add("n/f4.md", "h4");

    // Build the chain f1 -> f2 -> f3 -> f4.
    for &(from, to) in &[(f1, f2), (f2, f3), (f3, f4)] {
        store.insert_edge(from, to, "wikilink").unwrap();
    }

    let neighbors = store.get_neighbors(f1, 2).unwrap();
    assert_eq!(neighbors.len(), 2);

    // Look up the hop count recorded for a given file, if it was reached at all.
    let hop_of = |wanted: i64| {
        neighbors
            .iter()
            .find(|&&(id, _)| id == wanted)
            .map(|&(_, hop)| hop)
    };
    assert_eq!(hop_of(f2), Some(1));
    assert_eq!(hop_of(f3), Some(2));
    // f4 is three hops away and must be excluded.
    assert_eq!(hop_of(f4), None);
}
1191+
1192+
/// Only the file that has a tag in common with the query file is reported.
#[test]
fn test_get_shared_tags_files() {
    let store = Store::open_memory().unwrap();
    // Register a file carrying the given tags and return its id.
    let add = |path: &str, hash: &str, tags: &[&str]| {
        let owned: Vec<String> = tags.iter().map(|tag| tag.to_string()).collect();
        store
            .insert_file(path, hash, 100, &owned, &generate_docid(path))
            .unwrap()
    };

    let f1 = add("n/f1.md", "h1", &["rust", "cli"]);
    let f2 = add("n/f2.md", "h2", &["rust", "web"]);
    // f3 has no tag in common with f1.
    let _f3 = add("n/f3.md", "h3", &["python"]);

    let shared = store.get_shared_tags_files(f1, 10).unwrap();
    assert_eq!(shared, vec![f2]);
}
1227+
1228+
/// Terms present in the indexed chunk are found; an absent term is not.
#[test]
fn test_file_contains_term() {
    let store = Store::open_memory().unwrap();
    let path = "n/fts.md";
    let f1 = store
        .insert_file(path, "h1", 100, &[], &generate_docid(path))
        .unwrap();

    store
        .insert_fts_chunk(f1, 0, "BRE-2579 delivery date extension")
        .unwrap();

    for present in &["delivery", "extension"] {
        assert!(store.file_contains_term(f1, present).unwrap());
    }
    let missing = store.file_contains_term(f1, "checkout").unwrap();
    assert!(!missing);
}
1243+
1244+
/// The chunk with the larger token count wins the "best chunk" lookup.
#[test]
fn test_get_best_chunk_for_file() {
    let store = Store::open_memory().unwrap();
    let path = "n/best.md";
    let f1 = store
        .insert_file(path, "h1", 100, &[], &generate_docid(path))
        .unwrap();

    // Two chunks for the same file: token counts 10 and 100.
    store
        .insert_chunk(f1, "Small heading", "small snippet", 1, 10)
        .unwrap();
    store
        .insert_chunk(f1, "Big heading", "big snippet", 2, 100)
        .unwrap();

    let (heading, snippet) = store.get_best_chunk_for_file(f1).unwrap().unwrap();
    assert_eq!(heading, "Big heading");
    assert_eq!(snippet, "big snippet");
}
1262+
1263+
/// Edge statistics count edges by type and split files into connected and isolated.
#[test]
fn test_get_edge_stats() {
    let store = Store::open_memory().unwrap();
    // Register a file with no tags and return its id.
    let add = |path: &str, hash: &str| {
        store
            .insert_file(path, hash, 100, &[], &generate_docid(path))
            .unwrap()
    };
    let a = add("n/a.md", "ha");
    let b = add("n/b.md", "hb");
    let c = add("n/c.md", "hc");
    // d takes part in no edge, so it must be counted as isolated.
    let _d = add("n/d.md", "hd");

    let edges = [(a, b, "wikilink"), (a, c, "wikilink"), (b, c, "mention")];
    for &(from, to, kind) in &edges {
        store.insert_edge(from, to, kind).unwrap();
    }

    let stats = store.get_edge_stats().unwrap();
    assert_eq!(stats.total_edges, 3);
    assert_eq!(stats.wikilink_count, 2);
    assert_eq!(stats.mention_count, 1);
    // a, b and c each appear in at least one edge.
    assert_eq!(stats.connected_file_count, 3);
    // Only d is left without any edge.
    assert_eq!(stats.isolated_file_count, 1);
}
10121291
}

0 commit comments

Comments
 (0)