Skip to content

Commit db3a359

Browse files
devwhodevs and claude committed
feat: graph agent — expand search results by following wikilinks
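Expands search results 1-2 hops along wikilinks, decaying the seed score by hop distance (0.8x for 1-hop, 0.5x for 2-hop). Relevance filter: an expanded file must contain a query term (FTS5) or share tags with its seed. Multi-parent merge: when several seeds reach the same file, the highest score wins. Seed files are skipped.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>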
1 parent fe1a31a commit db3a359

1 file changed

Lines changed: 312 additions & 1 deletion

File tree

src/graph.rs

Lines changed: 312 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
use std::collections::HashSet;
1+
use std::collections::{HashMap, HashSet, hash_map::Entry};
2+
3+
use anyhow::Result;
4+
5+
use crate::fusion::RankedResult;
6+
use crate::store::Store;
27

38
/// Extract unique wikilink targets from text.
49
/// Handles [[Target]], [[Target|Display]], [[Target#Heading]].
@@ -46,9 +51,112 @@ pub fn extract_query_terms(query: &str) -> Vec<String> {
4651
.collect()
4752
}
4853

54+
/// Expand search results by following graph connections.
55+
/// Seeds are the top results from semantic + FTS lanes.
56+
/// Returns expanded results suitable for RRF fusion.
57+
pub fn graph_expand(
58+
store: &Store,
59+
seeds: &[RankedResult],
60+
query: &str,
61+
max_hops: usize,
62+
max_expansions: usize,
63+
) -> Result<Vec<RankedResult>> {
64+
let query_terms = extract_query_terms(query);
65+
let seed_ids: HashSet<i64> = seeds.iter().map(|s| s.file_id).collect();
66+
67+
// Track best score per expanded file (multi-parent merge: take highest)
68+
// (file_id) → (best_score, hop_depth, seed_file_path)
69+
let mut expansions: HashMap<i64, (f64, usize, String)> = HashMap::new();
70+
71+
for seed in seeds {
72+
let neighbors = store.get_neighbors(seed.file_id, max_hops)?;
73+
74+
for (neighbor_id, hop) in neighbors {
75+
if seed_ids.contains(&neighbor_id) {
76+
continue;
77+
}
78+
79+
let decay = match hop {
80+
1 => 0.8,
81+
2 => 0.5,
82+
_ => 0.3,
83+
};
84+
let mut expansion_score = seed.score * decay;
85+
86+
// Relevance filter: must match a query term via FTS or share tags
87+
let term_match = query_terms
88+
.iter()
89+
.any(|t| store.file_contains_term(neighbor_id, t).unwrap_or(false));
90+
91+
if !term_match {
92+
let shared = store
93+
.get_shared_tags_files(neighbor_id, 100)
94+
.unwrap_or_default();
95+
if shared.contains(&seed.file_id) {
96+
expansion_score *= 0.7;
97+
} else {
98+
continue; // tangential — skip
99+
}
100+
}
101+
102+
// Multi-parent merge: keep highest score
103+
match expansions.entry(neighbor_id) {
104+
Entry::Occupied(mut e) => {
105+
if expansion_score > e.get().0 {
106+
e.insert((expansion_score, hop, seed.file_path.clone()));
107+
}
108+
}
109+
Entry::Vacant(e) => {
110+
e.insert((expansion_score, hop, seed.file_path.clone()));
111+
}
112+
}
113+
}
114+
}
115+
116+
// Sort by score descending, cap at max_expansions
117+
let mut results: Vec<(i64, f64, usize, String)> = expansions
118+
.into_iter()
119+
.map(|(fid, (score, hop, seed))| (fid, score, hop, seed))
120+
.collect();
121+
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
122+
results.truncate(max_expansions);
123+
124+
// Convert to RankedResult
125+
let mut ranked = Vec::new();
126+
for (file_id, score, _hop, _seed) in results {
127+
let file = store.get_file_by_id(file_id)?;
128+
let (file_path, docid) = match file {
129+
Some(f) => (f.path, f.docid),
130+
None => continue,
131+
};
132+
let (heading, snippet) = store
133+
.get_best_chunk_for_file(file_id)?
134+
.unwrap_or_else(|| (String::new(), String::new()));
135+
let heading = if heading.is_empty() {
136+
None
137+
} else {
138+
Some(heading)
139+
};
140+
141+
ranked.push(RankedResult {
142+
file_path,
143+
file_id,
144+
score,
145+
heading,
146+
snippet,
147+
docid,
148+
});
149+
}
150+
151+
Ok(ranked)
152+
}
153+
49154
#[cfg(test)]
50155
mod tests {
51156
use super::*;
157+
use crate::docid::generate_docid;
158+
use crate::fusion::RankedResult;
159+
use crate::store::Store;
52160

53161
#[test]
54162
fn test_extract_wikilink_targets() {
@@ -98,4 +206,207 @@ mod tests {
98206
let terms = extract_query_terms("a is the big query");
99207
assert_eq!(terms, vec!["the", "big", "query"]);
100208
}
209+
210+
/// A file linked from the seed that contains the query term is returned with
/// the 1-hop decay applied; a file with no edge to the seed is not returned.
#[test]
fn test_graph_expand_basic() {
    let store = Store::open_memory().unwrap();
    let f1 = store
        .insert_file(
            "seed.md",
            "h1",
            100,
            &["rust".into()],
            &generate_docid("seed.md"),
        )
        .unwrap();
    let f2 = store
        .insert_file(
            "linked.md",
            "h2",
            100,
            &["rust".into()],
            &generate_docid("linked.md"),
        )
        .unwrap();
    // No edge points at this file, so it must never show up in the expansion.
    let _f3 = store
        .insert_file(
            "unlinked.md",
            "h3",
            100,
            &[],
            &generate_docid("unlinked.md"),
        )
        .unwrap();

    store.insert_edge(f1, f2, "wikilink").unwrap();
    store
        .insert_chunk(f2, "## Linked", "Linked content about delivery", 10, 20)
        .unwrap();
    store
        .insert_fts_chunk(f2, 0, "Linked content about delivery")
        .unwrap();

    let seeds = vec![RankedResult {
        file_path: "seed.md".into(),
        file_id: f1,
        score: 0.85,
        heading: None,
        snippet: "Seed".into(),
        docid: None,
    }];

    let expanded = graph_expand(&store, &seeds, "delivery", 2, 20).unwrap();
    assert_eq!(expanded.len(), 1);
    assert_eq!(expanded[0].file_path, "linked.md");
    // Term match at one hop: 0.85 * 0.8 = 0.68. Both files also share the
    // "rust" tag, so a loose `0 < score < 0.85` bound would also accept the
    // tag-fallback score (0.85 * 0.8 * 0.7 = 0.476); pin the exact value to
    // prove the term-match path was taken.
    assert!((expanded[0].score - 0.68).abs() < 0.01);
}
263+
264+
/// A neighbor that is itself one of the seeds must not be returned again as
/// an expansion.
#[test]
fn test_graph_expand_skips_seeds() {
    let store = Store::open_memory().unwrap();
    let id_a = store
        .insert_file("a.md", "h1", 100, &[], &generate_docid("a.md"))
        .unwrap();
    let id_b = store
        .insert_file("b.md", "h2", 100, &[], &generate_docid("b.md"))
        .unwrap();

    // a -> b, and b contains the query term, so b would qualify if it were
    // not already a seed.
    store.insert_edge(id_a, id_b, "wikilink").unwrap();
    store.insert_chunk(id_b, "## B", "Content B", 10, 20).unwrap();
    store.insert_fts_chunk(id_b, 0, "Content B").unwrap();

    let seeds: Vec<RankedResult> = vec![("a.md", id_a, 0.9, "A"), ("b.md", id_b, 0.8, "B")]
        .into_iter()
        .map(|(path, file_id, score, snippet)| RankedResult {
            file_path: path.into(),
            file_id,
            score,
            heading: None,
            snippet: snippet.into(),
            docid: None,
        })
        .collect();

    let expanded = graph_expand(&store, &seeds, "content", 2, 20).unwrap();
    assert!(expanded.is_empty());
}
300+
301+
/// When two seeds link to the same file, the expansion keeps the score
/// derived from the higher-scoring seed.
#[test]
fn test_graph_expand_multi_parent_takes_highest() {
    let store = Store::open_memory().unwrap();
    let parent_hi = store
        .insert_file("a.md", "h1", 100, &[], &generate_docid("a.md"))
        .unwrap();
    let parent_lo = store
        .insert_file("b.md", "h2", 100, &[], &generate_docid("b.md"))
        .unwrap();
    let child = store
        .insert_file("shared.md", "h3", 100, &[], &generate_docid("shared.md"))
        .unwrap();

    // Both parents point at the same child.
    for parent in [parent_hi, parent_lo] {
        store.insert_edge(parent, child, "wikilink").unwrap();
    }
    store
        .insert_chunk(child, "## Shared", "Shared topic content", 10, 20)
        .unwrap();
    store
        .insert_fts_chunk(child, 0, "Shared topic content")
        .unwrap();

    let seeds: Vec<RankedResult> = vec![
        ("a.md", parent_hi, 0.9, "A"),
        ("b.md", parent_lo, 0.5, "B"),
    ]
    .into_iter()
    .map(|(path, file_id, score, snippet)| RankedResult {
        file_path: path.into(),
        file_id,
        score,
        heading: None,
        snippet: snippet.into(),
        docid: None,
    })
    .collect();

    let expanded = graph_expand(&store, &seeds, "topic", 1, 20).unwrap();
    assert_eq!(expanded.len(), 1);

    let only = &expanded[0];
    assert_eq!(only.file_path, "shared.md");
    // Should use highest parent: 0.9 * 0.8 = 0.72
    assert!((only.score - 0.72).abs() < 0.01);
}
348+
349+
/// A seed with no edges at all yields no expansions.
#[test]
fn test_graph_expand_empty_graph() {
    let store = Store::open_memory().unwrap();
    let lonely = store.insert_file("a.md", "h1", 100, &[], "aaa111").unwrap();

    let only_seed = RankedResult {
        file_path: "a.md".into(),
        file_id: lonely,
        score: 0.9,
        heading: None,
        snippet: "A".into(),
        docid: None,
    };

    let expanded = graph_expand(&store, &[only_seed], "query", 2, 20).unwrap();
    assert!(expanded.is_empty());
}
366+
367+
/// A linked file that matches no query term is still returned when it shares
/// a tag with the seed, at a reduced score.
#[test]
fn test_graph_expand_tag_fallback() {
    let store = Store::open_memory().unwrap();

    let seed_docid = generate_docid("seed.md");
    let seed_id = store
        .insert_file(
            "seed.md",
            "h1",
            100,
            &["rust".into(), "cli".into()],
            &seed_docid,
        )
        .unwrap();

    let linked_docid = generate_docid("linked.md");
    let linked_id = store
        .insert_file("linked.md", "h2", 100, &["rust".into()], &linked_docid)
        .unwrap();

    store.insert_edge(seed_id, linked_id, "wikilink").unwrap();
    store
        .insert_chunk(linked_id, "## Linked", "Unrelated content", 10, 20)
        .unwrap();
    store
        .insert_fts_chunk(linked_id, 0, "Unrelated content here")
        .unwrap();

    let seed = RankedResult {
        file_path: "seed.md".into(),
        file_id: seed_id,
        score: 0.85,
        heading: None,
        snippet: "Seed".into(),
        docid: None,
    };
    let seeds = vec![seed];

    // Query doesn't match FTS, but shared tag "rust" should keep it (with 0.7x penalty)
    let expanded = graph_expand(&store, &seeds, "nonexistent query term", 2, 20).unwrap();
    assert_eq!(expanded.len(), 1);

    // Score: 0.85 * 0.8 * 0.7 = 0.476
    let delta = (expanded[0].score - 0.476).abs();
    assert!(delta < 0.01);
}
101412
}

0 commit comments

Comments
 (0)