Skip to content

Commit fe1a31a

Browse files
devwhodevs and claude
committed
feat: graph module with wikilink extraction and query term helpers
Byte-scanning wikilink extractor handles [[target]], [[target|display]], [[target#heading]], skips embeds (![[...]]). Deduplicates targets. Query term extraction for graph agent relevance filtering. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6493347 commit fe1a31a

2 files changed

Lines changed: 102 additions & 0 deletions

File tree

src/graph.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
use std::collections::HashSet;
2+
3+
/// Extract unique wikilink targets from text, in order of first appearance.
///
/// Recognizes `[[Target]]`, `[[Target|Display]]`, `[[Target#Heading]]` and
/// `[[Target#Heading|Display]]`; in every form only the trimmed `Target` part
/// is returned. Embeds (`![[...]]`) are skipped, as are links whose target is
/// empty after trimming (for example `[[#Heading]]`).
///
/// A link must open and close on the same line. An unclosed `[[` only
/// invalidates the remainder of its own line, so well-formed links on later
/// lines are still extracted.
pub fn extract_wikilink_targets(text: &str) -> Vec<String> {
    let bytes = text.as_bytes();
    let mut targets = Vec::new();
    // Borrowed slices of `text` are enough for deduplication; only targets
    // that are actually returned get allocated.
    let mut seen: HashSet<&str> = HashSet::new();
    let mut i = 0;

    while i + 1 < bytes.len() {
        if bytes[i] != b'[' || bytes[i + 1] != b'[' {
            i += 1;
            continue;
        }

        // An embed is a wikilink immediately preceded by `!`.
        let is_embed = i > 0 && bytes[i - 1] == b'!';

        // `[[` is two ASCII bytes, so `i + 2` is always a char boundary no
        // greater than `text.len()`; `get` keeps this panic-free regardless.
        let rest = match text.get(i + 2..) {
            Some(rest) => rest,
            None => break,
        };

        // Look for the closing `]]` on the current line only. Searching the
        // whole remaining text would pair an unclosed `[[` with the `]]` of a
        // later link and swallow that link.
        let line = rest.split('\n').next().unwrap_or(rest);

        match line.find("]]") {
            Some(close) => {
                if !is_embed {
                    let inner = &line[..close];
                    // Everything from the first `#` (heading) or `|`
                    // (display text) onwards is not part of the target.
                    let target = inner.split(['#', '|']).next().unwrap_or(inner).trim();
                    if !target.is_empty() && seen.insert(target) {
                        targets.push(target.to_string());
                    }
                }
                // Resume right after the closing `]]`.
                i += 2 + close + 2;
            }
            None => {
                // No `]]` left on this line, so no later `[[` on it can close
                // either: jump straight to the line break (or end of text).
                i += 2 + line.len();
            }
        }
    }

    targets
}
38+
39+
/// Extract query terms for relevance filtering.
///
/// Splits `query` on Unicode whitespace, lowercases every term, and drops
/// terms shorter than 3 characters. Terms are returned in their original
/// order; duplicates and punctuation are kept as-is.
pub fn extract_query_terms(query: &str) -> Vec<String> {
    /// Minimum term length, measured in characters.
    const MIN_TERM_CHARS: usize = 3;

    query
        .split_whitespace()
        .map(str::to_lowercase)
        // Count characters rather than UTF-8 bytes so that a short non-ASCII
        // term (e.g. two letters, one of them accented) is not kept by
        // accident.
        .filter(|term| term.chars().count() >= MIN_TERM_CHARS)
        .collect()
}
48+
49+
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain, display-text and repeated links: each target is reported once.
    #[test]
    fn test_extract_wikilink_targets() {
        let found = extract_wikilink_targets(
            "See [[Note One]] and [[Note Two|display]] for details. Also [[Note One]] again.",
        );
        for expected in ["Note One", "Note Two"] {
            assert!(found.iter().any(|t| t.as_str() == expected));
        }
        // The second `[[Note One]]` must not add another entry.
        assert_eq!(found.len(), 2);
    }

    /// A `#Heading` suffix is not part of the target.
    #[test]
    fn test_extract_wikilinks_with_headings() {
        let found = extract_wikilink_targets("Link to [[Note#Section]] here.");
        assert_eq!(found, vec!["Note"]);
    }

    /// Text without links, including the empty string, yields no targets.
    #[test]
    fn test_extract_wikilinks_empty() {
        for input in ["no links here", ""] {
            assert!(extract_wikilink_targets(input).is_empty());
        }
    }

    /// `![[...]]` embeds are ignored while ordinary links next to them are kept.
    #[test]
    fn test_extract_wikilinks_skip_embeds() {
        let found = extract_wikilink_targets("![[embedded image.png]] and [[real link]]");
        assert_eq!(found, vec!["real link"]);
    }

    /// Heading and display text combined: both parts are stripped.
    #[test]
    fn test_extract_wikilinks_heading_and_display() {
        let found = extract_wikilink_targets("[[Note#Section|Custom Display]]");
        assert_eq!(found, vec!["Note"]);
    }

    /// Terms are lowercased and keep their original order.
    #[test]
    fn test_extract_query_terms() {
        let found = extract_query_terms("BRE-2579 delivery date");
        assert_eq!(found, vec!["bre-2579", "delivery", "date"]);
    }

    /// Terms below the minimum length are filtered out.
    #[test]
    fn test_extract_query_terms_short_words_dropped() {
        let found = extract_query_terms("a is the big query");
        assert_eq!(found, vec!["the", "big", "query"]);
    }
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ pub mod docid;
44
pub mod embedder;
55
pub mod fts;
66
pub mod fusion;
7+
pub mod graph;
78
pub mod hnsw;
89
pub mod indexer;
910
pub mod model;

0 commit comments

Comments
 (0)