Skip to content

Commit bcd50e6

Browse files
devwhodevsclaude
andcommitted
feat: edge building in indexer — wikilinks and people detection
Wikilink extraction runs on raw file content during indexing. Targets resolved by exact path then basename match (case-insensitive). Bidirectional edges inserted. People detection scans for name mentions in non-People files, with alias extraction from frontmatter. Rebuild clears edges before building. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent db3a359 commit bcd50e6

1 file changed

Lines changed: 246 additions & 3 deletions

File tree

src/indexer.rs

Lines changed: 246 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::chunker::{chunk_markdown, split_oversized_chunks};
1212
use crate::config::Config;
1313
use crate::docid::generate_docid;
1414
use crate::embedder::Embedder;
15+
use crate::graph::extract_wikilink_targets;
1516
use crate::hnsw::HnswIndex;
1617
use crate::store::{FileRecord, Store};
1718

@@ -129,6 +130,138 @@ pub fn diff_vault(
129130
Ok((new_files, changed_files, deleted))
130131
}
131132

133+
/// Resolve a wikilink target name to a file ID in the store.
134+
fn resolve_link_target(store: &Store, target: &str) -> Result<Option<i64>> {
135+
let with_ext = if target.ends_with(".md") {
136+
target.to_string()
137+
} else {
138+
format!("{}.md", target)
139+
};
140+
141+
// Try exact path match
142+
if let Some(f) = store.get_file(&with_ext)? {
143+
return Ok(Some(f.id));
144+
}
145+
146+
// Try basename match (case-insensitive)
147+
let all_files = store.get_all_files()?;
148+
let target_lower = with_ext.to_lowercase();
149+
let mut matches: Vec<&FileRecord> = all_files
150+
.iter()
151+
.filter(|f| {
152+
let path_lower = f.path.to_lowercase();
153+
path_lower == target_lower || path_lower.ends_with(&format!("/{}", target_lower))
154+
})
155+
.collect();
156+
157+
matches.sort_by_key(|f| f.path.len());
158+
Ok(matches.first().map(|f| f.id))
159+
}
160+
161+
/// Build wikilink edges for a single file.
162+
pub fn build_edges_for_file(store: &Store, file_id: i64, content: &str) -> Result<()> {
163+
let targets = extract_wikilink_targets(content);
164+
for target in targets {
165+
if let Some(target_id) = resolve_link_target(store, &target)?
166+
&& target_id != file_id
167+
{
168+
store.insert_edge(file_id, target_id, "wikilink")?;
169+
store.insert_edge(target_id, file_id, "wikilink")?;
170+
}
171+
}
172+
Ok(())
173+
}
174+
175+
/// Load people entities from the People folder.
176+
/// Returns (file_id, [name, aliases...]) for each person note.
177+
pub fn load_people_entities(
178+
store: &Store,
179+
people_folder: &str,
180+
content_by_path: &HashMap<String, String>,
181+
) -> Result<Vec<(i64, Vec<String>)>> {
182+
let all_files = store.get_all_files()?;
183+
let mut people = Vec::new();
184+
for file in &all_files {
185+
if file.path.contains(people_folder) {
186+
let basename = file.path.rsplit('/').next().unwrap_or(&file.path);
187+
let name = basename.trim_end_matches(".md").to_string();
188+
let mut names = vec![name];
189+
190+
// Extract aliases from frontmatter
191+
if let Some(content) = content_by_path.get(&file.path)
192+
&& let Some(aliases) = extract_aliases_from_frontmatter(content)
193+
{
194+
names.extend(aliases);
195+
}
196+
197+
people.push((file.id, names));
198+
}
199+
}
200+
Ok(people)
201+
}
202+
203+
/// Extract aliases from YAML frontmatter.
///
/// The frontmatter must open with a `---` fence as the first non-whitespace
/// content and close with a line starting with `---`. The rest of the opening
/// fence line may hold only whitespace, so both LF and CRLF files are
/// accepted.
///
/// Two forms of the `aliases` key are understood:
///
/// * inline: `aliases: [Max, "MD"]`
/// * block list: `aliases:` followed by `- item` lines (blank lines are
///   skipped; the list ends at the first other non-blank line)
///
/// Surrounding double or single quotes are removed from every item and empty
/// items are dropped, in both forms.
///
/// Returns `None` when there is no frontmatter or no `aliases` key. Returns
/// `Some(vec![])` when the key exists but holds neither form (for example a
/// bare scalar).
fn extract_aliases_from_frontmatter(content: &str) -> Option<Vec<String>> {
    /// Normalise one raw list item: trim, unquote, and reject empty results.
    fn clean_alias(raw: &str) -> Option<String> {
        let alias = raw.trim().trim_matches('"').trim_matches('\'');
        (!alias.is_empty()).then(|| alias.to_string())
    }

    let trimmed = content.trim_start();
    let rest = trimmed.strip_prefix("---")?.trim_start_matches('-');

    // The opening fence line may carry trailing whitespace or a `\r` (CRLF files).
    let fence_end = rest.find('\n')?;
    if !rest[..fence_end].trim().is_empty() {
        return None;
    }
    let after = &rest[fence_end + 1..];
    let end = after.find("\n---")?;
    let yaml = &after[..end];

    // Trimming each line also removes a stray `\r` left on the final line.
    let lines: Vec<&str> = yaml.lines().map(str::trim).collect();
    let (key_idx, value) = lines
        .iter()
        .copied()
        .enumerate()
        .find_map(|(idx, line)| line.strip_prefix("aliases:").map(|v| (idx, v.trim())))?;

    let aliases: Vec<String> = if value.starts_with('[') {
        value
            .trim_start_matches('[')
            .trim_end_matches(']')
            .split(',')
            .filter_map(clean_alias)
            .collect()
    } else if value.is_empty() {
        lines[key_idx + 1..]
            .iter()
            .copied()
            .filter(|line| !line.is_empty())
            .map_while(|line| line.strip_prefix("- "))
            .filter_map(clean_alias)
            .collect()
    } else {
        Vec::new()
    };

    Some(aliases)
}
242+
243+
/// Detect people mentions and create edges.
244+
pub fn build_people_edges(
245+
store: &Store,
246+
file_id: i64,
247+
content: &str,
248+
people: &[(i64, Vec<String>)],
249+
) -> Result<()> {
250+
let content_lower = content.to_lowercase();
251+
for (person_id, names) in people {
252+
if *person_id == file_id {
253+
continue;
254+
}
255+
let mentioned = names
256+
.iter()
257+
.any(|name| content_lower.contains(&name.to_lowercase()));
258+
if mentioned {
259+
store.insert_edge(file_id, *person_id, "mention")?;
260+
}
261+
}
262+
Ok(())
263+
}
264+
132265
/// Main indexing orchestrator.
133266
///
134267
/// Walks the vault, diffs against the store, processes new/changed/deleted files,
@@ -205,6 +338,15 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result<In
205338
})
206339
.collect();
207340

341+
// Preserve raw content for edge building (wikilink extraction needs full text).
342+
let content_by_path: HashMap<String, String> = file_contents
343+
.iter()
344+
.map(|(path, content)| {
345+
let rel = path.strip_prefix(vault_path).unwrap_or(path);
346+
(rel.to_string_lossy().to_string(), content.clone())
347+
})
348+
.collect();
349+
208350
// Parallel chunking (embedding is serial since Embedder is not Send+Sync).
209351
let chunked_files: Vec<_> = file_contents
210352
.par_iter()
@@ -318,7 +460,41 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result<In
318460
}
319461
}
320462

321-
// Step 9: Store vault path in meta.
463+
// Step 9: Build vault graph edges.
464+
info!("building vault graph edges");
465+
if rebuild {
466+
store.clear_edges()?;
467+
}
468+
469+
for result in &results {
470+
if let Some(file_record) = store.get_file(&result.rel_path)?
471+
&& let Some(content) = content_by_path.get(&result.rel_path)
472+
{
473+
build_edges_for_file(&store, file_record.id, content)?;
474+
}
475+
}
476+
477+
// People detection (if configured via vault profile)
478+
if let Ok(Some(profile)) = crate::config::Config::load_vault_profile()
479+
&& let Some(people_folder) = &profile.structure.folders.people
480+
{
481+
let people = load_people_entities(&store, people_folder, &content_by_path)?;
482+
if !people.is_empty() {
483+
info!(people_count = people.len(), "detecting people mentions");
484+
for result in &results {
485+
if let Some(file_record) = store.get_file(&result.rel_path)?
486+
&& let Some(content) = content_by_path.get(&result.rel_path)
487+
{
488+
// Skip files in the People folder itself
489+
if !result.rel_path.contains(people_folder.as_str()) {
490+
build_people_edges(&store, file_record.id, content, &people)?;
491+
}
492+
}
493+
}
494+
}
495+
}
496+
497+
// Step 10: Store vault path in meta.
322498
store.set_meta("vault_path", &vault_path.to_string_lossy())?;
323499
store.set_meta(
324500
"last_indexed_at",
@@ -331,7 +507,7 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result<In
331507
),
332508
)?;
333509

334-
// Step 10: Rebuild HNSW index from all vectors in SQLite.
510+
// Step 11: Rebuild HNSW index from all vectors in SQLite.
335511
// hnsw_rs doesn't support appending after load, so we always rebuild.
336512
let all_vectors = store.get_all_vectors()?;
337513
let mut hnsw = HnswIndex::new(all_vectors.len().max(1000));
@@ -344,7 +520,7 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result<In
344520
"rebuilt HNSW index from stored vectors"
345521
);
346522

347-
// Step 11: Save HNSW index to disk.
523+
// Step 12: Save HNSW index to disk.
348524
hnsw.save(&hnsw_dir)?;
349525

350526
let duration = start.elapsed();
@@ -538,4 +714,71 @@ mod tests {
538714
"b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
539715
);
540716
}
717+
718+
#[test]
fn test_edge_building_during_index() {
    // Three notes on disk: a and b link to each other, c links to nothing.
    let vault = TempDir::new().unwrap();
    let vault_root = vault.path();
    for (name, body) in [
        ("a.md", "# A\nSee [[b]] for details."),
        ("b.md", "# B\nLinks to [[a]]."),
        ("c.md", "# C\nNo links here."),
    ] {
        write_file(vault_root, name, body);
    }

    // Register the same three notes in an in-memory store.
    let store = Store::open_memory().unwrap();
    let id_a = store.insert_file("a.md", "h1", 100, &[], "aaa111").unwrap();
    let id_b = store.insert_file("b.md", "h2", 100, &[], "bbb222").unwrap();
    let _id_c = store.insert_file("c.md", "h3", 100, &[], "ccc333").unwrap();

    // Build edges from the on-disk content of the two linking notes.
    for (id, name) in [(id_a, "a.md"), (id_b, "b.md")] {
        let text = std::fs::read_to_string(vault_root.join(name)).unwrap();
        build_edges_for_file(&store, id, &text).unwrap();
    }

    // Each linking note ends up with exactly one outgoing wikilink, to the other.
    for (from, to) in [(id_a, id_b), (id_b, id_a)] {
        let outgoing = store.get_outgoing(from, Some("wikilink")).unwrap();
        assert_eq!(outgoing.len(), 1);
        assert_eq!(outgoing[0].0, to);
    }
}
745+
746+
#[test]
fn test_extract_aliases_from_frontmatter() {
    // Block-list form: `aliases:` followed by `- item` lines.
    let note = "---\ntags:\n - person\naliases:\n - Johnny\n - JN\n---\n# John Nelson";
    let parsed = extract_aliases_from_frontmatter(note);
    assert_eq!(parsed, Some(vec!["Johnny".to_string(), "JN".to_string()]));
}
752+
753+
#[test]
fn test_extract_aliases_inline() {
    // Inline form: `aliases: [a, b]` on a single line.
    let note = "---\naliases: [Max, MD]\n---\n# Max Darski";
    let parsed = extract_aliases_from_frontmatter(note);
    assert_eq!(parsed, Some(vec!["Max".to_string(), "MD".to_string()]));
}
759+
760+
#[test]
fn test_extract_aliases_no_frontmatter() {
    // A note without a leading `---` fence has no frontmatter to read.
    let parsed = extract_aliases_from_frontmatter("# Just a heading");
    assert_eq!(parsed, None);
}
764+
765+
#[test]
fn test_people_mention_detection() {
    // One person note and one ordinary note in an in-memory store.
    let store = Store::open_memory().unwrap();
    let person_id = store
        .insert_file("People/John Nelson.md", "h1", 100, &[], "aaa111")
        .unwrap();
    let note_id = store
        .insert_file("daily.md", "h2", 100, &[], "bbb222")
        .unwrap();

    let known_people = vec![(person_id, vec!["John Nelson".to_string()])];
    let note_text = "Discussed with John Nelson about the architecture.";
    build_people_edges(&store, note_id, note_text, &known_people).unwrap();

    // The note mentions the person exactly once, as a "mention" edge.
    let outgoing = store.get_outgoing(note_id, Some("mention")).unwrap();
    assert_eq!(outgoing.len(), 1);
    assert_eq!(outgoing[0].0, person_id);
}
541784
}

0 commit comments

Comments
 (0)