Skip to content

Commit dc3a771

Browse files
devwhodevs and claude committed
feat: context topic — rich context bundle with budget trimming
Assembles direct search results + graph expansions within a char budget. Priority ordering: direct matches first, then 1-hop related. Truncation with docid reference for full content. Testable without embedder via context_topic_from_results. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent bb0874f commit dc3a771

1 file changed

Lines changed: 293 additions & 0 deletions

File tree

src/context.rs

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,156 @@ pub fn context_project(params: &ContextParams, name: &str) -> Result<ProjectCont
496496
})
497497
}
498498

499+
// ---------------------------------------------------------------------------
500+
// Context Topic — rich context bundle with budget trimming
501+
// ---------------------------------------------------------------------------
502+
503+
#[derive(Debug, Serialize)]
504+
pub struct ContextBundle {
505+
pub topic: String,
506+
pub sections: Vec<ContextSection>,
507+
pub total_chars: usize,
508+
pub budget_chars: usize,
509+
pub truncated: bool,
510+
}
511+
512+
#[derive(Debug, Serialize)]
513+
pub struct ContextSection {
514+
pub label: String,
515+
pub path: String,
516+
pub docid: Option<String>,
517+
pub content: String,
518+
pub relevance: String,
519+
}
520+
521+
/// Budget applied by `context_topic_from_results` when the caller passes
/// `max_chars == 0`.
const DEFAULT_BUDGET: usize = 32_000;
/// Flat amount charged against the budget for every section, on top of the
/// length of its content.
// NOTE(review): presumably approximates label/path/relevance metadata — confirm.
const SECTION_OVERHEAD: usize = 100;
523+
524+
/// Snap to a valid UTF-8 char boundary at or before `offset`.
///
/// `offset` is first clamped to `s.len()`, so any value is accepted. The
/// returned index is always safe to use as the end of a `&s[..idx]` slice.
fn snap_to_char(s: &str, offset: usize) -> usize {
    let clamped = offset.min(s.len());
    // Index 0 is always a char boundary, so the search cannot come up empty;
    // `unwrap_or(0)` only exists to avoid a panic path.
    (0..=clamped)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
533+
534+
/// Assemble a context bundle from pre-computed search results.
535+
/// Testable without embedder.
536+
pub fn context_topic_from_results(
537+
params: &ContextParams,
538+
topic: &str,
539+
search_results: &[crate::search::InternalSearchResult],
540+
max_chars: usize,
541+
) -> Result<ContextBundle> {
542+
let budget = if max_chars == 0 {
543+
DEFAULT_BUDGET
544+
} else {
545+
max_chars
546+
};
547+
let mut sections = Vec::new();
548+
let mut used_chars = 0;
549+
let mut included_files: HashSet<String> = HashSet::new();
550+
551+
// Priority 1: Direct search results (top 5)
552+
for r in search_results.iter().take(5) {
553+
if used_chars >= budget {
554+
break;
555+
}
556+
let full_path = params.vault_path.join(&r.file_path);
557+
let content = std::fs::read_to_string(&full_path).unwrap_or_default();
558+
let (_, body) = split_frontmatter(&content);
559+
560+
let available = budget.saturating_sub(used_chars + SECTION_OVERHEAD);
561+
let trimmed = if body.len() > available {
562+
format!(
563+
"{}... [truncated, full note: #{}]",
564+
&body[..snap_to_char(&body, available)],
565+
r.docid.as_deref().unwrap_or("?")
566+
)
567+
} else {
568+
body
569+
};
570+
571+
used_chars += trimmed.len() + SECTION_OVERHEAD;
572+
included_files.insert(r.file_path.clone());
573+
sections.push(ContextSection {
574+
label: "Direct match".into(),
575+
path: r.file_path.clone(),
576+
docid: r.docid.clone(),
577+
content: trimmed,
578+
relevance: format!("score {:.2}", r.score),
579+
});
580+
}
581+
582+
// Priority 2: Graph-expanded notes (1-hop from top 3 results)
583+
for r in search_results.iter().take(3) {
584+
if used_chars >= budget {
585+
break;
586+
}
587+
let neighbors = params.store.get_neighbors(r.file_id, 1).unwrap_or_default();
588+
for (nid, _hop) in neighbors {
589+
if used_chars >= budget {
590+
break;
591+
}
592+
if let Some(nf) = params.store.get_file_by_id(nid).ok().flatten() {
593+
if included_files.contains(&nf.path) {
594+
continue;
595+
}
596+
let full_path = params.vault_path.join(&nf.path);
597+
let content = std::fs::read_to_string(&full_path).unwrap_or_default();
598+
let (_, body) = split_frontmatter(&content);
599+
600+
let available = budget.saturating_sub(used_chars + SECTION_OVERHEAD);
601+
let max_per_expansion = budget / 8;
602+
let cap = available.min(max_per_expansion);
603+
if cap == 0 {
604+
break;
605+
}
606+
let trimmed = if body.len() > cap {
607+
format!("{}... [truncated]", &body[..snap_to_char(&body, cap)])
608+
} else {
609+
body
610+
};
611+
612+
used_chars += trimmed.len() + SECTION_OVERHEAD;
613+
included_files.insert(nf.path.clone());
614+
sections.push(ContextSection {
615+
label: "Related (1-hop)".into(),
616+
path: nf.path.clone(),
617+
docid: nf.docid,
618+
content: trimmed,
619+
relevance: format!("linked from {}", r.file_path),
620+
});
621+
}
622+
}
623+
}
624+
625+
let truncated = used_chars >= budget;
626+
627+
Ok(ContextBundle {
628+
topic: topic.to_string(),
629+
sections,
630+
total_chars: used_chars,
631+
budget_chars: budget,
632+
truncated,
633+
})
634+
}
635+
636+
/// Full context topic function (requires embedder + HNSW).
637+
/// Called from CLI handler which provides the heavy resources.
638+
pub fn context_topic_with_search(
639+
params: &ContextParams,
640+
topic: &str,
641+
max_chars: usize,
642+
embedder: &mut crate::embedder::Embedder,
643+
index: &crate::hnsw::HnswIndex,
644+
) -> Result<ContextBundle> {
645+
let search_output = crate::search::search_internal(topic, 5, params.store, embedder, index)?;
646+
context_topic_from_results(params, topic, &search_output.results, max_chars)
647+
}
648+
499649
// ---------------------------------------------------------------------------
500650
// Tests
501651
// ---------------------------------------------------------------------------
@@ -758,4 +908,147 @@ mod tests {
758908
assert!(proj.note.is_none());
759909
assert!(proj.child_notes.is_empty());
760910
}
911+
912+
// --- context_topic tests ---
913+
914+
/// A single short note under a generous budget is returned whole as a direct
/// match and the bundle is not flagged as truncated.
#[test]
fn test_context_topic_basic() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path().to_path_buf();
    std::fs::write(
        root.join("result.md"),
        "# Result\n\nThis is relevant content about the topic.",
    )
    .unwrap();

    let store = Store::open_memory().unwrap();
    // Use the id the store hands back rather than assuming the first row is 1.
    let file_id = store
        .insert_file("result.md", "h1", 100, &["topic".into()], "aaa111")
        .unwrap();

    let params = ContextParams {
        store: &store,
        vault_path: &root,
        profile: None,
    };
    let search_results = vec![crate::search::InternalSearchResult {
        file_path: "result.md".into(),
        file_id,
        score: 0.85,
        heading: Some("# Result".into()),
        snippet: "relevant content".into(),
        docid: Some("aaa111".into()),
    }];

    let bundle = context_topic_from_results(&params, "topic", &search_results, 32000).unwrap();
    assert!(!bundle.sections.is_empty());
    assert_eq!(bundle.sections[0].label, "Direct match");
    assert!(bundle.sections[0].content.contains("relevant content"));
    assert!(bundle.total_chars <= bundle.budget_chars);
    assert!(!bundle.truncated);
}
949+
950+
/// A note far larger than the budget is cut, carries the truncation marker
/// with its docid, and flags the bundle as truncated.
#[test]
fn test_context_topic_budget_trimming() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path().to_path_buf();
    let long_content = format!("# Long\n\n{}", "word ".repeat(5000));
    std::fs::write(root.join("long.md"), &long_content).unwrap();

    let store = Store::open_memory().unwrap();
    // Use the id the store hands back rather than assuming the first row is 1.
    let file_id = store
        .insert_file("long.md", "h1", 100, &[], "aaa111")
        .unwrap();

    let params = ContextParams {
        store: &store,
        vault_path: &root,
        profile: None,
    };
    let search_results = vec![crate::search::InternalSearchResult {
        file_path: "long.md".into(),
        file_id,
        score: 0.9,
        heading: None,
        snippet: "word word".into(),
        docid: Some("aaa111".into()),
    }];

    // Very small budget — should truncate
    let bundle = context_topic_from_results(&params, "words", &search_results, 500).unwrap();
    assert!(!bundle.sections.is_empty());
    assert!(bundle.sections[0].content.contains("[truncated"));
    // The marker must point the reader at the full note.
    assert!(bundle.sections[0].content.contains("#aaa111"));
    assert!(bundle.truncated);
}
982+
983+
/// A note linked from a direct match is pulled in as a 1-hop related section.
#[test]
fn test_context_topic_with_graph_expansion() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path().to_path_buf();
    std::fs::write(root.join("main.md"), "# Main\nMain content.").unwrap();
    std::fs::write(root.join("related.md"), "# Related\nRelated content.").unwrap();

    let store = Store::open_memory().unwrap();
    let f1 = store
        .insert_file("main.md", "h1", 100, &[], "aaa111")
        .unwrap();
    let f2 = store
        .insert_file("related.md", "h2", 100, &[], "bbb222")
        .unwrap();
    store.insert_edge(f1, f2, "wikilink").unwrap();

    let params = ContextParams {
        store: &store,
        vault_path: &root,
        profile: None,
    };
    let search_results = vec![crate::search::InternalSearchResult {
        file_path: "main.md".into(),
        file_id: f1,
        score: 0.8,
        heading: None,
        snippet: "Main".into(),
        docid: Some("aaa111".into()),
    }];

    let bundle = context_topic_from_results(&params, "main", &search_results, 32000).unwrap();
    // Should have main as direct match + related as 1-hop
    assert!(bundle.sections.len() >= 2);
    assert!(
        bundle
            .sections
            .iter()
            .any(|s| s.path == "main.md" && s.label == "Direct match")
    );
    assert!(
        bundle
            .sections
            .iter()
            .any(|s| s.path == "related.md" && s.label == "Related (1-hop)")
    );
    // The related section records which direct match it was reached from.
    assert!(
        bundle
            .sections
            .iter()
            .any(|s| s.path == "related.md" && s.relevance == "linked from main.md")
    );
}
1029+
1030+
/// With no search results the bundle is empty, uses no budget and is not
/// flagged as truncated.
#[test]
fn test_context_topic_empty_results() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path().to_path_buf();
    let store = Store::open_memory().unwrap();
    let params = ContextParams {
        store: &store,
        vault_path: &root,
        profile: None,
    };

    let bundle = context_topic_from_results(&params, "nothing", &[], 32000).unwrap();
    assert_eq!(bundle.topic, "nothing");
    assert!(bundle.sections.is_empty());
    assert_eq!(bundle.total_chars, 0);
    assert_eq!(bundle.budget_chars, 32000);
    assert!(!bundle.truncated);
}
1046+
1047+
/// An offset inside a multi-byte character snaps back to that character's
/// first byte.
#[test]
fn test_snap_to_char() {
    let s = "hello\u{2014}world"; // em dash is 3 bytes
    let snap = snap_to_char(s, 6); // lands inside the em dash
    assert!(s.is_char_boundary(snap));
    assert!(snap <= 6);
    // "hello" is 5 bytes, so the em dash starts at index 5.
    assert_eq!(snap, 5);
}
7611054
}

0 commit comments

Comments
 (0)