Skip to content

Commit f32d622

Browse files
committed
feat(identity): add L1 extraction engine and identity formatting
1 parent ad72e96 commit f32d622

2 files changed

Lines changed: 378 additions & 0 deletions

File tree

src/identity.rs

Lines changed: 377 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,377 @@
1+
use anyhow::{Context, Result};
2+
use regex::Regex;
3+
4+
use crate::config::Config;
5+
use crate::profile::VaultProfile;
6+
use crate::store::Store;
7+
8+
/// Per-category tally of the L1 (tier-1) facts produced by one extraction run.
///
/// Every counter starts at zero via `Default`; `extract_l1_facts` bumps the
/// matching counter once for each fact it writes to the store.
#[derive(Default, Debug)]
pub struct L1Summary {
    /// Number of `active_project` facts written.
    pub active_projects: usize,
    /// Number of `key_person` facts written.
    pub key_people: usize,
    /// Number of `current_focus` facts written.
    pub current_focus: usize,
    /// Number of `ooo` (out-of-office) facts written.
    pub ooo: usize,
    /// Number of `blocking` facts written.
    pub blocking: usize,
}
17+
18+
/// Extract L1 identity facts from the indexed vault.
19+
///
20+
/// Clears existing tier-1 facts, then populates five categories:
21+
/// active_projects, key_people, current_focus, ooo, blocking.
22+
pub fn extract_l1_facts(store: &Store, profile: &VaultProfile) -> Result<L1Summary> {
23+
store.clear_identity_facts(1)?;
24+
25+
let all_files = store.get_all_files()?;
26+
let mut summary = L1Summary::default();
27+
28+
// ── Active projects ─────────────────────────────────────────
29+
for file in &all_files {
30+
if file.tags.iter().any(|t| t.eq_ignore_ascii_case("project")) {
31+
let name = file_stem(&file.path);
32+
store.upsert_identity_fact(1, "active_project", &name, Some(&file.path))?;
33+
summary.active_projects += 1;
34+
}
35+
}
36+
37+
// ── Key people ──────────────────────────────────────────────
38+
let people_folder = profile.structure.folders.people.as_deref();
39+
if let Some(pf) = people_folder {
40+
let people_files: Vec<_> = all_files
41+
.iter()
42+
.filter(|f| path_is_in_folder(&f.path, pf))
43+
.collect();
44+
45+
// Sort by incoming edge count (descending), take top 5.
46+
let mut scored: Vec<(&crate::store::FileRecord, usize)> = people_files
47+
.iter()
48+
.filter_map(|f| {
49+
let incoming = store.get_incoming(f.id, None).ok()?;
50+
Some((*f, incoming.len()))
51+
})
52+
.collect();
53+
scored.sort_by(|a, b| b.1.cmp(&a.1));
54+
55+
for (file, _count) in scored.into_iter().take(5) {
56+
let name = file_stem(&file.path);
57+
store.upsert_identity_fact(1, "key_person", &name, Some(&file.path))?;
58+
summary.key_people += 1;
59+
}
60+
}
61+
62+
// ── Daily-note based extractions ────────────────────────────
63+
let daily_folder = profile.structure.folders.daily.as_deref();
64+
if let Some(df) = daily_folder {
65+
let mut daily_files: Vec<_> = all_files
66+
.iter()
67+
.filter(|f| path_is_in_folder(&f.path, df) && f.note_date.is_some())
68+
.collect();
69+
70+
// Sort by note_date descending (most recent first).
71+
daily_files.sort_by(|a, b| b.note_date.cmp(&a.note_date));
72+
73+
// ── Current focus (most recent daily note) ──────────────
74+
if let Some(latest) = daily_files.first() {
75+
if let Ok(chunks) = store.get_chunks_by_file(latest.id) {
76+
let focus_re =
77+
Regex::new(r"(?i)morning\s+focus|top\s+priorit|priorities").unwrap();
78+
for chunk in &chunks {
79+
if focus_re.is_match(&chunk.heading) {
80+
let items = extract_bullet_items(&chunk.snippet, 3);
81+
for item in items {
82+
store.upsert_identity_fact(1, "current_focus", &item, None)?;
83+
summary.current_focus += 1;
84+
}
85+
break;
86+
}
87+
}
88+
}
89+
}
90+
91+
// ── OOO (last 7 daily notes) ───────────────────────────
92+
if people_folder.is_some() {
93+
let people_names: Vec<String> = all_files
94+
.iter()
95+
.filter(|f| path_is_in_folder(&f.path, people_folder.unwrap()))
96+
.map(|f| file_stem(&f.path))
97+
.collect();
98+
99+
let ooo_re = Regex::new(r"(?i)\b(ooo|out\s+of\s+office|vacation|leave|pto)\b").unwrap();
100+
101+
for daily in daily_files.iter().take(7) {
102+
if let Ok(chunks) = store.get_chunks_by_file(daily.id) {
103+
for chunk in &chunks {
104+
if ooo_re.is_match(&chunk.snippet) {
105+
for person in &people_names {
106+
if chunk
107+
.snippet
108+
.to_ascii_lowercase()
109+
.contains(&person.to_ascii_lowercase())
110+
{
111+
// Extract context around the match.
112+
let detail = extract_ooo_detail(&chunk.snippet, &ooo_re);
113+
let label = format!("{} ({})", person, detail);
114+
store.upsert_identity_fact(1, "ooo", &label, None)?;
115+
summary.ooo += 1;
116+
}
117+
}
118+
}
119+
}
120+
}
121+
}
122+
}
123+
124+
// ── Blocking (last 3 daily notes) ───────────────────────
125+
let blocking_re = Regex::new(r"(?i)\b(P0|blocking|blocked)\b").unwrap();
126+
127+
for daily in daily_files.iter().take(3) {
128+
if let Ok(chunks) = store.get_chunks_by_file(daily.id) {
129+
for chunk in &chunks {
130+
let items = extract_matching_bullets(&chunk.snippet, &blocking_re);
131+
for item in items {
132+
store.upsert_identity_fact(1, "blocking", &item, None)?;
133+
summary.blocking += 1;
134+
}
135+
}
136+
}
137+
}
138+
}
139+
140+
Ok(summary)
141+
}
142+
143+
/// Format the identity block combining L0 (config) and L1 (store) facts.
144+
pub fn format_identity_block(config: &Config, store: &Store) -> Result<String> {
145+
let id = &config.identity;
146+
147+
let name = id.name.as_deref().unwrap_or("(not set)");
148+
let role = id.role.as_deref().unwrap_or("(not set)");
149+
let vault = id.vault_purpose.as_deref().unwrap_or("(not set)");
150+
151+
let mut out = String::new();
152+
out.push_str("## Identity (L0)\n");
153+
out.push_str(&format!("Name: {}\n", name));
154+
out.push_str(&format!("Role: {}\n", role));
155+
out.push_str(&format!("Vault: {}\n", vault));
156+
157+
let facts = store
158+
.get_identity_facts(1)
159+
.context("reading L1 identity facts")?;
160+
161+
if facts.is_empty() {
162+
out.push_str("\n## Current State (L1)\n");
163+
out.push_str("[no data — run engraph index]\n");
164+
return Ok(out);
165+
}
166+
167+
// Determine most recent updated_at across all facts.
168+
let latest_ts = facts
169+
.iter()
170+
.map(|f| f.updated_at.as_str())
171+
.max()
172+
.unwrap_or("unknown");
173+
174+
out.push_str(&format!("\n## Current State (L1) [updated {}]\n", latest_ts));
175+
176+
// Group facts by key.
177+
let project_vals: Vec<&str> = facts
178+
.iter()
179+
.filter(|f| f.key == "active_project")
180+
.map(|f| f.value.as_str())
181+
.collect();
182+
let focus_vals: Vec<&str> = facts
183+
.iter()
184+
.filter(|f| f.key == "current_focus")
185+
.map(|f| f.value.as_str())
186+
.collect();
187+
let people_vals: Vec<&str> = facts
188+
.iter()
189+
.filter(|f| f.key == "key_person")
190+
.map(|f| f.value.as_str())
191+
.collect();
192+
let blocking_vals: Vec<&str> = facts
193+
.iter()
194+
.filter(|f| f.key == "blocking")
195+
.map(|f| f.value.as_str())
196+
.collect();
197+
let ooo_vals: Vec<&str> = facts
198+
.iter()
199+
.filter(|f| f.key == "ooo")
200+
.map(|f| f.value.as_str())
201+
.collect();
202+
203+
if !project_vals.is_empty() {
204+
out.push_str(&format!("Active projects: {}\n", project_vals.join(", ")));
205+
}
206+
if !focus_vals.is_empty() {
207+
out.push_str(&format!("Current focus: {}\n", focus_vals.join(", ")));
208+
}
209+
if !people_vals.is_empty() {
210+
out.push_str(&format!("Key people: {}\n", people_vals.join(", ")));
211+
}
212+
if !blocking_vals.is_empty() {
213+
out.push_str(&format!("Blocking: {}\n", blocking_vals.join(", ")));
214+
}
215+
if !ooo_vals.is_empty() {
216+
out.push_str(&format!("OOO: {}\n", ooo_vals.join(", ")));
217+
}
218+
219+
Ok(out)
220+
}
221+
222+
// ── Helpers ─────────────────────────────────────────────────────
223+
224+
/// Return the final path component of `path` with its last extension removed.
///
/// When the path has no file name at all (for example `""` or `".."`), the
/// input is returned unchanged.
fn file_stem(path: &str) -> String {
    match std::path::Path::new(path).file_stem() {
        Some(stem) => stem.to_string_lossy().to_string(),
        None => path.to_string(),
    }
}
231+
232+
/// Check whether a file path lies inside a given folder (ASCII
/// case-insensitive).
///
/// `folder` may carry trailing slashes and may span several segments
/// (`"03-Resources/People"`). The path matches when it either starts with
/// `folder/` or contains `/folder/` at any depth, so the folder is recognised
/// both at the vault root and nested under other directories.
///
/// An empty (or slash-only) `folder` never matches; without this guard the
/// check would degenerate into "path starts with `/` or contains `//`".
fn path_is_in_folder(path: &str, folder: &str) -> bool {
    let folder = folder.trim_end_matches('/').to_ascii_lowercase();
    if folder.is_empty() {
        return false;
    }

    let path = path.to_ascii_lowercase();
    // "folder/" for the root case, "/folder/" for the nested case.
    let as_prefix = format!("{}/", folder);
    path.starts_with(&as_prefix) || path.contains(&format!("/{}", as_prefix))
}
240+
241+
/// Extract up to `max` bullet-point items from a snippet.
///
/// A bullet is a line that, after trimming, starts with `- ` or `* `. A
/// leading task checkbox (`[ ]`, `[x]`, `[X]`) is removed when it is followed
/// by a space or ends the line. Bullets whose remaining text is empty —
/// including bare checkboxes such as `- [ ]` — are skipped and do not count
/// towards `max`.
///
/// Returns the items in document order; `max == 0` yields an empty vector.
fn extract_bullet_items(snippet: &str, max: usize) -> Vec<String> {
    snippet
        .lines()
        .filter_map(|line| {
            let line = line.trim();
            let body = line
                .strip_prefix("- ")
                .or_else(|| line.strip_prefix("* "))?;

            // Strip checkbox markers like [ ] or [x], but only when the marker
            // stands alone (so "[x]foo" is left untouched).
            let body = ["[ ]", "[x]", "[X]"]
                .iter()
                .find_map(|marker| {
                    body.strip_prefix(*marker)
                        .filter(|tail| tail.is_empty() || tail.starts_with(' '))
                })
                .unwrap_or(body)
                .trim();

            if body.is_empty() {
                None
            } else {
                Some(body.to_string())
            }
        })
        // `take` enforces the limit before anything is collected, so a limit
        // of zero really produces zero items.
        .take(max)
        .collect()
}
267+
268+
/// Extract bullet items that match a regex pattern.
269+
fn extract_matching_bullets(snippet: &str, pattern: &Regex) -> Vec<String> {
270+
let mut items = Vec::new();
271+
for line in snippet.lines() {
272+
let trimmed = line.trim();
273+
if (trimmed.starts_with("- ") || trimmed.starts_with("* ")) && pattern.is_match(trimmed) {
274+
let rest = trimmed
275+
.strip_prefix("- ")
276+
.or_else(|| trimmed.strip_prefix("* "))
277+
.unwrap_or(trimmed);
278+
let rest = rest
279+
.strip_prefix("[ ] ")
280+
.or_else(|| rest.strip_prefix("[x] "))
281+
.or_else(|| rest.strip_prefix("[X] "))
282+
.unwrap_or(rest);
283+
let clean = rest.trim().to_string();
284+
if !clean.is_empty() {
285+
items.push(clean);
286+
}
287+
}
288+
}
289+
items
290+
}
291+
292+
/// Extract a short OOO detail string from around the regex match.
293+
fn extract_ooo_detail(snippet: &str, ooo_re: &Regex) -> String {
294+
for line in snippet.lines() {
295+
let trimmed = line.trim();
296+
if ooo_re.is_match(trimmed) {
297+
// Return the line content (stripped of bullet prefix) as the detail.
298+
let rest = trimmed
299+
.strip_prefix("- ")
300+
.or_else(|| trimmed.strip_prefix("* "))
301+
.unwrap_or(trimmed);
302+
let clean = rest.trim();
303+
if clean.len() > 80 {
304+
return format!("{}...", &clean[..77]);
305+
}
306+
return clean.to_string();
307+
}
308+
}
309+
"OOO".to_string()
310+
}
311+
312+
// ── Tests ───────────────────────────────────────────────────────
313+
314+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;
    use crate::store::Store;

    /// Build a default config with all three L0 identity fields filled in.
    fn identity_config(
        name: &'static str,
        role: &'static str,
        purpose: &'static str,
    ) -> Config {
        let mut config = Config::default();
        config.identity.name = Some(name.into());
        config.identity.role = Some(role.into());
        config.identity.vault_purpose = Some(purpose.into());
        config
    }

    /// With an empty store, the L0 fields are rendered and L1 shows the
    /// "no data" hint.
    #[test]
    fn test_format_identity_block_l0_only() {
        let store = Store::open_memory().unwrap();
        let config = identity_config("Oleksandr", "Engineer", "personal knowledge base");

        let block = format_identity_block(&config, &store).unwrap();

        for expected in [
            "Name: Oleksandr",
            "Role: Engineer",
            "Vault: personal knowledge base",
            "no data",
        ]
        .iter()
        {
            assert!(block.contains(expected));
        }
    }

    /// With tier-1 facts present, every category is rendered and the
    /// "no data" hint is absent.
    #[test]
    fn test_format_identity_block_with_l1() {
        let store = Store::open_memory().unwrap();
        let config = identity_config("Test User", "Developer", "notes");

        // Insert L1 facts manually: (key, value, source path).
        let facts = [
            ("active_project", "ProjectA", Some("01-Projects/ProjectA.md")),
            ("active_project", "ProjectB", Some("01-Projects/ProjectB.md")),
            ("key_person", "Alice", Some("03-Resources/People/Alice.md")),
            ("current_focus", "Ship feature X", None),
            ("blocking", "CI pipeline broken", None),
            ("ooo", "Bob (vacation until Friday)", None),
        ];
        for (key, value, source) in facts.iter() {
            store.upsert_identity_fact(1, key, value, *source).unwrap();
        }

        let block = format_identity_block(&config, &store).unwrap();

        for expected in [
            "Name: Test User",
            "Role: Developer",
            "Vault: notes",
            "Active projects: ProjectA, ProjectB",
            "Key people: Alice",
            "Current focus: Ship feature X",
            "Blocking: CI pipeline broken",
            "OOO: Bob (vacation until Friday)",
            "## Current State (L1) [updated",
        ]
        .iter()
        {
            assert!(block.contains(expected));
        }
        assert!(!block.contains("no data"));
    }
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ pub mod fusion;
77
pub mod graph;
88
pub mod health;
99
pub mod http;
10+
pub mod identity;
1011
pub mod indexer;
1112
pub mod links;
1213
pub mod llm;

0 commit comments

Comments
 (0)