Skip to content

Commit 5d3a3ad

Browse files
committed
feat(profile): content-based role detection for non-PARA vaults
1 parent ecf6293 commit 5d3a3ad

1 file changed

Lines changed: 276 additions & 0 deletions

File tree

src/profile.rs

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,229 @@ pub struct VaultStats {
6161
pub folder_count: usize,
6262
}
6363

64+
// ---------------------------------------------------------------------------
65+
// Content-based role detection
66+
// ---------------------------------------------------------------------------
67+
68+
/// Check whether a markdown file's frontmatter looks like a person note.
///
/// `text` is the full file content. Only a frontmatter block that opens with
/// `---` on the very first line (LF or CRLF line endings) and is closed by a
/// later line starting with `---` is inspected; anything else yields `false`.
///
/// Returns `true` when the frontmatter contains either:
/// * a line whose trimmed text starts with `role:` (indentation is ignored,
///   so nested `role:` keys count as well), or
/// * a `tags:` entry — inline (`tags: [a, b]` / `tags: a, b`) or as a
///   `- item` list on the following lines — naming the `person` or `people`
///   tag or one of their hierarchical sub-tags (e.g. `person/colleague`).
///
/// Tag comparison is ASCII case-insensitive and ignores surrounding quotes
/// and `#` characters.
fn is_person_like(text: &str) -> bool {
    /// Normalise one raw tag token and test it against the person tags.
    fn is_person_tag(raw: &str) -> bool {
        let tag = raw
            .trim()
            .trim_matches('"')
            .trim_matches('\'')
            .trim_matches('#')
            .to_ascii_lowercase();
        // Accept the bare tag or a `/`-separated sub-tag, but not words that
        // merely share the prefix (e.g. `personal`).
        ["person", "people"].iter().any(|&base| {
            tag.strip_prefix(base)
                .map_or(false, |rest| rest.is_empty() || rest.starts_with('/'))
        })
    }

    // Locate the frontmatter body between the opening and closing `---`.
    let after_open = text
        .strip_prefix("---\n")
        .or_else(|| text.strip_prefix("---\r\n"));
    let Some(fm) = after_open.and_then(|rest| rest.find("\n---").map(|end| &rest[..end])) else {
        return false;
    };

    // True while we are on the `- item` lines that follow a bare `tags:` key.
    let mut in_tags_block = false;

    for line in fm.lines() {
        let line = line.trim();

        if line.starts_with("role:") {
            return true;
        }

        if let Some(value) = line.strip_prefix("tags:") {
            let value = value.trim();
            if value.is_empty() {
                // Block-style list: the tags are on the following lines.
                in_tags_block = true;
                continue;
            }
            // Inline list: tags: [person, ...] or tags: person, ...
            let inline = value.trim_start_matches('[').trim_end_matches(']');
            if inline.split(',').any(is_person_tag) {
                return true;
            }
            in_tags_block = false;
            continue;
        }

        if in_tags_block {
            if let Some(item) = line.strip_prefix("- ") {
                if is_person_tag(item) {
                    return true;
                }
            } else if !line.is_empty() {
                // Any other non-blank line ends the tag list.
                in_tags_block = false;
            }
        }
    }

    false
}
143+
144+
/// Check whether a filename looks like a date note (`YYYY-MM-DD.md`).
///
/// The name must be exactly 13 bytes: four ASCII digits, `-`, two digits,
/// `-`, two digits, then the literal `.md` suffix. Only the shape is checked;
/// the digits are not validated as a real calendar date.
///
/// All comparisons are done on bytes, so names containing multi-byte UTF-8
/// characters are rejected instead of panicking on a non-char-boundary slice.
fn is_date_filename(name: &str) -> bool {
    let bytes = name.as_bytes();
    // 4 + 1 + 2 + 1 + 2 + 3 bytes.
    if bytes.len() != 13 {
        return false;
    }

    let all_digits = |range: std::ops::Range<usize>| bytes[range].iter().all(u8::is_ascii_digit);

    bytes[4] == b'-'
        && bytes[7] == b'-'
        && &bytes[10..] == b".md"
        && all_digits(0..4)
        && all_digits(5..7)
        && all_digits(8..10)
}
158+
159+
/// Scan top-level subdirectories and return the one (with trailing slash) where
160+
/// 60%+ of the `.md` files have person-like frontmatter. Returns `None` if no
161+
/// folder qualifies.
162+
pub fn detect_people_folder(root: &Path) -> Result<Option<String>> {
163+
let entries = std::fs::read_dir(root)
164+
.with_context(|| format!("cannot read directory {}", root.display()))?;
165+
166+
for entry in entries {
167+
let entry = entry?;
168+
if !entry.file_type()?.is_dir() {
169+
continue;
170+
}
171+
let name = entry.file_name();
172+
let name_str = name.to_string_lossy();
173+
if name_str.starts_with('.') {
174+
continue;
175+
}
176+
177+
let dir = entry.path();
178+
let mut total = 0usize;
179+
let mut person_like = 0usize;
180+
181+
let inner = std::fs::read_dir(&dir)
182+
.with_context(|| format!("cannot read directory {}", dir.display()))?;
183+
for inner_entry in inner {
184+
let inner_entry = inner_entry?;
185+
if !inner_entry.file_type()?.is_file() {
186+
continue;
187+
}
188+
let fname = inner_entry.file_name();
189+
let fname_str = fname.to_string_lossy();
190+
if !fname_str.ends_with(".md") {
191+
continue;
192+
}
193+
total += 1;
194+
let text = std::fs::read_to_string(inner_entry.path()).unwrap_or_default();
195+
if is_person_like(&text) {
196+
person_like += 1;
197+
}
198+
}
199+
200+
if total > 0 && person_like * 100 / total >= 60 {
201+
return Ok(Some(format!("{}/", name_str)));
202+
}
203+
}
204+
205+
Ok(None)
206+
}
207+
208+
/// Scan top-level subdirectories and return the one (with trailing slash) where
209+
/// 60%+ of the `.md` filenames match the YYYY-MM-DD pattern. Returns `None` if
210+
/// no folder qualifies.
211+
pub fn detect_daily_folder(root: &Path) -> Result<Option<String>> {
212+
let entries = std::fs::read_dir(root)
213+
.with_context(|| format!("cannot read directory {}", root.display()))?;
214+
215+
for entry in entries {
216+
let entry = entry?;
217+
if !entry.file_type()?.is_dir() {
218+
continue;
219+
}
220+
let name = entry.file_name();
221+
let name_str = name.to_string_lossy();
222+
if name_str.starts_with('.') {
223+
continue;
224+
}
225+
226+
let dir = entry.path();
227+
let mut total = 0usize;
228+
let mut date_like = 0usize;
229+
230+
let inner = std::fs::read_dir(&dir)
231+
.with_context(|| format!("cannot read directory {}", dir.display()))?;
232+
for inner_entry in inner {
233+
let inner_entry = inner_entry?;
234+
if !inner_entry.file_type()?.is_file() {
235+
continue;
236+
}
237+
let fname = inner_entry.file_name();
238+
let fname_str = fname.to_string_lossy();
239+
if !fname_str.ends_with(".md") {
240+
continue;
241+
}
242+
total += 1;
243+
if is_date_filename(&fname_str) {
244+
date_like += 1;
245+
}
246+
}
247+
248+
if total > 0 && date_like * 100 / total >= 60 {
249+
return Ok(Some(format!("{}/", name_str)));
250+
}
251+
}
252+
253+
Ok(None)
254+
}
255+
256+
/// Find the archive folder by looking for well-known names (case-insensitive):
257+
/// "archive", "_archive", ".archive", or folders matching PARA-style patterns
258+
/// like "04-Archive".
259+
pub fn detect_archive_folder(root: &Path) -> Result<Option<String>> {
260+
let archive_names: &[&str] = &["archive", "_archive", ".archive"];
261+
262+
let entries = std::fs::read_dir(root)
263+
.with_context(|| format!("cannot read directory {}", root.display()))?;
264+
265+
for entry in entries {
266+
let entry = entry?;
267+
if !entry.file_type()?.is_dir() {
268+
continue;
269+
}
270+
let name = entry.file_name();
271+
let name_str = name.to_string_lossy();
272+
273+
// Strip leading digits and separators for PARA-style matching.
274+
let stripped = name_str
275+
.trim_start_matches(|c: char| c.is_ascii_digit())
276+
.trim_start_matches(['-', '_', ' ']);
277+
278+
let lower = stripped.to_ascii_lowercase();
279+
if archive_names.contains(&lower.as_str()) {
280+
return Ok(Some(format!("{}/", name_str)));
281+
}
282+
}
283+
284+
Ok(None)
285+
}
286+
64287
// ---------------------------------------------------------------------------
65288
// Detection helpers
66289
// ---------------------------------------------------------------------------
@@ -159,6 +382,28 @@ pub fn detect_structure(path: &Path) -> Result<StructureDetection> {
159382
StructureMethod::Flat
160383
};
161384

385+
// For non-PARA vaults, try content-based detection for roles not yet filled.
386+
if method != StructureMethod::Para {
387+
if folders.people.is_none() {
388+
folders.people = detect_people_folder(path)
389+
.ok()
390+
.flatten()
391+
.map(|s| s.trim_end_matches('/').to_string());
392+
}
393+
if folders.daily.is_none() {
394+
folders.daily = detect_daily_folder(path)
395+
.ok()
396+
.flatten()
397+
.map(|s| s.trim_end_matches('/').to_string());
398+
}
399+
if folders.archive.is_none() {
400+
folders.archive = detect_archive_folder(path)
401+
.ok()
402+
.flatten()
403+
.map(|s| s.trim_end_matches('/').to_string());
404+
}
405+
}
406+
162407
Ok(StructureDetection { method, folders })
163408
}
164409

@@ -632,4 +877,35 @@ mod tests {
632877
assert_eq!(count, 4); // a, a/b, a/b/c, d
633878
assert_eq!(depth, 3); // a/b/c is depth 3
634879
}
880+
881+
#[test]
fn test_detect_people_folder_from_content() {
    // Frontmatter that carries both a `person` tag and a `role` key.
    const PERSON_NOTE: &str = "---\ntags:\n - person\nrole: Engineer\n---\n";

    let vault = tempfile::TempDir::new().unwrap();
    let contacts = vault.path().join("contacts");
    std::fs::create_dir_all(&contacts).unwrap();

    // Three person notes plus one plain note: 75% of the folder qualifies,
    // which is above the 60% threshold.
    ["alice.md", "bob.md", "charlie.md"]
        .iter()
        .for_each(|file| std::fs::write(contacts.join(file), PERSON_NOTE).unwrap());
    std::fs::write(contacts.join("readme.md"), "# Contacts\n").unwrap();

    let found = detect_people_folder(vault.path()).unwrap();
    assert_eq!(found.as_deref(), Some("contacts/"));
}
899+
900+
#[test]
fn test_detect_daily_folder_from_filenames() {
    let vault = tempfile::TempDir::new().unwrap();
    let journal = vault.path().join("journal");
    std::fs::create_dir_all(&journal).unwrap();

    // Every note in the folder is named after a date, so it must be picked
    // as the daily folder regardless of the file contents.
    ["2026-03-24.md", "2026-03-25.md", "2026-03-26.md"]
        .iter()
        .for_each(|file| std::fs::write(journal.join(file), "# Daily\n").unwrap());

    let found = detect_daily_folder(vault.path()).unwrap();
    assert_eq!(found.as_deref(), Some("journal/"));
}
635911
}

0 commit comments

Comments
 (0)