Skip to content

Commit 51ee332

Browse files
committed
feat(health): add vault health analysis module
Orphan detection, broken link tracking, and combined health reports. Adds unresolved_links table and find_isolated_files query to Store.
1 parent af44bb8 commit 51ee332

3 files changed

Lines changed: 319 additions & 0 deletions

File tree

src/health.rs

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
use anyhow::Result;
2+
3+
use crate::store::Store;
4+
5+
/// Full vault health report.
6+
#[derive(Debug, Clone, serde::Serialize)]
7+
pub struct HealthReport {
8+
pub orphans: Vec<String>,
9+
pub broken_links: Vec<BrokenLink>,
10+
pub stale_notes: Vec<String>,
11+
pub inbox_pending: Vec<String>,
12+
pub tag_issues: Vec<TagIssue>,
13+
pub index_age_seconds: u64,
14+
pub total_files: usize,
15+
}
16+
17+
/// A wikilink that could not be resolved to any indexed file.
18+
#[derive(Debug, Clone, serde::Serialize)]
19+
pub struct BrokenLink {
20+
pub source: String,
21+
pub target: String,
22+
}
23+
24+
/// A tag-related problem in a file.
25+
#[derive(Debug, Clone, serde::Serialize)]
26+
pub struct TagIssue {
27+
pub file: String,
28+
pub issue: String,
29+
}
30+
31+
/// Folder settings that steer the vault health checks.
///
/// Both folders are treated as path prefixes and are left out of orphan
/// detection, because files in them are expected to be unlinked. The inbox
/// folder additionally selects the files listed as pending in the report.
///
/// `Default` yields a configuration with neither folder set.
#[derive(Debug, Clone, Default)]
pub struct HealthConfig {
    /// Path prefix of the daily-notes folder, e.g. `"daily/"`; `None` disables the exclusion.
    pub daily_folder: Option<String>,
    /// Path prefix of the inbox folder, e.g. `"00-Inbox/"`; `None` disables both the
    /// exclusion and the inbox-pending listing.
    pub inbox_folder: Option<String>,
}
36+
37+
/// Find files with no edges (neither incoming nor outgoing).
38+
///
39+
/// Excludes files whose path starts with the configured daily or inbox folder
40+
/// prefixes — those are expected to be unlinked.
41+
pub fn find_orphans(store: &Store, config: &HealthConfig) -> Result<Vec<String>> {
42+
let mut exclude = Vec::new();
43+
if let Some(ref daily) = config.daily_folder {
44+
exclude.push(daily.as_str());
45+
}
46+
if let Some(ref inbox) = config.inbox_folder {
47+
exclude.push(inbox.as_str());
48+
}
49+
let isolated = store.find_isolated_files(&exclude)?;
50+
Ok(isolated.into_iter().map(|f| f.path).collect())
51+
}
52+
53+
/// Find wikilink references that could not be resolved to any indexed file.
54+
///
55+
/// These are recorded in the `unresolved_links` table during indexing.
56+
pub fn find_broken_links(store: &Store) -> Result<Vec<BrokenLink>> {
57+
let unresolved = store.get_unresolved_links()?;
58+
Ok(unresolved
59+
.into_iter()
60+
.map(|(source, target)| BrokenLink { source, target })
61+
.collect())
62+
}
63+
64+
/// Find notes that haven't been updated in the given number of days.
65+
///
66+
/// Stub — returns an empty vec for now. A full implementation would check
67+
/// `mtime` or a `reviewed_at` frontmatter field.
68+
pub fn find_stale_notes(_store: &Store, _days: u32) -> Result<Vec<String>> {
69+
Ok(Vec::new())
70+
}
71+
72+
/// Generate a combined health report for the vault.
73+
pub fn generate_health_report(store: &Store, config: &HealthConfig) -> Result<HealthReport> {
74+
let orphans = find_orphans(store, config)?;
75+
let broken_links = find_broken_links(store)?;
76+
let stale_notes = find_stale_notes(store, 90)?;
77+
78+
// Inbox pending: files in the inbox folder.
79+
let inbox_pending = if let Some(ref inbox) = config.inbox_folder {
80+
store
81+
.find_files_by_prefix(&format!("{}%", inbox))?
82+
.into_iter()
83+
.map(|f| f.path)
84+
.collect()
85+
} else {
86+
Vec::new()
87+
};
88+
89+
let all_files = store.get_all_files()?;
90+
let total_files = all_files.len();
91+
92+
// Tag issues: find work notes missing required tags.
93+
let tag_issues = all_files
94+
.iter()
95+
.filter(|f| f.path.contains("Work/") || f.path.contains("01-Projects/Work/"))
96+
.filter(|f| !f.tags.iter().any(|t| t == "work"))
97+
.map(|f| TagIssue {
98+
file: f.path.clone(),
99+
issue: "work note missing 'work' tag".to_string(),
100+
})
101+
.collect();
102+
103+
// Index age: seconds since the most recent indexed_at timestamp.
104+
let index_age_seconds = {
105+
let last = all_files
106+
.iter()
107+
.filter_map(|f| f.indexed_at.parse::<u64>().ok())
108+
.max()
109+
.unwrap_or(0);
110+
if last == 0 {
111+
0
112+
} else {
113+
use std::time::SystemTime;
114+
let now = SystemTime::now()
115+
.duration_since(SystemTime::UNIX_EPOCH)
116+
.unwrap_or_default()
117+
.as_secs();
118+
now.saturating_sub(last)
119+
}
120+
};
121+
122+
Ok(HealthReport {
123+
orphans,
124+
broken_links,
125+
stale_notes,
126+
inbox_pending,
127+
tag_issues,
128+
index_age_seconds,
129+
total_files,
130+
})
131+
}
132+
133+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::store::Store;

    /// Build a `HealthConfig` from optional string slices.
    fn config_with(daily: Option<&str>, inbox: Option<&str>) -> HealthConfig {
        HealthConfig {
            daily_folder: daily.map(str::to_string),
            inbox_folder: inbox.map(str::to_string),
        }
    }

    /// In-memory store holding two files joined by a wikilink edge plus one
    /// unlinked daily note.
    fn setup_health_store() -> Store {
        let store = Store::open_memory().unwrap();
        let link_source = store
            .insert_file("linked.md", "aaa111", 100, &[], "aaa111", None)
            .unwrap();
        let link_target = store
            .insert_file("orphan.md", "bbb222", 100, &[], "bbb222", None)
            .unwrap();
        store
            .insert_file("daily/2026-03-26.md", "ccc333", 100, &[], "ccc333", None)
            .unwrap();
        // linked.md → orphan.md, so both of these files count as connected.
        store
            .insert_edge(link_source, link_target, "wikilink")
            .unwrap();
        store
    }

    #[test]
    fn test_find_orphans_excludes_daily() {
        let store = setup_health_store();
        let config = config_with(Some("daily/"), None);

        let orphans = find_orphans(&store, &config).unwrap();

        // The two linked files are connected and the daily note is skipped by
        // its prefix, so nothing is left to report.
        assert!(orphans.is_empty());
    }

    #[test]
    fn test_find_orphans_detects_isolated() {
        let store = Store::open_memory().unwrap();
        store
            .insert_file("connected.md", "h1", 100, &[], "d1", None)
            .unwrap();
        let island = store
            .insert_file("island.md", "h2", 100, &[], "d2", None)
            .unwrap();
        let other = store
            .insert_file("other.md", "h3", 100, &[], "d3", None)
            .unwrap();
        store.insert_edge(island, other, "wikilink").unwrap();

        let orphans = find_orphans(&store, &config_with(None, None)).unwrap();

        // Despite its name, connected.md is the only file without any edge.
        assert_eq!(orphans, vec!["connected.md"]);
    }

    #[test]
    fn test_find_broken_links() {
        let store = setup_health_store();
        // Record a wikilink whose target does not exist.
        store
            .insert_unresolved_link("linked.md", "nonexistent.md")
            .unwrap();

        let broken = find_broken_links(&store).unwrap();

        assert_eq!(broken.len(), 1);
        let only = &broken[0];
        assert_eq!(only.source, "linked.md");
        assert_eq!(only.target, "nonexistent.md");
    }

    #[test]
    fn test_find_broken_links_empty_when_none() {
        let store = setup_health_store();

        let broken = find_broken_links(&store).unwrap();

        assert!(broken.is_empty());
    }

    #[test]
    fn test_generate_health_report() {
        let store = Store::open_memory().unwrap();
        store
            .insert_file("note.md", "h1", 100, &[], "d1", None)
            .unwrap();
        store
            .insert_file("00-Inbox/unsorted.md", "h2", 100, &[], "d2", None)
            .unwrap();
        store
            .insert_unresolved_link("note.md", "missing.md")
            .unwrap();

        let config = config_with(Some("daily/"), Some("00-Inbox/"));
        let report = generate_health_report(&store, &config).unwrap();

        assert_eq!(report.total_files, 2);
        // note.md has no edges and lives outside daily/ and the inbox, so it
        // is the single orphan.
        assert_eq!(report.orphans, vec!["note.md"]);
        // Exactly one broken link was recorded.
        assert_eq!(report.broken_links.len(), 1);
        // Exactly one file sits in the inbox.
        assert_eq!(report.inbox_pending, vec!["00-Inbox/unsorted.md"]);
    }
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pub mod docid;
55
pub mod fts;
66
pub mod fusion;
77
pub mod graph;
8+
pub mod health;
89
pub mod indexer;
910
pub mod links;
1011
pub mod llm;

src/store.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,19 @@ impl Store {
303303
CREATE INDEX IF NOT EXISTS idx_cli_events_ts ON cli_events(timestamp);",
304304
)?;
305305

306+
// Unresolved links table — tracks wikilink targets that couldn't be
307+
// resolved to a file during indexing. Used by health analysis.
308+
self.conn.execute_batch(
309+
"CREATE TABLE IF NOT EXISTS unresolved_links (
310+
id INTEGER PRIMARY KEY,
311+
source_file TEXT NOT NULL,
312+
target TEXT NOT NULL,
313+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
314+
UNIQUE(source_file, target)
315+
);
316+
CREATE INDEX IF NOT EXISTS idx_unresolved_source ON unresolved_links(source_file);",
317+
)?;
318+
306319
Ok(())
307320
}
308321

@@ -1660,6 +1673,71 @@ impl Store {
16601673

16611674
Ok(())
16621675
}
1676+
1677+
// ── Unresolved Links ─────────────────────────────────────────
1678+
1679+
/// Record a wikilink target that could not be resolved during indexing.
1680+
pub fn insert_unresolved_link(&self, source_file: &str, target: &str) -> Result<()> {
1681+
self.conn.execute(
1682+
"INSERT OR IGNORE INTO unresolved_links (source_file, target) VALUES (?1, ?2)",
1683+
params![source_file, target],
1684+
)?;
1685+
Ok(())
1686+
}
1687+
1688+
/// Remove all unresolved links originating from the given source file.
1689+
pub fn clear_unresolved_links_for_file(&self, source_file: &str) -> Result<()> {
1690+
self.conn.execute(
1691+
"DELETE FROM unresolved_links WHERE source_file = ?1",
1692+
params![source_file],
1693+
)?;
1694+
Ok(())
1695+
}
1696+
1697+
/// Return all unresolved links (source_file, target) pairs.
1698+
pub fn get_unresolved_links(&self) -> Result<Vec<(String, String)>> {
1699+
let mut stmt = self
1700+
.conn
1701+
.prepare("SELECT source_file, target FROM unresolved_links ORDER BY source_file")?;
1702+
let rows = stmt.query_map([], |row| {
1703+
Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1704+
})?;
1705+
let mut results = Vec::new();
1706+
for row in rows {
1707+
results.push(row?);
1708+
}
1709+
Ok(results)
1710+
}
1711+
1712+
// ── Health Queries ───────────────────────────────────────────
1713+
1714+
/// Find files that have no edges (neither incoming nor outgoing).
1715+
/// Optionally exclude files whose path starts with any of the given prefixes.
1716+
pub fn find_isolated_files(&self, exclude_prefixes: &[&str]) -> Result<Vec<FileRecord>> {
1717+
let all_files = self.get_all_files()?;
1718+
let connected: HashSet<i64> = {
1719+
let mut stmt = self.conn.prepare(
1720+
"SELECT DISTINCT id FROM files WHERE id IN \
1721+
(SELECT from_file FROM edges UNION SELECT to_file FROM edges)",
1722+
)?;
1723+
let rows = stmt.query_map([], |row| row.get::<_, i64>(0))?;
1724+
let mut set = HashSet::new();
1725+
for row in rows {
1726+
set.insert(row?);
1727+
}
1728+
set
1729+
};
1730+
let isolated = all_files
1731+
.into_iter()
1732+
.filter(|f| !connected.contains(&f.id))
1733+
.filter(|f| {
1734+
!exclude_prefixes
1735+
.iter()
1736+
.any(|prefix| f.path.starts_with(prefix))
1737+
})
1738+
.collect();
1739+
Ok(isolated)
1740+
}
16631741
}
16641742

16651743
fn parse_tags(json: &str) -> Vec<String> {

0 commit comments

Comments
 (0)