Skip to content

Commit c31b4f0

Browse files
committed
feat(store): add fuzzy name matching to resolve_file
Adds a Levenshtein-distance fallback (threshold ≤ 2) to resolve_file after exact path, basename, and separator-normalization steps fail. Compares against basenames stripped of .md extension. Returns an error with candidate list when multiple files are equidistant.
1 parent 4a4c1ea commit c31b4f0

1 file changed

Lines changed: 107 additions & 1 deletion

File tree

src/store.rs

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1465,14 +1465,82 @@ impl Store {
14651465
}
14661466

14671467
/// Resolve a file reference (path, basename, or #docid) to a FileRecord.
1468+
///
1469+
/// Resolution order:
1470+
/// 1. `#docid` — 6-char hex prefixed with `#`
1471+
/// 2. Exact path match
1472+
/// 3. Basename match (case-insensitive, with separator normalization)
1473+
/// 4. Fuzzy match — Levenshtein distance ≤ 2 on basenames (stripped of `.md`)
1474+
/// - If exactly one candidate: return it
1475+
/// - If multiple equidistant candidates: error with candidate list
1476+
/// - If none within threshold: return None
14681477
pub fn resolve_file(&self, file_or_docid: &str) -> Result<Option<FileRecord>> {
14691478
if file_or_docid.starts_with('#') && file_or_docid.len() == 7 {
14701479
return self.get_file_by_docid(&file_or_docid[1..]);
14711480
}
14721481
if let Some(f) = self.get_file(file_or_docid)? {
14731482
return Ok(Some(f));
14741483
}
1475-
self.find_file_by_basename(file_or_docid)
1484+
if let Some(f) = self.find_file_by_basename(file_or_docid)? {
1485+
return Ok(Some(f));
1486+
}
1487+
self.find_file_by_fuzzy(file_or_docid)
1488+
}
1489+
1490+
/// Fuzzy-match a query against all stored file basenames using Levenshtein distance.
1491+
/// Returns the unique closest match within distance ≤ 2, or an error if ambiguous.
1492+
fn find_file_by_fuzzy(&self, query: &str) -> Result<Option<FileRecord>> {
1493+
use strsim::levenshtein;
1494+
1495+
// Normalize query: strip .md, lowercase.
1496+
let query_stem = query
1497+
.strip_suffix(".md")
1498+
.unwrap_or(query)
1499+
.to_lowercase();
1500+
1501+
// Collect all (path, basename_stem) pairs from the store.
1502+
let mut stmt = self.conn.prepare("SELECT path FROM files")?;
1503+
let paths: Vec<String> = stmt
1504+
.query_map([], |row| row.get(0))?
1505+
.filter_map(|r| r.ok())
1506+
.collect();
1507+
1508+
let mut best_distance = usize::MAX;
1509+
let mut best_paths: Vec<String> = Vec::new();
1510+
1511+
for path in &paths {
1512+
// Extract basename and strip .md extension for comparison.
1513+
let basename = std::path::Path::new(path)
1514+
.file_name()
1515+
.and_then(|f| f.to_str())
1516+
.unwrap_or(path);
1517+
let stem = basename
1518+
.strip_suffix(".md")
1519+
.unwrap_or(basename)
1520+
.to_lowercase();
1521+
1522+
let dist = levenshtein(&query_stem, &stem);
1523+
if dist > 2 {
1524+
continue;
1525+
}
1526+
if dist < best_distance {
1527+
best_distance = dist;
1528+
best_paths.clear();
1529+
best_paths.push(path.clone());
1530+
} else if dist == best_distance {
1531+
best_paths.push(path.clone());
1532+
}
1533+
}
1534+
1535+
match best_paths.len() {
1536+
0 => Ok(None),
1537+
1 => self.get_file(&best_paths[0]),
1538+
_ => Err(anyhow::anyhow!(
1539+
"ambiguous fuzzy match for '{}': [{}]",
1540+
query,
1541+
best_paths.join(", ")
1542+
)),
1543+
}
14761544
}
14771545

14781546
pub fn resolve_tag(&self, proposed: &str) -> Result<crate::tags::TagResolution> {
@@ -2676,4 +2744,42 @@ mod tests {
26762744
let store = Store::open_memory().unwrap();
26772745
assert!(!store.has_dimension_mismatch(256).unwrap());
26782746
}
2747+
2748+
// ── Fuzzy resolve tests ───────────────────────────────────
2749+
2750+
#[test]
fn test_resolve_file_fuzzy_match() {
    let store = Store::open_memory().unwrap();
    let inserted = store.insert_file("Steve Barbera.md", "hash1", 100, &[], "ab1234", None);
    inserted.unwrap();

    // One substitution ("a" for "e") separates the query from the stored
    // stem, which is well inside the fuzzy threshold of 2.
    let found = store.resolve_file("Steve Barbara").unwrap();
    assert!(found.is_some());

    let record = found.unwrap();
    assert_eq!(record.path, "Steve Barbera.md");
}
2761+
2762+
#[test]
fn test_resolve_file_fuzzy_ambiguous() {
    let store = Store::open_memory().unwrap();

    // Two files whose stems differ from each other only in the last character.
    let fixtures = [("test-a.md", "h1", "aaa111"), ("test-b.md", "h2", "bbb222")];
    for (path, hash, docid) in fixtures {
        store
            .insert_file(path, hash, 100, &[], docid, None)
            .unwrap();
    }

    // "test-c" is one edit away from both stems, so neither is a better
    // match; resolution must fail instead of picking one arbitrarily.
    assert!(store.resolve_file("test-c").is_err());
}
2775+
2776+
#[test]
fn test_resolve_file_existing_docid() {
    let store = Store::open_memory().unwrap();
    store
        .insert_file("note.md", "hash", 100, &[], "abc123", None)
        .unwrap();

    // A `#`-prefixed docid must still resolve through the docid path now
    // that resolve_file has extra fallback steps after it.
    let result = store.resolve_file("#abc123").unwrap();
    assert!(result.is_some());
    // Check the identity of the record too, so that returning the wrong
    // file cannot pass unnoticed.
    assert_eq!(result.unwrap().path, "note.md");
}
26792785
}

0 commit comments

Comments
 (0)