Skip to content

Commit 60ce0bd

Browse files
devwhodevs and claude committed
feat: docid system — 6-char hash IDs for quick file reference
Every indexed file gets a deterministic #docid (SHA-256 of path, truncated to 6 hex chars). Shown in search results. Supports direct lookup via 'engraph get #abc123'. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 55d91bf commit 60ce0bd

6 files changed

Lines changed: 197 additions & 24 deletions

File tree

src/docid.rs

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
use sha2::{Digest, Sha256};
2+
3+
/// Generate a 6-character hex docid from a file path.
4+
/// Deterministic: same path always produces same docid.
5+
pub fn generate_docid(path: &str) -> String {
6+
let mut hasher = Sha256::new();
7+
hasher.update(path.as_bytes());
8+
let hash = hasher.finalize();
9+
format!("{:02x}{:02x}{:02x}", hash[0], hash[1], hash[2])
10+
}
11+
12+
#[cfg(test)]
mod tests {
    use super::*;

    /// A docid is exactly six characters long, all of them hex digits.
    #[test]
    fn test_generate_docid_length_and_hex() {
        let id = generate_docid("notes/test.md");
        assert_eq!(id.len(), 6, "docid should be 6 characters");

        let only_hex = id.chars().all(|c| c.is_ascii_hexdigit());
        assert!(only_hex, "docid should be all hex chars, got: {}", id);
    }

    /// Hashing the same path twice yields the same docid.
    #[test]
    fn test_docid_deterministic() {
        let first = generate_docid("notes/test.md");
        let second = generate_docid("notes/test.md");
        assert_eq!(first, second, "same path must produce same docid");
    }

    /// These two sample paths map to different docids.
    #[test]
    fn test_docid_unique() {
        let id_a = generate_docid("notes/a.md");
        let id_b = generate_docid("notes/b.md");
        assert_ne!(
            id_a, id_b,
            "different paths should produce different docids"
        );
    }
}

src/indexer.rs

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ use tracing::info;
1010

1111
use crate::chunker::{chunk_markdown, split_oversized_chunks};
1212
use crate::config::Config;
13+
use crate::docid::generate_docid;
1314
use crate::embedder::Embedder;
1415
use crate::hnsw::HnswIndex;
1516
use crate::store::{FileRecord, Store};
@@ -290,8 +291,9 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result<In
290291
};
291292

292293
for result in &results {
294+
let docid = generate_docid(&result.rel_path);
293295
let file_id =
294-
store.insert_file(&result.rel_path, &result.hash, result.mtime, &result.tags)?;
296+
store.insert_file(&result.rel_path, &result.hash, result.mtime, &result.tags, &docid)?;
295297

296298
for (heading, snippet, vector, token_count) in &result.chunks {
297299
let vector_id = next_vector_id;
@@ -450,7 +452,7 @@ mod tests {
450452
let store = Store::open_memory().unwrap();
451453
// Insert file with an old/different hash.
452454
store
453-
.insert_file("note.md", "old_hash_that_wont_match", 100, &[])
455+
.insert_file("note.md", "old_hash_that_wont_match", 100, &[], &generate_docid("note.md"))
454456
.unwrap();
455457

456458
let files = walk_vault(root, &[]).unwrap();
@@ -479,10 +481,11 @@ mod tests {
479481
&compute_file_hash(&root.join("surviving.md")).unwrap(),
480482
100,
481483
&[],
484+
&generate_docid("surviving.md"),
482485
)
483486
.unwrap();
484487
store
485-
.insert_file("deleted.md", "some_hash", 100, &[])
488+
.insert_file("deleted.md", "some_hash", 100, &[], &generate_docid("deleted.md"))
486489
.unwrap();
487490

488491
let files = walk_vault(root, &[]).unwrap();

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
pub mod chunker;
22
pub mod config;
3+
pub mod docid;
34
pub mod embedder;
45
pub mod hnsw;
56
pub mod indexer;

src/search.rs

Lines changed: 32 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ pub struct SearchResult {
1313
pub file_path: String,
1414
pub heading: Option<String>,
1515
pub snippet: String,
16+
pub docid: Option<String>,
1617
}
1718

1819
/// Run a search query and print results.
@@ -36,9 +37,11 @@ pub fn run_search(query: &str, top_n: usize, json: bool, data_dir: &Path) -> Res
3637
let mut results = Vec::new();
3738
for (vector_id, distance) in raw_results {
3839
if let Some(chunk) = store.get_chunk_by_vector_id(vector_id)? {
39-
let file_path = store
40-
.get_file_path_by_id(chunk.file_id)?
41-
.unwrap_or_else(|| "<unknown>".to_string());
40+
// Single query to get both file_path and docid.
41+
let (file_path, docid) = match store.get_file_by_id(chunk.file_id)? {
42+
Some(f) => (f.path, f.docid),
43+
None => ("<unknown>".to_string(), None),
44+
};
4245

4346
// Convert cosine distance to similarity score.
4447
let score = 1.0 - distance;
@@ -53,6 +56,7 @@ pub fn run_search(query: &str, top_n: usize, json: bool, data_dir: &Path) -> Res
5356
file_path,
5457
heading,
5558
snippet: chunk.snippet,
59+
docid,
5660
});
5761
}
5862
}
@@ -82,7 +86,7 @@ pub fn run_status(json: bool, data_dir: &Path) -> Result<()> {
8286
/// Format search results for display (pure function, no I/O).
8387
pub fn format_results(results: &[SearchResult], json: bool) -> String {
8488
if results.is_empty() {
85-
return "No results found.\n".to_string();
89+
return if json { "[]\n".to_string() } else { "No results found.\n".to_string() };
8690
}
8791

8892
if json {
@@ -98,6 +102,7 @@ pub fn format_results(results: &[SearchResult], json: bool) -> String {
98102
"file": r.file_path,
99103
"heading": r.heading,
100104
"snippet": r.snippet,
105+
"docid": r.docid,
101106
})
102107
})
103108
.collect();
@@ -109,13 +114,18 @@ pub fn format_results(results: &[SearchResult], json: bool) -> String {
109114
Some(h) => format!(" > {h}"),
110115
None => String::new(),
111116
};
117+
let docid_part = match &r.docid {
118+
Some(d) => format!(" #{d}"),
119+
None => String::new(),
120+
};
112121
let snippet = truncate_snippet(&r.snippet, 200);
113122
out.push_str(&format!(
114-
"{:>2}. [{:.2}] {}{}\n {}\n",
123+
"{:>2}. [{:.2}] {}{}{}\n {}\n",
115124
i + 1,
116125
r.score,
117126
r.file_path,
118127
heading_part,
128+
docid_part,
119129
snippet,
120130
));
121131
}
@@ -219,6 +229,20 @@ mod tests {
219229
file_path: "foo.md".to_string(),
220230
heading: Some("## Bar".to_string()),
221231
snippet: "Some text...".to_string(),
232+
docid: Some("ab12cd".to_string()),
233+
}];
234+
let output = format_results(&results, false);
235+
assert_eq!(output, " 1. [0.87] foo.md > ## Bar #ab12cd\n Some text...\n");
236+
}
237+
238+
#[test]
239+
fn test_format_human_result_no_docid() {
240+
let results = vec![SearchResult {
241+
score: 0.87,
242+
file_path: "foo.md".to_string(),
243+
heading: Some("## Bar".to_string()),
244+
snippet: "Some text...".to_string(),
245+
docid: None,
222246
}];
223247
let output = format_results(&results, false);
224248
assert_eq!(output, " 1. [0.87] foo.md > ## Bar\n Some text...\n");
@@ -231,6 +255,7 @@ mod tests {
231255
file_path: "foo.md".to_string(),
232256
heading: Some("## Bar".to_string()),
233257
snippet: "Some text...".to_string(),
258+
docid: Some("ab12cd".to_string()),
234259
}];
235260
let output = format_results(&results, true);
236261
let parsed: Vec<serde_json::Value> = serde_json::from_str(&output).unwrap();
@@ -240,6 +265,7 @@ mod tests {
240265
assert_eq!(parsed[0]["file"], "foo.md");
241266
assert_eq!(parsed[0]["heading"], "## Bar");
242267
assert_eq!(parsed[0]["snippet"], "Some text...");
268+
assert_eq!(parsed[0]["docid"], "ab12cd");
243269
}
244270

245271
#[test]
@@ -248,7 +274,7 @@ mod tests {
248274
assert_eq!(output, "No results found.\n");
249275

250276
let json_output = format_results(&[], true);
251-
assert_eq!(json_output, "No results found.\n");
277+
assert_eq!(json_output, "[]\n");
252278
}
253279

254280
#[test]

0 commit comments

Comments
 (0)