diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d05e09..bb65b82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## v1.6.0 — Onboarding + Identity (2026-04-10) + +### Added +- **Interactive onboarding** (`engraph init`) — polished CLI with welcome banner, vault scan checkmarks, identity prompts via dialoguer, progress bars, actionable next steps +- **Agent onboarding** — `engraph init --detect --json` for vault inspection, `--json` for non-interactive apply. Two-phase detect → apply flow for AI agents. +- **`identity` MCP tool + CLI + HTTP** — returns compact L0/L1 identity block (~170 tokens) for AI session context +- **`setup` MCP tool + HTTP** — first-time setup from inside an MCP session (detect/apply modes) +- **`identity_facts` table** — SQLite storage for L0 (static identity) and L1 (dynamic context) facts +- **L1 auto-extraction** — active projects, key people, current focus, OOO status, blocking items extracted during `engraph index` +- **`engraph identity --refresh`** — re-extract L1 facts without full reindex +- **`[identity]` config section** — name, role, vault_purpose in config.toml +- **`[memory]` config section** — feature flags for identity/timeline/mining + +### Changed +- MCP tools: 23 → 25 +- HTTP endpoints: 24 → 26 +- Dependencies: +dialoguer 0.12, +console 0.16, +regex 1 + ## v1.5.5 — Housekeeping (2026-04-10) ### Added diff --git a/CLAUDE.md b/CLAUDE.md index d7d9669..977878e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -32,6 +32,8 @@ Single binary with 26 modules behind a lib crate: - `indexer.rs` — orchestrates vault walking (via `ignore` crate for `.gitignore` support), diffing, chunking, embedding, writes to store + sqlite-vec + FTS5, vault graph edge building (wikilinks + people detection), and folder centroid computation. Exposes `index_file`, `remove_file`, `rename_file` as public per-file functions. `run_index_shared` accepts external store/embedder for watcher FullRescan. Dimension migration on model change. 
- `temporal.rs` — temporal search lane. Extracts note dates from frontmatter `date:` field or `YYYY-MM-DD` filename patterns. Heuristic date parsing for natural language ("today", "yesterday", "last week", "this month", "recent", month names, ISO dates, date ranges). Smooth decay scoring for files near but outside target date range. Provides `extract_note_date()` for indexing and `score_temporal()` + `parse_date_range_heuristic()` for search - `search.rs` — hybrid search orchestrator. `search_with_intelligence()` runs the full pipeline: orchestrate (intent + expansions) → 5-lane RRF retrieval (semantic + FTS5 + graph + reranker + temporal) per expansion → two-pass RRF fusion. `search_internal()` is a thin wrapper without intelligence models. Adaptive lane weights per query intent including temporal (1.5 weight for time-aware queries). Results display normalized confidence percentages (0-100%) instead of raw RRF scores. +- `identity.rs` — L1 extraction engine: active projects, key people, current focus, OOO, blocking. `format_identity_block()` for compact session context. `extract_l1_facts()` called after indexing. +- `onboarding.rs` — Interactive CLI UX: welcome banner, vault scan, identity prompts (dialoguer), agent mode (--detect --json, --json). `run_interactive()`, `run_detect_json()`, `run_apply_json()`. `main.rs` is a thin clap CLI (async via `#[tokio::main]`). 
Subcommands: `index` (with progress bar), `search` (with `--explain`, loads intelligence models when enabled), `status` (shows intelligence state + date coverage stats), `clear`, `init` (intelligence onboarding prompt, detects Obsidian CLI + AI agents), `configure` (`--enable-intelligence`, `--disable-intelligence`, `--model`, `--obsidian-cli`, `--no-obsidian-cli`, `--agent`, `--add-api-key`, `--list-api-keys`, `--revoke-api-key`, `--setup-chatgpt`), `models`, `graph` (show/stats), `context` (read/list/vault-map/who/project/topic), `write` (create/append/update-metadata/move/edit/rewrite/edit-frontmatter/delete), `migrate` (para with `--preview`/`--apply`/`--undo` for PARA vault restructuring), `serve` (MCP stdio server with file watcher + intelligence + optional `--http`/`--port`/`--host`/`--no-auth` for HTTP REST API). diff --git a/Cargo.lock b/Cargo.lock index 93fbc68..810ec67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -426,6 +426,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "unicode-width", + "windows-sys 0.61.2", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -603,6 +615,18 @@ dependencies = [ "syn", ] +[[package]] +name = "dialoguer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25f104b501bf2364e78d0d3974cbc774f738f5865306ed128e1e0d7499c0ad96" +dependencies = [ + "console 0.16.3", + "shell-words", + "tempfile", + "zeroize", +] + [[package]] name = "digest" version = "0.10.7" @@ -674,11 +698,13 @@ dependencies = [ [[package]] name = "engraph" -version = "1.5.5" +version = "1.6.0" dependencies = [ "anyhow", "axum", "clap", + "console 0.16.3", + "dialoguer", "dirs", "encoding_rs", "ignore", @@ -688,6 +714,7 @@ dependencies = [ 
"notify-debouncer-full", "rand", "rayon", + "regex", "rmcp", "rusqlite", "serde", @@ -1314,7 +1341,7 @@ version = "0.17.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" dependencies = [ - "console", + "console 0.15.11", "number_prefix", "portable-atomic", "unicode-width", @@ -2239,6 +2266,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shell-words" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77" + [[package]] name = "shimmytok" version = "0.7.0" diff --git a/Cargo.toml b/Cargo.toml index 46b6b8b..d3194a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "engraph" -version = "1.5.5" +version = "1.6.0" edition = "2024" description = "Local knowledge graph for AI agents. Hybrid search + MCP server for Obsidian vaults." license = "MIT" @@ -25,6 +25,9 @@ tokenizers = { version = "0.22", default-features = false, features = ["fancy-re sha2 = "0.10" ureq = "2.12" indicatif = "0.17" +dialoguer = "0.12" +console = "0.16" +regex = "1" sqlite-vec = "0.1.8-alpha.1" zerocopy = { version = "0.7", features = ["derive"] } rayon = "1" diff --git a/README.md b/README.md index 6723ca5..e448d53 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,8 @@ engraph turns your markdown vault into a searchable knowledge graph that any AI Plain vector search treats your notes as isolated documents. But knowledge isn't flat — your notes link to each other, share tags, reference the same people and projects. engraph understands these connections. - **5-lane hybrid search** — semantic embeddings + BM25 full-text + graph expansion + cross-encoder reranking + temporal scoring, fused via [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf). An LLM orchestrator classifies queries and adapts lane weights per intent. 
Time-aware queries like "what happened last week" or "March 2026 notes" activate the temporal lane automatically. -- **MCP server for AI agents** — `engraph serve` exposes 22 tools (search, read, section-level editing, frontmatter mutations, vault health, context bundles, note creation, PARA migration) that Claude, Cursor, or any MCP client can call directly. -- **HTTP REST API** — `engraph serve --http` adds an axum-based HTTP server alongside MCP with 23 REST endpoints, API key authentication, rate limiting, and CORS. Web-based agents and scripts can query your vault with simple `curl` calls. +- **MCP server for AI agents** — `engraph serve` exposes 25 tools (search, read, section-level editing, frontmatter mutations, vault health, context bundles, note creation, PARA migration, identity) that Claude, Cursor, or any MCP client can call directly. +- **HTTP REST API** — `engraph serve --http` adds an axum-based HTTP server alongside MCP with 26 REST endpoints, API key authentication, rate limiting, and CORS. Web-based agents and scripts can query your vault with simple `curl` calls. - **Section-level editing** — AI agents can read, replace, prepend, or append to specific sections by heading. Full note rewriting with frontmatter preservation. Granular frontmatter mutations (set/remove fields, add/remove tags and aliases). - **Vault health diagnostics** — detect orphan notes, broken wikilinks, stale content, and tag hygiene issues. Available as MCP tool and CLI command. - **Obsidian CLI integration** — auto-detects running Obsidian and delegates compatible operations. Circuit breaker (Closed/Degraded/Open) ensures graceful fallback. 
@@ -61,7 +61,7 @@ Your vault (markdown files) │ Search: Orchestrator → 4-lane retrieval │ │ → Reranker → Two-pass RRF fusion │ │ │ -│ 22 MCP tools + 23 REST endpoints │ +│ 25 MCP tools + 26 REST endpoints │ └─────────────────────────────────────────────┘ │ ▼ @@ -268,7 +268,7 @@ Returns orphan notes (no links in or out), broken wikilinks, stale notes, and ta `engraph serve --http` adds a full REST API alongside the MCP server, exposing the same capabilities over HTTP for web agents, scripts, and integrations. -**24 endpoints:** +**26 endpoints:** | Method | Endpoint | Permission | Description | |--------|----------|------------|-------------| @@ -292,6 +292,8 @@ Returns orphan notes (no links in or out), broken wikilinks, stale notes, and ta | POST | `/api/unarchive` | write | Restore archived note | | POST | `/api/update-metadata` | write | Update note metadata | | POST | `/api/delete` | write | Delete note (soft or hard) | +| GET | `/api/identity` | read | User identity (L0) and current context (L1) | +| POST | `/api/setup` | write | First-time onboarding setup (detect/apply modes) | | POST | `/api/reindex-file` | write | Re-index a single file after external edits | | POST | `/api/migrate/preview` | write | Preview PARA migration (classify + suggest moves) | | POST | `/api/migrate/apply` | write | Apply PARA migration (move files) | @@ -526,7 +528,7 @@ STYLE: | Search method | 5-lane RRF (semantic + BM25 + graph + reranker + temporal) | Vector similarity only | Keyword only | | Query understanding | LLM orchestrator classifies intent, adapts weights | None | None | | Understands note links | Yes (wikilink graph traversal) | No | Limited (backlinks panel) | -| AI agent access | MCP server (22 tools) + HTTP REST API (23 endpoints) | Custom API needed | No | +| AI agent access | MCP server (25 tools) + HTTP REST API (26 endpoints) | Custom API needed | No | | Write capability | Create/edit/rewrite/delete with smart filing | No | Manual | | Vault health | Orphans, 
broken links, stale notes, tag hygiene | No | Limited | | Real-time sync | File watcher, 2s debounce | Manual re-index | N/A | @@ -543,8 +545,9 @@ engraph is not a replacement for Obsidian — it's the intelligence layer that s - LLM research orchestrator: query intent classification + query expansion + adaptive lane weights - llama.cpp inference via Rust bindings (GGUF models, Metal GPU on macOS, CUDA on Linux) - Intelligence opt-in: heuristic fallback when disabled, LLM-powered when enabled -- MCP server with 23 tools (8 read, 10 write, 1 index, 1 diagnostic, 3 migrate) via stdio -- HTTP REST API with 24 endpoints, API key auth (`eg_` prefix), rate limiting, CORS — enabled via `engraph serve --http` +- MCP server with 25 tools (8 read, 10 write, 2 identity, 1 index, 1 diagnostic, 3 migrate) via stdio +- HTTP REST API with 26 endpoints, API key auth (`eg_` prefix), rate limiting, CORS — enabled via `engraph serve --http` +- User identity with L0/L1 tiered context for AI agent session starts - Section-level reading and editing: target specific headings with replace/prepend/append modes - Full note rewriting with automatic frontmatter preservation - Granular frontmatter mutations: set/remove fields, add/remove tags and aliases @@ -573,7 +576,7 @@ engraph is not a replacement for Obsidian — it's the intelligence layer that s - [x] ~~HTTP/REST API — complement MCP with a standard web API~~ (v1.3) - [x] ~~PARA migration — AI-assisted vault restructuring with preview/apply/undo~~ (v1.4) - [x] ~~ChatGPT Actions — OpenAPI 3.1.0 spec + plugin manifest + `--setup-chatgpt` helper~~ (v1.5) -- [ ] Identity — user context at session start, enhanced onboarding (v1.6) +- [x] ~~Identity — user context at session start, enhanced onboarding~~ (v1.6) - [ ] Timeline — temporal knowledge graph with point-in-time queries (v1.7) - [ ] Mining — automatic fact extraction from vault notes (v1.8) diff --git a/src/config.rs b/src/config.rs index 00f8538..145d825 100644 --- a/src/config.rs +++ 
b/src/config.rs @@ -43,6 +43,38 @@ pub struct PluginConfig { pub public_url: Option<String>, } +/// User identity for AI agent context. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(default)] +pub struct IdentityConfig { + pub name: Option<String>, + pub role: Option<String>, + pub vault_purpose: Option<String>, +} + +/// Memory layer feature flags. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct MemoryConfig { + pub identity_enabled: bool, + pub timeline_enabled: bool, + pub mining_enabled: bool, + pub mining_strategy: String, + pub mining_on_index: bool, +} + +impl Default for MemoryConfig { + fn default() -> Self { + Self { + identity_enabled: true, + timeline_enabled: true, + mining_enabled: true, + mining_strategy: "auto".into(), + mining_on_index: true, + } + } +} + /// HTTP REST API configuration. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(default)] @@ -104,6 +136,10 @@ pub struct Config { /// HTTP REST API settings. #[serde(default)] pub http: HttpConfig, + #[serde(default)] + pub identity: IdentityConfig, + #[serde(default)] + pub memory: MemoryConfig, } impl Default for Config { @@ -118,6 +154,8 @@ obsidian: ObsidianConfig::default(), agents: AgentsConfig::default(), http: HttpConfig::default(), + identity: IdentityConfig::default(), + memory: MemoryConfig::default(), } } } @@ -379,4 +417,33 @@ public_url = "https://vault.example.com" let config: Config = toml::from_str(toml).unwrap(); assert_eq!(config.http.plugin.name.as_deref(), Some("my-vault")); } + + #[test] + fn test_identity_config_deserializes() { + let toml_str = r#" +[identity] +name = "Test User" +role = "Developer" +vault_purpose = "notes" +"#; + let config: Config = toml::from_str(toml_str).unwrap(); + assert_eq!(config.identity.name, Some("Test User".into())); + assert_eq!(config.identity.role, Some("Developer".into())); + assert_eq!(config.identity.vault_purpose, Some("notes".into())); + } + + #[test] + fn 
test_identity_config_defaults_to_empty() { + let config = Config::default(); + assert!(config.identity.name.is_none()); + assert!(config.identity.role.is_none()); + } + + #[test] + fn test_memory_config_defaults() { + let config = Config::default(); + assert!(config.memory.identity_enabled); + assert!(config.memory.timeline_enabled); + assert!(config.memory.mining_enabled); + } } diff --git a/src/http.rs index 0e4e9d4..4ef245b 100644 --- a/src/http.rs +++ b/src/http.rs @@ -332,6 +332,14 @@ struct ReindexFileBody { file: String, } +#[derive(Debug, Deserialize)] +struct SetupBody { + mode: String, + name: Option<String>, + role: Option<String>, + purpose: Option<String>, +} + // --------------------------------------------------------------------------- // CORS // --------------------------------------------------------------------------- @@ -388,6 +396,9 @@ pub fn build_router(state: ApiState) -> Router { .route("/api/delete", post(handle_delete)) // Index maintenance .route("/api/reindex-file", post(handle_reindex_file)) + // Identity endpoints + .route("/api/identity", get(handle_identity)) + .route("/api/setup", post(handle_setup)) // Migration endpoints .route("/api/migrate/preview", post(handle_migrate_preview)) .route("/api/migrate/apply", post(handle_migrate_apply)) @@ -1066,6 +1077,56 @@ }))) } +// --------------------------------------------------------------------------- +// Identity / setup endpoint handlers +// --------------------------------------------------------------------------- + +async fn handle_identity( + State(state): State<ApiState>, + headers: HeaderMap, +) -> Result<Json<serde_json::Value>, ApiError> { + authorize(&headers, &state, false)?; + let store = state.store.lock().await; + let config = crate::config::Config::load().unwrap_or_default(); + let block = crate::identity::format_identity_block(&config, &store) + .map_err(|e| ApiError::internal(&format!("{e:#}")))?; + Ok(Json(serde_json::json!({ "identity": block }))) +} + +async fn handle_setup( + State(state): 
State<ApiState>, + headers: HeaderMap, + Json(body): Json<SetupBody>, +) -> Result<Json<serde_json::Value>, ApiError> { + authorize(&headers, &state, true)?; + match body.mode.as_str() { + "detect" => { + let result = crate::onboarding::run_detect_json(&state.vault_path) + .map_err(|e| ApiError::internal(&format!("{e:#}")))?; + Ok(Json(result)) + } + "apply" => { + let mut config = crate::config::Config::load().unwrap_or_default(); + let data_dir = crate::config::Config::data_dir() + .map_err(|e| ApiError::internal(&format!("{e:#}")))?; + let flags = crate::onboarding::ApplyFlags { + name: body.name, + role: body.role, + purpose: body.purpose, + identity_only: false, + reindex_only: false, + }; + let result = + crate::onboarding::run_apply_json(&state.vault_path, &mut config, &data_dir, flags) + .map_err(|e| ApiError::internal(&format!("{e:#}")))?; + Ok(Json(result)) + } + other => Err(ApiError::bad_request(&format!( + "Unknown mode: {other}. Use 'detect' or 'apply'." + ))), + } +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- diff --git a/src/identity.rs new file mode 100644 index 0000000..46af73b --- /dev/null +++ b/src/identity.rs @@ -0,0 +1,417 @@ +use anyhow::{Context, Result}; +use regex::Regex; + +use crate::config::Config; +use crate::profile::VaultProfile; +use crate::store::Store; + +/// Summary of what L1 extraction found. +#[derive(Debug, Default)] +pub struct L1Summary { + pub active_projects: usize, + pub key_people: usize, + pub current_focus: usize, + pub ooo: usize, + pub blocking: usize, +} + +/// Extract L1 identity facts from the indexed vault. +/// +/// Clears existing tier-1 facts, then populates five categories: +/// active_projects, key_people, current_focus, ooo, blocking. 
+pub fn extract_l1_facts(store: &Store, profile: &VaultProfile) -> Result<L1Summary> { + store.clear_identity_facts(1)?; + + let all_files = store.get_all_files()?; + let mut summary = L1Summary::default(); + + // ── Active projects ───────────────────────────────────────── + for file in &all_files { + if path_is_in_excluded_folder(&file.path) { + continue; + } + if file.tags.iter().any(|t| t.eq_ignore_ascii_case("project")) { + let name = file_stem(&file.path); + store.upsert_identity_fact(1, "active_project", &name, Some(&file.path))?; + summary.active_projects += 1; + } + } + + // ── Key people ────────────────────────────────────────────── + let people_folder = profile.structure.folders.people.as_deref(); + if let Some(pf) = people_folder { + let people_files: Vec<_> = all_files + .iter() + .filter(|f| path_is_in_folder(&f.path, pf)) + .collect(); + + // Sort by incoming edge count (descending), take top 5. + let mut scored: Vec<(&crate::store::FileRecord, usize)> = people_files + .iter() + .filter_map(|f| { + let incoming = store.get_incoming(f.id, None).ok()?; + Some((*f, incoming.len())) + }) + .collect(); + scored.sort_by(|a, b| b.1.cmp(&a.1)); + + for (file, _count) in scored.into_iter().take(5) { + let name = file_stem(&file.path); + store.upsert_identity_fact(1, "key_person", &name, Some(&file.path))?; + summary.key_people += 1; + } + } + + // ── Daily-note based extractions ──────────────────────────── + let daily_folder = profile.structure.folders.daily.as_deref(); + if let Some(df) = daily_folder { + let mut daily_files: Vec<_> = all_files + .iter() + .filter(|f| path_is_in_folder(&f.path, df) && f.note_date.is_some()) + .collect(); + + // Sort by note_date descending (most recent first). 
+ daily_files.sort_by(|a, b| b.note_date.cmp(&a.note_date)); + + // ── Current focus (most recent daily note) ────────────── + if let Some(latest) = daily_files.first() + && let Ok(chunks) = store.get_chunks_by_file(latest.id) + { + let focus_re = Regex::new(r"(?i)morning\s+focus|top\s+priorit|priorities").unwrap(); + for chunk in &chunks { + if focus_re.is_match(&chunk.heading) { + let items = extract_bullet_items(&chunk.snippet, 3); + for item in items { + store.upsert_identity_fact(1, "current_focus", &item, None)?; + summary.current_focus += 1; + } + break; + } + } + } + + // ── OOO (last 7 daily notes) ─────────────────────────── + if people_folder.is_some() { + let people_names: Vec<String> = all_files + .iter() + .filter(|f| path_is_in_folder(&f.path, people_folder.unwrap())) + .map(|f| file_stem(&f.path)) + .collect(); + + let ooo_re = Regex::new(r"(?i)\b(ooo|out\s+of\s+office|vacation|leave|pto)\b").unwrap(); + + for daily in daily_files.iter().take(7) { + if let Ok(chunks) = store.get_chunks_by_file(daily.id) { + for chunk in &chunks { + if ooo_re.is_match(&chunk.snippet) { + for person in &people_names { + if chunk + .snippet + .to_ascii_lowercase() + .contains(&person.to_ascii_lowercase()) + { + // Extract context around the match. 
+ let detail = extract_ooo_detail(&chunk.snippet, &ooo_re); + let label = format!("{} ({})", person, detail); + store.upsert_identity_fact(1, "ooo", &label, None)?; + summary.ooo += 1; + } + } + } + } + } + } + } + + // ── Blocking (last 3 daily notes) ─────────────────────── + let blocking_re = Regex::new(r"(?i)\b(P0|blocking|blocked)\b").unwrap(); + + for daily in daily_files.iter().take(3) { + if let Ok(chunks) = store.get_chunks_by_file(daily.id) { + for chunk in &chunks { + let items = extract_matching_bullets(&chunk.snippet, &blocking_re); + for item in items { + store.upsert_identity_fact(1, "blocking", &item, None)?; + summary.blocking += 1; + } + } + } + } + } + + Ok(summary) +} + +/// Format the identity block combining L0 (config) and L1 (store) facts. +pub fn format_identity_block(config: &Config, store: &Store) -> Result<String> { + let id = &config.identity; + + let name = id.name.as_deref().unwrap_or("(not set)"); + let role = id.role.as_deref().unwrap_or("(not set)"); + let vault = id.vault_purpose.as_deref().unwrap_or("(not set)"); + + let mut out = String::new(); + out.push_str("## Identity (L0)\n"); + out.push_str(&format!("Name: {}\n", name)); + out.push_str(&format!("Role: {}\n", role)); + out.push_str(&format!("Vault: {}\n", vault)); + + let facts = store + .get_identity_facts(1) + .context("reading L1 identity facts")?; + + if facts.is_empty() { + out.push_str("\n## Current State (L1)\n"); + out.push_str("[no data — run engraph index]\n"); + return Ok(out); + } + + // Determine most recent updated_at across all facts. + let latest_ts = facts + .iter() + .map(|f| f.updated_at.as_str()) + .max() + .unwrap_or("unknown"); + + out.push_str(&format!( + "\n## Current State (L1) [updated {}]\n", + latest_ts + )); + + // Group facts by key. 
+ let project_vals: Vec<&str> = facts + .iter() + .filter(|f| f.key == "active_project") + .map(|f| f.value.as_str()) + .collect(); + let focus_vals: Vec<&str> = facts + .iter() + .filter(|f| f.key == "current_focus") + .map(|f| f.value.as_str()) + .collect(); + let people_vals: Vec<&str> = facts + .iter() + .filter(|f| f.key == "key_person") + .map(|f| f.value.as_str()) + .collect(); + let blocking_vals: Vec<&str> = facts + .iter() + .filter(|f| f.key == "blocking") + .map(|f| f.value.as_str()) + .collect(); + let ooo_vals: Vec<&str> = facts + .iter() + .filter(|f| f.key == "ooo") + .map(|f| f.value.as_str()) + .collect(); + + if !project_vals.is_empty() { + out.push_str(&format!("Active projects: {}\n", project_vals.join(", "))); + } + if !focus_vals.is_empty() { + out.push_str(&format!("Current focus: {}\n", focus_vals.join(", "))); + } + if !people_vals.is_empty() { + out.push_str(&format!("Key people: {}\n", people_vals.join(", "))); + } + if !blocking_vals.is_empty() { + out.push_str(&format!("Blocking: {}\n", blocking_vals.join(", "))); + } + if !ooo_vals.is_empty() { + out.push_str(&format!("OOO: {}\n", ooo_vals.join(", "))); + } + + Ok(out) +} + +// ── Helpers ───────────────────────────────────────────────────── + +/// Extract the file stem (name without extension) from a path string. +fn file_stem(path: &str) -> String { + std::path::Path::new(path) + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| path.to_string()) +} + +/// Return true if the path is inside a templates or archive folder and should be +/// excluded from L1 extraction. Matches any path component named "templates", +/// "template", "archive", or "archives" (case-insensitive), as well as PARA-style +/// numbered variants (e.g. "05-Templates", "04-Archive"). 
+fn path_is_in_excluded_folder(path: &str) -> bool { + for component in path.split('/') { + let stripped = component + .trim_start_matches(|c: char| c.is_ascii_digit()) + .trim_start_matches(['-', '_', ' ']); + let lower = stripped.to_ascii_lowercase(); + if matches!( + lower.as_str(), + "templates" | "template" | "archive" | "archives" + ) { + return true; + } + } + false +} + +/// Check whether a file path belongs to a given folder (case-insensitive prefix match). +fn path_is_in_folder(path: &str, folder: &str) -> bool { + let normalized = folder.trim_end_matches('/'); + let lower_path = path.to_ascii_lowercase(); + // Match "folder/" prefix or "/folder/" anywhere in the path. + lower_path.starts_with(&format!("{}/", normalized.to_ascii_lowercase())) + || lower_path.contains(&format!("/{}/", normalized.to_ascii_lowercase())) +} + +/// Extract up to `max` bullet-point items from a snippet. +fn extract_bullet_items(snippet: &str, max: usize) -> Vec<String> { + let mut items = Vec::new(); + for line in snippet.lines() { + let trimmed = line.trim(); + if let Some(rest) = trimmed + .strip_prefix("- ") + .or_else(|| trimmed.strip_prefix("* ")) + { + // Strip checkbox markers like [ ] or [x]. + let rest = rest + .strip_prefix("[ ] ") + .or_else(|| rest.strip_prefix("[x] ")) + .or_else(|| rest.strip_prefix("[X] ")) + .unwrap_or(rest); + let clean = rest.trim().to_string(); + if !clean.is_empty() { + items.push(clean); + if items.len() >= max { + break; + } + } + } + } + items +} + +/// Extract bullet items that match a regex pattern. 
+fn extract_matching_bullets(snippet: &str, pattern: &Regex) -> Vec<String> { + let mut items = Vec::new(); + for line in snippet.lines() { + let trimmed = line.trim(); + if (trimmed.starts_with("- ") || trimmed.starts_with("* ")) && pattern.is_match(trimmed) { + let rest = trimmed + .strip_prefix("- ") + .or_else(|| trimmed.strip_prefix("* ")) + .unwrap_or(trimmed); + let rest = rest + .strip_prefix("[ ] ") + .or_else(|| rest.strip_prefix("[x] ")) + .or_else(|| rest.strip_prefix("[X] ")) + .unwrap_or(rest); + let clean = rest.trim().to_string(); + if !clean.is_empty() { + items.push(clean); + } + } + } + items +} + +/// Extract a short OOO detail string from around the regex match. +fn extract_ooo_detail(snippet: &str, ooo_re: &Regex) -> String { + for line in snippet.lines() { + let trimmed = line.trim(); + if ooo_re.is_match(trimmed) { + // Return the line content (stripped of bullet prefix) as the detail. + let rest = trimmed + .strip_prefix("- ") + .or_else(|| trimmed.strip_prefix("* ")) + .unwrap_or(trimmed); + let clean = rest.trim(); + if clean.len() > 80 { + return format!("{}...", &clean[..77]); + } + return clean.to_string(); + } + } + "OOO".to_string() +} + +// ── Tests ─────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::Config; + use crate::store::Store; + + #[test] + fn test_format_identity_block_l0_only() { + let store = Store::open_memory().unwrap(); + let mut config = Config::default(); + config.identity.name = Some("Oleksandr".into()); + config.identity.role = Some("Engineer".into()); + config.identity.vault_purpose = Some("personal knowledge base".into()); + + let block = format_identity_block(&config, &store).unwrap(); + + assert!(block.contains("Name: Oleksandr")); + assert!(block.contains("Role: Engineer")); + assert!(block.contains("Vault: personal knowledge base")); + assert!(block.contains("no data")); + } + + #[test] + fn test_format_identity_block_with_l1() { + let store = 
Store::open_memory().unwrap(); + let mut config = Config::default(); + config.identity.name = Some("Test User".into()); + config.identity.role = Some("Developer".into()); + config.identity.vault_purpose = Some("notes".into()); + + // Insert L1 facts manually. + store + .upsert_identity_fact( + 1, + "active_project", + "ProjectA", + Some("01-Projects/ProjectA.md"), + ) + .unwrap(); + store + .upsert_identity_fact( + 1, + "active_project", + "ProjectB", + Some("01-Projects/ProjectB.md"), + ) + .unwrap(); + store + .upsert_identity_fact( + 1, + "key_person", + "Alice", + Some("03-Resources/People/Alice.md"), + ) + .unwrap(); + store + .upsert_identity_fact(1, "current_focus", "Ship feature X", None) + .unwrap(); + store + .upsert_identity_fact(1, "blocking", "CI pipeline broken", None) + .unwrap(); + store + .upsert_identity_fact(1, "ooo", "Bob (vacation until Friday)", None) + .unwrap(); + + let block = format_identity_block(&config, &store).unwrap(); + + assert!(block.contains("Name: Test User")); + assert!(block.contains("Role: Developer")); + assert!(block.contains("Vault: notes")); + assert!(block.contains("Active projects: ProjectA, ProjectB")); + assert!(block.contains("Key people: Alice")); + assert!(block.contains("Current focus: Ship feature X")); + assert!(block.contains("Blocking: CI pipeline broken")); + assert!(block.contains("OOO: Bob (vacation until Friday)")); + assert!(block.contains("## Current State (L1) [updated")); + assert!(!block.contains("no data")); + } +} diff --git a/src/indexer.rs b/src/indexer.rs index 141083d..ec6106b 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -13,6 +13,7 @@ use crate::config::Config; use crate::docid::generate_docid; use crate::graph::extract_wikilink_targets; use crate::llm::EmbedModel; +use crate::profile::VaultProfile; use crate::store::{FileRecord, Store}; /// Summary of an indexing run. 
@@ -459,7 +460,15 @@ pub fn run_index(vault_path: &Path, config: &Config, rebuild: bool) -> Result, ) -> Result { - run_index_inner(vault_path, config, store, embedder, rebuild) + run_index_inner(vault_path, config, store, embedder, rebuild, profile) } /// Shared implementation for [`run_index`] and [`run_index_shared`]. @@ -483,6 +493,7 @@ fn run_index_inner( store: &Store, embedder: &mut impl EmbedModel, rebuild: bool, + profile: Option<&VaultProfile>, ) -> Result { let start = Instant::now(); @@ -498,8 +509,8 @@ fn run_index_inner( // Build exclude list: config excludes + archive folder (if detected) let mut exclude = config.exclude.clone(); - if let Ok(Some(profile)) = crate::config::Config::load_vault_profile() - && let Some(archive) = &profile.structure.folders.archive + if let Some(p) = profile + && let Some(archive) = &p.structure.folders.archive { let archive_pattern = format!("{}/", archive); if !exclude.contains(&archive_pattern) { @@ -599,8 +610,8 @@ fn run_index_inner( } // People detection (if configured via vault profile) - if let Ok(Some(profile)) = crate::config::Config::load_vault_profile() - && let Some(people_folder) = &profile.structure.folders.people + if let Some(p) = profile + && let Some(people_folder) = &p.structure.folders.people { let people = load_people_entities(store, people_folder, &content_by_path)?; if !people.is_empty() { @@ -663,6 +674,13 @@ fn run_index_inner( store.upsert_folder_centroid(folder, ¢roid, vectors.len())?; } + // Extract L1 identity facts from the freshly indexed vault + if let Some(p) = profile + && let Err(e) = crate::identity::extract_l1_facts(store, p) + { + tracing::warn!("L1 identity extraction failed (non-fatal): {e:#}"); + } + let duration = start.elapsed(); info!( new = new_files.len(), diff --git a/src/lib.rs b/src/lib.rs index 779e26e..39bcff5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,12 +7,14 @@ pub mod fusion; pub mod graph; pub mod health; pub mod http; +pub mod identity; pub mod indexer; pub mod 
links; pub mod llm; pub mod markdown; pub mod migrate; pub mod obsidian; +pub mod onboarding; pub mod openapi; pub mod placement; pub mod profile; diff --git a/src/main.rs b/src/main.rs index 15e9e8f..ea91655 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,5 @@ use engraph::config; use engraph::indexer; -use engraph::profile; use engraph::search; use engraph::store; @@ -66,10 +65,44 @@ enum Command { all: bool, }, - /// Initialize vault profile with auto-detection. + /// Initialize vault profile, identity, and search index. Init { - /// Path to the vault (defaults to current directory). + /// Path to vault directory. path: Option, + /// Only run identity setup (skip indexing). + #[arg(long)] + identity: bool, + /// Only re-index (skip identity prompts). + #[arg(long)] + reindex: bool, + /// Detect vault without writing anything (agent mode). + #[arg(long)] + detect: bool, + /// Output as JSON (agent mode). + #[arg(long)] + json: bool, + /// Suppress interactive prompts. + #[arg(long)] + quiet: bool, + /// User name (non-interactive mode). + #[arg(long)] + name: Option, + /// User role (non-interactive mode). + #[arg(long)] + role: Option, + /// Vault purpose (non-interactive mode). + #[arg(long)] + purpose: Option, + }, + + /// Print identity block (L0 + L1 context for AI agents). + Identity { + /// Output as JSON. + #[arg(long)] + json: bool, + /// Force L1 re-extraction without full reindex. + #[arg(long)] + refresh: bool, }, /// Configure engraph settings. @@ -515,8 +548,17 @@ async fn main() -> Result<()> { } } - Command::Init { path } => { - // Resolve vault path: CLI arg > config > cwd. 
+ Command::Init { + path, + identity, + reindex, + detect, + json, + quiet, + name, + role, + purpose, + } => { cfg.merge_vault_path(path); let vault_path = match &cfg.vault_path { Some(p) => p.clone(), @@ -524,149 +566,77 @@ async fn main() -> Result<()> { }; let vault_path = vault_path.canonicalize().unwrap_or(vault_path); - println!("Detecting vault profile for: {}", vault_path.display()); - - let vault_type = profile::detect_vault_type(&vault_path); - let structure = profile::detect_structure(&vault_path)?; - let stats = profile::scan_vault_stats(&vault_path)?; - - // Print detection results. - println!(); - println!(" Vault type: {:?}", vault_type); - println!(" Structure: {:?}", structure.method); - if let Some(ref inbox) = structure.folders.inbox { - println!(" inbox: {}", inbox); - } - if let Some(ref projects) = structure.folders.projects { - println!(" projects: {}", projects); - } - if let Some(ref areas) = structure.folders.areas { - println!(" areas: {}", areas); - } - if let Some(ref resources) = structure.folders.resources { - println!(" resources: {}", resources); - } - if let Some(ref archive) = structure.folders.archive { - println!(" archive: {}", archive); + if detect { + let result = engraph::onboarding::run_detect_json(&vault_path)?; + println!("{}", serde_json::to_string_pretty(&result)?); + return Ok(()); } - if let Some(ref templates) = structure.folders.templates { - println!(" templates: {}", templates); - } - if let Some(ref daily) = structure.folders.daily { - println!(" daily: {}", daily); - } - if let Some(ref people) = structure.folders.people { - println!(" people: {}", people); - } - println!(); - println!(" Total .md files: {}", stats.total_files); - println!(" With frontmatter: {}", stats.files_with_frontmatter); - println!(" Wikilinks: {}", stats.wikilink_count); - println!(" Unique tags: {}", stats.unique_tags); - println!(" Folders: {}", stats.folder_count); - println!(" Max folder depth: {}", stats.folder_depth); - - let 
vault_profile = profile::VaultProfile { - vault_path: vault_path.clone(), - vault_type, - structure, - stats, - }; - // Ensure data dir exists and write vault.toml. - std::fs::create_dir_all(&data_dir)?; - profile::write_vault_toml(&vault_profile, &data_dir)?; + if json { + let flags = engraph::onboarding::ApplyFlags { + name, + role, + purpose, + identity_only: identity, + reindex_only: reindex, + }; + let result = + engraph::onboarding::run_apply_json(&vault_path, &mut cfg, &data_dir, flags)?; + println!("{}", serde_json::to_string_pretty(&result)?); + return Ok(()); + } - println!(); - println!("Wrote {}", data_dir.join("vault.toml").display()); + let flags = engraph::onboarding::InteractiveFlags { + name, + role, + purpose, + identity_only: identity, + reindex_only: reindex, + quiet, + }; + engraph::onboarding::run_interactive(&vault_path, &mut cfg, &data_dir, flags)?; + } - // Intelligence onboarding (only if not yet configured) - if cfg.intelligence.is_none() { - let enable = prompt_intelligence(&data_dir)?; - cfg.intelligence = Some(enable); - cfg.save()?; + Command::Identity { json, refresh } => { + let db_path = data_dir.join("engraph.db"); + if !db_path.exists() { + anyhow::bail!("No index found. Run `engraph init` first."); } - - // Obsidian CLI detection - let obsidian_running = std::process::Command::new("pgrep") - .args(["-x", "Obsidian"]) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status() - .map(|s| s.success()) - .unwrap_or(false); - - let obsidian_in_path = std::process::Command::new("which") - .arg("obsidian") - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status() - .map(|s| s.success()) - .unwrap_or(false); - - if obsidian_running && obsidian_in_path { - eprint!("\nObsidian CLI detected. Enable integration? 
[Y/n] "); - io::stderr().flush()?; - let mut answer = String::new(); - io::stdin().lock().read_line(&mut answer)?; - let answer = answer.trim(); - let enable = answer.is_empty() || answer.eq_ignore_ascii_case("y"); - if enable { - let vault_name = vault_path - .file_name() - .and_then(|n| n.to_str()) - .unwrap_or("Personal") - .to_string(); - cfg.obsidian.enabled = true; - cfg.obsidian.vault_name = Some(vault_name.clone()); - cfg.save()?; - println!("Obsidian CLI enabled (vault: {vault_name})."); - } else { - println!( - "Obsidian CLI disabled. Enable later with: engraph configure --enable-obsidian-cli" - ); + let store = engraph::store::Store::open(&db_path)?; + if refresh { + let profile = engraph::config::Config::load_vault_profile()?; + match profile { + Some(ref p) => { + engraph::identity::extract_l1_facts(&store, p)?; + eprintln!("L1 facts refreshed."); + } + None => { + anyhow::bail!("No vault profile found. Run `engraph init` first."); + } } } - - // AI agent detection - let home = dirs::home_dir().unwrap_or_default(); - let agent_configs: &[(&str, &str, &str)] = &[ - ("Claude Code", "claude-code", ".claude/settings.json"), - ("Cursor", "cursor", ".cursor/mcp.json"), - ("Windsurf", "windsurf", ".codeium/windsurf/mcp_config.json"), - ]; - - let mut detected: Vec<(&str, &str, String)> = Vec::new(); - for (name, key, rel_path) in agent_configs { - let full = home.join(rel_path); - if full.exists() { - detected.push((name, key, format!("~/{rel_path}"))); + if json { + // L0 comes from config (not the identity_facts table) + let id = &cfg.identity; + let mut l0_entries = Vec::new(); + if let Some(name) = &id.name { + l0_entries.push(serde_json::json!({"key": "name", "value": name})); } - } - - if !detected.is_empty() { - println!("\nAI agents detected:"); - for (name, _key, path) in &detected { - println!(" \u{2713} {name} ({path})"); + if let Some(role) = &id.role { + l0_entries.push(serde_json::json!({"key": "role", "value": role})); } - println!( - "\nTo 
register engraph as MCP server, add to your agent's config:\n \ - \"engraph\": {{\n \ - \"command\": \"engraph\",\n \ - \"args\": [\"serve\"]\n \ - }}" - ); - - // Record detected agents in config - for (_name, key, _path) in &detected { - match *key { - "claude-code" => cfg.agents.claude_code = true, - "cursor" => cfg.agents.cursor = true, - "windsurf" => cfg.agents.windsurf = true, - _ => {} - } + if let Some(purpose) = &id.vault_purpose { + l0_entries.push(serde_json::json!({"key": "vault_purpose", "value": purpose})); } - cfg.save()?; + let l1 = store.get_identity_facts(1)?; + let result = serde_json::json!({ + "l0": l0_entries, + "l1": l1.iter().map(|f| serde_json::json!({"key": &f.key, "value": &f.value, "source": &f.source, "updated_at": &f.updated_at})).collect::>(), + }); + println!("{}", serde_json::to_string_pretty(&result)?); + } else { + let block = engraph::identity::format_identity_block(&cfg, &store)?; + println!("{}", block); } } diff --git a/src/onboarding.rs b/src/onboarding.rs new file mode 100644 index 0000000..6a570ec --- /dev/null +++ b/src/onboarding.rs @@ -0,0 +1,750 @@ +use std::path::Path; + +use anyhow::{Context, Result}; +use console::style; +use serde_json::json; + +use crate::config::Config; +use crate::identity::{L1Summary, extract_l1_facts}; +use crate::indexer::{IndexResult, run_index}; +use crate::profile::{ + self, FolderMap, StructureDetection, StructureMethod, VaultProfile, VaultStats, VaultType, +}; +use crate::store::Store; + +// ── Public types ────────────────────────────────────────────────── + +/// Flags for the interactive CLI onboarding flow. +pub struct InteractiveFlags { + pub name: Option, + pub role: Option, + pub purpose: Option, + pub identity_only: bool, + pub reindex_only: bool, + pub quiet: bool, +} + +/// Flags for the non-interactive (JSON) apply flow. 
+pub struct ApplyFlags { + pub name: Option, + pub role: Option, + pub purpose: Option, + pub identity_only: bool, + pub reindex_only: bool, +} + +// ── Constants ───────────────────────────────────────────────────── + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +const PURPOSE_OPTIONS: &[&str] = &[ + "Personal knowledge base", + "Work tracking", + "Research & learning", + "Team wiki", + "Other", +]; + +// ── Helpers ─────────────────────────────────────────────────────── + +/// Print a section divider: `── Title ──` padded to terminal width. +fn print_divider(title: &str) { + let term = console::Term::stdout(); + let width = term.size().1 as usize; + let prefix = format!("── {} ", title); + let remaining = width.saturating_sub(prefix.len() + 2); + let suffix = "─".repeat(remaining); + println!(); + println!(" {}{}", style(&prefix).bold(), suffix); + println!(); +} + +/// Print the engraph banner box. +fn print_banner() { + let tag = format!("engraph v{}", VERSION); + let sub = "vault intelligence for AI agents"; + let inner_width = tag.len().max(sub.len()) + 4; + + let top = format!(" {}{}{}", "╭", "─".repeat(inner_width + 2), "╮"); + let bot = format!(" {}{}{}", "╰", "─".repeat(inner_width + 2), "╯"); + let empty_line = format!(" │{}│", " ".repeat(inner_width + 2)); + let tag_line = format!( + " │ {: Option { + std::process::Command::new("git") + .args(["config", "user.name"]) + .output() + .ok() + .and_then(|out| { + if out.status.success() { + let name = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if name.is_empty() { None } else { Some(name) } + } else { + None + } + }) +} + +/// Print a green checkmark line. +fn check(msg: &str) { + println!(" {} {}", style("✓").green(), msg); +} + +/// Print a red cross line. +fn cross(msg: &str) { + println!(" {} {}", style("✗").red(), msg); +} + +/// Detect vault profile (type, structure, stats) without writing anything. 
+fn detect_profile(vault_path: &Path) -> Result<(VaultType, StructureDetection, VaultStats)> {
+    // Evaluated left-to-right: type, then structure, then stats.
+    Ok((
+        profile::detect_vault_type(vault_path),
+        profile::detect_structure(vault_path)?,
+        profile::scan_vault_stats(vault_path)?,
+    ))
+}
+
+/// Build a VaultProfile from detected components.
+fn build_profile(
+    vault_path: &Path,
+    vault_type: VaultType,
+    structure: StructureDetection,
+    stats: VaultStats,
+) -> VaultProfile {
+    let vault_path = vault_path.to_path_buf();
+    VaultProfile {
+        vault_path,
+        vault_type,
+        structure,
+        stats,
+    }
+}
+
+/// Print vault scan results.
+fn print_scan_results(vault_type: &VaultType, structure: &StructureDetection, stats: &VaultStats) {
+    check(match vault_type {
+        VaultType::Obsidian => "Obsidian vault detected",
+        VaultType::Logseq => "Logseq vault detected",
+        VaultType::Plain => "Plain markdown folder detected",
+        VaultType::Custom => "Custom vault detected",
+    });
+
+    check(&format!("{} markdown files", stats.total_files));
+
+    check(match structure.method {
+        StructureMethod::Para => "PARA structure",
+        StructureMethod::Folders => "Folder-based structure",
+        StructureMethod::Flat => "Flat structure",
+        StructureMethod::Custom => "Custom structure",
+    });
+
+    // Folder roles discovered during detection.
+    if let Some(ref daily) = structure.folders.daily {
+        let approx = count_files_in_folder_approx(stats, daily);
+        check(&format!("{} daily notes in {}/", approx, daily));
+    }
+
+    match structure.folders.templates {
+        Some(_) => check("Templates folder detected"),
+        None => cross("No templates folder detected"),
+    }
+
+    if let Some(ref people) = structure.folders.people {
+        check(&format!("People folder: {}/", people));
+    }
+}
+
+/// Rough count for daily notes — we don't have per-folder counts, so report the folder name.
+fn count_files_in_folder_approx(_stats: &VaultStats, _folder: &str) -> String {
+    // We don't track per-folder file counts in VaultStats, so an exact number
+    // isn't available here; a precise count would require walking the folder
+    // again. Return "some" as the placeholder (previously this returned an
+    // empty string, making the caller print " daily notes in X/" with a
+    // dangling gap where the count should be).
+    "some".to_string()
+}
+
+/// Print L1 summary as a compact table.
+fn print_l1_summary(summary: &L1Summary) {
+    if summary.active_projects > 0 {
+        println!(" {} active projects", style(summary.active_projects).cyan());
+    }
+    if summary.key_people > 0 {
+        println!(" {} key people", style(summary.key_people).cyan());
+    }
+    if summary.current_focus > 0 {
+        println!(" {} current focus items", style(summary.current_focus).cyan());
+    }
+    if summary.blocking > 0 {
+        println!(" {} blocking items", style(summary.blocking).yellow());
+    }
+    if summary.ooo > 0 {
+        println!(" {} people OOO", style(summary.ooo).yellow());
+    }
+    // Nothing extracted at all — nudge the user instead of printing nothing.
+    if summary.active_projects == 0
+        && summary.key_people == 0
+        && summary.current_focus == 0
+        && summary.blocking == 0
+        && summary.ooo == 0
+    {
+        println!(
+            " {}",
+            style("No structured facts extracted yet. Add tags and daily notes to enrich.").dim()
+        );
+    }
+}
+
+/// Print index results.
+fn print_index_result(result: &IndexResult) {
+    check(&format!(
+        "Index built ({} files, {} chunks, {:.1}s)",
+        result.new_files + result.updated_files,
+        result.total_chunks,
+        result.duration.as_secs_f64()
+    ));
+}
+
+/// Print the "What's Next" section.
+fn print_next_steps(config_path: &Path) {
+    print_divider("What's Next");
+
+    check(&format!("Identity saved to {}", config_path.display()));
+    println!();
+    println!(" Try these:");
+    println!(" {}", style("engraph search \"...\"").cyan());
+    println!(" {}", style("engraph identity").cyan());
+    println!(" {}", style("engraph serve").cyan());
+    println!();
+}
+
+// ── Public functions ────────────────────────────────────────────
+
+/// Full interactive onboarding flow with banner, prompts, and progress.
+pub fn run_interactive( + vault_path: &Path, + config: &mut Config, + data_dir: &Path, + flags: InteractiveFlags, +) -> Result<()> { + let quiet = flags.quiet; + + // ── Banner ── + if !quiet { + print_banner(); + } + + // ── Vault Scan ── + let (vault_type, structure, stats) = if !flags.identity_only { + if !quiet { + println!(" {}", style("Scanning vault...").dim()); + println!(); + } + + let (vt, st, vs) = detect_profile(vault_path)?; + + if !quiet { + print_scan_results(&vt, &st, &vs); + } + + (vt, st, vs) + } else { + // identity_only: skip vault scan, use minimal defaults + ( + VaultType::Plain, + StructureDetection { + method: StructureMethod::Flat, + folders: FolderMap::default(), + }, + VaultStats::default(), + ) + }; + + // ── Identity Setup ── + if !flags.reindex_only { + if !quiet { + print_divider("Identity Setup"); + } + + // Name + let name = if let Some(ref n) = flags.name { + n.clone() + } else { + let default_name = git_user_name().unwrap_or_default(); + let mut input = dialoguer::Input::::new().with_prompt(" ? What's your name?"); + if !default_name.is_empty() { + input = input.default(default_name); + } + input.interact_text()? + }; + + // Role + let role = if let Some(ref r) = flags.role { + r.clone() + } else { + dialoguer::Input::::new() + .with_prompt(" ? What do you do?") + .interact_text()? + }; + + // Vault purpose + let purpose = if let Some(ref p) = flags.purpose { + p.clone() + } else { + let selection = dialoguer::Select::new() + .with_prompt(" ? What's this vault for?") + .items(PURPOSE_OPTIONS) + .default(0) + .interact()?; + + if selection == PURPOSE_OPTIONS.len() - 1 { + // "Other" selected — ask for freeform input + dialoguer::Input::::new() + .with_prompt(" ? Describe your vault's purpose") + .interact_text()? 
+ } else { + PURPOSE_OPTIONS[selection].to_string() + } + }; + + // Save identity to config + config.identity.name = Some(name); + config.identity.role = Some(role); + config.identity.vault_purpose = Some(purpose); + config.save().context("saving identity to config")?; + } + + // ── Vault Profile ── + if !flags.identity_only { + let vault_profile = build_profile(vault_path, vault_type, structure, stats); + profile::write_vault_toml(&vault_profile, data_dir).context("writing vault profile")?; + } + + // ── Indexing ── + if !flags.identity_only { + if !quiet { + print_divider("Indexing"); + } + + // Confirm if vault is large + if !quiet && !flags.reindex_only { + let total = profile::scan_vault_stats(vault_path) + .map(|s| s.total_files) + .unwrap_or(0); + if total > 500 { + let confirm = dialoguer::Confirm::new() + .with_prompt(format!(" {} files found. Ready to index?", total)) + .default(true) + .interact()?; + if !confirm { + println!( + "\n {}", + style("Skipped indexing. Run `engraph index` when ready.").dim() + ); + let config_path = Config::data_dir()?.join("config.toml"); + print_next_steps(&config_path); + return Ok(()); + } + } + } + + let result = run_index(vault_path, config, false)?; + + if !quiet { + println!(); + print_index_result(&result); + } + + // ── L1 Extraction ── + let db_path = data_dir.join("engraph.db"); + if db_path.exists() { + let store = Store::open(&db_path)?; + if let Ok(Some(vault_profile)) = Config::load_vault_profile() { + if !quiet { + print_divider("Auto-extracted Context"); + } + + match extract_l1_facts(&store, &vault_profile) { + Ok(summary) => { + if !quiet { + print_l1_summary(&summary); + } + } + Err(e) => { + if !quiet { + println!(" {} L1 extraction: {}", style("!").yellow(), e); + } + } + } + } + } + } + + // ── What's Next ── + if !quiet { + let config_path = Config::data_dir()?.join("config.toml"); + print_next_steps(&config_path); + } + + Ok(()) +} + +/// Non-destructive vault inspection returning JSON. 
Writes nothing.
+pub fn run_detect_json(vault_path: &Path) -> Result<serde_json::Value> {
+    let vault_path = vault_path
+        .canonicalize()
+        .unwrap_or_else(|_| vault_path.to_path_buf());
+
+    let vault_type = profile::detect_vault_type(&vault_path);
+    let structure = profile::detect_structure(&vault_path)?;
+    let stats = profile::scan_vault_stats(&vault_path)?;
+
+    let vault_type_str = match vault_type {
+        VaultType::Obsidian => "obsidian",
+        VaultType::Logseq => "logseq",
+        VaultType::Plain => "plain",
+        VaultType::Custom => "custom",
+    };
+
+    let structure_str = match structure.method {
+        StructureMethod::Para => "para",
+        StructureMethod::Folders => "folders",
+        StructureMethod::Flat => "flat",
+        StructureMethod::Custom => "custom",
+    };
+
+    // Build folders object
+    let folders = json!({
+        "inbox": structure.folders.inbox,
+        "projects": structure.folders.projects,
+        "areas": structure.folders.areas,
+        "resources": structure.folders.resources,
+        "archive": structure.folders.archive,
+        "templates": structure.folders.templates,
+        "daily": structure.folders.daily,
+        "people": structure.folders.people,
+    });
+
+    // Suggested identity
+    let git_name = git_user_name();
+    let name_source = if git_name.is_some() {
+        "git_config"
+    } else {
+        "none"
+    };
+
+    // Check for existing index
+    let data_dir = Config::data_dir()?;
+    let db_path = data_dir.join("engraph.db");
+
+    let (existing_index, active_projects, key_people) = if db_path.exists() {
+        let store = Store::open(&db_path)?;
+        let all_files = store.get_all_files()?;
+        let last_indexed = store.get_meta("last_indexed_at")?;
+
+        let index_info = json!({
+            "files": all_files.len(),
+            "last_indexed": last_indexed,
+        });
+
+        // Fetch L1 facts once and partition them, instead of issuing the
+        // same `get_identity_facts` query twice.
+        let facts = store.get_identity_facts(1).unwrap_or_default();
+        let projects: Vec<String> = facts
+            .iter()
+            .filter(|f| f.key == "active_project")
+            .map(|f| f.value.clone())
+            .collect();
+        let people: Vec<String> = facts
+            .iter()
+            .filter(|f| f.key == "key_person")
+            .map(|f| f.value.clone())
+            .collect();
+
+        (Some(index_info), projects, people)
+    } else {
+        (None, vec![], vec![])
+    };
+
+    // Warnings
+    let mut warnings: Vec<String> = Vec::new();
+    if stats.total_files == 0 {
+        warnings.push("Vault contains no markdown files".into());
+    }
+    if stats.files_with_frontmatter == 0 && stats.total_files > 0 {
+        warnings
+            .push("No files have YAML frontmatter — tags and metadata won't be extracted".into());
+    }
+    if stats.wikilink_count == 0 && stats.total_files > 5 {
+        warnings.push("No wikilinks found — graph features will be limited".into());
+    }
+
+    let ready = stats.total_files > 0 && warnings.is_empty();
+
+    // Count daily notes (approximate: files in the daily folder)
+    let daily_count = count_daily_notes(&vault_path, &structure.folders);
+    let people_count = count_people_notes(&vault_path, &structure.folders);
+
+    Ok(json!({
+        "vault_path": vault_path.to_string_lossy(),
+        "vault_type": vault_type_str,
+        "structure": structure_str,
+        "files": stats.total_files,
+        "folders": folders,
+        "stats": {
+            "daily_notes": daily_count,
+            "people_notes": people_count,
+            "unique_tags": stats.unique_tags,
+            "wikilinks": stats.wikilink_count,
+        },
+        "suggested_identity": {
+            "name": git_name,
+            "name_source": name_source,
+            "active_projects": active_projects,
+            "key_people": key_people,
+        },
+        "existing_index": existing_index,
+        "ready": ready,
+        "warnings": warnings,
+    }))
+}
+
+/// Non-interactive setup with JSON result. Sets identity, detects profile,
+/// runs index, extracts L1 facts, and returns a JSON summary.
+pub fn run_apply_json( + vault_path: &Path, + config: &mut Config, + data_dir: &Path, + flags: ApplyFlags, +) -> Result { + let vault_path = vault_path + .canonicalize() + .unwrap_or_else(|_| vault_path.to_path_buf()); + + let mut steps_completed: Vec = Vec::new(); + + // ── Identity ── + if !flags.reindex_only { + if let Some(ref name) = flags.name { + config.identity.name = Some(name.clone()); + } + if let Some(ref role) = flags.role { + config.identity.role = Some(role.clone()); + } + if let Some(ref purpose) = flags.purpose { + config.identity.vault_purpose = Some(purpose.clone()); + } + config.save().context("saving identity to config")?; + steps_completed.push("identity_saved".into()); + } + + // ── Vault Profile ── + let vault_profile = if !flags.identity_only { + let vault_type = profile::detect_vault_type(&vault_path); + let structure = profile::detect_structure(&vault_path)?; + let stats = profile::scan_vault_stats(&vault_path)?; + + let vp = build_profile(&vault_path, vault_type, structure, stats); + profile::write_vault_toml(&vp, data_dir).context("writing vault profile")?; + steps_completed.push("vault_profile_written".into()); + Some(vp) + } else { + None + }; + + // ── Indexing ── + let index_result = if !flags.identity_only { + let result = run_index(&vault_path, config, false)?; + steps_completed.push("index_built".into()); + Some(result) + } else { + None + }; + + // ── L1 Extraction ── + let l1_summary = if !flags.identity_only { + let db_path = data_dir.join("engraph.db"); + if db_path.exists() { + let store = Store::open(&db_path)?; + if let Some(ref vp) = vault_profile { + match extract_l1_facts(&store, vp) { + Ok(summary) => { + steps_completed.push("l1_extracted".into()); + Some(summary) + } + Err(_) => None, + } + } else if let Ok(Some(loaded_profile)) = Config::load_vault_profile() { + match extract_l1_facts(&store, &loaded_profile) { + Ok(summary) => { + steps_completed.push("l1_extracted".into()); + Some(summary) + } + Err(_) => None, + 
} + } else { + None + } + } else { + None + } + } else { + None + }; + + // ── Build response ── + let config_path = Config::data_dir()?.join("config.toml"); + + let index_stats = index_result.as_ref().map(|r| { + json!({ + "new_files": r.new_files, + "updated_files": r.updated_files, + "deleted_files": r.deleted_files, + "total_chunks": r.total_chunks, + "duration_secs": r.duration.as_secs_f64(), + }) + }); + + let identity_summary = json!({ + "name": config.identity.name, + "role": config.identity.role, + "vault_purpose": config.identity.vault_purpose, + }); + + let l1_info = l1_summary.as_ref().map(|s| { + json!({ + "active_projects": s.active_projects, + "key_people": s.key_people, + "current_focus": s.current_focus, + "blocking": s.blocking, + "ooo": s.ooo, + }) + }); + + let vault_profile_info = vault_profile.as_ref().map(|vp| { + json!({ + "vault_type": format!("{:?}", vp.vault_type), + "structure": format!("{:?}", vp.structure.method), + "total_files": vp.stats.total_files, + }) + }); + + Ok(json!({ + "status": "ok", + "config_path": config_path.to_string_lossy(), + "vault_profile": vault_profile_info, + "index": index_stats, + "identity": identity_summary, + "l1": l1_info, + "steps_completed": steps_completed, + "next_steps": [ + "engraph search \"...\"", + "engraph identity", + "engraph serve", + ], + })) +} + +// ── Private helpers for detect ──────────────────────────────────── + +/// Count markdown files in the daily folder (if detected). +fn count_daily_notes(vault_path: &Path, folders: &FolderMap) -> usize { + let Some(ref daily) = folders.daily else { + return 0; + }; + let daily_dir = vault_path.join(daily); + if !daily_dir.is_dir() { + return 0; + } + count_md_files_in_dir(&daily_dir) +} + +/// Count markdown files in the people folder (if detected). +/// Falls back to scanning common nested paths (e.g. `*/People/`) when the +/// profile doesn't report a top-level people folder. 
+fn count_people_notes(vault_path: &Path, folders: &FolderMap) -> usize {
+    // 1. Prefer the folder role detected in the vault profile; its count is
+    //    authoritative (even if zero) when the folder exists.
+    if let Some(ref people) = folders.people {
+        let dir = vault_path.join(people);
+        if dir.is_dir() {
+            return count_md_files_in_dir(&dir);
+        }
+    }
+
+    // 2. Fallback: scan one level of subdirectories for a case-insensitive
+    //    "People" subfolder (e.g. `03-Resources/People/`).
+    let Ok(top_level) = std::fs::read_dir(vault_path) else {
+        return 0;
+    };
+    let visible_dirs = top_level
+        .filter_map(|e| e.ok())
+        .filter(|e| e.file_type().map(|t| t.is_dir()).unwrap_or(false))
+        .filter(|e| !e.file_name().to_string_lossy().starts_with('.'));
+
+    for dir_entry in visible_dirs {
+        let Ok(children) = std::fs::read_dir(dir_entry.path()) else {
+            continue;
+        };
+        for child in children.filter_map(|e| e.ok()) {
+            let is_dir = child.file_type().map(|t| t.is_dir()).unwrap_or(false);
+            if !is_dir {
+                continue;
+            }
+            if child
+                .file_name()
+                .to_string_lossy()
+                .eq_ignore_ascii_case("people")
+            {
+                let found = count_md_files_in_dir(&child.path());
+                if found > 0 {
+                    return found;
+                }
+            }
+        }
+    }
+
+    0
+}
+
+/// Count `.md` files directly in a directory (non-recursive).
+fn count_md_files_in_dir(dir: &Path) -> usize { + std::fs::read_dir(dir) + .map(|entries| { + entries + .filter_map(|e| e.ok()) + .filter(|e| { + e.file_type().map(|ft| ft.is_file()).unwrap_or(false) + && e.path().extension().map(|ext| ext == "md").unwrap_or(false) + }) + .count() + }) + .unwrap_or(0) +} diff --git a/src/openapi.rs b/src/openapi.rs index 6f6620d..3d1e1a0 100644 --- a/src/openapi.rs +++ b/src/openapi.rs @@ -29,6 +29,10 @@ pub fn build_openapi_spec(server_url: &str) -> serde_json::Value { paths.insert("/api/delete".into(), build_delete()); paths.insert("/api/reindex-file".into(), build_reindex_file()); + // Identity endpoints + paths.insert("/api/identity".into(), build_identity_endpoint()); + paths.insert("/api/setup".into(), build_setup_endpoint()); + // Migration endpoints paths.insert("/api/migrate/preview".into(), build_migrate_preview()); paths.insert("/api/migrate/apply".into(), build_migrate_apply()); @@ -38,7 +42,7 @@ pub fn build_openapi_spec(server_url: &str) -> serde_json::Value { "openapi": "3.1.0", "info": { "title": "engraph", - "version": "1.5.5", + "version": "1.6.0", "description": "AI-powered semantic search and management API for Obsidian vaults." }, "servers": [{ "url": server_url }], @@ -450,6 +454,39 @@ fn build_reindex_file() -> serde_json::Value { }) } +fn build_identity_endpoint() -> serde_json::Value { + serde_json::json!({ + "get": { + "operationId": "getIdentity", + "summary": "Returns compact user identity (L0) and current context (L1).", + "responses": { "200": { "description": "Identity block as JSON with 'identity' key" } } + } + }) +} + +fn build_setup_endpoint() -> serde_json::Value { + serde_json::json!({ + "post": { + "operationId": "setup", + "summary": "Run first-time setup or update identity. 
Use 'detect' to inspect, 'apply' to configure.", + "requestBody": { + "required": true, + "content": { "application/json": { "schema": { + "type": "object", + "required": ["mode"], + "properties": { + "mode": { "type": "string", "description": "'detect' or 'apply'" }, + "name": { "type": "string", "description": "User name (apply mode)" }, + "role": { "type": "string", "description": "User role (apply mode)" }, + "purpose": { "type": "string", "description": "Vault purpose (apply mode)" } + } + }}} + }, + "responses": { "200": { "description": "Setup result as JSON" } } + } + }) +} + fn build_migrate_preview() -> serde_json::Value { serde_json::json!({ "post": { diff --git a/src/serve.rs b/src/serve.rs index 378dc17..a778699 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -192,6 +192,18 @@ pub struct ReindexFileParams { pub file: String, } +#[derive(Debug, Deserialize, JsonSchema)] +pub struct SetupParams { + /// Mode: "detect" to inspect vault, "apply" to configure identity and index. + pub mode: String, + /// User name (required for apply mode). + pub name: Option, + /// User role (required for apply mode). + pub role: Option, + /// Vault purpose (optional for apply mode). + pub purpose: Option, +} + // --------------------------------------------------------------------------- // Server // --------------------------------------------------------------------------- @@ -886,6 +898,56 @@ impl EngraphServer { }); to_json_result(&output) } + + #[tool( + name = "identity", + description = "Returns compact user identity and current context. Call at session start for instant context. L0 = static identity (~50 tokens), L1 = dynamic state (~120 tokens)." 
+    )]
+    async fn identity(&self) -> Result<CallToolResult, McpError> {
+        let store = self.store.lock().await;
+        let config = crate::config::Config::load().unwrap_or_default();
+        let block =
+            crate::identity::format_identity_block(&config, &store).map_err(|e| mcp_err(&e))?;
+        Ok(CallToolResult::success(vec![Content::text(block)]))
+    }
+
+    #[tool(
+        name = "setup",
+        description = "Run first-time setup or update identity. Use 'detect' mode to inspect the vault without changes, 'apply' mode to configure identity and index. Returns JSON."
+    )]
+    async fn setup(&self, params: Parameters<SetupParams>) -> Result<CallToolResult, McpError> {
+        match params.0.mode.as_str() {
+            "detect" => {
+                let result = crate::onboarding::run_detect_json(&self.vault_path)
+                    .map_err(|e| mcp_err(&e))?;
+                to_json_result(&result)
+            }
+            "apply" => {
+                let mut config = crate::config::Config::load().unwrap_or_default();
+                let data_dir = crate::config::Config::data_dir().map_err(|e| mcp_err(&e))?;
+                let flags = crate::onboarding::ApplyFlags {
+                    name: params.0.name,
+                    role: params.0.role,
+                    purpose: params.0.purpose,
+                    identity_only: false,
+                    reindex_only: false,
+                };
+                let result = crate::onboarding::run_apply_json(
+                    &self.vault_path,
+                    &mut config,
+                    &data_dir,
+                    flags,
+                )
+                .map_err(|e| mcp_err(&e))?;
+                to_json_result(&result)
+            }
+            other => Err(McpError::new(
+                rmcp::model::ErrorCode::INVALID_PARAMS,
+                format!("Unknown mode: {other}. Use 'detect' or 'apply'."),
+                None::<serde_json::Value>,
+            )),
+        }
+    }
 }

 #[tool_handler]
@@ -898,6 +960,7 @@ impl rmcp::handler::server::ServerHandler for EngraphServer {
             edit_frontmatter for tags/properties, update_metadata for bulk tag/alias replacement. \
             Lifecycle: move_note to relocate, archive to soft-delete, unarchive to restore, delete for permanent removal. \
             Index: reindex_file to refresh a single file's index after external edits. \
+            Identity: identity for user context at session start, setup to run first-time onboarding (detect/apply). \
             Migration: migrate_preview to classify notes into PARA folders, migrate_apply to execute the migration, migrate_undo to revert.",
         )
     }
diff --git a/src/store.rs b/src/store.rs
index fbc3aaf..c67bc8d 100644
--- a/src/store.rs
+++ b/src/store.rs
@@ -79,6 +79,17 @@ pub struct PlacementCorrection {
     pub corrected_at: String,
 }

+/// A fact about the user's identity, inferred or stated (v1.6).
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct IdentityFact {
+    pub id: i64,
+    pub tier: i64,
+    pub key: String,
+    pub value: String,
+    pub source: Option<String>,
+    pub updated_at: String,
+}
+
 /// Summary statistics for the store.
 #[derive(Debug)]
 pub struct StoreStats {
@@ -357,6 +368,19 @@ impl Store {
             CREATE INDEX IF NOT EXISTS idx_migration_id ON migration_log(migration_id);",
         )?;

+        // Identity facts table (v1.6)
+        self.conn.execute_batch(
+            "CREATE TABLE IF NOT EXISTS identity_facts (
+                id INTEGER PRIMARY KEY,
+                tier INTEGER NOT NULL,
+                key TEXT NOT NULL,
+                value TEXT NOT NULL,
+                source TEXT,
+                updated_at TEXT NOT NULL DEFAULT (datetime('now')),
+                UNIQUE(tier, key, value)
+            );",
+        )?;
+
         Ok(())
     }

@@ -1614,6 +1638,52 @@ impl Store {
         Ok(())
     }

+    // ── Identity Facts ─────────────────────────────────────────
+
+    pub fn upsert_identity_fact(
+        &self,
+        tier: i64,
+        key: &str,
+        value: &str,
+        source: Option<&str>,
+    ) -> Result<()> {
+        self.conn.execute(
+            "INSERT INTO identity_facts (tier, key, value, source, updated_at)
+             VALUES (?1, ?2, ?3, ?4, datetime('now'))
+             ON CONFLICT(tier, key, value) DO UPDATE SET
+                 source = excluded.source,
+                 updated_at = datetime('now')",
+            rusqlite::params![tier, key, value, source],
+        )?;
+        Ok(())
+    }
+
+    pub fn get_identity_facts(&self, tier: i64) -> Result<Vec<IdentityFact>> {
+        let mut stmt = self.conn.prepare(
+            "SELECT id, tier, key, value, source, updated_at
+             FROM identity_facts WHERE tier = ?1 ORDER BY key, value",
+        )?;
+        let rows = stmt.query_map(rusqlite::params![tier], |row| {
+            Ok(IdentityFact {
+                id: row.get(0)?,
+                tier: row.get(1)?,
+                key:
                    row.get(2)?,
+                value: row.get(3)?,
+                source: row.get(4)?,
+                updated_at: row.get(5)?,
+            })
+        })?;
+        Ok(rows.collect::<Result<Vec<_>, _>>()?)
+    }
+
+    pub fn clear_identity_facts(&self, tier: i64) -> Result<()> {
+        self.conn.execute(
+            "DELETE FROM identity_facts WHERE tier = ?1",
+            rusqlite::params![tier],
+        )?;
+        Ok(())
+    }
+
     // ── Helpers ─────────────────────────────────────────────────

     pub fn next_vector_id(&self) -> Result<i64> {
@@ -3525,4 +3595,53 @@ mod tests {
         assert!(record.is_some());
         assert_eq!(record.unwrap().content_hash, "hash1");
     }
+
+    #[test]
+    fn test_insert_and_get_identity_facts() {
+        let store = Store::open_memory().unwrap();
+        store
+            .upsert_identity_fact(0, "name", "Test User", None)
+            .unwrap();
+        store
+            .upsert_identity_fact(1, "active_project", "Project A", Some("01-Projects/a.md"))
+            .unwrap();
+        store
+            .upsert_identity_fact(1, "active_project", "Project B", Some("01-Projects/b.md"))
+            .unwrap();
+
+        let l0 = store.get_identity_facts(0).unwrap();
+        assert_eq!(l0.len(), 1);
+        assert_eq!(l0[0].key, "name");
+        assert_eq!(l0[0].value, "Test User");
+
+        let l1 = store.get_identity_facts(1).unwrap();
+        assert_eq!(l1.len(), 2);
+    }
+
+    #[test]
+    fn test_upsert_identity_fact_replaces() {
+        let store = Store::open_memory().unwrap();
+        store
+            .upsert_identity_fact(0, "name", "Old Name", None)
+            .unwrap();
+        store
+            .upsert_identity_fact(0, "name", "New Name", None)
+            .unwrap();
+
+        let facts = store.get_identity_facts(0).unwrap();
+        assert_eq!(facts.len(), 2); // Different values = different rows
+    }
+
+    #[test]
+    fn test_clear_identity_facts_by_tier() {
+        let store = Store::open_memory().unwrap();
+        store.upsert_identity_fact(0, "name", "User", None).unwrap();
+        store
+            .upsert_identity_fact(1, "active_project", "P1", None)
+            .unwrap();
+        store.clear_identity_facts(1).unwrap();
+
+        assert_eq!(store.get_identity_facts(0).unwrap().len(), 1);
+        assert_eq!(store.get_identity_facts(1).unwrap().len(), 0);
+    }
 }
diff --git a/src/watcher.rs b/src/watcher.rs
index 3318f11..d198ab2 100644
--- a/src/watcher.rs
+++ b/src/watcher.rs
@@ -53,6 +53,7 @@ pub fn start_watcher(
             &store_lock,
             &mut *embedder_lock,
             false,
+            profile_clone.as_ref().as_ref(),
         ) {
             tracing::warn!("Startup reconciliation failed: {:#}", e);
         }
@@ -302,7 +303,7 @@ pub async fn run_consumer(
     store: Arc<Mutex<Store>>,
     embedder: Arc<Mutex<Option<Embedder>>>,
     vault_path: Arc<PathBuf>,
-    _profile: Arc<Option<Profile>>,
+    profile: Arc<Option<Profile>>,
     config: Config,
     recent_writes: RecentWrites,
 ) {
@@ -625,6 +626,7 @@ pub async fn run_consumer(
                 &store_guard,
                 &mut *embedder_guard,
                 false,
+                profile.as_ref().as_ref(),
             ) {
                 Ok(result) => {
                     tracing::info!(