From 8c350abd08fd690c12bd04edfe4aa34f96958d17 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Tue, 24 Mar 2026 23:22:41 +0200 Subject: [PATCH 1/8] refactor: extract search_internal from run_search Reusable 3-lane RRF search function returning structured results. run_search now delegates to search_internal then formats output. InternalSearchResult + SearchOutput types for context engine. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/search.rs | 101 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 29 deletions(-) diff --git a/src/search.rs b/src/search.rs index a89d299..2746869 100644 --- a/src/search.rs +++ b/src/search.rs @@ -19,27 +19,32 @@ pub struct SearchResult { pub docid: Option, } -/// Run a search query and print results. -/// -/// Performs both semantic (HNSW) and keyword (FTS5) search, then fuses -/// results using Reciprocal Rank Fusion. When `explain` is true, each -/// result includes per-lane score breakdown. -pub fn run_search( - query: &str, - top_n: usize, - json: bool, - explain: bool, - data_dir: &Path, -) -> Result<()> { - let models_dir = data_dir.join("models"); - let mut embedder = Embedder::new(&models_dir).context("loading embedder")?; - - let hnsw_dir = data_dir.join("hnsw"); - let index = HnswIndex::load(&hnsw_dir).context("loading HNSW index")?; +/// Structured search result for internal use (no I/O). +#[derive(Debug, Clone)] +pub struct InternalSearchResult { + pub file_path: String, + pub file_id: i64, + pub score: f64, + pub heading: Option, + pub snippet: String, + pub docid: Option, +} - let db_path = data_dir.join("engraph.db"); - let store = Store::open(&db_path).context("opening store")?; +/// Output from `search_internal`: structured results plus raw fused data for --explain. +pub struct SearchOutput { + pub results: Vec, + pub fused: Vec, +} +/// Run hybrid search and return structured results (no I/O). +/// Used by both `run_search` (CLI) and context engine. 
+pub fn search_internal( + query: &str, + top_n: usize, + store: &Store, + embedder: &mut Embedder, + index: &HnswIndex, +) -> Result { // --- Semantic lane --- let query_vec = embedder.embed_one(query).context("embedding query")?; let tombstones = store.get_tombstones().context("loading tombstones")?; @@ -144,7 +149,7 @@ pub fn run_search( }; let graph_results = - graph::graph_expand(&store, &combined_seeds, query, 2, 20).unwrap_or_default(); + graph::graph_expand(store, &combined_seeds, query, 2, 20).unwrap_or_default(); // --- RRF Fusion --- const RRF_K: usize = 60; @@ -157,32 +162,70 @@ pub fn run_search( RRF_K, ); - // Convert to SearchResult, taking top_n. - let results: Vec = fused + // Convert fused results to InternalSearchResult, taking top_n. + let results: Vec = fused .iter() .take(top_n) - .map(|f| SearchResult { - score: f.rrf_score as f32, + .map(|f| InternalSearchResult { file_path: f.file_path.clone(), + file_id: f.file_id, + score: f.rrf_score, heading: f.heading.clone(), snippet: f.snippet.clone(), docid: f.docid.clone(), }) .collect(); - let mut output = format_results(&results, json); + Ok(SearchOutput { results, fused }) +} + +/// Run a search query and print results. +/// +/// Performs both semantic (HNSW) and keyword (FTS5) search, then fuses +/// results using Reciprocal Rank Fusion. When `explain` is true, each +/// result includes per-lane score breakdown. 
+pub fn run_search( + query: &str, + top_n: usize, + json: bool, + explain: bool, + data_dir: &Path, +) -> Result<()> { + let models_dir = data_dir.join("models"); + let mut embedder = Embedder::new(&models_dir).context("loading embedder")?; + + let hnsw_dir = data_dir.join("hnsw"); + let index = HnswIndex::load(&hnsw_dir).context("loading HNSW index")?; + + let db_path = data_dir.join("engraph.db"); + let store = Store::open(&db_path).context("opening store")?; + + let output = search_internal(query, top_n, &store, &mut embedder, &index)?; + + let results: Vec = output + .results + .iter() + .map(|r| SearchResult { + score: r.score as f32, + file_path: r.file_path.clone(), + heading: r.heading.clone(), + snippet: r.snippet.clone(), + docid: r.docid.clone(), + }) + .collect(); + + let mut out = format_results(&results, json); if explain && !json { - // Append explain info after results. let mut explain_out = String::from("\n--- Explain ---\n"); - for f in fused.iter().take(top_n) { + for f in output.fused.iter().take(top_n) { explain_out.push_str(&format!("{}\n", f.file_path)); explain_out.push_str(&fusion::format_explain(f)); } - output.push_str(&explain_out); + out.push_str(&explain_out); } - print!("{output}"); + print!("{out}"); Ok(()) } From 7ed53454a5ccbe664f98b4fd61a64a8f88f5946b Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Tue, 24 Mar 2026 23:25:00 +0200 Subject: [PATCH 2/8] feat: store query methods for context engine list_files with folder/tag filtering, folder_note_counts with top-level grouping, top_tags via json_each aggregation, recent_files by indexed_at, edge_count_for_file. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/store.rs | 225 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) diff --git a/src/store.rs b/src/store.rs index c318122..85860c9 100644 --- a/src/store.rs +++ b/src/store.rs @@ -791,6 +791,118 @@ impl Store { isolated_file_count: (total_files - connected) as usize, }) } + + /// List files filtered by folder prefix and/or tags (AND logic). + pub fn list_files( + &self, + folder: Option<&str>, + tags: &[String], + limit: usize, + ) -> Result> { + let mut sql = String::from( + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid FROM files WHERE 1=1", + ); + let mut param_values: Vec> = Vec::new(); + if let Some(f) = folder { + sql.push_str(" AND path LIKE ?"); + param_values.push(Box::new(format!("{}%", f))); + } + for tag in tags { + sql.push_str(" AND EXISTS (SELECT 1 FROM json_each(tags) WHERE value = ?)"); + param_values.push(Box::new(tag.clone())); + } + sql.push_str(" ORDER BY indexed_at DESC LIMIT ?"); + param_values.push(Box::new(limit as i64)); + + let mut stmt = self.conn.prepare(&sql)?; + let rows = stmt.query_map(rusqlite::params_from_iter(param_values.iter()), |row| { + Ok(FileRecord { + id: row.get(0)?, + path: row.get(1)?, + content_hash: row.get(2)?, + mtime: row.get(3)?, + tags: parse_tags(&row.get::<_, String>(4)?), + indexed_at: row.get(5)?, + docid: row.get(6)?, + }) + })?; + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Top-level folder grouping with note counts. + pub fn folder_note_counts(&self) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT CASE WHEN instr(path, '/') > 0 + THEN substr(path, 1, instr(path, '/') - 1) + ELSE '(root)' + END AS folder, + COUNT(*) as cnt + FROM files GROUP BY folder ORDER BY cnt DESC", + )?; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? 
as usize)) + })?; + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Tag frequency aggregation via json_each. + pub fn top_tags(&self, limit: usize) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT value, COUNT(*) as cnt + FROM files, json_each(files.tags) + GROUP BY value ORDER BY cnt DESC LIMIT ?", + )?; + let rows = stmt.query_map(params![limit as i64], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? as usize)) + })?; + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Most recently indexed files. + pub fn recent_files(&self, limit: usize) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid + FROM files ORDER BY indexed_at DESC LIMIT ?", + )?; + let rows = stmt.query_map(params![limit as i64], |row| { + Ok(FileRecord { + id: row.get(0)?, + path: row.get(1)?, + content_hash: row.get(2)?, + mtime: row.get(3)?, + tags: parse_tags(&row.get::<_, String>(4)?), + indexed_at: row.get(5)?, + docid: row.get(6)?, + }) + })?; + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Total edges (both directions) for a given file. 
+ pub fn edge_count_for_file(&self, file_id: i64) -> Result { + let count: i64 = self.conn.query_row( + "SELECT COUNT(*) FROM edges WHERE from_file = ?1 OR to_file = ?1", + params![file_id], + |row| row.get(0), + )?; + Ok(count as usize) + } } fn parse_tags(json: &str) -> Vec { @@ -1302,4 +1414,117 @@ mod tests { assert_eq!(stats.connected_file_count, 3); // a, b, c assert_eq!(stats.isolated_file_count, 1); // d } + + #[test] + fn test_list_files_no_filter() { + let store = Store::open_memory().unwrap(); + store + .insert_file("01-Projects/a.md", "h1", 100, &["rust".into()], "aaa111") + .unwrap(); + store + .insert_file("02-Areas/b.md", "h2", 200, &["health".into()], "bbb222") + .unwrap(); + store + .insert_file( + "01-Projects/c.md", + "h3", + 300, + &["rust".into(), "cli".into()], + "ccc333", + ) + .unwrap(); + let files = store.list_files(None, &[], 20).unwrap(); + assert_eq!(files.len(), 3); + } + + #[test] + fn test_list_files_folder_filter() { + let store = Store::open_memory().unwrap(); + store + .insert_file("01-Projects/a.md", "h1", 100, &[], "aaa111") + .unwrap(); + store + .insert_file("02-Areas/b.md", "h2", 200, &[], "bbb222") + .unwrap(); + let files = store.list_files(Some("01-Projects"), &[], 20).unwrap(); + assert_eq!(files.len(), 1); + assert_eq!(files[0].path, "01-Projects/a.md"); + } + + #[test] + fn test_list_files_tag_filter() { + let store = Store::open_memory().unwrap(); + store + .insert_file("a.md", "h1", 100, &["rust".into(), "cli".into()], "aaa111") + .unwrap(); + store + .insert_file("b.md", "h2", 200, &["rust".into()], "bbb222") + .unwrap(); + store + .insert_file("c.md", "h3", 300, &["python".into()], "ccc333") + .unwrap(); + let files = store.list_files(None, &["rust".into()], 20).unwrap(); + assert_eq!(files.len(), 2); + let files = store + .list_files(None, &["rust".into(), "cli".into()], 20) + .unwrap(); + assert_eq!(files.len(), 1); + assert_eq!(files[0].path, "a.md"); + } + + #[test] + fn test_folder_note_counts() { + let store = 
Store::open_memory().unwrap(); + store + .insert_file("01-Projects/a.md", "h1", 100, &[], "a1") + .unwrap(); + store + .insert_file("01-Projects/b.md", "h2", 100, &[], "b2") + .unwrap(); + store + .insert_file("02-Areas/c.md", "h3", 100, &[], "c3") + .unwrap(); + store.insert_file("root.md", "h4", 100, &[], "d4").unwrap(); + let counts = store.folder_note_counts().unwrap(); + assert!(counts.iter().any(|(f, c)| f == "01-Projects" && *c == 2)); + assert!(counts.iter().any(|(f, c)| f == "02-Areas" && *c == 1)); + assert!(counts.iter().any(|(f, c)| f == "(root)" && *c == 1)); + } + + #[test] + fn test_top_tags() { + let store = Store::open_memory().unwrap(); + store + .insert_file("a.md", "h1", 100, &["rust".into(), "cli".into()], "a1") + .unwrap(); + store + .insert_file("b.md", "h2", 100, &["rust".into(), "web".into()], "b2") + .unwrap(); + store + .insert_file("c.md", "h3", 100, &["rust".into()], "c3") + .unwrap(); + let tags = store.top_tags(10).unwrap(); + assert_eq!(tags[0].0, "rust"); + assert_eq!(tags[0].1, 3); + } + + #[test] + fn test_recent_files() { + let store = Store::open_memory().unwrap(); + store.insert_file("old.md", "h1", 100, &[], "a1").unwrap(); + store.insert_file("new.md", "h2", 200, &[], "b2").unwrap(); + let recent = store.recent_files(1).unwrap(); + assert_eq!(recent.len(), 1); + } + + #[test] + fn test_edge_count_for_file() { + let store = Store::open_memory().unwrap(); + let f1 = store.insert_file("a.md", "h1", 100, &[], "a1").unwrap(); + let f2 = store.insert_file("b.md", "h2", 100, &[], "b2").unwrap(); + store.insert_edge(f1, f2, "wikilink").unwrap(); + store.insert_edge(f2, f1, "wikilink").unwrap(); + assert_eq!(store.edge_count_for_file(f1).unwrap(), 2); + assert_eq!(store.edge_count_for_file(f2).unwrap(), 2); + } } From 05d9d0324615fdedb09a938e4a836c0798f33b9c Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Tue, 24 Mar 2026 23:28:03 +0200 Subject: [PATCH 3/8] =?UTF-8?q?feat:=20context=20engine=20=E2=80=94=20read?= 
=?UTF-8?q?,=20list,=20and=20vault=5Fmap=20functions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NoteContent with full content, metadata, graph edges. NoteListItem for filtered listing. VaultMap with folder counts, top tags, recent files. ContextParams shared context. File resolution supports docid, exact path, and basename match. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/context.rs | 386 +++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 2 files changed, 387 insertions(+) create mode 100644 src/context.rs diff --git a/src/context.rs b/src/context.rs new file mode 100644 index 0000000..00594dc --- /dev/null +++ b/src/context.rs @@ -0,0 +1,386 @@ +use std::path::Path; + +use anyhow::Result; +use serde::Serialize; + +use crate::profile::VaultProfile; +use crate::store::Store; + +/// Shared context for all context engine functions. +pub struct ContextParams<'a> { + pub store: &'a Store, + pub vault_path: &'a Path, + pub profile: Option<&'a VaultProfile>, +} + +#[derive(Debug, Serialize)] +pub struct NoteContent { + pub path: String, + pub docid: Option, + pub content: String, + pub tags: Vec, + pub frontmatter: String, + pub body: String, + pub outgoing_links: Vec, + pub incoming_links: Vec, + pub mentions_people: Vec, + pub mentioned_by: Vec, + pub char_count: usize, +} + +#[derive(Debug, Serialize)] +pub struct NoteListItem { + pub path: String, + pub docid: Option, + pub tags: Vec, + pub indexed_at: String, + pub edge_count: usize, +} + +#[derive(Debug, Serialize)] +pub struct VaultMap { + pub vault_path: String, + pub vault_type: String, + pub structure: String, + pub total_files: usize, + pub total_chunks: usize, + pub total_edges: usize, + pub folders: Vec, + pub top_tags: Vec<(String, usize)>, + pub recent_files: Vec, +} + +#[derive(Debug, Serialize)] +pub struct FolderInfo { + pub path: String, + pub note_count: usize, +} + +// 
--------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Resolve a file by docid (#abcdef), exact path, or basename match. +fn resolve_file( + params: &ContextParams, + file_or_docid: &str, +) -> Result> { + // Docid lookup: #abcdef + if file_or_docid.starts_with('#') && file_or_docid.len() == 7 { + return params.store.get_file_by_docid(&file_or_docid[1..]); + } + + // Exact path lookup + if let Some(f) = params.store.get_file(file_or_docid)? { + return Ok(Some(f)); + } + + // Basename fallback: append .md if needed, then case-insensitive suffix match + let target = if file_or_docid.ends_with(".md") { + file_or_docid.to_string() + } else { + format!("{}.md", file_or_docid) + }; + let target_lower = target.to_lowercase(); + let all = params.store.get_all_files()?; + Ok(all.into_iter().find(|f| { + let p = f.path.to_lowercase(); + p == target_lower || p.ends_with(&format!("/{}", target_lower)) + })) +} + +/// Split content into (frontmatter YAML, body) parts. +fn split_frontmatter(content: &str) -> (String, String) { + let trimmed = content.trim_start(); + if !trimmed.starts_with("---") { + return (String::new(), content.to_string()); + } + let after = &trimmed[3..]; + let after = after.trim_start_matches('-'); + let after = after.strip_prefix('\n').unwrap_or(after); + if let Some(end) = after.find("\n---") { + let fm = after[..end].to_string(); + let body = after[end + 4..] + .strip_prefix('\n') + .unwrap_or(&after[end + 4..]); + (fm, body.to_string()) + } else { + (String::new(), content.to_string()) + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/// Read a single note with full content, metadata, and graph edges. 
+pub fn context_read(params: &ContextParams, file_or_docid: &str) -> Result { + let record = resolve_file(params, file_or_docid)? + .ok_or_else(|| anyhow::anyhow!("File not found: {}", file_or_docid))?; + + let full_path = params.vault_path.join(&record.path); + let (content, body, frontmatter) = match std::fs::read_to_string(&full_path) { + Ok(c) => { + let (fm, b) = split_frontmatter(&c); + (c, b, fm) + } + Err(_) => { + let msg = "[File not found on disk. Re-run 'engraph index' to update.]".to_string(); + (String::new(), msg, String::new()) + } + }; + + let outgoing_links: Vec = params + .store + .get_outgoing(record.id, Some("wikilink"))? + .iter() + .filter_map(|(fid, _)| params.store.get_file_path_by_id(*fid).ok().flatten()) + .collect(); + let incoming_links: Vec = params + .store + .get_incoming(record.id, Some("wikilink"))? + .iter() + .filter_map(|(fid, _)| params.store.get_file_path_by_id(*fid).ok().flatten()) + .collect(); + let mentions_people: Vec = params + .store + .get_outgoing(record.id, Some("mention"))? + .iter() + .filter_map(|(fid, _)| params.store.get_file_path_by_id(*fid).ok().flatten()) + .collect(); + let mentioned_by: Vec = params + .store + .get_incoming(record.id, Some("mention"))? + .iter() + .filter_map(|(fid, _)| params.store.get_file_path_by_id(*fid).ok().flatten()) + .collect(); + + let char_count = content.len(); + Ok(NoteContent { + path: record.path, + docid: record.docid, + content, + tags: record.tags, + frontmatter, + body, + outgoing_links, + incoming_links, + mentions_people, + mentioned_by, + char_count, + }) +} + +/// List notes with optional folder/tag filters and edge counts. 
+pub fn context_list( + params: &ContextParams, + folder: Option<&str>, + tags: &[String], + limit: usize, +) -> Result> { + let files = params.store.list_files(folder, tags, limit)?; + let mut items = Vec::new(); + for f in files { + let edge_count = params.store.edge_count_for_file(f.id).unwrap_or(0); + items.push(NoteListItem { + path: f.path, + docid: f.docid, + tags: f.tags, + indexed_at: f.indexed_at, + edge_count, + }); + } + Ok(items) +} + +/// High-level vault overview: folders, tags, recent files, counts. +pub fn vault_map(params: &ContextParams) -> Result { + let stats = params.store.stats()?; + let edge_stats = params.store.get_edge_stats().ok(); + + let (vault_type, structure) = match params.profile { + Some(p) => ( + format!("{:?}", p.vault_type), + format!("{:?}", p.structure.method), + ), + None => ("Unknown".into(), "Unknown".into()), + }; + + let folder_counts = params.store.folder_note_counts()?; + let folders: Vec = folder_counts + .into_iter() + .map(|(path, count)| FolderInfo { + path, + note_count: count, + }) + .collect(); + + let top_tags = params.store.top_tags(20)?; + + let recent = params.store.recent_files(10)?; + let recent_files: Vec = recent.into_iter().map(|f| f.path).collect(); + + Ok(VaultMap { + vault_path: params.vault_path.to_string_lossy().to_string(), + vault_type, + structure, + total_files: stats.file_count, + total_chunks: stats.chunk_count, + total_edges: edge_stats.map(|e| e.total_edges).unwrap_or(0), + folders, + top_tags, + recent_files, + }) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::docid::generate_docid; + use crate::store::Store; + use tempfile::TempDir; + + fn setup_vault() -> (TempDir, Store, std::path::PathBuf) { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().to_path_buf(); + + std::fs::write( + 
root.join("note.md"), + "---\ntags:\n - rust\n---\n# Note\n\nContent here.\n\nSee [[other]].", + ) + .unwrap(); + std::fs::write(root.join("other.md"), "# Other\n\nMore content.").unwrap(); + + let store = Store::open_memory().unwrap(); + let d1 = generate_docid("note.md"); + let d2 = generate_docid("other.md"); + store + .insert_file("note.md", "h1", 100, &["rust".into()], &d1) + .unwrap(); + store.insert_file("other.md", "h2", 100, &[], &d2).unwrap(); + + let f1 = store.get_file("note.md").unwrap().unwrap().id; + let f2 = store.get_file("other.md").unwrap().unwrap().id; + store.insert_edge(f1, f2, "wikilink").unwrap(); + store.insert_edge(f2, f1, "wikilink").unwrap(); + + (tmp, store, root) + } + + #[test] + fn test_read_by_path() { + let (_tmp, store, root) = setup_vault(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let note = context_read(&params, "note.md").unwrap(); + assert_eq!(note.path, "note.md"); + assert!(note.content.contains("Content here.")); + assert!(note.body.contains("Content here.")); + assert!(note.frontmatter.contains("tags:")); + assert!(note.tags.contains(&"rust".to_string())); + assert_eq!(note.outgoing_links.len(), 1); + assert_eq!(note.incoming_links.len(), 1); + assert!(note.char_count > 0); + } + + #[test] + fn test_read_by_docid() { + let (_tmp, store, root) = setup_vault(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let docid = generate_docid("note.md"); + let note = context_read(&params, &format!("#{}", docid)).unwrap(); + assert_eq!(note.path, "note.md"); + } + + #[test] + fn test_read_file_not_on_disk() { + let (_tmp, store, root) = setup_vault(); + store + .insert_file("ghost.md", "h3", 100, &[], "ggg333") + .unwrap(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let note = context_read(&params, "ghost.md").unwrap(); + assert!(note.body.contains("File not found on disk")); + } + + #[test] + fn 
test_read_by_basename() { + let (_tmp, store, root) = setup_vault(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let note = context_read(&params, "note").unwrap(); + assert_eq!(note.path, "note.md"); + } + + #[test] + fn test_context_list_no_filter() { + let (_tmp, store, root) = setup_vault(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let items = context_list(&params, None, &[], 20).unwrap(); + assert_eq!(items.len(), 2); + } + + #[test] + fn test_context_list_tag_filter() { + let (_tmp, store, root) = setup_vault(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let items = context_list(&params, None, &["rust".into()], 20).unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0].path, "note.md"); + } + + #[test] + fn test_vault_map() { + let (_tmp, store, root) = setup_vault(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let map = vault_map(&params).unwrap(); + assert_eq!(map.total_files, 2); + assert!(!map.folders.is_empty()); + assert!(map.top_tags.iter().any(|(t, _)| t == "rust")); + } + + #[test] + fn test_split_frontmatter() { + let (fm, body) = split_frontmatter("---\ntags:\n - rust\n---\n# Hello\nWorld"); + assert!(fm.contains("tags:")); + assert!(body.contains("# Hello")); + assert!(!body.contains("---")); + } + + #[test] + fn test_split_frontmatter_no_fm() { + let (fm, body) = split_frontmatter("# Just content\nHere."); + assert!(fm.is_empty()); + assert!(body.contains("# Just content")); + } +} diff --git a/src/lib.rs b/src/lib.rs index 870efbf..c19dbe6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod chunker; pub mod config; +pub mod context; pub mod docid; pub mod embedder; pub mod fts; From 37b9cc624e6a7022212e1e2b95e6e89c392d46fa Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Tue, 24 Mar 2026 23:33:20 +0200 Subject: [PATCH 4/8] 
=?UTF-8?q?feat:=20who=20=E2=80=94=20person=20context?= =?UTF-8?q?=20bundle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finds person note by basename, returns full content + mention edges with snippets + wikilink connections. FTS snippet extraction with disk-read fallback. Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 2 +- src/context.rs | 163 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 60cec92..ee36be5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -560,7 +560,7 @@ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "engraph" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "clap", diff --git a/src/context.rs b/src/context.rs index 00594dc..75327e2 100644 --- a/src/context.rs +++ b/src/context.rs @@ -56,6 +56,23 @@ pub struct FolderInfo { pub note_count: usize, } +#[derive(Debug, Serialize)] +pub struct PersonContext { + pub name: String, + pub note: Option, + pub mentioned_in: Vec, + pub linked_from: Vec, + pub linked_to: Vec, + pub total_chars: usize, +} + +#[derive(Debug, Serialize)] +pub struct MentionInfo { + pub path: String, + pub docid: Option, + pub snippet: String, +} + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -233,6 +250,100 @@ pub fn vault_map(params: &ContextParams) -> Result { }) } +/// Build a person context bundle: note content, mentions, wikilink connections. 
+pub fn context_who(params: &ContextParams, name: &str) -> Result { + let name_md = format!("{}.md", name); + let name_lower = name_md.to_lowercase(); + let all_files = params.store.get_all_files()?; + let person_file = all_files.iter().find(|f| { + let basename = f.path.rsplit('/').next().unwrap_or(&f.path).to_lowercase(); + basename == name_lower + }); + + let (note, person_id) = if let Some(pf) = person_file { + let n = context_read(params, &pf.path)?; + (Some(n), Some(pf.id)) + } else { + (None, None) + }; + + let mut mentioned_in = Vec::new(); + let mut linked_from = Vec::new(); + let mut linked_to = Vec::new(); + + if let Some(pid) = person_id { + // Mention edges + let mentions = params.store.get_incoming(pid, Some("mention"))?; + for (fid, _) in &mentions { + if let Some(path) = params.store.get_file_path_by_id(*fid).ok().flatten() { + let docid = params + .store + .get_file_by_id(*fid) + .ok() + .flatten() + .and_then(|f| f.docid); + let snippet = get_mention_snippet(params, *fid, name); + mentioned_in.push(MentionInfo { + path, + docid, + snippet, + }); + } + } + // Wikilink edges + let incoming_wl = params.store.get_incoming(pid, Some("wikilink"))?; + for (fid, _) in &incoming_wl { + if let Some(path) = params.store.get_file_path_by_id(*fid).ok().flatten() { + linked_from.push(path); + } + } + let outgoing_wl = params.store.get_outgoing(pid, Some("wikilink"))?; + for (fid, _) in &outgoing_wl { + if let Some(path) = params.store.get_file_path_by_id(*fid).ok().flatten() { + linked_to.push(path); + } + } + } + + let total_chars = note.as_ref().map(|n| n.char_count).unwrap_or(0) + + mentioned_in.iter().map(|m| m.snippet.len()).sum::(); + + Ok(PersonContext { + name: name.to_string(), + note, + mentioned_in, + linked_from, + linked_to, + total_chars, + }) +} + +/// Get a snippet from a file mentioning a name. Try FTS first, fall back to disk read. 
+fn get_mention_snippet(params: &ContextParams, file_id: i64, name: &str) -> String { + if let Ok(results) = params.store.fts_search(name, 5) + && let Some(r) = results.iter().find(|r| r.file_id == file_id) + { + return r.snippet.clone(); + } + if let Some(path) = params.store.get_file_path_by_id(file_id).ok().flatten() { + let full_path = params.vault_path.join(&path); + if let Ok(content) = std::fs::read_to_string(&full_path) { + let name_lower = name.to_lowercase(); + for line in content.lines() { + if line.to_lowercase().contains(&name_lower) { + let truncated: String = line.chars().take(200).collect(); + return if line.len() > 200 { + format!("{}...", truncated) + } else { + truncated + }; + } + } + } + } + String::new() +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -383,4 +494,56 @@ mod tests { assert!(fm.is_empty()); assert!(body.contains("# Just content")); } + + #[test] + fn test_who_finds_person() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().to_path_buf(); + std::fs::create_dir_all(root.join("People")).unwrap(); + std::fs::write( + root.join("People/John.md"), + "---\naliases:\n - JN\n---\n# John\nDeveloper.", + ) + .unwrap(); + std::fs::write(root.join("daily.md"), "# Daily\nTalked to John about Rust.").unwrap(); + + let store = Store::open_memory().unwrap(); + let f1 = store + .insert_file("People/John.md", "h1", 100, &["person".into()], "aaa111") + .unwrap(); + let f2 = store + .insert_file("daily.md", "h2", 100, &[], "bbb222") + .unwrap(); + store.insert_edge(f2, f1, "mention").unwrap(); + store + .insert_chunk(f2, "# Daily", "Talked to John about Rust.", 10, 20) + .unwrap(); + store + .insert_fts_chunk(f2, 0, "Talked to John about Rust.") + .unwrap(); + + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let person = context_who(&params, "John").unwrap(); + 
assert!(person.note.is_some()); + assert_eq!(person.name, "John"); + assert_eq!(person.mentioned_in.len(), 1); + assert!(person.mentioned_in[0].path.contains("daily")); + } + + #[test] + fn test_who_not_found() { + let (_tmp, store, root) = setup_vault(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let person = context_who(&params, "NonExistent").unwrap(); + assert!(person.note.is_none()); + assert!(person.mentioned_in.is_empty()); + } } From bb0874f6cb632f81951446ef20233826a34b1ef8 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Tue, 24 Mar 2026 23:33:37 +0200 Subject: [PATCH 5/8] =?UTF-8?q?feat:=20project=20=E2=80=94=20project=20con?= =?UTF-8?q?text=20bundle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finds project note, child notes (same folder + linkers), active tasks (unchecked checkboxes), team (people linked from project), recent mentions in daily notes via FTS. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/context.rs | 212 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) diff --git a/src/context.rs b/src/context.rs index 75327e2..59593f7 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,3 +1,4 @@ +use std::collections::HashSet; use std::path::Path; use anyhow::Result; @@ -73,6 +74,23 @@ pub struct MentionInfo { pub snippet: String, } +#[derive(Debug, Serialize)] +pub struct ProjectContext { + pub name: String, + pub note: Option, + pub child_notes: Vec, + pub active_tasks: Vec, + pub team: Vec, + pub recent_mentions: Vec, + pub total_chars: usize, +} + +#[derive(Debug, Serialize)] +pub struct TaskItem { + pub text: String, + pub source_file: String, +} + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -344,6 +362,140 @@ fn get_mention_snippet(params: &ContextParams, file_id: i64, name: 
&str) -> Stri String::new() } +/// Build a project context bundle: note, child notes, tasks, team, recent mentions. +pub fn context_project(params: &ContextParams, name: &str) -> Result { + let name_md = format!("{}.md", name); + let name_lower = name_md.to_lowercase(); + let all_files = params.store.get_all_files()?; + let project_file = all_files.iter().find(|f| { + let basename = f.path.rsplit('/').next().unwrap_or(&f.path).to_lowercase(); + basename == name_lower + }); + + let (note, project_id, project_folder) = if let Some(pf) = project_file { + let n = context_read(params, &pf.path)?; + let folder = pf.path.rsplit_once('/').map(|(f, _)| f.to_string()); + (Some(n), Some(pf.id), folder) + } else { + (None, None, None) + }; + + let mut child_ids = HashSet::new(); + let mut child_notes = Vec::new(); + + // Files in same folder + if let Some(folder) = &project_folder { + let folder_files = params.store.list_files(Some(folder), &[], 50)?; + for f in folder_files { + if Some(f.id) != project_id && child_ids.insert(f.id) { + let ec = params.store.edge_count_for_file(f.id).unwrap_or(0); + child_notes.push(NoteListItem { + path: f.path, + docid: f.docid, + tags: f.tags, + indexed_at: f.indexed_at, + edge_count: ec, + }); + } + } + } + + // Files linking to project + if let Some(pid) = project_id { + let incoming = params.store.get_incoming(pid, Some("wikilink"))?; + for (fid, _) in &incoming { + if child_ids.insert(*fid) + && let Some(f) = params.store.get_file_by_id(*fid).ok().flatten() + { + let ec = params.store.edge_count_for_file(*fid).unwrap_or(0); + child_notes.push(NoteListItem { + path: f.path, + docid: f.docid, + tags: f.tags, + indexed_at: f.indexed_at, + edge_count: ec, + }); + } + } + } + + // Active tasks + let mut active_tasks = Vec::new(); + let scan_tasks = |path: &str, tasks: &mut Vec| { + let full = params.vault_path.join(path); + if let Ok(content) = std::fs::read_to_string(&full) { + for line in content.lines() { + let trimmed = line.trim(); + if 
trimmed.starts_with("- [ ] ") { + tasks.push(TaskItem { + text: trimmed + .strip_prefix("- [ ] ") + .unwrap_or(trimmed) + .to_string(), + source_file: path.to_string(), + }); + } + } + } + }; + if let Some(n) = &note { + scan_tasks(&n.path, &mut active_tasks); + } + for child in &child_notes { + scan_tasks(&child.path, &mut active_tasks); + } + + // Team: people linked from project + let mut team = Vec::new(); + if let Some(pid) = project_id { + let outgoing = params.store.get_outgoing(pid, Some("wikilink"))?; + for (fid, _) in &outgoing { + if let Some(path) = params.store.get_file_path_by_id(*fid).ok().flatten() + && path.to_lowercase().contains("people") + { + team.push(path); + } + } + } + + // Recent mentions in daily notes + let mut recent_mentions = Vec::new(); + if let Ok(fts_results) = params.store.fts_search(name, 10) { + for r in fts_results { + if let Some(path) = params.store.get_file_path_by_id(r.file_id).ok().flatten() + && (path.contains("Daily") || path.contains("daily")) + { + let docid = params + .store + .get_file_by_id(r.file_id) + .ok() + .flatten() + .and_then(|f| f.docid); + recent_mentions.push(MentionInfo { + path, + docid, + snippet: r.snippet, + }); + if recent_mentions.len() >= 5 { + break; + } + } + } + } + + let total_chars = note.as_ref().map(|n| n.char_count).unwrap_or(0); + + Ok(ProjectContext { + name: name.to_string(), + note, + child_notes, + active_tasks, + team, + recent_mentions, + total_chars, + }) +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -546,4 +698,64 @@ mod tests { assert!(person.note.is_none()); assert!(person.mentioned_in.is_empty()); } + + #[test] + fn test_project_context() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().to_path_buf(); + std::fs::create_dir_all(root.join("01-Projects")).unwrap(); + std::fs::write( + root.join("01-Projects/MyProject.md"), + "# MyProject\n\n- 
[ ] Task one\n- [x] Done task\n- [ ] Task two", + ) + .unwrap(); + std::fs::write( + root.join("01-Projects/child.md"), + "# Child\nRelated to [[MyProject]].\n- [ ] Sub task", + ) + .unwrap(); + + let store = Store::open_memory().unwrap(); + let f1 = store + .insert_file( + "01-Projects/MyProject.md", + "h1", + 100, + &["project".into()], + "aaa111", + ) + .unwrap(); + let f2 = store + .insert_file("01-Projects/child.md", "h2", 100, &[], "bbb222") + .unwrap(); + store.insert_edge(f2, f1, "wikilink").unwrap(); + store.insert_edge(f1, f2, "wikilink").unwrap(); + + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let proj = context_project(&params, "MyProject").unwrap(); + assert!(proj.note.is_some()); + assert!(!proj.child_notes.is_empty()); + // Should find "Task one" and "Task two" (not "Done task") + assert!(proj.active_tasks.len() >= 2); + assert!(proj.active_tasks.iter().any(|t| t.text == "Task one")); + assert!(proj.active_tasks.iter().any(|t| t.text == "Task two")); + assert!(!proj.active_tasks.iter().any(|t| t.text.contains("Done"))); + } + + #[test] + fn test_project_not_found() { + let (_tmp, store, root) = setup_vault(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let proj = context_project(&params, "NonExistent").unwrap(); + assert!(proj.note.is_none()); + assert!(proj.child_notes.is_empty()); + } } From dc3a771a9890ec4033c5b6f2d6ba3bbdb4295721 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Tue, 24 Mar 2026 23:36:18 +0200 Subject: [PATCH 6/8] =?UTF-8?q?feat:=20context=20topic=20=E2=80=94=20rich?= =?UTF-8?q?=20context=20bundle=20with=20budget=20trimming?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assembles direct search results + graph expansions within a char budget. Priority ordering: direct matches first, then 1-hop related. Truncation with docid reference for full content. 
Testable without embedder via context_topic_from_results. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/context.rs | 293 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) diff --git a/src/context.rs b/src/context.rs index 59593f7..7f5952e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -496,6 +496,156 @@ pub fn context_project(params: &ContextParams, name: &str) -> Result<ProjectContext> { +#[derive(Debug, Serialize)] +pub struct ContextBundle { + pub topic: String, + pub sections: Vec<ContextSection>, + pub total_chars: usize, + pub budget_chars: usize, + pub truncated: bool, +} + +#[derive(Debug, Serialize)] +pub struct ContextSection { + pub label: String, + pub path: String, + pub docid: Option<String>, + pub content: String, + pub relevance: String, +} + +const DEFAULT_BUDGET: usize = 32000; +const SECTION_OVERHEAD: usize = 100; + +/// Snap to a valid UTF-8 char boundary at or before `offset`. +fn snap_to_char(s: &str, offset: usize) -> usize { + let offset = offset.min(s.len()); + let mut pos = offset; + while pos > 0 && !s.is_char_boundary(pos) { + pos -= 1; + } + pos +} + +/// Assemble a context bundle from pre-computed search results. +/// Testable without embedder. +pub fn context_topic_from_results( + params: &ContextParams, + topic: &str, + search_results: &[crate::search::InternalSearchResult], + max_chars: usize, +) -> Result<ContextBundle> { + let budget = if max_chars == 0 { + DEFAULT_BUDGET + } else { + max_chars + }; + let mut sections = Vec::new(); + let mut used_chars = 0; + let mut included_files: HashSet<String> = HashSet::new(); + + // Priority 1: Direct search results (top 5) + for r in search_results.iter().take(5) { + if used_chars >= budget { + break; + } + let full_path = params.vault_path.join(&r.file_path); + let content = std::fs::read_to_string(&full_path).unwrap_or_default(); + let (_, body) = split_frontmatter(&content); + + let available = budget.saturating_sub(used_chars + SECTION_OVERHEAD); + let trimmed = if body.len() > available { + format!( + "{}... 
[truncated, full note: #{}]", + &body[..snap_to_char(&body, available)], + r.docid.as_deref().unwrap_or("?") + ) + } else { + body + }; + + used_chars += trimmed.len() + SECTION_OVERHEAD; + included_files.insert(r.file_path.clone()); + sections.push(ContextSection { + label: "Direct match".into(), + path: r.file_path.clone(), + docid: r.docid.clone(), + content: trimmed, + relevance: format!("score {:.2}", r.score), + }); + } + + // Priority 2: Graph-expanded notes (1-hop from top 3 results) + for r in search_results.iter().take(3) { + if used_chars >= budget { + break; + } + let neighbors = params.store.get_neighbors(r.file_id, 1).unwrap_or_default(); + for (nid, _hop) in neighbors { + if used_chars >= budget { + break; + } + if let Some(nf) = params.store.get_file_by_id(nid).ok().flatten() { + if included_files.contains(&nf.path) { + continue; + } + let full_path = params.vault_path.join(&nf.path); + let content = std::fs::read_to_string(&full_path).unwrap_or_default(); + let (_, body) = split_frontmatter(&content); + + let available = budget.saturating_sub(used_chars + SECTION_OVERHEAD); + let max_per_expansion = budget / 8; + let cap = available.min(max_per_expansion); + if cap == 0 { + break; + } + let trimmed = if body.len() > cap { + format!("{}... [truncated]", &body[..snap_to_char(&body, cap)]) + } else { + body + }; + + used_chars += trimmed.len() + SECTION_OVERHEAD; + included_files.insert(nf.path.clone()); + sections.push(ContextSection { + label: "Related (1-hop)".into(), + path: nf.path.clone(), + docid: nf.docid, + content: trimmed, + relevance: format!("linked from {}", r.file_path), + }); + } + } + } + + let truncated = used_chars >= budget; + + Ok(ContextBundle { + topic: topic.to_string(), + sections, + total_chars: used_chars, + budget_chars: budget, + truncated, + }) +} + +/// Full context topic function (requires embedder + HNSW). +/// Called from CLI handler which provides the heavy resources. 
+pub fn context_topic_with_search( + params: &ContextParams, + topic: &str, + max_chars: usize, + embedder: &mut crate::embedder::Embedder, + index: &crate::hnsw::HnswIndex, +) -> Result<ContextBundle> { + let search_output = crate::search::search_internal(topic, 5, params.store, embedder, index)?; + context_topic_from_results(params, topic, &search_output.results, max_chars) +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -758,4 +908,147 @@ mod tests { assert!(proj.note.is_none()); assert!(proj.child_notes.is_empty()); } + + // --- context_topic tests --- + + #[test] + fn test_context_topic_basic() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().to_path_buf(); + std::fs::write( + root.join("result.md"), + "# Result\n\nThis is relevant content about the topic.", + ) + .unwrap(); + + let store = Store::open_memory().unwrap(); + store + .insert_file("result.md", "h1", 100, &["topic".into()], "aaa111") + .unwrap(); + + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let search_results = vec![crate::search::InternalSearchResult { + file_path: "result.md".into(), + file_id: 1, + score: 0.85, + heading: Some("# Result".into()), + snippet: "relevant content".into(), + docid: Some("aaa111".into()), + }]; + + let bundle = context_topic_from_results(&params, "topic", &search_results, 32000).unwrap(); + assert!(!bundle.sections.is_empty()); + assert!(bundle.sections[0].content.contains("relevant content")); + assert!(bundle.total_chars <= bundle.budget_chars); + assert!(!bundle.truncated); + } + + #[test] + fn test_context_topic_budget_trimming() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().to_path_buf(); + let long_content = format!("# Long\n\n{}", "word ".repeat(5000)); + std::fs::write(root.join("long.md"), &long_content).unwrap(); + + let store = Store::open_memory().unwrap(); + store + 
.insert_file("long.md", "h1", 100, &[], "aaa111") + .unwrap(); + + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let search_results = vec![crate::search::InternalSearchResult { + file_path: "long.md".into(), + file_id: 1, + score: 0.9, + heading: None, + snippet: "word word".into(), + docid: Some("aaa111".into()), + }]; + + // Very small budget — should truncate + let bundle = context_topic_from_results(&params, "words", &search_results, 500).unwrap(); + assert!(!bundle.sections.is_empty()); + assert!(bundle.sections[0].content.contains("[truncated")); + assert!(bundle.truncated); + } + + #[test] + fn test_context_topic_with_graph_expansion() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().to_path_buf(); + std::fs::write(root.join("main.md"), "# Main\nMain content.").unwrap(); + std::fs::write(root.join("related.md"), "# Related\nRelated content.").unwrap(); + + let store = Store::open_memory().unwrap(); + let f1 = store + .insert_file("main.md", "h1", 100, &[], "aaa111") + .unwrap(); + let f2 = store + .insert_file("related.md", "h2", 100, &[], "bbb222") + .unwrap(); + store.insert_edge(f1, f2, "wikilink").unwrap(); + + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + let search_results = vec![crate::search::InternalSearchResult { + file_path: "main.md".into(), + file_id: f1, + score: 0.8, + heading: None, + snippet: "Main".into(), + docid: Some("aaa111".into()), + }]; + + let bundle = context_topic_from_results(&params, "main", &search_results, 32000).unwrap(); + // Should have main as direct match + related as 1-hop + assert!(bundle.sections.len() >= 2); + assert!( + bundle + .sections + .iter() + .any(|s| s.path == "main.md" && s.label == "Direct match") + ); + assert!( + bundle + .sections + .iter() + .any(|s| s.path == "related.md" && s.label == "Related (1-hop)") + ); + } + + #[test] + fn test_context_topic_empty_results() { + let tmp = TempDir::new().unwrap(); + 
let root = tmp.path().to_path_buf(); + let store = Store::open_memory().unwrap(); + let params = ContextParams { + store: &store, + vault_path: &root, + profile: None, + }; + + let bundle = context_topic_from_results(&params, "nothing", &[], 32000).unwrap(); + assert!(bundle.sections.is_empty()); + assert_eq!(bundle.total_chars, 0); + assert!(!bundle.truncated); + } + + #[test] + fn test_snap_to_char() { + let s = "hello\u{2014}world"; // em dash is 3 bytes + let snap = snap_to_char(s, 6); // lands inside the em dash + assert!(s.is_char_boundary(snap)); + assert!(snap <= 6); + } } From 9c9d480320d6f671084779de8b18825a37413b30 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Tue, 24 Mar 2026 23:38:54 +0200 Subject: [PATCH 7/8] =?UTF-8?q?feat:=20engraph=20context=20CLI=20=E2=80=94?= =?UTF-8?q?=20read,=20list,=20vault-map,=20who,=20project,=20topic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six context subcommands with --json support. Topic loads embedder for hybrid search. All others are lightweight (no model load). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/main.rs | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) diff --git a/src/main.rs b/src/main.rs index 17d18a8..360a71d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -87,6 +87,12 @@ enum Command { #[command(subcommand)] action: GraphAction, }, + + /// Query vault context. + Context { + #[command(subcommand)] + action: ContextAction, + }, } #[derive(Subcommand, Debug)] @@ -100,6 +106,47 @@ enum GraphAction { Stats, } +#[derive(Subcommand, Debug)] +enum ContextAction { + /// Read a note's full content with metadata. + Read { + /// File path, basename, or #docid. + file: String, + }, + /// List notes by metadata filters. + List { + /// Filter to folder path prefix. + #[arg(long)] + folder: Option<String>, + /// Filter to notes with all listed tags (comma-separated). 
+ #[arg(long, value_delimiter = ',')] + tags: Vec<String>, + /// Maximum results. + #[arg(long, default_value = "20")] + limit: usize, + }, + /// Vault structure overview. + VaultMap, + /// Person context bundle. + Who { + /// Person name (matches filename in People folder). + name: String, + }, + /// Project context bundle. + Project { + /// Project name (matches filename). + name: String, + }, + /// Rich topic context with budget. + Topic { + /// Search query for the topic. + query: String, + /// Character budget (default 32000, ~8000 tokens). + #[arg(long, default_value = "32000")] + budget: usize, + }, +} + #[derive(Subcommand, Debug)] enum ModelsAction { /// List available models. @@ -460,6 +507,215 @@ fn main() -> Result<()> { } } + Command::Context { action } => { + if !index_exists(&data_dir) { + eprintln!("No index found. Run 'engraph index ' first."); + std::process::exit(1); + } + let db_path = data_dir.join("engraph.db"); + let store = store::Store::open(&db_path)?; + let vault_path_str = store.get_meta("vault_path")?.ok_or_else(|| { + anyhow::anyhow!("No vault path in index. 
Run 'engraph index ' first.") + })?; + let vault_path = PathBuf::from(&vault_path_str); + let profile = config::Config::load_vault_profile().ok().flatten(); + + let params = engraph::context::ContextParams { + store: &store, + vault_path: &vault_path, + profile: profile.as_ref(), + }; + + match action { + ContextAction::Read { file } => { + let note = engraph::context::context_read(&params, &file)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&note)?); + } else { + println!( + "{} {}", + note.path, + note.docid + .as_deref() + .map(|d| format!("(#{})", d)) + .unwrap_or_default() + ); + println!("Tags: {}", note.tags.join(", ")); + println!("Outgoing links: {}", note.outgoing_links.len()); + println!("Incoming links: {}", note.incoming_links.len()); + println!("Chars: {}\n", note.char_count); + println!("{}", note.body); + } + } + ContextAction::List { + folder, + tags, + limit, + } => { + let items = + engraph::context::context_list(&params, folder.as_deref(), &tags, limit)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&items)?); + } else { + for item in &items { + let did = item + .docid + .as_deref() + .map(|d| format!(" #{d}")) + .unwrap_or_default(); + let tags_str = if item.tags.is_empty() { + String::new() + } else { + format!(" [{}]", item.tags.join(", ")) + }; + println!( + "{}{}{} ({} edges)", + item.path, did, tags_str, item.edge_count + ); + } + println!("\n{} notes", items.len()); + } + } + ContextAction::VaultMap => { + let map = engraph::context::vault_map(&params)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&map)?); + } else { + println!("Vault: {}", map.vault_path); + println!("Type: {}, Structure: {}", map.vault_type, map.structure); + println!( + "Files: {}, Chunks: {}, Edges: {}\n", + map.total_files, map.total_chunks, map.total_edges + ); + println!("Folders:"); + for f in &map.folders { + println!(" {}: {} notes", f.path, f.note_count); + } + println!("\nTop tags:"); + for (tag, count) in &map.top_tags { 
+ println!(" {}: {}", tag, count); + } + println!("\nRecent files:"); + for path in &map.recent_files { + println!(" {}", path); + } + } + } + ContextAction::Who { name } => { + let person = engraph::context::context_who(&params, &name)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&person)?); + } else { + println!("# {}\n", person.name); + if let Some(note) = &person.note { + println!( + "Note: {} {}", + note.path, + note.docid + .as_deref() + .map(|d| format!("(#{})", d)) + .unwrap_or_default() + ); + println!("Tags: {}\n", note.tags.join(", ")); + println!("{}\n", note.body); + } else { + println!("(No person note found)\n"); + } + if !person.mentioned_in.is_empty() { + println!("Mentioned in ({} notes):", person.mentioned_in.len()); + for m in &person.mentioned_in { + println!(" {} — {}", m.path, m.snippet); + } + println!(); + } + if !person.linked_from.is_empty() { + println!("Linked from ({}):", person.linked_from.len()); + for p in &person.linked_from { + println!(" {}", p); + } + println!(); + } + println!("Total: {} chars", person.total_chars); + } + } + ContextAction::Project { name } => { + let proj = engraph::context::context_project(&params, &name)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&proj)?); + } else { + println!("# {}\n", proj.name); + if let Some(note) = &proj.note { + println!("Note: {}\n", note.path); + println!("{}\n", note.body); + } + if !proj.active_tasks.is_empty() { + println!("Active tasks ({}):", proj.active_tasks.len()); + for t in &proj.active_tasks { + println!(" - [ ] {} ({})", t.text, t.source_file); + } + println!(); + } + if !proj.child_notes.is_empty() { + println!("Child notes ({}):", proj.child_notes.len()); + for c in &proj.child_notes { + println!(" {}", c.path); + } + println!(); + } + if !proj.team.is_empty() { + println!("Team:"); + for p in &proj.team { + println!(" {}", p); + } + println!(); + } + if !proj.recent_mentions.is_empty() { + println!("Recent daily mentions:"); + for m in 
&proj.recent_mentions { + println!(" {} — {}", m.path, m.snippet); + } + println!(); + } + } + } + ContextAction::Topic { query, budget } => { + let models_dir = data_dir.join("models"); + let mut embedder = engraph::embedder::Embedder::new(&models_dir)?; + let hnsw_dir = data_dir.join("hnsw"); + let index = engraph::hnsw::HnswIndex::load(&hnsw_dir)?; + + let bundle = engraph::context::context_topic_with_search( + &params, + &query, + budget, + &mut embedder, + &index, + )?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&bundle)?); + } else { + println!("# Context: {}\n", bundle.topic); + println!( + "Budget: {} / {} chars{}\n", + bundle.total_chars, + bundle.budget_chars, + if bundle.truncated { " (truncated)" } else { "" } + ); + for s in &bundle.sections { + let did = s + .docid + .as_deref() + .map(|d| format!(" #{d}")) + .unwrap_or_default(); + println!("## {} — {}{}", s.label, s.path, did); + println!("[{}]\n", s.relevance); + println!("{}\n", s.content); + } + } + } + } + } + Command::Models { action } => { let registry = model::ModelRegistry::default(); match action { From 3eb5365745b41e2599b4977639417b4d321ca09c Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Tue, 24 Mar 2026 23:40:12 +0200 Subject: [PATCH 8/8] =?UTF-8?q?chore:=20bump=20to=20v0.4.0=20=E2=80=94=20c?= =?UTF-8?q?ontext=20engine?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Version bump and docs update for engraph v0.4.0: - Context engine with 6 functions (read, list, vault_map, who, project, topic) - engraph context CLI with --json support - search_internal extracted for reuse - 13 modules, 144 tests Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 7 ++++--- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index b9d8d0d..c20d22d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ Local hybrid search CLI for Obsidian vaults. Rust, MIT licensed. 
## Architecture -Single binary with 12 modules behind a lib crate: +Single binary with 13 modules behind a lib crate: - `config.rs` — loads `~/.engraph/config.toml` and `vault.toml`, merges CLI args, provides `data_dir()` - `chunker.rs` — smart chunking with break-point scoring algorithm. Finds optimal split points considering headings, code fences, blank lines, and thematic breaks. `split_oversized_chunks()` handles token-aware secondary splitting with overlap @@ -13,13 +13,14 @@ Single binary with 12 modules behind a lib crate: - `model.rs` — pluggable `ModelBackend` trait, model registry, and `parse_model_spec()`. Enables future model swapping without changing consumer code - `fts.rs` — FTS5 full-text search support. Re-exports `FtsResult` from store. BM25-ranked keyword search - `fusion.rs` — Reciprocal Rank Fusion (RRF) engine. Merges semantic + FTS5 + graph results. Supports lane weighting, `--explain` output with per-lane detail +- `context.rs` — context engine. Six functions: `read` (full note content + metadata), `list` (filtered note listing), `vault_map` (structure overview), `who` (person context bundle), `project` (project context bundle), `context_topic` (rich topic context with budget trimming). Pure functions taking `ContextParams` — no model loading except `context_topic` which reuses `search_internal` - `graph.rs` — vault graph agent. Extracts wikilink targets, expands search results by following graph connections 1-2 hops. Relevance filtering via FTS5 term check and shared tags - `profile.rs` — vault profile detection. Auto-detects PARA/Folders/Flat structure, vault type (Obsidian/Logseq/Plain), wikilinks, frontmatter, tags. Writes/loads `vault.toml` - `store.rs` — SQLite persistence. Tables: `meta`, `files` (with docid), `chunks` (with vector BLOBs), `chunks_fts` (FTS5), `edges` (vault graph), `tombstones`. Handles incremental diffing via content hashes - `hnsw.rs` — thin wrapper around `hnsw_rs`. 
**Important:** `hnsw_rs` does not support inserting after `load_hnsw()`. The index is rebuilt from vectors stored in SQLite on every index run - `indexer.rs` — orchestrates vault walking (via `ignore` crate for `.gitignore` support), diffing, chunking, embedding (Rayon for parallel chunking, serial embedding since `Embedder` is not `Send`), serial writes to store + HNSW + FTS5, and vault graph edge building (wikilinks + people detection) -`main.rs` is a thin clap CLI. Subcommands: `index`, `search` (with `--explain`), `status`, `clear`, `init`, `configure`, `models`, `graph` (show/stats). +`main.rs` is a thin clap CLI. Subcommands: `index`, `search` (with `--explain`), `status`, `clear`, `init`, `configure`, `models`, `graph` (show/stats), `context` (read/list/vault-map/who/project/topic). ## Key patterns @@ -49,7 +50,7 @@ Single vault only. Re-indexing a different vault path triggers a confirmation pr ## Testing -- Unit tests in each module (`cargo test --lib`) — 119 tests, no network required +- Unit tests in each module (`cargo test --lib`) — 144 tests, no network required - 1 ignored smoke test (`test_embed_smoke`) — downloads ONNX model, verifies embedding - Integration tests (`cargo test --test integration -- --ignored`) — 8 tests, require model download diff --git a/Cargo.toml b/Cargo.toml index 09f68e5..46ad44a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "engraph" -version = "0.3.0" +version = "0.4.0" edition = "2024" description = "Local semantic search for Obsidian vaults" license = "MIT"