diff --git a/CLAUDE.md b/CLAUDE.md index b9d8d0d..c20d22d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ Local hybrid search CLI for Obsidian vaults. Rust, MIT licensed. ## Architecture -Single binary with 12 modules behind a lib crate: +Single binary with 13 modules behind a lib crate: - `config.rs` — loads `~/.engraph/config.toml` and `vault.toml`, merges CLI args, provides `data_dir()` - `chunker.rs` — smart chunking with break-point scoring algorithm. Finds optimal split points considering headings, code fences, blank lines, and thematic breaks. `split_oversized_chunks()` handles token-aware secondary splitting with overlap @@ -13,13 +13,14 @@ Single binary with 12 modules behind a lib crate: - `model.rs` — pluggable `ModelBackend` trait, model registry, and `parse_model_spec()`. Enables future model swapping without changing consumer code - `fts.rs` — FTS5 full-text search support. Re-exports `FtsResult` from store. BM25-ranked keyword search - `fusion.rs` — Reciprocal Rank Fusion (RRF) engine. Merges semantic + FTS5 + graph results. Supports lane weighting, `--explain` output with per-lane detail +- `context.rs` — context engine. Six functions: `read` (full note content + metadata), `list` (filtered note listing), `vault_map` (structure overview), `who` (person context bundle), `project` (project context bundle), `context_topic` (rich topic context with budget trimming). Pure functions taking `ContextParams` — no model loading except `context_topic` which reuses `search_internal` - `graph.rs` — vault graph agent. Extracts wikilink targets, expands search results by following graph connections 1-2 hops. Relevance filtering via FTS5 term check and shared tags - `profile.rs` — vault profile detection. Auto-detects PARA/Folders/Flat structure, vault type (Obsidian/Logseq/Plain), wikilinks, frontmatter, tags. Writes/loads `vault.toml` - `store.rs` — SQLite persistence. 
Tables: `meta`, `files` (with docid), `chunks` (with vector BLOBs), `chunks_fts` (FTS5), `edges` (vault graph), `tombstones`. Handles incremental diffing via content hashes - `hnsw.rs` — thin wrapper around `hnsw_rs`. **Important:** `hnsw_rs` does not support inserting after `load_hnsw()`. The index is rebuilt from vectors stored in SQLite on every index run - `indexer.rs` — orchestrates vault walking (via `ignore` crate for `.gitignore` support), diffing, chunking, embedding (Rayon for parallel chunking, serial embedding since `Embedder` is not `Send`), serial writes to store + HNSW + FTS5, and vault graph edge building (wikilinks + people detection) -`main.rs` is a thin clap CLI. Subcommands: `index`, `search` (with `--explain`), `status`, `clear`, `init`, `configure`, `models`, `graph` (show/stats). +`main.rs` is a thin clap CLI. Subcommands: `index`, `search` (with `--explain`), `status`, `clear`, `init`, `configure`, `models`, `graph` (show/stats), `context` (read/list/vault-map/who/project/topic). ## Key patterns @@ -49,7 +50,7 @@ Single vault only. 
Re-indexing a different vault path triggers a confirmation pr ## Testing -- Unit tests in each module (`cargo test --lib`) — 119 tests, no network required +- Unit tests in each module (`cargo test --lib`) — 144 tests, no network required - 1 ignored smoke test (`test_embed_smoke`) — downloads ONNX model, verifies embedding - Integration tests (`cargo test --test integration -- --ignored`) — 8 tests, require model download diff --git a/Cargo.lock b/Cargo.lock index 60cec92..ee36be5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -560,7 +560,7 @@ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "engraph" -version = "0.2.0" +version = "0.4.0" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 09f68e5..46ad44a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "engraph" -version = "0.3.0" +version = "0.4.0" edition = "2024" description = "Local semantic search for Obsidian vaults" license = "MIT" diff --git a/src/context.rs b/src/context.rs new file mode 100644 index 0000000..7f5952e --- /dev/null +++ b/src/context.rs @@ -0,0 +1,1054 @@ +use std::collections::HashSet; +use std::path::Path; + +use anyhow::Result; +use serde::Serialize; + +use crate::profile::VaultProfile; +use crate::store::Store; + +/// Shared context for all context engine functions. 
+pub struct ContextParams<'a> {
+    /// Open index database that every context query reads from.
+    pub store: &'a Store,
+    /// Vault root; indexed paths are joined onto it before files are read from disk.
+    pub vault_path: &'a Path,
+    /// Detected vault profile, when one is available.
+    pub profile: Option<&'a VaultProfile>,
+}
+
+/// A single note: raw text, its front matter/body split, index metadata and graph edges.
+#[derive(Debug, Serialize)]
+pub struct NoteContent {
+    /// Path of the note as stored in the index.
+    pub path: String,
+    /// Short document id from the index, if the record has one.
+    pub docid: Option<String>,
+    /// Full file text; empty when the file could not be read.
+    pub content: String,
+    /// Tags recorded for the note in the index.
+    pub tags: Vec<String>,
+    /// Front matter text without its fences; empty when the note has none.
+    pub frontmatter: String,
+    /// Text after the front matter, or a placeholder message when the file is unreadable.
+    pub body: String,
+    /// Paths of notes reached through outgoing `wikilink` edges.
+    pub outgoing_links: Vec<String>,
+    /// Paths of notes that have a `wikilink` edge to this note.
+    pub incoming_links: Vec<String>,
+    /// Paths reached through outgoing `mention` edges.
+    pub mentions_people: Vec<String>,
+    /// Paths of notes that have a `mention` edge to this note.
+    pub mentioned_by: Vec<String>,
+    /// Length of `content` as computed by `context_read`.
+    pub char_count: usize,
+}
+
+/// One row of a note listing.
+#[derive(Debug, Serialize)]
+pub struct NoteListItem {
+    pub path: String,
+    pub docid: Option<String>,
+    pub tags: Vec<String>,
+    pub indexed_at: String,
+    /// Edge count reported by the store for this file (0 when the query fails).
+    pub edge_count: usize,
+}
+
+/// Vault-wide overview returned by `vault_map`.
+#[derive(Debug, Serialize)]
+pub struct VaultMap {
+    pub vault_path: String,
+    /// Debug rendering of the profile's vault type, or `"Unknown"` without a profile.
+    pub vault_type: String,
+    /// Debug rendering of the profile's structure method, or `"Unknown"` without a profile.
+    pub structure: String,
+    pub total_files: usize,
+    pub total_chunks: usize,
+    pub total_edges: usize,
+    pub folders: Vec<FolderInfo>,
+    /// `(tag, count)` pairs as returned by the store.
+    pub top_tags: Vec<(String, usize)>,
+    /// Paths of the most recent files as returned by the store.
+    pub recent_files: Vec<String>,
+}
+
+/// Folder path with the number of notes the store reports for it.
+#[derive(Debug, Serialize)]
+pub struct FolderInfo {
+    pub path: String,
+    pub note_count: usize,
+}
+
+/// Everything known about a person: their note plus mention and wikilink connections.
+#[derive(Debug, Serialize)]
+pub struct PersonContext {
+    pub name: String,
+    /// The person's own note, when a file named `<name>.md` is indexed.
+    pub note: Option<NoteContent>,
+    pub mentioned_in: Vec<MentionInfo>,
+    pub linked_from: Vec<String>,
+    pub linked_to: Vec<String>,
+    pub total_chars: usize,
+}
+
+/// A note that mentions something, with a short excerpt.
+#[derive(Debug, Serialize)]
+pub struct MentionInfo {
+    pub path: String,
+    pub docid: Option<String>,
+    pub snippet: String,
+}
+
+/// Everything known about a project: its note, related notes, open tasks, team, mentions.
+#[derive(Debug, Serialize)]
+pub struct ProjectContext {
+    pub name: String,
+    /// The project's own note, when a file named `<name>.md` is indexed.
+    pub note: Option<NoteContent>,
+    pub child_notes: Vec<NoteListItem>,
+    pub active_tasks: Vec<TaskItem>,
+    pub team: Vec<String>,
+    pub recent_mentions: Vec<MentionInfo>,
+    pub total_chars: usize,
+}
+
+/// An unchecked task line together with the note it came from.
+#[derive(Debug, Serialize)]
+pub struct TaskItem {
+    /// Task text with the `- [ ] ` marker removed.
+    pub text: String,
+    pub source_file: String,
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Resolve a file by docid (#abcdef), exact path, or basename match.
+///
+/// Lookup order:
+/// 1. A 7-byte input starting with `#` is treated as a docid.
+/// 2. The input as an exact indexed path.
+/// 3. Case-insensitive basename match (`.md` appended when missing) against every
+///    indexed path; the first hit wins.
+///
+/// Store errors are propagated.
+// NOTE(review): the type inside `Option<..>` was lost from this copy of the patch;
+// `crate::store::FileRecord` is assumed — confirm against `store.rs`.
+fn resolve_file(
+    params: &ContextParams,
+    file_or_docid: &str,
+) -> Result<Option<crate::store::FileRecord>> {
+    // Docid lookup: #abcdef
+    if let Some(docid) = file_or_docid.strip_prefix('#')
+        && file_or_docid.len() == 7
+    {
+        return params.store.get_file_by_docid(docid);
+    }
+
+    // Exact path lookup
+    if let Some(record) = params.store.get_file(file_or_docid)? {
+        return Ok(Some(record));
+    }
+
+    // Basename fallback: append .md if needed, then case-insensitive suffix match
+    let target = if file_or_docid.ends_with(".md") {
+        file_or_docid.to_lowercase()
+    } else {
+        format!("{}.md", file_or_docid).to_lowercase()
+    };
+    // Built once rather than once per indexed file.
+    let nested_suffix = format!("/{}", target);
+    Ok(params.store.get_all_files()?.into_iter().find(|f| {
+        let path = f.path.to_lowercase();
+        path == target || path.ends_with(&nested_suffix)
+    }))
+}
+
+/// Split content into (frontmatter YAML, body) parts.
+///
+/// Front matter is recognised only when the first non-blank line is a fence (three or
+/// more dashes on a line of their own) and a closing fence follows. The returned front
+/// matter excludes both fences and may be empty; the body starts on the line after the
+/// closing fence. Without a complete front matter block the result is `("", content)`.
+fn split_frontmatter(content: &str) -> (String, String) {
+    /// A fence is a line made only of three or more dashes (trailing whitespace ignored).
+    fn is_fence(line: &str) -> bool {
+        let line = line.trim_end();
+        line.len() >= 3 && line.bytes().all(|b| b == b'-')
+    }
+
+    let no_frontmatter = || (String::new(), content.to_string());
+
+    let Some((opener, rest)) = content.trim_start().split_once('\n') else {
+        return no_frontmatter();
+    };
+    if !is_fence(opener) {
+        return no_frontmatter();
+    }
+
+    // Walk the lines after the opening fence, tracking the byte offset of each line start
+    // so an immediately following closing fence (empty front matter) is handled too.
+    let mut line_start = 0;
+    for line in rest.split_inclusive('\n') {
+        if is_fence(line) {
+            // Drop the line break that separates the YAML from the closing fence.
+            let yaml = &rest[..line_start];
+            let yaml = yaml.strip_suffix('\n').unwrap_or(yaml);
+            let yaml = yaml.strip_suffix('\r').unwrap_or(yaml);
+            let body = &rest[line_start + line.len()..];
+            return (yaml.to_string(), body.to_string());
+        }
+        line_start += line.len();
+    }
+    no_frontmatter()
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/// Read a single note with full content, metadata, and graph edges.
+///
+/// `file_or_docid` is resolved by `resolve_file`. Returns an error when no indexed file
+/// matches or when a store query fails. A file that is indexed but cannot be read from
+/// disk is not an error: `content` and `frontmatter` are empty and `body` carries a
+/// placeholder message.
+pub fn context_read(params: &ContextParams, file_or_docid: &str) -> Result<NoteContent> {
+    let record = resolve_file(params, file_or_docid)?
+        .ok_or_else(|| anyhow::anyhow!("File not found: {}", file_or_docid))?;
+
+    let full_path = params.vault_path.join(&record.path);
+    let (content, frontmatter, body) = match std::fs::read_to_string(&full_path) {
+        Ok(text) => {
+            let (frontmatter, body) = split_frontmatter(&text);
+            (text, frontmatter, body)
+        }
+        // Best effort: any read failure is reported inside the body so that the index
+        // metadata and edges are still returned.
+        Err(_) => (
+            String::new(),
+            String::new(),
+            "[File not found on disk. Re-run 'engraph index' to update.]".to_string(),
+        ),
+    };
+
+    // Edge endpoints whose path lookup fails are dropped from the lists below.
+    let path_of = |file_id: i64| params.store.get_file_path_by_id(file_id).ok().flatten();
+
+    let outgoing_links: Vec<String> = params
+        .store
+        .get_outgoing(record.id, Some("wikilink"))?
+        .iter()
+        .filter_map(|(fid, _)| path_of(*fid))
+        .collect();
+    let incoming_links: Vec<String> = params
+        .store
+        .get_incoming(record.id, Some("wikilink"))?
+        .iter()
+        .filter_map(|(fid, _)| path_of(*fid))
+        .collect();
+    let mentions_people: Vec<String> = params
+        .store
+        .get_outgoing(record.id, Some("mention"))?
+        .iter()
+        .filter_map(|(fid, _)| path_of(*fid))
+        .collect();
+    let mentioned_by: Vec<String> = params
+        .store
+        .get_incoming(record.id, Some("mention"))?
+        .iter()
+        .filter_map(|(fid, _)| path_of(*fid))
+        .collect();
+
+    // Byte length of the file text (0 when the file could not be read).
+    let char_count = content.len();
+    Ok(NoteContent {
+        path: record.path,
+        docid: record.docid,
+        content,
+        tags: record.tags,
+        frontmatter,
+        body,
+        outgoing_links,
+        incoming_links,
+        mentions_people,
+        mentioned_by,
+        char_count,
+    })
+}
+
+/// List notes with optional folder/tag filters and edge counts.
+///
+/// `folder`, `tags` and `limit` are passed straight to the store's `list_files`.
+/// Store errors from the listing are propagated; a failed edge count is reported as 0.
+pub fn context_list(
+    params: &ContextParams,
+    folder: Option<&str>,
+    tags: &[String],
+    limit: usize,
+) -> Result<Vec<NoteListItem>> {
+    let items = params
+        .store
+        .list_files(folder, tags, limit)?
+        .into_iter()
+        .map(|f| NoteListItem {
+            // Best effort: a failed edge query shows up as zero edges.
+            edge_count: params.store.edge_count_for_file(f.id).unwrap_or(0),
+            path: f.path,
+            docid: f.docid,
+            tags: f.tags,
+            indexed_at: f.indexed_at,
+        })
+        .collect();
+    Ok(items)
+}
+
+/// High-level vault overview: folders, tags, recent files, counts.
+///
+/// Requests the top 20 tags and the 10 most recent files from the store. Vault type and
+/// structure are `"Unknown"` when `params.profile` is `None`. Edge statistics are
+/// optional: if they cannot be read, `total_edges` is 0. Other store errors are propagated.
+pub fn vault_map(params: &ContextParams) -> Result<VaultMap> {
+    let stats = params.store.stats()?;
+    let total_edges = params
+        .store
+        .get_edge_stats()
+        .map(|e| e.total_edges)
+        .unwrap_or(0);
+
+    let (vault_type, structure) = match params.profile {
+        Some(p) => (
+            format!("{:?}", p.vault_type),
+            format!("{:?}", p.structure.method),
+        ),
+        None => ("Unknown".into(), "Unknown".into()),
+    };
+
+    let folders: Vec<FolderInfo> = params
+        .store
+        .folder_note_counts()?
+        .into_iter()
+        .map(|(path, note_count)| FolderInfo { path, note_count })
+        .collect();
+
+    let top_tags = params.store.top_tags(20)?;
+
+    let recent_files: Vec<String> = params
+        .store
+        .recent_files(10)?
+        .into_iter()
+        .map(|f| f.path)
+        .collect();
+
+    Ok(VaultMap {
+        vault_path: params.vault_path.to_string_lossy().to_string(),
+        vault_type,
+        structure,
+        total_files: stats.file_count,
+        total_chunks: stats.chunk_count,
+        total_edges,
+        folders,
+        top_tags,
+        recent_files,
+    })
+}
+
+/// Build a person context bundle: note content, mentions, wikilink connections.
+///
+/// The person note is the first indexed file whose basename equals `<name>.md`
+/// (case-insensitive). When no such note exists the bundle is returned with `note: None`
+/// and empty connection lists rather than an error. `total_chars` is the note's
+/// `char_count` plus the byte lengths of all mention snippets.
+pub fn context_who(params: &ContextParams, name: &str) -> Result<PersonContext> {
+    let wanted = format!("{}.md", name).to_lowercase();
+    let all_files = params.store.get_all_files()?;
+    let person_file = all_files.iter().find(|f| {
+        let basename = f.path.rsplit('/').next().unwrap_or(&f.path);
+        basename.to_lowercase() == wanted
+    });
+
+    let mut note = None;
+    let mut mentioned_in = Vec::new();
+    let mut linked_from = Vec::new();
+    let mut linked_to = Vec::new();
+
+    if let Some(person) = person_file {
+        note = Some(context_read(params, &person.path)?);
+        let pid = person.id;
+
+        // Mention edges: one store lookup per source file yields both path and docid.
+        let mentions = params.store.get_incoming(pid, Some("mention"))?;
+        for (fid, _) in &mentions {
+            if let Some(source) = params.store.get_file_by_id(*fid).ok().flatten() {
+                let snippet = get_mention_snippet(params, *fid, name);
+                mentioned_in.push(MentionInfo {
+                    path: source.path,
+                    docid: source.docid,
+                    snippet,
+                });
+            }
+        }
+
+        // Wikilink edges; endpoints whose path lookup fails are skipped.
+        let incoming_wl = params.store.get_incoming(pid, Some("wikilink"))?;
+        linked_from.extend(
+            incoming_wl
+                .iter()
+                .filter_map(|(fid, _)| params.store.get_file_path_by_id(*fid).ok().flatten()),
+        );
+        let outgoing_wl = params.store.get_outgoing(pid, Some("wikilink"))?;
+        linked_to.extend(
+            outgoing_wl
+                .iter()
+                .filter_map(|(fid, _)| params.store.get_file_path_by_id(*fid).ok().flatten()),
+        );
+    }
+
+    let total_chars = note.as_ref().map(|n| n.char_count).unwrap_or(0)
+        + mentioned_in.iter().map(|m| m.snippet.len()).sum::<usize>();
+
+    Ok(PersonContext {
+        name: name.to_string(),
+        note,
+        mentioned_in,
+        linked_from,
+        linked_to,
+        total_chars,
+    })
+}
+
+/// Get a snippet from a file mentioning a name. Try FTS first, fall back to disk read.
+///
+/// The FTS path returns the stored snippet of the first hit (among 5 requested results)
+/// that belongs to `file_id`. The disk fallback matches case-insensitively and keeps at
+/// most 200 characters of the first matching line, followed by `...` if the line was cut.
+/// Returns an empty string when neither path finds anything.
+fn get_mention_snippet(params: &ContextParams, file_id: i64, name: &str) -> String {
+    if let Ok(results) = params.store.fts_search(name, 5)
+        && let Some(hit) = results.iter().find(|r| r.file_id == file_id)
+    {
+        return hit.snippet.clone();
+    }
+
+    let Some(path) = params.store.get_file_path_by_id(file_id).ok().flatten() else {
+        return String::new();
+    };
+    let Ok(content) = std::fs::read_to_string(params.vault_path.join(&path)) else {
+        return String::new();
+    };
+
+    let needle = name.to_lowercase();
+    content
+        .lines()
+        .find(|line| line.to_lowercase().contains(&needle))
+        .map(|line| match line.char_indices().nth(200) {
+            // A 201st character exists: cut on its boundary. Comparing the byte length
+            // against 200 would add "..." to short lines made of multibyte characters.
+            Some((cut, _)) => format!("{}...", &line[..cut]),
+            None => line.to_string(),
+        })
+        .unwrap_or_default()
+}
+
+/// Build a project context bundle: note, child notes, tasks, team, recent mentions.
+///
+/// The project note is the first indexed file whose basename equals `<name>.md`
+/// (case-insensitive). Child notes are up to 50 files from the same folder plus notes
+/// with a `wikilink` edge to the project. Tasks are unchecked `- [ ] ` lines from the
+/// project note and its children. Team members are outgoing wikilink targets whose path
+/// contains "people"; recent mentions are up to 5 FTS hits whose path contains "daily"
+/// (both compared case-insensitively). A missing project note is not an error.
+pub fn context_project(params: &ContextParams, name: &str) -> Result<ProjectContext> {
+    let wanted = format!("{}.md", name).to_lowercase();
+    let all_files = params.store.get_all_files()?;
+    let project_file = all_files.iter().find(|f| {
+        let basename = f.path.rsplit('/').next().unwrap_or(&f.path);
+        basename.to_lowercase() == wanted
+    });
+
+    let (note, project_id, project_folder) = match project_file {
+        Some(pf) => {
+            let n = context_read(params, &pf.path)?;
+            let folder = pf.path.rsplit_once('/').map(|(dir, _)| dir.to_string());
+            (Some(n), Some(pf.id), folder)
+        }
+        None => (None, None, None),
+    };
+
+    // `child_ids` keeps a note from being listed twice when it is both in the project
+    // folder and linking to the project.
+    let mut child_ids = HashSet::new();
+    let mut child_notes = Vec::new();
+
+    // Files in same folder
+    if let Some(folder) = &project_folder {
+        let folder_files = params.store.list_files(Some(folder), &[], 50)?;
+        for f in folder_files {
+            if Some(f.id) != project_id && child_ids.insert(f.id) {
+                let ec = params.store.edge_count_for_file(f.id).unwrap_or(0);
+                child_notes.push(NoteListItem {
+                    path: f.path,
+                    docid: f.docid,
+                    tags: f.tags,
+                    indexed_at: f.indexed_at,
+                    edge_count: ec,
+                });
+            }
+        }
+    }
+
+    // Files linking to project
+    if let Some(pid) = project_id {
+        let incoming = params.store.get_incoming(pid, Some("wikilink"))?;
+        for (fid, _) in &incoming {
+            if child_ids.insert(*fid)
+                && let Some(f) = params.store.get_file_by_id(*fid).ok().flatten()
+            {
+                let ec = params.store.edge_count_for_file(*fid).unwrap_or(0);
+                child_notes.push(NoteListItem {
+                    path: f.path,
+                    docid: f.docid,
+                    tags: f.tags,
+                    indexed_at: f.indexed_at,
+                    edge_count: ec,
+                });
+            }
+        }
+    }
+
+    // Active tasks; unreadable files simply contribute none.
+    let mut active_tasks = Vec::new();
+    let scan_tasks = |path: &str, tasks: &mut Vec<TaskItem>| {
+        let Ok(content) = std::fs::read_to_string(params.vault_path.join(path)) else {
+            return;
+        };
+        tasks.extend(
+            content
+                .lines()
+                .filter_map(|line| line.trim().strip_prefix("- [ ] "))
+                .map(|text| TaskItem {
+                    text: text.to_string(),
+                    source_file: path.to_string(),
+                }),
+        );
+    };
+    if let Some(n) = &note {
+        scan_tasks(&n.path, &mut active_tasks);
+    }
+    for child in &child_notes {
+        scan_tasks(&child.path, &mut active_tasks);
+    }
+
+    // Team: people linked from project
+    let mut team = Vec::new();
+    if let Some(pid) = project_id {
+        let outgoing = params.store.get_outgoing(pid, Some("wikilink"))?;
+        for (fid, _) in &outgoing {
+            if let Some(path) = params.store.get_file_path_by_id(*fid).ok().flatten()
+                && path.to_lowercase().contains("people")
+            {
+                team.push(path);
+            }
+        }
+    }
+
+    // Recent mentions in daily notes
+    let mut recent_mentions = Vec::new();
+    if let Ok(fts_results) = params.store.fts_search(name, 10) {
+        for r in fts_results {
+            // One lookup yields both path and docid.
+            if let Some(f) = params.store.get_file_by_id(r.file_id).ok().flatten()
+                && f.path.to_lowercase().contains("daily")
+            {
+                recent_mentions.push(MentionInfo {
+                    path: f.path,
+                    docid: f.docid,
+                    snippet: r.snippet,
+                });
+                if recent_mentions.len() >= 5 {
+                    break;
+                }
+            }
+        }
+    }
+
+    let total_chars = note.as_ref().map(|n| n.char_count).unwrap_or(0);
+
+    Ok(ProjectContext {
+        name: name.to_string(),
+        note,
+        child_notes,
+        active_tasks,
+        team,
+        recent_mentions,
+        total_chars,
+    })
+}
+
+// ---------------------------------------------------------------------------
+// Context Topic — rich context bundle with budget trimming
+// ---------------------------------------------------------------------------
+
+/// Budget-limited set of note excerpts assembled for a topic.
+#[derive(Debug, Serialize)]
+pub struct ContextBundle {
+    pub topic: String,
+    pub sections: Vec<ContextSection>,
+    /// Bytes charged against the budget: section contents plus per-section overhead.
+    pub total_chars: usize,
+    pub budget_chars: usize,
+    /// True when a section was cut or the budget was used up.
+    pub truncated: bool,
+}
+
+/// One note excerpt inside a `ContextBundle`.
+#[derive(Debug, Serialize)]
+pub struct ContextSection {
+    pub label: String,
+    pub path: String,
+    pub docid: Option<String>,
+    pub content: String,
+    pub relevance: String,
+}
+
+/// Budget used when the caller passes `max_chars == 0`.
+const DEFAULT_BUDGET: usize = 32000;
+/// Flat amount charged per section on top of its content length.
+const SECTION_OVERHEAD: usize = 100;
+
+/// Snap to a valid UTF-8 char boundary at or before `offset`.
+///
+/// `offset` is clamped to `s.len()` first, so the result can always be used to slice `s`.
+fn snap_to_char(s: &str, offset: usize) -> usize {
+    let mut pos = offset.min(s.len());
+    while pos > 0 && !s.is_char_boundary(pos) {
+        pos -= 1;
+    }
+    pos
+}
+
+/// Clip `body` to at most `limit` bytes, ending it with `marker` when anything was cut.
+///
+/// The marker's own length counts against `limit`, so the result only exceeds `limit`
+/// when the marker alone does not fit. Returns the text and whether it was cut.
+fn clip_to_budget(body: String, limit: usize, marker: &str) -> (String, bool) {
+    if body.len() <= limit {
+        return (body, false);
+    }
+    let keep = snap_to_char(&body, limit.saturating_sub(marker.len()));
+    (format!("{}{}", &body[..keep], marker), true)
+}
+
+/// Assemble a context bundle from pre-computed search results.
+/// Testable without embedder.
+///
+/// The top 5 results are added first as "Direct match" sections. Then notes one hop
+/// away from the top 3 results are added as "Related (1-hop)" sections, each capped at
+/// one eighth of the budget. Front matter is stripped; unreadable files produce an empty
+/// section. `max_chars == 0` selects `DEFAULT_BUDGET`. Lengths are byte lengths.
+pub fn context_topic_from_results(
+    params: &ContextParams,
+    topic: &str,
+    search_results: &[crate::search::InternalSearchResult],
+    max_chars: usize,
+) -> Result<ContextBundle> {
+    let budget = if max_chars == 0 {
+        DEFAULT_BUDGET
+    } else {
+        max_chars
+    };
+    let mut sections = Vec::new();
+    let mut used_chars = 0;
+    let mut any_cut = false;
+    let mut included_files: HashSet<String> = HashSet::new();
+
+    // Priority 1: Direct search results (top 5)
+    for r in search_results.iter().take(5) {
+        if used_chars >= budget {
+            break;
+        }
+        let full_path = params.vault_path.join(&r.file_path);
+        let content = std::fs::read_to_string(&full_path).unwrap_or_default();
+        let (_, body) = split_frontmatter(&content);
+
+        let available = budget.saturating_sub(used_chars + SECTION_OVERHEAD);
+        let marker = format!(
+            "... [truncated, full note: #{}]",
+            r.docid.as_deref().unwrap_or("?")
+        );
+        let (trimmed, cut) = clip_to_budget(body, available, &marker);
+        any_cut |= cut;
+
+        used_chars += trimmed.len() + SECTION_OVERHEAD;
+        included_files.insert(r.file_path.clone());
+        sections.push(ContextSection {
+            label: "Direct match".into(),
+            path: r.file_path.clone(),
+            docid: r.docid.clone(),
+            content: trimmed,
+            relevance: format!("score {:.2}", r.score),
+        });
+    }
+
+    // Priority 2: Graph-expanded notes (1-hop from top 3 results)
+    for r in search_results.iter().take(3) {
+        if used_chars >= budget {
+            break;
+        }
+        let neighbors = params.store.get_neighbors(r.file_id, 1).unwrap_or_default();
+        for (nid, _hop) in neighbors {
+            if used_chars >= budget {
+                break;
+            }
+            if let Some(nf) = params.store.get_file_by_id(nid).ok().flatten() {
+                if included_files.contains(&nf.path) {
+                    continue;
+                }
+                let full_path = params.vault_path.join(&nf.path);
+                let content = std::fs::read_to_string(&full_path).unwrap_or_default();
+                let (_, body) = split_frontmatter(&content);
+
+                let available = budget.saturating_sub(used_chars + SECTION_OVERHEAD);
+                let max_per_expansion = budget / 8;
+                let cap = available.min(max_per_expansion);
+                if cap == 0 {
+                    break;
+                }
+                let (trimmed, cut) = clip_to_budget(body, cap, "... [truncated]");
+                any_cut |= cut;
+
+                used_chars += trimmed.len() + SECTION_OVERHEAD;
+                included_files.insert(nf.path.clone());
+                sections.push(ContextSection {
+                    label: "Related (1-hop)".into(),
+                    path: nf.path.clone(),
+                    docid: nf.docid,
+                    content: trimmed,
+                    relevance: format!("linked from {}", r.file_path),
+                });
+            }
+        }
+    }
+
+    // A cut section counts as truncation even when char-boundary snapping leaves the
+    // total a few bytes under the budget.
+    let truncated = any_cut || used_chars >= budget;
+
+    Ok(ContextBundle {
+        topic: topic.to_string(),
+        sections,
+        total_chars: used_chars,
+        budget_chars: budget,
+        truncated,
+    })
+}
+
+/// Full context topic function (requires embedder + HNSW).
+/// Called from CLI handler which provides the heavy resources.
+///
+/// Runs `search_internal` for `topic` (called with `5`, presumably the result limit) and
+/// hands the results to `context_topic_from_results`. Search errors are propagated.
+pub fn context_topic_with_search(
+    params: &ContextParams,
+    topic: &str,
+    max_chars: usize,
+    embedder: &mut crate::embedder::Embedder,
+    index: &crate::hnsw::HnswIndex,
+) -> Result<ContextBundle> {
+    let search_output = crate::search::search_internal(topic, 5, params.store, embedder, index)?;
+    context_topic_from_results(params, topic, &search_output.results, max_chars)
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::docid::generate_docid;
+    use crate::store::Store;
+    use tempfile::TempDir;
+
+    /// Two-note vault on disk (`note.md` <-> `other.md` via wikilinks) with a matching
+    /// in-memory store. The `TempDir` is returned so the files outlive the test body.
+    fn setup_vault() -> (TempDir, Store, std::path::PathBuf) {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path().to_path_buf();
+
+        std::fs::write(
+            root.join("note.md"),
+            "---\ntags:\n - rust\n---\n# Note\n\nContent here.\n\nSee [[other]].",
+        )
+        .unwrap();
+        std::fs::write(root.join("other.md"), "# Other\n\nMore content.").unwrap();
+
+        let store = Store::open_memory().unwrap();
+        let d1 = generate_docid("note.md");
+        let d2 = generate_docid("other.md");
+        store
+            .insert_file("note.md", "h1", 100, &["rust".into()], &d1)
+            .unwrap();
+        store.insert_file("other.md", "h2", 100, &[], &d2).unwrap();
+
+        let f1 = store.get_file("note.md").unwrap().unwrap().id;
+        let f2 = store.get_file("other.md").unwrap().unwrap().id;
+        store.insert_edge(f1, f2, "wikilink").unwrap();
+        store.insert_edge(f2, f1, "wikilink").unwrap();
+
+        (tmp, store, root)
+    }
+
+    #[test]
+    fn test_read_by_path() {
+        let (_tmp, store, root) = setup_vault();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let note = context_read(&params, "note.md").unwrap();
+        assert_eq!(note.path, "note.md");
+        assert!(note.content.contains("Content here."));
+        assert!(note.body.contains("Content here."));
+        assert!(note.frontmatter.contains("tags:"));
+        assert!(note.tags.contains(&"rust".to_string()));
+        assert_eq!(note.outgoing_links.len(), 1);
+        assert_eq!(note.incoming_links.len(), 1);
+        assert!(note.char_count > 0);
+    }
+
+    #[test]
+    fn test_read_by_docid() {
+        let (_tmp, store, root) = setup_vault();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let docid = generate_docid("note.md");
+        let note = context_read(&params, &format!("#{}", docid)).unwrap();
+        assert_eq!(note.path, "note.md");
+    }
+
+    #[test]
+    fn test_read_file_not_on_disk() {
+        let (_tmp, store, root) = setup_vault();
+        store
+            .insert_file("ghost.md", "h3", 100, &[], "ggg333")
+            .unwrap();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let note = context_read(&params, "ghost.md").unwrap();
+        assert!(note.body.contains("File not found on disk"));
+    }
+
+    #[test]
+    fn test_read_by_basename() {
+        let (_tmp, store, root) = setup_vault();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let note = context_read(&params, "note").unwrap();
+        assert_eq!(note.path, "note.md");
+    }
+
+    #[test]
+    fn test_context_list_no_filter() {
+        let (_tmp, store, root) = setup_vault();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let items = context_list(&params, None, &[], 20).unwrap();
+        assert_eq!(items.len(), 2);
+    }
+
+    #[test]
+    fn test_context_list_tag_filter() {
+        let (_tmp, store, root) = setup_vault();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let items = context_list(&params, None, &["rust".into()], 20).unwrap();
+        assert_eq!(items.len(), 1);
+        assert_eq!(items[0].path, "note.md");
+    }
+
+    #[test]
+    fn test_vault_map() {
+        let (_tmp, store, root) = setup_vault();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let map = vault_map(&params).unwrap();
+        assert_eq!(map.total_files, 2);
+        assert!(!map.folders.is_empty());
+        assert!(map.top_tags.iter().any(|(t, _)| t == "rust"));
+    }
+
+    #[test]
+    fn test_split_frontmatter() {
+        let (fm, body) = split_frontmatter("---\ntags:\n - rust\n---\n# Hello\nWorld");
+        assert!(fm.contains("tags:"));
+        assert!(body.contains("# Hello"));
+        assert!(!body.contains("---"));
+    }
+
+    #[test]
+    fn test_split_frontmatter_no_fm() {
+        let (fm, body) = split_frontmatter("# Just content\nHere.");
+        assert!(fm.is_empty());
+        assert!(body.contains("# Just content"));
+    }
+
+    #[test]
+    fn test_who_finds_person() {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path().to_path_buf();
+        std::fs::create_dir_all(root.join("People")).unwrap();
+        std::fs::write(
+            root.join("People/John.md"),
+            "---\naliases:\n - JN\n---\n# John\nDeveloper.",
+        )
+        .unwrap();
+        std::fs::write(root.join("daily.md"), "# Daily\nTalked to John about Rust.").unwrap();
+
+        let store = Store::open_memory().unwrap();
+        let f1 = store
+            .insert_file("People/John.md", "h1", 100, &["person".into()], "aaa111")
+            .unwrap();
+        let f2 = store
+            .insert_file("daily.md", "h2", 100, &[], "bbb222")
+            .unwrap();
+        store.insert_edge(f2, f1, "mention").unwrap();
+        store
+            .insert_chunk(f2, "# Daily", "Talked to John about Rust.", 10, 20)
+            .unwrap();
+        store
+            .insert_fts_chunk(f2, 0, "Talked to John about Rust.")
+            .unwrap();
+
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let person = context_who(&params, "John").unwrap();
+        assert!(person.note.is_some());
+        assert_eq!(person.name, "John");
+        assert_eq!(person.mentioned_in.len(), 1);
+        assert!(person.mentioned_in[0].path.contains("daily"));
+    }
+
+    #[test]
+    fn test_who_not_found() {
+        let (_tmp, store, root) = setup_vault();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let person = context_who(&params, "NonExistent").unwrap();
+        assert!(person.note.is_none());
+        assert!(person.mentioned_in.is_empty());
+    }
+
+    #[test]
+    fn test_project_context() {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path().to_path_buf();
+        std::fs::create_dir_all(root.join("01-Projects")).unwrap();
+        std::fs::write(
+            root.join("01-Projects/MyProject.md"),
+            "# MyProject\n\n- [ ] Task one\n- [x] Done task\n- [ ] Task two",
+        )
+        .unwrap();
+        std::fs::write(
+            root.join("01-Projects/child.md"),
+            "# Child\nRelated to [[MyProject]].\n- [ ] Sub task",
+        )
+        .unwrap();
+
+        let store = Store::open_memory().unwrap();
+        let f1 = store
+            .insert_file(
+                "01-Projects/MyProject.md",
+                "h1",
+                100,
+                &["project".into()],
+                "aaa111",
+            )
+            .unwrap();
+        let f2 = store
+            .insert_file("01-Projects/child.md", "h2", 100, &[], "bbb222")
+            .unwrap();
+        store.insert_edge(f2, f1, "wikilink").unwrap();
+        store.insert_edge(f1, f2, "wikilink").unwrap();
+
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let proj = context_project(&params, "MyProject").unwrap();
+        assert!(proj.note.is_some());
+        assert!(!proj.child_notes.is_empty());
+        // Should find "Task one" and "Task two" (not "Done task")
+        assert!(proj.active_tasks.len() >= 2);
+        assert!(proj.active_tasks.iter().any(|t| t.text == "Task one"));
+        assert!(proj.active_tasks.iter().any(|t| t.text == "Task two"));
+        assert!(!proj.active_tasks.iter().any(|t| t.text.contains("Done")));
+    }
+
+    #[test]
+    fn test_project_not_found() {
+        let (_tmp, store, root) = setup_vault();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let proj = context_project(&params, "NonExistent").unwrap();
+        assert!(proj.note.is_none());
+        assert!(proj.child_notes.is_empty());
+    }
+
+    // --- context_topic tests ---
+
+    #[test]
+    fn test_context_topic_basic() {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path().to_path_buf();
+        std::fs::write(
+            root.join("result.md"),
+            "# Result\n\nThis is relevant content about the topic.",
+        )
+        .unwrap();
+
+        let store = Store::open_memory().unwrap();
+        // Use the id the store assigned instead of assuming the first row is 1.
+        let file_id = store
+            .insert_file("result.md", "h1", 100, &["topic".into()], "aaa111")
+            .unwrap();
+
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let search_results = vec![crate::search::InternalSearchResult {
+            file_path: "result.md".into(),
+            file_id,
+            score: 0.85,
+            heading: Some("# Result".into()),
+            snippet: "relevant content".into(),
+            docid: Some("aaa111".into()),
+        }];
+
+        let bundle = context_topic_from_results(&params, "topic", &search_results, 32000).unwrap();
+        assert!(!bundle.sections.is_empty());
+        assert!(bundle.sections[0].content.contains("relevant content"));
+        assert!(bundle.total_chars <= bundle.budget_chars);
+        assert!(!bundle.truncated);
+    }
+
+    #[test]
+    fn test_context_topic_budget_trimming() {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path().to_path_buf();
+        let long_content = format!("# Long\n\n{}", "word ".repeat(5000));
+        std::fs::write(root.join("long.md"), &long_content).unwrap();
+
+        let store = Store::open_memory().unwrap();
+        // Use the id the store assigned instead of assuming the first row is 1.
+        let file_id = store
+            .insert_file("long.md", "h1", 100, &[], "aaa111")
+            .unwrap();
+
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let search_results = vec![crate::search::InternalSearchResult {
+            file_path: "long.md".into(),
+            file_id,
+            score: 0.9,
+            heading: None,
+            snippet: "word word".into(),
+            docid: Some("aaa111".into()),
+        }];
+
+        // Very small budget — should truncate
+        let bundle = context_topic_from_results(&params, "words", &search_results, 500).unwrap();
+        assert!(!bundle.sections.is_empty());
+        assert!(bundle.sections[0].content.contains("[truncated"));
+        assert!(bundle.truncated);
+    }
+
+    #[test]
+    fn test_context_topic_with_graph_expansion() {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path().to_path_buf();
+        std::fs::write(root.join("main.md"), "# Main\nMain content.").unwrap();
+        std::fs::write(root.join("related.md"), "# Related\nRelated content.").unwrap();
+
+        let store = Store::open_memory().unwrap();
+        let f1 = store
+            .insert_file("main.md", "h1", 100, &[], "aaa111")
+            .unwrap();
+        let f2 = store
+            .insert_file("related.md", "h2", 100, &[], "bbb222")
+            .unwrap();
+        store.insert_edge(f1, f2, "wikilink").unwrap();
+
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+        let search_results = vec![crate::search::InternalSearchResult {
+            file_path: "main.md".into(),
+            file_id: f1,
+            score: 0.8,
+            heading: None,
+            snippet: "Main".into(),
+            docid: Some("aaa111".into()),
+        }];
+
+        let bundle = context_topic_from_results(&params, "main", &search_results, 32000).unwrap();
+        // Should have main as direct match + related as 1-hop
+        assert!(bundle.sections.len() >= 2);
+        assert!(
+            bundle
+                .sections
+                .iter()
+                .any(|s| s.path == "main.md" && s.label == "Direct match")
+        );
+        assert!(
+            bundle
+                .sections
+                .iter()
+                .any(|s| s.path == "related.md" && s.label == "Related (1-hop)")
+        );
+    }
+
+    #[test]
+    fn test_context_topic_empty_results() {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path().to_path_buf();
+        let store = Store::open_memory().unwrap();
+        let params = ContextParams {
+            store: &store,
+            vault_path: &root,
+            profile: None,
+        };
+
+        let bundle = context_topic_from_results(&params, "nothing", &[], 32000).unwrap();
+        assert!(bundle.sections.is_empty());
+        assert_eq!(bundle.total_chars, 0);
+        assert!(!bundle.truncated);
+    }
+
+    #[test]
+    fn test_snap_to_char() {
+        let s = "hello\u{2014}world"; // em dash is 3 bytes
+        let snap = snap_to_char(s, 6); // lands inside the em dash
+        assert!(s.is_char_boundary(snap));
+        assert!(snap <= 6);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 870efbf..c19dbe6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
 pub mod chunker;
 pub mod config;
+pub mod context;
 pub mod docid;
 pub mod embedder;
 pub mod fts;
diff --git a/src/main.rs b/src/main.rs
index 17d18a8..360a71d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -87,6 +87,12 @@ enum Command {
         #[command(subcommand)]
         action: GraphAction,
     },
+
+    /// Query vault context.
+ Context { + #[command(subcommand)] + action: ContextAction, + }, } #[derive(Subcommand, Debug)] @@ -100,6 +106,47 @@ enum GraphAction { Stats, } +#[derive(Subcommand, Debug)] +enum ContextAction { + /// Read a note's full content with metadata. + Read { + /// File path, basename, or #docid. + file: String, + }, + /// List notes by metadata filters. + List { + /// Filter to folder path prefix. + #[arg(long)] + folder: Option, + /// Filter to notes with all listed tags (comma-separated). + #[arg(long, value_delimiter = ',')] + tags: Vec, + /// Maximum results. + #[arg(long, default_value = "20")] + limit: usize, + }, + /// Vault structure overview. + VaultMap, + /// Person context bundle. + Who { + /// Person name (matches filename in People folder). + name: String, + }, + /// Project context bundle. + Project { + /// Project name (matches filename). + name: String, + }, + /// Rich topic context with budget. + Topic { + /// Search query for the topic. + query: String, + /// Character budget (default 32000, ~8000 tokens). + #[arg(long, default_value = "32000")] + budget: usize, + }, +} + #[derive(Subcommand, Debug)] enum ModelsAction { /// List available models. @@ -460,6 +507,215 @@ fn main() -> Result<()> { } } + Command::Context { action } => { + if !index_exists(&data_dir) { + eprintln!("No index found. Run 'engraph index ' first."); + std::process::exit(1); + } + let db_path = data_dir.join("engraph.db"); + let store = store::Store::open(&db_path)?; + let vault_path_str = store.get_meta("vault_path")?.ok_or_else(|| { + anyhow::anyhow!("No vault path in index. 
Run 'engraph index ' first.") + })?; + let vault_path = PathBuf::from(&vault_path_str); + let profile = config::Config::load_vault_profile().ok().flatten(); + + let params = engraph::context::ContextParams { + store: &store, + vault_path: &vault_path, + profile: profile.as_ref(), + }; + + match action { + ContextAction::Read { file } => { + let note = engraph::context::context_read(&params, &file)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&note)?); + } else { + println!( + "{} {}", + note.path, + note.docid + .as_deref() + .map(|d| format!("(#{})", d)) + .unwrap_or_default() + ); + println!("Tags: {}", note.tags.join(", ")); + println!("Outgoing links: {}", note.outgoing_links.len()); + println!("Incoming links: {}", note.incoming_links.len()); + println!("Chars: {}\n", note.char_count); + println!("{}", note.body); + } + } + ContextAction::List { + folder, + tags, + limit, + } => { + let items = + engraph::context::context_list(&params, folder.as_deref(), &tags, limit)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&items)?); + } else { + for item in &items { + let did = item + .docid + .as_deref() + .map(|d| format!(" #{d}")) + .unwrap_or_default(); + let tags_str = if item.tags.is_empty() { + String::new() + } else { + format!(" [{}]", item.tags.join(", ")) + }; + println!( + "{}{}{} ({} edges)", + item.path, did, tags_str, item.edge_count + ); + } + println!("\n{} notes", items.len()); + } + } + ContextAction::VaultMap => { + let map = engraph::context::vault_map(&params)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&map)?); + } else { + println!("Vault: {}", map.vault_path); + println!("Type: {}, Structure: {}", map.vault_type, map.structure); + println!( + "Files: {}, Chunks: {}, Edges: {}\n", + map.total_files, map.total_chunks, map.total_edges + ); + println!("Folders:"); + for f in &map.folders { + println!(" {}: {} notes", f.path, f.note_count); + } + println!("\nTop tags:"); + for (tag, count) in &map.top_tags { 
+ println!(" {}: {}", tag, count); + } + println!("\nRecent files:"); + for path in &map.recent_files { + println!(" {}", path); + } + } + } + ContextAction::Who { name } => { + let person = engraph::context::context_who(&params, &name)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&person)?); + } else { + println!("# {}\n", person.name); + if let Some(note) = &person.note { + println!( + "Note: {} {}", + note.path, + note.docid + .as_deref() + .map(|d| format!("(#{})", d)) + .unwrap_or_default() + ); + println!("Tags: {}\n", note.tags.join(", ")); + println!("{}\n", note.body); + } else { + println!("(No person note found)\n"); + } + if !person.mentioned_in.is_empty() { + println!("Mentioned in ({} notes):", person.mentioned_in.len()); + for m in &person.mentioned_in { + println!(" {} — {}", m.path, m.snippet); + } + println!(); + } + if !person.linked_from.is_empty() { + println!("Linked from ({}):", person.linked_from.len()); + for p in &person.linked_from { + println!(" {}", p); + } + println!(); + } + println!("Total: {} chars", person.total_chars); + } + } + ContextAction::Project { name } => { + let proj = engraph::context::context_project(&params, &name)?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&proj)?); + } else { + println!("# {}\n", proj.name); + if let Some(note) = &proj.note { + println!("Note: {}\n", note.path); + println!("{}\n", note.body); + } + if !proj.active_tasks.is_empty() { + println!("Active tasks ({}):", proj.active_tasks.len()); + for t in &proj.active_tasks { + println!(" - [ ] {} ({})", t.text, t.source_file); + } + println!(); + } + if !proj.child_notes.is_empty() { + println!("Child notes ({}):", proj.child_notes.len()); + for c in &proj.child_notes { + println!(" {}", c.path); + } + println!(); + } + if !proj.team.is_empty() { + println!("Team:"); + for p in &proj.team { + println!(" {}", p); + } + println!(); + } + if !proj.recent_mentions.is_empty() { + println!("Recent daily mentions:"); + for m in 
&proj.recent_mentions { + println!(" {} — {}", m.path, m.snippet); + } + println!(); + } + } + } + ContextAction::Topic { query, budget } => { + let models_dir = data_dir.join("models"); + let mut embedder = engraph::embedder::Embedder::new(&models_dir)?; + let hnsw_dir = data_dir.join("hnsw"); + let index = engraph::hnsw::HnswIndex::load(&hnsw_dir)?; + + let bundle = engraph::context::context_topic_with_search( + &params, + &query, + budget, + &mut embedder, + &index, + )?; + if cli.json { + println!("{}", serde_json::to_string_pretty(&bundle)?); + } else { + println!("# Context: {}\n", bundle.topic); + println!( + "Budget: {} / {} chars{}\n", + bundle.total_chars, + bundle.budget_chars, + if bundle.truncated { " (truncated)" } else { "" } + ); + for s in &bundle.sections { + let did = s + .docid + .as_deref() + .map(|d| format!(" #{d}")) + .unwrap_or_default(); + println!("## {} — {}{}", s.label, s.path, did); + println!("[{}]\n", s.relevance); + println!("{}\n", s.content); + } + } + } + } + } + Command::Models { action } => { let registry = model::ModelRegistry::default(); match action { diff --git a/src/search.rs b/src/search.rs index a89d299..2746869 100644 --- a/src/search.rs +++ b/src/search.rs @@ -19,27 +19,32 @@ pub struct SearchResult { pub docid: Option, } -/// Run a search query and print results. -/// -/// Performs both semantic (HNSW) and keyword (FTS5) search, then fuses -/// results using Reciprocal Rank Fusion. When `explain` is true, each -/// result includes per-lane score breakdown. -pub fn run_search( - query: &str, - top_n: usize, - json: bool, - explain: bool, - data_dir: &Path, -) -> Result<()> { - let models_dir = data_dir.join("models"); - let mut embedder = Embedder::new(&models_dir).context("loading embedder")?; - - let hnsw_dir = data_dir.join("hnsw"); - let index = HnswIndex::load(&hnsw_dir).context("loading HNSW index")?; +/// Structured search result for internal use (no I/O). 
+#[derive(Debug, Clone)] +pub struct InternalSearchResult { + pub file_path: String, + pub file_id: i64, + pub score: f64, + pub heading: Option, + pub snippet: String, + pub docid: Option, +} - let db_path = data_dir.join("engraph.db"); - let store = Store::open(&db_path).context("opening store")?; +/// Output from `search_internal`: structured results plus raw fused data for --explain. +pub struct SearchOutput { + pub results: Vec, + pub fused: Vec, +} +/// Run hybrid search and return structured results (no I/O). +/// Used by both `run_search` (CLI) and context engine. +pub fn search_internal( + query: &str, + top_n: usize, + store: &Store, + embedder: &mut Embedder, + index: &HnswIndex, +) -> Result { // --- Semantic lane --- let query_vec = embedder.embed_one(query).context("embedding query")?; let tombstones = store.get_tombstones().context("loading tombstones")?; @@ -144,7 +149,7 @@ pub fn run_search( }; let graph_results = - graph::graph_expand(&store, &combined_seeds, query, 2, 20).unwrap_or_default(); + graph::graph_expand(store, &combined_seeds, query, 2, 20).unwrap_or_default(); // --- RRF Fusion --- const RRF_K: usize = 60; @@ -157,32 +162,70 @@ pub fn run_search( RRF_K, ); - // Convert to SearchResult, taking top_n. - let results: Vec = fused + // Convert fused results to InternalSearchResult, taking top_n. + let results: Vec = fused .iter() .take(top_n) - .map(|f| SearchResult { - score: f.rrf_score as f32, + .map(|f| InternalSearchResult { file_path: f.file_path.clone(), + file_id: f.file_id, + score: f.rrf_score, heading: f.heading.clone(), snippet: f.snippet.clone(), docid: f.docid.clone(), }) .collect(); - let mut output = format_results(&results, json); + Ok(SearchOutput { results, fused }) +} + +/// Run a search query and print results. +/// +/// Performs both semantic (HNSW) and keyword (FTS5) search, then fuses +/// results using Reciprocal Rank Fusion. When `explain` is true, each +/// result includes per-lane score breakdown. 
+pub fn run_search( + query: &str, + top_n: usize, + json: bool, + explain: bool, + data_dir: &Path, +) -> Result<()> { + let models_dir = data_dir.join("models"); + let mut embedder = Embedder::new(&models_dir).context("loading embedder")?; + + let hnsw_dir = data_dir.join("hnsw"); + let index = HnswIndex::load(&hnsw_dir).context("loading HNSW index")?; + + let db_path = data_dir.join("engraph.db"); + let store = Store::open(&db_path).context("opening store")?; + + let output = search_internal(query, top_n, &store, &mut embedder, &index)?; + + let results: Vec = output + .results + .iter() + .map(|r| SearchResult { + score: r.score as f32, + file_path: r.file_path.clone(), + heading: r.heading.clone(), + snippet: r.snippet.clone(), + docid: r.docid.clone(), + }) + .collect(); + + let mut out = format_results(&results, json); if explain && !json { - // Append explain info after results. let mut explain_out = String::from("\n--- Explain ---\n"); - for f in fused.iter().take(top_n) { + for f in output.fused.iter().take(top_n) { explain_out.push_str(&format!("{}\n", f.file_path)); explain_out.push_str(&fusion::format_explain(f)); } - output.push_str(&explain_out); + out.push_str(&explain_out); } - print!("{output}"); + print!("{out}"); Ok(()) } diff --git a/src/store.rs b/src/store.rs index c318122..85860c9 100644 --- a/src/store.rs +++ b/src/store.rs @@ -791,6 +791,118 @@ impl Store { isolated_file_count: (total_files - connected) as usize, }) } + + /// List files filtered by folder prefix and/or tags (AND logic). 
+ pub fn list_files( + &self, + folder: Option<&str>, + tags: &[String], + limit: usize, + ) -> Result> { + let mut sql = String::from( + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid FROM files WHERE 1=1", + ); + let mut param_values: Vec> = Vec::new(); + if let Some(f) = folder { + sql.push_str(" AND path LIKE ?"); + param_values.push(Box::new(format!("{}%", f))); + } + for tag in tags { + sql.push_str(" AND EXISTS (SELECT 1 FROM json_each(tags) WHERE value = ?)"); + param_values.push(Box::new(tag.clone())); + } + sql.push_str(" ORDER BY indexed_at DESC LIMIT ?"); + param_values.push(Box::new(limit as i64)); + + let mut stmt = self.conn.prepare(&sql)?; + let rows = stmt.query_map(rusqlite::params_from_iter(param_values.iter()), |row| { + Ok(FileRecord { + id: row.get(0)?, + path: row.get(1)?, + content_hash: row.get(2)?, + mtime: row.get(3)?, + tags: parse_tags(&row.get::<_, String>(4)?), + indexed_at: row.get(5)?, + docid: row.get(6)?, + }) + })?; + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Top-level folder grouping with note counts. + pub fn folder_note_counts(&self) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT CASE WHEN instr(path, '/') > 0 + THEN substr(path, 1, instr(path, '/') - 1) + ELSE '(root)' + END AS folder, + COUNT(*) as cnt + FROM files GROUP BY folder ORDER BY cnt DESC", + )?; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? as usize)) + })?; + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Tag frequency aggregation via json_each. + pub fn top_tags(&self, limit: usize) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT value, COUNT(*) as cnt + FROM files, json_each(files.tags) + GROUP BY value ORDER BY cnt DESC LIMIT ?", + )?; + let rows = stmt.query_map(params![limit as i64], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? 
as usize)) + })?; + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Most recently indexed files. + pub fn recent_files(&self, limit: usize) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid + FROM files ORDER BY indexed_at DESC LIMIT ?", + )?; + let rows = stmt.query_map(params![limit as i64], |row| { + Ok(FileRecord { + id: row.get(0)?, + path: row.get(1)?, + content_hash: row.get(2)?, + mtime: row.get(3)?, + tags: parse_tags(&row.get::<_, String>(4)?), + indexed_at: row.get(5)?, + docid: row.get(6)?, + }) + })?; + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Total edges (both directions) for a given file. + pub fn edge_count_for_file(&self, file_id: i64) -> Result { + let count: i64 = self.conn.query_row( + "SELECT COUNT(*) FROM edges WHERE from_file = ?1 OR to_file = ?1", + params![file_id], + |row| row.get(0), + )?; + Ok(count as usize) + } } fn parse_tags(json: &str) -> Vec { @@ -1302,4 +1414,117 @@ mod tests { assert_eq!(stats.connected_file_count, 3); // a, b, c assert_eq!(stats.isolated_file_count, 1); // d } + + #[test] + fn test_list_files_no_filter() { + let store = Store::open_memory().unwrap(); + store + .insert_file("01-Projects/a.md", "h1", 100, &["rust".into()], "aaa111") + .unwrap(); + store + .insert_file("02-Areas/b.md", "h2", 200, &["health".into()], "bbb222") + .unwrap(); + store + .insert_file( + "01-Projects/c.md", + "h3", + 300, + &["rust".into(), "cli".into()], + "ccc333", + ) + .unwrap(); + let files = store.list_files(None, &[], 20).unwrap(); + assert_eq!(files.len(), 3); + } + + #[test] + fn test_list_files_folder_filter() { + let store = Store::open_memory().unwrap(); + store + .insert_file("01-Projects/a.md", "h1", 100, &[], "aaa111") + .unwrap(); + store + .insert_file("02-Areas/b.md", "h2", 200, &[], "bbb222") + .unwrap(); + let files = 
store.list_files(Some("01-Projects"), &[], 20).unwrap(); + assert_eq!(files.len(), 1); + assert_eq!(files[0].path, "01-Projects/a.md"); + } + + #[test] + fn test_list_files_tag_filter() { + let store = Store::open_memory().unwrap(); + store + .insert_file("a.md", "h1", 100, &["rust".into(), "cli".into()], "aaa111") + .unwrap(); + store + .insert_file("b.md", "h2", 200, &["rust".into()], "bbb222") + .unwrap(); + store + .insert_file("c.md", "h3", 300, &["python".into()], "ccc333") + .unwrap(); + let files = store.list_files(None, &["rust".into()], 20).unwrap(); + assert_eq!(files.len(), 2); + let files = store + .list_files(None, &["rust".into(), "cli".into()], 20) + .unwrap(); + assert_eq!(files.len(), 1); + assert_eq!(files[0].path, "a.md"); + } + + #[test] + fn test_folder_note_counts() { + let store = Store::open_memory().unwrap(); + store + .insert_file("01-Projects/a.md", "h1", 100, &[], "a1") + .unwrap(); + store + .insert_file("01-Projects/b.md", "h2", 100, &[], "b2") + .unwrap(); + store + .insert_file("02-Areas/c.md", "h3", 100, &[], "c3") + .unwrap(); + store.insert_file("root.md", "h4", 100, &[], "d4").unwrap(); + let counts = store.folder_note_counts().unwrap(); + assert!(counts.iter().any(|(f, c)| f == "01-Projects" && *c == 2)); + assert!(counts.iter().any(|(f, c)| f == "02-Areas" && *c == 1)); + assert!(counts.iter().any(|(f, c)| f == "(root)" && *c == 1)); + } + + #[test] + fn test_top_tags() { + let store = Store::open_memory().unwrap(); + store + .insert_file("a.md", "h1", 100, &["rust".into(), "cli".into()], "a1") + .unwrap(); + store + .insert_file("b.md", "h2", 100, &["rust".into(), "web".into()], "b2") + .unwrap(); + store + .insert_file("c.md", "h3", 100, &["rust".into()], "c3") + .unwrap(); + let tags = store.top_tags(10).unwrap(); + assert_eq!(tags[0].0, "rust"); + assert_eq!(tags[0].1, 3); + } + + #[test] + fn test_recent_files() { + let store = Store::open_memory().unwrap(); + store.insert_file("old.md", "h1", 100, &[], "a1").unwrap(); + 
store.insert_file("new.md", "h2", 200, &[], "b2").unwrap(); + let recent = store.recent_files(1).unwrap(); + assert_eq!(recent.len(), 1); + } + + #[test] + fn test_edge_count_for_file() { + let store = Store::open_memory().unwrap(); + let f1 = store.insert_file("a.md", "h1", 100, &[], "a1").unwrap(); + let f2 = store.insert_file("b.md", "h2", 100, &[], "b2").unwrap(); + store.insert_edge(f1, f2, "wikilink").unwrap(); + store.insert_edge(f2, f1, "wikilink").unwrap(); + assert_eq!(store.edge_count_for_file(f1).unwrap(), 2); + assert_eq!(store.edge_count_for_file(f2).unwrap(), 2); + } }