diff --git a/Cargo.toml b/Cargo.toml
index 00d69362..01f983fb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectorless"
-version = "0.1.10"
+version = "0.1.11"
 edition = "2024"
 authors = ["zTgx "]
 description = "Hierarchical, reasoning-native document intelligence engine"
@@ -73,6 +73,7 @@ rand = "0.8"
 [dev-dependencies]
 tempfile = "3.10"
 tokio-test = "0.4"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 
 [profile.release]
 opt-level = 3
diff --git a/README.md b/README.md
index 50b88351..d0649efb 100644
--- a/README.md
+++ b/README.md
@@ -11,10 +11,12 @@
-Ultra performant document intelligence engine for RAG, with core written in **Rust**. Zero vector database, zero embedding model — just LLM-powered tree navigation. Incremental indexing and multi-format support out-of-box.
+Ultra-performant document intelligence engine for RAG, with its core written in **Rust**. Zero vector database, zero embedding model — just LLM-powered tree navigation. Incremental indexing and multi-format support out of the box.
 
 ⭐ **Drop a star to help us grow!**
 
+**⚠️ Early Development**: This project is in active development. The API and features are likely to evolve, and breaking changes may occur.
+
 ## Why Vectorless?
 
@@ -109,14 +111,15 @@ cp templates/template.toml ./vectorless.toml
 Basic usage:
 
 ```rust
-use vectorless::client::{Engine, EngineBuilder};
+use vectorless::Engine;
 
 #[tokio::main]
-async fn main() -> vectorless::domain::Result<()> {
+async fn main() -> vectorless::Result<()> {
     // Create client
-    let client = EngineBuilder::new()
+    let client = Engine::builder()
         .with_workspace("./workspace")
-        .build()?;
+        .build()
+        .map_err(|e| vectorless::Error::Config(e.to_string()))?;
 
     // Index a document
     let doc_id = client.index("./document.md").await?;
@@ -133,6 +136,21 @@ async fn main() -> vectorless::domain::Result<()> {
 
 See the [examples/](examples/) directory for complete working examples:
 
+| Example | Description |
+|---------|-------------|
+| [basic.rs](examples/basic.rs) | Minimal ~30 line example showing core API |
+| [index.rs](examples/index.rs) | Document indexing pipeline |
+| [retrieve.rs](examples/retrieve.rs) | Retrieval pipeline with options |
+| [events.rs](examples/events.rs) | Event-driven indexing with EventEmitter |
+| [session.rs](examples/session.rs) | Session management with statistics |
+| [batch_processing.rs](examples/batch_processing.rs) | Batch document processing |
+| [content_aggregation.rs](examples/content_aggregation.rs) | Content aggregation strategies |
+| [streaming.rs](examples/streaming.rs) | Streaming document processing |
+| [multi_format.rs](examples/multi_format.rs) | Multi-format document support |
+| [custom_pilot.rs](examples/custom_pilot.rs) | Custom pilot implementation |
+| [cli_tool.rs](examples/cli_tool.rs) | CLI application example |
+| [markdownflow.rs](examples/markdownflow.rs) | Markdown workflow example |
+
 ## Architecture
 
 ### Pilot Architecture
@@ -141,7 +159,7 @@ See the [examples/](examples/) directory for complete working examples:
 
 ### System Overview
 
-![Architecture](docs/design/architecture-v2.svg)
+![Architecture](docs/design/architecture.svg)
 
 ## Contributing
 
diff --git a/docs/design/architecture-v2.svg b/docs/design/architecture.svg
similarity index 100%
rename from docs/design/architecture-v2.svg
rename to docs/design/architecture.svg
diff --git a/docs/design/client-module.md b/docs/design/client-module.md
new file mode 100644
index 00000000..e4ab796b
--- /dev/null
+++ b/docs/design/client-module.md
@@ -0,0
+1,794 @@
+# Client Module Refactoring Design
+
+## Overview
+
+This document describes the refactoring of the `client` module to achieve a more professional, product-level architecture with clear separation of concerns.
+
+## Current Problems
+
+### 1. God Object Anti-pattern
+`engine.rs` (600+ lines) handles too many responsibilities:
+- Document indexing
+- Document retrieval
+- Workspace management
+- Configuration management
+- Format detection
+- Page parsing
+
+### 2. Mixed Abstraction Levels
+High-level operations (`query()`) mixed with low-level utilities (`parse_page_range()`).
+
+### 3. No Session Management
+Each operation is independent; there is no way to maintain context across multiple operations.
+
+### 4. Missing Event System
+No progress callbacks or event hooks for long-running operations.
+
+### 5. Scattered State Management
+State is split across several independently shared handles (`Arc<RwLock<...>>`, `Arc<Mutex<...>>`, plain `Arc<...>`).
+
+---
+
+## Proposed Architecture
+
+### Module Structure
+
+```
+src/client/
+├── mod.rs          # Re-exports and documentation
+├── engine.rs       # Core orchestrator (simplified)
+├── builder.rs      # Builder pattern (enhanced)
+├── types.rs        # Public API types
+├── context.rs      # Request context and configuration
+├── session.rs      # Session management
+├── indexer.rs      # Document indexing operations
+├── retriever.rs    # Query and retrieval operations
+├── workspace.rs    # Workspace operations (CRUD)
+└── events.rs       # Event system and callbacks
+```
+
+### Architecture Diagram
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                           Client API                            │
+├─────────────────────────────────────────────────────────────────┤
+│                                                                 │
+│  ┌──────────────┐     ┌──────────────┐     ┌──────────────┐     │
+│  │ EngineBuilder│───▶│    Engine    │◀───│   Session    │     │
+│  └──────────────┘     └──────┬───────┘     └──────────────┘     │
+│                              │                                  │
+│               ┌──────────────┼──────────────┐                   │
+│               ▼              ▼              ▼                   │
+│        ┌─────────────┐ ┌─────────────┐ ┌─────────────┐          │
+│        │   Indexer   │ │  Retriever  │ │  Workspace  │          │
+│        │   Client    │ │   Client    │ │   Client    │          │
+│        └──────┬──────┘ └──────┬──────┘ └──────┬──────┘          │
+│               │               │               │                 │
+│               └───────────────┴───────────────┘                 │
+│                               │                                 │
+│                               ▼                                 │
+│                      ┌────────────────┐                         │
+│                      │    Context     │                         │
+│                      │ (Request State)│                         │
+│                      └────────────────┘                         │
+│                                                                 │
+│                      ┌────────────────┐                         │
+│                      │     Events     │                         │
+│                      │  (Callbacks)   │                         │
+│                      └────────────────┘                         │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Component Design
+
+### 1. Context (`context.rs`)
+
+Request-scoped configuration and state management.
+
+```rust
+/// Request context for client operations.
+pub struct ClientContext {
+    /// Unique request ID for tracing.
+    pub request_id: Uuid,
+
+    /// Request-specific configuration overrides.
+    pub config: RequestContextConfig,
+
+    /// Event emitter for this request.
+    pub events: EventEmitter,
+
+    /// Request metadata.
+    pub metadata: HashMap<String, String>,
+
+    /// Request deadline (for timeout).
+    pub deadline: Option<Instant>,
+}
+
+/// Request-specific configuration overrides.
+pub struct RequestContextConfig {
+    /// Override top_k for retrieval.
+    pub top_k: Option<usize>,
+
+    /// Override token budget.
+    pub token_budget: Option<usize>,
+
+    /// Override content format.
+    pub content_format: Option<ContentFormat>,
+
+    /// Enable/disable features.
+    pub features: FeatureFlags,
+}
+
+/// Feature flags for request.
+pub struct FeatureFlags {
+    pub include_summaries: bool,
+    pub include_content: bool,
+    pub enable_cache: bool,
+    pub enable_sufficiency_check: bool,
+}
+```
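+
+As a quick illustration of how these overrides are meant to compose — a sketch only: the `with_*` builders mirror the Request Context example later in this document, and the fallback to `retriever_config.default_top_k` is an assumption, not settled API:
+
+```rust
+// Build a request context with per-request overrides.
+let ctx = ClientContext::new()
+    .with_top_k(10)
+    .with_deadline(Duration::from_secs(30));
+
+// Inside a stage, an unset override falls back to the client-level default
+// (`retriever_config` is the RetrieverClient's config from section 4 below).
+let top_k = ctx.config.top_k.unwrap_or(retriever_config.default_top_k);
+```
+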
+### 2. Session (`session.rs`)
+
+Multi-document session management.
+
+```rust
+/// Session for managing multiple document operations.
+pub struct Session {
+    /// Session ID.
+    pub id: Uuid,
+
+    /// Session configuration.
+    config: SessionConfig,
+
+    /// Active document contexts.
+    documents: HashMap<String, DocumentContext>,
+
+    /// Shared engine reference.
+    engine: Engine,
+
+    /// Session statistics.
+    stats: SessionStats,
+
+    /// Created at timestamp.
+    created_at: DateTime<Utc>,
+}
+
+/// Document context within a session.
+pub struct DocumentContext {
+    /// Document ID.
+    pub doc_id: String,
+
+    /// Preloaded tree (cached).
+    tree: Option<Arc<DocumentTree>>,
+
+    /// Document metadata.
+    meta: DocumentMeta,
+
+    /// Access statistics.
+    access_count: usize,
+    last_accessed: DateTime<Utc>,
+}
+
+/// Session configuration.
+pub struct SessionConfig {
+    /// Maximum documents to keep in memory.
+    pub max_cached_documents: usize,
+
+    /// Preload strategy.
+    pub preload_strategy: PreloadStrategy,
+
+    /// Cache eviction policy.
+    pub eviction_policy: EvictionPolicy,
+}
+
+impl Session {
+    /// Create a new session.
+    pub fn new(engine: Engine) -> Self;
+
+    /// Index a document into this session.
+    pub async fn index(&self, path: impl AsRef<Path>) -> Result<String>;
+
+    /// Query a document within this session.
+    pub async fn query(&self, doc_id: &str, question: &str) -> Result<RetrievalResult>;
+
+    /// Query across all documents in session.
+    pub async fn query_all(&self, question: &str) -> Result<Vec<RetrievalResult>>;
+
+    /// Get document tree (cached).
+    pub fn get_tree(&self, doc_id: &str) -> Result<Arc<DocumentTree>>;
+
+    /// Preload documents for faster access.
+    pub async fn preload(&self, doc_ids: &[&str]) -> Result<()>;
+
+    /// Clear session cache.
+    pub fn clear_cache(&self);
+
+    /// Get session statistics.
+    pub fn stats(&self) -> &SessionStats;
+}
+```
+
+### 3. Indexer Client (`indexer.rs`)
+
+Document indexing operations.
+
+```rust
+/// Document indexing client.
+pub struct IndexerClient {
+    /// Pipeline executor.
+    executor: Arc<RwLock<PipelineExecutor>>,
+
+    /// Configuration.
+    config: IndexerConfig,
+}
+
+/// Indexing configuration.
+pub struct IndexerConfig {
+    /// Default index mode.
+    pub default_mode: IndexMode,
+
+    /// Summary generation strategy.
+    pub summary_strategy: SummaryStrategy,
+
+    /// Whether to generate node IDs.
+    pub generate_ids: bool,
+
+    /// Whether to generate descriptions.
+    pub generate_descriptions: bool,
+}
+
+impl IndexerClient {
+    /// Create a new indexer client.
+    pub fn new(executor: PipelineExecutor) -> Self;
+
+    /// Index a document from file.
+    pub async fn index_file(
+        &self,
+        path: impl AsRef<Path>,
+        options: IndexOptions,
+        events: &EventEmitter,
+    ) -> Result<String>;
+
+    /// Index from raw content.
+    pub async fn index_content(
+        &self,
+        content: &str,
+        format: DocumentFormat,
+        options: IndexOptions,
+    ) -> Result<String>;
+
+    /// Detect document format.
+    pub fn detect_format(&self, path: &Path, options: &IndexOptions) -> Result<DocumentFormat>;
+
+    /// Validate document before indexing.
+    pub fn validate(&self, path: &Path) -> Result<()>;
+}
+
+/// Indexing events.
+pub enum IndexEvent {
+    /// Started indexing.
+    Started { path: String },
+
+    /// Format detected.
+    FormatDetected { format: DocumentFormat },
+
+    /// Parsing progress.
+    ParsingProgress { percent: u8 },
+
+    /// Tree building complete.
+    TreeBuilt { node_count: usize },
+
+    /// Summary generation progress.
+    SummaryProgress { completed: usize, total: usize },
+
+    /// Indexing complete.
+    Complete { doc_id: String },
+
+    /// Error occurred.
+    Error { message: String },
+}
+```
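+
+A sketch of how these events could be consumed for progress reporting, using the closure-based `on_index` helper defined in the Events section below (wiring shown for illustration, not settled API):
+
+```rust
+let events = EventEmitter::new().on_index(|e| match e {
+    IndexEvent::ParsingProgress { percent } => println!("parsing: {percent}%"),
+    IndexEvent::SummaryProgress { completed, total } => {
+        println!("summaries: {completed}/{total}")
+    }
+    IndexEvent::Complete { doc_id } => println!("indexed: {doc_id}"),
+    _ => {}
+});
+```
+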
+### 4. Retriever Client (`retriever.rs`)
+
+Query and retrieval operations.
+
+```rust
+/// Document retrieval client.
+pub struct RetrieverClient {
+    /// Pipeline retriever.
+    retriever: Arc<PipelineRetriever>,
+
+    /// Configuration.
+    config: RetrieverConfig,
+}
+
+/// Retrieval configuration.
+pub struct RetrieverConfig {
+    /// Default top_k.
+    pub default_top_k: usize,
+
+    /// Default token budget.
+    pub default_token_budget: usize,
+
+    /// Content aggregator config.
+    pub content_config: ContentAggregatorConfig,
+
+    /// Enable caching.
+    pub enable_cache: bool,
+}
+
+impl RetrieverClient {
+    /// Create a new retriever client.
+    pub fn new(retriever: PipelineRetriever) -> Self;
+
+    /// Query a document tree.
+    pub async fn query(
+        &self,
+        tree: &DocumentTree,
+        question: &str,
+        options: RetrieveOptions,
+        ctx: &ClientContext,
+    ) -> Result<RetrievalResult>;
+
+    /// Query with streaming results.
+    pub async fn query_stream(
+        &self,
+        tree: &DocumentTree,
+        question: &str,
+        options: RetrieveOptions,
+    ) -> impl Stream<Item = QueryEvent>;
+
+    /// Get similar nodes.
+    pub fn find_similar(
+        &self,
+        tree: &DocumentTree,
+        node_id: NodeId,
+        top_k: usize,
+    ) -> Result<Vec<NodeId>>;
+
+    /// Get node context (ancestors + siblings).
+    pub fn get_node_context(
+        &self,
+        tree: &DocumentTree,
+        node_id: NodeId,
+        depth: usize,
+    ) -> Result<NodeContext>;
+}
+
+/// Query events for streaming.
+pub enum QueryEvent {
+    /// Search started.
+    SearchStarted { query: String },
+
+    /// Node visited during search.
+    NodeVisited { node_id: String, title: String, score: f32 },
+
+    /// Candidate found.
+    CandidateFound { node_id: String, score: f32 },
+
+    /// Sufficiency check result.
+    SufficiencyCheck { level: SufficiencyLevel, tokens: usize },
+
+    /// Result ready.
+    ResultReady { result: RetrievalResult },
+
+    /// Query complete.
+    Complete { total_results: usize, confidence: f32 },
+}
+```
+
+### 5. Workspace Client (`workspace.rs`)
+
+Document persistence operations.
+
+```rust
+/// Workspace management client.
+pub struct WorkspaceClient {
+    /// Workspace storage.
+    workspace: Arc<RwLock<Workspace>>,
+
+    /// Configuration.
+    config: WorkspaceConfig,
+}
+
+/// Workspace configuration.
+pub struct WorkspaceConfig {
+    /// Auto-save interval (seconds).
+    pub auto_save_interval: Option<u64>,
+
+    /// Maximum cache size.
+    pub max_cache_size: usize,
+}
+
+impl WorkspaceClient {
+    /// Create a new workspace client.
+    pub fn new(workspace: Workspace) -> Self;
+
+    /// Save a document.
+    pub fn save(&self, doc: &PersistedDocument) -> Result<()>;
+
+    /// Load a document.
+    pub fn load(&self, doc_id: &str) -> Result<Option<PersistedDocument>>;
+
+    /// Remove a document.
+    pub fn remove(&self, doc_id: &str) -> Result<bool>;
+
+    /// Check if document exists.
+    pub fn exists(&self, doc_id: &str) -> Result<bool>;
+
+    /// List all documents.
+    pub fn list(&self) -> Result<Vec<String>>;
+
+    /// Get document metadata.
+    pub fn get_meta(&self, doc_id: &str) -> Result<Option<DocumentMeta>>;
+
+    /// Batch operations.
+    pub fn batch_remove(&self, doc_ids: &[&str]) -> Result<usize>;
+
+    /// Clear workspace.
+    pub fn clear(&self) -> Result<usize>;
+
+    /// Get workspace statistics.
+    pub fn stats(&self) -> WorkspaceStats;
+}
+
+/// Workspace statistics.
+pub struct WorkspaceStats {
+    pub document_count: usize,
+    pub total_size_bytes: u64,
+    pub cache_hit_rate: f32,
+    pub oldest_document: Option<DateTime<Utc>>,
+    pub newest_document: Option<DateTime<Utc>>,
+}
+```
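+
+A minimal sketch of the intended read path, assuming the `Result<Option<...>>` shape reconstructed above (where `Ok(None)` means "not found" and `Err` is reserved for storage failures):
+
+```rust
+match workspace.load("doc-1")? {
+    // Document found: report cache effectiveness so far.
+    Some(_doc) => println!("hit rate so far: {:.2}", workspace.stats().cache_hit_rate),
+    // Absence is a normal outcome, not an error.
+    None => println!("doc-1 is not in the workspace"),
+}
+```
+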
+### 6. Events (`events.rs`)
+
+Event system for callbacks and progress reporting.
+
+```rust
+/// Event emitter for client operations.
+pub struct EventEmitter {
+    /// Event handlers.
+    handlers: Vec<Box<dyn EventHandler>>,
+
+    /// Async handlers (for non-blocking events).
+    async_handlers: Vec<Arc<dyn AsyncEventHandler>>,
+}
+
+/// Event handler trait.
+pub trait EventHandler: Send + Sync {
+    fn handle(&self, event: &Event);
+}
+
+/// Async event handler trait.
+#[async_trait]
+pub trait AsyncEventHandler: Send + Sync {
+    async fn handle(&self, event: &Event);
+}
+
+/// Event types.
+#[derive(Debug, Clone)]
+pub enum Event {
+    /// Indexing events.
+    Index(IndexEvent),
+
+    /// Query events.
+    Query(QueryEvent),
+
+    /// Workspace events.
+    Workspace(WorkspaceEvent),
+
+    /// Session events.
+    Session(SessionEvent),
+}
+
+/// Workspace events.
+pub enum WorkspaceEvent {
+    DocumentSaved { doc_id: String },
+    DocumentLoaded { doc_id: String, cache_hit: bool },
+    DocumentRemoved { doc_id: String },
+    WorkspaceCleared { count: usize },
+}
+
+/// Session events.
+pub enum SessionEvent {
+    SessionCreated { session_id: Uuid },
+    DocumentAdded { doc_id: String },
+    DocumentEvicted { doc_id: String, reason: EvictionReason },
+    SessionClosed { session_id: Uuid },
+}
+
+impl EventEmitter {
+    /// Create a new event emitter.
+    pub fn new() -> Self;
+
+    /// Add a sync handler.
+    pub fn on<H: EventHandler + 'static>(mut self, handler: H) -> Self;
+
+    /// Add an async handler.
+    pub fn on_async(mut self, handler: Arc<dyn AsyncEventHandler>) -> Self;
+
+    /// Emit an event.
+    pub fn emit(&self, event: Event);
+
+    /// Emit an event asynchronously.
+    pub async fn emit_async(&self, event: Event);
+}
+
+/// Convenience handler builders.
+impl EventEmitter {
+    /// Create handler from closure.
+    pub fn on_index<F: Fn(&IndexEvent) + Send + Sync + 'static>(self, f: F) -> Self;
+
+    /// Create handler from closure.
+    pub fn on_query<F: Fn(&QueryEvent) + Send + Sync + 'static>(self, f: F) -> Self;
+
+    /// Create progress callback.
+    pub fn on_progress<F: Fn(&Progress) + Send + Sync + 'static>(self, f: F) -> Self;
+}
+
+/// Progress information.
+pub struct Progress {
+    pub operation: Operation,
+    pub current: usize,
+    pub total: usize,
+    pub message: String,
+}
+
+pub enum Operation {
+    Indexing,
+    Querying,
+    Loading,
+    Saving,
+}
+```
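+
+For example, a custom sync handler would be a small unit struct implementing `EventHandler` — a sketch against the trait as declared above (`Event` derives `Debug`, so a logging handler needs nothing else):
+
+```rust
+struct StderrLogger;
+
+impl EventHandler for StderrLogger {
+    fn handle(&self, event: &Event) {
+        // Event derives Debug, so any variant can be logged generically.
+        eprintln!("[vectorless] {event:?}");
+    }
+}
+
+let events = EventEmitter::new().on(StderrLogger);
+```
+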
+
+### 7. Simplified Engine (`engine.rs`)
+
+The main orchestrator, now much simpler.
+
+```rust
+/// The main Engine client - orchestrates sub-clients.
+pub struct Engine {
+    /// Configuration.
+    config: Arc<Config>,
+
+    /// Indexer client.
+    indexer: IndexerClient,
+
+    /// Retriever client.
+    retriever: RetrieverClient,
+
+    /// Workspace client (optional).
+    workspace: Option<WorkspaceClient>,
+
+    /// Event emitter.
+    events: EventEmitter,
+}
+
+impl Engine {
+    /// Create a builder for custom configuration.
+    pub fn builder() -> EngineBuilder;
+
+    // ============================================================
+    // Convenience Methods (delegate to sub-clients)
+    // ============================================================
+
+    /// Index a document.
+    pub async fn index(&self, path: impl AsRef<Path>) -> Result<String> {
+        self.index_with_options(path, IndexOptions::default()).await
+    }
+
+    /// Index with options.
+    pub async fn index_with_options(
+        &self,
+        path: impl AsRef<Path>,
+        options: IndexOptions,
+    ) -> Result<String>;
+
+    /// Query a document.
+    pub async fn query(&self, doc_id: &str, question: &str) -> Result<RetrievalResult>;
+
+    /// Create a session for multi-document operations.
+    pub fn session(&self) -> Session;
+
+    /// Get the indexer client.
+    pub fn indexer(&self) -> &IndexerClient;
+
+    /// Get the retriever client.
+    pub fn retriever(&self) -> &RetrieverClient;
+
+    /// Get the workspace client.
+    pub fn workspace(&self) -> Option<&WorkspaceClient>;
+
+    /// Get configuration.
+    pub fn config(&self) -> &Config;
+
+    // ============================================================
+    // Document Operations (delegate to workspace)
+    // ============================================================
+
+    /// List documents.
+    pub fn list_documents(&self) -> Vec<String>;
+
+    /// Get document structure.
+    pub fn get_structure(&self, doc_id: &str) -> Result<String>;
+
+    /// Get page content.
+    pub fn get_page_content(&self, doc_id: &str, pages: &str) -> Result<String>;
+
+    /// Remove document.
+    pub fn remove(&self, doc_id: &str) -> Result<bool>;
+
+    /// Check existence.
+    pub fn exists(&self, doc_id: &str) -> Result<bool>;
+}
+```
+
+---
+
+## API Examples
+
+### Basic Usage (Same as Before)
+
+```rust
+let client = EngineBuilder::new()
+    .with_workspace("./workspace")
+    .build()?;
+
+// Index
+let doc_id = client.index("./document.md").await?;
+
+// Query
+let result = client.query(&doc_id, "What is this?").await?;
+```
+
+### With Events
+
+```rust
+let client = EngineBuilder::new()
+    .with_workspace("./workspace")
+    .with_events(
+        EventEmitter::new()
+            .on_index(|e| match e {
+                IndexEvent::Complete { doc_id } => println!("Indexed: {}", doc_id),
+                _ => {}
+            })
+            .on_query(|e| match e {
+                QueryEvent::NodeVisited { title, score, .. } => {
+                    println!("Visited: {} (score: {:.2})", title, score);
+                }
+                _ => {}
+            })
+    )
+    .build()?;
+```
+
+### Session-Based Multi-Document
+
+```rust
+let client = EngineBuilder::new()
+    .with_workspace("./workspace")
+    .build()?;
+
+// Create session
+let session = client.session();
+
+// Index multiple documents
+let doc1 = session.index("./doc1.md").await?;
+let doc2 = session.index("./doc2.md").await?;
+let doc3 = session.index("./doc3.md").await?;
+
+// Query across all documents
+let results = session.query_all("What is the architecture?").await?;
+
+// Query single document (cached tree)
+let result = session.query(&doc1, "Summary?").await?;
+
+// Session stats
+println!("Cache hit rate: {:.2}%", session.stats().cache_hit_rate * 100.0);
+```
+
+### Streaming Query
+
+```rust
+let client = EngineBuilder::new()
+    .with_workspace("./workspace")
+    .build()?;
+
+// Stream query results
+let mut stream = client.retriever()
+    .query_stream(&tree, "What is X?", RetrieveOptions::default());
+
+while let Some(event) = stream.next().await {
+    match event {
+        QueryEvent::NodeVisited { title, score, .. } => {
+            println!("Exploring: {}", title);
+        }
+        QueryEvent::ResultReady { result } => {
+            println!("Found: {}", result.title);
+        }
+        QueryEvent::Complete { total_results, confidence } => {
+            println!("Done: {} results, confidence: {:.2}", total_results, confidence);
+        }
+        _ => {}
+    }
+}
+```
+
+### Request Context
+
+```rust
+let ctx = ClientContext::new()
+    .with_top_k(10)
+    .with_token_budget(8000)
+    .with_deadline(Duration::from_secs(30));
+
+let result = client.retriever()
+    .query(&tree, "complex question", options, &ctx)
+    .await?;
+```
+
+---
+
+## Migration Path
+
+### Phase 1: Add New Modules (Non-Breaking)
+1. Create `context.rs`, `events.rs`
+2. Create `indexer.rs`, `retriever.rs`, `workspace.rs` as wrappers
+3. Update `engine.rs` to use sub-clients internally
+4. All existing APIs remain unchanged
+
+### Phase 2: Add Session Support (Non-Breaking)
+1. Add `session.rs`
+2. Add `Engine::session()` method
+3. Add multi-document query support
+
+### Phase 3: Enhance Events (Non-Breaking)
+1. Add streaming query support
+2. Add progress callbacks
+3. Add async event handlers
+
+### Phase 4: Deprecate Old API (Breaking, Future)
+1. Mark direct workspace access as deprecated
+2. Encourage use of sub-clients
+3.
Eventually remove deprecated methods + +--- + +## File Structure After Refactoring + +``` +src/client/ +├── mod.rs # ~50 lines - exports and docs +├── engine.rs # ~150 lines - orchestration only +├── builder.rs # ~200 lines - enhanced builder +├── types.rs # ~250 lines - public types +├── context.rs # ~150 lines - request context +├── session.rs # ~200 lines - session management +├── indexer.rs # ~200 lines - indexing ops +├── retriever.rs # ~200 lines - retrieval ops +├── workspace.rs # ~150 lines - workspace ops +└── events.rs # ~200 lines - event system +``` + +Total: ~1750 lines (vs current ~1000 lines, but much better organized) + +--- + +## Benefits + +1. **Single Responsibility**: Each module has one clear purpose +2. **Testability**: Sub-clients can be tested independently +3. **Extensibility**: Easy to add new features without touching Engine +4. **Performance**: Session caching reduces redundant loads +5. **Observability**: Events provide visibility into operations +6. **API Clarity**: Clear separation between indexing, retrieval, and storage +7. **Streaming**: Support for progressive results +8. **Context Management**: Request-scoped configuration diff --git a/docs/design/content-aggregation.md b/docs/design/content-aggregation.md new file mode 100644 index 00000000..22a7d7dd --- /dev/null +++ b/docs/design/content-aggregation.md @@ -0,0 +1,361 @@ +# Content Aggregation Design + +> Version: 1.0 +> Status: Draft +> Last Updated: 2026-04-04 + +## Overview + +Content Aggregation is the final stage of the retrieval pipeline that transforms candidate nodes into structured, relevant content for the user. This document describes the design for a precision-focused, budget-aware content aggregation system. + +## Problem Statement + +### Current Implementation + +The current `aggregate_content` in `JudgeStage` collects content naively: + +``` +Candidate Node → Node's own content + ALL descendant leaf content +``` + +### Issues + +| Issue | Impact | +|-------|--------| +| **No relevance filtering** | Returns all content from subtree, including irrelevant parts | +| **No token budget** | Large documents may return tens of thousands of tokens | +| **No prioritization** | All leaf content treated equally | +| **Lost structure** | Flat concatenation loses hierarchical context | + +## Design Goals + +1. **Precision First** - Only return truly relevant content +2. **Budget Aware** - Optimize within token constraints +3. **Structure Aware** - Maintain hierarchical context +4. **Incremental** - Support progressive refinement +5. **Explainable** - Traceable selection decisions + +## Architecture + +### High-Level Flow + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Content Aggregator │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Relevance │ │ Budget │ │ Structure │ │ +│ │ Scorer │─▶│ Allocator │─▶│ Builder │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ ↑ ↑ ↑ │ +│ │ │ │ │ +│ ┌──────┴──────┐ ┌──────┴──────┐ ┌──────┴──────┐ │ +│ │ Query- │ │ Token │ │ Hierarchy │ │ +│ │ Node │ │ Budget │ │ Context │ │ +│ │ Scoring │ │ Config │ │ Assembly │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Processing Pipeline + +``` +Candidate Nodes + │ + ▼ +┌─────────────────┐ +│ 1. Collect │ Gather all nodes from candidates + descendants +│ Nodes │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ 2. 
Score │ Compute relevance score for each content chunk
+│    Relevance    │
+└────────┬────────┘
+         │
+         ▼
+┌─────────────────┐
+│  3. Filter      │  Remove content below relevance threshold
+│     by Score    │
+└────────┬────────┘
+         │
+         ▼
+┌─────────────────┐
+│  4. Allocate    │  Distribute token budget optimally
+│     Budget      │
+└────────┬────────┘
+         │
+         ▼
+┌─────────────────┐
+│  5. Build       │  Assemble structured output
+│     Structure   │
+└────────┬────────┘
+         │
+         ▼
+   Final Content
+```
+
+## Module Design
+
+### 1. RelevanceScorer
+
+Computes fine-grained relevance scores for content.
+
+```rust
+pub struct RelevanceScorer {
+    query_keywords: Vec<String>,
+    strategy: ScoringStrategy,
+}
+
+pub enum ScoringStrategy {
+    /// Fast keyword matching only
+    KeywordOnly,
+    /// Keyword + BM25 scoring
+    KeywordWithBM25,
+    /// Keyword + LLM reranking
+    Hybrid { rerank_top_k: usize },
+}
+
+pub struct ContentRelevance {
+    pub node_id: NodeId,
+    pub chunk: ContentChunk,
+    pub score: f32,
+    pub components: ScoreComponents,
+}
+
+pub struct ScoreComponents {
+    pub keyword_score: f32,   // Keyword match quality
+    pub depth_penalty: f32,   // Distance from candidate node
+    pub path_bonus: f32,      // Parent node relevance
+    pub density_score: f32,   // Information density
+}
+```
+
+#### Scoring Formula
+
+```
+final_score = (
+    keyword_score * 0.50 +
+    depth_penalty * 0.20 +
+    path_bonus * 0.15 +
+    density_score * 0.15
+).clamp(0.0, 1.0)
+
+where:
+    depth_penalty = 0.9^depth          // 10% penalty per level
+    path_bonus = parent_score * 0.2
+    density_score = (1 - stopword_ratio) * 0.7 + entity_ratio * 0.3
+```
+
+For example, a chunk two levels below its candidate node with `keyword_score = 0.8`, `parent_score = 0.6`, and `density_score = 0.5` scores `0.8·0.50 + 0.81·0.20 + 0.12·0.15 + 0.5·0.15 ≈ 0.66`.
+
+### 2. BudgetAllocator
+
+Distributes token budget across scored content.
+
+```rust
+pub struct BudgetAllocator {
+    total_budget: usize,
+    strategy: AllocationStrategy,
+}
+
+pub enum AllocationStrategy {
+    /// Select highest-scoring content first
+    Greedy,
+    /// Distribute proportionally to scores
+    Proportional,
+    /// Ensure each depth level has representation
+    Hierarchical { min_per_level: f32 },
+}
+
+pub struct AllocationResult {
+    pub selected: Vec<SelectedContent>,
+    pub tokens_used: usize,
+    pub remaining_budget: usize,
+}
+
+pub struct SelectedContent {
+    pub node_id: NodeId,
+    pub content: String,
+    pub tokens: usize,
+    pub score: f32,
+    pub truncation: Option<TruncationInfo>,
+}
+```
+
+#### Hierarchical Allocation
+
+```
+For each depth level (0 to max_depth):
+  1. Sort content by score
+  2. Allocate up to min_per_level budget
+  3. Continue until level budget exhausted
+  4. Move to next level
+
+Benefits:
+- Ensures context from all levels
+- Prevents shallow-only or deep-only results
+- Maintains document structure awareness
+```
+
+### 3. StructureBuilder
+
+Assembles selected content into structured output.
+
+```rust
+pub struct StructureBuilder {
+    format: OutputFormat,
+    include_metadata: bool,
+}
+
+pub enum OutputFormat {
+    Markdown,
+    Json,
+    Tree,
+    Flat,
+}
+
+pub struct StructuredContent {
+    pub content: String,
+    pub structure: Option<ContentStructure>,
+    pub metadata: ContentMetadata,
+}
+```
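+
+Chained together, the three stages look roughly like this — a sketch whose call shapes mirror `examples/content_aggregation.rs` in this PR (treating `allocate`'s second argument as the maximum chunk depth is an assumption):
+
+```rust
+// 1. Score each chunk against the query.
+let scorer = RelevanceScorer::new(query, ScoringStrategyConfig::KeywordWithBM25);
+let scored: Vec<_> = chunks.iter().map(|c| scorer.score_chunk(c, &ctx)).collect();
+
+// 2. Select the best chunks within the token budget.
+let allocator = BudgetAllocator::new(4000).with_strategy(AllocationStrategy::Greedy);
+let result = allocator.allocate(scored, max_depth); // max_depth assumed
+
+// 3. Assemble the selection into structured output.
+let builder = StructureBuilder::new(OutputFormat::Markdown);
+let structured = builder.build(result.selected, &tree);
+```
+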
+#### Markdown Output Format
+
+```markdown
+## Parent Section
+Parent content here...
+
+### Child Section A
+Child A content here...
+
+### Child Section B
+Child B content here...
+```
+
+## Configuration
+
+```toml
+[retrieval.content]
+# Maximum tokens to return
+token_budget = 4000
+
+# Minimum relevance score (0.0 - 1.0)
+min_relevance_score = 0.3
+
+# Scoring strategy: "keyword_only" | "keyword_bm25" | "hybrid"
+scoring_strategy = "keyword_bm25"
+
+# Output format: "markdown" | "json" | "tree" | "flat"
+output_format = "markdown"
+
+# Include relevance scores in output
+include_scores = false
+
+# Hierarchical allocation minimum per level
+hierarchical_min_per_level = 0.1
+```
+
+## Integration Points
+
+### JudgeStage Integration
+
+```rust
+impl JudgeStage {
+    pub fn with_content_aggregator(mut self, config: ContentAggregatorConfig) -> Self {
+        self.content_aggregator = Some(ContentAggregator::new(config));
+        self
+    }
+
+    fn aggregate_content(&self, ctx: &PipelineContext) -> (String, usize) {
+        if let Some(aggregator) = &self.content_aggregator {
+            aggregator.aggregate(&ctx.candidates, &ctx.tree, &ctx.query)
+        } else {
+            // Fallback to legacy behavior
+            self.aggregate_content_legacy(ctx)
+        }
+    }
+}
+```
+
+### RetrieveOptions Extension
+
+```rust
+impl RetrieveOptions {
+    pub fn with_content_config(mut self, config: ContentAggregatorConfig) -> Self {
+        self.content_config = Some(config);
+        self
+    }
+}
+```
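+
+So a caller that wants tighter precision would wire the config through per request — a sketch using `with_content_config` above plus the `with_token_budget`/`with_min_relevance` builders that appear in `examples/content_aggregation.rs`:
+
+```rust
+// Tighter budget and a higher relevance floor for a precision-focused query.
+let config = ContentAggregatorConfig::new()
+    .with_token_budget(2000)
+    .with_min_relevance(0.5);
+
+let options = RetrieveOptions::default().with_content_config(config);
+```
+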
+## Performance Characteristics
+
+### Latency by Strategy
+
+| Strategy | Latency | Precision | Use Case |
+|----------|---------|-----------|----------|
+| `KeywordOnly` | ~1ms | Medium | Quick preview |
+| `KeywordWithBM25` | ~5ms | High | Default choice |
+| `Hybrid` | ~200ms | Highest | Precision queries |
+
+### Memory Usage
+
+- Scorer: O(n) where n = total content length
+- Allocator: O(m) where m = number of chunks
+- Builder: O(k) where k = selected content size
+
+## Future Enhancements
+
+1. **Semantic Chunking** - Split content by semantic boundaries, not just nodes
+2. **LLM Reranking** - Use LLM to rerank top-k chunks
+3. **Query-Aware Truncation** - Truncate based on query relevance, not just length
+4. **Caching** - Cache aggregation results for repeated queries
+5. **Streaming** - Stream content as it's selected
+
+## File Structure
+
+```
+src/retrieval/content/
+├── mod.rs          # Module entry point
+├── aggregator.rs   # Main aggregator logic
+├── scorer.rs       # Relevance scoring
+├── budget.rs       # Token budget allocation
+├── builder.rs      # Structured output building
+├── truncation.rs   # Smart truncation utilities
+└── config.rs       # Configuration types
+```
+
+## Implementation Priority
+
+| Phase | Component | Priority |
+|-------|-----------|----------|
+| P0 | `RelevanceScorer` (keyword) | High |
+| P0 | `BudgetAllocator` (greedy) | High |
+| P1 | `StructureBuilder` (markdown) | Medium |
+| P1 | BM25 scoring | Medium |
+| P2 | Hybrid strategy (LLM rerank) | Low |
+| P2 | Caching layer | Low |
+
+## Testing Strategy
+
+### Unit Tests
+
+- Scorer: Test keyword extraction, BM25 calculation, density scoring
+- Allocator: Test budget distribution, truncation, edge cases
+- Builder: Test output formats, structure preservation
+
+### Integration Tests
+
+- End-to-end aggregation with real documents
+- Performance benchmarks
+- Token budget compliance
+
+### Quality Metrics
+
+- Precision@k: Relevance of top-k selected chunks
+- Recall: Coverage of relevant content
+- Latency: P50, P95, P99 response times
diff --git a/docs/design/pilot.md b/docs/design/pilot.md
index 0f907f25..d86e00a7 100644
--- a/docs/design/pilot.md
+++ b/docs/design/pilot.md
@@ -76,115 +76,6 @@ Pilot is the core intelligence component of the Vectorless retrieval system; it understands the query,
 └─────────────────────────────────────────────────────────────────────────────┘
 ```
 
-### 1.2 Core Interface Definitions
-
-```rust
-/// Search state - the context handed to the Pilot
-pub struct SearchState<'a> {
-    /// Document tree
-    pub tree: &'a DocumentTree,
-    /// User query
-    pub query: &'a str,
-    /// Current path (from the root to the current node)
-    pub path: &'a [NodeId],
-    /// Candidate child nodes
-    pub candidates: &'a [NodeId],
-    /// Nodes already visited
-    pub visited: &'a HashSet<NodeId>,
-    /// Current depth
-    pub depth: usize,
-    /// Search iteration count
-    pub iteration: usize,
-    /// Current best score
-    pub best_score: f32,
-    /// Whether we are currently backtracking
-    pub is_backtracking: bool,
-}
-
-/// Pilot trait - the core interface
-#[async_trait]
-pub trait Pilot: Send + Sync {
-    /// Get the Pilot's name
-    fn name(&self) -> &str;
-
-    /// Decide whether the Pilot should intervene
-    fn should_intervene(&self, state: &SearchState<'_>) -> bool;
-
-    /// Make a decision
-    async fn decide(&self, state: &SearchState<'_>) -> PilotDecision;
-
-    /// Guidance before the search starts
-    async fn guide_start(
-        &self,
-        tree: &DocumentTree,
-        query: &str
-    ) -> Option<PilotDecision>;
-
-    /// Get the configuration
-    fn config(&self) -> &PilotConfig;
-
-    /// Get the metrics
-    fn metrics(&self) -> &PilotMetrics;
-
-    /// Reset state (called when a new query starts)
-    fn reset(&self);
-}
-```
-
-### 1.3 Pilot Decision Types
-
-```rust
-/// The result of a Pilot decision
-#[derive(Debug, Clone)]
-pub struct PilotDecision {
-    /// Ranked candidate nodes (by recommended priority)
-    pub ranked_candidates: Vec<RankedCandidate>,
-    /// Suggested search direction
-    pub direction: SearchDirection,
-    /// Confidence (0.0 - 1.0)
-    pub confidence: f32,
-    /// Reasoning behind the decision (for explainability)
-    pub reasoning: String,
-    /// Intervention point identifier
-    pub intervention_point: InterventionPoint,
-}
-
-/// A ranked candidate node
-#[derive(Debug, Clone)]
-pub struct RankedCandidate {
-    pub node_id: NodeId,
-    pub score: f32,
-    pub reason: Option<String>,
-}
-
-/// Suggested search direction
-#[derive(Debug, Clone)]
-pub enum SearchDirection {
-    /// Keep going deeper into the current branch
-    GoDeeper {
-        reason: String,
-    },
-    /// Explore sibling nodes
-    ExploreSiblings {
-        recommended: Vec<NodeId>,
-    },
-    /// Backtrack to the parent node
-    Backtrack {
-        reason: String,
-        alternative_branches: Vec<NodeId>,
-    },
-    /// Jump to a specific node (a non-local move)
-    JumpTo {
-        target: NodeId,
-        reason: String,
-    },
-    /// The current node is the answer
-    FoundAnswer {
-        confidence: f32,
-    },
-}
-```
-
 ---
 
 ## 1.4 Information Sources for Pilot Decisions
diff --git a/docs/design/roadmap.md b/docs/design/roadmap.md
deleted file mode 100644
index 87a867a6..00000000
--- a/docs/design/roadmap.md
+++
/dev/null
@@ -1,247 +0,0 @@
-# Architecture Assessment & Roadmap
-
-> Assessment date: 2026-04-03
-> Assessed version: v0.1.7
-
-## Current Status
-
-| Metric | Status |
-|------|------|
-| **Tests** | 129 passed, 0 failed |
-| **Code size** | 17,695 lines of Rust (112 files) |
-| **Modules** | client, domain, index, retrieval, llm, parser, storage, throttle |
-| **Build** | succeeds (warnings only) |
-
-## Architecture Highlights
-
-### 1. Consistent Dual-Pipeline Design
-
-Index and Retrieval both use the same orchestrator pattern:
-- Dependency resolution (topological sort)
-- ExecutionGroup supports parallelism
-- FailurePolicy (Fail/Skip/Retry)
-- StageOutcome flow control
-
-```
-┌─────────────────────────────────────────────────────────────┐
-│                    Orchestrator Pattern                     │
-├─────────────────────────────────────────────────────────────┤
-│  Index Pipeline          │  Retrieval Pipeline              │
-│  ─────────────           │  ─────────────────               │
-│  Parse → Build →         │  Analyze → Plan →                │
-│  Enhance → Enrich →      │  Search → Judge                  │
-│  Optimize                │  (supports backtracking)         │
-└─────────────────────────────────────────────────────────────┘
-```
-
-### 2. Clear Layered Architecture
-
-```
-client (Engine) → index/retrieval → domain ← parser/llm/config
-```
-
-- **client**: high-level API that hides internal complexity
-- **domain**: core domain types with no external dependencies
-- **index/retrieval**: business logic operating on domain
-- **parser/llm/config**: infrastructure providing capabilities
-
-### 3. Good Modularity
-
-Each module has a single responsibility:
-- `parser/` - document parsing (Markdown, PDF, DOCX)
-- `llm/` - LLM clients (retry, fallback, pool)
-- `storage/` - persistence (Workspace, LRU cache)
-- `throttle/` - rate limiting
-
----
-
-## Areas for Improvement
-
-### Code Quality (Clippy Warnings)
-
-| Type | Count | Example |
-|------|------|------|
-| unused variable | 8 | `_context`, `_query`, `_strategy` |
-| dead_code | 5 | `find_stage_index`, `term_frequency` |
-| must_use | 12 | builder methods missing `#[must_use]` |
-| style | 3 | redundant else, unnecessary hashes |
-
-### Missing Features
-
-| Module | Missing | Impact |
-|------|------|------|
-| `parser/registry.rs` | HTML parser | HTML format unsupported |
-| `parser/toc/processor.rs` | structure extraction for documents without a ToC | relies on the LLM |
-| `retrieval/strategy/llm.rs` | batched prompt optimization | performance |
-
-### Architectural Limitations
-
-| Limitation | Notes |
-|------|------|
-| **Parallel execution unimplemented** | ExecutionGroup is designed, but `execute()` still runs sequentially |
-| **No strategy switching** | once Plan picks a strategy, it cannot be switched mid-search |
-| **Incremental indexing is a skeleton** | `ChangeDetector` exists but is not integrated into the pipeline |
-
----
-
-## Next-Phase Optimization Plan
-
-### Phase 1: Code Cleanup (Priority: High)
-
-**Goal**: eliminate all clippy warnings
-
-| Task | Files | Effort |
-|------|------|--------|
-| Add `#[must_use]` | builder types | ~12 sites |
-| Fix unused variables | various modules | ~8 sites |
-| Remove dead code | `search/mod.rs`, `strategy/keyword.rs` | ~5 sites |
-| Fix style issues | scattered | ~3 sites |
-
-**Acceptance criteria**: `cargo clippy` reports no warnings
-
----
-
-### Phase 2: Feature Completion (Priority: Medium)
-
-#### 2.1 HTML Parser
-
-```rust
-// src/parser/html/mod.rs (new)
-pub struct HtmlParser {
-    config: HtmlConfig,
-}
-
-impl DocumentParser for HtmlParser {
-    fn parse(&self, content: &str) -> ParseResult {
-        // use the html5ever or scraper crate
-    }
-}
-```
-
-#### 2.2 Hot Strategy Switching
-
-Current: the strategy chosen in the Plan stage is fixed.
-Target: the Search stage switches dynamically based on effectiveness.
-
-```rust
-// in SearchStage
-if current_strategy.is_struggling() {
-    ctx.switch_strategy(Strategy::more_capable());
-}
-```
-
-#### 2.3 Incremental Indexing Integration
-
-```rust
-// in PipelineExecutor
-pub fn execute_incremental(
-    &mut self,
-    input: IndexInput,
-    changes: ChangeSet,
-) -> Result {
-    // only process the changed parts
-}
-```
-
----
-
-### Phase 3: Performance Optimization (Priority: Medium)
-
-#### 3.1 Implementing Parallel Execution
-
-**Current state**: `ExecutionGroup` is designed, but `execute()` still runs sequentially
-
-```rust
-// current (sequential)
-for &stage_idx in &group.stage_indices {
-    entry.stage.execute(&mut ctx).await?;
-}
-
-// target (parallel)
-futures::future::try_join_all(
-    group.stage_indices.iter()
-        .map(|&idx| self.stages[idx].execute(&ctx))
-).await?;
-```
-
-**Challenges**:
-- `PipelineContext` must be `Send + Sync`
-- requires fine-grained locking or message passing
-
-#### 3.2 Path Cache Hit Rate
-
-```rust
-// add a hot-query cache
-pub struct PathCache {
-    entries: LruCache<String, Vec<NodeId>>,
-    hot_queries: Arc<RwLock<HashSet<String>>>,  // new
-}
-```
-
-#### 3.3 Batched LLM Calls
-
-```rust
-// current: evaluate one node at a time
-for node_id in node_ids {
-    self.evaluate_node(tree, node_id, context).await;
-}
-
-// target: evaluate in batch
-self.evaluate_nodes_batch(tree, node_ids, context).await;
-```
-
----
-
-### Phase 4: Test Enhancement (Priority: Low)
-
-| Test type | Current | Target |
-|----------|------|------|
-| Unit tests | 129 | +50 |
-| Integration tests | 0 (examples only) | +10 |
-| Property tests | 0 | +5 |
-| Coverage report | none | cargo-tarpaulin |
-
----
-
-## Execution Order
-
-```
-Phase 1 (code cleanup)
-    ↓
-Phase 3.1 (parallel execution)
-    ↓
-Phase 2 (feature completion)
-    ↓
-Phase 4 (test enhancement)
-```
-
-**Run Phase 1 (code cleanup) first** to eliminate all clippy warnings and leave the codebase cleaner.
-
----
-
-## File Change Preview
-
-### Files Involved in Phase 1
-
-```
-src/
-├── client/builder.rs              # add #[must_use]
-├── config/types.rs                # add #[must_use]
-├── domain/tree.rs                 # remove dead code
-├── index/
-│   ├── pipeline/orchestrator.rs   # remove find_stage_index
-│   └── stages/*.rs                # fix unused
-├── retrieval/
-│   ├── search/mod.rs              # remove dead code
-│   ├── strategy/keyword.rs        # remove term_frequency
-│   └── stages/*.rs                # fix unused
-└── llm/client.rs                  # fix unused max_tokens
-```
-
----
-
-## References
-
-- [Architecture v2](./architecture-v2.svg)
-- [Pipeline Design](./v2.md)
-- [RFCs](../rfcs/)
diff --git a/examples/batch_processing.rs b/examples/batch_processing.rs
new file mode 100644
index 00000000..6906189f
--- /dev/null
+++ b/examples/batch_processing.rs
@@ -0,0 +1,972 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Batch document processing example.
+//!
+//! This example demonstrates how to efficiently process
+//! multiple documents in batch mode using sessions.
+//!
+//! # Usage
+//!
+//! ```bash
+//! cargo run --example batch_processing
+//! ```
+
+use vectorless::client::EngineBuilder;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Batch Document Processing Example ===\n");
+
+    // 1. Create engine and session
+    println!("Step 1: Setting up...");
+    let engine = EngineBuilder::new()
+        .with_workspace("./workspace_batch_example")
+        .build()
+        .map_err(|e| vectorless::Error::Config(e.to_string()))?;
+
+    let session = engine.session();
+    println!("  ✓ Session created: {}\n", session.id());
+
+    // 2. Create sample documents
+    println!("Step 2: Creating sample documents...");
+    let temp_dir = tempfile::tempdir()?;
+
+    let documents = vec![
+        ("intro.md", r#"# Introduction
+
+Welcome to the vectorless library. This is a document intelligence engine.
+
+## Features
+
+- Tree-based navigation
+- Multi-format support
+- Session management
+"#),
+        ("api.md", r#"# API Reference
+
+## Engine
+
+The main client for document operations.
+
+### Methods
+
+- `index(path)`: Index a document
+- `query(question)`: Query indexed content
+
+## Session
+
+Multi-document operations with caching.
+
+### Methods
+
+- `index(path)`: Index into session
+- `query_all(question)`: Query across all documents
+"#),
+        ("guide.md", r#"# User Guide
+
+## Getting Started
+
+First, create a client with workspace configuration.
+
+## Best Practices
+
+- Use sessions for multi-document operations
+- Enable caching for better performance
+- Monitor events for debugging
+"#),
+        ("advanced.md", r#"# Advanced Topics
+
+## Performance Tuning
+
+Configure retrieval parameters for optimal performance.
+
+### Parameters
+
+- `top_k`: Number of results
+- `max_tokens`: Token budget
+
+## Custom Pilots
+
+Implement custom navigation logic.
+"#),
+        ("reference.md", r#"# Reference
+
+## Configuration
+
+All configuration is done via TOML files.
+
+### Example
+
+```toml
+[retrieval]
+top_k = 5
+max_tokens = 4000
+```
+"#),
+        ("examples.md", r#"# Examples
+
+## Basic Usage
+
+Simple indexing and querying example.
+
+## Batch Processing
+
+Process multiple documents concurrently.
+
+## Session Usage
+
+Multi-document operations with caching.
+"#),
+        ("faq.md", r#"# FAQ
+
+## Common Questions
+
+**Q: How do I index a document?**
+A: Use the `engine.index(path)` method.
+
+**Q: How to query?**
+A: Use the `engine.query(doc_id, question)` method.
+
+**Q: What formats are supported?**
+A: Markdown, PDF, DOCX, HTML.
+"#),
+        ("changelog.md", r#"# Changelog
+
+## Version 0.1.0
+
+- Initial release
+- Basic indexing support
+- Simple retrieval
+
+## Version 0.2.0
+
+- Session support
+- Event system
+- Content aggregator
+"#),
+        ("contributing.md", r#"# Contributing
+
+## How to Contribute
+
+We welcome contributions! Please follow these steps:
+
+1. Fork the repository
+2. Create a feature branch
+3. Submit a pull request
+
+## Code Style
+
+- Run `cargo fmt`
+- Run `cargo clippy`
+- Add tests
+"#),
+        ("license.md", r#"# License
+
+Apache License, Version 2.0
+
+Copyright 2026 vectorless developers
+"#),
+        ("architecture.md", r#"# Architecture
+
+## Overview
+
+Vectorless uses a tree-based architecture.
+
+## Components
+
+- Parser: Document parsing
+- Indexer: Tree building
+- Retriever: Content search
+- Storage: Persistence
+"#),
+        ("security.md", r#"# Security
+
+## Security Considerations
+
+- API keys are stored securely
+- No sensitive data in logs
+- Input validation
+
+## Best Practices
+
+- Use environment variables
+- Rotate keys periodically
+"#),
+        ("performance.md", r#"# Performance
+
+## Optimization Tips
+
+- Use caching effectively
+- Configure appropriate batch sizes
+- Monitor memory usage
+
+## Benchmarks
+
+Run `cargo bench` for performance metrics.
+"#),
+        ("testing.md", r#"# Testing
+
+## Running Tests
+
+```bash
+cargo test
+```
+
+## Test Coverage
+
+- Unit tests
+- Integration tests
+- Example tests
+"#),
+        ("deployment.md", r#"# Deployment
+
+## Production Setup
+
+- Configure workspace directory
+- Set up logging
+- Monitor performance
+
+## Configuration
+
+Use TOML configuration files.
+"#),
+        ("troubleshooting.md", r#"# Troubleshooting
+
+## Common Issues
+
+### Indexing Fails
+
+Check file format and permissions.
+
+### Query Returns Empty
+
+Ensure document is indexed.
+
+### Performance Issues
+
+Reduce batch size or enable caching.
+"#),
+        ("integrations.md", r#"# Integrations
+
+## LLM Providers
+
+- OpenAI
+- Anthropic
+- Local models
+
+## Storage Backends
+
+- File system (default)
+- S3 (planned)
+"#),
+        ("migrations.md", r#"# Migrations
+
+## Version Migrations
+
+### 0.1.x to 0.2.x
+
+- Update configuration format
+- Re-index documents
+"#),
+        ("roadmap.md", r#"# Roadmap
+
+## Future Plans
+
+### Short Term
+
+- Streaming support
+- More formats
+
+### Long Term
+
+- Distributed indexing
+- Real-time updates
+"#),
+        ("credits.md", r#"# Credits
+
+## Contributors
+
+Thanks to all contributors!
+
+## Libraries
+
+Built with Rust and many open-source libraries.
+"#),
+        ("index.md", r#"# Index
+
+## Quick Links
+
+- [Introduction](intro.md)
+- [API Reference](api.md)
+- [User Guide](guide.md)
+
+## Search
+
+Use the search functionality to find specific content.
+"#),
+        ("search.md", r#"# Search
+
+## Search Functionality
+
+### Basic Search
+
+```rust
+let results = engine.query(&doc_id, "search term").await?;
+```
+
+### Advanced Search
+
+Use sessions for cross-document search.
+"#), + ("export.md", r#"# Export + +## Exporting Data + +### JSON Export + +```rust +let json = tree.to_structure_json(); +``` + +### Custom Formats + +Implement custom exporters as needed. +"#), + ("import.md", r#"# Import + +## Importing Data + +### From Files + +```rust +let doc_id = engine.index("./document.md").await?; +``` + +### From Memory + +Use the content directly with parsers. +"#), + ("validation.md", r#"# Validation + +## Input Validation + +### Document Paths + +Must exist and be readable. + +### Configuration + +Validated on load with helpful errors. + +### Queries + +Sanitized before processing. +"#), + ("formatting.md", r#"# Formatting + +## Content Formatting + +### Markdown + +Standard CommonMark with extensions. + +### Code Blocks + +Syntax highlighting support. + +### Tables + +Basic table parsing. +"#), + ("localization.md", r#"# Localization + +## Internationalization + +Currently English-only. + +## Future Support + +Planned i18n support for: +- Error messages +- UI strings +- Documentation +"#), + ("accessibility.md", r#"# Accessibility + +## Accessibility + +### Documentation + +Clear and comprehensive docs. + +### API Design + +Consistent and intuitive naming. + +### Error Messages + +Helpful and actionable. +"#), + ("glossary.md", r#"# Glossary + +## Terms + +- **Document Tree**: Hierarchical structure +- **Session**: Multi-document context +- **Workspace**: Document storage +- **Retrieval**: Content search +"#), + ("appendix.md", r#"# Appendix + +## Additional Resources + +- [GitHub Repository](https://github.com) +- [Documentation Site](https://docs.vectorless.dev) +- [Community Discord](https://discord.gg) +"#), + ("summary.md", r#"# Summary + +## Overview + +This documentation covers all aspects of vectorless. + +## Next Steps + +- Try the examples +- Join the community +- Contribute! +"#), + ("conclusion.md", r#"# Conclusion + +## Thank You + +Thanks for using vectorless! + +## Feedback + +We'd love to hear from you. Open an issue on GitHub. +"#), + ("revision.md", r#"# Revision History + +## Document Versions + +| Version | Date | Changes | +|---------|------------|---------------------------| +| 1.0 | 2026-01-01 | Initial version | +| 1.1 | 2026-02-01 | Session support | +"#), + ("feedback.md", r#"# Feedback + +## Providing Feedback + +We value your input! + +### Channels + +- GitHub Issues +- Discord Community +- Email Support + +### What to Share + +- Bug reports +- Feature requests +- Documentation improvements +"#), + ("support.md", r#"# Support + +## Getting Help + +### Documentation + +Start with the user guide. + +### Community + +Join our Discord for discussions. + +### Enterprise + +Contact us for enterprise support. +"#), + ("updates.md", r#"# Updates + +## Staying Updated + +### Version Updates + +Check the changelog for updates. + +### Security Updates + +Apply security patches promptly. + +### Deprecations + +Watch for deprecation notices. +"#), + ("resources.md", r#"# Resources + +## External Resources + +### Official + +- Documentation: docs.vectorless.dev +- GitHub: github.com/vectorless +- Discord: discord.gg/vectorless + +### Community + +- Blog posts +- Tutorial videos +- Example projects +"#), + ("contact.md", r#"# Contact + +## Contact Information + +### General Inquiries + +Email: hello@vectorless.dev + +### Security Issues + +Email: security@vectorless.dev + +### Enterprise Sales + +Email: enterprise@vectorless.dev +"#), + ("privacy.md", r#"# Privacy Policy + +## Data Handling + +Vectorless processes documents locally. 
+
+## No Tracking
+
+We don't track usage or content.
+
+## API Keys
+
+Stored securely in configuration files.
+"#),
+        ("terms.md", r#"# Terms of Service
+
+## Usage Terms
+
+By using vectorless, you agree to:
+
+- Use responsibly
+- Follow applicable laws
+- Respect rate limits
+
+## Changes
+
+Terms may be updated. Check for revisions.
+"#),
+        ("legal.md", r#"# Legal
+
+## Licensing
+
+Apache License 2.0
+
+## Copyright
+
+Copyright 2026 vectorless developers
+
+## Trademarks
+
+Vectorless is a trademark.
+"#),
+        ("versioning.md", r#"# Versioning
+
+## Semantic Versioning
+
+We follow semver:
+
+- MAJOR: Breaking changes
+- MINOR: New features
+- PATCH: Bug fixes
+
+## Current Version
+
+0.1.11
+"#),
+        ("compatibility.md", r#"# Compatibility
+
+## Supported Versions
+
+- Rust 1.70+
+- Tokio 1.x
+
+## Platform Support
+
+- Linux
+- macOS
+- Windows
+
+## Breaking Changes
+
+Documented in changelog.
+"#),
+        ("installation.md", r#"# Installation
+
+## Requirements
+
+- Rust 1.70+
+- Tokio runtime
+
+## Install
+
+```bash
+cargo install vectorless
+```
+
+## Verify
+
+```bash
+vectorless --version
+```
+"#),
+        ("quickstart.md", r#"# Quick Start
+
+## 5-Minute Setup
+
+1. Install vectorless
+2. Create a client
+3. Index a document
+4. Query!
+
+```rust
+let client = Engine::builder().build()?;
+let doc_id = client.index("./doc.md").await?;
+let result = client.query(&doc_id, "What is this?").await?;
+```
+"#),
+        ("tutorial.md", r#"# Tutorial
+
+## Introduction
+
+This tutorial covers basic usage.
+
+## Step 1: Setup
+
+Create a client with workspace.
+
+## Step 2: Index
+
+Index your first document.
+
+## Step 3: Query
+
+Ask questions about your document.
+
+## Step 4: Next
+
+Explore advanced features.
+"#),
+        ("examples_overview.md", r#"# Examples Overview
+
+## Available Examples
+
+| Example | Description |
+|-----------------|--------------------------------|
+| basic.rs | Basic usage |
+| session.rs | Multi-document operations |
+| events.rs | Event callbacks |
+| batch.rs | Batch processing |
+
+## Running Examples
+
+```bash
+cargo run --example <name>
+```
+"#),
+        ("configuration.md", r#"# Configuration
+
+## Configuration File
+
+Use `config.toml` for settings:
+
+```toml
+[storage]
+workspace_dir = "./workspace"
+
+[retrieval]
+top_k = 5
+max_tokens = 4000
+```
+
+## Environment Variables
+
+- `OPENAI_API_KEY`: LLM API key
+"#),
+        ("optimization.md", r#"# Optimization
+
+## Performance Tips
+
+- Use sessions for caching
+- Batch document indexing
+- Configure appropriate token limits
+
+## Memory Management
+
+Documents are cached in sessions.
+
+## Concurrency
+
+Use `buffer_unordered` for parallel indexing.
+"#), + ("errors.md", r#"# Error Handling + +## Error Types + +- `ConfigError`: Configuration issues +- `ParseError`: Document parsing failures +- `RetrievalError`: Query failures + +## Handling Errors + +```rust +match result { + Ok(response) => { /* success */ }, + Err(Error::Parse(msg)) => { /* handle parse error */ }, + Err(e) => { /* other error */ }, +} +``` +"#), + ("logging.md", r#"# Logging + +## Log Levels + +- ERROR: Serious issues +- WARN: Potential issues +- INFO: General information +- DEBUG: Detailed information +- TRACE: Very detailed + +## Enabling Logs + +```bash +RUST_LOG=debug cargo run +``` +"#), + ("metrics.md", r#"# Metrics + +## Available Metrics + +- Query count +- Cache hit rate +- Average query time + +## Accessing Metrics + +```rust +let stats = session.stats(); +println!("Cache hit rate: {:.1}%", stats.cache_hit_rate() * 100.0); +``` +"#), + ("health.md", r#"# Health Checks + +## Workspace Health + +Check workspace integrity: + +```rust +let docs = engine.list_documents(); +println!("{} documents indexed", docs.len()); +``` + +## Session Health + +Monitor session statistics regularly. +"#), + ("backup.md", r#"# Backup + +## Backing Up + +Copy the workspace directory: + +```bash +cp -r ./workspace ./workspace_backup +``` + +## Restoration + +Restore by copying back: + +```bash +cp -r ./workspace_backup ./workspace +``` +"#), + ("recovery.md", r#"# Recovery + +## Corrupted Documents + +Remove and re-index: + +```rust +engine.remove(&doc_id)?; +engine.index(&path).await?; +``` + +## Session Recovery + +Create a new session if issues occur. +"#), + ("monitoring.md", r#"# Monitoring + +## Production Monitoring + +Use events for real-time monitoring: + +```rust +let events = EventEmitter::new() + .on_query(|e| { + // Log to monitoring system + }); +``` + +## Alerts + +Set up alerts for error rates. +"#), + ("scaling.md", r#"# Scaling + +## Horizontal Scaling + +Run multiple instances with shared storage. + +## Vertical Scaling + +Increase resources for single instance. + +## Considerations + +- Storage backend +- Cache coordination +- Rate limiting +"#), + ("security_config.md", r#"# Security Configuration + +## API Keys + +Store securely: + +```toml +[summary] +api_key = "${OPENAI_API_KEY}" +``` + +## Network Security + +Use HTTPS for all API calls. + +## Access Control + +Implement authentication for production. +"#), + ]; + + for (name, content) in &documents { + let path = temp_dir.path().join(name); + std::fs::write(&path, content)?; + } + + println!(" ✓ Created {} sample documents\n", documents.len()); + + // 3. Batch indexing with progress + println!("Step 3: Batch indexing..."); + let start = std::time::Instant::now(); + let mut doc_ids = Vec::new(); + + for (name, _) in &documents { + let path = temp_dir.path().join(name); + match session.index(&path).await { + Ok(doc_id) => { + doc_ids.push(doc_id); + } + Err(e) => { + eprintln!(" ✗ Failed to index {}: {}", name, e); + } + } + } + + let elapsed = start.elapsed(); + println!(" ✓ Indexed {} documents in {:?}", doc_ids.len(), elapsed); + println!(" - Rate: {:.1} docs/sec", doc_ids.len() as f64 / elapsed.as_secs_f64()); + println!(); + + // 4. Show session stats + println!("Step 4: Session statistics:"); + let stats = session.stats(); + println!(" - Documents in session: {}", session.list_documents().len()); + println!(" - Queries: {}", stats.query_count.get()); + println!(); + + // 5. 
Batch query with progress
+    println!("Step 5: Batch querying...");
+    let queries = vec![
+        "What is vectorless?",
+        "How to index?",
+        "Configuration options",
+        "API methods",
+        "Performance tips",
+        "Error handling",
+        "Logging setup",
+        "Security considerations",
+        "Scaling options",
+        "Getting help",
+    ];
+
+    let start = std::time::Instant::now();
+    let mut success_count = 0;
+
+    for query in &queries {
+        match session.query_all(query).await {
+            Ok(results) => {
+                if !results.is_empty() {
+                    success_count += 1;
+                }
+            }
+            Err(e) => {
+                eprintln!("  ✗ Query failed: {}", e);
+            }
+        }
+    }
+
+    let elapsed = start.elapsed();
+    println!("  ✓ Completed {} queries in {:?}", queries.len(), elapsed);
+    println!("    - Success rate: {:.0}%", (success_count as f64 / queries.len() as f64) * 100.0);
+    println!("    - Rate: {:.1} queries/sec", queries.len() as f64 / elapsed.as_secs_f64());
+    println!();
+
+    // 6. Final statistics
+    println!("Step 6: Final statistics:");
+    let stats = session.stats();
+    println!("  - Total documents: {}", session.list_documents().len());
+    println!("  - Total queries: {}", stats.query_count.get());
+    println!("  - Cache hits: {}", stats.cache_hits.get());
+    println!("  - Cache misses: {}", stats.cache_misses.get());
+    println!(
+        "  - Cache hit rate: {:.1}%",
+        stats.cache_hit_rate() * 100.0
+    );
+    if let Some(avg_time) = stats.avg_query_time() {
+        println!("  - Avg query time: {:?}", avg_time);
+    }
+    println!("  - Session age: {:?}", session.age());
+    println!();
+
+    // 7. Cleanup
+    println!("Step 7: Cleanup...");
+    for doc_id in &doc_ids {
+        engine.remove(doc_id)?;
+    }
+    println!("  ✓ Removed {} documents\n", doc_ids.len());
+
+    println!("=== Example Complete ===");
+    Ok(())
+}
diff --git a/examples/cli_tool.rs b/examples/cli_tool.rs
new file mode 100644
index 00000000..62a05f33
--- /dev/null
+++ b/examples/cli_tool.rs
@@ -0,0 +1,122 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! CLI tool example for vectorless.
+//!
+//! This example shows how to build a command-line tool
+//! using vectorless for document indexing and querying.
+//!
+//! # What you'll learn:
+//! - How to structure a CLI application
+//! - How to handle subcommands (index, query, info)
+//! - How to manage configuration and workspace
+//! - How to provide user-friendly output
+//!
+//! # Example commands:
+//!
+//! ```bash
+//! # Index a document
+//! vectorless-cli index ./document.md
+//!
+//! # Query a document
+//! vectorless-cli query <doc-id> "What is the main topic?"
+//!
+//! # List indexed documents
+//! vectorless-cli list
+//!
+//! # Show document info
+//! vectorless-cli info <doc-id>
+//!
+//! # Delete a document
+//! vectorless-cli delete <doc-id>
+//! ```
+//!
+//! # Implementation notes:
+//!
+//! ## Recommended crates:
+//! - `clap` for argument parsing
+//! - `colored` or `termcolor` for colored output
+//! - `indicatif` for progress bars
+//! - `serde` for configuration
+//!
+//! ## Configuration file:
+//! ```toml
+//! # ~/.vectorless/config.toml
+//! [llm]
+//! provider = "openai"
+//! model = "gpt-4"
+//!
+//! [index]
+//! cache_size = 100
+//!
+//! [retrieval]
+//! max_iterations = 10
+//! ```
+//!
+//! # TODO: Implementation steps
+//!
+//! 1. Define CLI structure with clap
+//! 2. Implement index subcommand
+//! 3. Implement query subcommand
+//! 4. Implement list/info subcommands
+//! 5. Add configuration management
+//! 6.
Add colored output and progress + +// TODO: Implement CLI tool +// ``` +// use clap::{Parser, Subcommand}; +// use vectorless::client::{Engine, EngineBuilder}; +// +// #[derive(Parser)] +// #[command(name = "vectorless-cli")] +// struct Cli { +// #[command(subcommand)] +// command: Commands, +// } +// +// #[derive(Subcommand)] +// enum Commands { +// /// Index a document +// Index { +// /// Path to document +// path: PathBuf, +// }, +// /// Query an indexed document +// Query { +// /// Document ID +// doc_id: String, +// /// Query string +// query: String, +// }, +// /// List all indexed documents +// List, +// } +// +// #[tokio::main] +// async fn main() -> Result<()> { +// let cli = Cli::parse(); +// let engine = EngineBuilder::new().build()?; +// +// match cli.command { +// Commands::Index { path } => { +// let doc_id = engine.index(&path).await?; +// println!("Indexed: {}", doc_id); +// } +// Commands::Query { doc_id, query } => { +// let result = engine.query(&doc_id, &query).await?; +// println!("{}", result.content); +// } +// Commands::List => { +// // List documents +// } +// } +// +// Ok(()) +// } +// ``` + +fn main() { + // TODO: Implement full CLI tool + + println!("TODO: Implement cli_tool example"); +} diff --git a/examples/content_aggregation.rs b/examples/content_aggregation.rs new file mode 100644 index 00000000..5fe71a32 --- /dev/null +++ b/examples/content_aggregation.rs @@ -0,0 +1,175 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Content Aggregation Accuracy Example +//! +//! This example demonstrates the content aggregation module's ability to: +//! 1. Score content relevance +//! 2. Allocate token budget +//! 3. Build structured output +//! +//! # Usage +//! +//! ```bash +//! cargo run --example content_aggregation +//! ``` + +use vectorless::retrieval::content::{ + ContentAggregator, ContentAggregatorConfig, BudgetAllocator, AllocationStrategy, + StructureBuilder, OutputFormat, RelevanceScorer, ScoringStrategyConfig, + ContentChunk, ScoringContext, +}; +use vectorless::domain::NodeId; +use indextree::Arena; + +fn make_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = vectorless::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) +} + +fn main() { + println!("=== Content Aggregation Accuracy Demo ===\n"); + + // 1. Demonstrate Relevance Scoring + println!("1. Relevance Scoring Demo"); + println!("---------------------------"); + + let query = "What is the architecture of vectorless?"; + let scorer = RelevanceScorer::new(query, ScoringStrategyConfig::KeywordWithBM25); + + let chunks = vec![ + ContentChunk::new( + make_node_id(), + "Architecture Overview".to_string(), + "Vectorless uses a tree-based architecture for document navigation. The system consists of multiple stages: parsing, indexing, and retrieval.".to_string(), + 0, + ), + ContentChunk::new( + make_node_id(), + "Installation Guide".to_string(), + "To install vectorless, add it to your Cargo.toml file. 
Then run cargo build to compile.".to_string(), + 1, + ), + ContentChunk::new( + make_node_id(), + "Core Components".to_string(), + "The architecture includes Pilot for navigation, Judge for sufficiency checking, and multiple search algorithms like beam search and greedy search.".to_string(), + 1, + ), + ]; + + let ctx = ScoringContext::default(); + + println!("Query: \"{}\"", query); + println!("\nScored chunks:"); + for chunk in &chunks { + let relevance = scorer.score_chunk(chunk, &ctx); + println!(" - '{}' (depth {}): score {:.3}", + chunk.title, chunk.depth, relevance.score); + println!(" Components: keyword={:.2}, bm25={:.2}, depth_penalty={:.2}, density={:.2}", + relevance.components.keyword_score, + relevance.components.bm25_score, + relevance.components.depth_penalty, + relevance.components.density_score, + ); + } + + // 2. Demonstrate Budget Allocation + println!("\n\n2. Budget Allocation Demo"); + println!("---------------------------"); + + let scored: Vec<_> = chunks + .iter() + .map(|chunk| scorer.score_chunk(chunk, &ctx)) + .collect(); + + let strategies = vec![ + ("Greedy", AllocationStrategy::Greedy), + ("Hierarchical (20%/level)", AllocationStrategy::Hierarchical { min_per_level: 0.2 }), + ]; + + for (name, strategy) in strategies { + let allocator = BudgetAllocator::new(200) + .with_strategy(strategy); + + let result = allocator.allocate(scored.clone(), 2); + + println!("\n{} Strategy:", name); + println!(" Tokens used: {}/{}", result.tokens_used, 200); + println!(" Items selected: {}", result.selected.len()); + println!(" Avg score: {:.3}", result.stats.avg_score); + + for content in &result.selected { + let trunc = if content.is_truncated() { " [truncated]" } else { "" }; + println!(" - '{}' ({} tokens, score {:.2}){}", + content.title, content.tokens, content.score, trunc); + } + } + + // 3. Demonstrate Structure Building + println!("\n\n3. Structure Building Demo"); + println!("---------------------------"); + + let formats = vec![ + ("Markdown", OutputFormat::Markdown), + ("Flat", OutputFormat::Flat), + ]; + + let allocator = BudgetAllocator::new(500) + .with_strategy(AllocationStrategy::Greedy); + let result = allocator.allocate(scored.clone(), 2); + + for (name, format) in formats { + let builder = StructureBuilder::new(format); + let tree = vectorless::domain::DocumentTree::new("Test", ""); + let structured = builder.build(result.selected.clone(), &tree); + + println!("\n{} Output ({} chars, {} tokens):", name, structured.content.len(), structured.metadata.total_tokens); + let preview = if structured.content.len() > 300 { + format!("{}...", &structured.content[..300]) + } else { + structured.content.clone() + }; + println!("{}", preview.lines().take(8).collect::<Vec<_>>().join("\n")); + } + + // 4. Demonstrate Full Aggregation Pipeline + println!("\n\n4.
Full Aggregation Pipeline Demo"); + println!("-----------------------------------"); + + let configs = vec![ + ("Default (4000 tokens)", ContentAggregatorConfig::default()), + ("Conservative (1000 tokens)", ContentAggregatorConfig::new() + .with_token_budget(1000) + .with_min_relevance(0.3)), + ("High Precision (2000 tokens, 0.5 threshold)", ContentAggregatorConfig::new() + .with_token_budget(2000) + .with_min_relevance(0.5)), + ]; + + for (name, config) in configs { + println!("\n{} Config:", name); + println!(" Token budget: {}", config.token_budget); + println!(" Min relevance: {:.1}", config.min_relevance_score); + + let aggregator = ContentAggregator::new(config); + // Note: Full aggregation requires a DocumentTree with actual content + let _ = aggregator; // Suppress unused warning + } + + println!("\n=== Demo Complete ==="); +} diff --git a/examples/custom_pilot.rs b/examples/custom_pilot.rs new file mode 100644 index 00000000..bd7a730e --- /dev/null +++ b/examples/custom_pilot.rs @@ -0,0 +1,67 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Custom Pilot implementation example. +//! +//! This example demonstrates how to implement a custom Pilot +//! that provides navigation guidance during retrieval. +//! +//! # What you'll learn: +//! - How to implement the Pilot trait +//! - When to intervene (START, FORK, BACKTRACK, EVALUATE) +//! - How to provide ranked candidates +//! - How to integrate custom Pilot with the retrieval pipeline +//! +//! # Key concepts: +//! +//! ## Intervention Points +//! - START: Before search begins - analyze query, set direction +//! - FORK: At branch points - rank candidates, guide path selection +//! - BACKTRACK: When search fails - suggest alternatives +//! - EVALUATE: After content found - check sufficiency +//! +//! ## Score Merging +//! ```text +//! final_score = α × algorithm_score + β × llm_score +//! ``` +//! +//! # TODO: Implementation steps +//! +//! 1. Define your custom Pilot struct +//! 2. Implement the Pilot trait +//! 3. Configure intervention conditions +//! 4. Integrate with EngineBuilder + +// TODO: Implement custom Pilot +// ``` +// use vectorless::retrieval::pilot::{Pilot, PilotDecision, SearchState, InterventionPoint}; +// +// pub struct MyCustomPilot { +// // Your fields here +// } +// +// impl Pilot for MyCustomPilot { +// fn should_intervene(&self, state: &SearchState, point: InterventionPoint) -> bool { +// // Decide when to intervene +// todo!() +// } +// +// async fn decide(&self, state: &SearchState) -> PilotDecision { +// // Make navigation decision +// todo!() +// } +// } +// ``` + +fn main() { + // TODO: Show how to use custom Pilot with EngineBuilder + // + // let pilot = MyCustomPilot::new(); + // let engine = EngineBuilder::new() + // .with_pilot(Arc::new(pilot)) + // .build()?; + // + // // Use engine with custom Pilot guidance + + println!("TODO: Implement custom_pilot example"); +} diff --git a/examples/events.rs b/examples/events.rs new file mode 100644 index 00000000..eab7b68a --- /dev/null +++ b/examples/events.rs @@ -0,0 +1,152 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Event callbacks example. +//! +//! This example demonstrates the event system for: +//! - Monitoring indexing progress +//! - Tracking query execution +//! - Debugging retrieval behavior +//! +//! # Usage +//! +//! ```bash +//! cargo run --example events +//! 
``` + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; + +use vectorless::client::{EngineBuilder, EventEmitter, IndexEvent, QueryEvent}; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + println!("=== Event Callbacks Example ===\n"); + + // 1. Create event emitter with handlers + println!("Step 1: Setting up event handlers...\n"); + + let index_count = Arc::new(AtomicUsize::new(0)); + let query_count = Arc::new(AtomicUsize::new(0)); + let nodes_visited = Arc::new(AtomicUsize::new(0)); + + let index_count_clone = index_count.clone(); + let query_count_clone = query_count.clone(); + let nodes_visited_clone = nodes_visited.clone(); + + let events = EventEmitter::new() + // Index events + .on_index(move |e| { + match e { + IndexEvent::Started { path } => { + println!(" [INDEX] Started: {}", path); + } + IndexEvent::FormatDetected { format } => { + println!(" [INDEX] Format: {:?}", format); + } + IndexEvent::TreeBuilt { node_count } => { + println!(" [INDEX] Tree built: {} nodes", node_count); + } + IndexEvent::Complete { doc_id } => { + println!(" [INDEX] Complete: {}", &doc_id[..8]); + index_count_clone.fetch_add(1, Ordering::SeqCst); + } + IndexEvent::Error { message } => { + println!(" [INDEX] Error: {}", message); + } + _ => {} + } + }) + // Query events + .on_query(move |e| { + match e { + QueryEvent::Started { query } => { + println!(" [QUERY] Started: \"{}\"", query); + query_count_clone.fetch_add(1, Ordering::SeqCst); + } + QueryEvent::NodeVisited { title, score, .. } => { + println!(" [QUERY] Visited: \"{}\" (score: {:.2})", title, score); + nodes_visited_clone.fetch_add(1, Ordering::SeqCst); + } + QueryEvent::CandidateFound { node_id, score } => { + println!(" [QUERY] Candidate: {} (score: {:.2})", &node_id[..8], score); + } + QueryEvent::Complete { total_results, confidence } => { + println!(" [QUERY] Complete: {} results, confidence: {:.2}", total_results, confidence); + } + QueryEvent::Error { message } => { + println!(" [QUERY] Error: {}", message); + } + _ => {} + } + }); + + println!(" ✓ Event handlers configured\n"); + + // 2. Create engine with events + println!("Step 2: Creating engine with event emitter..."); + let engine = EngineBuilder::new() + .with_workspace("./workspace_events_example") + .with_events(events) + .build() + .map_err(|e| vectorless::Error::Config(e.to_string()))?; + println!(" ✓ Engine created\n"); + + // 3. Index a document (events will fire) + println!("Step 3: Indexing document (watch events)...\n"); + + let temp_dir = tempfile::tempdir()?; + let doc_content = r#"# Example Document + +## Introduction + +This is an example document for demonstrating event callbacks. + +## Features + +- Event monitoring for indexing +- Event monitoring for queries +- Progress tracking + +## Architecture + +The event system uses handlers that can be attached to the engine builder. +"#; + + let doc_path = temp_dir.path().join("example.md"); + tokio::fs::write(&doc_path, doc_content).await?; + + let doc_id = engine.index(&doc_path).await?; + println!(); + + // 4. Query the document (events will fire) + println!("Step 4: Querying document (watch events)...\n"); + + let result = engine.query(&doc_id, "What features are available?").await?; + println!(); + + // 5.
Show results + println!("Step 5: Query result:"); + println!(" - Score: {:.2}", result.score); + println!(" - Nodes: {}", result.node_ids.len()); + if !result.content.is_empty() { + let preview: String = result.content.chars().take(100).collect(); + println!(" - Content: {}...", preview); + } + println!(); + + // 6. Show statistics + println!("Step 6: Event statistics:"); + println!(" - Index events fired: {}", index_count.load(Ordering::SeqCst)); + println!(" - Query events fired: {}", query_count.load(Ordering::SeqCst)); + println!(" - Nodes visited: {}", nodes_visited.load(Ordering::SeqCst)); + println!(); + + // 7. Cleanup + println!("Step 7: Cleanup..."); + engine.remove(&doc_id)?; + println!(" ✓ Document removed\n"); + + println!("=== Example Complete ==="); + Ok(()) +} diff --git a/examples/markdownflow.rs b/examples/markdownflow.rs index 4cde85f9..ba566aa6 100644 --- a/examples/markdownflow.rs +++ b/examples/markdownflow.rs @@ -35,6 +35,9 @@ Vectorless is a document indexing and retrieval library that uses tree-based nav #[tokio::main] async fn main() -> Result<(), Box<dyn std::error::Error>> { + // Initialize tracing for debug output (set RUST_LOG=debug to see more) + tracing_subscriber::fmt::init(); + println!("=== Vectorless Markdown Flow Example ===\n"); // Step 1: Create a Vectorless client (no API key needed - LLM config is automatic) diff --git a/examples/multi_format.rs b/examples/multi_format.rs new file mode 100644 index 00000000..f146b851 --- /dev/null +++ b/examples/multi_format.rs @@ -0,0 +1,77 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Multi-format document processing example. +//! +//! This example demonstrates how to work with different +//! document formats (Markdown, PDF, DOCX, HTML). +//! +//! # What you'll learn: +//! - How to index documents of different formats +//! - How format detection works +//! - How to configure format-specific parsing options +//! - How to handle mixed-format document sets +//! +//! # Supported formats: +//! - **Markdown** (.md): Full support with ToC extraction +//! - **PDF** (.pdf): Text extraction, structure inference +//! - **DOCX** (.docx): Word document parsing +//! - **HTML** (.html, .htm): Web page parsing (planned) +//! - **Plain text** (.txt): Basic text parsing (planned) +//! +//! # Format-specific considerations: +//! +//! ## Markdown +//! - Best format for structured documents +//! - Automatic heading hierarchy detection +//! - Code block handling +//! +//! ## PDF +//! - Text extraction quality varies +//! - No explicit structure (inferred from fonts/spacing) +//! - Tables and images not supported +//! +//! ## DOCX +//! - Good structure preservation +//! - Styles mapped to hierarchy +//! - Limited formatting support +//! +//! # TODO: Implementation steps +//! +//! 1. Detect document format from extension or content +//! 2. Configure format-specific parser options +//! 3. Index documents of mixed formats +//! 4.
Query across all formats + +// TODO: Implement multi-format example +// ``` +// use vectorless::client::{Engine, EngineBuilder}; +// use vectorless::parser::DocumentFormat; +// +// async fn index_multiple_formats(engine: &Engine) -> vectorless::Result<()> { +// // Index different formats +// let md_doc = engine.index("./README.md").await?; +// let pdf_doc = engine.index("./paper.pdf").await?; +// let docx_doc = engine.index("./report.docx").await?; +// +// // Query works across all formats +// let result = engine.query(&md_doc, "What is this about?").await?; +// Ok(()) +// } +// ``` + +fn main() { + // TODO: Show multi-format indexing and querying + // + // // Index documents of different formats + // let md_id = engine.index("./docs/guide.md").await?; + // let pdf_id = engine.index("./docs/paper.pdf").await?; + // let docx_id = engine.index("./docs/report.docx").await?; + // + // // Each can be queried independently + // for doc_id in &[md_id, pdf_id, docx_id] { + // let result = engine.query(doc_id, "summary").await?; + // println!("Result: {}", result.content); + // } + + println!("TODO: Implement multi_format example"); +} diff --git a/examples/session.rs b/examples/session.rs new file mode 100644 index 00000000..25aaf3ab --- /dev/null +++ b/examples/session.rs @@ -0,0 +1,207 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Session-based multi-document operations example. +//! +//! This example demonstrates the Session API for: +//! - Managing multiple documents in a single session +//! - Cross-document queries +//! - Session caching for improved performance +//! - Session statistics +//! +//! # Usage +//! +//! ```bash +//! cargo run --example session +//! ``` + +use vectorless::client::EngineBuilder; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + println!("=== Session-Based Multi-Document Example ===\n"); + + // 1. Create the engine + println!("Step 1: Creating engine..."); + let engine = EngineBuilder::new() + .with_workspace("./workspace_session_example") + .build() + .map_err(|e| vectorless::Error::Config(e.to_string()))?; + println!(" ✓ Engine created\n"); + + // 2. Create a session for multi-document operations + println!("Step 2: Creating session..."); + let session = engine.session(); + println!(" ✓ Session ID: {}\n", session.id()); + + // 3. Index multiple documents into the session + println!("Step 3: Indexing documents..."); + + // Create sample documents + let temp_dir = tempfile::tempdir()?; + + let doc1_content = r#"# Architecture Guide + +## Overview + +Vectorless uses a tree-based architecture for document navigation. + +## Components + +- **Indexer**: Parses documents and builds tree structure +- **Retriever**: Navigates tree to find relevant content +- **Workspace**: Manages document persistence +"#; + + let doc2_content = r#"# API Reference + +## Engine + +The main entry point for vectorless operations. + +### Methods + +- `index(path)`: Index a document +- `query(doc_id, question)`: Query a document +- `list_documents()`: List all documents + +## Session + +Multi-document operations with caching. + +### Methods + +- `index(path)`: Index into session +- `query(doc_id, question)`: Query cached document +- `query_all(question)`: Query across all documents +"#; + + let doc3_content = r#"# Configuration Guide + +## Workspace Settings + +The workspace directory stores indexed documents.
+ +```toml +[storage] +workspace_dir = "./workspace" +``` + +## Retrieval Settings + +Configure retrieval behavior: + +```toml +[retrieval] +top_k = 5 +max_tokens = 4000 +``` + +## Content Aggregator + +Control content aggregation: + +```toml +[retrieval.content] +enabled = true +token_budget = 4000 +``` +"#; + + // Write sample documents + let doc1_path = temp_dir.path().join("architecture.md"); + let doc2_path = temp_dir.path().join("api.md"); + let doc3_path = temp_dir.path().join("config.md"); + + tokio::fs::write(&doc1_path, doc1_content).await?; + tokio::fs::write(&doc2_path, doc2_content).await?; + tokio::fs::write(&doc3_path, doc3_content).await?; + + // Index into session + let doc1_id = session.index(&doc1_path).await?; + println!(" ✓ Indexed: architecture.md -> {}", &doc1_id[..8]); + + let doc2_id = session.index(&doc2_path).await?; + println!(" ✓ Indexed: api.md -> {}", &doc2_id[..8]); + + let doc3_id = session.index(&doc3_path).await?; + println!(" ✓ Indexed: config.md -> {}", &doc3_id[..8]); + println!(); + + // 4. List documents in session + println!("Step 4: Session documents:"); + for doc in session.list_documents() { + println!(" - {} ({})", doc.name, &doc.id[..8]); + } + println!(); + + // 5. Query individual documents (uses cache) + println!("Step 5: Query individual documents..."); + let query = "What methods are available?"; + + println!(" Query: \"{}\"", query); + let start = std::time::Instant::now(); + let result = session.query(&doc2_id, query).await?; + let elapsed = start.elapsed(); + println!(" - Time: {:?}", elapsed); + println!(" - Score: {:.2}", result.score); + if !result.content.is_empty() { + let preview: String = result.content.chars().take(100).collect(); + println!(" - Preview: {}...", preview); + } + println!(); + + // 6. Query same document again (should be faster due to cache) + println!("Step 6: Query cached document (should be faster)..."); + let start = std::time::Instant::now(); + let result = session.query(&doc2_id, "How to list documents?").await?; + let cached_elapsed = start.elapsed(); + println!(" - Time: {:?}", cached_elapsed); + println!(" - Score: {:.2}", result.score); + println!(); + + // 7. Query across all documents + println!("Step 7: Cross-document query..."); + let query = "How to configure the workspace?"; + println!(" Query: \"{}\"", query); + + let results = session.query_all(query).await?; + println!(" Found {} relevant documents:", results.len()); + + for (i, result) in results.iter().enumerate() { + println!( + " {}. {} (score: {:.2})", + i + 1, + &result.doc_id[..8], + result.score + ); + } + println!(); + + // 8. Show session statistics + println!("Step 8: Session statistics:"); + let stats = session.stats(); + println!(" - Documents: {}", session.list_documents().len()); + println!(" - Queries: {}", stats.query_count.get()); + println!(" - Cache hits: {}", stats.cache_hits.get()); + println!(" - Cache misses: {}", stats.cache_misses.get()); + println!( + " - Cache hit rate: {:.1}%", + stats.cache_hit_rate() * 100.0 + ); + if let Some(avg_time) = stats.avg_query_time() { + println!(" - Avg query time: {:?}", avg_time); + } + println!(" - Session age: {:?}", session.age()); + println!(); + + // 9. 
Cleanup + println!("Step 9: Cleanup..."); + engine.remove(&doc1_id)?; + engine.remove(&doc2_id)?; + engine.remove(&doc3_id)?; + println!(" ✓ Documents removed\n"); + + println!("=== Example Complete ==="); + Ok(()) +} diff --git a/examples/streaming.rs b/examples/streaming.rs new file mode 100644 index 00000000..8942110c --- /dev/null +++ b/examples/streaming.rs @@ -0,0 +1,70 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Streaming retrieval example. +//! +//! This example demonstrates how to use streaming retrieval +//! to get results incrementally as they are found. +//! +//! # What you'll learn: +//! - How to use `query_stream()` for progressive results +//! - How to handle RetrieveEvent types +//! - How to display results as they arrive +//! - How to cancel long-running queries +//! +//! # RetrieveEvent types: +//! - `Started`: Query began, shows planned strategy +//! - `NodeVisited`: A node was visited during search +//! - `ContentFound`: Relevant content was found +//! - `Backtracking`: Search is backtracking for more data +//! - `Completed`: Query finished with final results +//! - `Error`: An error occurred +//! +//! # Use cases: +//! - Interactive Q&A with real-time feedback +//! - Long-running queries on large documents +//! - Debugging retrieval behavior +//! - Building responsive UIs +//! +//! # TODO: Implementation steps +//! +//! 1. Configure engine for streaming +//! 2. Call query_stream() instead of query() +//! 3. Process events as they arrive +//! 4. Handle completion and errors + +// TODO: Implement streaming retrieval +// ``` +// use futures::StreamExt; // for `.next()`; assumes the returned stream implements `futures::Stream` +// use vectorless::client::{Engine, RetrieveEvent}; +// +// async fn streaming_query( +// engine: &Engine, +// doc_id: &DocumentId, +// query: &str, +// ) { +// let mut stream = engine.query_stream(doc_id, query).await; +// +// while let Some(event) = stream.next().await { +// match event { +// RetrieveEvent::Started { strategy } => { +// println!("Starting search with strategy: {:?}", strategy); +// } +// RetrieveEvent::ContentFound { node_id, preview } => { +// println!("Found: {} - {}", node_id, preview); +// } +// RetrieveEvent::Completed { response } => { +// println!("Done! Confidence: {}", response.confidence); +// } +// _ => {} +// } +// } +// } +// ``` + +fn main() { + // TODO: Show streaming query usage + // + // streaming_query(&engine, &doc_id, "What is the architecture?").await; + + println!("TODO: Implement streaming example"); +} diff --git a/src/client/builder.rs b/src/client/builder.rs index 243e047e..76a335cf 100644 --- a/src/client/builder.rs +++ b/src/client/builder.rs @@ -9,7 +9,8 @@ use crate::config::{Config, ConfigLoader, RetrievalConfig}; use crate::retrieval::PipelineRetriever; use crate::storage::Workspace; -use super::Engine; +use super::engine::Engine; +use super::events::EventEmitter; /// Default configuration file names to search for. const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorless.toml"]; @@ -42,6 +43,9 @@ pub struct EngineBuilder { /// Custom retrieval config. retrieval_config: Option<RetrievalConfig>, + + /// Event emitter. + events: Option<EventEmitter>, } impl EngineBuilder { @@ -53,6 +57,7 @@ config_path: None, config: None, retrieval_config: None, + events: None, } } @@ -84,6 +89,13 @@ self } + /// Set the event emitter for callbacks.
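+ /// + /// A minimal sketch of wiring handlers into the builder (handler body illustrative): + /// + /// ```rust,ignore + /// let events = EventEmitter::new().on_index(|e| println!("index event: {:?}", e)); + /// let engine = EngineBuilder::new().with_events(events).build()?; + /// ```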
+ #[must_use] + pub fn with_events(mut self, events: EventEmitter) -> Self { + self.events = Some(events); + self + } + /// Search for config file in current directory and parent directories. fn find_config_file() -> Option<PathBuf> { let current_dir = std::env::current_dir().ok()?; @@ -127,32 +139,33 @@ /// Returns a [`BuildError`] if: /// - Configuration loading fails /// - Workspace creation fails + /// - Required API key is missing pub fn build(self) -> Result<Engine, BuildError> { // Load or create configuration - let config = if let Some(config) = self.config { - // Use explicitly provided config + let mut config = if let Some(config) = self.config { config } else if let Some(path) = self.config_path { - // Load from specified path ConfigLoader::new() .file(&path) .load() .map_err(|e| BuildError::Config(e.to_string()))? } else if let Some(config_path) = Self::find_config_file() { - // Auto-detect config file ConfigLoader::new().file(&config_path).load().map_err(|e| { BuildError::Config(format!("Failed to load {}: {}", config_path.display(), e)) })? } else { - // Use defaults Config::default() }; + // Override retrieval config if provided + if let Some(retrieval_config) = self.retrieval_config { + config.retrieval = retrieval_config; + } + // Open workspace: prefer explicit path, fallback to config let workspace = if let Some(path) = &self.workspace { Some(Workspace::open(path).map_err(|e| BuildError::Workspace(e.to_string()))?) } else { - // Use workspace_dir from config Some( Workspace::open(&config.storage.workspace_dir) .map_err(|e| BuildError::Workspace(e.to_string()))?, @@ -175,25 +188,33 @@ }; // Create pipeline retriever with config - let retrieval_config = self - .retrieval_config - .unwrap_or_else(|| config.retrieval.clone()); + let retrieval_config = config.retrieval.clone(); let mut retriever = PipelineRetriever::new().with_max_iterations(retrieval_config.search.max_iterations); - // Add LLM client if API key is available in retrieval config - if let Some(ref api_key) = retrieval_config.api_key { - let llm_config = crate::llm::LlmConfig::new(&retrieval_config.model) - .with_endpoint(retrieval_config.endpoint.clone()) - .with_api_key(api_key.clone()) - .with_temperature(retrieval_config.temperature); - let llm_client = crate::llm::LlmClient::new(llm_config); - retriever = retriever.with_llm_client(llm_client); + // LLM API key is REQUIRED for retrieval (Pilot needs it for semantic navigation) + // Try retrieval config first, then fall back to summary config + let retrieval_api_key = retrieval_config.api_key.clone() + .or_else(|| config.summary.api_key.clone()) + .ok_or(BuildError::MissingApiKey)?; + + let llm_config = crate::llm::LlmConfig::new(&retrieval_config.model) + .with_endpoint(retrieval_config.endpoint.clone()) + .with_api_key(retrieval_api_key) + .with_temperature(retrieval_config.temperature); + let llm_client = crate::llm::LlmClient::new(llm_config); + retriever = retriever.with_llm_client(llm_client); + + // Configure content aggregator if enabled + if retrieval_config.content.enabled { + retriever = retriever.with_content_config( + retrieval_config.content.to_aggregator_config() + ); } - Ok(Engine::with_components( - config, workspace, retriever, executor, - )) + // Build engine + Engine::with_components(config, workspace, retriever, executor) + .map_err(|e| BuildError::Other(e.to_string())) } } @@ -213,6 +234,14 @@ pub enum BuildError { /// Workspace error. #[error("Workspace error: {0}")] Workspace(String), + + /// Missing API key for retrieval.
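+ /// + /// Typically resolved by exporting `OPENAI_API_KEY`, or by setting the key in `vectorless.toml`; a sketch with an illustrative placeholder value: + /// + /// ```toml + /// [retrieval] + /// api_key = "sk-..." + /// ```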
+ #[error("Missing API key: LLM API key is required for retrieval. Set OPENAI_API_KEY environment variable or configure retrieval.api_key")] + MissingApiKey, + + /// Other error. + #[error("{0}")] + Other(String), } #[cfg(test)] diff --git a/src/client/context.rs b/src/client/context.rs new file mode 100644 index 00000000..344c05cb --- /dev/null +++ b/src/client/context.rs @@ -0,0 +1,337 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Request context and configuration. +//! +//! This module provides request-scoped configuration and state management +//! for client operations. It allows overriding global configuration on a +//! per-request basis. +//! +//! # Example +//! +//! ```rust,ignore +//! let ctx = ClientContext::new() +//! .with_top_k(10) +//! .with_token_budget(8000) +//! .with_timeout(Duration::from_secs(30)); +//! +//! let result = client.query_with_context(&doc_id, "query", &ctx).await?; +//! ``` + +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +use uuid::Uuid; + +use crate::retrieval::content::OutputFormatConfig; + +/// Request context for client operations. +/// +/// Provides request-scoped configuration overrides and metadata. +#[derive(Debug, Clone)] +pub struct ClientContext { + /// Unique request ID for tracing. + pub request_id: Uuid, + + /// Request-specific configuration overrides. + pub config: RequestContextConfig, + + /// Request metadata (custom key-value pairs). + pub metadata: HashMap, + + /// Request deadline (for timeout). + pub deadline: Option, + + /// Priority (higher = more important). + pub priority: u8, +} + +impl Default for ClientContext { + fn default() -> Self { + Self::new() + } +} + +impl ClientContext { + /// Create a new context with defaults. + pub fn new() -> Self { + Self { + request_id: Uuid::new_v4(), + config: RequestContextConfig::default(), + metadata: HashMap::new(), + deadline: None, + priority: 5, // Default priority + } + } + + /// Create a context with a specific request ID. + pub fn with_id(id: Uuid) -> Self { + Self { + request_id: id, + ..Self::new() + } + } + + /// Set the top_k override for retrieval. + pub fn with_top_k(mut self, top_k: usize) -> Self { + self.config.top_k = Some(top_k); + self + } + + /// Set the token budget override. + pub fn with_token_budget(mut self, budget: usize) -> Self { + self.config.token_budget = Some(budget); + self + } + + /// Set the content format override. + pub fn with_content_format(mut self, format: OutputFormatConfig) -> Self { + self.config.content_format = Some(format); + self + } + + /// Set whether to include summaries. + pub fn with_summaries(mut self, include: bool) -> Self { + self.config.features.include_summaries = include; + self + } + + /// Set whether to include content. + pub fn with_content(mut self, include: bool) -> Self { + self.config.features.include_content = include; + self + } + + /// Set whether to enable caching. + pub fn with_cache(mut self, enable: bool) -> Self { + self.config.features.enable_cache = enable; + self + } + + /// Set whether to enable sufficiency checking. + pub fn with_sufficiency_check(mut self, enable: bool) -> Self { + self.config.features.enable_sufficiency_check = enable; + self + } + + /// Set a timeout duration. + pub fn with_timeout(mut self, duration: Duration) -> Self { + self.deadline = Some(Instant::now() + duration); + self + } + + /// Set a deadline. 
+ pub fn with_deadline(mut self, deadline: Instant) -> Self { + self.deadline = Some(deadline); + self + } + + /// Set the priority (0-10, higher = more important). + pub fn with_priority(mut self, priority: u8) -> Self { + self.priority = priority.min(10); + self + } + + /// Add metadata. + pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self { + self.metadata.insert(key.into(), value.into()); + self + } + + /// Check if the request has timed out. + pub fn is_timed_out(&self) -> bool { + self.deadline + .map(|d| Instant::now() > d) + .unwrap_or(false) + } + + /// Get remaining time until deadline. + pub fn remaining_time(&self) -> Option<Duration> { + self.deadline + .map(|d| d.saturating_duration_since(Instant::now())) + } + + /// Merge with another context (other takes precedence). + pub fn merge(&self, other: &ClientContext) -> ClientContext { + let mut merged = self.clone(); + merged.request_id = other.request_id; + + if other.config.top_k.is_some() { + merged.config.top_k = other.config.top_k; + } + if other.config.token_budget.is_some() { + merged.config.token_budget = other.config.token_budget; + } + if other.config.content_format.is_some() { + merged.config.content_format = other.config.content_format.clone(); + } + if other.deadline.is_some() { + merged.deadline = other.deadline; + } + if other.priority != 5 { + merged.priority = other.priority; + } + + // Merge metadata + for (k, v) in &other.metadata { + merged.metadata.insert(k.clone(), v.clone()); + } + + // Merge feature flags + merged.config.features = FeatureFlags { + include_summaries: other.config.features.include_summaries, + include_content: other.config.features.include_content, + enable_cache: other.config.features.enable_cache, + enable_sufficiency_check: other.config.features.enable_sufficiency_check, + }; + + merged + } +} + +/// Request-specific configuration overrides. +#[derive(Debug, Clone, Default)] +pub struct RequestContextConfig { + /// Override top_k for retrieval. + pub top_k: Option<usize>, + + /// Override token budget. + pub token_budget: Option<usize>, + + /// Override content format. + pub content_format: Option<OutputFormatConfig>, + + /// Feature flags. + pub features: FeatureFlags, +} + +/// Feature flags for request. +#[derive(Debug, Clone, Copy)] +pub struct FeatureFlags { + /// Include summaries in results. + pub include_summaries: bool, + + /// Include content in results. + pub include_content: bool, + + /// Enable result caching. + pub enable_cache: bool, + + /// Enable sufficiency checking. + pub enable_sufficiency_check: bool, +} + +impl Default for FeatureFlags { + fn default() -> Self { + Self { + include_summaries: true, + include_content: true, + enable_cache: true, + enable_sufficiency_check: true, + } + } +} + +impl FeatureFlags { + /// Create with all features enabled. + pub fn all() -> Self { + Self { + include_summaries: true, + include_content: true, + enable_cache: true, + enable_sufficiency_check: true, + } + } + + /// Create with minimal features (fastest). + pub fn minimal() -> Self { + Self { + include_summaries: false, + include_content: true, + enable_cache: false, + enable_sufficiency_check: false, + } + } + + /// Create for deep analysis.
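+ /// + /// Currently identical to [`FeatureFlags::all`]; kept as a separate constructor so deep-analysis defaults can diverge later without breaking callers.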
+ pub fn deep() -> Self { + Self { + include_summaries: true, + include_content: true, + enable_cache: true, + enable_sufficiency_check: true, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_context_creation() { + let ctx = ClientContext::new(); + assert!(!ctx.request_id.is_nil()); + assert!(ctx.config.top_k.is_none()); + assert!(ctx.deadline.is_none()); + } + + #[test] + fn test_context_with_overrides() { + let ctx = ClientContext::new() + .with_top_k(10) + .with_token_budget(8000) + .with_cache(false); + + assert_eq!(ctx.config.top_k, Some(10)); + assert_eq!(ctx.config.token_budget, Some(8000)); + assert!(!ctx.config.features.enable_cache); + } + + #[test] + fn test_context_timeout() { + let ctx = ClientContext::new() + .with_timeout(Duration::from_millis(100)); + + assert!(!ctx.is_timed_out()); + assert!(ctx.remaining_time().is_some()); + } + + #[test] + fn test_context_metadata() { + let ctx = ClientContext::new() + .with_metadata("user", "test") + .with_metadata("version", "1.0"); + + assert_eq!(ctx.metadata.get("user"), Some(&"test".to_string())); + assert_eq!(ctx.metadata.get("version"), Some(&"1.0".to_string())); + } + + #[test] + fn test_context_merge() { + let ctx1 = ClientContext::new() + .with_top_k(5) + .with_metadata("key1", "value1"); + + let ctx2 = ClientContext::new() + .with_top_k(10) + .with_metadata("key2", "value2"); + + let merged = ctx1.merge(&ctx2); + + assert_eq!(merged.config.top_k, Some(10)); + assert_eq!(merged.metadata.get("key1"), Some(&"value1".to_string())); + assert_eq!(merged.metadata.get("key2"), Some(&"value2".to_string())); + } + + #[test] + fn test_feature_flags() { + let all = FeatureFlags::all(); + assert!(all.include_summaries); + assert!(all.include_content); + + let minimal = FeatureFlags::minimal(); + assert!(!minimal.include_summaries); + assert!(!minimal.enable_cache); + } +} diff --git a/src/client/engine.rs b/src/client/engine.rs index aeaa87b5..8156586e 100644 --- a/src/client/engine.rs +++ b/src/client/engine.rs @@ -1,26 +1,20 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Main Engine client for document indexing and retrieval. +//! Main Engine client - the entry point for vectorless. //! -//! This module provides the high-level API for: -//! - Indexing documents (Markdown, PDF, DOCX, HTML) -//! - Retrieving document structure -//! - Querying documents with adaptive retrieval +//! This module provides the main client for document indexing and retrieval. +//! The Engine is an orchestrator that delegates to specialized sub-clients. //! -//! # Design +//! # Architecture //! -//! The client uses **interior mutability** patterns to allow sharing across -//! async tasks while maintaining thread safety: -//! -//! - `Arc<RwLock<Workspace>>` - Thread-safe workspace access (multiple readers, single writer) -//! - `Arc<Mutex<PipelineExecutor>>` - Exclusive pipeline execution -//! - `Arc<PipelineRetriever>` - Immutable retriever (uses interior mutability internally) -//! -//! # Thread Safety -//! -//! `Engine` is `Clone + Send + Sync`. Cloning is cheap (reference count increment). -//! All clones share the same underlying resources. +//! ```text +//! Engine (Orchestrator) +//! ├── IndexerClient → Document indexing +//! ├── RetrieverClient → Query and retrieval +//! ├── WorkspaceClient → Document persistence +//! └── EventEmitter → Progress and events +//! ``` //! //! # Example //! @@ -34,13 +28,13 @@ //! .with_workspace("./my_workspace") //! .build()?; //! -//! // Clone for use in multiple tasks (cheap - just Arc clone) -//!
let client1 = client.clone(); -//! let client2 = client.clone(); -//! -//! // Can use concurrently +//! // Index a document //! let doc_id = client.index("./document.md").await?; +//! +//! // Query the document //! let result = client.query(&doc_id, "What is this?").await?; +//! +//! println!("Found: {}", result.content); //! # Ok(()) //! # } //! ``` @@ -49,16 +43,20 @@ use std::path::Path; use std::sync::{Arc, Mutex, RwLock}; use tracing::info; -use uuid::Uuid; use crate::config::Config; use crate::domain::{DocumentTree, Error, Result}; -use crate::index::{IndexInput, PipelineExecutor, PipelineOptions, SummaryStrategy}; -use crate::parser::DocumentFormat; -use crate::retrieval::{PipelineRetriever, Retriever}; -use crate::storage::{DocumentMeta as StorageMeta, PersistedDocument, Workspace}; - -use super::types::{DocumentInfo, IndexMode, IndexOptions, QueryResult}; +use crate::index::PipelineExecutor; +use crate::retrieval::{PipelineRetriever, RetrieveOptions}; +use crate::storage::Workspace; + +use super::context::ClientContext; +use super::events::EventEmitter; +use super::indexer::IndexerClient; +use super::retriever::RetrieverClient; +use super::session::Session; +use super::types::{DocumentInfo, IndexOptions, QueryResult}; +use super::workspace::WorkspaceClient; /// The main Engine client. /// @@ -68,30 +66,26 @@ /// # Cloning /// /// Cloning is cheap - it only increments reference counts (`Arc`). All clones -/// share the same underlying resources (workspace, retriever, executor). +/// share the same underlying resources. /// /// # Thread Safety /// -/// The client is `Clone + Send + Sync` and can be safely shared across -/// threads. All mutable state is protected by appropriate synchronization: -/// -/// - Workspace: `Arc<RwLock<Workspace>>` - Multiple readers, single writer -/// - Executor: `Arc<Mutex<PipelineExecutor>>` - Exclusive access during indexing -/// - Retriever: `Arc<PipelineRetriever>` - Immutable, uses internal synchronization +/// The client is `Clone + Send + Sync` and can be safely shared across threads. pub struct Engine { /// Configuration (immutable, shared). config: Arc<Config>, - /// Workspace for persistence (with built-in LRU cache). - /// Uses RwLock for concurrent read access. - workspace: Option<Arc<RwLock<Workspace>>>, + /// Indexer client for document indexing. + indexer: IndexerClient, - /// Pipeline retriever (immutable, uses interior mutability internally). - retriever: Arc<PipelineRetriever>, + /// Retriever client for queries. + retriever: RetrieverClient, - /// Pipeline executor for indexing. - /// Uses Mutex for exclusive access during pipeline execution. - executor: Arc<Mutex<PipelineExecutor>>, + /// Workspace client for persistence. + workspace: Option<WorkspaceClient>, + + /// Event emitter. + events: EventEmitter, } impl Engine { @@ -106,11 +100,47 @@ /// Note: Prefer using [`Engine::builder()`] for more control. fn new() -> Result<Self> { let config = Config::default(); + Self::with_components( + config, + None, + PipelineRetriever::new(), + PipelineExecutor::new(), + ) + } + + // ============================================================ + // Constructor (for Builder) + // ============================================================ + + /// Create a new client with the given components.
+ pub(crate) fn with_components( + config: Config, + workspace: Option<Workspace>, + retriever: PipelineRetriever, + executor: PipelineExecutor, + ) -> Result<Self> { + let config = Arc::new(config); + let events = EventEmitter::new(); + + // Create indexer client + let indexer = IndexerClient::new(executor) + .with_events(events.clone()); + + // Create retriever client + let retriever = RetrieverClient::new(retriever, Arc::clone(&config)) + .with_events(events.clone()); + + // Create workspace client (if workspace provided) + let workspace_client = workspace.map(|ws| { + WorkspaceClient::new(ws).with_events(events.clone()) + }); + Ok(Self { - config: Arc::new(config), - workspace: None, - retriever: Arc::new(PipelineRetriever::new()), - executor: Arc::new(Mutex::new(PipelineExecutor::new())), + config, + indexer, + retriever, + workspace: workspace_client, + events, }) } @@ -142,94 +172,101 @@ path: impl AsRef<Path>, options: IndexOptions, ) -> Result<String> { - let path = path.as_ref(); - let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf()); - - if !path.exists() { - return Err(Error::Parse(format!("File not found: {}", path.display()))); - } - - // Generate document ID - let doc_id = Uuid::new_v4().to_string(); - - // Detect format - let format = self.detect_format(&path, &options)?; - - info!("Indexing {:?} document: {}", format, path.display()); - - // Convert client options to pipeline options - let pipeline_options = PipelineOptions { - mode: match options.mode { - IndexMode::Auto => crate::index::IndexMode::Auto, - IndexMode::Pdf => crate::index::IndexMode::Pdf, - IndexMode::Markdown => crate::index::IndexMode::Markdown, - IndexMode::Html => crate::index::IndexMode::Html, - IndexMode::Docx => crate::index::IndexMode::Docx, - }, - generate_ids: options.generate_ids, - summary_strategy: if options.generate_summaries { - SummaryStrategy::selective(self.config.indexer.min_summary_tokens, false) - } else { - SummaryStrategy::none() - }, - generate_description: options.generate_description, - ..Default::default() - }; - - // Create pipeline input and execute (with mutex lock) - let input = IndexInput::file(&path); - let result = { - let mut executor = self - .executor - .lock() - .map_err(|_| Error::Other("Pipeline executor lock poisoned".to_string()))?; - executor.execute(input, pipeline_options).await? - }; - - // Build persisted document - let tree = result - .tree - .ok_or_else(|| Error::Parse("Document tree not generated".to_string()))?; - - let meta = StorageMeta::new(&doc_id, &result.name, format.extension()) - .with_source_path(path.to_string_lossy().to_string()) - .with_description(result.description.clone().unwrap_or_default()); - - let mut doc = PersistedDocument::new(meta, tree); - - // Add page count if available - if let Some(page_count) = result.page_count { - for i in 1..=page_count { - doc.add_page(i, ""); - } - } + let doc = self.indexer.index_with_options(path, options).await?; + let persisted = self.indexer.to_persisted(doc); // Save to workspace if configured if let Some(ref workspace) = self.workspace { - let mut ws = workspace - .write() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - ws.add(&doc)?; - info!("Saved document {} to workspace", doc_id); + workspace.save(&persisted)?; } - info!("Indexing complete. Document ID: {}", doc_id); + let doc_id = persisted.meta.id.clone(); + info!("Indexed document: {}", doc_id); Ok(doc_id) } - /// Detect document format from path and options.
- fn detect_format(&self, path: &Path, options: &IndexOptions) -> Result<DocumentFormat> { - match options.mode { - IndexMode::Auto => { - let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); - DocumentFormat::from_extension(ext) - .ok_or_else(|| Error::Parse(format!("Unknown format: {}", ext))) - } - IndexMode::Pdf => Ok(DocumentFormat::Pdf), - IndexMode::Markdown => Ok(DocumentFormat::Markdown), - IndexMode::Html => Ok(DocumentFormat::Html), - IndexMode::Docx => Ok(DocumentFormat::Docx), + // ============================================================ + // Document Querying + // ============================================================ + + /// Query a document. + /// + /// Uses the adaptive retriever to find relevant content. + /// + /// # Errors + /// + /// Returns an error if: + /// - No workspace is configured + /// - The document is not found + /// - The retrieval fails + pub async fn query(&self, doc_id: &str, question: &str) -> Result<QueryResult> { + let tree = self.get_structure(doc_id)?; + + let options = RetrieveOptions::new() + .with_top_k(self.config.retrieval.top_k) + .with_include_content(true) + .with_include_summaries(true); + + let mut result = self.retriever.query(&tree, question, &options).await?; + result.doc_id = doc_id.to_string(); + + Ok(result) + } + + /// Query a document with context. + /// + /// Allows request-specific configuration overrides. + pub async fn query_with_context( + &self, + doc_id: &str, + question: &str, + ctx: &ClientContext, + ) -> Result<QueryResult> { + let tree = self.get_structure(doc_id)?; + + let mut options = RetrieveOptions::new() + .with_top_k(self.config.retrieval.top_k) + .with_include_content(true) + .with_include_summaries(true); + + // Apply context overrides + if let Some(top_k) = ctx.config.top_k { + options.top_k = top_k; } + if let Some(token_budget) = ctx.config.token_budget { + options.max_tokens = token_budget; + } + + let mut result = self.retriever.query_with_context(&tree, question, &options, ctx).await?; + result.doc_id = doc_id.to_string(); + + Ok(result) + } + + // ============================================================ + // Session Management + // ============================================================ + + /// Create a session for multi-document operations.
+ /// + /// Sessions provide: + /// - Automatic caching of document trees + /// - Cross-document queries + /// - Session statistics + pub fn session(&self) -> Session { + let workspace = self.workspace.clone().unwrap_or_else(|| { + WorkspaceClient::from_arc( + Arc::new(RwLock::new(Workspace::open("./temp_workspace").unwrap())), + self.events.clone(), + ) + }); + + Session::new( + self.indexer.clone(), + self.retriever.clone(), + workspace, + self.events.clone(), + ) } // ============================================================ @@ -240,24 +277,7 @@ #[must_use] pub fn list_documents(&self) -> Vec<DocumentInfo> { match &self.workspace { - Some(workspace) => { - let ws = match workspace.read() { - Ok(guard) => guard, - Err(_) => return Vec::new(), - }; - ws.list_documents() - .iter() - .filter_map(|id| ws.get_meta(id)) - .map(|meta| DocumentInfo { - id: meta.id.clone(), - name: meta.doc_name.clone(), - format: meta.doc_type.clone(), - description: meta.doc_description.clone(), - page_count: meta.page_count, - line_count: meta.line_count, - }) - .collect() - } + Some(workspace) => workspace.list().unwrap_or_default(), None => Vec::new(), } } @@ -270,18 +290,10 @@ /// - No workspace is configured /// - The document is not found pub fn get_structure(&self, doc_id: &str) -> Result<DocumentTree> { - let workspace = self - .workspace - .as_ref() + let workspace = self.workspace.as_ref() .ok_or_else(|| Error::Config("No workspace configured".to_string()))?; - // Use read lock - Workspace::load now uses interior mutability for cache - let ws = workspace - .read() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - - let doc = ws - .load(doc_id)? + let doc = workspace.load(doc_id)? .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?; Ok(doc.tree) @@ -296,18 +308,10 @@ /// - The document is not found /// - No page content is available pub fn get_page_content(&self, doc_id: &str, pages: &str) -> Result<String> { - let workspace = self - .workspace - .as_ref() + let workspace = self.workspace.as_ref() .ok_or_else(|| Error::Config("No workspace configured".to_string()))?; - // Use read lock - Workspace::load now uses interior mutability for cache - let ws = workspace - .read() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - - let doc = ws - .load(doc_id)? + let doc = workspace.load(doc_id)? .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?; if doc.pages.is_empty() { @@ -358,73 +362,8 @@ Ok(result) } - /// Query a document. - /// - /// Uses the adaptive retriever to find relevant content.
- /// - /// # Errors - /// - /// Returns an error if: - /// - No workspace is configured - /// - The document is not found - /// - The retrieval fails - pub async fn query(&self, doc_id: &str, question: &str) -> Result<QueryResult> { - let tree = self.get_structure(doc_id)?; - - // Build retrieve options from config - let retrieve_options = crate::retrieval::RetrieveOptions::new() - .with_top_k(self.config.retrieval.top_k) - .with_include_content(true) - .with_include_summaries(true); - - // Use adaptive retriever - let response = self - .retriever - .retrieve(&tree, question, &retrieve_options) - .await - .map_err(|e| Error::Retrieval(e.to_string()))?; - - // Extract node IDs and build content from results - let node_ids: Vec<String> = response - .results - .iter() - .filter_map(|r| r.node_id.clone()) - .collect(); - - let content_parts: Vec<String> = response - .results - .iter() - .map(|r| { - let mut parts = vec![format!("## {}", r.title)]; - - if let Some(ref summary) = r.summary { - parts.push(format!("Summary: {}", summary)); - } - - if let Some(ref content) = r.content { - parts.push(content.clone()); - } - - parts.join("\n\n") - }) - .collect(); - - let content = if content_parts.is_empty() { - response.content - } else { - content_parts.join("\n\n---\n\n") - }; - - Ok(QueryResult { - doc_id: doc_id.to_string(), - node_ids, - content, - score: response.confidence, - }) - } - // ============================================================ - // Persistence + // Persistence Operations // ============================================================ /// Load a document from the workspace into cache. @@ -435,21 +374,14 @@ /// /// Returns an error if no workspace is configured. pub fn load(&self, doc_id: &str) -> Result<bool> { - let workspace = self - .workspace - .as_ref() + let workspace = self.workspace.as_ref() .ok_or_else(|| Error::Config("No workspace configured".to_string()))?; - // Use read lock - Workspace::load now uses interior mutability for cache - let ws = workspace - .read() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - - if !ws.contains(doc_id) { + if !workspace.exists(doc_id)? { return Ok(false); } - let _ = ws.load(doc_id)?; + let _ = workspace.load(doc_id)?; Ok(true) } @@ -459,15 +391,10 @@ /// /// Returns an error if no workspace is configured. pub fn remove(&self, doc_id: &str) -> Result<bool> { - let workspace = self - .workspace - .as_ref() + let workspace = self.workspace.as_ref() .ok_or_else(|| Error::Config("No workspace configured".to_string()))?; - let mut ws = workspace - .write() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - ws.remove(doc_id) + workspace.remove(doc_id) } /// Check if a document exists in the workspace. @@ -476,15 +403,10 @@ /// /// Returns an error if no workspace is configured. pub fn exists(&self, doc_id: &str) -> Result<bool> { - let workspace = self - .workspace - .as_ref() + let workspace = self.workspace.as_ref() .ok_or_else(|| Error::Config("No workspace configured".to_string()))?; - let ws = workspace - .read() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - Ok(ws.contains(doc_id)) + workspace.exists(doc_id) } /// Get metadata for a document. /// /// # Errors /// /// Returns an error if no workspace is configured.
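/// /// A short illustrative sketch (assumes an `engine` and `doc_id` in scope): /// /// ```rust,ignore /// if let Some(info) = engine.get_metadata(&doc_id)? { /// println!("{} ({})", info.name, info.format); /// } /// ```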
pub fn get_metadata(&self, doc_id: &str) -> Result<Option<DocumentInfo>> { - let workspace = self - .workspace - .as_ref() + let workspace = self.workspace.as_ref() .ok_or_else(|| Error::Config("No workspace configured".to_string()))?; - let ws = workspace - .read() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - - Ok(ws.get_meta(doc_id).map(|meta| DocumentInfo { - id: meta.id.clone(), - name: meta.doc_name.clone(), - format: meta.doc_type.clone(), - description: meta.doc_description.clone(), - page_count: meta.page_count, - line_count: meta.line_count, - })) + workspace.get_document_info(doc_id) } /// Remove multiple documents from the workspace. @@ -520,22 +429,10 @@ /// /// Returns an error if no workspace is configured. pub fn batch_remove(&self, doc_ids: &[&str]) -> Result<usize> { - let workspace = self - .workspace - .as_ref() + let workspace = self.workspace.as_ref() .ok_or_else(|| Error::Config("No workspace configured".to_string()))?; - let mut ws = workspace - .write() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - - let mut removed = 0; - for doc_id in doc_ids { - if ws.remove(doc_id)? { - removed += 1; - } - } - Ok(removed) + workspace.batch_remove(doc_ids) } /// Remove all documents from the workspace. @@ -546,38 +443,16 @@ /// /// Returns an error if no workspace is configured. pub fn clear(&self) -> Result<usize> { - let workspace = self - .workspace - .as_ref() + let workspace = self.workspace.as_ref() .ok_or_else(|| Error::Config("No workspace configured".to_string()))?; - let mut ws = workspace - .write() - .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?; - - let doc_ids: Vec<String> = ws.list_documents().iter().map(|s| s.to_string()).collect(); - let count = doc_ids.len(); - - for doc_id in &doc_ids { - let _ = ws.remove(doc_id); - } - - Ok(count) + workspace.clear() } /// Get the number of indexed documents. #[must_use] pub fn len(&self) -> usize { - match &self.workspace { - Some(workspace) => { - let ws = match workspace.read() { - Ok(guard) => guard, - Err(_) => return 0, - }; - ws.len() - } - None => 0, - } + self.workspace.as_ref().map(|w| w.len()).unwrap_or(0) } /// Check if there are no documents. @@ -587,22 +462,27 @@ } // ============================================================ - // Internal API (for Builder) + // Sub-Client Access // ============================================================ - /// Create a new client with the given components. - pub(crate) fn with_components( - config: Config, - workspace: Option<Workspace>, - retriever: PipelineRetriever, - executor: PipelineExecutor, - ) -> Self { - Self { - config: Arc::new(config), - workspace: workspace.map(|w| Arc::new(RwLock::new(w))), - retriever: Arc::new(retriever), - executor: Arc::new(Mutex::new(executor)), - } + /// Get the indexer client. + pub fn indexer(&self) -> &IndexerClient { + &self.indexer + } + + /// Get the retriever client. + pub fn retriever(&self) -> &RetrieverClient { + &self.retriever + } + + /// Get the workspace client. + pub fn workspace(&self) -> Option<&WorkspaceClient> { + self.workspace.as_ref() + } + + /// Get the configuration.
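+ /// + /// An illustrative read of one retrieval setting: + /// + /// ```rust,ignore + /// let top_k = engine.config().retrieval.top_k; + /// ```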
+ pub fn config(&self) -> &Config { + &self.config } } @@ -610,9 +490,10 @@ impl Clone for Engine { fn clone(&self) -> Self { Self { config: Arc::clone(&self.config), - workspace: self.workspace.as_ref().map(Arc::clone), - retriever: Arc::clone(&self.retriever), - executor: Arc::clone(&self.executor), + indexer: self.indexer.clone(), + retriever: self.retriever.clone(), + workspace: self.workspace.clone(), + events: self.events.clone(), } } } @@ -631,3 +512,15 @@ impl std::fmt::Debug for Engine { .finish_non_exhaustive() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_engine_builder() { + let builder = Engine::builder(); + // Builder exists + let _ = builder; + } +} diff --git a/src/client/events.rs b/src/client/events.rs new file mode 100644 index 00000000..a1d797c4 --- /dev/null +++ b/src/client/events.rs @@ -0,0 +1,365 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Event system for client operations. +//! +//! This module provides event types and handlers for observing +//! and reacting to client operations (indexing, querying, etc.). +//! +//! # Example +//! +//! ```rust,ignore +//! let emitter = EventEmitter::new() +//! .on_index(|e| match e { +//! IndexEvent::Complete { doc_id } => println!("Indexed: {}", doc_id), +//! _ => {} +//! }); +//! +//! let client = EngineBuilder::new() +//! .with_events(emitter) +//! .build()?; +//! ``` + +use std::sync::Arc; + +use async_trait::async_trait; +use tracing::info; + +use crate::parser::DocumentFormat; +use crate::retrieval::SufficiencyLevel; + +/// Event types for client operations. +#[derive(Debug, Clone)] +pub enum Event { + /// Indexing events. + Index(IndexEvent), + + /// Query events. + Query(QueryEvent), + + /// Workspace events. + Workspace(WorkspaceEvent), +} + +/// Indexing operation events. +#[derive(Debug, Clone)] +pub enum IndexEvent { + /// Started indexing a document. + Started { + /// File path being indexed. + path: String, + }, + + /// Document format detected. + FormatDetected { + /// Detected format. + format: DocumentFormat, + }, + + /// Parsing progress update. + ParsingProgress { + /// Percentage complete (0-100). + percent: u8, + }, + + /// Document tree built. + TreeBuilt { + /// Number of nodes in the tree. + node_count: usize, + }, + + /// Summary generation progress. + SummaryProgress { + /// Number of summaries completed. + completed: usize, + /// Total summaries to generate. + total: usize, + }, + + /// Indexing completed successfully. + Complete { + /// Generated document ID. + doc_id: String, + }, + + /// Error occurred during indexing. + Error { + /// Error message. + message: String, + }, +} + +/// Query operation events. +#[derive(Debug, Clone)] +pub enum QueryEvent { + /// Search started. + Started { + /// The query string. + query: String, + }, + + /// Node visited during search. + NodeVisited { + /// Node ID. + node_id: String, + /// Node title. + title: String, + /// Relevance score. + score: f32, + }, + + /// Candidate result found. + CandidateFound { + /// Node ID. + node_id: String, + /// Relevance score. + score: f32, + }, + + /// Sufficiency check result. + SufficiencyCheck { + /// Sufficiency level. + level: SufficiencyLevel, + /// Total tokens collected. + tokens: usize, + }, + + /// Query completed. + Complete { + /// Total results found. + total_results: usize, + /// Overall confidence score. + confidence: f32, + }, + + /// Error occurred during query. + Error { + /// Error message. 
+        message: String,
+    },
+}
+
+/// Workspace operation events.
+#[derive(Debug, Clone)]
+pub enum WorkspaceEvent {
+    /// Document saved to workspace.
+    Saved {
+        /// Document ID.
+        doc_id: String,
+    },
+
+    /// Document loaded from workspace.
+    Loaded {
+        /// Document ID.
+        doc_id: String,
+        /// Whether it was a cache hit.
+        cache_hit: bool,
+    },
+
+    /// Document removed from workspace.
+    Removed {
+        /// Document ID.
+        doc_id: String,
+    },
+
+    /// Workspace cleared.
+    Cleared {
+        /// Number of documents removed.
+        count: usize,
+    },
+}
+
+/// Sync event handler trait.
+pub trait EventHandler: Send + Sync {
+    /// Handle an event.
+    fn handle(&self, event: &Event);
+}
+
+/// Async event handler trait.
+#[async_trait]
+pub trait AsyncEventHandler: Send + Sync {
+    /// Handle an event asynchronously.
+    async fn handle(&self, event: &Event);
+}
+
+/// Type alias for sync index handler.
+pub type IndexHandler = Box<dyn Fn(&IndexEvent) + Send + Sync>;
+
+/// Type alias for sync query handler.
+pub type QueryHandler = Box<dyn Fn(&QueryEvent) + Send + Sync>;
+
+/// Type alias for sync workspace handler.
+pub type WorkspaceHandler = Box<dyn Fn(&WorkspaceEvent) + Send + Sync>;
+
+/// Event emitter for client operations.
+///
+/// Collects event handlers and dispatches events to them.
+#[derive(Default)]
+pub struct EventEmitter {
+    /// Index event handlers.
+    index_handlers: Vec<IndexHandler>,
+
+    /// Query event handlers.
+    query_handlers: Vec<QueryHandler>,
+
+    /// Workspace event handlers.
+    workspace_handlers: Vec<WorkspaceHandler>,
+
+    /// Async handlers.
+    async_handlers: Vec<Arc<dyn AsyncEventHandler>>,
+}
+
+impl EventEmitter {
+    /// Create a new event emitter with no handlers.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Add an index event handler.
+    pub fn on_index<F>(mut self, handler: F) -> Self
+    where
+        F: Fn(&IndexEvent) + Send + Sync + 'static,
+    {
+        self.index_handlers.push(Box::new(handler));
+        self
+    }
+
+    /// Add a query event handler.
+    pub fn on_query<F>(mut self, handler: F) -> Self
+    where
+        F: Fn(&QueryEvent) + Send + Sync + 'static,
+    {
+        self.query_handlers.push(Box::new(handler));
+        self
+    }
+
+    /// Add a workspace event handler.
+    pub fn on_workspace<F>(mut self, handler: F) -> Self
+    where
+        F: Fn(&WorkspaceEvent) + Send + Sync + 'static,
+    {
+        self.workspace_handlers.push(Box::new(handler));
+        self
+    }
+
+    /// Add an async event handler.
+    pub fn with_async_handler<H>(mut self, handler: Arc<H>) -> Self
+    where
+        H: AsyncEventHandler + 'static,
+    {
+        self.async_handlers.push(handler);
+        self
+    }
+
+    /// Emit an index event.
+    pub fn emit_index(&self, event: IndexEvent) {
+        for handler in &self.index_handlers {
+            handler(&event);
+        }
+        if !self.async_handlers.is_empty() {
+            // A sync emit cannot await async handlers; log the event instead.
+            let event = Event::Index(event.clone());
+            info!("Async event: {:?}", event);
+        }
+    }
+
+    /// Emit a query event.
+    pub fn emit_query(&self, event: QueryEvent) {
+        for handler in &self.query_handlers {
+            handler(&event);
+        }
+    }
+
+    /// Emit a workspace event.
+    pub fn emit_workspace(&self, event: WorkspaceEvent) {
+        for handler in &self.workspace_handlers {
+            handler(&event);
+        }
+    }
+
+    /// Check if there are any handlers registered.
+    pub fn has_handlers(&self) -> bool {
+        !self.index_handlers.is_empty()
+            || !self.query_handlers.is_empty()
+            || !self.workspace_handlers.is_empty()
+            || !self.async_handlers.is_empty()
+    }
+
+    /// Merge another emitter into this one.
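+    ///
+    /// Useful when a builder supplies default handlers and the caller adds
+    /// more. A minimal sketch using only names from this module:
+    ///
+    /// ```rust,ignore
+    /// let merged = EventEmitter::new()
+    ///     .on_index(|_| {})
+    ///     .merge(EventEmitter::new().on_query(|_| {}));
+    /// assert!(merged.has_handlers());
+    /// ```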
+    pub fn merge(mut self, other: EventEmitter) -> Self {
+        self.index_handlers.extend(other.index_handlers);
+        self.query_handlers.extend(other.query_handlers);
+        self.workspace_handlers.extend(other.workspace_handlers);
+        self.async_handlers.extend(other.async_handlers);
+        self
+    }
+}
+
+impl std::fmt::Debug for EventEmitter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("EventEmitter")
+            .field("index_handlers", &self.index_handlers.len())
+            .field("query_handlers", &self.query_handlers.len())
+            .field("workspace_handlers", &self.workspace_handlers.len())
+            .field("async_handlers", &self.async_handlers.len())
+            .finish()
+    }
+}
+
+impl Clone for EventEmitter {
+    fn clone(&self) -> Self {
+        // Clone returns an empty emitter since we can't clone closures
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::sync::Arc;
+
+    #[test]
+    fn test_event_emitter_index() {
+        let counter = Arc::new(AtomicUsize::new(0));
+        let counter_clone = counter.clone();
+
+        let emitter = EventEmitter::new().on_index(move |_e| {
+            counter_clone.fetch_add(1, Ordering::SeqCst);
+        });
+
+        emitter.emit_index(IndexEvent::Started {
+            path: "test.md".to_string(),
+        });
+        emitter.emit_index(IndexEvent::Complete {
+            doc_id: "123".to_string(),
+        });
+
+        assert_eq!(counter.load(Ordering::SeqCst), 2);
+    }
+
+    #[test]
+    fn test_event_emitter_query() {
+        let counter = Arc::new(AtomicUsize::new(0));
+        let counter_clone = counter.clone();
+
+        let emitter = EventEmitter::new().on_query(move |_e| {
+            counter_clone.fetch_add(1, Ordering::SeqCst);
+        });
+
+        emitter.emit_query(QueryEvent::Started {
+            query: "test".to_string(),
+        });
+
+        assert_eq!(counter.load(Ordering::SeqCst), 1);
+    }
+
+    #[test]
+    fn test_event_emitter_has_handlers() {
+        let empty = EventEmitter::new();
+        assert!(!empty.has_handlers());
+
+        let with_handler = EventEmitter::new().on_index(|_| {});
+        assert!(with_handler.has_handlers());
+    }
+}
diff --git a/src/client/indexer.rs b/src/client/indexer.rs
new file mode 100644
index 00000000..7f41cde8
--- /dev/null
+++ b/src/client/indexer.rs
@@ -0,0 +1,351 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Document indexing client.
+//!
+//! This module provides document indexing operations including
+//! format detection, parsing, and tree building.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! let indexer = IndexerClient::new(executor);
+//!
+//! let doc = indexer
+//!     .index_with_options("./document.md", IndexOptions::new().with_summaries())
+//!     .await?;
+//!
+//! println!("Indexed: {}", doc.id);
+//! ```
+
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Mutex};
+
+use tracing::info;
+use uuid::Uuid;
+
+use crate::domain::{Error, Result};
+use crate::index::{IndexInput, IndexMode, PipelineExecutor, PipelineOptions, SummaryStrategy};
+use crate::parser::DocumentFormat;
+use crate::storage::{DocumentMeta, PersistedDocument};
+
+use super::context::ClientContext;
+use super::events::{EventEmitter, IndexEvent};
+use super::types::{IndexOptions, IndexMode as ClientIndexMode, IndexedDocument};
+
+/// Document indexing client.
+///
+/// Provides operations for parsing and indexing documents.
+pub struct IndexerClient {
+    /// Pipeline executor.
+    executor: Arc<Mutex<PipelineExecutor>>,
+
+    /// Event emitter.
+    events: EventEmitter,
+
+    /// Configuration.
+    config: IndexerConfig,
+}
+
+/// Indexer configuration.
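+///
+/// A sketch of overriding a single default (values illustrative; `executor`
+/// assumed to exist):
+///
+/// ```rust,ignore
+/// let config = IndexerConfig { min_summary_tokens: 50, ..Default::default() };
+/// let indexer = IndexerClient::new(executor).with_config(config);
+/// ```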
+#[derive(Debug, Clone)]
+pub struct IndexerConfig {
+    /// Minimum content tokens required to generate a summary.
+    pub min_summary_tokens: usize,
+
+    /// Whether to generate IDs by default.
+    pub generate_ids: bool,
+
+    /// Whether to generate descriptions by default.
+    pub generate_descriptions: bool,
+}
+
+impl Default for IndexerConfig {
+    fn default() -> Self {
+        Self {
+            min_summary_tokens: 20,
+            generate_ids: true,
+            generate_descriptions: false,
+        }
+    }
+}
+
+impl IndexerClient {
+    /// Create a new indexer client.
+    pub fn new(executor: PipelineExecutor) -> Self {
+        Self {
+            executor: Arc::new(Mutex::new(executor)),
+            events: EventEmitter::new(),
+            config: IndexerConfig::default(),
+        }
+    }
+
+    /// Create with event emitter.
+    pub fn with_events(mut self, events: EventEmitter) -> Self {
+        self.events = events;
+        self
+    }
+
+    /// Create with configuration.
+    pub fn with_config(mut self, config: IndexerConfig) -> Self {
+        self.config = config;
+        self
+    }
+
+    /// Create from an existing executor Arc.
+    pub(crate) fn from_arc(
+        executor: Arc<Mutex<PipelineExecutor>>,
+        events: EventEmitter,
+        config: IndexerConfig,
+    ) -> Self {
+        Self {
+            executor,
+            events,
+            config,
+        }
+    }
+
+    /// Index a document from a file path.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The file does not exist
+    /// - The file format is not supported
+    /// - The pipeline execution fails
+    pub async fn index(&self, path: impl AsRef<Path>) -> Result<IndexedDocument> {
+        self.index_with_options(path, IndexOptions::default()).await
+    }
+
+    /// Index a document with custom options.
+    ///
+    /// # Errors
+    ///
+    /// See [`IndexerClient::index`].
+    pub async fn index_with_options(
+        &self,
+        path: impl AsRef<Path>,
+        options: IndexOptions,
+    ) -> Result<IndexedDocument> {
+        let path = path.as_ref();
+        let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
+
+        if !path.exists() {
+            return Err(Error::Parse(format!("File not found: {}", path.display())));
+        }
+
+        // Emit start event
+        self.events.emit_index(IndexEvent::Started {
+            path: path.display().to_string(),
+        });
+
+        // Generate document ID
+        let doc_id = Uuid::new_v4().to_string();
+
+        // Detect format
+        let format = self.detect_format(&path, &options)?;
+        self.events.emit_index(IndexEvent::FormatDetected { format });
+
+        info!("Indexing {:?} document: {}", format, path.display());
+
+        // Convert client options to pipeline options
+        let pipeline_options = PipelineOptions {
+            mode: match options.mode {
+                ClientIndexMode::Auto => IndexMode::Auto,
+                ClientIndexMode::Pdf => IndexMode::Pdf,
+                ClientIndexMode::Markdown => IndexMode::Markdown,
+                ClientIndexMode::Html => IndexMode::Html,
+                ClientIndexMode::Docx => IndexMode::Docx,
+            },
+            generate_ids: options.generate_ids,
+            summary_strategy: if options.generate_summaries {
+                SummaryStrategy::selective(self.config.min_summary_tokens, false)
+            } else {
+                SummaryStrategy::none()
+            },
+            generate_description: options.generate_description,
+            ..Default::default()
+        };
+
+        // Create pipeline input and execute
+        let input = IndexInput::file(&path);
+        let result = {
+            let mut executor = self.executor.lock()
+                .map_err(|_| Error::Other("Pipeline executor lock poisoned".to_string()))?;
+            executor.execute(input, pipeline_options).await?
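+            // Note: the std::sync::Mutex guard is held across this `.await`,
+            // so the future returned by this method is not `Send`; an
+            // async-aware mutex would be needed on a multi-threaded runtime.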
+        };
+
+        // Build indexed document
+        let tree = result
+            .tree
+            .ok_or_else(|| Error::Parse("Document tree not generated".to_string()))?;
+
+        let node_count = tree.node_count();
+        self.events.emit_index(IndexEvent::TreeBuilt { node_count });
+
+        let mut doc = IndexedDocument::new(&doc_id, format)
+            .with_name(&result.name)
+            .with_source_path(&path)
+            .with_tree(tree);
+
+        if let Some(desc) = &result.description {
+            doc = doc.with_description(desc);
+        }
+
+        if let Some(page_count) = result.page_count {
+            doc = doc.with_page_count(page_count);
+        }
+
+        info!("Indexing complete: {} ({} nodes)", doc_id, node_count);
+        self.events.emit_index(IndexEvent::Complete { doc_id });
+
+        Ok(doc)
+    }
+
+    /// Detect document format from path and options.
+    pub fn detect_format(&self, path: &Path, options: &IndexOptions) -> Result<DocumentFormat> {
+        match options.mode {
+            ClientIndexMode::Auto => {
+                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
+                DocumentFormat::from_extension(ext)
+                    .ok_or_else(|| Error::Parse(format!("Unknown format: {}", ext)))
+            }
+            ClientIndexMode::Pdf => Ok(DocumentFormat::Pdf),
+            ClientIndexMode::Markdown => Ok(DocumentFormat::Markdown),
+            ClientIndexMode::Html => Ok(DocumentFormat::Html),
+            ClientIndexMode::Docx => Ok(DocumentFormat::Docx),
+        }
+    }
+
+    /// Validate a document before indexing.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file doesn't exist or is not readable.
+    pub fn validate(&self, path: impl AsRef<Path>) -> Result<ValidationResult> {
+        let path = path.as_ref();
+
+        if !path.exists() {
+            return Ok(ValidationResult {
+                valid: false,
+                errors: vec![format!("File not found: {}", path.display())],
+                warnings: vec![],
+                format: None,
+                estimated_size: 0,
+            });
+        }
+
+        let metadata = std::fs::metadata(path)
+            .map_err(|e| Error::Parse(format!("Cannot read file metadata: {}", e)))?;
+
+        let estimated_size = metadata.len() as usize;
+        let mut warnings = Vec::new();
+
+        // Check file size
+        if estimated_size > 100 * 1024 * 1024 {
+            warnings.push("Large file (>100MB) may take longer to index".to_string());
+        }
+
+        // Detect format
+        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
+        let format = DocumentFormat::from_extension(ext);
+
+        if format.is_none() {
+            return Ok(ValidationResult {
+                valid: false,
+                errors: vec![format!("Unknown format: {}", ext)],
+                warnings,
+                format: None,
+                estimated_size,
+            });
+        }
+
+        Ok(ValidationResult {
+            valid: true,
+            errors: vec![],
+            warnings,
+            format,
+            estimated_size,
+        })
+    }
+
+    /// Convert IndexedDocument to PersistedDocument for storage.
+    pub fn to_persisted(&self, doc: IndexedDocument) -> PersistedDocument {
+        let meta = DocumentMeta::new(&doc.id, &doc.name, doc.format.extension())
+            .with_source_path(
+                doc.source_path
+                    .as_ref()
+                    .map(|p| p.to_string_lossy().to_string())
+                    .unwrap_or_default(),
+            )
+            .with_description(doc.description.clone().unwrap_or_default());
+
+        let mut persisted = PersistedDocument::new(
+            meta,
+            doc.tree.expect("IndexedDocument must have a tree"),
+        );
+
+        for page in doc.pages {
+            persisted.add_page(page.page, &page.content);
+        }
+
+        persisted
+    }
+
+    /// Get the underlying executor Arc (for advanced use).
+    pub(crate) fn inner(&self) -> Arc<Mutex<PipelineExecutor>> {
+        Arc::clone(&self.executor)
+    }
+}
+
+impl Clone for IndexerClient {
+    fn clone(&self) -> Self {
+        Self {
+            executor: Arc::clone(&self.executor),
+            events: self.events.clone(),
+            config: self.config.clone(),
+        }
+    }
+}
+
+/// Document validation result.
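+///
+/// Returned by [`IndexerClient::validate`]. A sketch of the intended
+/// check-then-index flow:
+///
+/// ```rust,ignore
+/// let report = indexer.validate("./paper.pdf")?;
+/// if report.valid {
+///     indexer.index("./paper.pdf").await?;
+/// } else {
+///     eprintln!("cannot index: {:?}", report.errors);
+/// }
+/// ```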
+#[derive(Debug, Clone)]
+pub struct ValidationResult {
+    /// Whether the document is valid for indexing.
+    pub valid: bool,
+
+    /// Validation errors (prevents indexing).
+    pub errors: Vec<String>,
+
+    /// Validation warnings (non-blocking).
+    pub warnings: Vec<String>,
+
+    /// Detected document format.
+    pub format: Option<DocumentFormat>,
+
+    /// Estimated file size in bytes.
+    pub estimated_size: usize,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_indexer_client_creation() {
+        let executor = PipelineExecutor::new();
+        let client = IndexerClient::new(executor);
+        assert_eq!(client.config.min_summary_tokens, 20);
+    }
+
+    #[test]
+    fn test_validate_missing_file() {
+        let executor = PipelineExecutor::new();
+        let client = IndexerClient::new(executor);
+
+        let result = client.validate("./nonexistent.md").unwrap();
+        assert!(!result.valid);
+        assert!(!result.errors.is_empty());
+    }
+}
diff --git a/src/client/mod.rs b/src/client/mod.rs
index 907d8c0e..51abecd0 100644
--- a/src/client/mod.rs
+++ b/src/client/mod.rs
@@ -6,6 +6,25 @@
 //! This module provides the main entry point for using vectorless:
 //! - [`Engine`] — The main client for indexing and querying documents
 //! - [`EngineBuilder`] — Builder pattern for client configuration
+//! - [`Session`] — Multi-document session management
+//!
+//! # Architecture
+//!
+//! The client module is organized into specialized sub-modules:
+//!
+//! ```text
+//! client/
+//! ├── mod.rs        → Re-exports and documentation
+//! ├── engine.rs     → Main orchestrator
+//! ├── builder.rs    → Builder pattern
+//! ├── types.rs      → Public API types
+//! ├── context.rs    → Request context and configuration
+//! ├── session.rs    → Session management
+//! ├── indexer.rs    → Document indexing operations
+//! ├── retriever.rs  → Query and retrieval operations
+//! ├── workspace.rs  → Workspace CRUD operations
+//! └── events.rs     → Event system and callbacks
+//! ```
 //!
 //! # Quick Start
 //!
@@ -15,11 +34,7 @@
 //! # #[tokio::main]
 //! # async fn main() -> vectorless::domain::Result<()> {
 //! // Create a client with default settings
-//! let client = Engine::new()?;
-//!
-//! // Or use the builder for custom configuration
 //! let client = EngineBuilder::new()
-//!     .with_api_key("your-api-key")
 //!     .with_workspace("./my_workspace")
 //!     .build()?;
 //!
@@ -29,6 +44,10 @@
 //! // Get document structure
 //! let structure = client.get_structure(&doc_id)?;
 //!
+//! // Query the document
+//! let result = client.query(&doc_id, "What is this?").await?;
+//! println!("{}", result.content);
+//!
 //! // List all documents
 //! for doc in client.list_documents() {
 //!     println!("{}: {}", doc.id, doc.name);
@@ -37,19 +56,117 @@
 //! # }
 //! ```
 //!
+//! # Session-Based Operations
+//!
+//! For multi-document operations, use sessions:
+//!
+//! ```rust,no_run
+//! # use vectorless::client::{Engine, EngineBuilder};
+//! # #[tokio::main]
+//! # async fn main() -> vectorless::domain::Result<()> {
+//! let client = EngineBuilder::new()
+//!     .with_workspace("./workspace")
+//!     .build()?;
+//!
+//! let session = client.session();
+//!
+//! // Index multiple documents
+//! let doc1 = session.index("./doc1.md").await?;
+//! let doc2 = session.index("./doc2.md").await?;
+//!
+//! // Query across all documents
+//! let results = session.query_all("What is the architecture?").await?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! # Events and Progress
+//!
+//! Monitor operation progress with events:
+//!
+//! ```rust,no_run
+//! # use vectorless::client::{Engine, EngineBuilder, EventEmitter, events::IndexEvent};
+//! # #[tokio::main]
+//! # async fn main() -> vectorless::domain::Result<()> {
+//! let events = EventEmitter::new()
+//!     .on_index(|e| match e {
+//!         IndexEvent::Complete { doc_id } => println!("Indexed: {}", doc_id),
+//!         _ => {}
+//!     });
+//!
+//! let client = EngineBuilder::new()
+//!     .with_events(events)
+//!     .build()?;
+//! # Ok(())
+//! # }
+//! ```
+//!
 //! # Features
 //!
 //! - **Document Indexing** — Parse and index Markdown, PDF, and text files
 //! - **Tree-Based Structure** — Documents organized as hierarchical trees
 //! - **Workspace Persistence** — Save and load indexed documents
-//! - **Builder Pattern** — Flexible client configuration
+//! - **Session Management** — Multi-document operations with caching
+//! - **Event System** — Progress callbacks and monitoring
 
 mod builder;
+mod context;
 mod engine;
+pub mod events;
+mod indexer;
+mod retriever;
+mod session;
 mod types;
+mod workspace;
 
-// Re-export main types
-pub use types::{DocumentInfo, IndexMode, IndexOptions, IndexedDocument, PageContent, QueryResult};
+// ============================================================
+// Main Types
+// ============================================================
 
-pub use builder::{BuildError, EngineBuilder};
 pub use engine::Engine;
+pub use builder::{BuildError, EngineBuilder};
+
+// ============================================================
+// Sub-Clients
+// ============================================================
+
+pub use indexer::IndexerClient;
+pub use retriever::RetrieverClient;
+pub use workspace::WorkspaceClient;
+pub use session::Session;
+
+// ============================================================
+// Context and Events
+// ============================================================
+
+pub use context::{ClientContext, FeatureFlags, RequestContextConfig};
+pub use events::{
+    EventEmitter, Event, EventHandler, AsyncEventHandler,
+    IndexEvent, QueryEvent, WorkspaceEvent,
+};
+
+// ============================================================
+// Types
+// ============================================================
+
+pub use types::{
+    // Document types
+    IndexedDocument, PageContent,
+    // Index types
+    IndexMode, IndexOptions,
+    // Query types
+    QueryResult,
+    // Document info
+    DocumentInfo,
+    // Error types
+    ClientError,
+};
+
+// ============================================================
+// Sub-Client Types
+// ============================================================
+
+pub use indexer::{IndexerConfig, ValidationResult};
+pub use retriever::{RetrieverClientConfig, NodeContext};
+pub use workspace::{WorkspaceClientConfig, WorkspaceStats};
+pub use session::{SessionConfig, SessionStats, EvictionPolicy, PreloadStrategy};
diff --git a/src/client/retriever.rs b/src/client/retriever.rs
new file mode 100644
index 00000000..7f0099ca
--- /dev/null
+++ b/src/client/retriever.rs
@@ -0,0 +1,408 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Document retrieval client.
+//!
+//! This module provides query and retrieval operations for document content.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! let retriever = RetrieverClient::new(pipeline_retriever, config);
+//!
+//! let result = retriever
+//!     .query(&tree, "What is this?", &RetrieveOptions::default())
+//!     .await?;
+//!
+//! println!("Found {} results", result.node_ids.len());
+//! ```
+
+use std::sync::Arc;
+
+use tracing::info;
+
+use crate::config::Config;
+use crate::domain::{DocumentTree, Error, NodeId, Result};
+use crate::retrieval::content::ContentAggregatorConfig;
+use crate::retrieval::{
+    QueryComplexity, RetrieveOptions, RetrieveResponse, RetrievalResult, Retriever, SufficiencyLevel,
+};
+
+use super::context::ClientContext;
+use super::events::{EventEmitter, QueryEvent};
+use super::types::QueryResult;
+
+/// Document retrieval client.
+///
+/// Provides operations for querying document content.
+pub struct RetrieverClient {
+    /// Pipeline retriever.
+    retriever: Arc<crate::retrieval::PipelineRetriever>,
+
+    /// Configuration reference.
+    config: Arc<Config>,
+
+    /// Event emitter.
+    events: EventEmitter,
+
+    /// Default retrieval options.
+    default_options: RetrieveOptions,
+}
+
+/// Retriever configuration.
+#[derive(Debug, Clone)]
+pub struct RetrieverClientConfig {
+    /// Default top_k for retrieval.
+    pub default_top_k: usize,
+
+    /// Default token budget.
+    pub default_token_budget: usize,
+
+    /// Content aggregator config.
+    pub content_config: Option<ContentAggregatorConfig>,
+
+    /// Enable result caching.
+    pub enable_cache: bool,
+}
+
+impl Default for RetrieverClientConfig {
+    fn default() -> Self {
+        Self {
+            default_top_k: 5,
+            default_token_budget: 4000,
+            content_config: None,
+            enable_cache: true,
+        }
+    }
+}
+
+impl RetrieverClient {
+    /// Create a new retriever client.
+    pub fn new(retriever: crate::retrieval::PipelineRetriever, config: Arc<Config>) -> Self {
+        Self {
+            retriever: Arc::new(retriever),
+            config,
+            events: EventEmitter::new(),
+            default_options: RetrieveOptions::default(),
+        }
+    }
+
+    /// Create with event emitter.
+    pub fn with_events(mut self, events: EventEmitter) -> Self {
+        self.events = events;
+        self
+    }
+
+    /// Create with configuration.
+    pub fn with_config(mut self, config: RetrieverClientConfig) -> Self {
+        self.default_options = RetrieveOptions::new()
+            .with_top_k(config.default_top_k)
+            .with_max_tokens(config.default_token_budget)
+            .with_enable_cache(config.enable_cache);
+        self
+    }
+
+    /// Create from existing retriever Arc.
+    pub(crate) fn from_arc(
+        retriever: Arc<crate::retrieval::PipelineRetriever>,
+        config: Arc<Config>,
+        events: EventEmitter,
+    ) -> Self {
+        Self {
+            retriever,
+            config,
+            events,
+            default_options: RetrieveOptions::default(),
+        }
+    }
+
+    /// Query a document tree.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The retrieval pipeline fails
+    pub async fn query(
+        &self,
+        tree: &DocumentTree,
+        question: &str,
+        options: &RetrieveOptions,
+    ) -> Result<QueryResult> {
+        self.query_with_context(tree, question, options, &ClientContext::new()).await
+    }
+
+    /// Query with request context.
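+    ///
+    /// Per-request overrides on the context take precedence over `options`.
+    /// A minimal sketch (assumes `ClientContext`'s public fields as declared
+    /// in `context.rs`):
+    ///
+    /// ```rust,ignore
+    /// let mut ctx = ClientContext::new();
+    /// ctx.config.top_k = Some(10);
+    /// let result = retriever
+    ///     .query_with_context(&tree, "What is X?", &options, &ctx)
+    ///     .await?;
+    /// ```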
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The retrieval pipeline fails
+    /// - The request has timed out
+    pub async fn query_with_context(
+        &self,
+        tree: &DocumentTree,
+        question: &str,
+        options: &RetrieveOptions,
+        ctx: &ClientContext,
+    ) -> Result<QueryResult> {
+        // Check timeout
+        if ctx.is_timed_out() {
+            return Err(Error::Other("Request timed out".to_string()));
+        }
+
+        self.events.emit_query(QueryEvent::Started {
+            query: question.to_string(),
+        });
+
+        info!("Querying: {:?}", question);
+
+        // Apply context overrides
+        let mut options = options.clone();
+        if let Some(top_k) = ctx.config.top_k {
+            options.top_k = top_k;
+        }
+        if let Some(token_budget) = ctx.config.token_budget {
+            options.max_tokens = token_budget;
+        }
+
+        // Execute retrieval
+        let response = self.retriever
+            .retrieve(tree, question, &options)
+            .await
+            .map_err(|e| Error::Retrieval(e.to_string()))?;
+
+        // Build result
+        let result = self.build_query_result(&response);
+
+        self.events.emit_query(QueryEvent::Complete {
+            total_results: result.node_ids.len(),
+            confidence: result.score,
+        });
+
+        Ok(result)
+    }
+
+    /// Build QueryResult from RetrieveResponse.
+    fn build_query_result(&self, response: &RetrieveResponse) -> QueryResult {
+        // Extract node IDs
+        let node_ids: Vec<String> = response
+            .results
+            .iter()
+            .filter_map(|r| r.node_id.clone())
+            .collect();
+
+        // Build content
+        let content_parts: Vec<String> = response
+            .results
+            .iter()
+            .map(|r| {
+                let mut parts = vec![format!("## {}", r.title)];
+                if let Some(ref content) = r.content {
+                    parts.push(content.clone());
+                }
+                parts.join("\n\n")
+            })
+            .collect();
+
+        let content = if content_parts.is_empty() {
+            response.content.clone()
+        } else {
+            content_parts.join("\n\n---\n\n")
+        };
+
+        QueryResult {
+            doc_id: String::new(), // Will be set by caller
+            node_ids,
+            content,
+            score: response.confidence,
+        }
+    }
+
+    /// Get similar nodes to a given node.
+    ///
+    /// Uses tree structure and content to find similar nodes.
+    pub fn find_similar(
+        &self,
+        tree: &DocumentTree,
+        node_id: NodeId,
+        top_k: usize,
+    ) -> Result<Vec<RetrievalResult>> {
+        let mut results = Vec::new();
+
+        // Get the target node's content for comparison
+        let target_content = tree
+            .get(node_id)
+            .map(|n| n.content.clone())
+            .unwrap_or_default();
+
+        if target_content.is_empty() {
+            return Ok(results);
+        }
+
+        // Extract keywords from target content
+        let target_keywords = self.extract_keywords(&target_content);
+
+        // Search all nodes for similarity
+        let root = tree.root();
+        let mut stack = vec![root];
+
+        while let Some(current_id) = stack.pop() {
+            if current_id == node_id {
+                // Skip the target node itself
+                stack.extend(tree.children(current_id));
+                continue;
+            }
+
+            if let Some(node) = tree.get(current_id) {
+                let node_keywords = self.extract_keywords(&node.content);
+                let similarity = self.calculate_similarity(&target_keywords, &node_keywords);
+
+                if similarity > 0.3 {
+                    results.push(RetrievalResult::new(&node.title)
+                        .with_node_id(format!("{:?}", current_id))
+                        .with_content(node.content.clone())
+                        .with_score(similarity)
+                        .with_depth(tree.depth(current_id)));
+                }
+            }
+
+            stack.extend(tree.children(current_id));
+        }
+
+        // Sort by score and take top_k
+        results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+        results.truncate(top_k);
+
+        Ok(results)
+    }
+
+    /// Extract keywords from content.
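+    ///
+    /// Keywords are lowercased whitespace tokens longer than three characters,
+    /// capped at twenty per node; `calculate_similarity` then compares two
+    /// keyword sets by Jaccard overlap (intersection over union). For example,
+    /// `["beam", "search", "tree"]` vs `["tree", "search", "depth"]` scores
+    /// 2/4 = 0.5.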
+    fn extract_keywords(&self, content: &str) -> Vec<String> {
+        content
+            .to_lowercase()
+            .split_whitespace()
+            .filter(|w| w.len() > 3)
+            .take(20)
+            .map(|s| s.to_string())
+            .collect()
+    }
+
+    /// Calculate similarity between keyword sets.
+    fn calculate_similarity(&self, set1: &[String], set2: &[String]) -> f32 {
+        if set1.is_empty() || set2.is_empty() {
+            return 0.0;
+        }
+
+        let set1_set: std::collections::HashSet<_> = set1.iter().collect();
+        let set2_set: std::collections::HashSet<_> = set2.iter().collect();
+
+        let intersection = set1_set.intersection(&set2_set).count();
+        let union = set1_set.union(&set2_set).count();
+
+        intersection as f32 / union as f32
+    }
+
+    /// Get node context (ancestors and siblings).
+    ///
+    /// Returns the node's ancestors up to the specified depth,
+    /// along with sibling nodes at each level.
+    pub fn get_node_context(
+        &self,
+        tree: &DocumentTree,
+        node_id: NodeId,
+        ancestor_depth: usize,
+    ) -> Result<NodeContext> {
+        let mut ancestors = Vec::new();
+        let mut siblings = Vec::new();
+
+        // Get ancestors
+        let mut current_id = Some(node_id);
+        let mut depth = 0;
+
+        while let Some(id) = current_id {
+            if depth >= ancestor_depth {
+                break;
+            }
+
+            if let Some(node) = tree.get(id) {
+                ancestors.push(RetrievalResult::new(&node.title)
+                    .with_node_id(format!("{:?}", id))
+                    .with_depth(tree.depth(id)));
+
+                // Get siblings at this level
+                if let Some(parent_id) = tree.parent(id) {
+                    for child_id in tree.children(parent_id) {
+                        if child_id != id {
+                            if let Some(sibling) = tree.get(child_id) {
+                                siblings.push(RetrievalResult::new(&sibling.title)
+                                    .with_node_id(format!("{:?}", child_id))
+                                    .with_depth(tree.depth(child_id)));
+                            }
+                        }
+                    }
+                }
+            }
+
+            current_id = tree.parent(id);
+            depth += 1;
+        }
+
+        // Get the target node
+        let target = tree
+            .get(node_id)
+            .map(|n| {
+                RetrievalResult::new(&n.title)
+                    .with_node_id(format!("{:?}", node_id))
+                    .with_content(n.content.clone())
+                    .with_depth(tree.depth(node_id))
+            });
+
+        Ok(NodeContext {
+            target,
+            ancestors,
+            siblings,
+        })
+    }
+
+    /// Get the underlying retriever Arc.
+    pub(crate) fn inner(&self) -> Arc<crate::retrieval::PipelineRetriever> {
+        Arc::clone(&self.retriever)
+    }
+}
+
+impl Clone for RetrieverClient {
+    fn clone(&self) -> Self {
+        Self {
+            retriever: Arc::clone(&self.retriever),
+            config: Arc::clone(&self.config),
+            events: self.events.clone(),
+            default_options: self.default_options.clone(),
+        }
+    }
+}
+
+/// Node context information.
+#[derive(Debug, Clone)]
+pub struct NodeContext {
+    /// The target node.
+    pub target: Option<RetrievalResult>,
+
+    /// Ancestor nodes (ordered from parent to root).
+    pub ancestors: Vec<RetrievalResult>,
+
+    /// Sibling nodes at each ancestor level.
+    pub siblings: Vec<RetrievalResult>,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_retriever_client_creation() {
+        let config = Arc::new(Config::default());
+        let retriever = crate::retrieval::PipelineRetriever::new();
+        let client = RetrieverClient::new(retriever, config);
+        assert!(client.default_options.top_k > 0);
+    }
+}
diff --git a/src/client/session.rs b/src/client/session.rs
new file mode 100644
index 00000000..1b5d55ef
--- /dev/null
+++ b/src/client/session.rs
@@ -0,0 +1,493 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Session management for multi-document operations.
+//!
+//! This module provides session-based document management with
+//! automatic caching and cross-document querying.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! let session = client.session();
+//!
+//! // Index multiple documents
+//! let doc1 = session.index("./doc1.md").await?;
+//! let doc2 = session.index("./doc2.md").await?;
+//!
+//! // Query across all documents
+//! let results = session.query_all("What is X?").await?;
+//!
+//! // Query single document (uses cached tree)
+//! let result = session.query(&doc1, "Summary?").await?;
+//! ```
+
+use std::cell::Cell;
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use tracing::info;
+use uuid::Uuid;
+
+use crate::domain::{DocumentTree, Error, Result};
+use crate::retrieval::RetrieveOptions;
+use crate::storage::PersistedDocument;
+
+use super::context::ClientContext;
+use super::events::EventEmitter;
+use super::indexer::IndexerClient;
+use super::retriever::RetrieverClient;
+use super::types::{DocumentInfo, IndexOptions, QueryResult};
+use super::workspace::WorkspaceClient;
+
+/// Session for managing multiple documents.
+///
+/// Provides automatic caching of document trees and cross-document operations.
+pub struct Session {
+    /// Session ID.
+    pub id: Uuid,
+
+    /// Session configuration.
+    config: SessionConfig,
+
+    /// Document contexts (cached).
+    documents: HashMap<String, DocumentContext>,
+
+    /// Indexer client.
+    indexer: IndexerClient,
+
+    /// Retriever client.
+    retriever: RetrieverClient,
+
+    /// Workspace client.
+    workspace: WorkspaceClient,
+
+    /// Event emitter.
+    events: EventEmitter,
+
+    /// Session statistics.
+    stats: SessionStats,
+
+    /// Created at timestamp.
+    created_at: Instant,
+}
+
+/// Document context within a session.
+#[derive(Debug, Clone)]
+struct DocumentContext {
+    /// Document ID.
+    doc_id: String,
+
+    /// Cached document tree.
+    tree: Option<Arc<DocumentTree>>,
+
+    /// Document metadata.
+    meta: DocumentInfo,
+
+    /// Access count.
+    access_count: usize,
+
+    /// Last access time.
+    last_accessed: Instant,
+}
+
+/// Session configuration.
+#[derive(Debug, Clone)]
+pub struct SessionConfig {
+    /// Maximum documents to cache in memory.
+    pub max_cached_documents: usize,
+
+    /// Cache eviction policy.
+    pub eviction_policy: EvictionPolicy,
+
+    /// Preload strategy when indexing.
+    pub preload_strategy: PreloadStrategy,
+}
+
+impl Default for SessionConfig {
+    fn default() -> Self {
+        Self {
+            max_cached_documents: 100,
+            eviction_policy: EvictionPolicy::Lru,
+            preload_strategy: PreloadStrategy::Lazy,
+        }
+    }
+}
+
+/// Cache eviction policy.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum EvictionPolicy {
+    /// Least recently used.
+    Lru,
+    /// First in, first out.
+    Fifo,
+    /// No eviction (until session closes).
+    None,
+}
+
+/// Document preload strategy.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum PreloadStrategy {
+    /// Load trees on demand.
+    Lazy,
+    /// Load trees immediately when indexing.
+    Eager,
+}
+
+/// Session statistics.
+#[derive(Debug, Default)]
+pub struct SessionStats {
+    /// Total documents in session.
+    pub document_count: Cell<usize>,
+
+    /// Total queries made.
+    pub query_count: Cell<usize>,
+
+    /// Cache hits.
+    pub cache_hits: Cell<usize>,
+
+    /// Cache misses.
+    pub cache_misses: Cell<usize>,
+
+    /// Total query time (in microseconds).
+    total_query_time_us: Cell<u64>,
+}
+
+impl SessionStats {
+    /// Get the cache hit rate.
+    pub fn cache_hit_rate(&self) -> f32 {
+        let total = self.cache_hits.get() + self.cache_misses.get();
+        if total == 0 {
+            0.0
+        } else {
+            self.cache_hits.get() as f32 / total as f32
+        }
+    }
+
+    /// Get the total query time.
+    pub fn total_query_time(&self) -> Duration {
+        Duration::from_micros(self.total_query_time_us.get())
+    }
+
+    /// Get the average query time.
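+    ///
+    /// Returns `None` before any query has completed. Sketch:
+    ///
+    /// ```rust,ignore
+    /// if let Some(avg) = session.stats().avg_query_time() {
+    ///     println!("average query latency: {:?}", avg);
+    /// }
+    /// ```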
+    pub fn avg_query_time(&self) -> Option<Duration> {
+        let count = self.query_count.get();
+        if count == 0 {
+            None
+        } else {
+            Some(self.total_query_time() / count as u32)
+        }
+    }
+
+    /// Increment query count.
+    fn increment_query_count(&self) {
+        self.query_count.set(self.query_count.get() + 1);
+    }
+
+    /// Add query time.
+    fn add_query_time(&self, duration: Duration) {
+        self.total_query_time_us.set(
+            self.total_query_time_us.get() + duration.as_micros() as u64
+        );
+    }
+
+    /// Increment cache hits.
+    fn increment_cache_hits(&self) {
+        self.cache_hits.set(self.cache_hits.get() + 1);
+    }
+
+    /// Increment cache misses.
+    fn increment_cache_misses(&self) {
+        self.cache_misses.set(self.cache_misses.get() + 1);
+    }
+}
+
+impl Clone for SessionStats {
+    fn clone(&self) -> Self {
+        Self {
+            document_count: Cell::new(self.document_count.get()),
+            query_count: Cell::new(self.query_count.get()),
+            cache_hits: Cell::new(self.cache_hits.get()),
+            cache_misses: Cell::new(self.cache_misses.get()),
+            total_query_time_us: Cell::new(self.total_query_time_us.get()),
+        }
+    }
+}
+
+impl Session {
+    /// Create a new session.
+    pub(crate) fn new(
+        indexer: IndexerClient,
+        retriever: RetrieverClient,
+        workspace: WorkspaceClient,
+        events: EventEmitter,
+    ) -> Self {
+        Self {
+            id: Uuid::new_v4(),
+            config: SessionConfig::default(),
+            documents: HashMap::new(),
+            indexer,
+            retriever,
+            workspace,
+            events,
+            stats: SessionStats::default(),
+            created_at: Instant::now(),
+        }
+    }
+
+    /// Create with configuration.
+    pub fn with_config(mut self, config: SessionConfig) -> Self {
+        self.config = config;
+        self
+    }
+
+    /// Get the session ID.
+    pub fn id(&self) -> Uuid {
+        self.id
+    }
+
+    /// Get session age.
+    pub fn age(&self) -> Duration {
+        Instant::now().duration_since(self.created_at)
+    }
+
+    // ============================================================
+    // Document Indexing
+    // ============================================================
+
+    /// Index a document into this session.
+    ///
+    /// The document is indexed, saved to workspace, and cached in this session.
+    pub async fn index(&self, path: impl AsRef<Path>) -> Result<String> {
+        self.index_with_options(path, IndexOptions::default()).await
+    }
+
+    /// Index a document with options.
+    pub async fn index_with_options(
+        &self,
+        path: impl AsRef<Path>,
+        options: IndexOptions,
+    ) -> Result<String> {
+        // Index the document
+        let doc = self.indexer.index_with_options(path, options).await?;
+
+        // Save to workspace
+        let persisted = self.indexer.to_persisted(doc);
+        self.workspace.save(&persisted)?;
+
+        // Session-level caching still requires interior mutability; see cache_document().
+        let doc_id = persisted.meta.id.clone();
+
+        info!("Session {}: indexed document {}", self.id, doc_id);
+
+        Ok(doc_id)
+    }
+
+    // ============================================================
+    // Document Querying
+    // ============================================================
+
+    /// Query a document within this session.
+    ///
+    /// Uses the cached tree if available, otherwise loads from workspace.
+    pub async fn query(&self, doc_id: &str, question: &str) -> Result<QueryResult> {
+        self.query_with_options(doc_id, question, RetrieveOptions::default()).await
+    }
+
+    /// Query a document with options.
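+    ///
+    /// A sketch that narrows the retrieval budget for one call, using the
+    /// `RetrieveOptions` builder methods seen elsewhere in this crate:
+    ///
+    /// ```rust,ignore
+    /// let opts = RetrieveOptions::new().with_top_k(3).with_max_tokens(1500);
+    /// let result = session.query_with_options(&doc_id, "What changed?", opts).await?;
+    /// ```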
+    pub async fn query_with_options(
+        &self,
+        doc_id: &str,
+        question: &str,
+        options: RetrieveOptions,
+    ) -> Result<QueryResult> {
+        let start = Instant::now();
+
+        // Get the document tree
+        let tree = self.get_tree(doc_id).await?;
+
+        // Query
+        let mut result = self.retriever.query(&tree, question, &options).await?;
+        result.doc_id = doc_id.to_string();
+
+        // Update stats
+        self.stats.increment_query_count();
+        self.stats.add_query_time(start.elapsed());
+
+        Ok(result)
+    }
+
+    /// Query across all documents in this session.
+    ///
+    /// Searches each document and merges results.
+    pub async fn query_all(&self, question: &str) -> Result<Vec<QueryResult>> {
+        self.query_all_with_options(question, RetrieveOptions::default()).await
+    }
+
+    /// Query across all documents with options.
+    pub async fn query_all_with_options(
+        &self,
+        question: &str,
+        options: RetrieveOptions,
+    ) -> Result<Vec<QueryResult>> {
+        let doc_ids: Vec<String> = self.documents.keys().cloned().collect();
+
+        if doc_ids.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let mut results = Vec::new();
+
+        for doc_id in &doc_ids {
+            match self.query_with_options(doc_id, question, options.clone()).await {
+                Ok(result) => {
+                    if !result.node_ids.is_empty() {
+                        results.push(result);
+                    }
+                }
+                Err(e) => {
+                    info!("Query failed for {}: {}", doc_id, e);
+                }
+            }
+        }
+
+        // Sort by score descending
+        results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+
+        Ok(results)
+    }
+
+    // ============================================================
+    // Document Management
+    // ============================================================
+
+    /// Get list of documents in this session.
+    pub fn list_documents(&self) -> Vec<DocumentInfo> {
+        self.documents.values().map(|ctx| ctx.meta.clone()).collect()
+    }
+
+    /// Get a document tree (from cache or workspace).
+    pub async fn get_tree(&self, doc_id: &str) -> Result<DocumentTree> {
+        // Check cache first
+        if let Some(tree) = self.get_cached_tree(doc_id) {
+            self.stats.increment_cache_hits();
+            return Ok((*tree).clone());
+        }
+
+        self.stats.increment_cache_misses();
+
+        // Load from workspace
+        let doc = self.workspace.load(doc_id)?
+            .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?;
+
+        let tree = doc.tree;
+
+        // Cache for future use
+        self.cache_tree(doc_id, &tree);
+
+        Ok(tree)
+    }
+
+    /// Preload documents into the session cache.
+    ///
+    /// Useful for warming up the cache before querying.
+    pub async fn preload(&self, doc_ids: &[&str]) -> Result<usize> {
+        let mut loaded = 0;
+
+        for doc_id in doc_ids {
+            if self.get_cached_tree(doc_id).is_none() {
+                if let Ok(tree) = self.get_tree(doc_id).await {
+                    self.cache_tree(doc_id, &tree);
+                    loaded += 1;
+                }
+            }
+        }
+
+        info!("Session {}: preloaded {} documents", self.id, loaded);
+        Ok(loaded)
+    }
+
+    /// Remove a document from the session.
+    pub fn remove_document(&self, doc_id: &str) -> bool {
+        // Note: This would need interior mutability for full implementation
+        false
+    }
+
+    /// Clear all documents from the session cache.
+    pub fn clear_cache(&self) {
+        // Note: This would need interior mutability for full implementation
+    }
+
+    // ============================================================
+    // Statistics
+    // ============================================================
+
+    /// Get session statistics.
+    pub fn stats(&self) -> SessionStats {
+        self.stats.clone()
+    }
+
+    /// Get the number of cached documents.
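+    ///
+    /// Counts only documents whose tree is materialized in memory (note that
+    /// `cache_tree` is still a placeholder above). A sketch of warming the
+    /// cache first:
+    ///
+    /// ```rust,ignore
+    /// session.preload(&[&doc1, &doc2]).await?;
+    /// println!("{} trees cached", session.cached_count());
+    /// ```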
+    pub fn cached_count(&self) -> usize {
+        self.documents.values().filter(|d| d.tree.is_some()).count()
+    }
+
+    // ============================================================
+    // Internal Methods
+    // ============================================================
+
+    /// Cache a document in this session.
+    fn cache_document(&self, doc: crate::client::types::IndexedDocument) {
+        // Note: This would need interior mutability for full implementation
+        // For now, this is a placeholder
+    }
+
+    /// Get a cached tree.
+    fn get_cached_tree(&self, doc_id: &str) -> Option<Arc<DocumentTree>> {
+        self.documents.get(doc_id).and_then(|ctx| ctx.tree.clone())
+    }
+
+    /// Cache a tree.
+    fn cache_tree(&self, doc_id: &str, tree: &DocumentTree) {
+        // Note: This would need interior mutability for full implementation
+    }
+}
+
+impl Clone for Session {
+    fn clone(&self) -> Self {
+        Self {
+            id: self.id,
+            config: self.config.clone(),
+            documents: self.documents.clone(),
+            indexer: self.indexer.clone(),
+            retriever: self.retriever.clone(),
+            workspace: self.workspace.clone(),
+            events: self.events.clone(),
+            stats: self.stats.clone(),
+            created_at: self.created_at,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_session_config() {
+        let config = SessionConfig::default();
+        assert_eq!(config.max_cached_documents, 100);
+        assert_eq!(config.eviction_policy, EvictionPolicy::Lru);
+    }
+
+    #[test]
+    fn test_session_stats() {
+        let stats = SessionStats::default();
+        stats.cache_hits.set(8);
+        stats.cache_misses.set(2);
+
+        assert!((stats.cache_hit_rate() - 0.8).abs() < 0.01);
+    }
+}
diff --git a/src/client/types.rs b/src/client/types.rs
index e0e68a3a..40816257 100644
--- a/src/client/types.rs
+++ b/src/client/types.rs
@@ -1,7 +1,9 @@
 // Copyright (c) 2026 vectorless developers
 // SPDX-License-Identifier: Apache-2.0
 
-//! Client type definitions.
+//! Public API types for the client module.
+//!
+//! This module contains all types exposed in the public API.
 
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
@@ -9,6 +11,10 @@
 use crate::domain::DocumentTree;
 use crate::parser::DocumentFormat;
 
+// ============================================================
+// Document Types
+// ============================================================
+
 /// An indexed document with its tree structure and metadata.
 #[derive(Debug, Clone)]
 pub struct IndexedDocument {
@@ -116,6 +122,10 @@
     pub content: String,
 }
 
+// ============================================================
+// Index Types
+// ============================================================
+
 /// Document indexing mode.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum IndexMode {
@@ -164,7 +174,7 @@
     fn default() -> Self {
         Self {
             mode: IndexMode::Auto,
-            generate_summaries: false, // Disabled by default, requires API key
+            generate_summaries: false,
             include_text: true,
             generate_ids: true,
             generate_description: false,
@@ -189,8 +199,18 @@
         self.generate_description = true;
         self
     }
+
+    /// Set the indexing mode.
+    pub fn with_mode(mut self, mode: IndexMode) -> Self {
+        self.mode = mode;
+        self
+    }
 }
 
+// ============================================================
+// Query Types
+// ============================================================
+
 /// Result of a document query.
 #[derive(Debug, Clone)]
 pub struct QueryResult {
@@ -207,6 +227,32 @@
     pub score: f32,
 }
 
+impl QueryResult {
+    /// Create a new query result.
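+    ///
+    /// The result starts empty; callers fill in `node_ids`, `content`, and
+    /// `score`. Sketch:
+    ///
+    /// ```rust,ignore
+    /// let mut result = QueryResult::new("doc-1");
+    /// result.score = 0.9;
+    /// assert!(result.is_empty());
+    /// ```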
+    pub fn new(doc_id: impl Into<String>) -> Self {
+        Self {
+            doc_id: doc_id.into(),
+            node_ids: Vec::new(),
+            content: String::new(),
+            score: 0.0,
+        }
+    }
+
+    /// Check if the result is empty.
+    pub fn is_empty(&self) -> bool {
+        self.node_ids.is_empty()
+    }
+
+    /// Get the number of results.
+    pub fn len(&self) -> usize {
+        self.node_ids.len()
+    }
+}
+
+// ============================================================
+// Document Info Types
+// ============================================================
+
 /// Document info for listing.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct DocumentInfo {
@@ -228,3 +274,89 @@
     /// Line count (for text files).
     pub line_count: Option<usize>,
 }
+
+impl DocumentInfo {
+    /// Create a new document info.
+    pub fn new(id: impl Into<String>, name: impl Into<String>) -> Self {
+        Self {
+            id: id.into(),
+            name: name.into(),
+            format: String::new(),
+            description: None,
+            page_count: None,
+            line_count: None,
+        }
+    }
+
+    /// Set the format.
+    pub fn with_format(mut self, format: impl Into<String>) -> Self {
+        self.format = format.into();
+        self
+    }
+}
+
+// ============================================================
+// Error Types
+// ============================================================
+
+/// Client error types.
+#[derive(Debug, Clone, thiserror::Error)]
+pub enum ClientError {
+    /// Document not found.
+    #[error("Document not found: {0}")]
+    NotFound(String),
+
+    /// Invalid operation.
+    #[error("Invalid operation: {0}")]
+    InvalidOperation(String),
+
+    /// Configuration error.
+    #[error("Configuration error: {0}")]
+    Config(String),
+
+    /// Timeout error.
+    #[error("Operation timed out")]
+    Timeout,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_indexed_document() {
+        let doc = IndexedDocument::new("doc-1", DocumentFormat::Markdown)
+            .with_name("Test Document")
+            .with_description("A test document");
+
+        assert_eq!(doc.id, "doc-1");
+        assert_eq!(doc.name, "Test Document");
+        assert!(doc.tree.is_none());
+    }
+
+    #[test]
+    fn test_index_options() {
+        let options = IndexOptions::new()
+            .with_summaries()
+            .with_mode(IndexMode::Pdf);
+
+        assert!(options.generate_summaries);
+        assert_eq!(options.mode, IndexMode::Pdf);
+    }
+
+    #[test]
+    fn test_query_result() {
+        let result = QueryResult::new("doc-1");
+        assert!(result.is_empty());
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_document_info() {
+        let info = DocumentInfo::new("doc-1", "Test")
+            .with_format("markdown");
+
+        assert_eq!(info.id, "doc-1");
+        assert_eq!(info.format, "markdown");
+    }
+}
diff --git a/src/client/workspace.rs b/src/client/workspace.rs
new file mode 100644
index 00000000..731a5e71
--- /dev/null
+++ b/src/client/workspace.rs
@@ -0,0 +1,372 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Workspace management client.
+//!
+//! This module provides CRUD operations for document persistence
+//! through the workspace abstraction.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! let workspace = WorkspaceClient::new(workspace_storage);
+//!
+//! // Save a document
+//! workspace.save(&doc)?;
+//!
+//! // Load a document
+//! let doc = workspace.load("doc-id")?;
+//!
+//! // List all documents
+//! for doc in workspace.list()? {
+//!     println!("{}: {}", doc.id, doc.name);
+//! }
+//! ```
+
+use std::sync::{Arc, RwLock};
+
+use tracing::{debug, info, warn};
+
+use crate::domain::{Error, Result};
+use crate::storage::{DocumentMetaEntry, PersistedDocument, Workspace};
+
+use super::events::{EventEmitter, WorkspaceEvent};
+use super::types::DocumentInfo;
+
+/// Workspace management client.
+///
+/// Provides thread-safe CRUD operations for document persistence.
+pub struct WorkspaceClient {
+    /// Workspace storage.
+    workspace: Arc<RwLock<Workspace>>,
+
+    /// Event emitter.
+    events: EventEmitter,
+
+    /// Configuration.
+    config: WorkspaceClientConfig,
+}
+
+/// Workspace client configuration.
+#[derive(Debug, Clone)]
+pub struct WorkspaceClientConfig {
+    /// Auto-save interval in seconds (None = disabled).
+    pub auto_save_interval: Option<u64>,
+
+    /// Enable verbose logging.
+    pub verbose: bool,
+}
+
+impl Default for WorkspaceClientConfig {
+    fn default() -> Self {
+        Self {
+            auto_save_interval: None,
+            verbose: false,
+        }
+    }
+}
+
+impl WorkspaceClient {
+    /// Create a new workspace client.
+    pub fn new(workspace: Workspace) -> Self {
+        Self {
+            workspace: Arc::new(RwLock::new(workspace)),
+            events: EventEmitter::new(),
+            config: WorkspaceClientConfig::default(),
+        }
+    }
+
+    /// Create with event emitter.
+    pub fn with_events(mut self, events: EventEmitter) -> Self {
+        self.events = events;
+        self
+    }
+
+    /// Create with configuration.
+    pub fn with_config(mut self, config: WorkspaceClientConfig) -> Self {
+        self.config = config;
+        self
+    }
+
+    /// Create from an existing workspace Arc.
+    pub(crate) fn from_arc(workspace: Arc<RwLock<Workspace>>, events: EventEmitter) -> Self {
+        Self {
+            workspace,
+            events,
+            config: WorkspaceClientConfig::default(),
+        }
+    }
+
+    /// Save a document to the workspace.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace write fails.
+    pub fn save(&self, doc: &PersistedDocument) -> Result<()> {
+        let doc_id = doc.meta.id.clone();
+
+        {
+            let mut ws = self.workspace.write()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+            ws.add(doc)?;
+        }
+
+        info!("Saved document: {}", doc_id);
+        self.events.emit_workspace(WorkspaceEvent::Saved { doc_id });
+
+        Ok(())
+    }
+
+    /// Load a document from the workspace.
+    ///
+    /// Returns `Ok(None)` if the document doesn't exist.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn load(&self, doc_id: &str) -> Result<Option<PersistedDocument>> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+        if !ws.contains(doc_id) {
+            return Ok(None);
+        }
+
+        let doc = ws.load(doc_id)?;
+        let cache_hit = doc.is_some();
+
+        if doc.is_some() {
+            debug!("Loaded document: {} (cache={})", doc_id, cache_hit);
+        }
+
+        self.events.emit_workspace(WorkspaceEvent::Loaded {
+            doc_id: doc_id.to_string(),
+            cache_hit,
+        });
+
+        Ok(doc)
+    }
+
+    /// Remove a document from the workspace.
+    ///
+    /// Returns `Ok(true)` if the document was removed, `Ok(false)` if it didn't exist.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace write fails.
+    pub fn remove(&self, doc_id: &str) -> Result<bool> {
+        let removed = {
+            let mut ws = self.workspace.write()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+            ws.remove(doc_id)?
+        };
+
+        if removed {
+            info!("Removed document: {}", doc_id);
+            self.events.emit_workspace(WorkspaceEvent::Removed {
+                doc_id: doc_id.to_string(),
+            });
+        }
+
+        Ok(removed)
+    }
+
+    /// Check if a document exists in the workspace.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
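+    ///
+    /// A sketch:
+    ///
+    /// ```rust,ignore
+    /// if !workspace.exists("doc-1")? {
+    ///     println!("nothing to load");
+    /// }
+    /// ```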
+    pub fn exists(&self, doc_id: &str) -> Result<bool> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+        Ok(ws.contains(doc_id))
+    }
+
+    /// List all documents in the workspace.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn list(&self) -> Result<Vec<DocumentInfo>> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+        Ok(ws.list_documents()
+            .iter()
+            .filter_map(|id| ws.get_meta(id))
+            .map(|meta| DocumentInfo {
+                id: meta.id.clone(),
+                name: meta.doc_name.clone(),
+                format: meta.doc_type.clone(),
+                description: meta.doc_description.clone(),
+                page_count: meta.page_count,
+                line_count: meta.line_count,
+            })
+            .collect())
+    }
+
+    /// Get document metadata without loading the full document.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn get_meta(&self, doc_id: &str) -> Result<Option<DocumentMetaEntry>> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+        Ok(ws.get_meta(doc_id).cloned())
+    }
+
+    /// Get document info by ID.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn get_document_info(&self, doc_id: &str) -> Result<Option<DocumentInfo>> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+        Ok(ws.get_meta(doc_id).map(|meta| DocumentInfo {
+            id: meta.id.clone(),
+            name: meta.doc_name.clone(),
+            format: meta.doc_type.clone(),
+            description: meta.doc_description.clone(),
+            page_count: meta.page_count,
+            line_count: meta.line_count,
+        }))
+    }
+
+    /// Remove multiple documents from the workspace.
+    ///
+    /// Returns the number of documents successfully removed.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace write fails.
+    pub fn batch_remove(&self, doc_ids: &[&str]) -> Result<usize> {
+        let mut removed = 0;
+
+        {
+            let mut ws = self.workspace.write()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+            for doc_id in doc_ids {
+                if ws.remove(doc_id)? {
+                    removed += 1;
+                    self.events.emit_workspace(WorkspaceEvent::Removed {
+                        doc_id: doc_id.to_string(),
+                    });
+                }
+            }
+        }
+
+        if removed > 0 {
+            info!("Batch removed {} documents", removed);
+        }
+
+        Ok(removed)
+    }
+
+    /// Clear all documents from the workspace.
+    ///
+    /// Returns the number of documents removed.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace write fails.
+    pub fn clear(&self) -> Result<usize> {
+        let doc_ids: Vec<String>;
+
+        {
+            let ws = self.workspace.read()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+            doc_ids = ws.list_documents().iter().map(|s| s.to_string()).collect();
+        }
+
+        let count = doc_ids.len();
+
+        {
+            let mut ws = self.workspace.write()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+            for doc_id in &doc_ids {
+                let _ = ws.remove(doc_id);
+            }
+        }
+
+        if count > 0 {
+            info!("Cleared workspace: {} documents removed", count);
+            self.events.emit_workspace(WorkspaceEvent::Cleared { count });
+        }
+
+        Ok(count)
+    }
+
+    /// Get workspace statistics.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn stats(&self) -> Result<WorkspaceStats> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+        Ok(WorkspaceStats {
+            document_count: ws.len(),
+        })
+    }
+
+    /// Get the number of documents in the workspace.
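+    ///
+    /// Returns 0 when the inner lock is poisoned instead of propagating the
+    /// error, which keeps `len` and `is_empty` infallible.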
+    pub fn len(&self) -> usize {
+        self.workspace.read()
+            .map(|ws| ws.len())
+            .unwrap_or(0)
+    }
+
+    /// Check if the workspace is empty.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Get the underlying workspace Arc (for advanced use).
+    pub(crate) fn inner(&self) -> Arc<RwLock<Workspace>> {
+        Arc::clone(&self.workspace)
+    }
+}
+
+impl Clone for WorkspaceClient {
+    fn clone(&self) -> Self {
+        Self {
+            workspace: Arc::clone(&self.workspace),
+            events: self.events.clone(),
+            config: self.config.clone(),
+        }
+    }
+}
+
+/// Workspace statistics.
+#[derive(Debug, Clone)]
+pub struct WorkspaceStats {
+    /// Number of documents in the workspace.
+    pub document_count: usize,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_workspace_client_creation() {
+        let workspace = Workspace::open("./test_workspace").unwrap();
+        let client = WorkspaceClient::new(workspace);
+        assert!(client.is_empty());
+    }
+
+    #[test]
+    fn test_workspace_stats() {
+        let workspace = Workspace::open("./test_workspace").unwrap();
+        let client = WorkspaceClient::new(workspace);
+
+        let stats = client.stats().unwrap();
+        assert_eq!(stats.document_count, 0);
+    }
+}
diff --git a/src/config/docs.rs b/src/config/docs.rs
new file mode 100644
index 00000000..7e2330b9
--- /dev/null
+++ b/src/config/docs.rs
@@ -0,0 +1,307 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Configuration documentation generation.
+//!
+//! This module provides utilities for generating documentation
+//! from configuration types, including markdown reference and
+//! example TOML files.
+
+use super::types::Config;
+
+/// Configuration documentation generator.
+#[derive(Debug, Clone)]
+pub struct ConfigDocs {
+    config: Config,
+}
+
+impl ConfigDocs {
+    /// Create a new documentation generator.
+    pub fn new(config: Config) -> Self {
+        Self { config }
+    }
+
+    /// Create with default configuration.
+    pub fn with_defaults() -> Self {
+        Self::new(Config::default())
+    }
+
+    /// Generate markdown documentation for the configuration.
+    pub fn to_markdown(&self) -> String {
+        let mut md = String::new();
+
+        md.push_str("# Configuration Reference\n\n");
+        md.push_str("This document describes all configuration options for vectorless.\n\n");
+        md.push_str("## Configuration File\n\n");
+        md.push_str("Configuration is loaded from a TOML file. 
Default locations:\n"); + md.push_str("- `./vectorless.toml`\n"); + md.push_str("- `./config.toml`\n"); + md.push_str("- `./.vectorless.toml`\n\n"); + + // Indexer section + md.push_str("## `[indexer]`\n\n"); + md.push_str("Controls document indexing behavior.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "subsection_threshold", "usize", "300", + "Word count threshold for splitting sections into subsections"); + self.add_row(&mut md, "max_segment_tokens", "usize", "3000", + "Maximum tokens to send in a single segmentation request"); + self.add_row(&mut md, "max_summary_tokens", "usize", "200", + "Maximum tokens for each summary"); + self.add_row(&mut md, "min_summary_tokens", "usize", "20", + "Minimum content tokens required to generate a summary"); + md.push_str("\n"); + + // Summary section + md.push_str("## `[summary]`\n\n"); + md.push_str("LLM configuration for summary generation.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "model", "string", "gpt-4o-mini", "Model for summarization"); + self.add_row(&mut md, "endpoint", "string", "https://api.openai.com/v1", "API endpoint"); + self.add_row(&mut md, "api_key", "string?", "null", "API key (optional, can use env var)"); + self.add_row(&mut md, "max_tokens", "usize", "200", "Maximum tokens for summary generation"); + self.add_row(&mut md, "temperature", "f32", "0.0", "Temperature for summary generation"); + md.push_str("\n"); + + // Retrieval section + md.push_str("## `[retrieval]`\n\n"); + md.push_str("Retrieval model and behavior configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "model", "string", "gpt-4o", "Model for retrieval navigation"); + self.add_row(&mut md, "endpoint", "string", "https://api.openai.com/v1", "API endpoint"); + self.add_row(&mut md, "api_key", "string?", "null", "API key (defaults to summary.api_key)"); + self.add_row(&mut md, "top_k", "usize", "3", "Number of top results to return"); + self.add_row(&mut md, "max_tokens", "usize", "1000", "Maximum tokens for retrieval context"); + self.add_row(&mut md, "temperature", "f32", "0.0", "Temperature for retrieval"); + md.push_str("\n"); + + // Retrieval.search section + md.push_str("## `[retrieval.search]`\n\n"); + md.push_str("Search algorithm configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "top_k", "usize", "5", "Number of top-k results to return"); + self.add_row(&mut md, "beam_width", "usize", "3", "Beam width for multi-path search"); + self.add_row(&mut md, "max_iterations", "usize", "10", "Maximum iterations for search algorithms"); + self.add_row(&mut md, "min_score", "f32", "0.1", "Minimum score to include a path"); + md.push_str("\n"); + + // Retrieval.sufficiency section + md.push_str("## `[retrieval.sufficiency]`\n\n"); + md.push_str("Sufficiency checker configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "min_tokens", "usize", "500", "Minimum tokens for sufficiency"); + self.add_row(&mut md, "target_tokens", "usize", "2000", "Target tokens for full sufficiency"); + self.add_row(&mut md, 
"max_tokens", "usize", "4000", "Maximum tokens before stopping"); + self.add_row(&mut md, "min_content_length", "usize", "200", "Minimum content length (characters)"); + self.add_row(&mut md, "confidence_threshold", "f32", "0.7", "Confidence threshold for LLM judge"); + md.push_str("\n"); + + // Retrieval.content section + md.push_str("## `[retrieval.content]`\n\n"); + md.push_str("Content aggregator configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "enabled", "bool", "true", "Enable content aggregator"); + self.add_row(&mut md, "token_budget", "usize", "4000", "Maximum tokens for aggregated content"); + self.add_row(&mut md, "min_relevance_score", "f32", "0.2", "Minimum relevance score threshold (0.0-1.0)"); + self.add_row(&mut md, "scoring_strategy", "string", "keyword_bm25", "Scoring strategy (keyword_only, keyword_bm25, hybrid)"); + self.add_row(&mut md, "output_format", "string", "markdown", "Output format (markdown, json, tree, flat)"); + self.add_row(&mut md, "include_scores", "bool", "false", "Include relevance scores in output"); + self.add_row(&mut md, "hierarchical_min_per_level", "f32", "0.1", "Minimum budget allocation per depth level"); + self.add_row(&mut md, "deduplicate", "bool", "true", "Enable content deduplication"); + self.add_row(&mut md, "dedup_threshold", "f32", "0.9", "Similarity threshold for deduplication"); + md.push_str("\n"); + + // Retrieval.strategy section + md.push_str("## `[retrieval.strategy]`\n\n"); + md.push_str("Strategy-specific configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "exploration_weight", "f32", "1.414", "MCTS exploration weight (√2)"); + self.add_row(&mut md, "similarity_threshold", "f32", "0.5", "Semantic similarity threshold"); + self.add_row(&mut md, "high_similarity_threshold", "f32", "0.8", "High similarity for 'answer' decision"); + self.add_row(&mut md, "low_similarity_threshold", "f32", "0.3", "Low similarity for 'explore' decision"); + md.push_str("\n"); + + // Storage section + md.push_str("## `[storage]`\n\n"); + md.push_str("Storage configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "workspace_dir", "string", "./workspace", "Workspace directory for persisted documents"); + md.push_str("\n"); + + // Concurrency section + md.push_str("## `[concurrency]`\n\n"); + md.push_str("Concurrency control configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "max_concurrent_requests", "usize", "10", "Maximum concurrent LLM API calls"); + self.add_row(&mut md, "requests_per_minute", "usize", "500", "Rate limit: requests per minute"); + self.add_row(&mut md, "enabled", "bool", "true", "Enable rate limiting"); + self.add_row(&mut md, "semaphore_enabled", "bool", "true", "Enable semaphore-based concurrency"); + md.push_str("\n"); + + // Fallback section + md.push_str("## `[fallback]`\n\n"); + md.push_str("Fallback/error recovery configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "enabled", "bool", "true", "Enable graceful degradation"); + self.add_row(&mut md, 
"models", "[string]", "[\"gpt-4o-mini\", \"glm-4-flash\"]", "Fallback models in priority order"); + self.add_row(&mut md, "endpoints", "[string]", "[]", "Fallback endpoints in priority order"); + self.add_row(&mut md, "on_rate_limit", "string", "retry_then_fallback", "Behavior on rate limit (retry, fallback, retry_then_fallback, fail)"); + self.add_row(&mut md, "on_timeout", "string", "retry_then_fallback", "Behavior on timeout"); + self.add_row(&mut md, "on_all_failed", "string", "return_error", "Behavior when all attempts fail (return_error, return_cache)"); + md.push_str("\n"); + + md + } + + fn add_row(&self, md: &mut String, name: &str, ty: &str, default: &str, desc: &str) { + md.push_str(&format!("| `{}` | {} | {} | {} |\n", name, ty, default, desc)); + } + + /// Generate an example TOML file with all options. + pub fn to_example_toml(&self) -> String { + toml::to_string_pretty(&self.config).unwrap_or_else(|e| { + format!("# Error generating TOML: {}\n\n# Using default config\n{}", + e, Self::fallback_toml()) + }) + } + + fn fallback_toml() -> String { + r#"# Vectorless Configuration Example +# Copy this file to config.toml and fill in your API keys + +[indexer] +subsection_threshold = 300 +max_segment_tokens = 3000 +max_summary_tokens = 200 +min_summary_tokens = 20 + +[summary] +model = "gpt-4o-mini" +endpoint = "https://api.openai.com/v1" +# api_key = "sk-..." +max_tokens = 200 +temperature = 0.0 + +[retrieval] +model = "gpt-4o" +endpoint = "https://api.openai.com/v1" +# api_key = "sk-..." +top_k = 3 +max_tokens = 1000 +temperature = 0.0 + +[retrieval.search] +top_k = 5 +beam_width = 3 +max_iterations = 10 +min_score = 0.1 + +[retrieval.sufficiency] +min_tokens = 500 +target_tokens = 2000 +max_tokens = 4000 +min_content_length = 200 +confidence_threshold = 0.7 + +[retrieval.cache] +max_entries = 1000 +ttl_secs = 3600 + +[retrieval.strategy] +exploration_weight = 1.414 +similarity_threshold = 0.5 +high_similarity_threshold = 0.8 +low_similarity_threshold = 0.3 + +[retrieval.content] +enabled = true +token_budget = 4000 +min_relevance_score = 0.2 +scoring_strategy = "keyword_bm25" +output_format = "markdown" +include_scores = false +hierarchical_min_per_level = 0.1 +deduplicate = true +dedup_threshold = 0.9 + +[storage] +workspace_dir = "./workspace" + +[concurrency] +max_concurrent_requests = 10 +requests_per_minute = 500 +enabled = true +semaphore_enabled = true + +[fallback] +enabled = true +models = ["gpt-4o-mini", "glm-4-flash"] +on_rate_limit = "retry_then_fallback" +on_timeout = "retry_then_fallback" +on_all_failed = "return_error" +"#.to_string() + } + + /// Generate a minimal example TOML file. 
+    pub fn to_minimal_toml(&self) -> String {
+        r#"# Minimal Vectorless Configuration
+# Most options have sensible defaults
+
+[summary]
+api_key = "your-api-key-here"
+
+[retrieval]
+top_k = 5
+"#.to_string()
+    }
+}
+
+impl Default for ConfigDocs {
+    fn default() -> Self {
+        Self::with_defaults()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_config_docs_markdown() {
+        let docs = ConfigDocs::with_defaults();
+        let md = docs.to_markdown();
+
+        assert!(md.contains("# Configuration Reference"));
+        assert!(md.contains("## `[indexer]`"));
+        assert!(md.contains("## `[retrieval]`"));
+        assert!(md.contains("## `[retrieval.content]`"));
+    }
+
+    #[test]
+    fn test_config_docs_toml() {
+        let docs = ConfigDocs::with_defaults();
+        let toml = docs.to_example_toml();
+
+        assert!(toml.contains("[indexer]"));
+        assert!(toml.contains("[retrieval]"));
+    }
+
+    #[test]
+    fn test_config_docs_minimal_toml() {
+        let docs = ConfigDocs::with_defaults();
+        let toml = docs.to_minimal_toml();
+
+        assert!(toml.contains("[summary]"));
+        assert!(toml.len() < 200); // Should be minimal
+    }
+}
diff --git a/src/config/loader.rs b/src/config/loader.rs
index e83dc229..fe2c6736 100644
--- a/src/config/loader.rs
+++ b/src/config/loader.rs
@@ -3,14 +3,46 @@
 //! Configuration loader.
 //!
-//! Loads configuration from TOML files only.
-//! All configuration comes from config files, not environment variables.
-//! This ensures configuration is explicit and traceable.
+//! Loads configuration from TOML files with optional environment variable
+//! overrides and validation.
+//!
+//! # Example
+//!
+//! ```rust,no_run
+//! use vectorless::config::{ConfigLoader, Config};
+//!
+//! // Load from file
+//! let config = ConfigLoader::new()
+//!     .file("config.toml")
+//!     .load()?;
+//!
+//! // Load with validation
+//! let config = ConfigLoader::new()
+//!     .file("config.toml")
+//!     .with_validation(true)
+//!     .load()?;
+//!
+//! // Load with environment variable override
+//! let config = ConfigLoader::new()
+//!     .file("config.toml")
+//!     .with_env("VECTORLESS_")
+//!     .load()?;
+//!
+//! // Layered configuration
+//! let config = ConfigLoader::new()
+//!     .file("default.toml")
+//!     .file("production.toml")
+//!     .with_validation(true)
+//!     .load()?;
+//! # Ok::<(), vectorless::config::ConfigError>(())
+//! ```
 
 use std::path::{Path, PathBuf};
 use thiserror::Error;
 
+use super::merge::Merge;
 use super::types::Config;
+use super::validator::ConfigValidator;
 
 /// Configuration loading errors.
 #[derive(Debug, Error)]
@@ -30,59 +62,235 @@ pub enum ConfigError {
     /// Invalid configuration value.
     #[error("Invalid configuration: {0}")]
     Invalid(String),
+
+    /// Configuration validation failed.
+    #[error("{0}")]
+    Validation(#[from] super::types::ConfigValidationError),
+
+    /// Environment variable error.
+    #[error("Environment variable error: {0}")]
+    Env(String),
 }
 
 /// Configuration loader.
-///
-/// # Example
-///
-/// ```rust,no_run
-/// use vectorless::config::{ConfigLoader, Config};
-///
-/// // Load from file
-/// let config = ConfigLoader::new()
-///     .file("config.toml")
-///     .load()?;
-///
-/// // Or use defaults
-/// let config = Config::default();
-/// # Ok::<(), vectorless::config::ConfigError>(())
-/// ```
-#[derive(Debug, Default)]
+#[derive(Debug)]
 pub struct ConfigLoader {
-    /// Configuration file path.
-    file: Option<PathBuf>,
+    /// Configuration file paths (loaded in order, later files override earlier).
+    files: Vec<PathBuf>,
+
+    /// Environment variable prefix (optional).
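+    /// e.g. `"VECTORLESS_"`, so that `VECTORLESS_SUMMARY__API_KEY`
+    /// overrides `summary.api_key`.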
+    env_prefix: Option<String>,
+
+    /// Whether to validate after loading.
+    validate: bool,
+
+    /// Custom validator (optional).
+    validator: Option<ConfigValidator>,
+}
+
+impl Default for ConfigLoader {
+    fn default() -> Self {
+        Self::new()
+    }
+}
 
 impl ConfigLoader {
     /// Create a new configuration loader with defaults.
     pub fn new() -> Self {
-        Self::default()
+        Self {
+            files: Vec::new(),
+            env_prefix: None,
+            validate: false,
+            validator: None,
+        }
     }
 
     /// Specify a configuration file to load.
+    ///
+    /// Multiple files can be specified; later files override earlier ones.
     pub fn file<P: AsRef<Path>>(mut self, path: P) -> Self {
-        self.file = Some(path.as_ref().to_path_buf());
+        self.files.push(path.as_ref().to_path_buf());
+        self
+    }
+
+    /// Specify multiple configuration files.
+    pub fn files<I, P>(mut self, paths: I) -> Self
+    where
+        I: IntoIterator<Item = P>,
+        P: AsRef<Path>,
+    {
+        self.files
+            .extend(paths.into_iter().map(|p| p.as_ref().to_path_buf()));
+        self
+    }
+
+    /// Enable environment variable override.
+    ///
+    /// Variables like `VECTORLESS_SUMMARY__API_KEY` override config values.
+    /// Use `__` (double underscore) to separate nested keys.
+    pub fn with_env(mut self, prefix: impl Into<String>) -> Self {
+        self.env_prefix = Some(prefix.into());
+        self
+    }
+
+    /// Enable or disable validation after loading.
+    pub fn with_validation(mut self, validate: bool) -> Self {
+        self.validate = validate;
+        self
+    }
+
+    /// Set a custom validator.
+    pub fn with_validator(mut self, validator: ConfigValidator) -> Self {
+        self.validator = Some(validator);
         self
     }
 
     /// Load the configuration.
     ///
-    /// If no file is specified, returns default configuration.
-    /// If file is specified but doesn't exist, returns an error.
+    /// # Behavior
+    ///
+    /// 1. Start with default configuration
+    /// 2. Load and merge each specified file (in order)
+    /// 3. Apply environment variable overrides (if enabled)
+    /// 4. Validate configuration (if enabled)
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - A specified file doesn't exist
+    /// - A file can't be parsed as valid TOML
+    /// - Validation fails (when enabled)
     pub fn load(self) -> Result<Config, ConfigError> {
-        if let Some(ref path) = self.file {
+        let mut config = Config::default();
+
+        // Load and merge each file
+        for path in &self.files {
             if path.exists() {
                 let content = std::fs::read_to_string(path)?;
-                let config: Config = toml::from_str(&content)?;
-                Ok(config)
+                let file_config: Config = toml::from_str(&content)?;
+                config.merge(&file_config, super::merge::MergeStrategy::Replace);
             } else {
-                Err(ConfigError::NotFound(path.clone()))
+                return Err(ConfigError::NotFound(path.clone()));
+            }
+        }
+
+        // Apply environment variable overrides
+        if let Some(ref prefix) = self.env_prefix {
+            self.apply_env_overrides(&mut config, prefix)?;
+        }
+
+        // Validate if requested
+        if self.validate {
+            let validator = self.validator.unwrap_or_default();
+            validator.validate(&config)?;
+        }
+
+        Ok(config)
+    }
+
+    /// Apply environment variable overrides to the configuration.
+    fn apply_env_overrides(&self, config: &mut Config, prefix: &str) -> Result<(), ConfigError> {
+        for (key, value) in std::env::vars() {
+            if !key.starts_with(prefix) {
+                continue;
+            }
+
+            // Parse the path: VECTORLESS_SUMMARY__API_KEY -> ["summary", "api_key"]
+            // (normalize to lower case so the parts match the keys in `set_by_path`)
+            let path_str = key
+                .trim_start_matches(prefix)
+                .trim_start_matches('_')
+                .to_lowercase();
+            let parts: Vec<&str> = path_str.split("__").collect();
+
+            if parts.is_empty() {
+                continue;
+            }
+
+            // Apply the override
+            self.set_by_path(config, &parts, &value)?;
+        }
+
+        Ok(())
+    }
+
+    /// Set a configuration value by path.
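+    ///
+    /// e.g. `["summary", "api_key"]` assigns to `config.summary.api_key`.
+    /// Paths that are not recognized are silently ignored.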
+    fn set_by_path(&self, config: &mut Config, path: &[&str], value: &str) -> Result<(), ConfigError> {
+        match path {
+            ["summary", "api_key"] => {
+                config.summary.api_key = Some(value.to_string());
+            }
+            ["summary", "model"] => {
+                config.summary.model = value.to_string();
+            }
+            ["summary", "endpoint"] => {
+                config.summary.endpoint = value.to_string();
+            }
+            ["summary", "max_tokens"] => {
+                config.summary.max_tokens = value.parse().map_err(|e| {
+                    ConfigError::Env(format!("Invalid max_tokens: {}", e))
+                })?;
+            }
+            ["retrieval", "api_key"] => {
+                config.retrieval.api_key = Some(value.to_string());
+            }
+            ["retrieval", "model"] => {
+                config.retrieval.model = value.to_string();
+            }
+            ["retrieval", "endpoint"] => {
+                config.retrieval.endpoint = value.to_string();
+            }
+            ["retrieval", "top_k"] => {
+                config.retrieval.top_k = value.parse().map_err(|e| {
+                    ConfigError::Env(format!("Invalid top_k: {}", e))
+                })?;
+            }
+            ["storage", "workspace_dir"] => {
+                config.storage.workspace_dir = PathBuf::from(value);
+            }
+            ["concurrency", "max_concurrent_requests"] => {
+                config.concurrency.max_concurrent_requests = value.parse().map_err(|e| {
+                    ConfigError::Env(format!("Invalid max_concurrent_requests: {}", e))
+                })?;
+            }
+            _ => {
+                // Unknown path - could log a warning
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Default configuration file names to search for.
+pub const CONFIG_FILE_NAMES: &[&str] =
+    &["vectorless.toml", "config.toml", ".vectorless.toml"];
+
+/// Find a configuration file in current or parent directories.
+pub fn find_config_file() -> Option<PathBuf> {
+    let current_dir = std::env::current_dir().ok()?;
+
+    // Search in current directory first
+    for name in CONFIG_FILE_NAMES {
+        let path = current_dir.join(name);
+        if path.exists() {
+            return Some(path);
+        }
+    }
+
+    // Search in parent directories (up to 3 levels)
+    let mut dir = current_dir.as_path();
+    for _ in 0..3 {
+        if let Some(parent) = dir.parent() {
+            for name in CONFIG_FILE_NAMES {
+                let path = parent.join(name);
+                if path.exists() {
+                    return Some(path);
+                }
+            }
+            dir = parent;
         } else {
-            Ok(Config::default())
+            break;
         }
     }
+
+    None
 }
 
 #[cfg(test)]
@@ -106,4 +314,24 @@ mod tests {
         let config = ConfigLoader::new().load().unwrap();
         assert_eq!(config.indexer.subsection_threshold, 300);
     }
+
+    #[test]
+    fn test_config_loader_not_found() {
+        let result = ConfigLoader::new()
+            .file("nonexistent_config.toml")
+            .load();
+
+        assert!(result.is_err());
+        assert!(matches!(result.unwrap_err(), ConfigError::NotFound(_)));
+    }
+
+    #[test]
+    fn test_config_loader_with_validation() {
+        let config = ConfigLoader::new()
+            .with_validation(true)
+            .load()
+            .unwrap();
+
+        assert_eq!(config.retrieval.model, "gpt-4o");
+    }
 }
diff --git a/src/config/merge.rs b/src/config/merge.rs
new file mode 100644
index 00000000..438872b5
--- /dev/null
+++ b/src/config/merge.rs
@@ -0,0 +1,356 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Configuration merging.
+//!
+//! This module provides utilities for merging multiple configurations,
+//! enabling layered configuration from multiple sources.
+
+use super::types::{
+    CacheConfig, Config, ConcurrencyConfig, ContentAggregatorConfig, FallbackConfig,
+    IndexerConfig, RetrievalConfig, SearchConfig, StorageConfig, StrategyConfig, SufficiencyConfig,
+    SummaryConfig,
+};
+
+/// Configuration merge strategy.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MergeStrategy {
+    /// Replace with source value.
+    Replace,
+    /// Keep existing value if present (don't overwrite).
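+    /// (In the per-field `Merge` impls below, "existing" is approximated as
+    /// "differs from the compiled-in default".)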
+ KeepExisting, + /// Recursively merge nested structures. + Recursive, +} + +/// Trait for configuration merging. +pub trait Merge { + /// Merge another configuration into this one. + fn merge(&mut self, other: &Self, strategy: MergeStrategy); +} + +impl Merge for Config { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + self.indexer.merge(&other.indexer, strategy); + self.summary.merge(&other.summary, strategy); + self.retrieval.merge(&other.retrieval, strategy); + self.storage.merge(&other.storage, strategy); + self.concurrency.merge(&other.concurrency, strategy); + self.fallback.merge(&other.fallback, strategy); + } +} + +impl Merge for IndexerConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.subsection_threshold == 300 { + self.subsection_threshold = other.subsection_threshold; + } + if strategy == MergeStrategy::Replace || self.max_segment_tokens == 3000 { + self.max_segment_tokens = other.max_segment_tokens; + } + if strategy == MergeStrategy::Replace || self.max_summary_tokens == 200 { + self.max_summary_tokens = other.max_summary_tokens; + } + if strategy == MergeStrategy::Replace || self.min_summary_tokens == 20 { + self.min_summary_tokens = other.min_summary_tokens; + } + } +} + +impl Merge for SummaryConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.model == "gpt-4o-mini" { + self.model = other.model.clone(); + } + if strategy == MergeStrategy::Replace || self.endpoint == "https://api.openai.com/v1" { + self.endpoint = other.endpoint.clone(); + } + // Always merge API keys if present + if other.api_key.is_some() { + self.api_key = other.api_key.clone(); + } + if strategy == MergeStrategy::Replace || self.max_tokens == 200 { + self.max_tokens = other.max_tokens; + } + if strategy == MergeStrategy::Replace || self.temperature == 0.0 { + self.temperature = other.temperature; + } + } +} + +impl Merge for RetrievalConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.model == "gpt-4o" { + self.model = other.model.clone(); + } + if strategy == MergeStrategy::Replace || self.endpoint == "https://api.openai.com/v1" { + self.endpoint = other.endpoint.clone(); + } + if other.api_key.is_some() { + self.api_key = other.api_key.clone(); + } + if strategy == MergeStrategy::Replace || self.max_tokens == 1000 { + self.max_tokens = other.max_tokens; + } + if strategy == MergeStrategy::Replace || self.temperature == 0.0 { + self.temperature = other.temperature; + } + if strategy == MergeStrategy::Replace || self.top_k == 3 { + self.top_k = other.top_k; + } + + self.search.merge(&other.search, strategy); + self.sufficiency.merge(&other.sufficiency, strategy); + self.cache.merge(&other.cache, strategy); + self.strategy.merge(&other.strategy, strategy); + self.content.merge(&other.content, strategy); + } +} + +impl Merge for SearchConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.top_k == 5 { + self.top_k = other.top_k; + } + if strategy == MergeStrategy::Replace || self.beam_width == 3 { + self.beam_width = other.beam_width; + } + if strategy == MergeStrategy::Replace || self.max_iterations == 10 { + self.max_iterations = other.max_iterations; + } + if strategy == MergeStrategy::Replace || (self.min_score - 0.1).abs() < f32::EPSILON { + self.min_score = other.min_score; + } + } +} + +impl Merge for 
SufficiencyConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.min_tokens == 500 { + self.min_tokens = other.min_tokens; + } + if strategy == MergeStrategy::Replace || self.target_tokens == 2000 { + self.target_tokens = other.target_tokens; + } + if strategy == MergeStrategy::Replace || self.max_tokens == 4000 { + self.max_tokens = other.max_tokens; + } + if strategy == MergeStrategy::Replace || self.min_content_length == 200 { + self.min_content_length = other.min_content_length; + } + if strategy == MergeStrategy::Replace || (self.confidence_threshold - 0.7).abs() < f32::EPSILON + { + self.confidence_threshold = other.confidence_threshold; + } + } +} + +impl Merge for CacheConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.max_entries == 1000 { + self.max_entries = other.max_entries; + } + if strategy == MergeStrategy::Replace || self.ttl_secs == 3600 { + self.ttl_secs = other.ttl_secs; + } + } +} + +impl Merge for StrategyConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace + || (self.exploration_weight - 1.414).abs() < 0.001 + { + self.exploration_weight = other.exploration_weight; + } + if strategy == MergeStrategy::Replace || (self.similarity_threshold - 0.5).abs() < f32::EPSILON + { + self.similarity_threshold = other.similarity_threshold; + } + if strategy == MergeStrategy::Replace + || (self.high_similarity_threshold - 0.8).abs() < f32::EPSILON + { + self.high_similarity_threshold = other.high_similarity_threshold; + } + if strategy == MergeStrategy::Replace + || (self.low_similarity_threshold - 0.3).abs() < f32::EPSILON + { + self.low_similarity_threshold = other.low_similarity_threshold; + } + } +} + +impl Merge for ContentAggregatorConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if other.enabled != self.enabled { + self.enabled = other.enabled; + } + if strategy == MergeStrategy::Replace || self.token_budget == 4000 { + self.token_budget = other.token_budget; + } + if strategy == MergeStrategy::Replace || (self.min_relevance_score - 0.2).abs() < f32::EPSILON + { + self.min_relevance_score = other.min_relevance_score; + } + if strategy == MergeStrategy::Replace || self.scoring_strategy == "keyword_bm25" { + self.scoring_strategy = other.scoring_strategy.clone(); + } + if strategy == MergeStrategy::Replace || self.output_format == "markdown" { + self.output_format = other.output_format.clone(); + } + if other.include_scores != self.include_scores { + self.include_scores = other.include_scores; + } + if strategy == MergeStrategy::Replace + || (self.hierarchical_min_per_level - 0.1).abs() < f32::EPSILON + { + self.hierarchical_min_per_level = other.hierarchical_min_per_level; + } + if other.deduplicate != self.deduplicate { + self.deduplicate = other.deduplicate; + } + if strategy == MergeStrategy::Replace || (self.dedup_threshold - 0.9).abs() < f32::EPSILON { + self.dedup_threshold = other.dedup_threshold; + } + } +} + +impl Merge for StorageConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace + || self.workspace_dir == std::path::PathBuf::from("./workspace") + { + self.workspace_dir = other.workspace_dir.clone(); + } + } +} + +impl Merge for ConcurrencyConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.max_concurrent_requests == 10 { + 
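+            // Default-as-sentinel, as elsewhere in this module: a field still
+            // holding its built-in default (10 here) is treated as unset.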
+            self.max_concurrent_requests = other.max_concurrent_requests;
+        }
+        if strategy == MergeStrategy::Replace || self.requests_per_minute == 500 {
+            self.requests_per_minute = other.requests_per_minute;
+        }
+        if other.enabled != self.enabled {
+            self.enabled = other.enabled;
+        }
+        if other.semaphore_enabled != self.semaphore_enabled {
+            self.semaphore_enabled = other.semaphore_enabled;
+        }
+    }
+}
+
+impl Merge for FallbackConfig {
+    fn merge(&mut self, other: &Self, strategy: MergeStrategy) {
+        if other.enabled != self.enabled {
+            self.enabled = other.enabled;
+        }
+        if !other.models.is_empty() {
+            self.models = other.models.clone();
+        }
+        if !other.endpoints.is_empty() {
+            self.endpoints = other.endpoints.clone();
+        }
+        if strategy == MergeStrategy::Replace {
+            self.on_rate_limit = other.on_rate_limit;
+            self.on_timeout = other.on_timeout;
+            self.on_all_failed = other.on_all_failed;
+            self.max_retries = other.max_retries;
+            self.initial_retry_delay_ms = other.initial_retry_delay_ms;
+            self.max_retry_delay_ms = other.max_retry_delay_ms;
+            self.retry_multiplier = other.retry_multiplier;
+        }
+    }
+}
+
+/// Configuration overlay for layered configuration.
+///
+/// Allows building a configuration from multiple sources,
+/// with later overlays taking precedence.
+#[derive(Debug, Clone)]
+pub struct ConfigOverlay {
+    /// Base configuration.
+    base: Config,
+    /// Overlay configurations (applied in order).
+    overlays: Vec<Config>,
+}
+
+impl ConfigOverlay {
+    /// Create a new overlay with a base configuration.
+    pub fn new(base: Config) -> Self {
+        Self {
+            base,
+            overlays: Vec::new(),
+        }
+    }
+
+    /// Add an overlay configuration.
+    pub fn overlay(mut self, config: Config) -> Self {
+        self.overlays.push(config);
+        self
+    }
+
+    /// Resolve all overlays into a final configuration.
+    pub fn resolve(self) -> Config {
+        let mut result = self.base;
+        for overlay in self.overlays {
+            result.merge(&overlay, MergeStrategy::Replace);
+        }
+        result
+    }
+}
+
+impl Default for ConfigOverlay {
+    fn default() -> Self {
+        Self::new(Config::default())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_config_merge() {
+        let mut base = Config::default();
+        let mut overlay = Config::default();
+
+        overlay.retrieval.top_k = 10;
+        overlay.summary.model = "gpt-4o".to_string();
+
+        base.merge(&overlay, MergeStrategy::Replace);
+
+        assert_eq!(base.retrieval.top_k, 10);
+        assert_eq!(base.summary.model, "gpt-4o");
+    }
+
+    #[test]
+    fn test_config_overlay() {
+        let mut overlay1 = Config::default();
+        overlay1.retrieval.top_k = 5;
+
+        let mut overlay2 = Config::default();
+        overlay2.retrieval.top_k = 10;
+
+        let config = ConfigOverlay::new(Config::default())
+            .overlay(overlay1)
+            .overlay(overlay2)
+            .resolve();
+
+        assert_eq!(config.retrieval.top_k, 10);
+    }
+
+    #[test]
+    fn test_merge_keeps_api_keys() {
+        let mut base = Config::default();
+        let mut overlay = Config::default();
+
+        overlay.summary.api_key = Some("test-key".to_string());
+
+        base.merge(&overlay, MergeStrategy::Replace);
+
+        assert_eq!(base.summary.api_key, Some("test-key".to_string()));
+    }
+}
diff --git a/src/config/mod.rs b/src/config/mod.rs
index 23e98f4e..98ad2e8a 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -3,15 +3,98 @@
 //! Configuration management for vectorless.
 //!
-//! This module provides configuration loading and validation:
-//! - [`Config`] - Main configuration structure
-//! - [`IndexerConfig`] - Indexing parameters
-//! - [`SummaryConfig`] - Summarization model settings
-//! 
- [`RetrievalConfig`] - Retrieval model settings -//! - [`StorageConfig`] - Storage paths +//! This module provides comprehensive configuration loading, validation, +//! and management: +//! +//! - [`Config`] — Main configuration structure +//! - [`ConfigLoader`] — Load configuration from TOML files +//! - [`ConfigValidator`] — Validate configuration values +//! - [`ConfigDocs`] — Generate configuration documentation +//! +//! # Quick Start +//! +//! ```rust,no_run +//! use vectorless::config::{Config, ConfigLoader}; +//! +//! // Load from file +//! let config = ConfigLoader::new() +//! .file("config.toml") +//! .with_validation(true) +//! .load()?; +//! +//! // Or use defaults +//! let config = Config::default(); +//! # Ok::<(), vectorless::config::ConfigError>(()) +//! ``` +//! +//! # Layered Configuration +//! +//! Multiple configuration files can be layered: +//! +//! ```rust,no_run +//! use vectorless::config::ConfigLoader; +//! +//! let config = ConfigLoader::new() +//! .file("default.toml") // Base defaults +//! .file("production.toml") // Production overrides +//! .with_env("VECTORLESS_") // Environment overrides +//! .with_validation(true) +//! .load()?; +//! # Ok::<(), vectorless::config::ConfigError>(()) +//! ``` +//! +//! # Environment Variables +//! +//! When enabled with `with_env()`, environment variables can override config: +//! +//! | Variable | Config Path | +//! |----------|-------------| +//! | `VECTORLESS_SUMMARY__API_KEY` | `summary.api_key` | +//! | `VECTORLESS_RETRIEVAL__TOP_K` | `retrieval.top_k` | +//! | `VECTORLESS_STORAGE__WORKSPACE_DIR` | `storage.workspace_dir` | +//! +//! # Configuration Sections +//! +//! - `[indexer]` — Document indexing parameters +//! - `[summary]` — Summarization model settings +//! - `[retrieval]` — Retrieval model settings +//! - `[retrieval.search]` — Search algorithm configuration +//! - `[retrieval.sufficiency]` — Sufficiency checker settings +//! - `[retrieval.content]` — Content aggregator settings +//! - `[retrieval.strategy]` — Strategy-specific settings +//! - `[retrieval.cache]` — Cache configuration +//! - `[storage]` — Storage paths +//! - `[concurrency]` — Concurrency control +//! - `[fallback]` — Error recovery settings +mod docs; mod loader; +mod merge; mod types; +mod validator; -pub use loader::{ConfigError, ConfigLoader}; -pub use types::*; +// Re-export main types +pub use docs::ConfigDocs; +pub use loader::{find_config_file, ConfigError, ConfigLoader, CONFIG_FILE_NAMES}; +pub use merge::{ConfigOverlay, Merge, MergeStrategy}; +pub use types::{ + // Main config + Config, + // Indexer + IndexerConfig, + // LLM configs + LlmConfig, SummaryConfig, + // Retrieval configs + RetrievalConfig, SearchConfig, + // Storage and sufficiency + StorageConfig, CacheConfig, StrategyConfig, SufficiencyConfig, + // Content aggregator + ContentAggregatorConfig, + // Concurrency + ConcurrencyConfig, + // Fallback + FallbackBehavior, FallbackConfig, OnAllFailedBehavior, + // Validation + ConfigValidationError, ValidationError, Severity, +}; +pub use validator::{ConfigValidator, ValidationRule}; diff --git a/src/config/types.rs b/src/config/types.rs deleted file mode 100644 index 3a40d920..00000000 --- a/src/config/types.rs +++ /dev/null @@ -1,578 +0,0 @@ -// Copyright (c) 2026 vectorless developers -// SPDX-License-Identifier: Apache-2.0 - -//! Configuration type definitions. -//! -//! All configuration values are defined inline in `Default` trait implementations. -//! 
Configuration is loaded from TOML files only - no environment variable magic. - -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; - -/// Main configuration for vectorless. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Config { - /// Indexer configuration. - #[serde(default)] - pub indexer: IndexerConfig, - - /// Summary model configuration. - #[serde(default)] - pub summary: SummaryConfig, - - /// Retrieval model configuration. - #[serde(default)] - pub retrieval: RetrievalConfig, - - /// Storage configuration. - #[serde(default)] - pub storage: StorageConfig, - - /// Concurrency control configuration. - #[serde(default)] - pub concurrency: ConcurrencyConfig, - - /// Fallback/error recovery configuration. - #[serde(default)] - pub fallback: FallbackConfig, -} - -impl Default for Config { - fn default() -> Self { - Self { - indexer: IndexerConfig::default(), - summary: SummaryConfig::default(), - retrieval: RetrievalConfig::default(), - storage: StorageConfig::default(), - concurrency: ConcurrencyConfig::default(), - fallback: FallbackConfig::default(), - } - } -} - -/// Indexer configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct IndexerConfig { - /// Word count threshold for splitting sections into subsections. - #[serde(default)] - pub subsection_threshold: usize, - - /// Maximum tokens to send in a single segmentation request. - #[serde(default)] - pub max_segment_tokens: usize, - - /// Maximum tokens for each summary. - #[serde(default)] - pub max_summary_tokens: usize, - - /// Minimum content tokens required to generate a summary. - #[serde(default)] - pub min_summary_tokens: usize, -} - -impl Default for IndexerConfig { - fn default() -> Self { - Self { - subsection_threshold: 300, - max_segment_tokens: 3000, - max_summary_tokens: 200, - min_summary_tokens: 20, - } - } -} - -/// Generic LLM configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct LlmConfig { - /// Model name (e.g., "gpt-4o-mini", "claude-3-haiku"). - #[serde(default)] - pub model: String, - - /// API endpoint. - #[serde(default)] - pub endpoint: String, - - /// API key. - #[serde(default)] - pub api_key: Option, - - /// Maximum tokens for responses. - #[serde(default)] - pub max_tokens: usize, - - /// Temperature for generation. - #[serde(default)] - pub temperature: f32, -} - -impl Default for LlmConfig { - fn default() -> Self { - Self { - model: "gpt-4o-mini".to_string(), - endpoint: "https://api.openai.com/v1".to_string(), - api_key: None, - max_tokens: 1000, - temperature: 0.0, - } - } -} - -impl LlmConfig { - /// Create a new LLM config with defaults. - pub fn new() -> Self { - Self::default() - } - - /// Set the model. - pub fn with_model(mut self, model: impl Into) -> Self { - self.model = model.into(); - self - } - - /// Set the endpoint. - pub fn with_endpoint(mut self, endpoint: impl Into) -> Self { - self.endpoint = endpoint.into(); - self - } - - /// Set the API key. - pub fn with_api_key(mut self, api_key: impl Into) -> Self { - self.api_key = Some(api_key.into()); - self - } - - /// Get the API key from config. - pub fn get_api_key(&self) -> Option<&str> { - self.api_key.as_deref() - } -} - -/// Summary model configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SummaryConfig { - /// Model name for summarization. - #[serde(default)] - pub model: String, - - /// API endpoint for summary model. - #[serde(default)] - pub endpoint: String, - - /// API key. 
- #[serde(default)] - pub api_key: Option, - - /// Maximum tokens for summary generation. - #[serde(default)] - pub max_tokens: usize, - - /// Temperature for summary generation. - #[serde(default)] - pub temperature: f32, -} - -impl Default for SummaryConfig { - fn default() -> Self { - Self { - model: "gpt-4o-mini".to_string(), - endpoint: "https://api.openai.com/v1".to_string(), - api_key: None, - max_tokens: 200, - temperature: 0.0, - } - } -} - -/// Retrieval model configuration (for navigation). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RetrievalConfig { - /// Model name for retrieval/navigation. - #[serde(default)] - pub model: String, - - /// API endpoint for retrieval model. - #[serde(default)] - pub endpoint: String, - - /// API key. - #[serde(default)] - pub api_key: Option, - - /// Maximum tokens for retrieval context. - #[serde(default)] - pub max_tokens: usize, - - /// Temperature for retrieval. - #[serde(default)] - pub temperature: f32, - - /// Number of top-k results to return. - #[serde(default)] - pub top_k: usize, - - /// Search algorithm configuration. - #[serde(default)] - pub search: SearchConfig, - - /// Sufficiency checker configuration. - #[serde(default)] - pub sufficiency: SufficiencyConfig, - - /// Cache configuration. - #[serde(default)] - pub cache: CacheConfig, - - /// Strategy-specific configuration. - #[serde(default)] - pub strategy: StrategyConfig, -} - -impl Default for RetrievalConfig { - fn default() -> Self { - Self { - model: "gpt-4o".to_string(), - endpoint: "https://api.openai.com/v1".to_string(), - api_key: None, - max_tokens: 1000, - temperature: 0.0, - top_k: 3, - search: SearchConfig::default(), - sufficiency: SufficiencyConfig::default(), - cache: CacheConfig::default(), - strategy: StrategyConfig::default(), - } - } -} - -/// Search algorithm configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SearchConfig { - /// Number of top-k results to return. - #[serde(default)] - pub top_k: usize, - - /// Beam width for multi-path search. - #[serde(default)] - pub beam_width: usize, - - /// Maximum iterations for search algorithms. - #[serde(default)] - pub max_iterations: usize, - - /// Minimum score to include a path. - #[serde(default)] - pub min_score: f32, -} - -impl Default for SearchConfig { - fn default() -> Self { - Self { - top_k: 5, - beam_width: 3, - max_iterations: 10, - min_score: 0.1, - } - } -} - -/// Sufficiency checker configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SufficiencyConfig { - /// Minimum tokens for sufficiency. - #[serde(default)] - pub min_tokens: usize, - - /// Target tokens for full sufficiency. - #[serde(default)] - pub target_tokens: usize, - - /// Maximum tokens before stopping. - #[serde(default)] - pub max_tokens: usize, - - /// Minimum content length (characters). - #[serde(default)] - pub min_content_length: usize, - - /// Confidence threshold for LLM judge. - #[serde(default)] - pub confidence_threshold: f32, -} - -impl Default for SufficiencyConfig { - fn default() -> Self { - Self { - min_tokens: 500, - target_tokens: 2000, - max_tokens: 4000, - min_content_length: 200, - confidence_threshold: 0.7, - } - } -} - -/// Cache configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CacheConfig { - /// Maximum number of cache entries. - #[serde(default)] - pub max_entries: usize, - - /// Time-to-live for cache entries (seconds). 
- #[serde(default)] - pub ttl_secs: u64, -} - -impl Default for CacheConfig { - fn default() -> Self { - Self { - max_entries: 1000, - ttl_secs: 3600, - } - } -} - -/// Strategy-specific configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StrategyConfig { - /// MCTS exploration weight (sqrt(2) ≈ 1.414). - #[serde(default)] - pub exploration_weight: f32, - - /// Semantic similarity threshold. - #[serde(default)] - pub similarity_threshold: f32, - - /// High similarity threshold for "answer" decision. - #[serde(default)] - pub high_similarity_threshold: f32, - - /// Low similarity threshold for "explore" decision. - #[serde(default)] - pub low_similarity_threshold: f32, -} - -impl Default for StrategyConfig { - fn default() -> Self { - Self { - exploration_weight: 1.414, - similarity_threshold: 0.5, - high_similarity_threshold: 0.8, - low_similarity_threshold: 0.3, - } - } -} - -/// Storage configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StorageConfig { - /// Workspace directory for persisted documents. - #[serde(default)] - pub workspace_dir: PathBuf, -} - -impl Default for StorageConfig { - fn default() -> Self { - Self { - workspace_dir: PathBuf::from("./workspace"), - } - } -} - -/// Concurrency control configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ConcurrencyConfig { - /// Maximum concurrent LLM API calls. - #[serde(default)] - pub max_concurrent_requests: usize, - - /// Rate limit: requests per minute. - #[serde(default)] - pub requests_per_minute: usize, - - /// Whether rate limiting is enabled. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Whether semaphore-based concurrency limiting is enabled. - #[serde(default = "default_true")] - pub semaphore_enabled: bool, -} - -fn default_true() -> bool { - true -} - -impl Default for ConcurrencyConfig { - fn default() -> Self { - Self { - max_concurrent_requests: 10, - requests_per_minute: 500, - enabled: true, - semaphore_enabled: true, - } - } -} - -impl ConcurrencyConfig { - /// Create a new config with defaults. - pub fn new() -> Self { - Self::default() - } - - /// Set the maximum concurrent requests. - pub fn with_max_concurrent_requests(mut self, max: usize) -> Self { - self.max_concurrent_requests = max; - self - } - - /// Set the requests per minute rate limit. - pub fn with_requests_per_minute(mut self, rpm: usize) -> Self { - self.requests_per_minute = rpm; - self - } - - /// Enable or disable rate limiting. - pub fn with_enabled(mut self, enabled: bool) -> Self { - self.enabled = enabled; - self - } - - /// Enable or disable semaphore. - pub fn with_semaphore_enabled(mut self, enabled: bool) -> Self { - self.semaphore_enabled = enabled; - self - } - - /// Convert to the runtime concurrency config. - pub fn to_runtime_config(&self) -> crate::throttle::ConcurrencyConfig { - crate::throttle::ConcurrencyConfig { - max_concurrent_requests: self.max_concurrent_requests, - requests_per_minute: self.requests_per_minute, - enabled: self.enabled, - semaphore_enabled: self.semaphore_enabled, - } - } -} - -impl From for crate::throttle::ConcurrencyConfig { - fn from(config: ConcurrencyConfig) -> Self { - config.to_runtime_config() - } -} - -/// Fallback behavior when encountering errors. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum FallbackBehavior { - /// Only retry with the same model/endpoint. - Retry, - /// Immediately switch to fallback model/endpoint. 
-    Fallback,
-    /// Retry first, then fallback if still failing.
-    RetryThenFallback,
-    /// Fail immediately without retry or fallback.
-    Fail,
-}
-
-impl Default for FallbackBehavior {
-    fn default() -> Self {
-        Self::RetryThenFallback
-    }
-}
-
-/// Behavior when all fallback attempts fail.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub enum OnAllFailedBehavior {
-    /// Return the error to the caller.
-    ReturnError,
-    /// Try to return cached result if available.
-    ReturnCache,
-}
-
-impl Default for OnAllFailedBehavior {
-    fn default() -> Self {
-        Self::ReturnError
-    }
-}
-
-/// Fallback configuration for error recovery.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct FallbackConfig {
-    /// Whether fallback is enabled.
-    #[serde(default = "default_true")]
-    pub enabled: bool,
-
-    /// Fallback models in priority order.
-    #[serde(default)]
-    pub models: Vec<String>,
-
-    /// Fallback endpoints in priority order.
-    #[serde(default)]
-    pub endpoints: Vec<String>,
-
-    /// Behavior on rate limit error (429).
-    #[serde(default)]
-    pub on_rate_limit: FallbackBehavior,
-
-    /// Behavior on timeout error.
-    #[serde(default)]
-    pub on_timeout: FallbackBehavior,
-
-    /// Behavior when all attempts fail.
-    #[serde(default)]
-    pub on_all_failed: OnAllFailedBehavior,
-}
-
-impl Default for FallbackConfig {
-    fn default() -> Self {
-        Self {
-            enabled: true,
-            models: vec!["gpt-4o-mini".to_string(), "glm-4-flash".to_string()],
-            endpoints: vec![],
-            on_rate_limit: FallbackBehavior::RetryThenFallback,
-            on_timeout: FallbackBehavior::RetryThenFallback,
-            on_all_failed: OnAllFailedBehavior::ReturnError,
-        }
-    }
-}
-
-impl FallbackConfig {
-    /// Create a new fallback config with defaults.
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Disable fallback entirely.
-    pub fn disabled() -> Self {
-        Self {
-            enabled: false,
-            ..Self::default()
-        }
-    }
-
-    /// Set fallback models.
-    pub fn with_models(mut self, models: Vec<String>) -> Self {
-        self.models = models;
-        self
-    }
-
-    /// Set fallback endpoints.
-    pub fn with_endpoints(mut self, endpoints: Vec<String>) -> Self {
-        self.endpoints = endpoints;
-        self
-    }
-
-    /// Set behavior on rate limit.
-    pub fn with_on_rate_limit(mut self, behavior: FallbackBehavior) -> Self {
-        self.on_rate_limit = behavior;
-        self
-    }
-
-    /// Set behavior on timeout.
-    pub fn with_on_timeout(mut self, behavior: FallbackBehavior) -> Self {
-        self.on_timeout = behavior;
-        self
-    }
-}
diff --git a/src/config/types/concurrency.rs b/src/config/types/concurrency.rs
new file mode 100644
index 00000000..c4172ba8
--- /dev/null
+++ b/src/config/types/concurrency.rs
@@ -0,0 +1,122 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Concurrency control configuration types.
+
+use serde::{Deserialize, Serialize};
+
+/// Concurrency control configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ConcurrencyConfig {
+    /// Maximum concurrent LLM API calls.
+    #[serde(default = "default_max_concurrent_requests")]
+    pub max_concurrent_requests: usize,
+
+    /// Rate limit: requests per minute.
+    #[serde(default = "default_requests_per_minute")]
+    pub requests_per_minute: usize,
+
+    /// Whether rate limiting is enabled.
+    #[serde(default = "default_true")]
+    pub enabled: bool,
+
+    /// Whether semaphore-based concurrency limiting is enabled.
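+    ///
+    /// The semaphore caps the number of in-flight requests, while
+    /// `requests_per_minute` caps request rate; the two mechanisms are
+    /// toggled independently (`enabled` vs. `semaphore_enabled`).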
+    #[serde(default = "default_true")]
+    pub semaphore_enabled: bool,
+}
+
+fn default_max_concurrent_requests() -> usize {
+    10
+}
+
+fn default_requests_per_minute() -> usize {
+    500
+}
+
+fn default_true() -> bool {
+    true
+}
+
+impl Default for ConcurrencyConfig {
+    fn default() -> Self {
+        Self {
+            max_concurrent_requests: default_max_concurrent_requests(),
+            requests_per_minute: default_requests_per_minute(),
+            enabled: default_true(),
+            semaphore_enabled: default_true(),
+        }
+    }
+}
+
+impl ConcurrencyConfig {
+    /// Create a new config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the maximum concurrent requests.
+    pub fn with_max_concurrent_requests(mut self, max: usize) -> Self {
+        self.max_concurrent_requests = max;
+        self
+    }
+
+    /// Set the requests per minute rate limit.
+    pub fn with_requests_per_minute(mut self, rpm: usize) -> Self {
+        self.requests_per_minute = rpm;
+        self
+    }
+
+    /// Enable or disable rate limiting.
+    pub fn with_enabled(mut self, enabled: bool) -> Self {
+        self.enabled = enabled;
+        self
+    }
+
+    /// Enable or disable semaphore.
+    pub fn with_semaphore_enabled(mut self, enabled: bool) -> Self {
+        self.semaphore_enabled = enabled;
+        self
+    }
+
+    /// Convert to the runtime concurrency config.
+    pub fn to_runtime_config(&self) -> crate::throttle::ConcurrencyConfig {
+        crate::throttle::ConcurrencyConfig {
+            max_concurrent_requests: self.max_concurrent_requests,
+            requests_per_minute: self.requests_per_minute,
+            enabled: self.enabled,
+            semaphore_enabled: self.semaphore_enabled,
+        }
+    }
+}
+
+impl From<ConcurrencyConfig> for crate::throttle::ConcurrencyConfig {
+    fn from(config: ConcurrencyConfig) -> Self {
+        config.to_runtime_config()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_concurrency_config_defaults() {
+        let config = ConcurrencyConfig::default();
+        assert_eq!(config.max_concurrent_requests, 10);
+        assert_eq!(config.requests_per_minute, 500);
+        assert!(config.enabled);
+        assert!(config.semaphore_enabled);
+    }
+
+    #[test]
+    fn test_concurrency_config_builder() {
+        let config = ConcurrencyConfig::new()
+            .with_max_concurrent_requests(20)
+            .with_requests_per_minute(1000)
+            .with_enabled(false);
+
+        assert_eq!(config.max_concurrent_requests, 20);
+        assert_eq!(config.requests_per_minute, 1000);
+        assert!(!config.enabled);
+    }
+}
diff --git a/src/config/types/content.rs b/src/config/types/content.rs
new file mode 100644
index 00000000..62741cd7
--- /dev/null
+++ b/src/config/types/content.rs
@@ -0,0 +1,222 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Content aggregator configuration types.
+
+use serde::{Deserialize, Serialize};
+
+/// Content aggregator configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ContentAggregatorConfig {
+    /// Whether content aggregator is enabled.
+    /// When disabled, uses simple content collection (legacy behavior).
+    #[serde(default = "default_true")]
+    pub enabled: bool,
+
+    /// Maximum tokens for aggregated content.
+    #[serde(default = "default_token_budget")]
+    pub token_budget: usize,
+
+    /// Minimum relevance score threshold (0.0 - 1.0).
+    /// Content below this threshold will be filtered out.
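+    /// e.g. with the default of `0.2`, a node scoring `0.15` is dropped.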
+    #[serde(default = "default_min_relevance_score")]
+    pub min_relevance_score: f32,
+
+    /// Scoring strategy: "keyword_only" | "keyword_bm25" | "hybrid"
+    #[serde(default = "default_scoring_strategy")]
+    pub scoring_strategy: String,
+
+    /// Output format: "markdown" | "json" | "tree" | "flat"
+    #[serde(default = "default_output_format")]
+    pub output_format: String,
+
+    /// Include relevance scores in output.
+    #[serde(default)]
+    pub include_scores: bool,
+
+    /// Minimum budget allocation per depth level (0.0 - 1.0).
+    /// Ensures each tree level gets representation.
+    #[serde(default = "default_hierarchical_min_per_level")]
+    pub hierarchical_min_per_level: f32,
+
+    /// Enable content deduplication.
+    #[serde(default = "default_true")]
+    pub deduplicate: bool,
+
+    /// Similarity threshold for deduplication (0.0 - 1.0).
+    /// Higher = more aggressive deduplication.
+    #[serde(default = "default_dedup_threshold")]
+    pub dedup_threshold: f32,
+}
+
+fn default_true() -> bool {
+    true
+}
+
+fn default_token_budget() -> usize {
+    4000
+}
+
+fn default_min_relevance_score() -> f32 {
+    0.2
+}
+
+fn default_scoring_strategy() -> String {
+    "keyword_bm25".to_string()
+}
+
+fn default_output_format() -> String {
+    "markdown".to_string()
+}
+
+fn default_hierarchical_min_per_level() -> f32 {
+    0.1
+}
+
+fn default_dedup_threshold() -> f32 {
+    0.9
+}
+
+impl Default for ContentAggregatorConfig {
+    fn default() -> Self {
+        Self {
+            enabled: default_true(),
+            token_budget: default_token_budget(),
+            min_relevance_score: default_min_relevance_score(),
+            scoring_strategy: default_scoring_strategy(),
+            output_format: default_output_format(),
+            include_scores: false,
+            hierarchical_min_per_level: default_hierarchical_min_per_level(),
+            deduplicate: default_true(),
+            dedup_threshold: default_dedup_threshold(),
+        }
+    }
+}
+
+impl ContentAggregatorConfig {
+    /// Create a new config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Disable content aggregator (use legacy behavior).
+    pub fn disabled() -> Self {
+        Self {
+            enabled: false,
+            ..Self::default()
+        }
+    }
+
+    /// Set the token budget.
+    pub fn with_token_budget(mut self, budget: usize) -> Self {
+        self.token_budget = budget;
+        self
+    }
+
+    /// Set the minimum relevance score.
+    pub fn with_min_relevance(mut self, score: f32) -> Self {
+        self.min_relevance_score = score.clamp(0.0, 1.0);
+        self
+    }
+
+    /// Set the scoring strategy.
+    pub fn with_scoring_strategy(mut self, strategy: impl Into<String>) -> Self {
+        self.scoring_strategy = strategy.into();
+        self
+    }
+
+    /// Set the output format.
+    pub fn with_output_format(mut self, format: impl Into<String>) -> Self {
+        self.output_format = format.into();
+        self
+    }
+
+    /// Enable/disable score inclusion.
+    pub fn with_include_scores(mut self, include: bool) -> Self {
+        self.include_scores = include;
+        self
+    }
+
+    /// Enable/disable deduplication.
+    pub fn with_deduplicate(mut self, dedupe: bool) -> Self {
+        self.deduplicate = dedupe;
+        self
+    }
+
+    /// Convert to the retrieval content aggregator config.
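+    ///
+    /// Unrecognized `scoring_strategy` or `output_format` strings fall back
+    /// to `KeywordWithBM25` and `Markdown` rather than returning an error.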
+ pub fn to_aggregator_config(&self) -> crate::retrieval::content::ContentAggregatorConfig { + use crate::retrieval::content::{ + ContentAggregatorConfig as RetrievalContentConfig, OutputFormatConfig, + ScoringStrategyConfig, + }; + + let scoring_strategy = match self.scoring_strategy.as_str() { + "keyword_only" => ScoringStrategyConfig::KeywordOnly, + "hybrid" => ScoringStrategyConfig::Hybrid, + _ => ScoringStrategyConfig::KeywordWithBM25, + }; + + let output_format = match self.output_format.as_str() { + "json" => OutputFormatConfig::Json, + "tree" => OutputFormatConfig::Tree, + "flat" => OutputFormatConfig::Flat, + _ => OutputFormatConfig::Markdown, + }; + + RetrievalContentConfig { + token_budget: self.token_budget, + min_relevance_score: self.min_relevance_score, + scoring_strategy, + output_format, + include_scores: self.include_scores, + hierarchical_min_per_level: self.hierarchical_min_per_level, + deduplicate: self.deduplicate, + dedup_threshold: self.dedup_threshold, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_content_aggregator_config_defaults() { + let config = ContentAggregatorConfig::default(); + assert!(config.enabled); + assert_eq!(config.token_budget, 4000); + assert_eq!(config.min_relevance_score, 0.2); + assert_eq!(config.scoring_strategy, "keyword_bm25"); + assert_eq!(config.output_format, "markdown"); + assert!(config.deduplicate); + } + + #[test] + fn test_content_aggregator_config_disabled() { + let config = ContentAggregatorConfig::disabled(); + assert!(!config.enabled); + } + + #[test] + fn test_content_aggregator_config_builder() { + let config = ContentAggregatorConfig::new() + .with_token_budget(8000) + .with_min_relevance(0.5) + .with_scoring_strategy("hybrid") + .with_output_format("json"); + + assert_eq!(config.token_budget, 8000); + assert_eq!(config.min_relevance_score, 0.5); + assert_eq!(config.scoring_strategy, "hybrid"); + assert_eq!(config.output_format, "json"); + } + + #[test] + fn test_min_relevance_clamping() { + let config = ContentAggregatorConfig::new().with_min_relevance(1.5); + assert_eq!(config.min_relevance_score, 1.0); + + let config = ContentAggregatorConfig::new().with_min_relevance(-0.5); + assert_eq!(config.min_relevance_score, 0.0); + } +} diff --git a/src/config/types/fallback.rs b/src/config/types/fallback.rs new file mode 100644 index 00000000..fa199b30 --- /dev/null +++ b/src/config/types/fallback.rs @@ -0,0 +1,233 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Fallback and error recovery configuration types. + +use serde::{Deserialize, Serialize}; + +/// Fallback behavior when encountering errors. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FallbackBehavior { + /// Only retry with the same model/endpoint. + Retry, + /// Immediately switch to fallback model/endpoint. + Fallback, + /// Retry first, then fallback if still failing. + RetryThenFallback, + /// Fail immediately without retry or fallback. + Fail, +} + +impl Default for FallbackBehavior { + fn default() -> Self { + Self::RetryThenFallback + } +} + +/// Behavior when all fallback attempts fail. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum OnAllFailedBehavior { + /// Return the error to the caller. + ReturnError, + /// Try to return cached result if available. 
+    ReturnCache,
+}
+
+impl Default for OnAllFailedBehavior {
+    fn default() -> Self {
+        Self::ReturnError
+    }
+}
+
+/// Fallback configuration for error recovery.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FallbackConfig {
+    /// Whether fallback is enabled.
+    #[serde(default = "default_true")]
+    pub enabled: bool,
+
+    /// Fallback models in priority order.
+    #[serde(default = "default_fallback_models")]
+    pub models: Vec<String>,
+
+    /// Fallback endpoints in priority order.
+    #[serde(default)]
+    pub endpoints: Vec<String>,
+
+    /// Behavior on rate limit error (429).
+    #[serde(default)]
+    pub on_rate_limit: FallbackBehavior,
+
+    /// Behavior on timeout error.
+    #[serde(default)]
+    pub on_timeout: FallbackBehavior,
+
+    /// Behavior when all attempts fail.
+    #[serde(default)]
+    pub on_all_failed: OnAllFailedBehavior,
+
+    /// Maximum retry attempts.
+    #[serde(default = "default_max_retries")]
+    pub max_retries: usize,
+
+    /// Initial retry delay in milliseconds.
+    #[serde(default = "default_initial_retry_delay_ms")]
+    pub initial_retry_delay_ms: u64,
+
+    /// Maximum retry delay in milliseconds.
+    #[serde(default = "default_max_retry_delay_ms")]
+    pub max_retry_delay_ms: u64,
+
+    /// Retry delay multiplier (exponential backoff).
+    #[serde(default = "default_retry_multiplier")]
+    pub retry_multiplier: f32,
+}
+
+fn default_fallback_models() -> Vec<String> {
+    vec!["gpt-4o-mini".to_string(), "glm-4-flash".to_string()]
+}
+
+fn default_max_retries() -> usize {
+    3
+}
+
+fn default_initial_retry_delay_ms() -> u64 {
+    1000
+}
+
+fn default_max_retry_delay_ms() -> u64 {
+    30000
+}
+
+fn default_retry_multiplier() -> f32 {
+    2.0
+}
+
+impl Default for FallbackConfig {
+    fn default() -> Self {
+        Self {
+            enabled: default_true(),
+            models: default_fallback_models(),
+            endpoints: Vec::new(),
+            on_rate_limit: FallbackBehavior::default(),
+            on_timeout: FallbackBehavior::default(),
+            on_all_failed: OnAllFailedBehavior::default(),
+            max_retries: default_max_retries(),
+            initial_retry_delay_ms: default_initial_retry_delay_ms(),
+            max_retry_delay_ms: default_max_retry_delay_ms(),
+            retry_multiplier: default_retry_multiplier(),
+        }
+    }
+}
+
+fn default_true() -> bool {
+    true
+}
+
+impl FallbackConfig {
+    /// Create a new fallback config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Disable fallback entirely.
+    pub fn disabled() -> Self {
+        Self {
+            enabled: false,
+            ..Self::default()
+        }
+    }
+
+    /// Set fallback models.
+    pub fn with_models(mut self, models: Vec<String>) -> Self {
+        self.models = models;
+        self
+    }
+
+    /// Set fallback endpoints.
+    pub fn with_endpoints(mut self, endpoints: Vec<String>) -> Self {
+        self.endpoints = endpoints;
+        self
+    }
+
+    /// Set behavior on rate limit.
+    pub fn with_on_rate_limit(mut self, behavior: FallbackBehavior) -> Self {
+        self.on_rate_limit = behavior;
+        self
+    }
+
+    /// Set behavior on timeout.
+    pub fn with_on_timeout(mut self, behavior: FallbackBehavior) -> Self {
+        self.on_timeout = behavior;
+        self
+    }
+
+    /// Set behavior when all attempts fail.
+    pub fn with_on_all_failed(mut self, behavior: OnAllFailedBehavior) -> Self {
+        self.on_all_failed = behavior;
+        self
+    }
+
+    /// Set maximum retries.
+    pub fn with_max_retries(mut self, max: usize) -> Self {
+        self.max_retries = max;
+        self
+    }
+
+    /// Calculate retry delay with exponential backoff.
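+    ///
+    /// With the defaults (1000 ms initial delay, 2.0 multiplier, 30 s cap)
+    /// this yields 1 s, 2 s, 4 s, 8 s, ... For example:
+    ///
+    /// ```rust
+    /// use vectorless::config::FallbackConfig;
+    ///
+    /// let config = FallbackConfig::default();
+    /// assert_eq!(config.calculate_retry_delay(0).as_millis(), 1000);
+    /// assert_eq!(config.calculate_retry_delay(2).as_millis(), 4000);
+    /// ```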
+ pub fn calculate_retry_delay(&self, attempt: usize) -> std::time::Duration { + let delay_ms = if attempt == 0 { + self.initial_retry_delay_ms + } else { + let delay = self.initial_retry_delay_ms as f32 + * self.retry_multiplier.powi(attempt as i32); + delay.min(self.max_retry_delay_ms as f32) as u64 + }; + std::time::Duration::from_millis(delay_ms) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_fallback_config_defaults() { + let config = FallbackConfig::default(); + assert!(config.enabled); + assert_eq!(config.models.len(), 2); + assert_eq!(config.on_rate_limit, FallbackBehavior::RetryThenFallback); + assert_eq!(config.max_retries, 3); + } + + #[test] + fn test_fallback_config_disabled() { + let config = FallbackConfig::disabled(); + assert!(!config.enabled); + } + + #[test] + fn test_fallback_behavior_serde() { + let behavior = FallbackBehavior::RetryThenFallback; + let json = serde_json::to_string(&behavior).unwrap(); + assert_eq!(json, "\"retry_then_fallback\""); + + let decoded: FallbackBehavior = serde_json::from_str(&json).unwrap(); + assert_eq!(decoded, behavior); + } + + #[test] + fn test_retry_delay_calculation() { + let config = FallbackConfig::default(); + + let d0 = config.calculate_retry_delay(0); + let d1 = config.calculate_retry_delay(1); + let d2 = config.calculate_retry_delay(2); + + assert_eq!(d0.as_millis(), 1000); + assert_eq!(d1.as_millis(), 2000); + assert_eq!(d2.as_millis(), 4000); + } +} diff --git a/src/config/types/indexer.rs b/src/config/types/indexer.rs new file mode 100644 index 00000000..6353122a --- /dev/null +++ b/src/config/types/indexer.rs @@ -0,0 +1,108 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Indexer configuration types. + +use serde::{Deserialize, Serialize}; + +/// Indexer configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct IndexerConfig { + /// Word count threshold for splitting sections into subsections. + #[serde(default = "default_subsection_threshold")] + pub subsection_threshold: usize, + + /// Maximum tokens to send in a single segmentation request. + #[serde(default = "default_max_segment_tokens")] + pub max_segment_tokens: usize, + + /// Maximum tokens for each summary. + #[serde(default = "default_max_summary_tokens")] + pub max_summary_tokens: usize, + + /// Minimum content tokens required to generate a summary. + #[serde(default = "default_min_summary_tokens")] + pub min_summary_tokens: usize, +} + +fn default_subsection_threshold() -> usize { + 300 +} + +fn default_max_segment_tokens() -> usize { + 3000 +} + +fn default_max_summary_tokens() -> usize { + 200 +} + +fn default_min_summary_tokens() -> usize { + 20 +} + +impl Default for IndexerConfig { + fn default() -> Self { + Self { + subsection_threshold: default_subsection_threshold(), + max_segment_tokens: default_max_segment_tokens(), + max_summary_tokens: default_max_summary_tokens(), + min_summary_tokens: default_min_summary_tokens(), + } + } +} + +impl IndexerConfig { + /// Create a new indexer config with defaults. + pub fn new() -> Self { + Self::default() + } + + /// Set the subsection threshold. + pub fn with_subsection_threshold(mut self, threshold: usize) -> Self { + self.subsection_threshold = threshold; + self + } + + /// Set the maximum segment tokens. + pub fn with_max_segment_tokens(mut self, tokens: usize) -> Self { + self.max_segment_tokens = tokens; + self + } + + /// Set the maximum summary tokens. 
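+    ///
+    /// Illustrative builder usage (the threshold and token values are
+    /// arbitrary, not recommendations):
+    ///
+    /// ```ignore
+    /// let config = IndexerConfig::new()
+    ///     .with_subsection_threshold(500)
+    ///     .with_max_summary_tokens(300);
+    /// ```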
+    pub fn with_max_summary_tokens(mut self, tokens: usize) -> Self {
+        self.max_summary_tokens = tokens;
+        self
+    }
+
+    /// Set the minimum summary tokens.
+    pub fn with_min_summary_tokens(mut self, tokens: usize) -> Self {
+        self.min_summary_tokens = tokens;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_indexer_config_defaults() {
+        let config = IndexerConfig::default();
+        assert_eq!(config.subsection_threshold, 300);
+        assert_eq!(config.max_segment_tokens, 3000);
+        assert_eq!(config.max_summary_tokens, 200);
+        assert_eq!(config.min_summary_tokens, 20);
+    }
+
+    #[test]
+    fn test_indexer_config_builder() {
+        let config = IndexerConfig::new()
+            .with_subsection_threshold(500)
+            .with_max_summary_tokens(300);
+
+        assert_eq!(config.subsection_threshold, 500);
+        assert_eq!(config.max_summary_tokens, 300);
+    }
+}
diff --git a/src/config/types/llm.rs b/src/config/types/llm.rs
new file mode 100644
index 00000000..a98ee7d3
--- /dev/null
+++ b/src/config/types/llm.rs
@@ -0,0 +1,218 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! LLM configuration types for summary and retrieval.
+
+use serde::{Deserialize, Serialize};
+
+/// Generic LLM configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LlmConfig {
+    /// Model name (e.g., "gpt-4o-mini", "claude-3-haiku").
+    #[serde(default = "default_model")]
+    pub model: String,
+
+    /// API endpoint.
+    #[serde(default = "default_endpoint")]
+    pub endpoint: String,
+
+    /// API key.
+    #[serde(default)]
+    pub api_key: Option<String>,
+
+    /// Maximum tokens for responses.
+    #[serde(default = "default_max_tokens")]
+    pub max_tokens: usize,
+
+    /// Temperature for generation.
+    #[serde(default = "default_temperature")]
+    pub temperature: f32,
+}
+
+fn default_model() -> String {
+    "gpt-4o-mini".to_string()
+}
+
+fn default_endpoint() -> String {
+    "https://api.openai.com/v1".to_string()
+}
+
+fn default_max_tokens() -> usize {
+    1000
+}
+
+fn default_temperature() -> f32 {
+    0.0
+}
+
+impl Default for LlmConfig {
+    fn default() -> Self {
+        Self {
+            model: default_model(),
+            endpoint: default_endpoint(),
+            api_key: None,
+            max_tokens: default_max_tokens(),
+            temperature: default_temperature(),
+        }
+    }
+}
+
+impl LlmConfig {
+    /// Create a new LLM config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the model.
+    pub fn with_model(mut self, model: impl Into<String>) -> Self {
+        self.model = model.into();
+        self
+    }
+
+    /// Set the endpoint.
+    pub fn with_endpoint(mut self, endpoint: impl Into<String>) -> Self {
+        self.endpoint = endpoint.into();
+        self
+    }
+
+    /// Set the API key.
+    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
+        self.api_key = Some(api_key.into());
+        self
+    }
+
+    /// Set the maximum tokens.
+    pub fn with_max_tokens(mut self, max_tokens: usize) -> Self {
+        self.max_tokens = max_tokens;
+        self
+    }
+
+    /// Set the temperature.
+    pub fn with_temperature(mut self, temperature: f32) -> Self {
+        self.temperature = temperature;
+        self
+    }
+
+    /// Get the API key from config.
+    pub fn get_api_key(&self) -> Option<&str> {
+        self.api_key.as_deref()
+    }
+}
+
+/// Summary model configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SummaryConfig {
+    /// Model name for summarization.
+    #[serde(default = "default_summary_model")]
+    pub model: String,
+
+    /// API endpoint for summary model.
+    #[serde(default = "default_endpoint")]
+    pub endpoint: String,
+
+    /// API key.
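+    ///
+    /// If unset, summary generation is effectively disabled; the
+    /// dependency validator reports this as an info-level finding.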
+    #[serde(default)]
+    pub api_key: Option<String>,
+
+    /// Maximum tokens for summary generation.
+    #[serde(default = "default_max_summary_tokens")]
+    pub max_tokens: usize,
+
+    /// Temperature for summary generation.
+    #[serde(default = "default_temperature")]
+    pub temperature: f32,
+}
+
+fn default_summary_model() -> String {
+    "gpt-4o-mini".to_string()
+}
+
+fn default_max_summary_tokens() -> usize {
+    200
+}
+
+impl Default for SummaryConfig {
+    fn default() -> Self {
+        Self {
+            model: default_summary_model(),
+            endpoint: default_endpoint(),
+            api_key: None,
+            max_tokens: default_max_summary_tokens(),
+            temperature: default_temperature(),
+        }
+    }
+}
+
+impl SummaryConfig {
+    /// Create a new summary config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the model.
+    pub fn with_model(mut self, model: impl Into<String>) -> Self {
+        self.model = model.into();
+        self
+    }
+
+    /// Set the endpoint.
+    pub fn with_endpoint(mut self, endpoint: impl Into<String>) -> Self {
+        self.endpoint = endpoint.into();
+        self
+    }
+
+    /// Set the API key.
+    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
+        self.api_key = Some(api_key.into());
+        self
+    }
+
+    /// Set the maximum tokens.
+    pub fn with_max_tokens(mut self, max_tokens: usize) -> Self {
+        self.max_tokens = max_tokens;
+        self
+    }
+
+    /// Convert to generic LLM config.
+    pub fn to_llm_config(&self) -> LlmConfig {
+        LlmConfig {
+            model: self.model.clone(),
+            endpoint: self.endpoint.clone(),
+            api_key: self.api_key.clone(),
+            max_tokens: self.max_tokens,
+            temperature: self.temperature,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_llm_config_defaults() {
+        let config = LlmConfig::default();
+        assert_eq!(config.model, "gpt-4o-mini");
+        assert_eq!(config.endpoint, "https://api.openai.com/v1");
+        assert!(config.api_key.is_none());
+    }
+
+    #[test]
+    fn test_llm_config_builder() {
+        let config = LlmConfig::new()
+            .with_model("gpt-4o")
+            .with_api_key("test-key")
+            .with_max_tokens(2000);
+
+        assert_eq!(config.model, "gpt-4o");
+        assert_eq!(config.api_key, Some("test-key".to_string()));
+        assert_eq!(config.max_tokens, 2000);
+    }
+
+    #[test]
+    fn test_summary_config() {
+        let config = SummaryConfig::default();
+        assert_eq!(config.model, "gpt-4o-mini");
+        assert_eq!(config.max_tokens, 200);
+    }
+}
diff --git a/src/config/types/mod.rs b/src/config/types/mod.rs
new file mode 100644
index 00000000..a824ee3f
--- /dev/null
+++ b/src/config/types/mod.rs
@@ -0,0 +1,336 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Configuration type definitions.
+//!
+//! All configuration values are defined inline in `Default` trait implementations.
+//! Configuration is loaded from TOML files only - no environment variable magic.
+
+mod content;
+mod concurrency;
+mod fallback;
+mod indexer;
+mod llm;
+mod retrieval;
+mod storage;
+
+use serde::{Deserialize, Serialize};
+
+pub use content::ContentAggregatorConfig;
+pub use concurrency::ConcurrencyConfig;
+pub use fallback::{FallbackBehavior, FallbackConfig, OnAllFailedBehavior};
+pub use indexer::IndexerConfig;
+pub use llm::{LlmConfig, SummaryConfig};
+pub use retrieval::{RetrievalConfig, SearchConfig};
+pub use storage::{CacheConfig, StorageConfig, StrategyConfig, SufficiencyConfig};
+
+/// Main configuration for vectorless.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Config {
+    /// Indexer configuration.
+    #[serde(default)]
+    pub indexer: IndexerConfig,
+
+    /// Summary model configuration.
+ #[serde(default)] + pub summary: SummaryConfig, + + /// Retrieval model configuration. + #[serde(default)] + pub retrieval: RetrievalConfig, + + /// Storage configuration. + #[serde(default)] + pub storage: StorageConfig, + + /// Concurrency control configuration. + #[serde(default)] + pub concurrency: ConcurrencyConfig, + + /// Fallback/error recovery configuration. + #[serde(default)] + pub fallback: FallbackConfig, +} + +impl Default for Config { + fn default() -> Self { + Self { + indexer: IndexerConfig::default(), + summary: SummaryConfig::default(), + retrieval: RetrievalConfig::default(), + storage: StorageConfig::default(), + concurrency: ConcurrencyConfig::default(), + fallback: FallbackConfig::default(), + } + } +} + +impl Config { + /// Create a new configuration with defaults. + pub fn new() -> Self { + Self::default() + } + + /// Set the indexer configuration. + pub fn with_indexer(mut self, indexer: IndexerConfig) -> Self { + self.indexer = indexer; + self + } + + /// Set the summary configuration. + pub fn with_summary(mut self, summary: SummaryConfig) -> Self { + self.summary = summary; + self + } + + /// Set the retrieval configuration. + pub fn with_retrieval(mut self, retrieval: RetrievalConfig) -> Self { + self.retrieval = retrieval; + self + } + + /// Set the storage configuration. + pub fn with_storage(mut self, storage: StorageConfig) -> Self { + self.storage = storage; + self + } + + /// Set the concurrency configuration. + pub fn with_concurrency(mut self, concurrency: ConcurrencyConfig) -> Self { + self.concurrency = concurrency; + self + } + + /// Set the fallback configuration. + pub fn with_fallback(mut self, fallback: FallbackConfig) -> Self { + self.fallback = fallback; + self + } + + /// Validate the configuration. 
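+    ///
+    /// A minimal sketch of the intended call pattern:
+    ///
+    /// ```ignore
+    /// let config = Config::new()
+    ///     .with_retrieval(RetrievalConfig::new().with_top_k(5));
+    /// config.validate().expect("configuration should be valid");
+    /// ```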
+    pub fn validate(&self) -> Result<(), ConfigValidationError> {
+        let mut errors = Vec::new();
+
+        // Validate indexer
+        if self.indexer.subsection_threshold == 0 {
+            errors.push(ValidationError::error(
+                "indexer.subsection_threshold",
+                "Subsection threshold must be greater than 0",
+            ));
+        }
+
+        // Validate summary
+        if self.summary.max_tokens == 0 {
+            errors.push(ValidationError::error(
+                "summary.max_tokens",
+                "Summary max tokens must be greater than 0",
+            ));
+        }
+
+        // Validate retrieval
+        if self.retrieval.top_k == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.top_k",
+                "Top K must be greater than 0",
+            ));
+        }
+
+        if self.retrieval.temperature < 0.0 || self.retrieval.temperature > 2.0 {
+            errors.push(ValidationError::warning(
+                "retrieval.temperature",
+                "Temperature outside typical range [0.0, 2.0]",
+            ).with_actual(self.retrieval.temperature.to_string()));
+        }
+
+        // Validate content aggregator
+        if self.retrieval.content.token_budget == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.content.token_budget",
+                "Token budget must be greater than 0",
+            ));
+        }
+
+        if self.retrieval.content.min_relevance_score < 0.0
+            || self.retrieval.content.min_relevance_score > 1.0
+        {
+            errors.push(ValidationError::error(
+                "retrieval.content.min_relevance_score",
+                "Min relevance score must be between 0.0 and 1.0",
+            )
+            .with_expected("0.0 - 1.0")
+            .with_actual(self.retrieval.content.min_relevance_score.to_string()));
+        }
+
+        // Validate concurrency
+        if self.concurrency.max_concurrent_requests == 0 {
+            errors.push(ValidationError::error(
+                "concurrency.max_concurrent_requests",
+                "Max concurrent requests must be greater than 0",
+            ));
+        }
+
+        // Validate fallback
+        if self.fallback.enabled && self.fallback.models.is_empty() {
+            errors.push(ValidationError::warning(
+                "fallback.models",
+                "Fallback enabled but no fallback models configured",
+            ));
+        }
+
+        if errors.is_empty() {
+            Ok(())
+        } else {
+            Err(ConfigValidationError { errors })
+        }
+    }
+}
+
+/// Configuration validation error.
+#[derive(Debug, Clone, thiserror::Error)]
+#[error("Configuration validation failed with {} error(s)", self.errors.len())]
+pub struct ConfigValidationError {
+    /// Validation errors.
+    pub errors: Vec<ValidationError>,
+}
+
+/// A single validation error.
+#[derive(Debug, Clone)]
+pub struct ValidationError {
+    /// Field path (e.g., "retrieval.content.token_budget").
+    pub path: String,
+
+    /// Error message.
+    pub message: String,
+
+    /// Expected value/range.
+    pub expected: Option<String>,
+
+    /// Actual value.
+    pub actual: Option<String>,
+
+    /// Severity level.
+    pub severity: Severity,
+}
+
+impl ValidationError {
+    /// Create an error-level validation error.
+    pub fn error(path: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            path: path.into(),
+            message: message.into(),
+            expected: None,
+            actual: None,
+            severity: Severity::Error,
+        }
+    }
+
+    /// Create a warning-level validation error.
+    pub fn warning(path: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            path: path.into(),
+            message: message.into(),
+            expected: None,
+            actual: None,
+            severity: Severity::Warning,
+        }
+    }
+
+    /// Create an info-level validation error.
+    pub fn info(path: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            path: path.into(),
+            message: message.into(),
+            expected: None,
+            actual: None,
+            severity: Severity::Info,
+        }
+    }
+
+    /// Set the expected value.
+    pub fn with_expected(mut self, expected: impl Into<String>) -> Self {
+        self.expected = Some(expected.into());
+        self
+    }
+
+    /// Set the actual value.
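+    ///
+    /// Typically chained onto a constructor, mirroring the display test below:
+    ///
+    /// ```ignore
+    /// let err = ValidationError::error("retrieval.top_k", "Top K must be greater than 0")
+    ///     .with_expected(">= 1")
+    ///     .with_actual("0");
+    /// ```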
+    pub fn with_actual(mut self, actual: impl Into<String>) -> Self {
+        self.actual = Some(actual.into());
+        self
+    }
+}
+
+impl std::fmt::Display for ValidationError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let severity = match self.severity {
+            Severity::Error => "ERROR",
+            Severity::Warning => "WARNING",
+            Severity::Info => "INFO",
+        };
+        write!(f, "[{}] {}: {}", severity, self.path, self.message)?;
+        if let Some(ref expected) = self.expected {
+            write!(f, " (expected: {})", expected)?;
+        }
+        if let Some(ref actual) = self.actual {
+            write!(f, " (actual: {})", actual)?;
+        }
+        Ok(())
+    }
+}
+
+/// Validation severity level.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Severity {
+    /// Error - must fix.
+    Error,
+    /// Warning - should fix.
+    Warning,
+    /// Info - suggestion.
+    Info,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_config_defaults() {
+        let config = Config::default();
+        assert_eq!(config.indexer.subsection_threshold, 300);
+        assert_eq!(config.summary.model, "gpt-4o-mini");
+        assert_eq!(config.retrieval.model, "gpt-4o");
+        assert_eq!(config.concurrency.max_concurrent_requests, 10);
+    }
+
+    #[test]
+    fn test_config_validation_success() {
+        let config = Config::default();
+        assert!(config.validate().is_ok());
+    }
+
+    #[test]
+    fn test_config_validation_errors() {
+        let mut config = Config::default();
+        config.retrieval.content.token_budget = 0;
+        config.retrieval.content.min_relevance_score = 1.5;
+
+        let result = config.validate();
+        assert!(result.is_err());
+
+        let err = result.unwrap_err();
+        assert!(!err.errors.is_empty());
+    }
+
+    #[test]
+    fn test_validation_error_display() {
+        let err = ValidationError::error("test.field", "Invalid value")
+            .with_expected(">= 1")
+            .with_actual("0");
+
+        let display = format!("{}", err);
+        assert!(display.contains("ERROR"));
+        assert!(display.contains("test.field"));
+        assert!(display.contains("expected"));
+    }
+}
diff --git a/src/config/types/retrieval.rs b/src/config/types/retrieval.rs
new file mode 100644
index 00000000..d111b686
--- /dev/null
+++ b/src/config/types/retrieval.rs
@@ -0,0 +1,219 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Retrieval configuration types.
+
+use serde::{Deserialize, Serialize};
+
+use super::content::ContentAggregatorConfig;
+use super::storage::{CacheConfig, StrategyConfig, SufficiencyConfig};
+
+/// Retrieval model configuration (for navigation).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RetrievalConfig {
+    /// Model name for retrieval/navigation.
+    #[serde(default = "default_retrieval_model")]
+    pub model: String,
+
+    /// API endpoint for retrieval model.
+    #[serde(default = "default_endpoint")]
+    pub endpoint: String,
+
+    /// API key.
+    #[serde(default)]
+    pub api_key: Option<String>,
+
+    /// Maximum tokens for retrieval context.
+    #[serde(default = "default_max_retrieval_tokens")]
+    pub max_tokens: usize,
+
+    /// Temperature for retrieval.
+    #[serde(default = "default_temperature")]
+    pub temperature: f32,
+
+    /// Number of top-k results to return.
+    #[serde(default = "default_top_k")]
+    pub top_k: usize,
+
+    /// Search algorithm configuration.
+    #[serde(default)]
+    pub search: SearchConfig,
+
+    /// Sufficiency checker configuration.
+    #[serde(default)]
+    pub sufficiency: SufficiencyConfig,
+
+    /// Cache configuration.
+    #[serde(default)]
+    pub cache: CacheConfig,
+
+    /// Strategy-specific configuration.
+    #[serde(default)]
+    pub strategy: StrategyConfig,
+
+    /// Content aggregator configuration.
+    #[serde(default)]
+    pub content: ContentAggregatorConfig,
+}
+
+fn default_retrieval_model() -> String {
+    "gpt-4o".to_string()
+}
+
+fn default_endpoint() -> String {
+    "https://api.openai.com/v1".to_string()
+}
+
+fn default_max_retrieval_tokens() -> usize {
+    1000
+}
+
+fn default_temperature() -> f32 {
+    0.0
+}
+
+fn default_top_k() -> usize {
+    3
+}
+
+impl Default for RetrievalConfig {
+    fn default() -> Self {
+        Self {
+            model: default_retrieval_model(),
+            endpoint: default_endpoint(),
+            api_key: None,
+            max_tokens: default_max_retrieval_tokens(),
+            temperature: default_temperature(),
+            top_k: default_top_k(),
+            search: SearchConfig::default(),
+            sufficiency: SufficiencyConfig::default(),
+            cache: CacheConfig::default(),
+            strategy: StrategyConfig::default(),
+            content: ContentAggregatorConfig::default(),
+        }
+    }
+}
+
+impl RetrievalConfig {
+    /// Create a new retrieval config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the model.
+    pub fn with_model(mut self, model: impl Into<String>) -> Self {
+        self.model = model.into();
+        self
+    }
+
+    /// Set the endpoint.
+    pub fn with_endpoint(mut self, endpoint: impl Into<String>) -> Self {
+        self.endpoint = endpoint.into();
+        self
+    }
+
+    /// Set the API key.
+    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
+        self.api_key = Some(api_key.into());
+        self
+    }
+
+    /// Set the top_k.
+    pub fn with_top_k(mut self, top_k: usize) -> Self {
+        self.top_k = top_k;
+        self
+    }
+}
+
+/// Search algorithm configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SearchConfig {
+    /// Number of top-k results to return.
+    #[serde(default = "default_search_top_k")]
+    pub top_k: usize,
+
+    /// Beam width for multi-path search.
+    #[serde(default = "default_beam_width")]
+    pub beam_width: usize,
+
+    /// Maximum iterations for search algorithms.
+    #[serde(default = "default_max_iterations")]
+    pub max_iterations: usize,
+
+    /// Minimum score to include a path.
+    #[serde(default = "default_min_score")]
+    pub min_score: f32,
+}
+
+fn default_search_top_k() -> usize {
+    5
+}
+
+fn default_beam_width() -> usize {
+    3
+}
+
+fn default_max_iterations() -> usize {
+    10
+}
+
+fn default_min_score() -> f32 {
+    0.1
+}
+
+impl Default for SearchConfig {
+    fn default() -> Self {
+        Self {
+            top_k: default_search_top_k(),
+            beam_width: default_beam_width(),
+            max_iterations: default_max_iterations(),
+            min_score: default_min_score(),
+        }
+    }
+}
+
+impl SearchConfig {
+    /// Create new search config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the top_k.
+    pub fn with_top_k(mut self, top_k: usize) -> Self {
+        self.top_k = top_k;
+        self
+    }
+
+    /// Set the beam width.
+    pub fn with_beam_width(mut self, width: usize) -> Self {
+        self.beam_width = width;
+        self
+    }
+
+    /// Set the max iterations.
+    pub fn with_max_iterations(mut self, max: usize) -> Self {
+        self.max_iterations = max;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_retrieval_config_defaults() {
+        let config = RetrievalConfig::default();
+        assert_eq!(config.model, "gpt-4o");
+        assert_eq!(config.top_k, 3);
+        assert_eq!(config.search.top_k, 5);
+    }
+
+    #[test]
+    fn test_search_config_defaults() {
+        let config = SearchConfig::default();
+        assert_eq!(config.top_k, 5);
+        assert_eq!(config.beam_width, 3);
+        assert_eq!(config.max_iterations, 10);
+    }
+}
diff --git a/src/config/types/storage.rs b/src/config/types/storage.rs
new file mode 100644
index 00000000..0dc55ed9
--- /dev/null
+++ b/src/config/types/storage.rs
@@ -0,0 +1,274 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Storage and sufficiency configuration types.
+
+use serde::{Deserialize, Serialize};
+use std::path::PathBuf;
+
+/// Storage configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StorageConfig {
+    /// Workspace directory for persisted documents.
+    #[serde(default = "default_workspace_dir")]
+    pub workspace_dir: PathBuf,
+}
+
+fn default_workspace_dir() -> PathBuf {
+    PathBuf::from("./workspace")
+}
+
+impl Default for StorageConfig {
+    fn default() -> Self {
+        Self {
+            workspace_dir: default_workspace_dir(),
+        }
+    }
+}
+
+impl StorageConfig {
+    /// Create new storage config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the workspace directory.
+    pub fn with_workspace_dir(mut self, dir: impl Into<PathBuf>) -> Self {
+        self.workspace_dir = dir.into();
+        self
+    }
+}
+
+/// Sufficiency checker configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SufficiencyConfig {
+    /// Minimum tokens for sufficiency.
+    #[serde(default = "default_min_tokens")]
+    pub min_tokens: usize,
+
+    /// Target tokens for full sufficiency.
+    #[serde(default = "default_target_tokens")]
+    pub target_tokens: usize,
+
+    /// Maximum tokens before stopping.
+    #[serde(default = "default_max_tokens")]
+    pub max_tokens: usize,
+
+    /// Minimum content length (characters).
+    #[serde(default = "default_min_content_length")]
+    pub min_content_length: usize,
+
+    /// Confidence threshold for LLM judge.
+    #[serde(default = "default_confidence_threshold")]
+    pub confidence_threshold: f32,
+}
+
+fn default_min_tokens() -> usize {
+    500
+}
+
+fn default_target_tokens() -> usize {
+    2000
+}
+
+fn default_max_tokens() -> usize {
+    4000
+}
+
+fn default_min_content_length() -> usize {
+    200
+}
+
+fn default_confidence_threshold() -> f32 {
+    0.7
+}
+
+impl Default for SufficiencyConfig {
+    fn default() -> Self {
+        Self {
+            min_tokens: default_min_tokens(),
+            target_tokens: default_target_tokens(),
+            max_tokens: default_max_tokens(),
+            min_content_length: default_min_content_length(),
+            confidence_threshold: default_confidence_threshold(),
+        }
+    }
+}
+
+impl SufficiencyConfig {
+    /// Create new sufficiency config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the minimum tokens.
+    pub fn with_min_tokens(mut self, tokens: usize) -> Self {
+        self.min_tokens = tokens;
+        self
+    }
+
+    /// Set the target tokens.
+    pub fn with_target_tokens(mut self, tokens: usize) -> Self {
+        self.target_tokens = tokens;
+        self
+    }
+
+    /// Set the maximum tokens.
+    pub fn with_max_tokens(mut self, tokens: usize) -> Self {
+        self.max_tokens = tokens;
+        self
+    }
+
+    /// Set the confidence threshold.
+ pub fn with_confidence_threshold(mut self, threshold: f32) -> Self { + self.confidence_threshold = threshold; + self + } +} + +/// Cache configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CacheConfig { + /// Maximum number of cache entries. + #[serde(default = "default_max_entries")] + pub max_entries: usize, + + /// Time-to-live for cache entries (seconds). + #[serde(default = "default_ttl_secs")] + pub ttl_secs: u64, +} + +fn default_max_entries() -> usize { + 1000 +} + +fn default_ttl_secs() -> u64 { + 3600 +} + +impl Default for CacheConfig { + fn default() -> Self { + Self { + max_entries: default_max_entries(), + ttl_secs: default_ttl_secs(), + } + } +} + +impl CacheConfig { + /// Create new cache config with defaults. + pub fn new() -> Self { + Self::default() + } + + /// Set the maximum entries. + pub fn with_max_entries(mut self, max: usize) -> Self { + self.max_entries = max; + self + } + + /// Set the TTL in seconds. + pub fn with_ttl_secs(mut self, secs: u64) -> Self { + self.ttl_secs = secs; + self + } +} + +/// Strategy-specific configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StrategyConfig { + /// MCTS exploration weight (sqrt(2) ≈ 1.414). + #[serde(default = "default_exploration_weight")] + pub exploration_weight: f32, + + /// Semantic similarity threshold. + #[serde(default = "default_similarity_threshold")] + pub similarity_threshold: f32, + + /// High similarity threshold for "answer" decision. + #[serde(default = "default_high_similarity_threshold")] + pub high_similarity_threshold: f32, + + /// Low similarity threshold for "explore" decision. + #[serde(default = "default_low_similarity_threshold")] + pub low_similarity_threshold: f32, +} + +fn default_exploration_weight() -> f32 { + 1.414 +} + +fn default_similarity_threshold() -> f32 { + 0.5 +} + +fn default_high_similarity_threshold() -> f32 { + 0.8 +} + +fn default_low_similarity_threshold() -> f32 { + 0.3 +} + +impl Default for StrategyConfig { + fn default() -> Self { + Self { + exploration_weight: default_exploration_weight(), + similarity_threshold: default_similarity_threshold(), + high_similarity_threshold: default_high_similarity_threshold(), + low_similarity_threshold: default_low_similarity_threshold(), + } + } +} + +impl StrategyConfig { + /// Create new strategy config with defaults. + pub fn new() -> Self { + Self::default() + } + + /// Set the exploration weight. + pub fn with_exploration_weight(mut self, weight: f32) -> Self { + self.exploration_weight = weight; + self + } + + /// Set the similarity threshold. 
+    pub fn with_similarity_threshold(mut self, threshold: f32) -> Self {
+        self.similarity_threshold = threshold;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_storage_config_defaults() {
+        let config = StorageConfig::default();
+        assert_eq!(config.workspace_dir, PathBuf::from("./workspace"));
+    }
+
+    #[test]
+    fn test_sufficiency_config_defaults() {
+        let config = SufficiencyConfig::default();
+        assert_eq!(config.min_tokens, 500);
+        assert_eq!(config.target_tokens, 2000);
+        assert_eq!(config.max_tokens, 4000);
+    }
+
+    #[test]
+    fn test_cache_config_defaults() {
+        let config = CacheConfig::default();
+        assert_eq!(config.max_entries, 1000);
+        assert_eq!(config.ttl_secs, 3600);
+    }
+
+    #[test]
+    fn test_strategy_config_defaults() {
+        let config = StrategyConfig::default();
+        assert!((config.exploration_weight - 1.414).abs() < 0.001);
+        assert_eq!(config.similarity_threshold, 0.5);
+    }
+}
diff --git a/src/config/validator.rs b/src/config/validator.rs
new file mode 100644
index 00000000..8a3596fd
--- /dev/null
+++ b/src/config/validator.rs
@@ -0,0 +1,359 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Configuration validation.
+//!
+//! This module provides comprehensive validation for configuration values,
+//! including range checks, consistency checks, and dependency validation.
+
+use super::types::{Config, ConfigValidationError, Severity, ValidationError};
+
+/// Configuration validator.
+#[derive(Debug, Default)]
+pub struct ConfigValidator {
+    /// Validation rules to apply.
+    rules: Vec<Box<dyn ValidationRule>>,
+}
+
+impl ConfigValidator {
+    /// Create a new validator with default rules.
+    pub fn new() -> Self {
+        Self {
+            rules: vec![
+                Box::new(RangeValidator),
+                Box::new(ConsistencyValidator),
+                Box::new(DependencyValidator),
+            ],
+        }
+    }
+
+    /// Add a custom validation rule.
+    pub fn with_rule(mut self, rule: Box<dyn ValidationRule>) -> Self {
+        self.rules.push(rule);
+        self
+    }
+
+    /// Validate the configuration.
+    pub fn validate(&self, config: &Config) -> Result<(), ConfigValidationError> {
+        let mut errors = Vec::new();
+
+        for rule in &self.rules {
+            rule.validate(config, &mut errors);
+        }
+
+        // Only fail on errors, not warnings or info
+        let has_errors = errors.iter().any(|e| e.severity == Severity::Error);
+
+        if has_errors {
+            Err(ConfigValidationError { errors })
+        } else {
+            Ok(())
+        }
+    }
+}
+
+/// Trait for validation rules.
+pub trait ValidationRule: std::fmt::Debug + Send + Sync {
+    /// Validate the configuration, appending errors if found.
+    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>);
+}
+
+/// Validates value ranges.
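+///
+/// These are simple per-field checks, e.g. `top_k > 0` and
+/// `min_relevance_score` within `[0.0, 1.0]`.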
+#[derive(Debug)]
+struct RangeValidator;
+
+impl ValidationRule for RangeValidator {
+    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
+        // Indexer ranges
+        if config.indexer.subsection_threshold == 0 {
+            errors.push(ValidationError::error(
+                "indexer.subsection_threshold",
+                "Subsection threshold must be greater than 0",
+            ));
+        }
+
+        if config.indexer.subsection_threshold > 10000 {
+            errors.push(ValidationError::warning(
+                "indexer.subsection_threshold",
+                "Subsection threshold is very high, may impact performance",
+            ).with_actual(config.indexer.subsection_threshold.to_string()));
+        }
+
+        // Summary ranges
+        if config.summary.max_tokens == 0 {
+            errors.push(ValidationError::error(
+                "summary.max_tokens",
+                "Summary max tokens must be greater than 0",
+            ));
+        }
+
+        if config.summary.temperature < 0.0 || config.summary.temperature > 2.0 {
+            errors.push(ValidationError::warning(
+                "summary.temperature",
+                "Temperature outside typical range [0.0, 2.0]",
+            ).with_actual(config.summary.temperature.to_string()));
+        }
+
+        // Retrieval ranges
+        if config.retrieval.top_k == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.top_k",
+                "Top K must be greater than 0",
+            ));
+        }
+
+        if config.retrieval.search.beam_width == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.search.beam_width",
+                "Beam width must be greater than 0",
+            ));
+        }
+
+        // Content aggregator ranges
+        if config.retrieval.content.token_budget == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.content.token_budget",
+                "Token budget must be greater than 0",
+            ));
+        }
+
+        if config.retrieval.content.min_relevance_score < 0.0
+            || config.retrieval.content.min_relevance_score > 1.0
+        {
+            errors.push(ValidationError::error(
+                "retrieval.content.min_relevance_score",
+                "Min relevance score must be between 0.0 and 1.0",
+            )
+            .with_expected("0.0 - 1.0")
+            .with_actual(config.retrieval.content.min_relevance_score.to_string()));
+        }
+
+        if config.retrieval.content.hierarchical_min_per_level < 0.0
+            || config.retrieval.content.hierarchical_min_per_level > 1.0
+        {
+            errors.push(ValidationError::error(
+                "retrieval.content.hierarchical_min_per_level",
+                "Hierarchical min per level must be between 0.0 and 1.0",
+            ));
+        }
+
+        // Concurrency ranges
+        if config.concurrency.max_concurrent_requests == 0 {
+            errors.push(ValidationError::error(
+                "concurrency.max_concurrent_requests",
+                "Max concurrent requests must be greater than 0",
+            ));
+        }
+
+        if config.concurrency.requests_per_minute == 0 {
+            errors.push(ValidationError::error(
+                "concurrency.requests_per_minute",
+                "Requests per minute must be greater than 0",
+            ));
+        }
+
+        // Fallback ranges
+        if config.fallback.max_retries == 0 {
+            errors.push(ValidationError::warning(
+                "fallback.max_retries",
+                "Max retries is 0, fallback will not retry",
+            ));
+        }
+    }
+}
+
+/// Validates configuration consistency.
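+///
+/// Cross-field checks, e.g. the sufficiency thresholds must satisfy
+/// `min_tokens <= target_tokens <= max_tokens`.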
+#[derive(Debug)]
+struct ConsistencyValidator;
+
+impl ValidationRule for ConsistencyValidator {
+    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
+        // Check if summary tokens are reasonable
+        if config.summary.max_tokens > config.indexer.max_segment_tokens {
+            errors.push(ValidationError::warning(
+                "summary.max_tokens",
+                "Summary max tokens exceeds max segment tokens",
+            )
+            .with_expected(format!("<= {}", config.indexer.max_segment_tokens))
+            .with_actual(config.summary.max_tokens.to_string()));
+        }
+
+        // Check if content token budget is reasonable
+        if config.retrieval.content.token_budget > 100000 {
+            errors.push(ValidationError::warning(
+                "retrieval.content.token_budget",
+                "Token budget is very high, may cause performance issues",
+            ).with_actual(config.retrieval.content.token_budget.to_string()));
+        }
+
+        // Check if sufficiency thresholds are consistent
+        if config.retrieval.sufficiency.min_tokens > config.retrieval.sufficiency.target_tokens {
+            errors.push(ValidationError::error(
+                "retrieval.sufficiency.min_tokens",
+                "Min tokens cannot exceed target tokens",
+            )
+            .with_expected(format!("<= {}", config.retrieval.sufficiency.target_tokens))
+            .with_actual(config.retrieval.sufficiency.min_tokens.to_string()));
+        }
+
+        if config.retrieval.sufficiency.target_tokens > config.retrieval.sufficiency.max_tokens {
+            errors.push(ValidationError::error(
+                "retrieval.sufficiency.target_tokens",
+                "Target tokens cannot exceed max tokens",
+            )
+            .with_expected(format!("<= {}", config.retrieval.sufficiency.max_tokens))
+            .with_actual(config.retrieval.sufficiency.target_tokens.to_string()));
+        }
+
+        // Check scoring strategy validity
+        let valid_strategies = ["keyword_only", "keyword_bm25", "hybrid"];
+        if !valid_strategies.contains(&config.retrieval.content.scoring_strategy.as_str()) {
+            errors.push(ValidationError::error(
+                "retrieval.content.scoring_strategy",
+                "Invalid scoring strategy",
+            )
+            .with_expected(format!("one of: {:?}", valid_strategies))
+            .with_actual(config.retrieval.content.scoring_strategy.clone()));
+        }
+
+        // Check output format validity
+        let valid_formats = ["markdown", "json", "tree", "flat"];
+        if !valid_formats.contains(&config.retrieval.content.output_format.as_str()) {
+            errors.push(ValidationError::error(
+                "retrieval.content.output_format",
+                "Invalid output format",
+            )
+            .with_expected(format!("one of: {:?}", valid_formats))
+            .with_actual(config.retrieval.content.output_format.clone()));
+        }
+    }
+}
+
+/// Validates configuration dependencies.
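+///
+/// Checks that enabled features have what they need, e.g. a rate-limit
+/// behavior of `Fallback` requires at least one fallback model.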
+#[derive(Debug)]
+struct DependencyValidator;
+
+impl ValidationRule for DependencyValidator {
+    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
+        // Check if API key is available when summaries are needed
+        if config.summary.api_key.is_none() {
+            // Check if any feature requires LLM
+            if config.indexer.max_summary_tokens > 0 {
+                errors.push(ValidationError::info(
+                    "summary.api_key",
+                    "No API key configured, summary generation will be disabled",
+                ));
+            }
+        }
+
+        // Check fallback configuration
+        if config.fallback.enabled {
+            if config.fallback.models.is_empty() && config.fallback.endpoints.is_empty() {
+                errors.push(ValidationError::warning(
+                    "fallback.models",
+                    "Fallback enabled but no fallback models or endpoints configured",
+                ));
+            }
+
+            // Check retry behavior consistency
+            if matches!(
+                config.fallback.on_rate_limit,
+                super::types::FallbackBehavior::Fallback
+            ) && config.fallback.models.is_empty()
+            {
+                errors.push(ValidationError::error(
+                    "fallback.models",
+                    "Rate limit behavior is 'fallback' but no fallback models configured",
+                ));
+            }
+        }
+
+        // Check cache configuration
+        if config.retrieval.cache.max_entries == 0 {
+            errors.push(ValidationError::warning(
+                "retrieval.cache.max_entries",
+                "Cache disabled (max_entries = 0), performance may be impacted",
+            ));
+        }
+
+        // Check strategy configuration
+        if config.retrieval.strategy.exploration_weight <= 0.0 {
+            errors.push(ValidationError::error(
+                "retrieval.strategy.exploration_weight",
+                "Exploration weight must be positive",
+            ).with_actual(config.retrieval.strategy.exploration_weight.to_string()));
+        }
+
+        // Check similarity thresholds are ordered correctly
+        if config.retrieval.strategy.low_similarity_threshold
+            >= config.retrieval.strategy.high_similarity_threshold
+        {
+            errors.push(ValidationError::error(
+                "retrieval.strategy.low_similarity_threshold",
+                "Low similarity threshold must be less than high similarity threshold",
+            )
+            .with_expected(format!(
+                "< {}",
+                config.retrieval.strategy.high_similarity_threshold
+            ))
+            .with_actual(config.retrieval.strategy.low_similarity_threshold.to_string()));
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_validator_valid_config() {
+        let config = Config::default();
+        let validator = ConfigValidator::new();
+        // Default config should pass validation (no errors, warnings are ok)
+        let result = validator.validate(&config);
+        assert!(result.is_ok(), "Default config should pass validation");
+    }
+
+    #[test]
+    fn test_validator_catches_range_errors() {
+        let mut config = Config::default();
+        config.retrieval.content.token_budget = 0;
+        config.retrieval.content.min_relevance_score = 1.5;
+
+        let validator = ConfigValidator::new();
+        let result = validator.validate(&config);
+
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(err.errors.iter().any(|e| e.path.contains("token_budget")));
+    }
+
+    #[test]
+    fn test_validator_catches_consistency_errors() {
+        let mut config = Config::default();
+        config.retrieval.sufficiency.min_tokens = 3000;
+        config.retrieval.sufficiency.target_tokens = 2000;
+
+        let validator = ConfigValidator::new();
+        let result = validator.validate(&config);
+
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(err.errors.iter().any(|e| e.path.contains("min_tokens")));
+    }
+
+    #[test]
+    fn test_validator_catches_dependency_warnings() {
+        let mut config = Config::default();
+        config.fallback.enabled = true;
+        config.fallback.models.clear();
+
+        let validator = ConfigValidator::new();
+        let result = validator.validate(&config);
+
+        // Should succeed but with warnings
+        if let Err(err) = result {
+            assert!(err.errors.iter().any(|e| e.path.contains("fallback.models")));
+        }
+    }
+}
diff --git a/src/domain/mod.rs b/src/domain/mod.rs
index d5aa3e5c..75970a12 100644
--- a/src/domain/mod.rs
+++ b/src/domain/mod.rs
@@ -24,4 +24,4 @@ pub use error::{Error, Result};
 pub use node::{NodeId, TreeNode};
 pub use toc::{TocConfig, TocEntry, TocNode, TocView};
 pub use token::{estimate_tokens, estimate_tokens_batch, estimate_tokens_fast};
-pub use tree::{DocumentStructure, DocumentTree, StructureNode};
+pub use tree::{DocumentStructure, DocumentTree, RetrievalIndex, StructureNode};
diff --git a/src/domain/node.rs b/src/domain/node.rs
index ea9939b2..04359572 100644
--- a/src/domain/node.rs
+++ b/src/domain/node.rs
@@ -53,6 +53,15 @@ pub struct TreeNode {
     /// Title of this section.
     pub title: String,
 
+    /// Hierarchical structure index (e.g., "1", "1.1", "1.2.3").
+    ///
+    /// This provides a human-readable path to the node and is useful for:
+    /// - LLM navigation (easier to understand "go to section 2.1.3")
+    /// - Table of contents display
+    /// - Cross-referencing
+    #[serde(default)]
+    pub structure: String,
+
     /// Raw text content (populated at leaves).
     #[serde(default)]
     pub content: String,
@@ -93,6 +102,7 @@ impl Default for TreeNode {
     fn default() -> Self {
         Self {
             title: String::new(),
+            structure: String::new(),
             content: String::new(),
             summary: String::new(),
             depth: 0,
diff --git a/src/domain/tree.rs b/src/domain/tree.rs
index 1f63bbff..94f138a3 100644
--- a/src/domain/tree.rs
+++ b/src/domain/tree.rs
@@ -4,7 +4,9 @@
 //! Document tree using arena-based allocation.
 //!
 //! This structure provides better memory locality and simpler
-//! lifetime management compared to `Rc<RefCell<Node>>`.
+//! lifetime management compared to `Rc<RefCell<TreeNode>>`.
+
+use std::collections::HashMap;
 
 use indextree::Arena;
 use serde::{Deserialize, Serialize};
@@ -39,6 +41,172 @@ pub struct DocumentStructure {
     pub structure: Vec<StructureNode>,
 }
 
+/// Pre-computed index for efficient retrieval operations.
+///
+/// Built once after the document tree is fully constructed.
+/// Provides O(1) access to commonly needed traversal data.
+#[derive(Debug, Clone)]
+pub struct RetrievalIndex {
+    /// All leaf nodes in the tree.
+    leaves: Vec<NodeId>,
+
+    /// Nodes grouped by depth level.
+    /// level_index[0] = root, level_index[1] = level 1 nodes, etc.
+    level_index: Vec<Vec<NodeId>>,
+
+    /// Path from root to each node (inclusive).
+    path_cache: HashMap<NodeId, Vec<NodeId>>,
+
+    /// Siblings for each node (excluding self).
+    siblings_cache: HashMap<NodeId, Vec<NodeId>>,
+
+    /// Structure string to NodeId mapping.
+    /// e.g., "1.2.3" -> NodeId
+    structure_index: HashMap<String, NodeId>,
+
+    /// Page number to NodeId mapping.
+    /// Maps each page to the most specific (deepest) node containing it.
+    page_index: HashMap<usize, NodeId>,
+
+    /// NodeId to page range mapping.
+    node_page_range: HashMap<NodeId, (usize, usize)>,
+
+    /// Total node count.
+    node_count: usize,
+
+    /// Maximum depth in the tree.
+    max_depth: usize,
+}
+
+impl RetrievalIndex {
+    /// Get all leaf nodes.
+    pub fn leaves(&self) -> &[NodeId] {
+        &self.leaves
+    }
+
+    /// Get nodes at a specific depth level.
+    ///
+    /// Returns None if the level doesn't exist.
+    pub fn level(&self, depth: usize) -> Option<&[NodeId]> {
+        self.level_index.get(depth).map(|v| v.as_slice())
+    }
+
+    /// Get all levels.
+    pub fn levels(&self) -> &[Vec<NodeId>] {
+        &self.level_index
+    }
+
+    /// Get the path from root to a node (inclusive).
+    ///
+    /// Returns None if the node is not in the index.
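+    ///
+    /// # Example
+    /// ```ignore
+    /// if let Some(path) = index.path_to(node_id) {
+    ///     // path[0] is the root; the last element is `node_id` itself.
+    /// }
+    /// ```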
+    pub fn path_to(&self, node: NodeId) -> Option<&[NodeId]> {
+        self.path_cache.get(&node).map(|v| v.as_slice())
+    }
+
+    /// Get siblings of a node (excluding the node itself).
+    ///
+    /// Returns None if the node is not in the index or has no siblings.
+    pub fn siblings(&self, node: NodeId) -> Option<&[NodeId]> {
+        self.siblings_cache.get(&node).map(|v| v.as_slice())
+    }
+
+    /// Find a node by its structure index.
+    ///
+    /// # Example
+    /// ```ignore
+    /// // Find section 2.1.3
+    /// let node = index.find_by_structure("2.1.3");
+    /// ```
+    pub fn find_by_structure(&self, structure: &str) -> Option<NodeId> {
+        self.structure_index.get(structure).copied()
+    }
+
+    /// Find the most specific node containing a page number.
+    ///
+    /// Returns the deepest node whose page range contains the given page.
+    pub fn find_by_page(&self, page: usize) -> Option<NodeId> {
+        self.page_index.get(&page).copied()
+    }
+
+    /// Find all nodes whose page range overlaps with the given range.
+    ///
+    /// This is useful for retrieving all content that spans a range of pages.
+    ///
+    /// # Example
+    /// ```ignore
+    /// // Find all nodes covering pages 10-15
+    /// let nodes = index.find_nodes_by_page_range(10, 15);
+    /// ```
+    pub fn find_nodes_by_page_range(&self, start: usize, end: usize) -> Vec<NodeId> {
+        let mut result = Vec::new();
+        for (&node_id, &(node_start, node_end)) in &self.node_page_range {
+            // Check if ranges overlap: node_start <= end && start <= node_end
+            if node_start <= end && start <= node_end {
+                result.push(node_id);
+            }
+        }
+        // Sort by start page for consistent ordering
+        result.sort_by_key(|&id| {
+            self.node_page_range.get(&id).map(|(s, _)| *s).unwrap_or(0)
+        });
+        result
+    }
+
+    /// Get all page numbers covered by a node.
+    ///
+    /// Returns None if the node has no page information.
+    pub fn get_pages_for_node(&self, node: NodeId) -> Option<Vec<usize>> {
+        let (start, end) = self.node_page_range.get(&node)?;
+        Some((*start..=*end).collect())
+    }
+
+    /// Get the page range for a node.
+    pub fn page_range(&self, node: NodeId) -> Option<(usize, usize)> {
+        self.node_page_range.get(&node).copied()
+    }
+
+    /// Get all nodes that are leaves within a page range.
+    ///
+    /// This returns only leaf nodes (nodes with no children) that
+    /// overlap with the given page range.
+    pub fn find_leaves_by_page_range(&self, start: usize, end: usize) -> Vec<NodeId> {
+        let leaves_set: std::collections::HashSet<NodeId> = self.leaves.iter().copied().collect();
+        self.find_nodes_by_page_range(start, end)
+            .into_iter()
+            .filter(|id| leaves_set.contains(id))
+            .collect()
+    }
+
+    /// Get the total number of pages in the document.
+    pub fn total_pages(&self) -> usize {
+        self.node_page_range
+            .values()
+            .map(|(_, end)| *end)
+            .max()
+            .unwrap_or(0)
+    }
+
+    /// Get all structure indices.
+    pub fn structures(&self) -> &HashMap<String, NodeId> {
+        &self.structure_index
+    }
+
+    /// Get the total number of nodes.
+    pub fn node_count(&self) -> usize {
+        self.node_count
+    }
+
+    /// Get the maximum depth in the tree.
+    pub fn max_depth(&self) -> usize {
+        self.max_depth
+    }
+
+    /// Get the number of levels.
+    pub fn level_count(&self) -> usize {
+        self.level_index.len()
+    }
+}
+
 /// A hierarchical document tree structure.
 ///
 /// Uses an arena-based tree representation for efficient traversal
@@ -50,6 +218,10 @@ pub struct DocumentTree {
 
     /// The root node ID.
     root_id: NodeId,
+
+    /// Cached leaf nodes (rebuilt on demand).
+    #[serde(skip)]
+    leaves_cache: Option<Vec<NodeId>>,
 }
 
 impl DocumentTree {
@@ -58,6 +230,7 @@
         let mut arena = Arena::new();
         let root_data = TreeNode {
             title: title.to_string(),
+            structure: String::new(), // Root has no structure index
             content: content.to_string(),
             summary: String::new(),
             depth: 0,
@@ -71,9 +244,13 @@
         };
         let root_id = arena.new_node(root_data);
 
+        // Root is initially a leaf
+        let leaves_cache = Some(vec![NodeId(root_id)]);
+
         Self {
             arena,
             root_id: NodeId(root_id),
+            leaves_cache,
         }
     }
 
@@ -81,7 +258,11 @@
     ///
     /// This is useful for deserialization and testing.
     pub fn from_raw(arena: Arena<TreeNode>, root_id: NodeId) -> Self {
-        Self { arena, root_id }
+        Self {
+            arena,
+            root_id,
+            leaves_cache: None, // Will be rebuilt on demand
+        }
     }
 
     /// Get the root node ID.
@@ -111,10 +292,28 @@
     /// Add a child node to the specified parent.
     ///
     /// Returns the ID of the newly created child node.
+    /// The structure is automatically calculated based on siblings.
     pub fn add_child(&mut self, parent: NodeId, title: &str, content: &str) -> NodeId {
         let parent_depth = self.arena.get(parent.0).map(|n| n.get().depth).unwrap_or(0);
+        let parent_structure = self
+            .arena
+            .get(parent.0)
+            .map(|n| n.get().structure.clone())
+            .unwrap_or_default();
+
+        // Calculate child index (1-based)
+        let child_index = parent.0.children(&self.arena).count() + 1;
+
+        // Calculate structure: parent_structure.child_index
+        let child_structure = if parent_structure.is_empty() {
+            child_index.to_string()
+        } else {
+            format!("{}.{}", parent_structure, child_index)
+        };
+
         let child_data = TreeNode {
             title: title.to_string(),
+            structure: child_structure,
             content: content.to_string(),
             summary: String::new(),
             depth: parent_depth + 1,
@@ -128,6 +327,15 @@
         };
         let child_id = self.arena.new_node(child_data);
         parent.0.append(child_id, &mut self.arena);
+
+        // Update leaves cache
+        if let Some(ref mut cache) = self.leaves_cache {
+            // Remove parent from leaves (it's no longer a leaf)
+            cache.retain(|&id| id != parent);
+            // Add child to leaves
+            cache.push(NodeId(child_id));
+        }
+
         NodeId(child_id)
     }
 
@@ -155,9 +363,27 @@
         id.0.children(&self.arena).next().is_none()
     }
 
+    /// Get the number of children of a node.
+    ///
+    /// This is more efficient than `children().len()` as it doesn't allocate.
+    pub fn child_count(&self, id: NodeId) -> usize {
+        id.0.children(&self.arena).count()
+    }
+
+    /// Get the children of a node as an iterator.
+    ///
+    /// Use this instead of `children()` when you only need to iterate,
+    /// as it avoids allocating a Vec.
+    pub fn children_iter(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
+        id.0.children(&self.arena).map(NodeId)
+    }
+
     /// Get the children of a node.
+    ///
+    /// Returns a Vec for cases where you need owned access to the children.
+    /// Consider using `children_iter()` if you only need to iterate.
     pub fn children(&self, id: NodeId) -> Vec<NodeId> {
-        id.0.children(&self.arena).map(NodeId).collect()
+        self.children_iter(id).collect()
     }
 
     /// Get the parent of a node.
@@ -167,12 +393,87 @@
         id.0.parent(&self.arena).map(NodeId)
     }
 
+    /// Get the siblings of a node (excluding the node itself).
+    ///
+    /// Returns an empty iterator for the root node.
+    pub fn siblings_iter(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
+        // indextree's `preceding_siblings`/`following_siblings` both yield the
+        // node itself first, so skip it on each side.
+        id.0.preceding_siblings(&self.arena)
+            .skip(1)
+            .chain(id.0.following_siblings(&self.arena).skip(1))
+            .map(NodeId)
+    }
+
+    /// Get the ancestors of a node from parent to root.
+    ///
+    /// Returns an empty iterator for the root node.
+    pub fn ancestors_iter(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
+        // indextree's `ancestors` yields the node itself first; skip it so the
+        // iterator really starts at the parent.
+        id.0.ancestors(&self.arena).skip(1).map(NodeId)
+    }
+
+    /// Get the path from root to a node (inclusive).
+    ///
+    /// Returns the path as a Vec starting from the root.
+    pub fn path_from_root(&self, id: NodeId) -> Vec<NodeId> {
+        let mut path: Vec<NodeId> = self.ancestors_iter(id).collect();
+        path.reverse();
+        path.push(id);
+        path
+    }
+
+    /// Get the depth of a node (root = 0).
+    pub fn depth(&self, id: NodeId) -> usize {
+        self.get(id).map(|n| n.depth).unwrap_or(0)
+    }
+
+    /// Get the first child of a node.
+    ///
+    /// Returns None if the node has no children.
+    pub fn first_child(&self, id: NodeId) -> Option<NodeId> {
+        self.children_iter(id).next()
+    }
+
+    /// Get the last child of a node.
+    ///
+    /// Returns None if the node has no children.
+    pub fn last_child(&self, id: NodeId) -> Option<NodeId> {
+        self.children_iter(id).last()
+    }
+
     /// Get all leaf nodes in the tree.
+    ///
+    /// Uses cached leaves if available, otherwise rebuilds the cache.
     pub fn leaves(&self) -> Vec<NodeId> {
-        self.traverse()
+        if let Some(ref cache) = self.leaves_cache {
+            return cache.clone();
+        }
+
+        // Rebuild cache on demand
+        let leaves: Vec<NodeId> = self
+            .traverse()
             .into_iter()
             .filter(|id| self.is_leaf(*id))
-            .collect()
+            .collect();
+
+        // Note: Can't mutate self here, caller should use rebuild_leaves_cache()
+        leaves
+    }
+
+    /// Rebuild the leaves cache.
+    ///
+    /// Call this after deserialization or batch modifications.
+    pub fn rebuild_leaves_cache(&mut self) {
+        self.leaves_cache = Some(
+            self.traverse()
+                .into_iter()
+                .filter(|id| self.is_leaf(*id))
+                .collect(),
+        );
+    }
+
+    /// Invalidate the leaves cache.
+    ///
+    /// Called automatically by mutation methods.
+    pub fn invalidate_leaves_cache(&mut self) {
+        self.leaves_cache = None;
     }
 
     /// Get all nodes in the tree (depth-first order).
@@ -210,6 +511,13 @@
     }
 
+    /// Update a node's structure index.
+    pub fn set_structure(&mut self, id: NodeId, structure: &str) {
+        if let Some(node) = self.get_mut(id) {
+            node.structure = structure.to_string();
+        }
+    }
+
     /// Set page boundaries for a node.
     pub fn set_page_boundaries(&mut self, id: NodeId, start: usize, end: usize) {
         if let Some(node) = self.get_mut(id) {
@@ -244,6 +552,62 @@
         }
     }
 
+    /// Find a node by its structure index.
+    ///
+    /// This convenience method performs a linear scan over the tree.
+    /// For repeated queries, build a RetrievalIndex once.
+    pub fn find_by_structure(&self, structure: &str) -> Option<NodeId> {
+        // Linear search - for repeated use, build RetrievalIndex
+        for node_id in self.traverse() {
+            if let Some(node) = self.get(node_id) {
+                if node.structure == structure {
+                    return Some(node_id);
+                }
+            }
+        }
+        None
+    }
+
+    /// Find the most specific node containing a page.
+    ///
+    /// This convenience method performs a linear scan over the tree.
+    /// For repeated queries, build a RetrievalIndex once.
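+    ///
+    /// # Example
+    /// ```ignore
+    /// // Deepest node covering page 42, if any node carries page data.
+    /// if let Some(node_id) = tree.find_by_page(42) {
+    ///     // ...
+    /// }
+    /// ```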
+    pub fn find_by_page(&self, page: usize) -> Option<NodeId> {
+        let mut best_match: Option<(NodeId, usize)> = None;
+
+        // Find the deepest node containing this page
+        for node_id in self.traverse() {
+            if let Some((start, end)) = self.page_range(node_id) {
+                if page >= start && page <= end {
+                    let depth = self.get(node_id).map(|n| n.depth).unwrap_or(0);
+                    match &best_match {
+                        None => best_match = Some((node_id, depth)),
+                        Some((_, best_depth)) if depth > *best_depth => {
+                            best_match = Some((node_id, depth));
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        }
+
+        best_match.map(|(id, _)| id)
+    }
+
+    /// Get all nodes whose page range overlaps with the given range.
+    pub fn find_nodes_by_page_range(&self, start: usize, end: usize) -> Vec<NodeId> {
+        self.traverse()
+            .into_iter()
+            .filter(|&id| {
+                if let Some((node_start, node_end)) = self.page_range(id) {
+                    node_start <= end && start <= node_end
+                } else {
+                    false
+                }
+            })
+            .collect()
+    }
+
     /// Set the node ID (identifier string).
     pub fn set_node_id(&mut self, id: NodeId, node_id: &str) {
         if let Some(node) = self.get_mut(id) {
@@ -274,6 +638,128 @@
         }
     }
 
+    /// Build a retrieval index for efficient operations.
+    ///
+    /// This should be called once after the tree is fully constructed.
+    /// The index provides O(1) access to commonly needed traversal data.
+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// let tree = /* build tree */;
+    /// let index = tree.build_retrieval_index();
+    ///
+    /// // Fast access to leaves
+    /// for leaf in index.leaves() {
+    ///     // process leaf
+    /// }
+    ///
+    /// // Fast path lookup
+    /// if let Some(path) = index.path_to(node_id) {
+    ///     // path[0] = root, path[-1] = node_id
+    /// }
+    ///
+    /// // Fast structure lookup
+    /// if let Some(node) = index.find_by_structure("2.1.3") {
+    ///     // Found section 2.1.3
+    /// }
+    ///
+    /// // Fast page lookup
+    /// if let Some(node) = index.find_by_page(42) {
+    ///     // Found node containing page 42
+    /// }
+    /// ```
+    pub fn build_retrieval_index(&self) -> RetrievalIndex {
+        let mut leaves = Vec::new();
+        let mut level_index: Vec<Vec<NodeId>> = Vec::new();
+        let mut path_cache: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
+        let mut siblings_cache: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
+        let mut structure_index: HashMap<String, NodeId> = HashMap::new();
+        let mut page_index: HashMap<usize, NodeId> = HashMap::new();
+        let mut node_page_range: HashMap<NodeId, (usize, usize)> = HashMap::new();
+        let mut max_depth = 0;
+        let node_count = self.node_count();
+
+        // BFS to build level index
+        let mut current_level = vec![self.root_id];
+
+        // Initialize root path
+        path_cache.insert(self.root_id, vec![self.root_id]);
+
+        while !current_level.is_empty() {
+            level_index.push(current_level.clone());
+
+            let mut next_level = Vec::new();
+
+            for &node_id in &current_level {
+                let children: Vec<NodeId> = self.children(node_id);
+
+                // Get node data
+                if let Some(node) = self.get(node_id) {
+                    max_depth = max_depth.max(node.depth);
+
+                    // Build structure index
+                    if !node.structure.is_empty() {
+                        structure_index.insert(node.structure.clone(), node_id);
+                    }
+
+                    // Build page index and page range
+                    if let (Some(start), Some(end)) = (node.start_page, node.end_page) {
+                        node_page_range.insert(node_id, (start, end));
+
+                        // Map each page to this node (will be overwritten by deeper nodes)
+                        for page in start..=end {
+                            page_index.insert(page, node_id);
+                        }
+                    }
+                }
+
+                // Check if leaf
+                if children.is_empty() {
+                    leaves.push(node_id);
+                }
+
+                // Build siblings cache for children
+                if children.len() > 1 {
+                    for (i, &child) in children.iter().enumerate() {
+                        let siblings: Vec<NodeId> = children
+                            .iter()
+                            .enumerate()
                            .filter(|(j, _)| *j != i)
+                            .map(|(_, &c)| c)
+                            .collect();
+                        siblings_cache.insert(child, siblings);
+                    }
+                }
+
+                // Build path cache for children
+                if let Some(parent_path) = path_cache.get(&node_id).cloned() {
+                    for &child in &children {
+                        let mut child_path = parent_path.clone();
+                        child_path.push(child);
+                        path_cache.insert(child, child_path);
+                    }
+                }
+
+                next_level.extend(children);
+            }
+
+            current_level = next_level;
+        }
+
+        RetrievalIndex {
+            leaves,
+            level_index,
+            path_cache,
+            siblings_cache,
+            structure_index,
+            page_index,
+            node_page_range,
+            node_count,
+            max_depth,
+        }
+    }
+
     /// Recursively build structure nodes starting from the given node.
     fn build_structure_nodes(&self, node_id: NodeId) -> Vec<StructureNode> {
         let children = self.children(node_id);
diff --git a/src/parser/markdown/parser.rs b/src/parser/markdown/parser.rs
index 7e1f3a2d..366be1be 100644
--- a/src/parser/markdown/parser.rs
+++ b/src/parser/markdown/parser.rs
@@ -320,12 +320,14 @@ fn finish_current_node(
     config: &MarkdownConfig,
     current_line: usize,
 ) -> Option<TreeNode> {
-    // Handle preamble content
+    // Handle preamble content (content before first heading)
     if nodes.is_empty() && !content_buffer.trim().is_empty() {
         if config.create_preamble_node {
             let content = content_buffer.trim();
             *preamble_content = content.to_string();
         }
+        // Clear the buffer after storing as preamble to avoid duplication
+        content_buffer.clear();
     }
 
     // Finish current heading node
diff --git a/src/retrieval/content/aggregator.rs b/src/retrieval/content/aggregator.rs
new file mode 100644
index 00000000..9edb625b
--- /dev/null
+++ b/src/retrieval/content/aggregator.rs
@@ -0,0 +1,402 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Main content aggregator combining all components.
+//!
+//! This module provides the main [`ContentAggregator`] that orchestrates
+//! scoring, budget allocation, and structure building.
+
+use std::collections::HashMap;
+
+use tracing::{debug, info};
+
+use crate::domain::{DocumentTree, NodeId, estimate_tokens};
+
+use super::budget::{AllocationResult, AllocationStrategy, BudgetAllocator, SelectedContent};
+use super::builder::{ContentMetadata, StructureBuilder, StructuredContent};
+use super::config::{ContentAggregatorConfig, OutputFormatConfig, ScoringStrategyConfig};
+use super::scorer::{
+    ContentChunk, ContentRelevance, RelevanceScorer, ScoreComponents, ScoringContext,
+};
+
+/// Candidate node from retrieval.
+#[derive(Debug, Clone)]
+pub struct CandidateNode {
+    /// Node ID.
+    pub node_id: NodeId,
+    /// Relevance score from search.
+    pub score: f32,
+    /// Depth in tree.
+    pub depth: usize,
+}
+
+impl CandidateNode {
+    /// Create a new candidate.
+    #[must_use]
+    pub fn new(node_id: NodeId, score: f32, depth: usize) -> Self {
+        Self { node_id, score, depth }
+    }
+}
+
+/// Result of content aggregation.
+#[derive(Debug, Clone)]
+pub struct AggregationResult {
+    /// Aggregated content string.
+    pub content: String,
+    /// Total tokens used.
+    pub tokens_used: usize,
+    /// Number of nodes included.
+    pub nodes_included: usize,
+    /// Average relevance score.
+    pub avg_score: f32,
+    /// Whether content was truncated due to budget.
+    pub was_truncated: bool,
+    /// Metadata about the aggregation.
+    pub metadata: ContentMetadata,
+}
+
+impl AggregationResult {
+    /// Check if result is empty.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.content.is_empty()
+    }
+}
+
+/// Content aggregator combining scoring, allocation, and building.
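+///
+/// A minimal usage sketch (the candidate list would normally come from the
+/// search stage; the node id, score, and depth below are illustrative):
+///
+/// ```ignore
+/// let aggregator = ContentAggregator::with_defaults();
+/// let candidates = vec![CandidateNode::new(node_id, 0.9, 1)];
+/// let result = aggregator.aggregate(&candidates, &tree, "query text");
+/// println!("{} tokens across {} nodes", result.tokens_used, result.nodes_included);
+/// ```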
+#[derive(Debug)] +pub struct ContentAggregator { + /// Configuration. + config: ContentAggregatorConfig, +} + +impl ContentAggregator { + /// Create a new content aggregator. + #[must_use] + pub fn new(config: ContentAggregatorConfig) -> Self { + Self { config } + } + + /// Create aggregator with default configuration. + #[must_use] + pub fn with_defaults() -> Self { + Self::new(ContentAggregatorConfig::default()) + } + + /// Aggregate content from candidate nodes. + /// + /// # Arguments + /// + /// * `candidates` - Candidate nodes from retrieval + /// * `tree` - Document tree + /// * `query` - Query string for relevance scoring + /// + /// # Returns + /// + /// Aggregated content within token budget. + #[must_use] + pub fn aggregate( + &self, + candidates: &[CandidateNode], + tree: &DocumentTree, + query: &str, + ) -> AggregationResult { + let start = std::time::Instant::now(); + + // Step 1: Collect all content chunks from candidates and their descendants + let chunks = self.collect_chunks(candidates, tree); + debug!( + "Collected {} content chunks from {} candidates", + chunks.len(), + candidates.len() + ); + + if chunks.is_empty() { + return AggregationResult { + content: String::new(), + tokens_used: 0, + nodes_included: 0, + avg_score: 0.0, + was_truncated: false, + metadata: ContentMetadata::default(), + }; + } + + // Step 2: Score all chunks for relevance + let scorer = RelevanceScorer::new(query, self.config.scoring_strategy); + let scoring_ctx = self.build_scoring_context(&chunks); + let scored = scorer.score_chunks(&chunks, &scoring_ctx); + + // Filter by minimum score + let filtered: Vec<_> = scored + .into_iter() + .filter(|r| r.score >= self.config.min_relevance_score) + .collect(); + + debug!( + "Scored {} chunks, {} passed threshold {:.2}", + chunks.len(), + filtered.len(), + self.config.min_relevance_score + ); + + if filtered.is_empty() { + // Fall back to returning best candidate content + return self.fallback_result(candidates, tree); + } + + // Step 3: Allocate token budget + let max_depth = filtered.iter().map(|r| r.chunk.depth).max().unwrap_or(0); + let strategy = self.get_allocation_strategy(); + let allocator = BudgetAllocator::new(self.config.token_budget) + .with_strategy(strategy); + + let allocation = allocator.allocate(filtered, max_depth); + + info!( + "Allocated {} tokens to {} items (strategy: {:?})", + allocation.tokens_used, + allocation.selected.len(), + self.config.scoring_strategy + ); + + // Step 4: Build structured output + let builder = StructureBuilder::from_config( + self.config.output_format, + self.config.include_scores, + ); + + let structured = builder.build(allocation.selected.clone(), tree); + + // Build result + let was_truncated = allocation.selected.iter().any(|s| s.is_truncated()); + + AggregationResult { + content: structured.content, + tokens_used: allocation.tokens_used, + nodes_included: allocation.selected.len(), + avg_score: allocation.stats.avg_score, + was_truncated, + metadata: structured.metadata, + } + } + + /// Collect content chunks from candidates and descendants. 
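+    ///
+    /// Each candidate contributes its own content plus the content of its
+    /// leaf descendants; the `visited` map keeps overlapping candidates from
+    /// being collected twice.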
+    fn collect_chunks(
+        &self,
+        candidates: &[CandidateNode],
+        tree: &DocumentTree,
+    ) -> Vec<ContentChunk> {
+        let mut chunks = Vec::new();
+        let mut visited: HashMap<NodeId, bool> = HashMap::new();
+
+        for candidate in candidates {
+            // Add candidate's own content
+            if let Some(node) = tree.get(candidate.node_id) {
+                if !node.content.is_empty() {
+                    chunks.push(ContentChunk::new(
+                        candidate.node_id,
+                        node.title.clone(),
+                        node.content.clone(),
+                        candidate.depth,
+                    ));
+                    visited.insert(candidate.node_id, true);
+                }
+
+                // Collect leaf descendants
+                self.collect_descendant_chunks(
+                    candidate.node_id,
+                    tree,
+                    candidate.depth,
+                    &mut chunks,
+                    &mut visited,
+                );
+            }
+        }
+
+        chunks
+    }
+
+    /// Collect chunks from descendant nodes.
+    fn collect_descendant_chunks(
+        &self,
+        parent_id: NodeId,
+        tree: &DocumentTree,
+        parent_depth: usize,
+        chunks: &mut Vec<ContentChunk>,
+        visited: &mut HashMap<NodeId, bool>,
+    ) {
+        let children = tree.children(parent_id);
+
+        for child_id in children {
+            if visited.contains_key(&child_id) {
+                continue;
+            }
+            visited.insert(child_id, true);
+
+            if let Some(node) = tree.get(child_id) {
+                let child_depth = parent_depth + 1;
+
+                if tree.is_leaf(child_id) {
+                    // Leaf node - add its content
+                    if !node.content.is_empty() {
+                        chunks.push(ContentChunk::new(
+                            child_id,
+                            node.title.clone(),
+                            node.content.clone(),
+                            child_depth,
+                        ));
+                    }
+                } else {
+                    // Non-leaf - recurse
+                    self.collect_descendant_chunks(child_id, tree, child_depth, chunks, visited);
+                }
+            }
+        }
+    }
+
+    /// Build scoring context from chunks.
+    fn build_scoring_context(&self, chunks: &[ContentChunk]) -> ScoringContext {
+        let total_len: usize = chunks.iter().map(|c| c.content.len()).sum();
+        let avg_len = if chunks.is_empty() {
+            100.0
+        } else {
+            total_len as f32 / chunks.len() as f32
+        };
+
+        // Build document frequency map
+        let mut doc_freq: HashMap<String, usize> = HashMap::new();
+        for chunk in chunks {
+            let mut seen_in_doc = std::collections::HashSet::new();
+            for word in chunk.content.to_lowercase().split_whitespace() {
+                if !seen_in_doc.contains(word) {
+                    *doc_freq.entry(word.to_string()).or_insert(0) += 1;
+                    seen_in_doc.insert(word);
+                }
+            }
+        }
+
+        ScoringContext {
+            avg_doc_len: avg_len,
+            doc_count: chunks.len(),
+            doc_freq,
+            parent_score: None,
+        }
+    }
+
+    /// Get allocation strategy from config.
+    fn get_allocation_strategy(&self) -> AllocationStrategy {
+        AllocationStrategy::Hierarchical {
+            min_per_level: self.config.hierarchical_min_per_level,
+        }
+    }
+
+    /// Fallback result when no content passes threshold.
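+    ///
+    /// Falls back to the best candidate's content, then its summary, and
+    /// finally an empty result if neither is available.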
+ fn fallback_result( + &self, + candidates: &[CandidateNode], + tree: &DocumentTree, + ) -> AggregationResult { + // Return best candidate's content + if let Some(best) = candidates.first() { + if let Some(node) = tree.get(best.node_id) { + let content = if !node.content.is_empty() { + node.content.clone() + } else if !node.summary.is_empty() { + node.summary.clone() + } else { + String::new() + }; + + let tokens = estimate_tokens(&content); + + return AggregationResult { + content: format!("## {}\n\n{}", node.title, content), + tokens_used: tokens, + nodes_included: 1, + avg_score: best.score, + was_truncated: false, + metadata: ContentMetadata { + total_tokens: tokens, + node_count: 1, + avg_score: best.score, + max_depth: best.depth, + }, + }; + } + } + + AggregationResult { + content: String::new(), + tokens_used: 0, + nodes_included: 0, + avg_score: 0.0, + was_truncated: false, + metadata: ContentMetadata::default(), + } + } +} + +impl Default for ContentAggregator { + fn default() -> Self { + Self::with_defaults() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use indextree::Arena; + + fn make_test_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = crate::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) + } + + #[test] + fn test_aggregator_creation() { + let config = ContentAggregatorConfig::default(); + let aggregator = ContentAggregator::new(config); + assert_eq!(aggregator.config.token_budget, 4000); + } + + #[test] + fn test_aggregator_with_defaults() { + let aggregator = ContentAggregator::with_defaults(); + assert_eq!(aggregator.config.token_budget, 4000); + } + + #[test] + fn test_empty_candidates() { + let aggregator = ContentAggregator::with_defaults(); + let tree = DocumentTree::new("Test", ""); + + let result = aggregator.aggregate(&[], &tree, "test query"); + + assert!(result.is_empty()); + assert_eq!(result.tokens_used, 0); + } + + #[test] + fn test_candidate_node_creation() { + let node_id = make_test_node_id(); + let candidate = CandidateNode::new(node_id, 0.8, 2); + + assert_eq!(candidate.score, 0.8); + assert_eq!(candidate.depth, 2); + } +} diff --git a/src/retrieval/content/budget.rs b/src/retrieval/content/budget.rs new file mode 100644 index 00000000..fa91e9c0 --- /dev/null +++ b/src/retrieval/content/budget.rs @@ -0,0 +1,624 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Token budget allocation for content aggregation. +//! +//! This module provides budget-aware content selection that optimizes +//! token usage while maximizing relevance. + +use std::collections::HashMap; + +use crate::domain::{estimate_tokens, NodeId}; + +use super::scorer::ContentRelevance; + +/// Allocation strategy for distributing token budget. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum AllocationStrategy { + /// Select highest-scoring content first until budget exhausted. + Greedy, + /// Distribute budget proportionally to relevance scores. + Proportional, + /// Ensure each depth level has minimum representation. 
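+    ///
+    /// For example, `min_per_level: 0.1` reserves roughly 10% of the total
+    /// budget for each depth level before leftover budget is spent greedily.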
+ Hierarchical { + /// Minimum fraction of budget per level (0.0 - 1.0) + min_per_level: f32, + }, +} + +impl Default for AllocationStrategy { + fn default() -> Self { + Self::Hierarchical { min_per_level: 0.1 } + } +} + +/// Information about content truncation. +#[derive(Debug, Clone)] +pub struct TruncationInfo { + /// Original content length in characters. + pub original_len: usize, + /// Truncated content length in characters. + pub truncated_len: usize, + /// Reason for truncation. + pub reason: TruncationReason, +} + +/// Reason for content truncation. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TruncationReason { + /// Content exceeded remaining budget. + BudgetExceeded, + /// Content tail had low relevance. + LowRelevanceTail, +} + +/// A selected content item after budget allocation. +#[derive(Debug, Clone)] +pub struct SelectedContent { + /// Node ID. + pub node_id: NodeId, + /// Node title. + pub title: String, + /// Selected content text. + pub content: String, + /// Token count of selected content. + pub tokens: usize, + /// Relevance score. + pub score: f32, + /// Depth in tree. + pub depth: usize, + /// Truncation info if content was truncated. + pub truncation: Option, +} + +impl SelectedContent { + /// Check if content was truncated. + #[must_use] + pub fn is_truncated(&self) -> bool { + self.truncation.is_some() + } +} + +/// Statistics about the allocation process. +#[derive(Debug, Clone, Default)] +pub struct AllocationStats { + /// Total content items considered. + pub items_considered: usize, + /// Items selected for output. + pub items_selected: usize, + /// Items truncated. + pub items_truncated: usize, + /// Items filtered (below threshold). + pub items_filtered: usize, + /// Average score of selected items. + pub avg_score: f32, +} + +/// Result of budget allocation. +#[derive(Debug, Clone)] +pub struct AllocationResult { + /// Selected content items. + pub selected: Vec, + /// Total tokens used. + pub tokens_used: usize, + /// Remaining token budget. + pub remaining_budget: usize, + /// Allocation statistics. + pub stats: AllocationStats, +} + +impl AllocationResult { + /// Check if any content was selected. + #[must_use] + pub fn is_empty(&self) -> bool { + self.selected.is_empty() + } + + /// Get number of selected items. + #[must_use] + pub fn len(&self) -> usize { + self.selected.len() + } +} + +/// Token budget allocator. +#[derive(Debug)] +pub struct BudgetAllocator { + /// Total token budget. + total_budget: usize, + /// Minimum reserve budget (for fallback). + min_reserve: usize, + /// Allocation strategy. + strategy: AllocationStrategy, + /// Minimum relevance score threshold. + min_score: f32, +} + +impl BudgetAllocator { + /// Create a new allocator with the specified budget. + #[must_use] + pub fn new(budget: usize) -> Self { + Self { + total_budget: budget, + min_reserve: budget / 10, + strategy: AllocationStrategy::default(), + min_score: 0.0, + } + } + + /// Set the allocation strategy. + #[must_use] + pub fn with_strategy(mut self, strategy: AllocationStrategy) -> Self { + self.strategy = strategy; + self + } + + /// Set minimum relevance score threshold. + #[must_use] + pub fn with_min_score(mut self, min_score: f32) -> Self { + self.min_score = min_score; + self + } + + /// Allocate budget to scored content. 
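+    ///
+    /// # Example
+    ///
+    /// A minimal sketch; `scored` is assumed to come from
+    /// `RelevanceScorer::score_chunks`, and `max_depth` from the deepest
+    /// chunk in that set.
+    ///
+    /// ```ignore
+    /// let allocator = BudgetAllocator::new(1000)
+    ///     .with_strategy(AllocationStrategy::Greedy)
+    ///     .with_min_score(0.3);
+    /// let result = allocator.allocate(scored, max_depth);
+    /// assert!(result.tokens_used <= 1000);
+    /// ```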
+ #[must_use] + pub fn allocate( + &self, + scored_content: Vec, + max_depth: usize, + ) -> AllocationResult { + // Filter by minimum score + let filtered: Vec<_> = scored_content + .into_iter() + .filter(|c| c.score >= self.min_score) + .collect(); + + let stats = AllocationStats { + items_considered: filtered.len(), + ..Default::default() + }; + + match &self.strategy { + AllocationStrategy::Greedy => self.allocate_greedy(filtered, stats), + AllocationStrategy::Proportional => self.allocate_proportional(filtered, stats), + AllocationStrategy::Hierarchical { min_per_level } => { + self.allocate_hierarchical(filtered, max_depth, *min_per_level, stats) + } + } + } + + /// Greedy allocation: select highest-scoring content first. + fn allocate_greedy( + &self, + mut content: Vec, + mut stats: AllocationStats, + ) -> AllocationResult { + // Sort by score descending + content.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + let mut selected = Vec::new(); + let mut tokens_used = 0; + + for relevance in content { + let tokens = relevance.chunk.token_count(); + + if tokens_used + tokens <= self.total_budget { + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title, + content: relevance.chunk.content, + tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: None, + }); + tokens_used += tokens; + } else { + // Try to fit truncated content + let remaining = self.total_budget - tokens_used; + if remaining >= 50 { + // Minimum useful content + if let Some(truncated) = self.truncate_content(&relevance.chunk.content, remaining) { + let truncated_tokens = estimate_tokens(&truncated); + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title, + content: truncated, + tokens: truncated_tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: Some(TruncationInfo { + original_len: relevance.chunk.content.len(), + truncated_len: remaining, + reason: TruncationReason::BudgetExceeded, + }), + }); + tokens_used += truncated_tokens; + stats.items_truncated += 1; + } + } + break; + } + } + + stats.items_selected = selected.len(); + stats.avg_score = if selected.is_empty() { + 0.0 + } else { + selected.iter().map(|s| s.score).sum::() / selected.len() as f32 + }; + + AllocationResult { + selected, + tokens_used, + remaining_budget: self.total_budget - tokens_used, + stats, + } + } + + /// Proportional allocation: distribute budget by score ratio. 
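+    ///
+    /// Each item is offered a share of the budget proportional to its score,
+    /// roughly `max(total_budget * score / total_score, 50)` tokens, and is
+    /// truncated if its content exceeds that share.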
+ fn allocate_proportional( + &self, + content: Vec, + mut stats: AllocationStats, + ) -> AllocationResult { + let total_score: f32 = content.iter().map(|c| c.score).sum(); + if total_score == 0.0 { + return AllocationResult { + selected: Vec::new(), + tokens_used: 0, + remaining_budget: self.total_budget, + stats, + }; + } + + let mut selected = Vec::new(); + let mut tokens_used = 0; + + for relevance in content { + // Calculate proportional budget + let proportion = relevance.score / total_score; + let allocated_budget = ((self.total_budget as f32 * proportion) as usize).max(50); + + let content_tokens = relevance.chunk.token_count(); + + if content_tokens <= allocated_budget { + // Full content fits + if tokens_used + content_tokens <= self.total_budget { + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title, + content: relevance.chunk.content, + tokens: content_tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: None, + }); + tokens_used += content_tokens; + } + } else { + // Truncate to allocated budget + let remaining = self.total_budget - tokens_used; + if remaining >= 50 && remaining >= allocated_budget / 2 { + if let Some(truncated) = self.truncate_content(&relevance.chunk.content, remaining.min(allocated_budget)) { + let truncated_tokens = estimate_tokens(&truncated); + let truncated_len = truncated.len(); + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title, + content: truncated, + tokens: truncated_tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: Some(TruncationInfo { + original_len: relevance.chunk.content.len(), + truncated_len, + reason: TruncationReason::BudgetExceeded, + }), + }); + tokens_used += truncated_tokens; + stats.items_truncated += 1; + } + } + } + } + + stats.items_selected = selected.len(); + stats.avg_score = if selected.is_empty() { + 0.0 + } else { + selected.iter().map(|s| s.score).sum::() / selected.len() as f32 + }; + + AllocationResult { + selected, + tokens_used, + remaining_budget: self.total_budget - tokens_used, + stats, + } + } + + /// Hierarchical allocation: ensure each depth level has representation. 
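+    ///
+    /// Runs two passes: a level-by-level pass (shallow to deep) that grants
+    /// each depth at least `min_per_level` of the budget, then a greedy pass
+    /// that spends any budget above the reserve on the highest-scoring
+    /// remaining items.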
+ fn allocate_hierarchical( + &self, + content: Vec, + max_depth: usize, + min_per_level: f32, + mut stats: AllocationStats, + ) -> AllocationResult { + // Group content by depth + let mut by_depth: HashMap> = HashMap::new(); + for c in content { + by_depth + .entry(c.chunk.depth) + .or_default() + .push(c); + } + + // Sort each level by score + for (_depth, items) in by_depth.iter_mut() { + items.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + } + + let per_level_budget = (self.total_budget as f32 * min_per_level) as usize; + let mut selected = Vec::new(); + let mut tokens_used = 0; + + // Process from shallow to deep + for depth in 0..=max_depth { + if tokens_used >= self.total_budget { + break; + } + + if let Some(level_content) = by_depth.get(&depth) { + let mut level_used = 0; + + for relevance in level_content { + if tokens_used >= self.total_budget { + break; + } + + let tokens = relevance.chunk.token_count(); + + // Check if we should include this content + let can_include_full = tokens_used + tokens <= self.total_budget; + let level_budget_ok = level_used < per_level_budget || depth == 0; + + if can_include_full && level_budget_ok { + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title.clone(), + content: relevance.chunk.content.clone(), + tokens, + score: relevance.score, + depth, + truncation: None, + }); + tokens_used += tokens; + level_used += tokens; + } else if level_used < per_level_budget { + // Try truncated version + let remaining = (self.total_budget - tokens_used).min(per_level_budget - level_used); + if remaining >= 50 { + if let Some(truncated) = self.truncate_content(&relevance.chunk.content, remaining) { + let truncated_tokens = estimate_tokens(&truncated); + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title.clone(), + content: truncated, + tokens: truncated_tokens, + score: relevance.score, + depth, + truncation: Some(TruncationInfo { + original_len: relevance.chunk.content.len(), + truncated_len: remaining, + reason: TruncationReason::BudgetExceeded, + }), + }); + tokens_used += truncated_tokens; + level_used += truncated_tokens; + stats.items_truncated += 1; + } + } + } + } + } + } + + // Second pass: fill remaining budget with highest-scoring content + if tokens_used < self.total_budget - self.min_reserve { + let mut all_remaining: Vec<_> = by_depth + .values() + .flat_map(|v| v.iter()) + .filter(|c| !selected.iter().any(|s| s.node_id == c.chunk.node_id)) + .collect(); + + all_remaining.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + for relevance in all_remaining { + if tokens_used >= self.total_budget - self.min_reserve { + break; + } + + let tokens = relevance.chunk.token_count(); + if tokens_used + tokens <= self.total_budget { + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title.clone(), + content: relevance.chunk.content.clone(), + tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: None, + }); + tokens_used += tokens; + } + } + } + + stats.items_selected = selected.len(); + stats.avg_score = if selected.is_empty() { + 0.0 + } else { + selected.iter().map(|s| s.score).sum::() / selected.len() as f32 + }; + + AllocationResult { + selected, + tokens_used, + remaining_budget: self.total_budget - tokens_used, + stats, + } + } + + /// Truncate content to fit within token budget. 
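+    ///
+    /// Uses the rough heuristic of 1 token ≈ 4 characters and prefers to cut
+    /// at a sentence boundary, then a word boundary, before hard-truncating.
+    /// The cut is a byte offset, so this assumes mostly ASCII content; text
+    /// with multi-byte UTF-8 near the boundary would need a char-boundary
+    /// check before slicing.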
+ fn truncate_content(&self, content: &str, max_tokens: usize) -> Option { + if max_tokens < 20 { + return None; + } + + // Approximate: 1 token ≈ 4 characters (for English) + let max_chars = max_tokens * 4; + + if content.len() <= max_chars { + return Some(content.to_string()); + } + + // Try to break at sentence boundary + let truncated = &content[..max_chars]; + + // Find last sentence boundary + if let Some(pos) = truncated.rfind(|c| c == '.' || c == '!' || c == '?') { + Some(format!("{}...", &truncated[..=pos])) + } else if let Some(pos) = truncated.rfind(' ') { + // Fall back to word boundary + Some(format!("{}...", &truncated[..pos])) + } else { + // Hard truncate + Some(format!("{}...", truncated)) + } + } +} + +impl Default for BudgetAllocator { + fn default() -> Self { + Self::new(4000) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::retrieval::content::{ContentChunk, ScoreComponents}; + use indextree::Arena; + + fn make_test_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = crate::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) + } + + fn make_relevance(content: &str, score: f32, depth: usize) -> ContentRelevance { + let chunk = ContentChunk::new( + make_test_node_id(), + "Test".to_string(), + content.to_string(), + depth, + ); + ContentRelevance::new(chunk, score, ScoreComponents::default()) + } + + #[test] + fn test_allocator_creation() { + let allocator = BudgetAllocator::new(1000); + assert_eq!(allocator.total_budget, 1000); + } + + #[test] + fn test_greedy_allocation() { + let allocator = BudgetAllocator::new(100) + .with_strategy(AllocationStrategy::Greedy); + + let content = vec![ + make_relevance("High score content with enough text", 0.9, 0), + make_relevance("Low score content", 0.3, 0), + ]; + + let result = allocator.allocate(content, 1); + assert!(!result.is_empty()); + assert!(result.tokens_used <= 100); + } + + #[test] + fn test_min_score_filter() { + let allocator = BudgetAllocator::new(1000) + .with_min_score(0.5); + + let content = vec![ + make_relevance("Good content", 0.8, 0), + make_relevance("Bad content", 0.2, 0), + ]; + + let result = allocator.allocate(content, 1); + assert_eq!(result.selected.len(), 1); + } + + #[test] + fn test_truncation() { + let allocator = BudgetAllocator::new(50); + let truncated = allocator.truncate_content( + "This is a very long piece of content. It has multiple sentences. 
We want to test truncation at sentence boundary.", + 25, // Need at least 20 tokens for truncation + ); + + assert!(truncated.is_some()); + let text = truncated.unwrap(); + // Should truncate and add ellipsis + assert!(text.len() < 200); // Should be truncated + } + + #[test] + fn test_hierarchical_allocation() { + let allocator = BudgetAllocator::new(200) + .with_strategy(AllocationStrategy::Hierarchical { min_per_level: 0.2 }); + + let content = vec![ + make_relevance("Depth 0 content", 0.9, 0), + make_relevance("Depth 1 content A", 0.7, 1), + make_relevance("Depth 1 content B", 0.6, 1), + make_relevance("Depth 2 content", 0.8, 2), + ]; + + let result = allocator.allocate(content, 2); + + // Should have content from multiple depths + let depths: std::collections::HashSet = + result.selected.iter().map(|s| s.depth).collect(); + assert!(depths.len() >= 2); + } +} diff --git a/src/retrieval/content/builder.rs b/src/retrieval/content/builder.rs new file mode 100644 index 00000000..c3b5792f --- /dev/null +++ b/src/retrieval/content/builder.rs @@ -0,0 +1,522 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Structure builder for aggregated content. +//! +//! This module transforms selected content into structured output formats. + +use serde::{Deserialize, Serialize}; + +use crate::domain::DocumentTree; + +use super::budget::SelectedContent; +use super::config::OutputFormatConfig; + +/// Output format for structured content. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum OutputFormat { + /// Markdown format with headers. + #[default] + Markdown, + /// JSON format. + Json, + /// Tree format. + Tree, + /// Flat text format. + Flat, +} + +impl From for OutputFormat { + fn from(config: OutputFormatConfig) -> Self { + match config { + OutputFormatConfig::Markdown => Self::Markdown, + OutputFormatConfig::Json => Self::Json, + OutputFormatConfig::Tree => Self::Tree, + OutputFormatConfig::Flat => Self::Flat, + } + } +} + +/// Tree node in the content structure. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContentTreeNode { + /// Node title. + pub title: String, + /// Node content (if any). + pub content: Option, + /// Relevance score. + pub score: f32, + /// Child nodes. + pub children: Vec, +} + +impl ContentTreeNode { + /// Create a new tree node. + #[must_use] + pub fn new(title: String) -> Self { + Self { + title, + content: None, + score: 0.0, + children: Vec::new(), + } + } + + /// Add content to this node. + #[must_use] + pub fn with_content(mut self, content: String, score: f32) -> Self { + self.content = Some(content); + self.score = score; + self + } + + /// Add a child node. + pub fn add_child(&mut self, child: ContentTreeNode) { + self.children.push(child); + } +} + +/// Content tree structure. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContentTree { + /// Root node. + pub root: ContentTreeNode, + /// Total nodes in tree. + pub total_nodes: usize, +} + +impl ContentTree { + /// Create a new content tree. + #[must_use] + pub fn new(root: ContentTreeNode) -> Self { + Self { + total_nodes: 1, + root, + } + } +} + +/// Metadata about aggregated content. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ContentMetadata { + /// Total tokens in content. + pub total_tokens: usize, + /// Number of nodes included. + pub node_count: usize, + /// Average relevance score. + pub avg_score: f32, + /// Maximum depth included. + pub max_depth: usize, +} + +/// Structured content result. 
+#[derive(Debug, Clone)] +pub struct StructuredContent { + /// Formatted content string. + pub content: String, + /// Optional tree structure. + pub structure: Option, + /// Content metadata. + pub metadata: ContentMetadata, +} + +impl StructuredContent { + /// Check if content is empty. + #[must_use] + pub fn is_empty(&self) -> bool { + self.content.is_empty() + } + + /// Get content length in characters. + #[must_use] + pub fn len(&self) -> usize { + self.content.len() + } +} + +/// Builder for creating structured content output. +#[derive(Debug)] +pub struct StructureBuilder { + /// Output format. + format: OutputFormat, + /// Include metadata in output. + include_metadata: bool, + /// Include scores in output. + include_scores: bool, +} + +impl StructureBuilder { + /// Create a new structure builder. + #[must_use] + pub fn new(format: OutputFormat) -> Self { + Self { + format, + include_metadata: false, + include_scores: false, + } + } + + /// Create builder from config. + #[must_use] + pub fn from_config(format: OutputFormatConfig, include_scores: bool) -> Self { + Self { + format: OutputFormat::from(format), + include_metadata: false, + include_scores, + } + } + + /// Enable metadata in output. + #[must_use] + pub fn with_metadata(mut self) -> Self { + self.include_metadata = true; + self + } + + /// Enable scores in output. + #[must_use] + pub fn with_scores(mut self) -> Self { + self.include_scores = true; + self + } + + /// Build structured content from selected items. + #[must_use] + pub fn build( + &self, + selected: Vec, + tree: &DocumentTree, + ) -> StructuredContent { + if selected.is_empty() { + return StructuredContent { + content: String::new(), + structure: None, + metadata: ContentMetadata::default(), + }; + } + + // Calculate metadata + let total_tokens: usize = selected.iter().map(|s| s.tokens).sum(); + let avg_score = selected.iter().map(|s| s.score).sum::() / selected.len() as f32; + let max_depth = selected.iter().map(|s| s.depth).max().unwrap_or(0); + + let metadata = ContentMetadata { + total_tokens, + node_count: selected.len(), + avg_score, + max_depth, + }; + + // Build based on format + let (content, structure) = match &self.format { + OutputFormat::Markdown => self.build_markdown(selected, tree), + OutputFormat::Json => self.build_json(selected, tree), + OutputFormat::Tree => self.build_tree_format(selected, tree), + OutputFormat::Flat => self.build_flat(selected), + }; + + StructuredContent { + content, + structure, + metadata, + } + } + + /// Build Markdown format output. + fn build_markdown( + &self, + selected: Vec, + _tree: &DocumentTree, + ) -> (String, Option) { + let mut sections = Vec::new(); + let mut current_depth = 0; + + // Sort by depth to maintain hierarchy + let mut sorted = selected; + sorted.sort_by(|a, b| a.depth.cmp(&b.depth)); + + for content in sorted { + // Adjust heading level based on depth + let heading_level = (content.depth + 1).min(6); + let heading = "#".repeat(heading_level); + + let mut section = format!("{} {}", heading, content.title); + + if self.include_scores { + section.push_str(&format!(" *(score: {:.2})*", content.score)); + } + + section.push_str("\n\n"); + section.push_str(&content.content); + + if content.is_truncated() { + section.push_str("\n\n*[content truncated]*"); + } + + sections.push(section); + current_depth = current_depth.max(content.depth); + } + + (sections.join("\n\n---\n\n"), None) + } + + /// Build JSON format output. 
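+    ///
+    /// The output shape is illustrated below (values are placeholders):
+    ///
+    /// ```text
+    /// {
+    ///   "sections": [
+    ///     { "title": "…", "content": "…", "score": 0.9, "depth": 0, "truncated": false }
+    ///   ]
+    /// }
+    /// ```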
+ fn build_json( + &self, + selected: Vec, + _tree: &DocumentTree, + ) -> (String, Option) { + #[derive(Serialize)] + struct JsonOutput<'a> { + sections: Vec>, + } + + #[derive(Serialize)] + struct JsonSection<'a> { + title: &'a str, + content: &'a str, + score: f32, + depth: usize, + truncated: bool, + } + + let sections: Vec<_> = selected + .iter() + .map(|s| JsonSection { + title: &s.title, + content: &s.content, + score: s.score, + depth: s.depth, + truncated: s.is_truncated(), + }) + .collect(); + + let output = JsonOutput { sections }; + let content = serde_json::to_string_pretty(&output).unwrap_or_default(); + + (content, None) + } + + /// Build tree format output. + fn build_tree_format( + &self, + selected: Vec, + tree: &DocumentTree, + ) -> (String, Option) { + // Build tree structure + let mut root = ContentTreeNode::new("Content".to_string()); + let mut node_count = 0; + + // Group by parent + use std::collections::HashMap; + let mut by_parent: HashMap, Vec<&SelectedContent>> = + HashMap::new(); + + for content in &selected { + let parent = tree.get(content.node_id).and_then(|_| { + // Find parent in selected + selected + .iter() + .find(|s| s.depth < content.depth) + .map(|s| Some(s.node_id)) + .unwrap_or(None) + }); + by_parent.entry(parent).or_default().push(content); + } + + // Build tree recursively + fn build_node( + content: &SelectedContent, + all_by_parent: &HashMap, Vec<&SelectedContent>>, + ) -> ContentTreeNode { + let mut node = ContentTreeNode::new(content.title.clone()) + .with_content(content.content.clone(), content.score); + + if let Some(children) = all_by_parent.get(&Some(content.node_id)) { + for child in children { + node.add_child(build_node(child, all_by_parent)); + } + } + + node + } + + // Add top-level items + if let Some(top_level) = by_parent.get(&None) { + for content in top_level { + let node = build_node(content, &by_parent); + node_count += count_nodes(&node); + root.add_child(node); + } + } + + // Build string representation + let content = render_tree(&root, 0); + + let tree_structure = ContentTree { + root, + total_nodes: node_count, + }; + + (content, Some(tree_structure)) + } + + /// Build flat format output. + fn build_flat(&self, selected: Vec) -> (String, Option) { + let parts: Vec<_> = selected + .iter() + .map(|c| { + let mut part = format!("[{}] {}", c.title, c.content); + if self.include_scores { + part = format!("[{}] (score: {:.2}) {}", c.title, c.score, c.content); + } + part + }) + .collect(); + + (parts.join("\n\n"), None) + } +} + +impl Default for StructureBuilder { + fn default() -> Self { + Self::new(OutputFormat::default()) + } +} + +/// Count nodes in a tree. +fn count_nodes(node: &ContentTreeNode) -> usize { + 1 + node.children.iter().map(count_nodes).sum::() +} + +/// Render tree as string. 
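+///
+/// Output is illustrative, e.g. for a root with one scored child:
+///
+/// ```text
+/// ├─ Content (score: 0.00)
+///   ├─ Section 1 (score: 0.90)
+///   │ First hundred characters of the section body…
+/// ```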
+fn render_tree(node: &ContentTreeNode, depth: usize) -> String { + let indent = " ".repeat(depth); + let mut result = format!("{}├─ {} (score: {:.2})\n", indent, node.title, node.score); + + if let Some(ref content) = node.content { + let preview = if content.len() > 100 { + format!("{}...", &content[..100]) + } else { + content.clone() + }; + result.push_str(&format!("{}│ {}\n", indent, preview.replace('\n', " "))); + } + + for child in &node.children { + result.push_str(&render_tree(child, depth + 1)); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::domain::NodeId; + use indextree::Arena; + + fn make_test_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = crate::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) + } + + fn make_selected(title: &str, content: &str, score: f32, depth: usize) -> SelectedContent { + SelectedContent { + node_id: make_test_node_id(), + title: title.to_string(), + content: content.to_string(), + tokens: 50, + score, + depth, + truncation: None, + } + } + + #[test] + fn test_markdown_builder() { + let builder = StructureBuilder::new(OutputFormat::Markdown); + let selected = vec![ + make_selected("Section 1", "Content 1", 0.9, 0), + make_selected("Section 2", "Content 2", 0.8, 1), + ]; + + // Create a minimal tree for testing + let tree = DocumentTree::new("Test", ""); + + let result = builder.build(selected, &tree); + + assert!(!result.is_empty()); + assert!(result.content.contains("Section 1")); + assert!(result.content.contains("Section 2")); + assert!(result.content.contains("# Section 1")); + assert!(result.content.contains("## Section 2")); + } + + #[test] + fn test_flat_builder() { + let builder = StructureBuilder::new(OutputFormat::Flat); + let selected = vec![ + make_selected("Section 1", "Content 1", 0.9, 0), + ]; + + let tree = DocumentTree::new("Test", ""); + let result = builder.build(selected, &tree); + + assert!(result.content.contains("[Section 1]")); + assert!(result.content.contains("Content 1")); + } + + #[test] + fn test_builder_with_scores() { + let builder = StructureBuilder::new(OutputFormat::Markdown) + .with_scores(); + + let selected = vec![ + make_selected("Section 1", "Content 1", 0.95, 0), + ]; + + let tree = DocumentTree::new("Test", ""); + let result = builder.build(selected, &tree); + + assert!(result.content.contains("score: 0.95")); + } + + #[test] + fn test_empty_selected() { + let builder = StructureBuilder::new(OutputFormat::Markdown); + let tree = DocumentTree::new("Test", ""); + let result = builder.build(Vec::new(), &tree); + + assert!(result.is_empty()); + assert_eq!(result.metadata.node_count, 0); + } + + #[test] + fn test_content_tree_node() { + let mut root = ContentTreeNode::new("Root".to_string()) + .with_content("Root content".to_string(), 0.9); + + let child = ContentTreeNode::new("Child".to_string()) + .with_content("Child content".to_string(), 0.8); + + root.add_child(child); + + assert_eq!(root.children.len(), 1); + assert_eq!(root.score, 0.9); + } +} diff --git a/src/retrieval/content/config.rs b/src/retrieval/content/config.rs new file mode 100644 index 00000000..f9bc38b6 --- /dev/null +++ b/src/retrieval/content/config.rs @@ -0,0 +1,158 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: 
Apache-2.0 + +//! Configuration types for content aggregation. + +use serde::{Deserialize, Serialize}; + +/// Configuration for content aggregation. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContentAggregatorConfig { + /// Maximum tokens to return in aggregated content. + pub token_budget: usize, + + /// Minimum relevance score threshold (0.0 - 1.0). + /// Content below this threshold will be filtered out. + pub min_relevance_score: f32, + + /// Scoring strategy for relevance computation. + pub scoring_strategy: ScoringStrategyConfig, + + /// Output format for aggregated content. + pub output_format: OutputFormatConfig, + + /// Include relevance scores in output metadata. + pub include_scores: bool, + + /// Minimum budget allocation per depth level (for hierarchical strategy). + /// Value between 0.0 and 1.0, representing fraction of total budget. + pub hierarchical_min_per_level: f32, + + /// Enable content deduplication. + pub deduplicate: bool, + + /// Similarity threshold for deduplication (0.0 - 1.0). + pub dedup_threshold: f32, +} + +impl Default for ContentAggregatorConfig { + fn default() -> Self { + Self { + token_budget: 4000, + min_relevance_score: 0.2, + scoring_strategy: ScoringStrategyConfig::KeywordWithBM25, + output_format: OutputFormatConfig::Markdown, + include_scores: false, + hierarchical_min_per_level: 0.1, + deduplicate: true, + dedup_threshold: 0.9, + } + } +} + +impl ContentAggregatorConfig { + /// Create a new config with default values. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Set the token budget. + #[must_use] + pub fn with_token_budget(mut self, budget: usize) -> Self { + self.token_budget = budget; + self + } + + /// Set the minimum relevance score. + #[must_use] + pub fn with_min_relevance(mut self, score: f32) -> Self { + self.min_relevance_score = score.clamp(0.0, 1.0); + self + } + + /// Set the scoring strategy. + #[must_use] + pub fn with_scoring_strategy(mut self, strategy: ScoringStrategyConfig) -> Self { + self.scoring_strategy = strategy; + self + } + + /// Set the output format. + #[must_use] + pub fn with_output_format(mut self, format: OutputFormatConfig) -> Self { + self.output_format = format; + self + } +} + +/// Scoring strategy configuration. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ScoringStrategyConfig { + /// Fast keyword matching only. + KeywordOnly, + /// Keyword matching with BM25 scoring. + KeywordWithBM25, + /// Hybrid: keyword + LLM reranking for top candidates. + Hybrid, +} + +impl Default for ScoringStrategyConfig { + fn default() -> Self { + Self::KeywordWithBM25 + } +} + +/// Output format configuration. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum OutputFormatConfig { + /// Markdown format with headers. + Markdown, + /// JSON format. + Json, + /// Tree format. + Tree, + /// Flat text format. 
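+    /// Sections are rendered as `[Title] content` blocks separated by blank
+    /// lines.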
+ Flat, +} + +impl Default for OutputFormatConfig { + fn default() -> Self { + Self::Markdown + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_config() { + let config = ContentAggregatorConfig::default(); + assert_eq!(config.token_budget, 4000); + assert_eq!(config.min_relevance_score, 0.2); + } + + #[test] + fn test_config_builder() { + let config = ContentAggregatorConfig::new() + .with_token_budget(2000) + .with_min_relevance(0.5); + + assert_eq!(config.token_budget, 2000); + assert_eq!(config.min_relevance_score, 0.5); + } + + #[test] + fn test_min_relevance_clamped() { + let config = ContentAggregatorConfig::new() + .with_min_relevance(1.5); + assert_eq!(config.min_relevance_score, 1.0); + + let config = ContentAggregatorConfig::new() + .with_min_relevance(-0.5); + assert_eq!(config.min_relevance_score, 0.0); + } +} diff --git a/src/retrieval/content/mod.rs b/src/retrieval/content/mod.rs new file mode 100644 index 00000000..2a78f801 --- /dev/null +++ b/src/retrieval/content/mod.rs @@ -0,0 +1,46 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Content aggregation module for retrieval results. +//! +//! This module provides precision-focused, budget-aware content aggregation +//! that transforms candidate nodes into structured, relevant content. +//! +//! # Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────────────────────────┐ +//! │ Content Aggregator │ +//! ├─────────────────────────────────────────────────────────────┤ +//! │ RelevanceScorer → BudgetAllocator → StructureBuilder │ +//! └─────────────────────────────────────────────────────────────┘ +//! ``` +//! +//! # Example +//! +//! ```rust,ignore +//! use vectorless::retrieval::content::{ContentAggregator, ContentAggregatorConfig}; +//! +//! let config = ContentAggregatorConfig { +//! token_budget: 4000, +//! min_relevance_score: 0.3, +//! ..Default::default() +//! }; +//! +//! let aggregator = ContentAggregator::new(config); +//! let result = aggregator.aggregate(&candidates, &tree, &query); +//! ``` + +mod aggregator; +mod budget; +mod builder; +mod config; +mod scorer; + +pub use aggregator::{ContentAggregator, AggregationResult, CandidateNode}; +pub use budget::{BudgetAllocator, AllocationStrategy, AllocationResult, SelectedContent}; +pub use builder::{StructureBuilder, OutputFormat, StructuredContent, ContentTree}; +pub use config::{ContentAggregatorConfig, OutputFormatConfig, ScoringStrategyConfig}; +pub use scorer::{ + RelevanceScorer, ContentRelevance, ScoreComponents, ContentChunk, ScoringContext, +}; diff --git a/src/retrieval/content/scorer.rs b/src/retrieval/content/scorer.rs new file mode 100644 index 00000000..ba04a6ce --- /dev/null +++ b/src/retrieval/content/scorer.rs @@ -0,0 +1,439 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Relevance scoring for content chunks. +//! +//! This module provides fine-grained relevance scoring for content, +//! combining keyword matching, BM25, and optional LLM reranking. + +use std::collections::HashMap; + +use crate::domain::{estimate_tokens, NodeId}; + +use super::config::ScoringStrategyConfig; + +/// Content chunk for scoring. +#[derive(Debug, Clone)] +pub struct ContentChunk { + /// Node ID this chunk belongs to. + pub node_id: NodeId, + /// Title of the node. + pub title: String, + /// Content text. + pub content: String, + /// Depth in tree (0 = root level). 
+ pub depth: usize, +} + +impl ContentChunk { + /// Create a new content chunk. + #[must_use] + pub fn new(node_id: NodeId, title: String, content: String, depth: usize) -> Self { + Self { + node_id, + title, + content, + depth, + } + } + + /// Estimate token count for this chunk. + #[must_use] + pub fn token_count(&self) -> usize { + estimate_tokens(&self.content) + } +} + +/// Relevance score components. +#[derive(Debug, Clone, Default)] +pub struct ScoreComponents { + /// Keyword match score (0.0 - 1.0). + pub keyword_score: f32, + /// BM25 score (normalized). + pub bm25_score: f32, + /// Depth penalty (deeper = lower score). + pub depth_penalty: f32, + /// Path bonus from parent relevance. + pub path_bonus: f32, + /// Information density score. + pub density_score: f32, +} + +impl ScoreComponents { + /// Compute final weighted score. + #[must_use] + pub fn final_score(&self) -> f32 { + // Weight formula from design doc + let score = self.keyword_score * 0.35 + + self.bm25_score * 0.25 + + self.depth_penalty * 0.15 + + self.path_bonus * 0.10 + + self.density_score * 0.15; + + score.clamp(0.0, 1.0) + } +} + +/// Relevance score result for a content chunk. +#[derive(Debug, Clone)] +pub struct ContentRelevance { + /// The content chunk that was scored. + pub chunk: ContentChunk, + /// Final relevance score (0.0 - 1.0). + pub score: f32, + /// Score breakdown by component. + pub components: ScoreComponents, +} + +impl ContentRelevance { + /// Create a new relevance result. + #[must_use] + pub fn new(chunk: ContentChunk, score: f32, components: ScoreComponents) -> Self { + Self { + chunk, + score, + components, + } + } +} + +/// Context for scoring operations. +#[derive(Debug, Clone)] +pub struct ScoringContext { + /// Average document length for BM25. + pub avg_doc_len: f32, + /// Total document count for IDF. + pub doc_count: usize, + /// Document frequency for terms. + pub doc_freq: HashMap, + /// Parent node score (for path bonus). + pub parent_score: Option, +} + +impl Default for ScoringContext { + fn default() -> Self { + Self { + avg_doc_len: 100.0, + doc_count: 1, + doc_freq: HashMap::new(), + parent_score: None, + } + } +} + +/// Relevance scorer for content chunks. +#[derive(Debug)] +pub struct RelevanceScorer { + /// Query keywords extracted from the query. + query_keywords: Vec, + /// Scoring strategy to use. + strategy: ScoringStrategyConfig, + /// BM25 parameters. + k1: f32, + b: f32, +} + +impl RelevanceScorer { + /// Create a new scorer with keywords. + #[must_use] + pub fn new(query: &str, strategy: ScoringStrategyConfig) -> Self { + let query_keywords = extract_keywords(query); + Self { + query_keywords, + strategy, + k1: 1.2, + b: 0.75, + } + } + + /// Create a scorer with pre-extracted keywords. + #[must_use] + pub fn with_keywords(keywords: Vec, strategy: ScoringStrategyConfig) -> Self { + Self { + query_keywords: keywords, + strategy, + k1: 1.2, + b: 0.75, + } + } + + /// Score a content chunk. + #[must_use] + pub fn score_chunk(&self, chunk: &ContentChunk, ctx: &ScoringContext) -> ContentRelevance { + let mut components = ScoreComponents::default(); + + // 1. Keyword score + components.keyword_score = self.compute_keyword_score(&chunk.content); + + // 2. BM25 score (if enabled) + if matches!(self.strategy, ScoringStrategyConfig::KeywordWithBM25 | ScoringStrategyConfig::Hybrid) { + components.bm25_score = self.compute_bm25_score(&chunk.content, ctx); + } + + // 3. Depth penalty (10% per level) + components.depth_penalty = 0.9_f32.powi(chunk.depth as i32); + + // 4. 
Path bonus + components.path_bonus = ctx.parent_score.map(|s| s * 0.2).unwrap_or(0.0); + + // 5. Density score + components.density_score = compute_density(&chunk.content); + + let final_score = components.final_score(); + + ContentRelevance::new(chunk.clone(), final_score, components) + } + + /// Score multiple chunks. + pub fn score_chunks<'a>( + &self, + chunks: &'a [ContentChunk], + ctx: &ScoringContext, + ) -> Vec { + chunks + .iter() + .map(|chunk| self.score_chunk(chunk, ctx)) + .collect() + } + + /// Compute keyword overlap score. + fn compute_keyword_score(&self, content: &str) -> f32 { + if self.query_keywords.is_empty() { + return 0.5; // Neutral score if no keywords + } + + let content_lower = content.to_lowercase(); + let content_words: std::collections::HashSet<&str> = content_lower + .split_whitespace() + .collect(); + + let matches = self + .query_keywords + .iter() + .filter(|kw| { + let kw_lower = kw.to_lowercase(); + content_words.iter().any(|&w| w.contains(&kw_lower)) + || content_lower.contains(&kw_lower) + }) + .count(); + + matches as f32 / self.query_keywords.len() as f32 + } + + /// Compute BM25 score. + fn compute_bm25_score(&self, content: &str, ctx: &ScoringContext) -> f32 { + if self.query_keywords.is_empty() { + return 0.0; + } + + let doc_len = content.split_whitespace().count() as f32; + let mut score = 0.0; + + for term in &self.query_keywords { + let term_lower = term.to_lowercase(); + let tf = content + .to_lowercase() + .matches(&term_lower) + .count() as f32; + + if tf == 0.0 { + continue; + } + + // IDF calculation + let df = ctx.doc_freq.get(&term_lower).copied().unwrap_or(1) as f32; + let idf = ((ctx.doc_count as f32 - df + 0.5) / (df + 0.5) + 1.0).ln(); + + // BM25 formula + let numerator = tf * (self.k1 + 1.0); + let denominator = tf + self.k1 * (1.0 - self.b + self.b * doc_len / ctx.avg_doc_len); + + score += idf * numerator / denominator; + } + + // Normalize to [0, 1] + let max_possible_score = self.query_keywords.len() as f32 * 5.0; // Rough upper bound + (score / max_possible_score).clamp(0.0, 1.0) + } + + /// Get the query keywords. + #[must_use] + pub fn keywords(&self) -> &[String] { + &self.query_keywords + } +} + +/// Extract keywords from a query string. 
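+///
+/// # Example
+///
+/// ```ignore
+/// let kws = extract_keywords("How does the budget allocator work?");
+/// // Stop words ("how", "does", "the") and single characters are dropped.
+/// assert_eq!(kws, ["budget", "allocator", "work"]);
+/// ```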
+fn extract_keywords(query: &str) -> Vec { + // Common English stop words + const STOPWORDS: &[&str] = &[ + "a", "an", "the", "is", "are", "was", "were", "be", "been", "being", + "have", "has", "had", "do", "does", "did", "will", "would", "could", + "should", "may", "might", "must", "shall", "can", "need", "dare", + "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by", + "from", "as", "into", "through", "during", "before", "after", + "above", "below", "between", "under", "again", "further", "then", + "once", "here", "there", "when", "where", "why", "how", "all", + "each", "few", "more", "most", "other", "some", "such", "no", "nor", + "not", "only", "own", "same", "so", "than", "too", "very", "just", + "and", "but", "if", "or", "because", "until", "while", "about", + "what", "which", "who", "whom", "this", "that", "these", "those", + "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", + "your", "yours", "yourself", "yourselves", "he", "him", "his", + "himself", "she", "her", "hers", "herself", "it", "its", "itself", + "they", "them", "their", "theirs", "themselves", + ]; + + query + .to_lowercase() + .split(|c: char| !c.is_alphanumeric()) + .filter(|s| { + let s = *s; + !s.is_empty() && s.len() > 1 && !STOPWORDS.contains(&s) + }) + .map(String::from) + .collect() +} + +/// Compute information density of content. +fn compute_density(content: &str) -> f32 { + let words: Vec<&str> = content.split_whitespace().collect(); + if words.is_empty() { + return 0.0; + } + + // Stopword ratio (lower is better) + const STOPWORDS: &[&str] = &[ + "a", "an", "the", "is", "are", "was", "were", "be", "been", "being", + "have", "has", "had", "do", "does", "did", "will", "would", "could", + "should", "may", "might", "must", "shall", "can", "to", "of", "in", + "for", "on", "with", "at", "by", "from", "and", "but", "or", "as", + ]; + + let stopword_count = words + .iter() + .filter(|w| STOPWORDS.contains(&w.to_lowercase().as_str())) + .count(); + + let stopword_ratio = stopword_count as f32 / words.len() as f32; + + // Entity-like ratio (capitalized, numbers, special terms) + let entity_count = words + .iter() + .filter(|w| { + w.chars() + .any(|c| c.is_numeric() || c.is_uppercase()) + }) + .count(); + + let entity_ratio = entity_count as f32 / words.len() as f32; + + // Combined density score + (1.0 - stopword_ratio) * 0.7 + entity_ratio * 0.3 +} + +#[cfg(test)] +mod tests { + use super::*; + use indextree::Arena; + + fn make_test_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = crate::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) + } + + #[test] + fn test_keyword_extraction() { + let keywords = extract_keywords("What is the architecture of vectorless?"); + assert!(keywords.contains(&"architecture".to_string())); + assert!(keywords.contains(&"vectorless".to_string())); + assert!(!keywords.contains(&"what".to_string())); // stopword + assert!(!keywords.contains(&"the".to_string())); // stopword + } + + #[test] + fn test_keyword_score() { + let scorer = RelevanceScorer::new( + "vectorless architecture", + ScoringStrategyConfig::KeywordOnly, + ); + + let chunk = ContentChunk::new( + make_test_node_id(), + "Test".to_string(), + "Vectorless has a unique architecture for document retrieval.".to_string(), + 0, + 
); + + let ctx = ScoringContext::default(); + let score = scorer.compute_keyword_score(&chunk.content); + + assert!(score > 0.5); // Should match both keywords + } + + #[test] + fn test_density_score() { + // High density content + let high_density = "Rust 1.85+ requires Cargo.toml configuration with [dependencies]"; + let score = compute_density(high_density); + assert!(score > 0.5); + + // Low density content (many stopwords) + let low_density = "This is a test of the system with some words in it"; + let score = compute_density(low_density); + assert!(score < 0.7); + } + + #[test] + fn test_depth_penalty() { + let shallow = ContentChunk::new( + make_test_node_id(), + "Test".to_string(), + "Content".to_string(), + 0, + ); + + let deep = ContentChunk::new( + make_test_node_id(), + "Test".to_string(), + "Content".to_string(), + 5, + ); + + let scorer = RelevanceScorer::new("test", ScoringStrategyConfig::KeywordOnly); + let ctx = ScoringContext::default(); + + let shallow_score = scorer.score_chunk(&shallow, &ctx); + let deep_score = scorer.score_chunk(&deep, &ctx); + + assert!(shallow_score.components.depth_penalty > deep_score.components.depth_penalty); + } + + #[test] + fn test_score_components_final_score() { + let components = ScoreComponents { + keyword_score: 0.8, + bm25_score: 0.6, + depth_penalty: 0.9, + path_bonus: 0.1, + density_score: 0.5, + }; + + let final_score = components.final_score(); + assert!(final_score > 0.0 && final_score <= 1.0); + } +} diff --git a/src/retrieval/context.rs b/src/retrieval/context.rs index ba0edb34..595c9083 100644 --- a/src/retrieval/context.rs +++ b/src/retrieval/context.rs @@ -434,7 +434,7 @@ impl ContextBuilder { sections.push(section); } - for child_id in tree.children(node_id) { + for child_id in tree.children_iter(node_id) { self.collect_sections(tree, child_id, current_depth + 1, max_depth, sections); } } @@ -463,7 +463,7 @@ impl ContextBuilder { sections.push(section); } - for child_id in tree.children(node_id) { + for child_id in tree.children_iter(node_id) { Box::pin(self.collect_sections_async( tree, child_id, diff --git a/src/retrieval/mod.rs b/src/retrieval/mod.rs index 5ff07413..565d0fa8 100644 --- a/src/retrieval/mod.rs +++ b/src/retrieval/mod.rs @@ -54,6 +54,7 @@ mod types; pub mod cache; pub mod complexity; +pub mod content; pub mod pilot; pub mod pipeline; pub mod search; @@ -98,6 +99,13 @@ pub use complexity::ComplexityDetector; // Cache exports pub use cache::PathCache; +// Content aggregation exports +pub use content::{ + AggregationResult, AllocationResult, AllocationStrategy, BudgetAllocator, ContentAggregator, + ContentAggregatorConfig, ContentChunk, ContentRelevance, OutputFormat, RelevanceScorer, + ScoreComponents, ScoringStrategyConfig, SelectedContent, StructureBuilder, StructuredContent, +}; + // Pilot exports pub use pilot::{ BudgetConfig, InterventionConfig, InterventionPoint, Pilot, PilotConfig, PilotDecision, diff --git a/src/retrieval/pilot/decision.rs b/src/retrieval/pilot/decision.rs index 09c76add..69a117d6 100644 --- a/src/retrieval/pilot/decision.rs +++ b/src/retrieval/pilot/decision.rs @@ -245,6 +245,7 @@ mod tests { for i in 0..count { let node = crate::domain::TreeNode { title: format!("Node {}", i), + structure: String::new(), content: String::new(), summary: String::new(), depth: 0, diff --git a/src/retrieval/pilot/llm_pilot.rs b/src/retrieval/pilot/llm_pilot.rs index 9342ffa4..c163396a 100644 --- a/src/retrieval/pilot/llm_pilot.rs +++ b/src/retrieval/pilot/llm_pilot.rs @@ -298,6 +298,7 @@ impl Pilot for 
LlmPilot { // Check budget if !self.has_budget() { + debug!("Budget exhausted, cannot guide start"); return None; } @@ -308,7 +309,14 @@ impl Pilot for LlmPilot { let candidates = tree.children(tree.root()); // Make LLM call - Some(self.call_llm(InterventionPoint::Start, &context, &candidates).await) + let decision = self.call_llm(InterventionPoint::Start, &context, &candidates).await; + info!( + "Pilot start guidance: confidence={}, candidates={}", + decision.confidence, + decision.ranked_candidates.len() + ); + + Some(decision) } async fn guide_backtrack( @@ -358,6 +366,7 @@ mod tests { for i in 0..count { let node = crate::domain::TreeNode { title: format!("Node {}", i), + structure: String::new(), content: String::new(), summary: String::new(), depth: 0, diff --git a/src/retrieval/pilot/parser.rs b/src/retrieval/pilot/parser.rs index 0447a259..9bb0bd48 100644 --- a/src/retrieval/pilot/parser.rs +++ b/src/retrieval/pilot/parser.rs @@ -350,6 +350,7 @@ mod tests { for i in 0..count { let node = crate::domain::TreeNode { title: format!("Node {}", i), + structure: String::new(), content: String::new(), summary: String::new(), depth: 0, diff --git a/src/retrieval/pipeline/context.rs b/src/retrieval/pipeline/context.rs index 5dafaf36..b12d3d9f 100644 --- a/src/retrieval/pipeline/context.rs +++ b/src/retrieval/pipeline/context.rs @@ -10,7 +10,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Instant; -use crate::domain::{DocumentTree, NodeId}; +use crate::domain::{DocumentTree, NodeId, RetrievalIndex}; use crate::retrieval::pilot::Pilot; use crate::retrieval::types::{ NavigationStep, QueryComplexity, RetrieveOptions, RetrieveResponse, SearchPath, @@ -195,6 +195,8 @@ pub struct PipelineContext { pub query: String, /// Document tree to search. pub tree: Arc, + /// Pre-computed retrieval index for efficient operations. + pub retrieval_index: Option, /// Retrieval options. pub options: RetrieveOptions, /// Optional Pilot for navigation guidance. @@ -254,9 +256,13 @@ impl PipelineContext { query: impl Into, options: RetrieveOptions, ) -> Self { + // Build retrieval index for efficient operations + let retrieval_index = Some(tree.build_retrieval_index()); + Self { query: query.into(), tree, + retrieval_index, options, pilot: None, complexity: None, diff --git a/src/retrieval/pipeline_retriever.rs b/src/retrieval/pipeline_retriever.rs index 084ad53d..e51d187a 100644 --- a/src/retrieval/pipeline_retriever.rs +++ b/src/retrieval/pipeline_retriever.rs @@ -9,6 +9,7 @@ use async_trait::async_trait; use std::sync::Arc; +use super::content::ContentAggregatorConfig; use super::pipeline::RetrievalOrchestrator; use super::retriever::{CostEstimate, Retriever, RetrieverError, RetrieverResult}; use super::stages::{AnalyzeStage, JudgeStage, PlanStage, SearchStage}; @@ -16,6 +17,7 @@ use super::strategy::LlmStrategy; use super::types::{RetrieveOptions, RetrieveResponse}; use crate::domain::DocumentTree; use crate::llm::LlmClient; +use crate::retrieval::pilot::{LlmPilot, PilotConfig}; /// Pipeline-based retriever using the stage architecture. /// @@ -37,6 +39,8 @@ pub struct PipelineRetriever { llm_client: Option, max_backtracks: usize, max_iterations: usize, + /// Content aggregator configuration. + content_config: Option, } impl Default for PipelineRetriever { @@ -52,6 +56,7 @@ impl PipelineRetriever { llm_client: None, max_backtracks: 5, max_iterations: 10, + content_config: None, } } @@ -73,6 +78,15 @@ impl PipelineRetriever { self } + /// Set content aggregator configuration. 
diff --git a/src/retrieval/pipeline_retriever.rs b/src/retrieval/pipeline_retriever.rs
index 084ad53d..e51d187a 100644
--- a/src/retrieval/pipeline_retriever.rs
+++ b/src/retrieval/pipeline_retriever.rs
@@ -9,6 +9,7 @@
 use async_trait::async_trait;
 use std::sync::Arc;
 
+use super::content::ContentAggregatorConfig;
 use super::pipeline::RetrievalOrchestrator;
 use super::retriever::{CostEstimate, Retriever, RetrieverError, RetrieverResult};
 use super::stages::{AnalyzeStage, JudgeStage, PlanStage, SearchStage};
@@ -16,6 +17,7 @@ use super::strategy::LlmStrategy;
 use super::types::{RetrieveOptions, RetrieveResponse};
 use crate::domain::DocumentTree;
 use crate::llm::LlmClient;
+use crate::retrieval::pilot::{LlmPilot, PilotConfig};
 
 /// Pipeline-based retriever using the stage architecture.
 ///
@@ -37,6 +39,8 @@ pub struct PipelineRetriever {
     llm_client: Option<LlmClient>,
     max_backtracks: usize,
     max_iterations: usize,
+    /// Content aggregator configuration.
+    content_config: Option<ContentAggregatorConfig>,
 }
 
 impl Default for PipelineRetriever {
@@ -52,6 +56,7 @@
             llm_client: None,
             max_backtracks: 5,
             max_iterations: 10,
+            content_config: None,
         }
     }
 
@@ -73,6 +78,15 @@
         self
     }
 
+    /// Set content aggregator configuration.
+    ///
+    /// When enabled, the Judge stage uses precision-focused content
+    /// aggregation with relevance scoring and token budget control.
+    pub fn with_content_config(mut self, config: ContentAggregatorConfig) -> Self {
+        self.content_config = Some(config);
+        self
+    }
+
     /// Build the orchestrator with all stages.
     fn build_orchestrator(&self) -> RetrievalOrchestrator {
         let mut orchestrator = RetrievalOrchestrator::new()
@@ -89,18 +103,24 @@
         }
         orchestrator = orchestrator.stage(plan_stage);
 
-        // Add search stage
+        // Add search stage with Pilot for semantic navigation
         let mut search_stage = SearchStage::new();
         if let Some(ref client) = self.llm_client {
-            search_stage = search_stage.with_llm_strategy(LlmStrategy::new(client.clone()));
+            // Create LLM-based Pilot for semantic navigation guidance
+            let pilot = LlmPilot::new(client.clone(), PilotConfig::default());
+            search_stage = search_stage.with_pilot(Arc::new(pilot));
         }
         orchestrator = orchestrator.stage(search_stage);
 
-        // Add judge stage
+        // Add judge stage with optional content aggregator
         let mut judge_stage = JudgeStage::new();
         if let Some(ref client) = self.llm_client {
             judge_stage = judge_stage.with_llm_judge(client.clone());
         }
+        // Configure content aggregator if provided
+        if let Some(ref config) = self.content_config {
+            judge_stage = judge_stage.with_content_aggregator(config.clone());
+        }
         orchestrator = orchestrator.stage(judge_stage);
 
         orchestrator
@@ -158,6 +178,7 @@ impl Clone for PipelineRetriever {
             llm_client: self.llm_client.clone(),
             max_backtracks: self.max_backtracks,
             max_iterations: self.max_iterations,
+            content_config: self.content_config.clone(),
         }
     }
 }
@@ -180,4 +201,11 @@ mod tests {
         assert_eq!(cloned.name(), "pipeline");
         assert_eq!(cloned.max_backtracks, 3);
     }
+
+    #[test]
+    fn test_pipeline_retriever_with_content_config() {
+        let config = ContentAggregatorConfig::default();
+        let retriever = PipelineRetriever::new().with_content_config(config);
+        assert!(retriever.content_config.is_some());
+    }
 }
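With the builder additions above, wiring the aggregator into a retriever is one extra call. A sketch, assuming `llm_client: LlmClient` already exists and that the earlier builder methods are named `with_llm_client` and `with_max_backtracks` (only `with_content_config` is visible in this diff):

```rust,ignore
let retriever = PipelineRetriever::new()
    .with_llm_client(llm_client)
    .with_max_backtracks(3)
    .with_content_config(ContentAggregatorConfig::default());
```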
diff --git a/src/retrieval/search/beam.rs b/src/retrieval/search/beam.rs
index 63cdcec1..2dec5e40 100644
--- a/src/retrieval/search/beam.rs
+++ b/src/retrieval/search/beam.rs
@@ -12,7 +12,7 @@ use tracing::{debug, trace};
 
 use super::super::RetrievalContext;
 use super::super::types::{NavigationDecision, NavigationStep, SearchPath};
-use super::scorer::NodeScorer;
+use super::scorer::{NodeScorer, ScoringContext};
 use super::{SearchConfig, SearchResult, SearchTree};
 use crate::domain::{DocumentTree, NodeId};
 use crate::retrieval::pilot::{Pilot, SearchState};
@@ -28,34 +28,36 @@
 /// (when multiple candidates are available) to get semantic guidance
 /// on which branches are most relevant to the query.
 pub struct BeamSearch {
-    scorer: NodeScorer,
     beam_width: usize,
 }
 
 impl BeamSearch {
     /// Create a new beam search with default beam width.
     pub fn new() -> Self {
-        Self {
-            scorer: NodeScorer::new(Default::default()),
-            beam_width: 3,
-        }
+        Self { beam_width: 3 }
     }
 
     /// Create beam search with specified width.
     pub fn with_width(width: usize) -> Self {
         Self {
-            scorer: NodeScorer::new(Default::default()),
             beam_width: width.max(1),
         }
     }
 
-    /// Score candidates using the algorithm's scorer.
-    fn score_candidates(
+    /// Create a scorer for the given query.
+    fn create_scorer(&self, query: &str) -> NodeScorer {
+        NodeScorer::new(ScoringContext::new(query))
+    }
+
+    /// Score candidates using a query-specific scorer.
+    fn score_candidates_with_query(
         &self,
         tree: &DocumentTree,
         candidates: &[NodeId],
+        query: &str,
     ) -> Vec<(NodeId, f32)> {
-        self.scorer.score_and_sort(tree, candidates)
+        let scorer = self.create_scorer(query);
+        scorer.score_and_sort(tree, candidates)
     }
 
     /// Merge algorithm scores with Pilot decision.
@@ -67,7 +69,9 @@
         tree: &DocumentTree,
         candidates: &[NodeId],
         pilot_decision: &crate::retrieval::pilot::PilotDecision,
+        query: &str,
     ) -> Vec<(NodeId, f32)> {
+        let scorer = self.create_scorer(query);
         let alpha = 0.4;
         let beta = 0.6 * pilot_decision.confidence;
 
@@ -81,7 +85,7 @@
         let mut merged: Vec<(NodeId, f32)> = candidates
             .iter()
             .map(|&node_id| {
-                let algo_score = self.scorer.score(tree, node_id);
+                let algo_score = scorer.score(tree, node_id);
                 let pilot_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0);
 
                 // Weighted combination
@@ -138,18 +142,18 @@
 
             // Use Pilot's ranked order if available
             if guidance.has_candidates() {
-                self.merge_with_pilot_decision(tree, &root_children, &guidance)
+                self.merge_with_pilot_decision(tree, &root_children, &guidance, &context.query)
             } else {
-                self.score_candidates(tree, &root_children)
+                self.score_candidates_with_query(tree, &root_children, &context.query)
             }
         } else {
-            self.score_candidates(tree, &root_children)
+            self.score_candidates_with_query(tree, &root_children, &context.query)
        }
    } else {
-        self.score_candidates(tree, &root_children)
+        self.score_candidates_with_query(tree, &root_children, &context.query)
    }
} else {
-    self.score_candidates(tree, &root_children)
+    self.score_candidates_with_query(tree, &root_children, &context.query)
};
 
         let mut current_beam: Vec = initial_candidates
@@ -211,16 +215,16 @@
                         );
 
                         // Merge algorithm scores with Pilot decision
-                        self.merge_with_pilot_decision(tree, &children, &decision)
+                        self.merge_with_pilot_decision(tree, &children, &decision, &context.query)
                     }
                 }
             } else {
                 // No intervention, use algorithm scoring
-                self.score_candidates(tree, &children)
+                self.score_candidates_with_query(tree, &children, &context.query)
             }
         } else {
             // No Pilot, use algorithm scoring
-            self.score_candidates(tree, &children)
+            self.score_candidates_with_query(tree, &children, &context.query)
         };
 
@@ -268,6 +272,16 @@
             }
         }
 
+        // Fallback: if no results found, add best candidates regardless of score
+        if result.paths.is_empty() && config.min_score > 0.0 {
+            debug!("No results above min_score, adding best candidates as fallback");
+            // Re-score initial candidates and take top-k
+            let all_candidates = self.score_candidates_with_query(tree, &tree.children(tree.root()), &context.query);
+            for (node_id, score) in all_candidates.into_iter().take(config.top_k) {
+                result.paths.push(SearchPath::from_node(node_id, score));
+            }
+        }
+
         // Sort final results by score
         result.paths.sort_by(|a, b| {
             b.score
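The merge in `merge_with_pilot_decision` is a normalized weighted blend: with `alpha = 0.4` and `beta = 0.6 * confidence`, the final score is `(alpha * algo + beta * pilot) / (alpha + beta)`, so a low-confidence Pilot pulls the result back toward the algorithmic score. A self-contained illustration of just that arithmetic:

```rust
// Standalone copy of the blend used by merge_with_pilot_decision in this diff.
fn blended_score(algo: f32, pilot: f32, confidence: f32) -> f32 {
    let alpha = 0.4;
    let beta = 0.6 * confidence;
    if beta > 0.0 {
        (alpha * algo + beta * pilot) / (alpha + beta)
    } else {
        algo // no Pilot signal: fall back to the algorithmic score
    }
}

fn main() {
    // At full confidence: (0.4 * 0.5 + 0.6 * 0.9) / 1.0 = 0.74
    assert!((blended_score(0.5, 0.9, 1.0) - 0.74).abs() < 1e-6);
    // At zero confidence the Pilot is ignored entirely.
    assert_eq!(blended_score(0.5, 0.9, 0.0), 0.5);
}
```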
diff --git a/src/retrieval/search/greedy.rs b/src/retrieval/search/greedy.rs
index f016a066..ad9fd8d8 100644
--- a/src/retrieval/search/greedy.rs
+++ b/src/retrieval/search/greedy.rs
@@ -7,27 +7,85 @@
 //! When a Pilot is provided, it can provide semantic guidance at decision points.
 
 use async_trait::async_trait;
+use tracing::{debug, trace};
 
 use super::super::RetrievalContext;
 use super::super::types::{NavigationDecision, NavigationStep, SearchPath};
-use super::scorer::NodeScorer;
+use super::scorer::{NodeScorer, ScoringContext};
 use super::{SearchConfig, SearchResult, SearchTree};
-use crate::domain::DocumentTree;
-use crate::retrieval::pilot::Pilot;
+use crate::domain::{DocumentTree, NodeId};
+use crate::retrieval::pilot::{Pilot, SearchState};
 
 /// Greedy search - always follows the best single path.
 ///
 /// Fast but may miss relevant content in other branches.
-pub struct GreedySearch {
-    scorer: NodeScorer,
-}
+/// When a Pilot is provided, it can guide the search at key decision points.
+pub struct GreedySearch;
 
 impl GreedySearch {
     /// Create a new greedy search.
     pub fn new() -> Self {
-        Self {
-            scorer: NodeScorer::new(Default::default()),
+        Self
+    }
+
+    /// Create a scorer for the given query.
+    fn create_scorer(&self, query: &str) -> NodeScorer {
+        NodeScorer::new(ScoringContext::new(query))
+    }
+
+    /// Score candidates using a query-specific scorer.
+    fn score_candidates_with_query(
+        &self,
+        tree: &DocumentTree,
+        candidates: &[NodeId],
+        query: &str,
+    ) -> Vec<(NodeId, f32)> {
+        let scorer = self.create_scorer(query);
+        scorer.score_and_sort(tree, candidates)
+    }
+
+    /// Merge algorithm scores with Pilot decision.
+    fn merge_with_pilot_decision(
+        &self,
+        tree: &DocumentTree,
+        candidates: &[NodeId],
+        pilot_decision: &crate::retrieval::pilot::PilotDecision,
+        query: &str,
+    ) -> Vec<(NodeId, f32)> {
+        let scorer = self.create_scorer(query);
+        let alpha = 0.4;
+        let beta = 0.6 * pilot_decision.confidence;
+
+        // Build a map from node_id to pilot score
+        let mut pilot_scores: std::collections::HashMap<NodeId, f32> = std::collections::HashMap::new();
+        for ranked in &pilot_decision.ranked_candidates {
+            pilot_scores.insert(ranked.node_id, ranked.score);
         }
+
+        // Merge scores
+        let mut merged: Vec<(NodeId, f32)> = candidates
+            .iter()
+            .map(|&node_id| {
+                let algo_score = scorer.score(tree, node_id);
+                let pilot_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0);
+
+                // Weighted combination
+                let final_score = if beta > 0.0 {
+                    (alpha * algo_score + beta * pilot_score) / (alpha + beta)
+                } else {
+                    algo_score
+                };
+
+                (node_id, final_score)
+            })
+            .collect();
+
+        // Sort by merged score
+        merged.sort_by(|a, b| {
+            b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
+        });
+
+        merged
     }
 }
 
@@ -44,13 +102,15 @@
         tree: &DocumentTree,
         context: &RetrievalContext,
         config: &SearchConfig,
-        _pilot: Option<&dyn Pilot>,
+        pilot: Option<&dyn Pilot>,
     ) -> SearchResult {
-        // Note: Pilot integration for GreedySearch can be added in Phase 2
-        // For now, we keep the original behavior
         let mut result = SearchResult::default();
         let mut current_path = SearchPath::new();
         let mut current_node = tree.root();
+        let mut visited: std::collections::HashSet<NodeId> = std::collections::HashSet::new();
+
+        // Track Pilot interventions
+        let mut pilot_interventions = 0;
 
         for iteration in 0..config.max_iterations {
             result.iterations = iteration + 1;
@@ -67,8 +127,40 @@
                 break;
             }
 
-            // Score all children
-            let scored_children = self.scorer.score_and_sort(tree, &children);
+            // ========== Pilot Integration Point ==========
+            let scored_children = if let Some(p) = pilot {
+                // Build search state for Pilot
+                let state = SearchState::new(
+                    tree,
+                    &context.query,
+                    &current_path.nodes,
+                    &children,
+                    &visited,
+                );
+
+                // Check if Pilot wants to intervene
+                if p.should_intervene(&state) {
+                    trace!("Pilot intervening at greedy decision point with {} candidates", children.len());
+
+                    let decision = p.decide(&state).await;
+                    pilot_interventions += 1;
+                    debug!(
+                        "Pilot decision: confidence={}, direction={:?}",
+                        decision.confidence,
+                        std::mem::discriminant(&decision.direction)
+                    );
+
+                    // Merge algorithm scores with Pilot decision
+                    self.merge_with_pilot_decision(tree, &children, &decision, &context.query)
+                } else {
+                    // No intervention, use algorithm scoring
+                    self.score_candidates_with_query(tree, &children, &context.query)
+                }
+            } else {
+                // No Pilot, use algorithm scoring
+                self.score_candidates_with_query(tree, &children, &context.query)
+            };
+            // ==============================================
 
             // Find the best child that meets minimum score
             let mut best_child = None;
@@ -83,6 +175,8 @@
             }
 
             if let Some(child_id) = best_child {
+                visited.insert(child_id);
+
                 // Record navigation step
                 let child_node = tree.get(child_id);
                 result.trace.push(NavigationStep {
@@ -105,13 +199,18 @@
                 break;
             }
         } else {
-            // No good children found
+            // No good children found - add current path as result
             current_path.leaf = Some(current_node);
-            result.paths.push(current_path);
+            if current_path.score > 0.0 {
+                result.paths.push(current_path);
+            }
             break;
         }
     }
 
+    // Record Pilot interventions
+    result.pilot_interventions = pilot_interventions;
+
     result
 }
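End to end, the new Pilot path in greedy search can be exercised like this. A hedged sketch: the `SearchTree` trait method name (`search`) and the `Default` impl for `SearchConfig` are assumptions; `LlmPilot::new`, `GreedySearch::new`, and `result.pilot_interventions` appear in this diff:

```rust,ignore
let pilot = LlmPilot::new(llm_client, PilotConfig::default());
let search = GreedySearch::new();
let result = search
    .search(&tree, &context, &SearchConfig::default(), Some(&pilot))
    .await;
println!("pilot interventions: {}", result.pilot_interventions);
```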
diff --git a/src/retrieval/stages/judge.rs b/src/retrieval/stages/judge.rs
index f22806db..9cc11e68 100644
--- a/src/retrieval/stages/judge.rs
+++ b/src/retrieval/stages/judge.rs
@@ -12,6 +12,7 @@
 use tracing::{info, warn};
 use crate::domain::estimate_tokens;
 use crate::llm::LlmClient;
+use crate::retrieval::content::{ContentAggregator, ContentAggregatorConfig};
 use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage, StageOutcome};
 use crate::retrieval::sufficiency::{LlmJudge, SufficiencyChecker, ThresholdChecker};
 use crate::retrieval::types::{RetrievalResult, RetrieveResponse, SufficiencyLevel};
@@ -23,18 +24,26 @@
 /// 2. Checks if content is sufficient to answer the query
 /// 3. Can trigger additional search iterations if needed
 ///
+/// # Content Aggregation
+///
+/// By default, uses simple content collection. For precision-focused
+/// aggregation with token budget control, use `with_content_aggregator()`.
+///
 /// # Example
 ///
 /// ```rust,ignore
 /// let stage = JudgeStage::new()
 ///     .with_llm_judge(llm_client)
-///     .with_max_iterations(3);
+///     .with_max_iterations(3)
+///     .with_content_aggregator(ContentAggregatorConfig::default());
 /// ```
 pub struct JudgeStage {
     threshold_checker: ThresholdChecker,
     llm_judge: Option<LlmJudge>,
     max_iterations: usize,
     use_llm_judge: bool,
+    /// Optional content aggregator for precision-focused aggregation.
+    content_aggregator: Option<ContentAggregator>,
 }
 
 impl Default for JudgeStage {
@@ -51,6 +60,7 @@
             llm_judge: None,
             max_iterations: 3,
             use_llm_judge: false,
+            content_aggregator: None,
         }
     }
 
@@ -67,8 +77,58 @@
         self
     }
 
+    /// Add content aggregator for precision-focused aggregation.
+    ///
+    /// When enabled, content aggregation uses:
+    /// - Relevance scoring (keyword + BM25)
+    /// - Token budget allocation
+    /// - Hierarchical content selection
+    pub fn with_content_aggregator(mut self, config: ContentAggregatorConfig) -> Self {
+        self.content_aggregator = Some(ContentAggregator::new(config));
+        self
+    }
+
+    /// Enable content aggregator with default configuration.
+    pub fn with_default_content_aggregator(mut self) -> Self {
+        self.content_aggregator = Some(ContentAggregator::with_defaults());
+        self
+    }
+
+    /// Aggregate content from candidates.
+    ///
+    /// When content aggregator is enabled:
+    /// - Uses relevance scoring for content selection
+    /// - Respects token budget
+    /// - Prioritizes high-relevance content
+    ///
+    /// Otherwise falls back to simple collection:
+    /// - Collects node's own content + descendant leaf content
     fn aggregate_content(&self, ctx: &PipelineContext) -> (String, usize) {
+        // Use ContentAggregator if configured
+        if let Some(ref aggregator) = self.content_aggregator {
+            use crate::retrieval::content::CandidateNode;
+
+            let candidates: Vec<CandidateNode> = ctx.candidates
+                .iter()
+                .map(|c| CandidateNode::new(c.node_id, c.score, c.depth))
+                .collect();
+
+            let result = aggregator.aggregate(&candidates, &ctx.tree, &ctx.query);
+            info!(
+                "ContentAggregator: {} nodes, {} tokens, avg score {:.2}",
+                result.nodes_included,
+                result.tokens_used,
+                result.avg_score
+            );
+            return (result.content, result.tokens_used);
+        }
+
+        // Fallback: simple content collection
+        self.aggregate_content_simple(ctx)
+    }
+
+    /// Simple content aggregation (legacy behavior).
+    fn aggregate_content_simple(&self, ctx: &PipelineContext) -> (String, usize) {
         let mut content_parts = Vec::new();
         let mut total_tokens = 0;
 
@@ -77,13 +137,25 @@
             // Add title
             content_parts.push(format!("## {}\n", node.title));
 
-            // Add summary if available, otherwise content preview
-            if !node.summary.is_empty() {
+            // Always collect all content: own content + descendant leaf content
+            let mut has_content = false;
+
+            // Add node's own content if available
+            if !node.content.is_empty() {
+                content_parts.push(format!("{}\n\n", node.content));
+                has_content = true;
+            }
+
+            // Also collect content from leaf descendants (for intermediate nodes)
+            let leaf_content = self.collect_leaf_content(&ctx.tree, candidate.node_id);
+            if !leaf_content.is_empty() {
+                content_parts.push(format!("{}\n\n", leaf_content));
+                has_content = true;
+            }
+
+            // Fall back to summary only if no content available
+            if !has_content && !node.summary.is_empty() {
                 content_parts.push(format!("{}\n\n", node.summary));
-            } else if !node.content.is_empty() {
-                // Limit content preview
-                let preview: String = node.content.chars().take(500).collect();
-                content_parts.push(format!("{}\n\n", preview));
             }
 
             // Estimate tokens
@@ -94,6 +166,38 @@
 
         (content_parts.join(""), total_tokens)
     }
+    /// Collect content from leaf descendants of a node (excluding the node itself).
+    fn collect_leaf_content(&self, tree: &crate::domain::DocumentTree, node_id: crate::domain::NodeId) -> String {
+        let mut content_parts = Vec::new();
+
+        // Start with children, not the node itself
+        let children = tree.children(node_id);
+        if children.is_empty() {
+            // Node is already a leaf, no descendants to collect
+            return String::new();
+        }
+
+        // Push in reverse so pop() yields leaves in document order
+        let mut stack: Vec<crate::domain::NodeId> = children.into_iter().rev().collect();
+
+        while let Some(current_id) = stack.pop() {
+            let current_children = tree.children(current_id);
+
+            if current_children.is_empty() {
+                // Leaf node - collect its content
+                if let Some(node) = tree.get(current_id) {
+                    if !node.content.is_empty() {
+                        content_parts.push(format!("### {}\n{}", node.title, node.content));
+                    }
+                }
+            } else {
+                // Non-leaf node - add children to stack (reversed to keep document order)
+                stack.extend(current_children.into_iter().rev());
+            }
+        }
+
+        content_parts.join("\n\n")
+    }
+
     /// Check sufficiency level.
     fn check_sufficiency(&self, ctx: &PipelineContext) -> SufficiencyLevel {
         if !ctx.options.sufficiency_check {
@@ -118,14 +222,34 @@
 
         for candidate in &ctx.candidates {
             if let Some(node) = ctx.tree.get(candidate.node_id) {
+                // Build content: node's own content + all descendant leaf content
+                let content = if ctx.options.include_content {
+                    let mut content_parts = Vec::new();
+
+                    // Add node's own content
+                    if !node.content.is_empty() {
+                        content_parts.push(node.content.clone());
+                    }
+
+                    // Add content from leaf descendants
+                    let leaf_content = self.collect_leaf_content(&ctx.tree, candidate.node_id);
+                    if !leaf_content.is_empty() {
+                        content_parts.push(leaf_content);
+                    }
+
+                    if content_parts.is_empty() {
+                        None
+                    } else {
+                        Some(content_parts.join("\n\n"))
+                    }
+                } else {
+                    None
+                };
+
                 results.push(RetrievalResult {
                     node_id: Some(format!("{:?}", candidate.node_id)),
                     title: node.title.clone(),
-                    content: if ctx.options.include_content {
-                        Some(node.content.clone())
-                    } else {
-                        None
-                    },
+                    content,
                     summary: if ctx.options.include_summaries {
                         Some(node.summary.clone())
                     } else {
diff --git a/src/retrieval/stages/search.rs b/src/retrieval/stages/search.rs
index 0283de23..e9addfe7 100644
--- a/src/retrieval/stages/search.rs
+++ b/src/retrieval/stages/search.rs
@@ -147,7 +147,7 @@
             // Get node info
             if let Some(node) = tree.get(leaf_id) {
                 let depth = node.depth;
-                let is_leaf = tree.children(leaf_id).is_empty();
+                let is_leaf = tree.is_leaf(leaf_id);
 
                 candidates.push(CandidateNode::new(leaf_id, path.score, depth, is_leaf));
             }
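The stack-based walk in `collect_leaf_content` pushes children in reverse so that `pop()` yields leaves in document order. A runnable toy version of the same traversal over an index-based tree:

```rust
fn main() {
    // children[i] lists the child indices of node i; leaves have none.
    // Node 0 is the root with children 1 and 4; node 1 has leaves 2 and 3.
    let children: Vec<Vec<usize>> = vec![vec![1, 4], vec![2, 3], vec![], vec![], vec![]];

    let mut leaves = Vec::new();
    let mut stack: Vec<usize> = children[0].iter().rev().copied().collect();
    while let Some(id) = stack.pop() {
        if children[id].is_empty() {
            leaves.push(id); // leaf: would collect its content here
        } else {
            // push in reverse so the leftmost child is popped first
            stack.extend(children[id].iter().rev().copied());
        }
    }
    assert_eq!(leaves, vec![2, 3, 4]); // leaves visited in document order
}
```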
diff --git a/templates/template.toml b/templates/template.toml
index 5ea61bce..66e85e21 100644
--- a/templates/template.toml
+++ b/templates/template.toml
@@ -108,6 +108,47 @@ high_similarity_threshold = 0.8
 # Low similarity threshold for "explore" decision
 low_similarity_threshold = 0.3
 
+# Content aggregator configuration
+# Controls how retrieved content is aggregated and returned
+[retrieval.content]
+# Enable/disable content aggregator
+# When disabled, uses simple content collection (legacy behavior)
+enabled = true
+
+# Maximum tokens for aggregated content
+token_budget = 4000
+
+# Minimum relevance score threshold (0.0 - 1.0)
+# Content below this threshold will be filtered out
+min_relevance_score = 0.2
+
+# Scoring strategy: "keyword_only" | "keyword_bm25" | "hybrid"
+# - keyword_only: Fast keyword matching (no BM25)
+# - keyword_bm25: Keyword + BM25 scoring (recommended)
+# - hybrid: Keyword + LLM reranking (most accurate, slower)
+scoring_strategy = "keyword_bm25"
+
+# Output format: "markdown" | "json" | "tree" | "flat"
+# - markdown: Structured markdown with headers (default)
+# - json: JSON format for programmatic use
+# - tree: Tree structure preserving hierarchy
+# - flat: Flat text format
+output_format = "markdown"
+
+# Include relevance scores in output (useful for debugging)
+include_scores = false
+
+# Minimum budget allocation per depth level (0.0 - 1.0)
+# Ensures each tree level gets representation
+hierarchical_min_per_level = 0.1
+
+# Enable content deduplication
+deduplicate = true
+
+# Similarity threshold for deduplication (0.0 - 1.0)
+# Higher = more aggressive deduplication
+dedup_threshold = 0.9
+
 [storage]
 # Workspace directory for persisted documents
 #
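For consumers that deserialize this template, the new `[retrieval.content]` table maps onto a plain struct. An illustrative mirror (the struct is not part of this diff; it assumes the `serde` and `toml` crates):

```rust
use serde::Deserialize;

// Illustrative only: field names follow the [retrieval.content] table above.
#[derive(Debug, Deserialize)]
struct ContentSection {
    enabled: bool,
    token_budget: usize,
    min_relevance_score: f32,
    scoring_strategy: String,
    output_format: String,
    include_scores: bool,
    hierarchical_min_per_level: f32,
    deduplicate: bool,
    dedup_threshold: f32,
}

fn main() {
    let raw = r#"
enabled = true
token_budget = 4000
min_relevance_score = 0.2
scoring_strategy = "keyword_bm25"
output_format = "markdown"
include_scores = false
hierarchical_min_per_level = 0.1
deduplicate = true
dedup_threshold = 0.9
"#;
    let cfg: ContentSection = toml::from_str(raw).expect("valid [retrieval.content] table");
    assert!(cfg.enabled && cfg.token_budget == 4000);
}
```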