From 0c78b146132020f4cf879c2a36f139079127bd13 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 15:30:54 +0800 Subject: [PATCH 01/21] feat: add benchmark suite and update roadmap for v0.2.0 Add comprehensive benchmark infrastructure with Criterion for measuring performance across core modules: - Set up index pipeline benchmarks covering parsing, tree building, ToC extraction and full pipeline performance - Implement retrieval pipeline benchmarks for query analysis, search algorithms, judge evaluation and end-to-end retrieval - Create pilot module benchmarks for intervention decisions, context building, response parsing, score merging and full decision flows - Update roadmap.md to reflect v0.2.0 status with completed features including Pilot implementation, backtracking support and new modules - Add placeholder examples for future implementation including batch processing, CLI tool, custom pilot, multi-format support and streaming - Configure Cargo.toml with benchmark definitions and criterion dependency These benchmarks provide the foundation for performance optimization and regression testing as the system evolves. --- Cargo.toml | 13 ++ benches/index_bench.rs | 106 +++++++++++++ benches/pilot_bench.rs | 196 ++++++++++++++++++++++++ benches/retrieval_bench.rs | 164 ++++++++++++++++++++ docs/design/roadmap.md | 286 ++++++++++++++++++----------------- examples/batch_processing.rs | 61 ++++++++ examples/cli_tool.rs | 122 +++++++++++++++ examples/custom_pilot.rs | 67 ++++++++ examples/multi_format.rs | 77 ++++++++++ examples/streaming.rs | 70 +++++++++ 10 files changed, 1027 insertions(+), 135 deletions(-) create mode 100644 benches/index_bench.rs create mode 100644 benches/pilot_bench.rs create mode 100644 benches/retrieval_bench.rs create mode 100644 examples/batch_processing.rs create mode 100644 examples/cli_tool.rs create mode 100644 examples/custom_pilot.rs create mode 100644 examples/multi_format.rs create mode 100644 examples/streaming.rs diff --git a/Cargo.toml b/Cargo.toml index 00d69362..e16d14ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,19 @@ rand = "0.8" [dev-dependencies] tempfile = "3.10" tokio-test = "0.4" +criterion = { version = "0.5", features = ["async_tokio"] } + +[[bench]] +name = "index_bench" +harness = false + +[[bench]] +name = "retrieval_bench" +harness = false + +[[bench]] +name = "pilot_bench" +harness = false [profile.release] opt-level = 3 diff --git a/benches/index_bench.rs b/benches/index_bench.rs new file mode 100644 index 00000000..9ba1e230 --- /dev/null +++ b/benches/index_bench.rs @@ -0,0 +1,106 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Index pipeline benchmarks. +//! +//! Measures performance of document indexing: +//! - Parsing speed (Markdown, PDF, DOCX) +//! - Tree building +//! - Summary generation (LLM calls) +//! 
- End-to-end indexing time

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};

// TODO: Implement actual benchmarks once the API is stable
//
// use vectorless::client::{Engine, EngineBuilder};
// use vectorless::parser::{MarkdownParser, DocumentParser};

fn bench_markdown_parsing(c: &mut Criterion) {
    let mut group = c.benchmark_group("markdown_parsing");

    // TODO: Create test documents of different sizes
    // let small_doc = generate_markdown(100);   // 100 lines
    // let medium_doc = generate_markdown(500);  // 500 lines
    // let large_doc = generate_markdown(2000);  // 2000 lines

    // TODO: Benchmark parsing
    // group.bench_with_input(BenchmarkId::new("parse", "small"), &small_doc, |b, doc| {
    //     b.iter(|| {
    //         let parser = MarkdownParser::new();
    //         black_box(parser.parse(doc))
    //     })
    // });

    // Placeholder benchmark
    group.bench_function("parse_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

fn bench_tree_building(c: &mut Criterion) {
    let mut group = c.benchmark_group("tree_building");

    // TODO: Benchmark tree construction from parsed content
    // - Node creation
    // - Hierarchy building
    // - Metadata assignment

    group.bench_function("build_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

fn bench_toc_extraction(c: &mut Criterion) {
    let mut group = c.benchmark_group("toc_extraction");

    // TODO: Benchmark ToC extraction
    // - Heading detection
    // - Hierarchy inference
    // - Section boundary detection

    group.bench_function("toc_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

fn bench_full_index_pipeline(c: &mut Criterion) {
    let mut group = c.benchmark_group("full_index");

    // TODO: Benchmark complete indexing pipeline
    // - Parse → Build → Enhance → Enrich → Optimize
    // - With and without LLM summarization
    // - Different document sizes

    group.bench_function("full_pipeline_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

// TODO: Add helper functions for generating test documents
//
// fn generate_markdown(lines: usize) -> String {
//     // Generate markdown with headings, paragraphs, code blocks
// }
//
// fn generate_pdf(pages: usize) -> Vec<u8> {
//     // Generate PDF content
// }

criterion_group!(
    benches,
    bench_markdown_parsing,
    bench_tree_building,
    bench_toc_extraction,
    bench_full_index_pipeline,
);

criterion_main!(benches);
diff --git a/benches/pilot_bench.rs b/benches/pilot_bench.rs
new file mode 100644
index 00000000..5a052b50
--- /dev/null
+++ b/benches/pilot_bench.rs
@@ -0,0 +1,196 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Pilot module benchmarks.
+//!
+//! Measures performance of Pilot (the brain of retrieval):
+//! - Intervention decision overhead
+//! - Context building
+//! - LLM call latency (mocked)
+//! - Response parsing
+//! - Score merging
+//! 
- Fallback handling + +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; + +fn bench_intervention_decision(c: &mut Criterion) { + let mut group = c.benchmark_group("intervention_decision"); + + // TODO: Benchmark should_intervene() decision + // - START point decision + // - FORK point decision + // - BACKTRACK point decision + // - EVALUATE point decision + + // This should be very fast (< 1µs) as it's called frequently + + group.bench_function("should_intervene_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.finish(); +} + +fn bench_context_building(c: &mut Criterion) { + let mut group = c.benchmark_group("context_building"); + + // TODO: Benchmark ContextBuilder + // - Token budget allocation + // - Path context building + // - Candidate context building + // - Sibling context building + + // Test different context sizes: + // - Small: 1-2 candidates, short path + // - Medium: 3-5 candidates, medium path + // - Large: 10+ candidates, long path + + group.bench_function("build_context_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.finish(); +} + +fn bench_response_parsing(c: &mut Criterion) { + let mut group = c.benchmark_group("response_parsing"); + + // TODO: Benchmark ResponseParser + // - JSON parsing + // - Regex fallback extraction + // - Default decision generation + + // let json_response = r#"{"candidates": [...], "direction": "...", "confidence": 0.9}"#; + // + // group.bench_with_input("json_parse", json_response, |b, response| { + // b.iter(|| { + // black_box(ResponseParser::parse(response)) + // }) + // }); + + group.bench_function("parse_json_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.bench_function("parse_regex_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.finish(); +} + +fn bench_score_merging(c: &mut Criterion) { + let mut group = c.benchmark_group("score_merging"); + + // TODO: Benchmark score merging + // - final = α × algo + β × llm + // - Different weight configurations + // - Batch merging (multiple candidates) + + group.bench_function("merge_scores_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.finish(); +} + +fn bench_budget_controller(c: &mut Criterion) { + let mut group = c.benchmark_group("budget_controller"); + + // TODO: Benchmark BudgetController + // - can_call() check + // - record_usage() update + // - estimate_cost() calculation + // - Thread-safe operations + + group.bench_function("budget_check_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.bench_function("budget_record_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.finish(); +} + +fn bench_fallback_manager(c: &mut Criterion) { + let mut group = c.benchmark_group("fallback_manager"); + + // TODO: Benchmark FallbackManager + // - Level escalation + // - Level de-escalation + // - Retry delay calculation + // - Action determination + + group.bench_function("fallback_record_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.finish(); +} + +fn bench_metrics_collector(c: &mut Criterion) { + let mut group = c.benchmark_group("metrics_collector"); + + // TODO: Benchmark MetricsCollector + // - record_call() with atomic operations + // - snapshot() generation + // - Percentile calculation + + group.bench_function("metrics_record_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.bench_function("metrics_snapshot_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.finish(); +} + +fn 
bench_full_pilot_decision(c: &mut Criterion) { + let mut group = c.benchmark_group("full_pilot_decision"); + + // TODO: Benchmark complete Pilot.decide() flow + // - should_intervene check + // - Context building + // - LLM call (mocked or skipped) + // - Response parsing + // - Decision construction + + // Compare: + // - With LLM call (real latency) + // - Without LLM call (algorithm only) + // - With cached response + + group.bench_function("full_decide_placeholder", |b| { + b.iter(|| black_box(1 + 1)) + }); + + group.finish(); +} + +// TODO: Add helper functions +// +// fn create_mock_search_state() -> SearchState { +// // Create mock state for benchmarking +// } +// +// fn create_mock_tree() -> DocumentTree { +// // Create mock tree for benchmarking +// } + +criterion_group!( + benches, + bench_intervention_decision, + bench_context_building, + bench_response_parsing, + bench_score_merging, + bench_budget_controller, + bench_fallback_manager, + bench_metrics_collector, + bench_full_pilot_decision, +); + +criterion_main!(benches); diff --git a/benches/retrieval_bench.rs b/benches/retrieval_bench.rs new file mode 100644 index 00000000..d0615401 --- /dev/null +++ b/benches/retrieval_bench.rs @@ -0,0 +1,164 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Retrieval pipeline benchmarks. +//! +//! Measures performance of document retrieval: +//! - Query analysis +//! - Strategy selection +//! - Search algorithms (Greedy, Beam, MCTS) +//! - Judge evaluation +//! - End-to-end retrieval time + +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; + +fn bench_query_analysis(c: &mut Criterion) { + let mut group = c.benchmark_group("query_analysis"); + + // TODO: Benchmark query analysis stage + // - Complexity detection + // - Keyword extraction + // - Target section identification + + // Test different query types: + // - Simple factual: "What is X?" + // - Complex analytical: "Compare X and Y" + // - Multi-part: "What are the steps to do X?" 
+
    group.bench_function("analyze_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

fn bench_search_algorithms(c: &mut Criterion) {
    let mut group = c.benchmark_group("search_algorithms");

    // TODO: Benchmark different search algorithms
    //
    // group.bench_with_input("greedy", &config, |b, cfg| {
    //     b.iter(|| {
    //         let searcher = GreedySearcher::new(cfg);
    //         black_box(searcher.search(&tree, &query))
    //     })
    // });
    //
    // group.bench_with_input("beam_k3", &beam_config_3, |b, cfg| {
    //     b.iter(|| {
    //         let searcher = BeamSearcher::new(cfg);
    //         black_box(searcher.search(&tree, &query))
    //     })
    // });
    //
    // group.bench_with_input("beam_k5", &beam_config_5, |b, cfg| {
    //     b.iter(|| {
    //         let searcher = BeamSearcher::new(cfg);
    //         black_box(searcher.search(&tree, &query))
    //     })
    // });
    //
    // group.bench_with_input("mcts", &mcts_config, |b, cfg| {
    //     b.iter(|| {
    //         let searcher = MctsSearcher::new(cfg);
    //         black_box(searcher.search(&tree, &query))
    //     })
    // });

    group.bench_function("greedy_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.bench_function("beam_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.bench_function("mcts_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

fn bench_judge_evaluation(c: &mut Criterion) {
    let mut group = c.benchmark_group("judge_evaluation");

    // TODO: Benchmark judge stage
    // - Sufficiency evaluation
    // - Content quality assessment
    // - Backtrack decision making

    group.bench_function("judge_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

fn bench_full_retrieval_pipeline(c: &mut Criterion) {
    let mut group = c.benchmark_group("full_retrieval");

    // TODO: Benchmark complete retrieval pipeline
    // - Analyze → Plan → Search → Judge
    // - With and without Pilot
    // - With and without backtracking
    // - Different query complexities

    // group.bench_with_input(
    //     BenchmarkId::new("no_pilot", "simple_query"),
    //     &simple_query,
    //     |b, query| {
    //         b.iter(|| {
    //             black_box(engine.query(&doc_id, query))
    //         })
    //     },
    // );
    //
    // group.bench_with_input(
    //     BenchmarkId::new("with_pilot", "simple_query"),
    //     &simple_query,
    //     |b, query| {
    //         b.iter(|| {
    //             black_box(engine_with_pilot.query(&doc_id, query))
    //         })
    //     },
    // );

    group.bench_function("retrieval_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

fn bench_backtracking(c: &mut Criterion) {
    let mut group = c.benchmark_group("backtracking");

    // TODO: Benchmark backtracking overhead
    // - Time to detect insufficient results
    // - Time to adjust search parameters
    // - Additional search iterations

    group.bench_function("backtrack_placeholder", |b| {
        b.iter(|| black_box(1 + 1))
    });

    group.finish();
}

// TODO: Add helper functions for creating test trees
//
// fn create_test_tree(depth: usize, branching: usize) -> DocumentTree {
//     // Create tree with specified depth and branching factor
// }

criterion_group!(
    benches,
    bench_query_analysis,
    bench_search_algorithms,
    bench_judge_evaluation,
    bench_full_retrieval_pipeline,
    bench_backtracking,
);

criterion_main!(benches);
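Once the search API stabilizes, the placeholders above can be filled in along the lines of the commented-out TODOs. A minimal sketch of one such case; `create_test_tree` and `GreedySearcher` are the names assumed by the TODO comments, not a shipped API:

```rust
fn bench_greedy(c: &mut Criterion) {
    let tree = create_test_tree(4, 3); // depth 4, branching factor 3 (assumed helper)
    let mut group = c.benchmark_group("search_algorithms");
    group.bench_with_input(BenchmarkId::new("greedy", "d4_b3"), &tree, |b, tree| {
        b.iter(|| black_box(GreedySearcher::default().search(tree, "test query")))
    });
    group.finish();
}
```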
diff --git a/docs/design/roadmap.md b/docs/design/roadmap.md
index 87a867a6..de125bd5 100644
--- a/docs/design/roadmap.md
+++ b/docs/design/roadmap.md
@@ -1,153 +1,133 @@
 # Architecture Review & Roadmap
 
-> Review date: 2026-04-03
-> Version reviewed: v0.1.7
+> Review date: 2026-04-04
+> Version reviewed: v0.2.0
 
 ## Current Status
 
 | Metric | Status |
 |------|------|
-| **Tests** | 129 passed, 0 failed |
-| **Code size** | 17,695 lines of Rust (112 files) |
-| **Modules** | client, domain, index, retrieval, llm, parser, storage, throttle |
-| **Build** | succeeds (warnings only) |
+| **Tests** | 197 passed, 0 failed |
+| **Code size** | 26,000+ lines of Rust |
+| **Modules** | client, domain, index, retrieval, pilot, llm, parser, storage, throttle |
+| **Build** | succeeds |
 
 ## Architecture Highlights
 
-### 1. Consistent dual-pipeline design
+### 1. Complete dual-pipeline design
 
 Index and Retrieval share the same orchestrator pattern:
 - dependency resolution (topological sort)
 - ExecutionGroup for parallelism
 - FailurePolicy (Fail/Skip/Retry)
 - StageOutcome flow control
+- **Backtracking support** (Retrieval)
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
 │ Orchestrator pattern │
 ├─────────────────────────────────────────────────────────────┤
-│ Index Pipeline │ Retrieval Pipeline │
-│ ───────────── │ ───────────────── │
-│ Parse → Build → │ Analyze → Plan → │
-│ Enhance → Enrich → │ Search → Judge │
-│ Optimize │ (with backtracking) │
+│ Index Pipeline │ Retrieval Pipeline │
+│ ───────────── │ ───────────────── │
+│ Parse → Build → │ Analyze → Plan → │
+│ Enhance → Enrich → │ Search → Judge │
+│ Optimize │ (backtracking + Pilot) │
 └─────────────────────────────────────────────────────────────┘
 ```
 
-### 2. Clear layered architecture
+### 2. Pilot module fully implemented
+
+**Pilot is the "brain" of the Retrieval Pipeline**, responsible for semantic understanding and navigation decisions:
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│ Pilot architecture │
+├─────────────────────────────────────────────────────────────┤
+│ Intervention points: START → FORK → BACKTRACK → EVALUATE │
+│ Components: BudgetController, ContextBuilder, FallbackManager │
+│ Features: score merging, 4-level fallback, metrics collection │
+└─────────────────────────────────────────────────────────────┘
+```
+
+**Core design philosophy**:
+- Algorithm handles "how to search": efficient, deterministic
+- Pilot handles "where to go": semantic understanding, direction
+- Intervene only at key decision points, not at every step
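+
+To make the score-merging rule concrete, a worked sketch (the weights and scores are illustrative, not defaults shipped by the crate):
+
+```rust
+// final = α × algo + β × llm
+let (alpha, beta) = (0.6, 0.4);
+let (algo_score, llm_score) = (0.8, 0.5);
+let final_score = alpha * algo_score + beta * llm_score;
+assert!((final_score - 0.68_f64).abs() < 1e-9); // 0.48 + 0.20
+```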
+
+### 3. Clear layered architecture
 
 ```
 client (Engine) → index/retrieval → domain ← parser/llm/config
+                      ↑
+                 pilot (the brain)
 ```
 
 - **client**: high-level API that hides internal complexity
 - **domain**: core domain types, no external dependencies
 - **index/retrieval**: business logic operating on domain types
+- **pilot**: LLM navigation intelligence that intervenes in retrieval
 - **parser/llm/config**: infrastructure, provides capabilities
 
-### 3. Well-modularized codebase
-
-Each module has a single responsibility:
-- `parser/` - document parsing (Markdown, PDF, DOCX)
-- `llm/` - LLM client (retry, fallback, pool)
-- `storage/` - persistence (Workspace, LRU cache)
-- `throttle/` - rate limiting
+---
+
+## Completed Features
 
+| Feature | Status | Notes |
+|------|------|------|
+| Index Pipeline | ✅ | Parse, Build, Enhance, Enrich, Optimize |
+| Retrieval Pipeline | ✅ | Analyze, Plan, Search, Judge |
+| Backtracking | ✅ | NeedMoreData, explicit Backtrack |
+| Pilot Trait | ✅ | should_intervene, decide, guide_* |
+| BudgetController | ✅ | token/call limits, budget allocation |
+| FallbackManager | ✅ | 4-level fallback strategy |
+| MetricsCollector | ✅ | latency, token, and success-rate tracking |
+| Score Merging | ✅ | α×algo + β×llm |
+| Markdown Parser | ✅ | full support |
+| PDF Parser | ✅ | based on pdf-extract |
+| DOCX Parser | ✅ | based on docx-rs |
 
 ---
 
 ## Areas to Improve
 
-### Code Quality (Clippy Warnings)
-
-| Kind | Count | Example |
-|------|------|------|
-| unused variable | 8 | `_context`, `_query`, `_strategy` |
-| dead_code | 5 | `find_stage_index`, `term_frequency` |
-| must_use | 12 | builder methods missing `#[must_use]` |
-| style | 3 | redundant else, unnecessary hashes |
-
 ### Missing Features
 
-| Module | Missing | Impact |
-|------|------|------|
-| `parser/registry.rs` | HTML parser | HTML format unsupported |
-| `parser/toc/processor.rs` | structure extraction for documents without a ToC | depends on LLM |
-| `retrieval/strategy/llm.rs` | batched prompt optimization | performance |
+| Module | Missing | Priority |
+|------|------|--------|
+| `parser/` | HTML parser | medium |
+| `parser/` | Plain text parser | low |
+| `retrieval/strategy/` | batched prompt optimization | medium |
 
 ### Architectural Limits
 
-| Limit | Description |
-|------|------|
-| **Parallel execution unimplemented** | ExecutionGroup is designed but `execute()` still runs sequentially |
-| **No strategy switching** | once Plan picks a strategy it cannot change mid-flight |
-| **Incremental indexing skeleton** | `ChangeDetector` exists but is not wired into the pipeline |
-
----
-
-## Next Optimization Plan
-
-### Phase 1: Code Cleanup (priority: high)
-
-**Goal**: eliminate all clippy warnings
-
-| Task | Files | Effort |
+| Limit | Description | Priority |
 |------|------|--------|
-| add `#[must_use]` | builder types | ~12 sites |
-| fix unused variables | across modules | ~8 sites |
-| remove dead code | `search/mod.rs`, `strategy/keyword.rs` | ~5 sites |
-| fix style issues | scattered | ~3 sites |
-
-**Acceptance criteria**: `cargo clippy` reports no warnings
+| **Parallel execution unimplemented** | ExecutionGroup is designed but `execute()` still runs sequentially | high |
+| **No strategy switching** | once Plan picks a strategy it cannot change mid-flight | low |
+| **Incremental indexing skeleton** | `ChangeDetector` exists but is not wired into the pipeline | low |
 
 ---
 
-### Phase 2: Feature Completion (priority: medium)
-
-#### 2.1 HTML Parser
-
-```rust
-// src/parser/html/mod.rs (new)
-pub struct HtmlParser {
-    config: HtmlConfig,
-}
-
-impl DocumentParser for HtmlParser {
-    fn parse(&self, content: &str) -> ParseResult {
-        // use the html5ever or scraper crate
-    }
-}
-```
-
-#### 2.2 Hot strategy switching
+## Roadmap for the Next Phase
 
-current: the strategy is fixed once the Plan stage picks it
-goal: the Search stage switches dynamically based on observed results
+### Phase 1: Performance Baselines (current)
 
-```rust
-// in SearchStage
-if current_strategy.is_struggling() {
-    ctx.switch_strategy(Strategy::more_capable());
-}
-```
+**Goal**: establish performance baselines to ground optimization work
 
-#### 2.3 Incremental index integration
-
-```rust
-// in PipelineExecutor
-pub fn execute_incremental(
-    &mut self,
-    input: IndexInput,
-    changes: ChangeSet,
-) -> Result<IndexOutput> {
-    // only process the changed parts
-}
-```
+| Task | File | Status |
+|------|------|------|
+| Index baselines | `benches/index_bench.rs` | 📝 to do |
+| Retrieval baselines | `benches/retrieval_bench.rs` | 📝 to do |
+| Pilot baselines | `benches/pilot_bench.rs` | 📝 to do |
+| Token usage baselines | `benches/token_bench.rs` | 📝 to do |
 
 ---
 
-### Phase 3: Performance Optimization (priority: medium)
+### Phase 2: Performance Optimization
 
-#### 3.1 Implement parallel execution
+**Goal**: optimize the critical paths identified by the benchmarks
+
+#### 2.1 Implement parallel execution
 
 **Current state**: `ExecutionGroup` is designed, but `execute()` still runs sequentially
 
@@ -168,37 +148,87 @@ futures::future::try_join_all(
 - `PipelineContext` must be `Send + Sync`
 - needs fine-grained locking or message passing
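+
+A sketch of what a parallel `execute()` could look like; the stage and context types are simplified stand-ins, not the orchestrator's real signatures:
+
+```rust
+// All stages in one ExecutionGroup are mutually independent,
+// so they can run concurrently and fail fast together.
+async fn run_group(group: &ExecutionGroup, ctx: &PipelineContext) -> Result<()> {
+    futures::future::try_join_all(
+        group.stages.iter().map(|stage| stage.execute(ctx)),
+    )
+    .await?;
+    Ok(())
+}
+```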
-#### 3.2 Path Cache hit rate
+#### 2.2 Pilot call optimization
 
 ```rust
-// add a hot-query cache
-pub struct PathCache {
-    entries: LruCache<String, CachedPath>,
-    hot_queries: Arc<RwLock<HashSet<String>>>, // new
+// current: evaluate one node at a time
+for node_id in node_ids {
+    pilot.evaluate_node(tree, node_id).await;
 }
+
+// goal: batch evaluation
+pilot.evaluate_nodes_batch(tree, node_ids).await;
 ```
 
-#### 3.3 Batched LLM calls
+#### 2.3 Cache optimization
+
+- Improve the Path Cache hit rate
+- Hot-query caching
+- LLM response caching (identical context)
+
+---
+
+### Phase 3: Feature Completion
+
+#### 3.1 HTML Parser
 
 ```rust
-// current: evaluate one node at a time
-for node_id in node_ids {
-    self.evaluate_node(tree, node_id, context).await;
+// src/parser/html/mod.rs (new)
+pub struct HtmlParser {
+    config: HtmlConfig,
 }
 
-// goal: batch evaluation
-self.evaluate_nodes_batch(tree, node_ids, context).await;
+impl DocumentParser for HtmlParser {
+    fn parse(&self, content: &str) -> ParseResult {
+        // use the html5ever or scraper crate
+    }
+}
 ```
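+
+A sketch of the scraper-based route mentioned in the comment above; the `scraper` calls are real API, the selector logic is illustrative:
+
+```rust
+use scraper::{Html, Selector};
+
+/// Extract (level, text) pairs for every heading in an HTML document.
+fn extract_headings(html: &str) -> Vec<(u8, String)> {
+    let doc = Html::parse_document(html);
+    let sel = Selector::parse("h1, h2, h3, h4, h5, h6").unwrap();
+    doc.select(&sel)
+        .map(|el| {
+            let level = el.value().name().as_bytes()[1] - b'0'; // "h2" -> 2
+            (level, el.text().collect::<String>())
+        })
+        .collect()
+}
+```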
+#### 3.2 More LLM Providers
+
+- Anthropic Claude API
+- Local LLM (Ollama, llama.cpp)
+- Azure OpenAI
+
+#### 3.3 Streaming output
+
+```rust
+// Support streaming retrieval results
+pub async fn query_stream(
+    &self,
+    doc_id: &DocumentId,
+    query: &str,
+) -> impl Stream<Item = RetrieveEvent> {
+    // return results as they are retrieved
+}
+```
+
+---
+
+### Phase 4: Example Coverage
+
+| Example | Description | Status |
+|------|------|------|
+| `basic.rs` | basic usage | ✅ done |
+| `index.rs` | index a document | ✅ done |
+| `retrieve.rs` | retrieve from a document | ✅ done |
+| `markdownflow.rs` | Markdown flow | ✅ done |
+| `custom_pilot.rs` | custom Pilot | 📝 to do |
+| `batch_processing.rs` | batch processing | 📝 to do |
+| `streaming.rs` | streaming output | 📝 to do |
+| `multi_format.rs` | multi-format documents | 📝 to do |
+| `cli_tool.rs` | CLI tool example | 📝 to do |
 
 ---
 
-### Phase 4: Test Hardening (priority: low)
+### Phase 5: Test Hardening
 
 | Test type | Current | Target |
 |----------|------|------|
-| Unit tests | 129 | +50 |
-| Integration tests | 0 (examples only) | +10 |
-| Property tests | 0 | +5 |
+| Unit tests | 197 | +30 |
+| Integration tests | 1 | +10 |
+| Benchmarks | 0 | +4 |
 | Coverage report | none | cargo-tarpaulin |
 
 ---
 
 ## Execution Order
 
 ```
-Phase 1 (code cleanup)
+Phase 1 (performance baselines) ← current
 ↓
-Phase 3.1 (parallel execution)
+Phase 2 (performance optimization)
 ↓
-Phase 2 (feature completion)
+Phase 3 (feature completion)
 ↓
-Phase 4 (test hardening)
+Phase 4 (example coverage)
+ ↓
+Phase 5 (test hardening)
 ```
 
-**Recommendation: run the Phase 1 code cleanup first**, eliminating all clippy warnings to keep the codebase clean.
-
----
-
-## File Change Preview
-
-### Files touched in Phase 1
-
-```
-src/
-├── client/builder.rs            # add #[must_use]
-├── config/types.rs              # add #[must_use]
-├── domain/tree.rs               # remove dead code
-├── index/
-│   ├── pipeline/orchestrator.rs # remove find_stage_index
-│   └── stages/*.rs              # fix unused
-├── retrieval/
-│   ├── search/mod.rs            # remove dead code
-│   ├── strategy/keyword.rs      # remove term_frequency
-│   └── stages/*.rs              # fix unused
-└── llm/client.rs                # fix unused max_tokens
-```
+**Recommendation: establish performance baselines first**, because only then can we:
+1. Find the real bottlenecks
+2. Measure the effect of optimizations
+3. Guard against performance regressions
 
 ---
 
 ## References
 
 - [Architecture v2](./architecture-v2.svg)
+- [Pilot Architecture](./pilot-architecture.svg)
 - [Pipeline Design](./v2.md)
+- [Pilot Design](./pilot.md)
 - [RFCs](../rfcs/)
diff --git a/examples/batch_processing.rs b/examples/batch_processing.rs
new file mode 100644
index 00000000..bbbde4d0
--- /dev/null
+++ b/examples/batch_processing.rs
@@ -0,0 +1,61 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Batch document processing example.
+//!
+//! This example demonstrates how to efficiently process
+//! multiple documents in batch mode.
+//!
+//! # What you'll learn:
+//! - How to index multiple documents concurrently
+//! - How to batch queries for better throughput
+//! - How to manage resources (memory, LLM calls) during batch processing
+//! - How to track progress and handle failures
+//!
+//! # Use cases:
+//! - Indexing a documentation site with hundreds of pages
+//! - Processing a corpus of research papers
+//! - Building a knowledge base from multiple sources
+//!
+//! # Performance considerations:
+//! - Control concurrency with `max_concurrent_indexing`
+//! - Use rate limiting to avoid LLM API throttling
+//! - Monitor memory usage with large document sets
+//!
+//! # TODO: Implementation steps
+//!
+//! 1. Load list of documents to process
+//! 2. Configure batch processing parameters
+//! 3. Process documents with controlled concurrency
+//! 4. Track progress and handle errors
+//! 5. Generate processing report

// TODO: Implement batch processing
// ```
// use std::path::PathBuf;
// use futures::stream::{self, StreamExt};
// use vectorless::client::{Engine, EngineBuilder};
//
// async fn batch_index(
//     engine: &Engine,
//     documents: Vec<PathBuf>,
//     concurrency: usize,
// ) -> Vec<Result<DocumentId>> {
//     stream::iter(documents)
//         .map(|path| async move { engine.index(&path).await })
//         .buffer_unordered(concurrency)
//         .collect()
//         .await
// }
// ```

fn main() {
    // TODO: Show batch indexing and querying
    //
    // let documents = find_all_markdown_files("./docs");
    // let results = batch_index(&engine, documents, 5).await;
    //
    // // Process results, report failures, etc.

    println!("TODO: Implement batch_processing example");
}
diff --git a/examples/cli_tool.rs b/examples/cli_tool.rs
new file mode 100644
index 00000000..62a05f33
--- /dev/null
+++ b/examples/cli_tool.rs
@@ -0,0 +1,122 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! CLI tool example for vectorless.
+//!
+//! This example shows how to build a command-line tool
+//! using vectorless for document indexing and querying.
+//!
+//! # What you'll learn:
+//! - How to structure a CLI application
+//! - How to handle subcommands (index, query, info)
+//! - How to manage configuration and workspace
+//! - How to provide user-friendly output
+//!
+//! # Example commands:
+//!
+//! ```bash
+//! # Index a document
+//! vectorless-cli index ./document.md
+//!
+//! # Query a document
+//! vectorless-cli query <doc-id> "What is the main topic?"
+//!
+//! # List indexed documents
+//! vectorless-cli list
+//!
+//! # Show document info
+//! vectorless-cli info <doc-id>
+//!
+//! # Delete a document
+//! vectorless-cli delete <doc-id>
+//! ```
+//!
+//! # Implementation notes:
+//!
+//! ## Recommended crates:
+//! - `clap` for argument parsing
+//! - `colored` or `termcolor` for colored output
+//! - `indicatif` for progress bars
+//! - `serde` for configuration
+//!
+//! ## Configuration file:
+//! ```toml
+//! # ~/.vectorless/config.toml
+//! [llm]
+//! provider = "openai"
+//! model = "gpt-4"
+//!
+//! [index]
+//! cache_size = 100
+//!
+//! [retrieval]
+//! max_iterations = 10
+//! ```
+//!
+//! # TODO: Implementation steps
+//!
+//! 1. Define CLI structure with clap
+//! 2. Implement index subcommand
+//! 3. Implement query subcommand
+//! 4. Implement list/info subcommands
+//! 5. Add configuration management
+//! 6. 
Add colored output and progress + +// TODO: Implement CLI tool +// ``` +// use clap::{Parser, Subcommand}; +// use vectorless::client::{Engine, EngineBuilder}; +// +// #[derive(Parser)] +// #[command(name = "vectorless-cli")] +// struct Cli { +// #[command(subcommand)] +// command: Commands, +// } +// +// #[derive(Subcommand)] +// enum Commands { +// /// Index a document +// Index { +// /// Path to document +// path: PathBuf, +// }, +// /// Query an indexed document +// Query { +// /// Document ID +// doc_id: String, +// /// Query string +// query: String, +// }, +// /// List all indexed documents +// List, +// } +// +// #[tokio::main] +// async fn main() -> Result<()> { +// let cli = Cli::parse(); +// let engine = EngineBuilder::new().build()?; +// +// match cli.command { +// Commands::Index { path } => { +// let doc_id = engine.index(&path).await?; +// println!("Indexed: {}", doc_id); +// } +// Commands::Query { doc_id, query } => { +// let result = engine.query(&doc_id, &query).await?; +// println!("{}", result.content); +// } +// Commands::List => { +// // List documents +// } +// } +// +// Ok(()) +// } +// ``` + +fn main() { + // TODO: Implement full CLI tool + + println!("TODO: Implement cli_tool example"); +} diff --git a/examples/custom_pilot.rs b/examples/custom_pilot.rs new file mode 100644 index 00000000..bd7a730e --- /dev/null +++ b/examples/custom_pilot.rs @@ -0,0 +1,67 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Custom Pilot implementation example. +//! +//! This example demonstrates how to implement a custom Pilot +//! that provides navigation guidance during retrieval. +//! +//! # What you'll learn: +//! - How to implement the Pilot trait +//! - When to intervene (START, FORK, BACKTRACK, EVALUATE) +//! - How to provide ranked candidates +//! - How to integrate custom Pilot with the retrieval pipeline +//! +//! # Key concepts: +//! +//! ## Intervention Points +//! - START: Before search begins - analyze query, set direction +//! - FORK: At branch points - rank candidates, guide path selection +//! - BACKTRACK: When search fails - suggest alternatives +//! - EVALUATE: After content found - check sufficiency +//! +//! ## Score Merging +//! ```text +//! final_score = α × algorithm_score + β × llm_score +//! ``` +//! +//! # TODO: Implementation steps +//! +//! 1. Define your custom Pilot struct +//! 2. Implement the Pilot trait +//! 3. Configure intervention conditions +//! 4. 
Integrate with EngineBuilder + +// TODO: Implement custom Pilot +// ``` +// use vectorless::retrieval::pilot::{Pilot, PilotDecision, SearchState, InterventionPoint}; +// +// pub struct MyCustomPilot { +// // Your fields here +// } +// +// impl Pilot for MyCustomPilot { +// fn should_intervene(&self, state: &SearchState, point: InterventionPoint) -> bool { +// // Decide when to intervene +// todo!() +// } +// +// async fn decide(&self, state: &SearchState) -> PilotDecision { +// // Make navigation decision +// todo!() +// } +// } +// ``` + +fn main() { + // TODO: Show how to use custom Pilot with EngineBuilder + // + // let pilot = MyCustomPilot::new(); + // let engine = EngineBuilder::new() + // .with_pilot(Arc::new(pilot)) + // .build()?; + // + // // Use engine with custom Pilot guidance + + println!("TODO: Implement custom_pilot example"); +} diff --git a/examples/multi_format.rs b/examples/multi_format.rs new file mode 100644 index 00000000..f146b851 --- /dev/null +++ b/examples/multi_format.rs @@ -0,0 +1,77 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Multi-format document processing example. +//! +//! This example demonstrates how to work with different +//! document formats (Markdown, PDF, DOCX, HTML). +//! +//! # What you'll learn: +//! - How to index documents of different formats +//! - How format detection works +//! - How to configure format-specific parsing options +//! - How to handle mixed-format document sets +//! +//! # Supported formats: +//! - **Markdown** (.md): Full support with ToC extraction +//! - **PDF** (.pdf): Text extraction, structure inference +//! - **DOCX** (.docx): Word document parsing +//! - **HTML** (.html, .htm): Web page parsing (planned) +//! - **Plain text** (.txt): Basic text parsing (planned) +//! +//! # Format-specific considerations: +//! +//! ## Markdown +//! - Best format for structured documents +//! - Automatic heading hierarchy detection +//! - Code block handling +//! +//! ## PDF +//! - Text extraction quality varies +//! - No explicit structure (inferred from fonts/spacing) +//! - Tables and images not supported +//! +//! ## DOCX +//! - Good structure preservation +//! - Styles mapped to hierarchy +//! - Limited formatting support +//! +//! # TODO: Implementation steps +//! +//! 1. Detect document format from extension or content +//! 2. Configure format-specific parser options +//! 3. Index documents of mixed formats +//! 4. 
Query across all formats + +// TODO: Implement multi-format example +// ``` +// use vectorless::client::{Engine, EngineBuilder}; +// use vectorless::parser::DocumentFormat; +// +// async fn index_multiple_formats(engine: &Engine) { +// // Index different formats +// let md_doc = engine.index("./README.md").await?; +// let pdf_doc = engine.index("./paper.pdf").await?; +// let docx_doc = engine.index("./report.docx").await?; +// +// // Query works across all formats +// let result = engine.query(&md_doc, "What is this about?").await?; +// } +// ``` + +fn main() { + // TODO: Show multi-format indexing and querying + // + // // Index documents of different formats + // let md_id = engine.index("./docs/guide.md").await?; + // let pdf_id = engine.index("./docs/paper.pdf").await?; + // let docx_id = engine.index("./docs/report.docx").await?; + // + // // Each can be queried independently + // for doc_id in &[md_id, pdf_id, docx_id] { + // let result = engine.query(doc_id, "summary").await?; + // println!("Result: {}", result.content); + // } + + println!("TODO: Implement multi_format example"); +} diff --git a/examples/streaming.rs b/examples/streaming.rs new file mode 100644 index 00000000..8942110c --- /dev/null +++ b/examples/streaming.rs @@ -0,0 +1,70 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Streaming retrieval example. +//! +//! This example demonstrates how to use streaming retrieval +//! to get results incrementally as they are found. +//! +//! # What you'll learn: +//! - How to use `query_stream()` for progressive results +//! - How to handle RetrieveEvent types +//! - How to display results as they arrive +//! - How to cancel long-running queries +//! +//! # RetrieveEvent types: +//! - `Started`: Query began, shows planned strategy +//! - `NodeVisited`: A node was visited during search +//! - `ContentFound`: Relevant content was found +//! - `Backtracking`: Search is backtracking for more data +//! - `Completed`: Query finished with final results +//! - `Error`: An error occurred +//! +//! # Use cases: +//! - Interactive Q&A with real-time feedback +//! - Long-running queries on large documents +//! - Debugging retrieval behavior +//! - Building responsive UIs +//! +//! # TODO: Implementation steps +//! +//! 1. Configure engine for streaming +//! 2. Call query_stream() instead of query() +//! 3. Process events as they arrive +//! 4. Handle completion and errors + +// TODO: Implement streaming retrieval +// ``` +// use vectorless::client::{Engine, RetrieveEvent}; +// +// async fn streaming_query( +// engine: &Engine, +// doc_id: &DocumentId, +// query: &str, +// ) { +// let mut stream = engine.query_stream(doc_id, query).await; +// +// while let Some(event) = stream.next().await { +// match event { +// RetrieveEvent::Started { strategy } => { +// println!("Starting search with strategy: {:?}", strategy); +// } +// RetrieveEvent::ContentFound { node_id, preview } => { +// println!("Found: {} - {}", node_id, preview); +// } +// RetrieveEvent::Completed { response } => { +// println!("Done! 
Confidence: {}", response.confidence);
//             }
//             _ => {}
//         }
//     }
// }
// ```

fn main() {
    // TODO: Show streaming query usage
    //
    // streaming_query(&engine, &doc_id, "What is the architecture?").await;

    println!("TODO: Implement streaming example");
}
From e9f4cb20954179cf104cc41ce74674cf7464abfe Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 17:49:49 +0800
Subject: [PATCH 02/21] feat(tree): add new traversal methods and optimize
 performance

- Add `child_count()` method for efficient child counting without allocation
- Add `children_iter()` method providing iterator interface to avoid Vec allocation
- Update `children()` to use `children_iter()` internally for consistency
- Add `siblings_iter()` method to get sibling nodes as iterator
- Add `ancestors_iter()` method to get ancestor nodes as iterator
- Add `path_from_root()` method to get complete path from root to node
- Add `depth()` method to get node depth (root = 0)
- Add `first_child()` and `last_child()` convenience methods

refactor(context): use iterator methods instead of collecting into Vec

- Replace `tree.children()` with `tree.children_iter()` in recursive section collection
- Improves memory efficiency by avoiding unnecessary Vec allocations during traversal

refactor(search): use optimized is_leaf check

- Replace manual `tree.children(leaf_id).is_empty()` check with `tree.is_leaf(leaf_id)`
- Uses existing optimized method that was already available in the codebase
---
 src/domain/tree.rs             | 65 +++++++++++++++++++++++++++++++++-
 src/retrieval/context.rs       |  4 +--
 src/retrieval/stages/search.rs |  2 +-
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/src/domain/tree.rs b/src/domain/tree.rs
index 1f63bbff..94cc5774 100644
--- a/src/domain/tree.rs
+++ b/src/domain/tree.rs
@@ -155,9 +155,27 @@ impl DocumentTree {
 id.0.children(&self.arena).next().is_none()
 }
 
+    /// Get the number of children of a node.
+    ///
+    /// This is more efficient than `children().len()` as it doesn't allocate.
+    pub fn child_count(&self, id: NodeId) -> usize {
+        id.0.children(&self.arena).count()
+    }
+
+    /// Get the children of a node as an iterator.
+    ///
+    /// Use this instead of `children()` when you only need to iterate,
+    /// as it avoids allocating a Vec.
+    pub fn children_iter(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
+        id.0.children(&self.arena).map(NodeId)
+    }
+
 /// Get the children of a node.
+    ///
+    /// Returns a Vec for cases where you need owned access to the children.
+    /// Consider using `children_iter()` if you only need to iterate.
 pub fn children(&self, id: NodeId) -> Vec<NodeId> {
-        id.0.children(&self.arena).map(NodeId).collect()
+        self.children_iter(id).collect()
 }
 
 /// Get the parent of a node.
@@ -167,6 +185,51 @@ impl DocumentTree {
 id.0.parent(&self.arena).map(NodeId)
 }
 
+    /// Get the siblings of a node (excluding the node itself).
+    ///
+    /// Returns an empty iterator for the root node.
+    pub fn siblings_iter(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
+        // indextree's sibling iterators yield the node itself first; skip it.
+        id.0.preceding_siblings(&self.arena)
+            .skip(1)
+            .chain(id.0.following_siblings(&self.arena).skip(1))
+            .map(NodeId)
+    }
+
+    /// Get the ancestors of a node from parent to root.
+    ///
+    /// Returns an empty iterator for the root node.
+    pub fn ancestors_iter(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
+        // indextree's ancestors() yields the node itself first; skip it.
+        id.0.ancestors(&self.arena).skip(1).map(NodeId)
+    }
+
+    /// Get the path from root to a node (inclusive).
+    ///
+    /// Returns the path as a Vec starting from the root.
+    pub fn path_from_root(&self, id: NodeId) -> Vec<NodeId> {
+        let mut path: Vec<NodeId> = self.ancestors_iter(id).collect();
+        path.reverse();
+        path.push(id);
+        path
+    }
+
+    /// Get the depth of a node (root = 0).
+    pub fn depth(&self, id: NodeId) -> usize {
+        self.get(id).map(|n| n.depth).unwrap_or(0)
+    }
+
+    /// Get the first child of a node.
+    ///
+    /// Returns None if the node has no children.
+    pub fn first_child(&self, id: NodeId) -> Option<NodeId> {
+        self.children_iter(id).next()
+    }
+
+    /// Get the last child of a node.
+    ///
+    /// Returns None if the node has no children.
+    pub fn last_child(&self, id: NodeId) -> Option<NodeId> {
+        self.children_iter(id).last()
+    }
+
 /// Get all leaf nodes in the tree.
 pub fn leaves(&self) -> Vec<NodeId> {
 self.traverse()
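A usage sketch contrasting the two child APIs added above; `tree` and `node` stand in for values from the surrounding code:

```rust
// Iterate without allocating: children_iter borrows the arena.
for child in tree.children_iter(node) {
    let _ = tree.child_count(child); // O(children), no Vec
}
// Allocate only when owned access is actually needed.
let owned: Vec<NodeId> = tree.children(node);
```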
diff --git a/src/retrieval/context.rs b/src/retrieval/context.rs
index ba0edb34..595c9083 100644
--- a/src/retrieval/context.rs
+++ b/src/retrieval/context.rs
@@ -434,7 +434,7 @@ impl ContextBuilder {
 sections.push(section);
 }
 
-        for child_id in tree.children(node_id) {
+        for child_id in tree.children_iter(node_id) {
 self.collect_sections(tree, child_id, current_depth + 1, max_depth, sections);
 }
 }
@@ -463,7 +463,7 @@ impl ContextBuilder {
 sections.push(section);
 }
 
-        for child_id in tree.children(node_id) {
+        for child_id in tree.children_iter(node_id) {
 Box::pin(self.collect_sections_async(
 tree,
 child_id,
diff --git a/src/retrieval/stages/search.rs b/src/retrieval/stages/search.rs
index 0283de23..e9addfe7 100644
--- a/src/retrieval/stages/search.rs
+++ b/src/retrieval/stages/search.rs
@@ -147,7 +147,7 @@ impl SearchStage {
 // Get node info
 if let Some(node) = tree.get(leaf_id) {
 let depth = node.depth;
-            let is_leaf = tree.children(leaf_id).is_empty();
+            let is_leaf = tree.is_leaf(leaf_id);
 candidates.push(CandidateNode::new(leaf_id, path.score, depth, is_leaf));
 }
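Two small invariants the new helpers guarantee, written as a sketch (`b` stands for any node in the tree):

```rust
let path = tree.path_from_root(b);
assert_eq!(path.last(), Some(&b));          // the path is inclusive
assert_eq!(path.len(), tree.depth(b) + 1);  // root sits at depth 0
assert_eq!(tree.first_child(b).is_none(), tree.is_leaf(b));
```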
From cd489ee4d0e3aff6e40dba92b9f813298c8118e5 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 17:54:47 +0800
Subject: [PATCH 03/21] feat(domain): add RetrievalIndex for efficient
 document tree operations

- Add RetrievalIndex struct with pre-computed traversal data
- Include methods for accessing leaves, levels, paths, and siblings in O(1) time
- Implement build_retrieval_index method on DocumentTree to create the index
- Add HashMap dependency for caching path and sibling relationships

feat(retrieval): integrate RetrievalIndex into PipelineContext

- Update PipelineContext to include optional RetrievalIndex field
- Automatically build retrieval index when creating new context
- Enable fast access to commonly needed tree traversal operations
---
 src/domain/mod.rs                 |   2 +-
 src/domain/tree.rs                | 169 +++++++++++++++++++++++-
 src/retrieval/pipeline/context.rs |   8 +-
 3 files changed, 176 insertions(+), 3 deletions(-)

diff --git a/src/domain/mod.rs b/src/domain/mod.rs
index d5aa3e5c..75970a12 100644
--- a/src/domain/mod.rs
+++ b/src/domain/mod.rs
@@ -24,4 +24,4 @@ pub use error::{Error, Result};
 pub use node::{NodeId, TreeNode};
 pub use toc::{TocConfig, TocEntry, TocNode, TocView};
 pub use token::{estimate_tokens, estimate_tokens_batch, estimate_tokens_fast};
-pub use tree::{DocumentStructure, DocumentTree, StructureNode};
+pub use tree::{DocumentStructure, DocumentTree, RetrievalIndex, StructureNode};
diff --git a/src/domain/tree.rs b/src/domain/tree.rs
index 94cc5774..21bcfac1 100644
--- a/src/domain/tree.rs
+++ b/src/domain/tree.rs
@@ -4,7 +4,9 @@
 //! Document tree using arena-based allocation.
 //!
 //! This structure provides better memory locality and simpler
 //! lifetime management compared to `Rc<RefCell<Node>>`.
+
+use std::collections::HashMap;
 
 use indextree::Arena;
 use serde::{Deserialize, Serialize};
@@ -39,6 +41,80 @@ pub struct DocumentStructure {
 pub structure: Vec<StructureNode>,
 }
 
+/// Pre-computed index for efficient retrieval operations.
+///
+/// Built once after the document tree is fully constructed.
+/// Provides O(1) access to commonly needed traversal data.
+#[derive(Debug, Clone)]
+pub struct RetrievalIndex {
+    /// All leaf nodes in the tree.
+    leaves: Vec<NodeId>,
+
+    /// Nodes grouped by depth level.
+    /// level_index[0] = root, level_index[1] = level 1 nodes, etc.
+    level_index: Vec<Vec<NodeId>>,
+
+    /// Path from root to each node (inclusive).
+    path_cache: HashMap<NodeId, Vec<NodeId>>,
+
+    /// Siblings for each node (excluding self).
+    siblings_cache: HashMap<NodeId, Vec<NodeId>>,
+
+    /// Total node count.
+    node_count: usize,
+
+    /// Maximum depth in the tree.
+    max_depth: usize,
+}
+
+impl RetrievalIndex {
+    /// Get all leaf nodes.
+    pub fn leaves(&self) -> &[NodeId] {
+        &self.leaves
+    }
+
+    /// Get nodes at a specific depth level.
+    ///
+    /// Returns None if the level doesn't exist.
+    pub fn level(&self, depth: usize) -> Option<&[NodeId]> {
+        self.level_index.get(depth).map(|v| v.as_slice())
+    }
+
+    /// Get all levels.
+    pub fn levels(&self) -> &[Vec<NodeId>] {
+        &self.level_index
+    }
+
+    /// Get the path from root to a node (inclusive).
+    ///
+    /// Returns None if the node is not in the index.
+    pub fn path_to(&self, node: NodeId) -> Option<&[NodeId]> {
+        self.path_cache.get(&node).map(|v| v.as_slice())
+    }
+
+    /// Get siblings of a node (excluding the node itself).
+    ///
+    /// Returns None if the node is not in the index or has no siblings.
+    pub fn siblings(&self, node: NodeId) -> Option<&[NodeId]> {
+        self.siblings_cache.get(&node).map(|v| v.as_slice())
+    }
+
+    /// Get the total number of nodes.
+    pub fn node_count(&self) -> usize {
+        self.node_count
+    }
+
+    /// Get the maximum depth in the tree.
+    pub fn max_depth(&self) -> usize {
+        self.max_depth
+    }
+
+    /// Get the number of levels.
+    pub fn level_count(&self) -> usize {
+        self.level_index.len()
+    }
+}
+
 /// A hierarchical document tree structure.
 ///
 /// Uses an arena-based tree representation for efficient traversal
@@ -337,6 +413,97 @@ impl DocumentTree {
 }
 }
 
+    /// Build a retrieval index for efficient operations.
+    ///
+    /// This should be called once after the tree is fully constructed.
+    /// The index provides O(1) access to commonly needed traversal data.
+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// let tree = /* build tree */;
+    /// let index = tree.build_retrieval_index();
+    ///
+    /// // Fast access to leaves
+    /// for leaf in index.leaves() {
+    ///     // process leaf
+    /// }
+    ///
+    /// // Fast path lookup
+    /// if let Some(path) = index.path_to(node_id) {
+    ///     // path[0] = root, path[-1] = node_id
+    /// }
+    /// ```
+    pub fn build_retrieval_index(&self) -> RetrievalIndex {
+        let mut leaves = Vec::new();
+        let mut level_index: Vec<Vec<NodeId>> = Vec::new();
+        let mut path_cache: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
+        let mut siblings_cache: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
+        let mut max_depth = 0;
+        let node_count = self.node_count();
+
+        // BFS to build level index
+        let mut current_level = vec![self.root_id];
+
+        // Initialize root path
+        path_cache.insert(self.root_id, vec![self.root_id]);
+
+        while !current_level.is_empty() {
+            level_index.push(current_level.clone());
+
+            let mut next_level = Vec::new();
+
+            for &node_id in &current_level {
+                let children: Vec<NodeId> = self.children(node_id);
+
+                // Update max depth
+                if let Some(node) = self.get(node_id) {
+                    max_depth = max_depth.max(node.depth);
+                }
+
+                // Check if leaf
+                if children.is_empty() {
+                    leaves.push(node_id);
+                }
+
+                // Build siblings cache for children
+                if children.len() > 1 {
+                    for (i, &child) in children.iter().enumerate() {
+                        let siblings: Vec<NodeId> = children
+                            .iter()
+                            .enumerate()
+                            .filter(|(j, _)| *j != i)
+                            .map(|(_, &c)| c)
+                            .collect();
+                        siblings_cache.insert(child, siblings);
+                    }
+                }
+
+                // Build path cache for children
+                if let Some(parent_path) = path_cache.get(&node_id).cloned() {
+                    for &child in &children {
+                        let mut child_path = parent_path.clone();
+                        child_path.push(child);
+                        path_cache.insert(child, child_path);
+                    }
+                }
+
+                next_level.extend(children);
+            }
+
+            current_level = next_level;
+        }
+
+        RetrievalIndex {
+            leaves,
+            level_index,
+            path_cache,
+            siblings_cache,
+            node_count,
+            max_depth,
+        }
+    }
+
 /// Recursively build structure nodes starting from the given node.
 fn build_structure_nodes(&self, node_id: NodeId) -> Vec<StructureNode> {
 let children = self.children(node_id);
diff --git a/src/retrieval/pipeline/context.rs b/src/retrieval/pipeline/context.rs
index 5dafaf36..b12d3d9f 100644
--- a/src/retrieval/pipeline/context.rs
+++ b/src/retrieval/pipeline/context.rs
@@ -10,7 +10,7 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Instant;
 
-use crate::domain::{DocumentTree, NodeId};
+use crate::domain::{DocumentTree, NodeId, RetrievalIndex};
 use crate::retrieval::pilot::Pilot;
 use crate::retrieval::types::{
 NavigationStep, QueryComplexity, RetrieveOptions, RetrieveResponse, SearchPath,
@@ -195,6 +195,8 @@ pub struct PipelineContext {
 pub query: String,
 /// Document tree to search.
 pub tree: Arc<DocumentTree>,
+    /// Pre-computed retrieval index for efficient operations.
+    pub retrieval_index: Option<RetrievalIndex>,
 /// Retrieval options.
 pub options: RetrieveOptions,
 /// Optional Pilot for navigation guidance.
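A sketch of how a downstream stage might consult the pre-built index from the context; the field names follow this patch, the surrounding stage code is assumed:

```rust
if let Some(index) = &ctx.retrieval_index {
    // O(1) access instead of re-walking the tree per query.
    for &leaf in index.leaves() {
        if let Some(path) = index.path_to(leaf) {
            // score the leaf using its root-to-leaf path
        }
    }
}
```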
@@ -254,9 +256,13 @@ impl PipelineContext {
 query: impl Into<String>,
 options: RetrieveOptions,
 ) -> Self {
+        // Build retrieval index for efficient operations
+        let retrieval_index = Some(tree.build_retrieval_index());
+
 Self {
 query: query.into(),
 tree,
+            retrieval_index,
 options,
 pilot: None,
 complexity: None,
From 8d7ec2d052efaae1d4c228dce538daeb9f51e69c Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 17:58:42 +0800
Subject: [PATCH 04/21] feat(tree): add cached leaves functionality to
 DocumentTree

- Add leaves_cache field to DocumentTree struct to store cached leaf nodes
- Implement lazy rebuilding of leaves cache with rebuild_leaves_cache() method
- Add invalidate_leaves_cache() method to clear cache when tree is modified
- Update add_child() method to maintain leaves cache consistency
- Modify constructor to initialize cache with root node as initial leaf
- Skip serialization of leaves_cache using serde(skip) attribute
- Return cached leaves when available, otherwise rebuild on demand in leaves() method
---
 src/domain/tree.rs | 57 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/src/domain/tree.rs b/src/domain/tree.rs
index 21bcfac1..29491147 100644
--- a/src/domain/tree.rs
+++ b/src/domain/tree.rs
@@ -126,6 +126,10 @@ pub struct DocumentTree {
 
 /// The root node ID.
 root_id: NodeId,
+
+    /// Cached leaf nodes (rebuilt on demand).
+    #[serde(skip)]
+    leaves_cache: Option<Vec<NodeId>>,
 }
 
 impl DocumentTree {
@@ -147,9 +151,13 @@ impl DocumentTree {
 };
 let root_id = arena.new_node(root_data);
 
+        // Root is initially a leaf
+        let leaves_cache = Some(vec![NodeId(root_id)]);
+
 Self {
 arena,
 root_id: NodeId(root_id),
+        leaves_cache,
 }
 }
 
@@ -157,7 +165,11 @@ impl DocumentTree {
 ///
 /// This is useful for deserialization and testing.
 pub fn from_raw(arena: Arena<TreeNode>, root_id: NodeId) -> Self {
-        Self { arena, root_id }
+        Self {
+            arena,
+            root_id,
+            leaves_cache: None, // Will be rebuilt on demand
+        }
 }
 
 /// Get the root node ID.
 };
 let child_id = self.arena.new_node(child_data);
 parent.0.append(child_id, &mut self.arena);
+
+        // Update leaves cache
+        if let Some(ref mut cache) = self.leaves_cache {
+            // Remove parent from leaves (it's no longer a leaf)
+            cache.retain(|&id| id != parent);
+            // Add child to leaves
+            cache.push(NodeId(child_id));
+        }
+
 NodeId(child_id)
 }
 
 /// Get all leaf nodes in the tree.
+    ///
+    /// Uses cached leaves if available, otherwise rebuilds the cache.
 pub fn leaves(&self) -> Vec<NodeId> {
-        self.traverse()
+        if let Some(ref cache) = self.leaves_cache {
+            return cache.clone();
+        }
+
+        // Rebuild cache on demand
+        let leaves: Vec<NodeId> = self
+            .traverse()
 .into_iter()
 .filter(|id| self.is_leaf(*id))
-            .collect()
+            .collect();
+
+        // Note: Can't mutate self here, caller should use rebuild_leaves_cache()
+        leaves
+    }
+
+    /// Rebuild the leaves cache.
+    ///
+    /// Call this after deserialization or batch modifications.
+    pub fn rebuild_leaves_cache(&mut self) {
+        self.leaves_cache = Some(
+            self.traverse()
+                .into_iter()
+                .filter(|id| self.is_leaf(*id))
+                .collect(),
+        );
+    }
+
+    /// Invalidate the leaves cache.
+    ///
+    /// Called automatically by mutation methods.
+    pub fn invalidate_leaves_cache(&mut self) {
+        self.leaves_cache = None;
 }
 
 /// Get all nodes in the tree (depth-first order).
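A minimal sketch of the cache contract this commit introduces; `root()` stands in for the tree's root accessor, whose exact name is assumed here:

```rust
let mut tree = DocumentTree::new("Doc", "");
let a = tree.add_child(tree.root(), "A", ""); // cache is updated in place
assert_eq!(tree.leaves(), vec![a]);           // root is no longer a leaf

// After deserialization or batch edits, rebuild explicitly:
tree.rebuild_leaves_cache();
```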
From d8e62cbd48061ff042b09de14220234a9c72a14d Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 18:12:43 +0800
Subject: [PATCH 05/21] feat(domain): add hierarchical structure indexing to
 TreeNode

- Add `structure` field to TreeNode to store hierarchical path (e.g., "1", "1.1", "1.2.3")
- This provides human-readable paths for LLM navigation and cross-referencing
- Initialize structure field with empty string in Default implementation
- Update test cases to include the new structure field
---
 src/domain/node.rs               | 10 ++++
 src/domain/tree.rs               | 98 +++++++++++++++++++++++++++++++++-
 src/retrieval/pilot/decision.rs  |  1 +
 src/retrieval/pilot/llm_pilot.rs |  1 +
 src/retrieval/pilot/parser.rs    |  1 +
 5 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/src/domain/node.rs b/src/domain/node.rs
index ea9939b2..04359572 100644
--- a/src/domain/node.rs
+++ b/src/domain/node.rs
@@ -53,6 +53,15 @@ pub struct TreeNode {
 /// Title of this section.
 pub title: String,
 
+    /// Hierarchical structure index (e.g., "1", "1.1", "1.2.3").
+    ///
+    /// This provides a human-readable path to the node and is useful for:
+    /// - LLM navigation (easier to understand "go to section 2.1.3")
+    /// - Table of contents display
+    /// - Cross-referencing
+    #[serde(default)]
+    pub structure: String,
+
 /// Raw text content (populated at leaves).
 #[serde(default)]
 pub content: String,
@@ -93,6 +102,7 @@ impl Default for TreeNode {
 fn default() -> Self {
 Self {
 title: String::new(),
+            structure: String::new(),
 content: String::new(),
 summary: String::new(),
 depth: 0,
diff --git a/src/domain/tree.rs b/src/domain/tree.rs
index 29491147..0d1c71d5 100644
--- a/src/domain/tree.rs
+++ b/src/domain/tree.rs
@@ -60,6 +60,17 @@ pub struct RetrievalIndex {
 /// Siblings for each node (excluding self).
 siblings_cache: HashMap<NodeId, Vec<NodeId>>,
 
+    /// Structure string to NodeId mapping.
+    /// e.g., "1.2.3" -> NodeId
+    structure_index: HashMap<String, NodeId>,
+
+    /// Page number to NodeId mapping.
+    /// Maps each page to the most specific (deepest) node containing it.
+    page_index: HashMap<usize, NodeId>,
+
+    /// NodeId to page range mapping.
+    node_page_range: HashMap<NodeId, (usize, usize)>,
+
 /// Total node count.
 node_count: usize,
 
+    /// Find a node by its structure index.
+    ///
+    /// # Example
+    /// ```ignore
+    /// // Find section 2.1.3
+    /// let node = index.find_by_structure("2.1.3");
+    /// ```
+    pub fn find_by_structure(&self, structure: &str) -> Option<NodeId> {
+        self.structure_index.get(structure).copied()
+    }
+
+    /// Find the most specific node containing a page number.
+    ///
+    /// Returns the deepest node whose page range contains the given page.
+    pub fn find_by_page(&self, page: usize) -> Option<NodeId> {
+        self.page_index.get(&page).copied()
+    }
+
+    /// Get the page range for a node.
+    pub fn page_range(&self, node: NodeId) -> Option<(usize, usize)> {
+        self.node_page_range.get(&node).copied()
+    }
+
+    /// Get all structure indices.
+    pub fn structures(&self) -> &HashMap<String, NodeId> {
+        &self.structure_index
+    }
+
 let mut arena = Arena::new();
 let root_data = TreeNode {
 title: title.to_string(),
+            structure: String::new(), // Root has no structure index
 content: content.to_string(),
 summary: String::new(),
 depth: 0,
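The "most specific node" rule can be seen with a two-level sketch; `chapter` and `section` are hypothetical nodes, where the chapter spans pages 1-10 and one of its sections spans pages 3-5:

```rust
let index = tree.build_retrieval_index();
assert_eq!(index.find_by_page(4), Some(section)); // deepest node wins
assert_eq!(index.find_by_page(8), Some(chapter)); // outside the section
```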
 /// Add a child node to the specified parent.
 ///
 /// Returns the ID of the newly created child node.
+    /// The structure is automatically calculated based on siblings.
 pub fn add_child(&mut self, parent: NodeId, title: &str, content: &str) -> NodeId {
 let parent_depth = self.arena.get(parent.0).map(|n| n.get().depth).unwrap_or(0);
+        let parent_structure = self
+            .arena
+            .get(parent.0)
+            .map(|n| n.get().structure.clone())
+            .unwrap_or_default();
+
+        // Calculate child index (1-based)
+        let child_index = parent.0.children(&self.arena).count() + 1;
+
+        // Calculate structure: parent_structure.child_index
+        let child_structure = if parent_structure.is_empty() {
+            child_index.to_string()
+        } else {
+            format!("{}.{}", parent_structure, child_index)
+        };
+
 let child_data = TreeNode {
 title: title.to_string(),
+            structure: child_structure,
 content: content.to_string(),
 summary: String::new(),
 depth: parent_depth + 1,
 
+    /// Update a node's structure index.
+    pub fn set_structure(&mut self, id: NodeId, structure: &str) {
+        if let Some(node) = self.get_mut(id) {
+            node.structure = structure.to_string();
+        }
+    }
+
 /// Set page boundaries for a node.
 pub fn set_page_boundaries(&mut self, id: NodeId, start: usize, end: usize) {
 if let Some(node) = self.get_mut(id) {
 
 /// if let Some(path) = index.path_to(node_id) {
 /// // path[0] = root, path[-1] = node_id
 /// }
+    ///
+    /// // Fast structure lookup
+    /// if let Some(node) = index.find_by_structure("2.1.3") {
+    ///     // Found section 2.1.3
+    /// }
+    ///
+    /// // Fast page lookup
+    /// if let Some(node) = index.find_by_page(42) {
+    ///     // Found node containing page 42
+    /// }
 /// ```
 pub fn build_retrieval_index(&self) -> RetrievalIndex {
 let mut leaves = Vec::new();
 let mut level_index: Vec<Vec<NodeId>> = Vec::new();
 let mut path_cache: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
 let mut siblings_cache: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
+        let mut structure_index: HashMap<String, NodeId> = HashMap::new();
+        let mut page_index: HashMap<usize, NodeId> = HashMap::new();
+        let mut node_page_range: HashMap<NodeId, (usize, usize)> = HashMap::new();
 let mut max_depth = 0;
 let node_count = self.node_count();
 
 for &node_id in &current_level {
 let children: Vec<NodeId> = self.children(node_id);
 
-                // Update max depth
+                // Get node data
 if let Some(node) = self.get(node_id) {
 max_depth = max_depth.max(node.depth);
+
+                    // Build structure index
+                    if !node.structure.is_empty() {
+                        structure_index.insert(node.structure.clone(), node_id);
+                    }
+
+                    // Build page index and page range
+                    if let (Some(start), Some(end)) = (node.start_page, node.end_page) {
+                        node_page_range.insert(node_id, (start, end));
+
+                        // Map each page to this node (will be overwritten by deeper nodes)
+                        for page in start..=end {
+                            page_index.insert(page, node_id);
+                        }
+                    }
 }
 
 // Check if leaf
 
 RetrievalIndex {
 leaves,
 level_index,
 path_cache,
 siblings_cache,
+            structure_index,
+            page_index,
+            node_page_range,
 node_count,
 max_depth,
 }
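The numbering rule in `add_child` plays out like this, as a sketch (`root()` is an assumed accessor for the root ID):

```rust
let mut tree = DocumentTree::new("Doc", "");
let _intro = tree.add_child(tree.root(), "Intro", "");  // structure "1"
let design = tree.add_child(tree.root(), "Design", ""); // structure "2"
let pilot = tree.add_child(design, "Pilot", "");        // structure "2.1"
assert_eq!(tree.get(pilot).unwrap().structure, "2.1");
```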
crate::domain::TreeNode {
                 title: format!("Node {}", i),
+                structure: String::new(),
                 content: String::new(),
                 summary: String::new(),
                 depth: 0,
diff --git a/src/retrieval/pilot/parser.rs b/src/retrieval/pilot/parser.rs
index 0447a259..9bb0bd48 100644
--- a/src/retrieval/pilot/parser.rs
+++ b/src/retrieval/pilot/parser.rs
@@ -350,6 +350,7 @@ mod tests {
         for i in 0..count {
             let node = crate::domain::TreeNode {
                 title: format!("Node {}", i),
+                structure: String::new(),
                 content: String::new(),
                 summary: String::new(),
                 depth: 0,

From ab8f2fa8e7c072db72389c38db02005c83c01fc1 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 18:18:37 +0800
Subject: [PATCH 06/21] feat(tree): add page range query methods to retrieval index

- Add find_nodes_by_page_range to retrieve nodes overlapping with a page range
- Add get_pages_for_node to get all page numbers covered by a node
- Add find_leaves_by_page_range to get only leaf nodes within a page range
- Add total_pages method to get the document's total page count
- Add convenience methods to DocumentTree: find_by_structure, find_by_page, and find_nodes_by_page_range
---
 src/domain/tree.rs | 109 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/src/domain/tree.rs b/src/domain/tree.rs
index 0d1c71d5..94f138a3 100644
--- a/src/domain/tree.rs
+++ b/src/domain/tree.rs
@@ -128,11 +128,64 @@ impl RetrievalIndex {
         self.page_index.get(&page).copied()
     }

+    /// Find all nodes whose page range overlaps with the given range.
+    ///
+    /// This is useful for retrieving all content that spans a range of pages.
+    ///
+    /// # Example
+    /// ```ignore
+    /// // Find all nodes covering pages 10-15
+    /// let nodes = index.find_nodes_by_page_range(10, 15);
+    /// ```
+    pub fn find_nodes_by_page_range(&self, start: usize, end: usize) -> Vec<NodeId> {
+        let mut result = Vec::new();
+        for (&node_id, &(node_start, node_end)) in &self.node_page_range {
+            // Check if ranges overlap: node_start <= end && start <= node_end
+            if node_start <= end && start <= node_end {
+                result.push(node_id);
+            }
+        }
+        // Sort by start page for consistent ordering
+        result.sort_by_key(|&id| {
+            self.node_page_range.get(&id).map(|(s, _)| *s).unwrap_or(0)
+        });
+        result
+    }
+
+    /// Get all page numbers covered by a node.
+    ///
+    /// Returns None if the node has no page information.
+    pub fn get_pages_for_node(&self, node: NodeId) -> Option<Vec<usize>> {
+        let (start, end) = self.node_page_range.get(&node)?;
+        Some((*start..=*end).collect())
+    }
+
     /// Get the page range for a node.
     pub fn page_range(&self, node: NodeId) -> Option<(usize, usize)> {
         self.node_page_range.get(&node).copied()
     }

+    /// Get all nodes that are leaves within a page range.
+    ///
+    /// This returns only leaf nodes (nodes with no children) that
+    /// overlap with the given page range.
+    pub fn find_leaves_by_page_range(&self, start: usize, end: usize) -> Vec<NodeId> {
+        let leaves_set: std::collections::HashSet<NodeId> = self.leaves.iter().copied().collect();
+        self.find_nodes_by_page_range(start, end)
+            .into_iter()
+            .filter(|id| leaves_set.contains(id))
+            .collect()
+    }
+
+    /// Get the total number of pages in the document.
+    pub fn total_pages(&self) -> usize {
+        self.node_page_range
+            .values()
+            .map(|(_, end)| *end)
+            .max()
+            .unwrap_or(0)
+    }
+
     /// Get all structure indices.
     pub fn structures(&self) -> &HashMap<String, NodeId> {
         &self.structure_index
     }
@@ -499,6 +552,62 @@ impl DocumentTree {
         }
     }

+    /// Find a node by its structure index.
+    ///
+    /// This is a convenience method that performs a linear scan.
+    /// For repeated queries, build a RetrievalIndex once.
+    pub fn find_by_structure(&self, structure: &str) -> Option<NodeId> {
+        // Linear search - for repeated use, build RetrievalIndex
+        for node_id in self.traverse() {
+            if let Some(node) = self.get(node_id) {
+                if node.structure == structure {
+                    return Some(node_id);
+                }
+            }
+        }
+        None
+    }
+
+    /// Find the most specific node containing a page.
+    ///
+    /// This is a convenience method that performs a linear scan.
+    /// For repeated queries, build a RetrievalIndex once.
+    pub fn find_by_page(&self, page: usize) -> Option<NodeId> {
+        let mut best_match: Option<(NodeId, usize)> = None;
+
+        // Find the deepest node containing this page
+        for node_id in self.traverse() {
+            if let Some((start, end)) = self.page_range(node_id) {
+                if page >= start && page <= end {
+                    let depth = self.get(node_id).map(|n| n.depth).unwrap_or(0);
+                    match &best_match {
+                        None => best_match = Some((node_id, depth)),
+                        Some((_, best_depth)) if depth > *best_depth => {
+                            best_match = Some((node_id, depth));
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        }
+
+        best_match.map(|(id, _)| id)
+    }
+
+    /// Get all nodes whose page range overlaps with the given range.
+    pub fn find_nodes_by_page_range(&self, start: usize, end: usize) -> Vec<NodeId> {
+        self.traverse()
+            .into_iter()
+            .filter(|&id| {
+                if let Some((node_start, node_end)) = self.page_range(id) {
+                    node_start <= end && start <= node_end
+                } else {
+                    false
+                }
+            })
+            .collect()
+    }
+
     /// Set the node ID (identifier string).
     pub fn set_node_id(&mut self, id: NodeId, node_id: &str) {
         if let Some(node) = self.get_mut(id) {

From b1e32fd85f61ca5a1651b5275bf76ecdc22e757a Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 18:21:05 +0800
Subject: [PATCH 07/21] refactor(benchmarks): remove unused criterion dependencies and benchmark files

Remove the criterion benchmark infrastructure that was never implemented. All placeholder benchmark files for the index, retrieval, and pilot modules are deleted, since they contained only placeholder implementations and provided no actual performance measurements.
---
 Cargo.toml                 |  13 ---
 benches/index_bench.rs     | 106 --------------------
 benches/pilot_bench.rs     | 196 ------------------------------------
 benches/retrieval_bench.rs | 164 -------------------------------
 4 files changed, 479 deletions(-)
 delete mode 100644 benches/index_bench.rs
 delete mode 100644 benches/pilot_bench.rs
 delete mode 100644 benches/retrieval_bench.rs

diff --git a/Cargo.toml b/Cargo.toml
index e16d14ac..00d69362 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -73,19 +73,6 @@ rand = "0.8"
 [dev-dependencies]
 tempfile = "3.10"
 tokio-test = "0.4"
-criterion = { version = "0.5", features = ["async_tokio"] }
-
-[[bench]]
-name = "index_bench"
-harness = false
-
-[[bench]]
-name = "retrieval_bench"
-harness = false
-
-[[bench]]
-name = "pilot_bench"
-harness = false

 [profile.release]
 opt-level = 3
diff --git a/benches/index_bench.rs b/benches/index_bench.rs
deleted file mode 100644
index 9ba1e230..00000000
--- a/benches/index_bench.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Index pipeline benchmarks.
-//!
-//! Measures performance of document indexing:
-//! - Parsing speed (Markdown, PDF, DOCX)
-//! - Tree building
-//! - Summary generation (LLM calls)
-//!
- End-to-end indexing time - -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; - -// TODO: Implement actual benchmarks once the API is stable -// -// use vectorless::client::{Engine, EngineBuilder}; -// use vectorless::parser::{MarkdownParser, DocumentParser}; - -fn bench_markdown_parsing(c: &mut Criterion) { - let mut group = c.benchmark_group("markdown_parsing"); - - // TODO: Create test documents of different sizes - // let small_doc = generate_markdown(100); // 100 lines - // let medium_doc = generate_markdown(500); // 500 lines - // let large_doc = generate_markdown(2000); // 2000 lines - - // TODO: Benchmark parsing - // group.bench_with_input(BenchmarkId::new("parse", "small"), &small_doc, |b, doc| { - // b.iter(|| { - // let parser = MarkdownParser::new(); - // black_box(parser.parse(doc)) - // }) - // }); - - // Placeholder benchmark - group.bench_function("parse_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_tree_building(c: &mut Criterion) { - let mut group = c.benchmark_group("tree_building"); - - // TODO: Benchmark tree construction from parsed content - // - Node creation - // - Hierarchy building - // - Metadata assignment - - group.bench_function("build_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_toc_extraction(c: &mut Criterion) { - let mut group = c.benchmark_group("toc_extraction"); - - // TODO: Benchmark ToC extraction - // - Heading detection - // - Hierarchy inference - // - Section boundary detection - - group.bench_function("toc_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_full_index_pipeline(c: &mut Criterion) { - let mut group = c.benchmark_group("full_index"); - - // TODO: Benchmark complete indexing pipeline - // - Parse → Build → Enhance → Enrich → Optimize - // - With and without LLM summarization - // - Different document sizes - - group.bench_function("full_pipeline_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -// TODO: Add helper functions for generating test documents -// -// fn generate_markdown(lines: usize) -> String { -// // Generate markdown with headings, paragraphs, code blocks -// } -// -// fn generate_pdf(pages: usize) -> Vec { -// // Generate PDF content -// } - -criterion_group!( - benches, - bench_markdown_parsing, - bench_tree_building, - bench_toc_extraction, - bench_full_index_pipeline, -); - -criterion_main!(benches); diff --git a/benches/pilot_bench.rs b/benches/pilot_bench.rs deleted file mode 100644 index 5a052b50..00000000 --- a/benches/pilot_bench.rs +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) 2026 vectorless developers -// SPDX-License-Identifier: Apache-2.0 - -//! Pilot module benchmarks. -//! -//! Measures performance of Pilot (the brain of retrieval): -//! - Intervention decision overhead -//! - Context building -//! - LLM call latency (mocked) -//! - Response parsing -//! - Score merging -//! 
- Fallback handling - -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; - -fn bench_intervention_decision(c: &mut Criterion) { - let mut group = c.benchmark_group("intervention_decision"); - - // TODO: Benchmark should_intervene() decision - // - START point decision - // - FORK point decision - // - BACKTRACK point decision - // - EVALUATE point decision - - // This should be very fast (< 1µs) as it's called frequently - - group.bench_function("should_intervene_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_context_building(c: &mut Criterion) { - let mut group = c.benchmark_group("context_building"); - - // TODO: Benchmark ContextBuilder - // - Token budget allocation - // - Path context building - // - Candidate context building - // - Sibling context building - - // Test different context sizes: - // - Small: 1-2 candidates, short path - // - Medium: 3-5 candidates, medium path - // - Large: 10+ candidates, long path - - group.bench_function("build_context_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_response_parsing(c: &mut Criterion) { - let mut group = c.benchmark_group("response_parsing"); - - // TODO: Benchmark ResponseParser - // - JSON parsing - // - Regex fallback extraction - // - Default decision generation - - // let json_response = r#"{"candidates": [...], "direction": "...", "confidence": 0.9}"#; - // - // group.bench_with_input("json_parse", json_response, |b, response| { - // b.iter(|| { - // black_box(ResponseParser::parse(response)) - // }) - // }); - - group.bench_function("parse_json_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.bench_function("parse_regex_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_score_merging(c: &mut Criterion) { - let mut group = c.benchmark_group("score_merging"); - - // TODO: Benchmark score merging - // - final = α × algo + β × llm - // - Different weight configurations - // - Batch merging (multiple candidates) - - group.bench_function("merge_scores_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_budget_controller(c: &mut Criterion) { - let mut group = c.benchmark_group("budget_controller"); - - // TODO: Benchmark BudgetController - // - can_call() check - // - record_usage() update - // - estimate_cost() calculation - // - Thread-safe operations - - group.bench_function("budget_check_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.bench_function("budget_record_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_fallback_manager(c: &mut Criterion) { - let mut group = c.benchmark_group("fallback_manager"); - - // TODO: Benchmark FallbackManager - // - Level escalation - // - Level de-escalation - // - Retry delay calculation - // - Action determination - - group.bench_function("fallback_record_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_metrics_collector(c: &mut Criterion) { - let mut group = c.benchmark_group("metrics_collector"); - - // TODO: Benchmark MetricsCollector - // - record_call() with atomic operations - // - snapshot() generation - // - Percentile calculation - - group.bench_function("metrics_record_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.bench_function("metrics_snapshot_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn 
bench_full_pilot_decision(c: &mut Criterion) { - let mut group = c.benchmark_group("full_pilot_decision"); - - // TODO: Benchmark complete Pilot.decide() flow - // - should_intervene check - // - Context building - // - LLM call (mocked or skipped) - // - Response parsing - // - Decision construction - - // Compare: - // - With LLM call (real latency) - // - Without LLM call (algorithm only) - // - With cached response - - group.bench_function("full_decide_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -// TODO: Add helper functions -// -// fn create_mock_search_state() -> SearchState { -// // Create mock state for benchmarking -// } -// -// fn create_mock_tree() -> DocumentTree { -// // Create mock tree for benchmarking -// } - -criterion_group!( - benches, - bench_intervention_decision, - bench_context_building, - bench_response_parsing, - bench_score_merging, - bench_budget_controller, - bench_fallback_manager, - bench_metrics_collector, - bench_full_pilot_decision, -); - -criterion_main!(benches); diff --git a/benches/retrieval_bench.rs b/benches/retrieval_bench.rs deleted file mode 100644 index d0615401..00000000 --- a/benches/retrieval_bench.rs +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright (c) 2026 vectorless developers -// SPDX-License-Identifier: Apache-2.0 - -//! Retrieval pipeline benchmarks. -//! -//! Measures performance of document retrieval: -//! - Query analysis -//! - Strategy selection -//! - Search algorithms (Greedy, Beam, MCTS) -//! - Judge evaluation -//! - End-to-end retrieval time - -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; - -fn bench_query_analysis(c: &mut Criterion) { - let mut group = c.benchmark_group("query_analysis"); - - // TODO: Benchmark query analysis stage - // - Complexity detection - // - Keyword extraction - // - Target section identification - - // Test different query types: - // - Simple factual: "What is X?" - // - Complex analytical: "Compare X and Y" - // - Multi-part: "What are the steps to do X?" 
- - group.bench_function("analyze_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_search_algorithms(c: &mut Criterion) { - let mut group = c.benchmark_group("search_algorithms"); - - // TODO: Benchmark different search algorithms - // - // group.bench_with_input("greedy", &config, |b, cfg| { - // b.iter(|| { - // let searcher = GreedySearcher::new(cfg); - // black_box(searcher.search(&tree, &query)) - // }) - // }); - // - // group.bench_with_input("beam_k3", &beam_config_3, |b, cfg| { - // b.iter(|| { - // let searcher = BeamSearcher::new(cfg); - // black_box(searcher.search(&tree, &query)) - // }) - // }); - // - // group.bench_with_input("beam_k5", &beam_config_5, |b, cfg| { - // b.iter(|| { - // let searcher = BeamSearcher::new(cfg); - // black_box(searcher.search(&tree, &query)) - // }) - // }); - // - // group.bench_with_input("mcts", &mcts_config, |b, cfg| { - // b.iter(|| { - // let searcher = MctsSearcher::new(cfg); - // black_box(searcher.search(&tree, &query)) - // }) - // }); - - group.bench_function("greedy_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.bench_function("beam_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.bench_function("mcts_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_judge_evaluation(c: &mut Criterion) { - let mut group = c.benchmark_group("judge_evaluation"); - - // TODO: Benchmark judge stage - // - Sufficiency evaluation - // - Content quality assessment - // - Backtrack decision making - - group.bench_function("judge_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_full_retrieval_pipeline(c: &mut Criterion) { - let mut group = c.benchmark_group("full_retrieval"); - - // TODO: Benchmark complete retrieval pipeline - // - Analyze → Plan → Search → Judge - // - With and without Pilot - // - With and without backtracking - // - Different query complexities - - // group.bench_with_input( - // BenchmarkId::new("no_pilot", "simple_query"), - // &simple_query, - // |b, query| { - // b.iter(|| { - // black_box(engine.query(&doc_id, query)) - // }) - // }, - // ); - // - // group.bench_with_input( - // BenchmarkId::new("with_pilot", "simple_query"), - // &simple_query, - // |b, query| { - // b.iter(|| { - // black_box(engine_with_pilot.query(&doc_id, query)) - // }) - // }, - // ); - - group.bench_function("retrieval_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -fn bench_backtracking(c: &mut Criterion) { - let mut group = c.benchmark_group("backtracking"); - - // TODO: Benchmark backtracking overhead - // - Time to detect insufficient results - // - Time to adjust search parameters - // - Additional search iterations - - group.bench_function("backtrack_placeholder", |b| { - b.iter(|| black_box(1 + 1)) - }); - - group.finish(); -} - -// TODO: Add helper functions for creating test trees -// -// fn create_test_tree(depth: usize, branching: usize) -> DocumentTree { -// // Create tree with specified depth and branching factor -// } - -criterion_group!( - benches, - bench_query_analysis, - bench_search_algorithms, - bench_judge_evaluation, - bench_full_retrieval_pipeline, - bench_backtracking, -); - -criterion_main!(benches); From 917b4a481577c039fde1945724008fba397057df Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 19:03:08 +0800 Subject: [PATCH 08/21] docs(roadmap): remove architecture evaluation document BREAKING CHANGE: The roadmap.md 
file has been completely removed as it contained outdated information about the project's future direction and current status that is no longer relevant.

feat(pilot): integrate semantic navigation guidance into retrieval pipeline

The LLM-based Pilot module is now integrated into the retrieval pipeline to provide semantic navigation guidance during search operations. This enables the system to make intelligent decisions about which document paths to explore based on semantic understanding rather than purely algorithmic scoring.

The Pilot provides guidance at key decision points, including:
- Start: Initial branch selection for root-level navigation
- Fork: Decision making when multiple candidate paths exist
- Backtrack: Guidance when more data is needed or an explicit backtrack occurs
- Evaluate: Node assessment and ranking

Additional improvements include:
- Query-specific scoring context for more accurate relevance matching
- Weighted combination of algorithmic and LLM-based scoring
- Budget control with token/call limiting
- Comprehensive logging and debugging information

refactor(search): update beam and greedy search algorithms with Pilot support

Updated both the beam search and greedy search implementations to support Pilot-guided navigation. The changes include query-specific scoring, weighted merging of algorithmic and Pilot scores, and proper handling of semantic guidance during search operations.

The search algorithms now:
- Create query-specific scorers for better relevance
- Integrate Pilot decisions with algorithmic scoring
- Maintain visited-node tracking to avoid cycles
- Provide fallback mechanisms when no results meet minimum score thresholds

fix(builder): improve LLM client configuration fallback logic

Enhanced the EngineBuilder to fall back to the API key in the summary configuration when the retrieval configuration does not provide one. This makes LLM client setup more flexible.
---
 docs/design/roadmap.md              | 263 ----------------------------
 src/client/builder.rs               |  10 +-
 src/retrieval/pilot/llm_pilot.rs    |  10 +-
 src/retrieval/pipeline_retriever.rs |   7 +-
 src/retrieval/search/beam.rs        |  52 ++++--
 src/retrieval/search/greedy.rs      | 132 ++++++++++++--
 6 files changed, 171 insertions(+), 303 deletions(-)
 delete mode 100644 docs/design/roadmap.md

diff --git a/docs/design/roadmap.md b/docs/design/roadmap.md
deleted file mode 100644
index de125bd5..00000000
--- a/docs/design/roadmap.md
+++ /dev/null
@@ -1,263 +0,0 @@
-# Architecture Assessment and Roadmap
-
-> Assessment date: 2026-04-04
-> Assessed version: v0.2.0
-
-## Current Status
-
-| Metric | Status |
-|--------|--------|
-| **Tests** | 197 passed, 0 failed |
-| **Code size** | 26,000+ lines of Rust |
-| **Modules** | client, domain, index, retrieval, pilot, llm, parser, storage, throttle |
-| **Build** | Passing |
-
-## Architecture Highlights
-
-### 1. Complete dual-pipeline design
-
-Index and Retrieval both use the same orchestrator pattern:
-- Dependency resolution (topological sort)
-- ExecutionGroup support for parallelism
-- FailurePolicy (Fail/Skip/Retry)
-- StageOutcome flow control
-- **Backtracking support** (Retrieval)
-
-```
-┌─────────────────────────────────────────────────────────────┐
-│                     Orchestrator pattern                     │
-├─────────────────────────────────────────────────────────────┤
-│  Index Pipeline          │  Retrieval Pipeline               │
-│  ─────────────           │  ─────────────────                │
-│  Parse → Build →         │  Analyze → Plan →                 │
-│  Enhance → Enrich →      │  Search → Judge                   │
-│  Optimize                │  (backtracking + Pilot)           │
-└─────────────────────────────────────────────────────────────┘
-```
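To make the orchestrator pattern above concrete, here is a minimal sketch of the stage contract it implies. The names (`Stage`, `StageOutcome`, `FailurePolicy`) mirror the roadmap's vocabulary, but the crate's actual definitions may differ; treat this as an illustration under those assumptions, not the real API.

```rust
use async_trait::async_trait;

/// What the orchestrator should do after a stage finishes.
pub enum StageOutcome {
    /// Proceed to the next stage (or execution group).
    Continue,
    /// Skip the remaining stages in the current group.
    Skip,
    /// Re-enter an earlier stage; used by retrieval backtracking.
    Backtrack { to_stage: usize },
}

/// What the orchestrator should do when a stage fails.
pub enum FailurePolicy {
    Fail,
    Skip,
    Retry { max_attempts: usize },
}

/// The per-stage contract the orchestrator drives after topologically
/// sorting stages into execution groups.
#[async_trait]
pub trait Stage<Ctx: Send>: Send + Sync {
    fn name(&self) -> &str;

    fn failure_policy(&self) -> FailurePolicy {
        FailurePolicy::Fail
    }

    async fn execute(&self, ctx: &mut Ctx) -> Result<StageOutcome, String>;
}
```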
-### 2. Complete Pilot implementation
-
-**Pilot is the "brain" of the Retrieval Pipeline**, responsible for semantic understanding and navigation decisions:
-
-```
-┌─────────────────────────────────────────────────────────────────┐
-│                       Pilot Architecture                        │
-├─────────────────────────────────────────────────────────────────┤
-│  Intervention points: START → FORK → BACKTRACK → EVALUATE       │
-│  Components: BudgetController, ContextBuilder, FallbackManager  │
-│  Features: score merging, 4-level fallback, metrics collection  │
-└─────────────────────────────────────────────────────────────────┘
-```
-
-**Core design philosophy**:
-- The algorithm handles "how to search": efficient and deterministic
-- The Pilot handles "where to go": semantic understanding and direction
-- It intervenes only at key decision points, not at every step
-
-### 3. Clear layered architecture
-
-```
-client (Engine) → index/retrieval → domain ← parser/llm/config
-                        ↑
-                  pilot (the brain)
-```
-
-- **client**: high-level API that hides internal complexity
-- **domain**: core domain types with no external dependencies
-- **index/retrieval**: business logic that operates on domain
-- **pilot**: LLM navigation intelligence that intervenes in retrieval
-- **parser/llm/config**: infrastructure that provides capabilities
-
----
-
-## Completed Features
-
-| Feature | Status | Notes |
-|---------|--------|-------|
-| Index Pipeline | ✅ | Parse, Build, Enhance, Enrich, Optimize |
-| Retrieval Pipeline | ✅ | Analyze, Plan, Search, Judge |
-| Backtracking | ✅ | NeedMoreData, explicit Backtrack |
-| Pilot Trait | ✅ | should_intervene, decide, guide_* |
-| BudgetController | ✅ | Token/call limits, budget allocation |
-| FallbackManager | ✅ | 4-level fallback strategy |
-| MetricsCollector | ✅ | Latency, token, and success-rate tracking |
-| Score Merging | ✅ | α×algo + β×llm |
-| Markdown Parser | ✅ | Full support |
-| PDF Parser | ✅ | Based on pdf-extract |
-| DOCX Parser | ✅ | Based on docx-rs |
-
----
-
-## Areas for Improvement
-
-### Missing features
-
-| Module | Missing | Priority |
-|--------|---------|----------|
-| `parser/` | HTML parser | Medium |
-| `parser/` | Plain text parser | Low |
-| `retrieval/strategy/` | Batch prompt optimization | Medium |
-
-### Architectural limitations
-
-| Limitation | Description | Priority |
-|------------|-------------|----------|
-| **Parallel execution not implemented** | ExecutionGroup is designed, but `execute()` still runs sequentially | High |
-| **No strategy switching** | Once Plan selects a strategy, it cannot switch mid-run | Low |
-| **Incremental indexing skeleton** | `ChangeDetector` exists but is not wired into the pipeline | Low |
-
----
-
-## Roadmap for the Next Phases
-
-### Phase 1: Performance baselines (current)
-
-**Goal**: establish performance baselines to ground optimization work
-
-| Task | File | Status |
-|------|------|--------|
-| Index benchmarks | `benches/index_bench.rs` | 📝 Planned |
-| Retrieval benchmarks | `benches/retrieval_bench.rs` | 📝 Planned |
-| Pilot benchmarks | `benches/pilot_bench.rs` | 📝 Planned |
-| Token consumption benchmarks | `benches/token_bench.rs` | 📝 Planned |
-
----
-
-### Phase 2: Performance optimization
-
-**Goal**: optimize critical paths based on benchmark results
-
-#### 2.1 Implement parallel execution
-
-**Current state**: `ExecutionGroup` is designed, but `execute()` still runs sequentially
-
-```rust
-// Current (sequential)
-for &stage_idx in &group.stage_indices {
-    entry.stage.execute(&mut ctx).await?;
-}
-
-// Target (parallel)
-futures::future::try_join_all(
-    group.stage_indices.iter()
-        .map(|&idx| self.stages[idx].execute(&ctx))
-).await?;
-```
-
-**Challenges**:
-- `PipelineContext` must be `Send + Sync`
-- Requires fine-grained locking or message passing
-
-#### 2.2 Optimize Pilot calls
-
-```rust
-// Current: evaluate nodes one at a time
-for node_id in node_ids {
-    pilot.evaluate_node(tree, node_id).await;
-}
-
-// Target: batch evaluation
-pilot.evaluate_nodes_batch(tree, node_ids).await;
-```
-
-#### 2.3 Cache optimization
-
-- Improve the path cache hit rate
-- Cache hot queries
-- Cache LLM responses (for identical contexts)
-
----
-
-### Phase 3: Feature completion
-
-#### 3.1 HTML parser
-
-```rust
-// src/parser/html/mod.rs (new)
-pub struct HtmlParser {
-    config: HtmlConfig,
-}
-
-impl DocumentParser for HtmlParser {
-    fn parse(&self, content: &str) -> ParseResult {
-        // Use the html5ever or scraper crate
-    }
-}
-```
-
-#### 3.2 More LLM providers
-
-- Anthropic Claude API
-- Local LLMs (Ollama, llama.cpp)
-- Azure OpenAI
-
-#### 3.3 Streaming output
-
-```rust
-// Support streaming retrieval results
-pub async fn query_stream(
-    &self,
-    doc_id: &DocumentId,
-    query: &str,
-) -> impl Stream {
-    // Yield results as retrieval progresses
-}
-```
-
----
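The `α×algo + β×llm` score-merging entry in the completed-features table above can be read as a small standalone function. This sketch uses the same weights that appear in the beam and greedy search diffs later in this series (α = 0.4, β = 0.6 × Pilot confidence):

```rust
/// Weighted merge of an algorithmic score and a Pilot (LLM) score.
fn merge_scores(algo: f32, llm: f32, confidence: f32) -> f32 {
    let alpha = 0.4;
    let beta = 0.6 * confidence;
    if beta > 0.0 {
        (alpha * algo + beta * llm) / (alpha + beta)
    } else {
        // No usable Pilot signal: fall back to the algorithmic score.
        algo
    }
}

fn main() {
    // With algo = 0.5, llm = 0.9 and confidence = 1.0:
    // (0.4 * 0.5 + 0.6 * 0.9) / (0.4 + 0.6) = 0.74
    println!("{:.2}", merge_scores(0.5, 0.9, 1.0));
}
```

Because the result is normalized by α + β, a low-confidence Pilot shifts the final score only slightly away from the algorithmic baseline.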
-### Phase 4: Example completion
-
-| Example | Description | Status |
-|---------|-------------|--------|
-| `basic.rs` | Basic usage | ✅ Exists |
-| `index.rs` | Index a document | ✅ Exists |
-| `retrieve.rs` | Retrieve from a document | ✅ Exists |
-| `markdownflow.rs` | Markdown flow | ✅ Exists |
-| `custom_pilot.rs` | Custom Pilot | 📝 Planned |
-| `batch_processing.rs` | Batch processing | 📝 Planned |
-| `streaming.rs` | Streaming output | 📝 Planned |
-| `multi_format.rs` | Multi-format documents | 📝 Planned |
-| `cli_tool.rs` | CLI tool example | 📝 Planned |
-
----
-
-### Phase 5: Test enhancements
-
-| Test type | Current | Target |
-|-----------|---------|--------|
-| Unit tests | 197 | +30 |
-| Integration tests | 1 | +10 |
-| Benchmarks | 0 | +4 |
-| Coverage reporting | None | cargo-tarpaulin |
-
----
-
-## Execution Order
-
-```
-Phase 1 (performance baselines) ← current
-    ↓
-Phase 2 (performance optimization)
-    ↓
-Phase 3 (feature completion)
-    ↓
-Phase 4 (example completion)
-    ↓
-Phase 5 (test enhancements)
-```
-
-**Establish the performance baselines first**, so that we can:
-1. Find the real bottlenecks
-2. Measure the impact of optimizations
-3. Prevent performance regressions
-
----
-
-## References
-
-- [Architecture v2](./architecture-v2.svg)
-- [Pilot Architecture](./pilot-architecture.svg)
-- [Pipeline Design](./v2.md)
-- [Pilot Design](./pilot.md)
-- [RFCs](../rfcs/)
diff --git a/src/client/builder.rs b/src/client/builder.rs
index 243e047e..d44f7551 100644
--- a/src/client/builder.rs
+++ b/src/client/builder.rs
@@ -181,11 +181,15 @@ impl EngineBuilder {
         let mut retriever = PipelineRetriever::new().with_max_iterations(retrieval_config.search.max_iterations);

-        // Add LLM client if API key is available in retrieval config
-        if let Some(ref api_key) = retrieval_config.api_key {
+        // Add LLM client if API key is available
+        // Try retrieval config first, then fall back to summary config
+        let retrieval_api_key = retrieval_config.api_key.clone()
+            .or_else(|| config.summary.api_key.clone());
+
+        if let Some(api_key) = retrieval_api_key {
             let llm_config = crate::llm::LlmConfig::new(&retrieval_config.model)
                 .with_endpoint(retrieval_config.endpoint.clone())
-                .with_api_key(api_key.clone())
+                .with_api_key(api_key)
                 .with_temperature(retrieval_config.temperature);
             let llm_client = crate::llm::LlmClient::new(llm_config);
             retriever = retriever.with_llm_client(llm_client);
diff --git a/src/retrieval/pilot/llm_pilot.rs b/src/retrieval/pilot/llm_pilot.rs
index e66749cc..c163396a 100644
--- a/src/retrieval/pilot/llm_pilot.rs
+++ b/src/retrieval/pilot/llm_pilot.rs
@@ -298,6 +298,7 @@ impl Pilot for LlmPilot {

         // Check budget
         if !self.has_budget() {
+            debug!("Budget exhausted, cannot guide start");
             return None;
         }

@@ -308,7 +309,14 @@ impl Pilot for LlmPilot {
         let candidates = tree.children(tree.root());

         // Make LLM call
-        Some(self.call_llm(InterventionPoint::Start, &context, &candidates).await)
+        let decision = self.call_llm(InterventionPoint::Start, &context, &candidates).await;
+        info!(
+            "Pilot start guidance: confidence={}, candidates={}",
+            decision.confidence,
+            decision.ranked_candidates.len()
+        );
+
+        Some(decision)
     }

     async fn guide_backtrack(
diff --git a/src/retrieval/pipeline_retriever.rs b/src/retrieval/pipeline_retriever.rs
index 084ad53d..16bf5fe2 100644
--- a/src/retrieval/pipeline_retriever.rs
+++ b/src/retrieval/pipeline_retriever.rs
@@ -16,6 +16,7 @@ use super::strategy::LlmStrategy;
 use super::types::{RetrieveOptions, RetrieveResponse};
 use crate::domain::DocumentTree;
 use crate::llm::LlmClient;
+use crate::retrieval::pilot::{LlmPilot, PilotConfig};

 /// Pipeline-based retriever using the stage architecture.
/// @@ -89,10 +90,12 @@ impl PipelineRetriever { } orchestrator = orchestrator.stage(plan_stage); - // Add search stage + // Add search stage with Pilot for semantic navigation let mut search_stage = SearchStage::new(); if let Some(ref client) = self.llm_client { - search_stage = search_stage.with_llm_strategy(LlmStrategy::new(client.clone())); + // Create LLM-based Pilot for semantic navigation guidance + let pilot = LlmPilot::new(client.clone(), PilotConfig::default()); + search_stage = search_stage.with_pilot(Arc::new(pilot)); } orchestrator = orchestrator.stage(search_stage); diff --git a/src/retrieval/search/beam.rs b/src/retrieval/search/beam.rs index 63cdcec1..2dec5e40 100644 --- a/src/retrieval/search/beam.rs +++ b/src/retrieval/search/beam.rs @@ -12,7 +12,7 @@ use tracing::{debug, trace}; use super::super::RetrievalContext; use super::super::types::{NavigationDecision, NavigationStep, SearchPath}; -use super::scorer::NodeScorer; +use super::scorer::{NodeScorer, ScoringContext}; use super::{SearchConfig, SearchResult, SearchTree}; use crate::domain::{DocumentTree, NodeId}; use crate::retrieval::pilot::{Pilot, SearchState}; @@ -28,34 +28,36 @@ use crate::retrieval::pilot::{Pilot, SearchState}; /// (when multiple candidates are available) to get semantic guidance /// on which branches are most relevant to the query. pub struct BeamSearch { - scorer: NodeScorer, beam_width: usize, } impl BeamSearch { /// Create a new beam search with default beam width. pub fn new() -> Self { - Self { - scorer: NodeScorer::new(Default::default()), - beam_width: 3, - } + Self { beam_width: 3 } } /// Create beam search with specified width. pub fn with_width(width: usize) -> Self { Self { - scorer: NodeScorer::new(Default::default()), beam_width: width.max(1), } } - /// Score candidates using the algorithm's scorer. - fn score_candidates( + /// Create a scorer for the given query. + fn create_scorer(&self, query: &str) -> NodeScorer { + NodeScorer::new(ScoringContext::new(query)) + } + + /// Score candidates using a query-specific scorer. + fn score_candidates_with_query( &self, tree: &DocumentTree, candidates: &[NodeId], + query: &str, ) -> Vec<(NodeId, f32)> { - self.scorer.score_and_sort(tree, candidates) + let scorer = self.create_scorer(query); + scorer.score_and_sort(tree, candidates) } /// Merge algorithm scores with Pilot decision. 
@@ -67,7 +69,9 @@ impl BeamSearch { tree: &DocumentTree, candidates: &[NodeId], pilot_decision: &crate::retrieval::pilot::PilotDecision, + query: &str, ) -> Vec<(NodeId, f32)> { + let scorer = self.create_scorer(query); let alpha = 0.4; let beta = 0.6 * pilot_decision.confidence; @@ -81,7 +85,7 @@ impl BeamSearch { let mut merged: Vec<(NodeId, f32)> = candidates .iter() .map(|&node_id| { - let algo_score = self.scorer.score(tree, node_id); + let algo_score = scorer.score(tree, node_id); let pilot_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0); // Weighted combination @@ -138,18 +142,18 @@ impl SearchTree for BeamSearch { // Use Pilot's ranked order if available if guidance.has_candidates() { - self.merge_with_pilot_decision(tree, &root_children, &guidance) + self.merge_with_pilot_decision(tree, &root_children, &guidance, &context.query) } else { - self.score_candidates(tree, &root_children) + self.score_candidates_with_query(tree, &root_children, &context.query) } } else { - self.score_candidates(tree, &root_children) + self.score_candidates_with_query(tree, &root_children, &context.query) } } else { - self.score_candidates(tree, &root_children) + self.score_candidates_with_query(tree, &root_children, &context.query) } } else { - self.score_candidates(tree, &root_children) + self.score_candidates_with_query(tree, &root_children, &context.query) }; let mut current_beam: Vec = initial_candidates @@ -211,16 +215,16 @@ impl SearchTree for BeamSearch { ); // Merge algorithm scores with Pilot decision - self.merge_with_pilot_decision(tree, &children, &decision) + self.merge_with_pilot_decision(tree, &children, &decision, &context.query) } } } else { // No intervention, use algorithm scoring - self.score_candidates(tree, &children) + self.score_candidates_with_query(tree, &children, &context.query) } } else { // No Pilot, use algorithm scoring - self.score_candidates(tree, &children) + self.score_candidates_with_query(tree, &children, &context.query) }; // ============================================== @@ -268,6 +272,16 @@ impl SearchTree for BeamSearch { } } + // Fallback: if no results found, add best candidates regardless of score + if result.paths.is_empty() && config.min_score > 0.0 { + debug!("No results above min_score, adding best candidates as fallback"); + // Re-score initial candidates and take top-k + let all_candidates = self.score_candidates_with_query(tree, &tree.children(tree.root()), &context.query); + for (node_id, score) in all_candidates.into_iter().take(config.top_k) { + result.paths.push(SearchPath::from_node(node_id, score)); + } + } + // Sort final results by score result.paths.sort_by(|a, b| { b.score diff --git a/src/retrieval/search/greedy.rs b/src/retrieval/search/greedy.rs index f016a066..ad9fd8d8 100644 --- a/src/retrieval/search/greedy.rs +++ b/src/retrieval/search/greedy.rs @@ -7,27 +7,85 @@ //! When a Pilot is provided, it can provide semantic guidance at decision points. use async_trait::async_trait; +use tracing::{debug, trace}; use super::super::RetrievalContext; use super::super::types::{NavigationDecision, NavigationStep, SearchPath}; -use super::scorer::NodeScorer; +use super::scorer::{NodeScorer, ScoringContext}; use super::{SearchConfig, SearchResult, SearchTree}; -use crate::domain::DocumentTree; -use crate::retrieval::pilot::Pilot; +use crate::domain::{DocumentTree, NodeId}; +use crate::retrieval::pilot::{Pilot, SearchState}; /// Greedy search - always follows the best single path. /// /// Fast but may miss relevant content in other branches. 
-pub struct GreedySearch {
-    scorer: NodeScorer,
-}
+/// When a Pilot is provided, it can guide the search at key decision points.
+pub struct GreedySearch;

 impl GreedySearch {
     /// Create a new greedy search.
     pub fn new() -> Self {
-        Self {
-            scorer: NodeScorer::new(Default::default()),
+        Self
+    }
+
+    /// Create a scorer for the given query.
+    fn create_scorer(&self, query: &str) -> NodeScorer {
+        NodeScorer::new(ScoringContext::new(query))
+    }
+
+    /// Score candidates using a query-specific scorer.
+    fn score_candidates_with_query(
+        &self,
+        tree: &DocumentTree,
+        candidates: &[NodeId],
+        query: &str,
+    ) -> Vec<(NodeId, f32)> {
+        let scorer = self.create_scorer(query);
+        scorer.score_and_sort(tree, candidates)
+    }
+
+    /// Merge algorithm scores with Pilot decision.
+    fn merge_with_pilot_decision(
+        &self,
+        tree: &DocumentTree,
+        candidates: &[NodeId],
+        pilot_decision: &crate::retrieval::pilot::PilotDecision,
+        query: &str,
+    ) -> Vec<(NodeId, f32)> {
+        let scorer = self.create_scorer(query);
+        let alpha = 0.4;
+        let beta = 0.6 * pilot_decision.confidence;
+
+        // Build a map from node_id to pilot score
+        let mut pilot_scores: std::collections::HashMap<NodeId, f32> = std::collections::HashMap::new();
+        for ranked in &pilot_decision.ranked_candidates {
+            pilot_scores.insert(ranked.node_id, ranked.score);
         }
+
+        // Merge scores
+        let mut merged: Vec<(NodeId, f32)> = candidates
+            .iter()
+            .map(|&node_id| {
+                let algo_score = scorer.score(tree, node_id);
+                let pilot_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0);
+
+                // Weighted combination
+                let final_score = if beta > 0.0 {
+                    (alpha * algo_score + beta * pilot_score) / (alpha + beta)
+                } else {
+                    algo_score
+                };
+
+                (node_id, final_score)
+            })
+            .collect();
+
+        // Sort by merged score
+        merged.sort_by(|a, b| {
+            b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
+        });
+
+        merged
     }
 }
@@ -44,13 +102,15 @@ impl SearchTree for GreedySearch {
         tree: &DocumentTree,
         context: &RetrievalContext,
         config: &SearchConfig,
-        _pilot: Option<&dyn Pilot>,
+        pilot: Option<&dyn Pilot>,
     ) -> SearchResult {
-        // Note: Pilot integration for GreedySearch can be added in Phase 2
-        // For now, we keep the original behavior
         let mut result = SearchResult::default();
         let mut current_path = SearchPath::new();
         let mut current_node = tree.root();
+        let mut visited: std::collections::HashSet<NodeId> = std::collections::HashSet::new();
+
+        // Track Pilot interventions
+        let mut pilot_interventions = 0;

         for iteration in 0..config.max_iterations {
             result.iterations = iteration + 1;
@@ -67,8 +127,43 @@
             break;
         }

-        // Score all children
-        let scored_children = self.scorer.score_and_sort(tree, &children);
+        // ========== Pilot Integration Point ==========
+        let scored_children = if let Some(p) = pilot {
+            // Build search state for Pilot
+            let state = SearchState::new(
+                tree,
+                &context.query,
+                &current_path.nodes,
+                &children,
+                &visited,
+            );
+
+            // Check if Pilot wants to intervene
+            if p.should_intervene(&state) {
+                trace!("Pilot intervening at greedy decision point with {} candidates", children.len());
+
+                match p.decide(&state).await {
+                    decision => {
+                        pilot_interventions += 1;
+                        debug!(
+                            "Pilot decision: confidence={}, direction={:?}",
+                            decision.confidence,
+                            std::mem::discriminant(&decision.direction)
+                        );
+
+                        // Merge algorithm scores with Pilot decision
+                        self.merge_with_pilot_decision(tree, &children, &decision, &context.query)
+                    }
+                }
+            } else {
+                // No intervention, use algorithm scoring
+                self.score_candidates_with_query(tree,
&children, &context.query) + } + } else { + // No Pilot, use algorithm scoring + self.score_candidates_with_query(tree, &children, &context.query) + }; + // ============================================== // Find the best child that meets minimum score let mut best_child = None; @@ -83,6 +178,8 @@ impl SearchTree for GreedySearch { } if let Some(child_id) = best_child { + visited.insert(child_id); + // Record navigation step let child_node = tree.get(child_id); result.trace.push(NavigationStep { @@ -105,13 +202,18 @@ impl SearchTree for GreedySearch { break; } } else { - // No good children found + // No good children found - add current path as result current_path.leaf = Some(current_node); - result.paths.push(current_path); + if current_path.score > 0.0 { + result.paths.push(current_path); + } break; } } + // Record Pilot interventions + result.pilot_interventions = pilot_interventions; + result } From 80125b155bbb08a699601a351c5c3daf12d33e73 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 19:23:20 +0800 Subject: [PATCH 09/21] feat(pilot): remove pilot interface documentation from design doc The core Pilot interface definition and decision types have been removed from the pilot.md design document as they are now implemented in code. fix(builder): make LLM API key required for retrieval functionality The LLM API key is now required for retrieval since Pilot needs it for semantic navigation. Updated the builder to enforce this requirement and added appropriate error handling. refactor(judge): enhance content aggregation with descendant leaf content Modified the judge stage to always include both node's own content and all descendant leaf content for complete context. Added new collect_leaf_content method to traverse and collect content from leaf descendants. docs(readme): update architecture diagram reference Renamed architecture-v2.svg to architecture.svg and updated the README to reference the new filename. 
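The descendant-leaf collection that this message describes can be sketched independently of the crate's types. `Node` below is a toy stand-in for the real `DocumentTree` API; only the traversal strategy (explicit stack, leaves only, starting node excluded) is taken from the description above:

```rust
/// Toy tree node used only for this illustration.
struct Node {
    content: String,
    children: Vec<Node>,
}

/// Depth-first walk with an explicit stack, gathering content from leaf
/// descendants only. The starting node's own content is handled separately.
fn collect_leaf_content(root: &Node) -> String {
    let mut parts: Vec<&str> = Vec::new();
    // Start from the children so the root itself is excluded.
    let mut stack: Vec<&Node> = root.children.iter().collect();
    while let Some(node) = stack.pop() {
        if node.children.is_empty() {
            if !node.content.is_empty() {
                parts.push(&node.content);
            }
        } else {
            stack.extend(node.children.iter());
        }
    }
    parts.join("\n\n")
}
```

Note that a stack-based walk visits siblings in reverse order; if output order matters, an implementation can push children in reverse or sort the collected parts afterwards.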
---
 README.md                                             |   2 +-
 .../{architecture-v2.svg => architecture.svg}         |   0
 docs/design/pilot.md                                  | 109 ------------------
 src/client/builder.rs                                 |  23 ++--
 src/retrieval/stages/judge.rs                         |  98 ++++++++++++++--
 5 files changed, 101 insertions(+), 131 deletions(-)
 rename docs/design/{architecture-v2.svg => architecture.svg} (100%)

diff --git a/README.md b/README.md
index 50b88351..b4ccc39a 100644
--- a/README.md
+++ b/README.md
@@ -141,7 +141,7 @@ See the [examples/](examples/) directory for complete working examples:

 ### System Overview

-![Architecture](docs/design/architecture-v2.svg)
+![Architecture](docs/design/architecture.svg)

 ## Contributing
diff --git a/docs/design/architecture-v2.svg b/docs/design/architecture.svg
similarity index 100%
rename from docs/design/architecture-v2.svg
rename to docs/design/architecture.svg
diff --git a/docs/design/pilot.md b/docs/design/pilot.md
index 0f907f25..d86e00a7 100644
--- a/docs/design/pilot.md
+++ b/docs/design/pilot.md
@@ -76,115 +76,6 @@ Pilot is the core intelligence component of the Vectorless retrieval system, responsible for understanding queries,
 └─────────────────────────────────────────────────────────────────────────────┘
 ```

-### 1.2 Core interface definitions
-
-```rust
-/// Search state - the context passed to the Pilot
-pub struct SearchState<'a> {
-    /// Document tree
-    pub tree: &'a DocumentTree,
-    /// User query
-    pub query: &'a str,
-    /// Current path (from root to the current node)
-    pub path: &'a [NodeId],
-    /// Candidate child nodes
-    pub candidates: &'a [NodeId],
-    /// Visited nodes
-    pub visited: &'a HashSet<NodeId>,
-    /// Current depth
-    pub depth: usize,
-    /// Search iteration count
-    pub iteration: usize,
-    /// Current best score
-    pub best_score: f32,
-    /// Whether the search is backtracking
-    pub is_backtracking: bool,
-}
-
-/// Pilot trait - the core interface
-#[async_trait]
-pub trait Pilot: Send + Sync {
-    /// Get the Pilot's name
-    fn name(&self) -> &str;
-
-    /// Decide whether to intervene
-    fn should_intervene(&self, state: &SearchState<'_>) -> bool;
-
-    /// Make a decision
-    async fn decide(&self, state: &SearchState<'_>) -> PilotDecision;
-
-    /// Guidance before the search starts
-    async fn guide_start(
-        &self,
-        tree: &DocumentTree,
-        query: &str
-    ) -> Option<PilotDecision>;
-
-    /// Get the configuration
-    fn config(&self) -> &PilotConfig;
-
-    /// Get the metrics
-    fn metrics(&self) -> &PilotMetrics;
-
-    /// Reset state (called when a new query starts)
-    fn reset(&self);
-}
-```
-
-### 1.3 Pilot decision types
-
-```rust
-/// Pilot decision result
-#[derive(Debug, Clone)]
-pub struct PilotDecision {
-    /// Candidate nodes ranked by recommended priority
-    pub ranked_candidates: Vec<RankedCandidate>,
-    /// Suggested search direction
-    pub direction: SearchDirection,
-    /// Confidence (0.0 - 1.0)
-    pub confidence: f32,
-    /// Reasoning behind the decision (for explainability)
-    pub reasoning: String,
-    /// Identifier of the intervention point
-    pub intervention_point: InterventionPoint,
-}
-
-/// Ranked candidate node
-#[derive(Debug, Clone)]
-pub struct RankedCandidate {
-    pub node_id: NodeId,
-    pub score: f32,
-    pub reason: Option<String>,
-}
-
-/// Suggested search direction
-#[derive(Debug, Clone)]
-pub enum SearchDirection {
-    /// Go deeper into the current branch
-    GoDeeper {
-        reason: String,
-    },
-    /// Explore sibling nodes
-    ExploreSiblings {
-        recommended: Vec<NodeId>,
-    },
-    /// Backtrack to the parent node
-    Backtrack {
-        reason: String,
-        alternative_branches: Vec<NodeId>,
-    },
-    /// Jump to a specific node (a non-local move)
-    JumpTo {
-        target: NodeId,
-        reason: String,
-    },
-    /// The current node is the answer
-    FoundAnswer {
-        confidence: f32,
-    },
-}
-```
-
 ---

 ## 1.4 Information sources for Pilot decisions
diff --git a/src/client/builder.rs b/src/client/builder.rs
index d44f7551..bc567591 100644
--- a/src/client/builder.rs
+++ b/src/client/builder.rs
@@ -181,19 +181,18 @@ impl EngineBuilder {
         let mut retriever = PipelineRetriever::new().with_max_iterations(retrieval_config.search.max_iterations);

-        // Add LLM client if API key is available
+        // LLM API key is REQUIRED for retrieval (Pilot needs it for semantic navigation)
         // Try retrieval config first, then fall back to summary config
         let retrieval_api_key = retrieval_config.api_key.clone()
-            .or_else(||
config.summary.api_key.clone()); + .or_else(|| config.summary.api_key.clone()) + .ok_or(BuildError::MissingApiKey)?; - if let Some(api_key) = retrieval_api_key { - let llm_config = crate::llm::LlmConfig::new(&retrieval_config.model) - .with_endpoint(retrieval_config.endpoint.clone()) - .with_api_key(api_key) - .with_temperature(retrieval_config.temperature); - let llm_client = crate::llm::LlmClient::new(llm_config); - retriever = retriever.with_llm_client(llm_client); - } + let llm_config = crate::llm::LlmConfig::new(&retrieval_config.model) + .with_endpoint(retrieval_config.endpoint.clone()) + .with_api_key(retrieval_api_key) + .with_temperature(retrieval_config.temperature); + let llm_client = crate::llm::LlmClient::new(llm_config); + retriever = retriever.with_llm_client(llm_client); Ok(Engine::with_components( config, workspace, retriever, executor, @@ -217,6 +216,10 @@ pub enum BuildError { /// Workspace error. #[error("Workspace error: {0}")] Workspace(String), + + /// Missing API key for retrieval. + #[error("Missing API key: LLM API key is required for retrieval. Set OPENAI_API_KEY environment variable or configure retrieval.api_key")] + MissingApiKey, } #[cfg(test)] diff --git a/src/retrieval/stages/judge.rs b/src/retrieval/stages/judge.rs index f22806db..478f0926 100644 --- a/src/retrieval/stages/judge.rs +++ b/src/retrieval/stages/judge.rs @@ -68,6 +68,11 @@ impl JudgeStage { } /// Aggregate content from candidates. + /// + /// Always includes: + /// 1. Node's own content (if available) + /// 2. All descendant leaf content (for complete context) + /// 3. Falls back to summary only if no content at all fn aggregate_content(&self, ctx: &PipelineContext) -> (String, usize) { let mut content_parts = Vec::new(); let mut total_tokens = 0; @@ -77,13 +82,27 @@ impl JudgeStage { // Add title content_parts.push(format!("## {}\n", node.title)); - // Add summary if available, otherwise content preview - if !node.summary.is_empty() { + // Always collect all content: own content + descendant leaf content + let mut has_content = false; + + // Add node's own content if available + if !node.content.is_empty() { + content_parts.push(format!("{}\n\n", node.content)); + has_content = true; + eprintln!("[JUDGE] Node '{}' has own content: {} chars", node.title, node.content.len()); + } + + // Also collect content from leaf descendants (for intermediate nodes) + let leaf_content = self.collect_leaf_content(&ctx.tree, candidate.node_id); + if !leaf_content.is_empty() { + eprintln!("[JUDGE] Collected leaf content for '{}': {} chars", node.title, leaf_content.len()); + content_parts.push(format!("{}\n\n", leaf_content)); + has_content = true; + } + + // Fall back to summary only if no content available + if !has_content && !node.summary.is_empty() { content_parts.push(format!("{}\n\n", node.summary)); - } else if !node.content.is_empty() { - // Limit content preview - let preview: String = node.content.chars().take(500).collect(); - content_parts.push(format!("{}\n\n", preview)); } // Estimate tokens @@ -94,6 +113,43 @@ impl JudgeStage { (content_parts.join(""), total_tokens) } + /// Collect content from leaf descendants of a node (excluding the node itself). 
+    fn collect_leaf_content(&self, tree: &crate::domain::DocumentTree, node_id: crate::domain::NodeId) -> String {
+        let mut content_parts = Vec::new();
+
+        // Start with children, not the node itself
+        let children = tree.children(node_id);
+        if children.is_empty() {
+            // Node is already a leaf, no descendants to collect
+            return String::new();
+        }
+
+        let mut stack: Vec<NodeId> = children;
+        let mut visited_count = 0;
+
+        while let Some(current_id) = stack.pop() {
+            let current_children = tree.children(current_id);
+
+            if current_children.is_empty() {
+                // Leaf node - collect its content
+                if let Some(node) = tree.get(current_id) {
+                    if !node.content.is_empty() {
+                        eprintln!("[JUDGE] Found leaf '{}' with {} chars content", node.title, node.content.len());
+                        content_parts.push(format!("### {}\n{}", node.title, node.content));
+                        visited_count += 1;
+                    }
+                }
+            } else {
+                // Non-leaf node - add children to stack
+                eprintln!("[JUDGE] Node has {} children, adding to stack", current_children.len());
+                stack.extend(current_children);
+            }
+        }
+
+        eprintln!("[JUDGE] Collected content from {} leaf nodes", visited_count);
+        content_parts.join("\n\n")
+    }
+
     /// Check sufficiency level.
     fn check_sufficiency(&self, ctx: &PipelineContext) -> SufficiencyLevel {
         if !ctx.options.sufficiency_check {
@@ -118,14 +174,34 @@
         for candidate in &ctx.candidates {
             if let Some(node) = ctx.tree.get(candidate.node_id) {
+                // Build content: node's own content + all descendant leaf content
+                let content = if ctx.options.include_content {
+                    let mut content_parts = Vec::new();
+
+                    // Add node's own content
+                    if !node.content.is_empty() {
+                        content_parts.push(node.content.clone());
+                    }
+
+                    // Add content from leaf descendants
+                    let leaf_content = self.collect_leaf_content(&ctx.tree, candidate.node_id);
+                    if !leaf_content.is_empty() {
+                        content_parts.push(leaf_content);
+                    }
+
+                    if content_parts.is_empty() {
+                        None
+                    } else {
+                        Some(content_parts.join("\n\n"))
+                    }
+                } else {
+                    None
+                };
+
                 results.push(RetrievalResult {
                     node_id: Some(format!("{:?}", candidate.node_id)),
                     title: node.title.clone(),
-                    content: if ctx.options.include_content {
-                        Some(node.content.clone())
-                    } else {
-                        None
-                    },
+                    content,
                     summary: if ctx.options.include_summaries {
                         Some(node.summary.clone())
                     } else {

From f530f84d4cd57d32bb2b81a73bd10eff1a019af5 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 19:48:40 +0800
Subject: [PATCH 10/21] refactor(client): remove summary field from retrieval results

Only include original content in retrieval results, not the summary (per design: retrieval should return original text, not summaries).

refactor(retrieval): remove debug logging and unused counter from judge stage

Remove unnecessary eprintln!
debug statements and visited_count variable from the judge stage to clean up code and reduce console output --- src/client/engine.rs | 6 ++---- src/retrieval/stages/judge.rs | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/client/engine.rs b/src/client/engine.rs index aeaa87b5..bb973004 100644 --- a/src/client/engine.rs +++ b/src/client/engine.rs @@ -397,10 +397,8 @@ impl Engine { .map(|r| { let mut parts = vec![format!("## {}", r.title)]; - if let Some(ref summary) = r.summary { - parts.push(format!("Summary: {}", summary)); - } - + // Only include original content, not summary + // (per design: retrieval should return original text, not summary) if let Some(ref content) = r.content { parts.push(content.clone()); } diff --git a/src/retrieval/stages/judge.rs b/src/retrieval/stages/judge.rs index 478f0926..5c7bb129 100644 --- a/src/retrieval/stages/judge.rs +++ b/src/retrieval/stages/judge.rs @@ -89,13 +89,11 @@ impl JudgeStage { if !node.content.is_empty() { content_parts.push(format!("{}\n\n", node.content)); has_content = true; - eprintln!("[JUDGE] Node '{}' has own content: {} chars", node.title, node.content.len()); } // Also collect content from leaf descendants (for intermediate nodes) let leaf_content = self.collect_leaf_content(&ctx.tree, candidate.node_id); if !leaf_content.is_empty() { - eprintln!("[JUDGE] Collected leaf content for '{}': {} chars", node.title, leaf_content.len()); content_parts.push(format!("{}\n\n", leaf_content)); has_content = true; } @@ -125,7 +123,6 @@ impl JudgeStage { } let mut stack: Vec = children; - let mut visited_count = 0; while let Some(current_id) = stack.pop() { let current_children = tree.children(current_id); @@ -134,19 +131,15 @@ impl JudgeStage { // Leaf node - collect its content if let Some(node) = tree.get(current_id) { if !node.content.is_empty() { - eprintln!("[JUDGE] Found leaf '{}' with {} chars content", node.title, node.content.len()); content_parts.push(format!("### {}\n{}", node.title, node.content)); - visited_count += 1; } } } else { // Non-leaf node - add children to stack - eprintln!("[JUDGE] Node has {} children, adding to stack", current_children.len()); stack.extend(current_children); } } - eprintln!("[JUDGE] Collected content from {} leaf nodes", visited_count); content_parts.join("\n\n") } From 70618e1777dbea1cdf08fa9afd499ef0acb00689 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 19:59:28 +0800 Subject: [PATCH 11/21] feat(parser): clear content buffer after storing preamble Handle preamble content (content before first heading) and clear the buffer after storing as preamble to avoid duplication in subsequent processing. 
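A regression-test sketch for the duplication this patch fixes. The `MarkdownParser::new()` and `parse` calls mirror the commented-out benchmark code removed earlier in this series; the shape of the parse result (`preamble` and `nodes` fields) is an assumption for illustration only:

```rust
#[test]
fn preamble_is_stored_once_and_not_duplicated() {
    let doc = "Intro text before any heading.\n\n# First Section\nBody text.";
    let parser = MarkdownParser::new();
    let parsed = parser.parse(doc); // assumed parse result shape below

    // The intro appears exactly once, as the preamble...
    assert_eq!(parsed.preamble.trim(), "Intro text before any heading.");
    // ...and, with the buffer cleared, no longer leaks into the first section.
    assert!(!parsed.nodes[0].content.contains("Intro text"));
}
```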
--- src/parser/markdown/parser.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser/markdown/parser.rs b/src/parser/markdown/parser.rs index 7e1f3a2d..366be1be 100644 --- a/src/parser/markdown/parser.rs +++ b/src/parser/markdown/parser.rs @@ -320,12 +320,14 @@ fn finish_current_node( config: &MarkdownConfig, current_line: usize, ) -> Option { - // Handle preamble content + // Handle preamble content (content before first heading) if nodes.is_empty() && !content_buffer.trim().is_empty() { if config.create_preamble_node { let content = content_buffer.trim(); *preamble_content = content.to_string(); } + // Clear the buffer after storing as preamble to avoid duplication + content_buffer.clear(); } // Finish current heading node From 14cd08b385f5a9e080e358d6bec7602b32738981 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 20:05:28 +0800 Subject: [PATCH 12/21] feat: add tracing support for debug logging - Add tracing-subscriber as dev dependency with env-filter feature - Initialize tracing formatter in markdownflow example - Add comment explaining how to enable debug output with RUST_LOG=debug --- Cargo.toml | 1 + examples/markdownflow.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 00d69362..ebbd37de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ rand = "0.8" [dev-dependencies] tempfile = "3.10" tokio-test = "0.4" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } [profile.release] opt-level = 3 diff --git a/examples/markdownflow.rs b/examples/markdownflow.rs index 4cde85f9..ba566aa6 100644 --- a/examples/markdownflow.rs +++ b/examples/markdownflow.rs @@ -35,6 +35,9 @@ Vectorless is a document indexing and retrieval library that uses tree-based nav #[tokio::main] async fn main() -> Result<(), Box> { + // Initialize tracing for debug output (set RUST_LOG=debug to see more) + tracing_subscriber::fmt::init(); + println!("=== Vectorless Markdown Flow Example ===\n"); // Step 1: Create a Vectorless client (no API key needed - LLM config is automatic) From e4af764e40f3df2d481686a8d029c7f8d0c9ca0c Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 20:33:23 +0800 Subject: [PATCH 13/21] feat(retrieval): add content aggregation system with relevance scoring - Implement precision-focused content aggregation with token budget awareness - Add RelevanceScorer with keyword-based and BM25 scoring strategies - Create BudgetAllocator with greedy, proportional, and hierarchical allocation - Build StructureBuilder for markdown and structured output formats - Design configurable system supporting different scoring and allocation strategies - Add comprehensive documentation for content aggregation architecture - Implement fallback mechanisms when no content meets relevance threshold - Provide detailed performance characteristics and testing strategy --- docs/design/content-aggregation.md | 361 ++++++++++++++++ src/retrieval/content/aggregator.rs | 402 ++++++++++++++++++ src/retrieval/content/budget.rs | 624 ++++++++++++++++++++++++++++ src/retrieval/content/builder.rs | 522 +++++++++++++++++++++++ src/retrieval/content/config.rs | 158 +++++++ src/retrieval/content/mod.rs | 46 ++ src/retrieval/content/scorer.rs | 439 +++++++++++++++++++ src/retrieval/mod.rs | 8 + 8 files changed, 2560 insertions(+) create mode 100644 docs/design/content-aggregation.md create mode 100644 src/retrieval/content/aggregator.rs create mode 100644 src/retrieval/content/budget.rs create mode 
100644 src/retrieval/content/builder.rs create mode 100644 src/retrieval/content/config.rs create mode 100644 src/retrieval/content/mod.rs create mode 100644 src/retrieval/content/scorer.rs diff --git a/docs/design/content-aggregation.md b/docs/design/content-aggregation.md new file mode 100644 index 00000000..22a7d7dd --- /dev/null +++ b/docs/design/content-aggregation.md @@ -0,0 +1,361 @@ +# Content Aggregation Design + +> Version: 1.0 +> Status: Draft +> Last Updated: 2026-04-04 + +## Overview + +Content Aggregation is the final stage of the retrieval pipeline that transforms candidate nodes into structured, relevant content for the user. This document describes the design for a precision-focused, budget-aware content aggregation system. + +## Problem Statement + +### Current Implementation + +The current `aggregate_content` in `JudgeStage` collects content naively: + +``` +Candidate Node → Node's own content + ALL descendant leaf content +``` + +### Issues + +| Issue | Impact | +|-------|--------| +| **No relevance filtering** | Returns all content from subtree, including irrelevant parts | +| **No token budget** | Large documents may return tens of thousands of tokens | +| **No prioritization** | All leaf content treated equally | +| **Lost structure** | Flat concatenation loses hierarchical context | + +## Design Goals + +1. **Precision First** - Only return truly relevant content +2. **Budget Aware** - Optimize within token constraints +3. **Structure Aware** - Maintain hierarchical context +4. **Incremental** - Support progressive refinement +5. **Explainable** - Traceable selection decisions + +## Architecture + +### High-Level Flow + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Content Aggregator │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Relevance │ │ Budget │ │ Structure │ │ +│ │ Scorer │─▶│ Allocator │─▶│ Builder │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ ↑ ↑ ↑ │ +│ │ │ │ │ +│ ┌──────┴──────┐ ┌──────┴──────┐ ┌──────┴──────┐ │ +│ │ Query- │ │ Token │ │ Hierarchy │ │ +│ │ Node │ │ Budget │ │ Context │ │ +│ │ Scoring │ │ Config │ │ Assembly │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Processing Pipeline + +``` +Candidate Nodes + │ + ▼ +┌─────────────────┐ +│ 1. Collect │ Gather all nodes from candidates + descendants +│ Nodes │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ 2. Score │ Compute relevance score for each content chunk +│ Relevance │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ 3. Filter │ Remove content below relevance threshold +│ by Score │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ 4. Allocate │ Distribute token budget optimally +│ Budget │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ 5. Build │ Assemble structured output +│ Structure │ +└────────┬────────┘ + │ + ▼ + Final Content +``` + +## Module Design + +### 1. RelevanceScorer + +Computes fine-grained relevance scores for content. 
+ +```rust +pub struct RelevanceScorer { + query_keywords: Vec, + strategy: ScoringStrategy, +} + +pub enum ScoringStrategy { + /// Fast keyword matching only + KeywordOnly, + /// Keyword + BM25 scoring + KeywordWithBM25, + /// Keyword + LLM reranking + Hybrid { rerank_top_k: usize }, +} + +pub struct ContentRelevance { + pub node_id: NodeId, + pub chunk: ContentChunk, + pub score: f32, + pub components: ScoreComponents, +} + +pub struct ScoreComponents { + pub keyword_score: f32, // Keyword match quality + pub depth_penalty: f32, // Distance from candidate node + pub path_bonus: f32, // Parent node relevance + pub density_score: f32, // Information density +} +``` + +#### Scoring Formula + +``` +final_score = ( + keyword_score * 0.50 + + depth_penalty * 0.20 + + path_bonus * 0.15 + + density_score * 0.15 +).clamp(0.0, 1.0) + +where: + depth_penalty = 0.9^depth // 10% penalty per level + path_bonus = parent_score * 0.2 + density_score = (1 - stopword_ratio) * 0.7 + entity_ratio * 0.3 +``` + +### 2. BudgetAllocator + +Distributes token budget across scored content. + +```rust +pub struct BudgetAllocator { + total_budget: usize, + strategy: AllocationStrategy, +} + +pub enum AllocationStrategy { + /// Select highest-scoring content first + Greedy, + /// Distribute proportionally to scores + Proportional, + /// Ensure each depth level has representation + Hierarchical { min_per_level: f32 }, +} + +pub struct AllocationResult { + pub selected: Vec, + pub tokens_used: usize, + pub remaining_budget: usize, +} + +pub struct SelectedContent { + pub node_id: NodeId, + pub content: String, + pub tokens: usize, + pub score: f32, + pub truncation: Option, +} +``` + +#### Hierarchical Allocation + +``` +For each depth level (0 to max_depth): + 1. Sort content by score + 2. Allocate up to min_per_level budget + 3. Continue until level budget exhausted + 4. Move to next level + +Benefits: +- Ensures context from all levels +- Prevents shallow-only or deep-only results +- Maintains document structure awareness +``` + +### 3. StructureBuilder + +Assembles selected content into structured output. + +```rust +pub struct StructureBuilder { + format: OutputFormat, + include_metadata: bool, +} + +pub enum OutputFormat { + Markdown, + Json, + Tree, + Flat, +} + +pub struct StructuredContent { + pub content: String, + pub structure: Option, + pub metadata: ContentMetadata, +} +``` + +#### Markdown Output Format + +```markdown +## Parent Section +Parent content here... + +### Child Section A +Child A content here... + +### Child Section B +Child B content here... 
+``` + +## Configuration + +```toml +[retrieval.content] +# Maximum tokens to return +token_budget = 4000 + +# Minimum relevance score (0.0 - 1.0) +min_relevance_score = 0.3 + +# Scoring strategy: "keyword_only" | "keyword_bm25" | "hybrid" +scoring_strategy = "keyword_bm25" + +# Output format: "markdown" | "json" | "tree" +output_format = "markdown" + +# Include relevance scores in output +include_scores = false + +# Hierarchical allocation minimum per level +hierarchical_min_per_level = 0.1 +``` + +## Integration Points + +### JudgeStage Integration + +```rust +impl JudgeStage { + pub fn with_content_aggregator(mut self, config: ContentAggregatorConfig) -> Self { + self.content_aggregator = Some(ContentAggregator::new(config)); + self + } + + fn aggregate_content(&self, ctx: &PipelineContext) -> (String, usize) { + if let Some(aggregator) = &self.content_aggregator { + aggregator.aggregate(&ctx.candidates, &ctx.tree, &ctx.query) + } else { + // Fallback to legacy behavior + self.aggregate_content_legacy(ctx) + } + } +} +``` + +### RetrieveOptions Extension + +```rust +impl RetrieveOptions { + pub fn with_content_config(mut self, config: ContentAggregatorConfig) -> Self { + self.content_config = Some(config); + self + } +} +``` + +## Performance Characteristics + +### Latency by Strategy + +| Strategy | Latency | Precision | Use Case | +|----------|---------|-----------|----------| +| `KeywordOnly` | ~1ms | Medium | Quick preview | +| `KeywordWithBM25` | ~5ms | High | Default choice | +| `Hybrid` | ~200ms | Highest | Precision queries | + +### Memory Usage + +- Scorer: O(n) where n = total content length +- Allocator: O(m) where m = number of chunks +- Builder: O(k) where k = selected content size + +## Future Enhancements + +1. **Semantic Chunking** - Split content by semantic boundaries, not just nodes +2. **LLM Reranking** - Use LLM to rerank top-k chunks +3. **Query-Aware Truncation** - Truncate based on query relevance, not just length +4. **Caching** - Cache aggregation results for repeated queries +5. 
**Streaming** - Stream content as it's selected + +## File Structure + +``` +src/retrieval/content/ +├── mod.rs # Module entry point +├── aggregator.rs # Main aggregator logic +├── scorer.rs # Relevance scoring +├── budget.rs # Token budget allocation +├── builder.rs # Structured output building +├── truncation.rs # Smart truncation utilities +└── config.rs # Configuration types +``` + +## Implementation Priority + +| Phase | Component | Priority | +|-------|-----------|----------| +| P0 | `RelevanceScorer` (keyword) | High | +| P0 | `BudgetAllocator` (greedy) | High | +| P1 | `StructureBuilder` (markdown) | Medium | +| P1 | BM25 scoring | Medium | +| P2 | Hybrid strategy (LLM rerank) | Low | +| P2 | Caching layer | Low | + +## Testing Strategy + +### Unit Tests + +- Scorer: Test keyword extraction, BM25 calculation, density scoring +- Allocator: Test budget distribution, truncation, edge cases +- Builder: Test output formats, structure preservation + +### Integration Tests + +- End-to-end aggregation with real documents +- Performance benchmarks +- Token budget compliance + +### Quality Metrics + +- Precision@k: Relevance of top-k selected chunks +- Recall: Coverage of relevant content +- Latency: P50, P95, P99 response times diff --git a/src/retrieval/content/aggregator.rs b/src/retrieval/content/aggregator.rs new file mode 100644 index 00000000..9edb625b --- /dev/null +++ b/src/retrieval/content/aggregator.rs @@ -0,0 +1,402 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Main content aggregator combining all components. +//! +//! This module provides the main [`ContentAggregator`] that orchestrates +//! scoring, budget allocation, and structure building. + +use std::collections::HashMap; + +use tracing::{debug, info}; + +use crate::domain::{DocumentTree, NodeId, estimate_tokens}; + +use super::budget::{AllocationResult, AllocationStrategy, BudgetAllocator, SelectedContent}; +use super::builder::{ContentMetadata, StructureBuilder, StructuredContent}; +use super::config::{ContentAggregatorConfig, OutputFormatConfig, ScoringStrategyConfig}; +use super::scorer::{ + ContentChunk, ContentRelevance, RelevanceScorer, ScoreComponents, ScoringContext, +}; + +/// Candidate node from retrieval. +#[derive(Debug, Clone)] +pub struct CandidateNode { + /// Node ID. + pub node_id: NodeId, + /// Relevance score from search. + pub score: f32, + /// Depth in tree. + pub depth: usize, +} + +impl CandidateNode { + /// Create a new candidate. + #[must_use] + pub fn new(node_id: NodeId, score: f32, depth: usize) -> Self { + Self { node_id, score, depth } + } +} + +/// Result of content aggregation. +#[derive(Debug, Clone)] +pub struct AggregationResult { + /// Aggregated content string. + pub content: String, + /// Total tokens used. + pub tokens_used: usize, + /// Number of nodes included. + pub nodes_included: usize, + /// Average relevance score. + pub avg_score: f32, + /// Whether content was truncated due to budget. + pub was_truncated: bool, + /// Metadata about the aggregation. + pub metadata: ContentMetadata, +} + +impl AggregationResult { + /// Check if result is empty. + #[must_use] + pub fn is_empty(&self) -> bool { + self.content.is_empty() + } +} + +/// Content aggregator combining scoring, allocation, and building. +#[derive(Debug)] +pub struct ContentAggregator { + /// Configuration. + config: ContentAggregatorConfig, +} + +impl ContentAggregator { + /// Create a new content aggregator. 
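+    ///
+    /// The configuration controls the token budget, relevance threshold,
+    /// scoring strategy, and output format used by [`Self::aggregate`].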
+ #[must_use] + pub fn new(config: ContentAggregatorConfig) -> Self { + Self { config } + } + + /// Create aggregator with default configuration. + #[must_use] + pub fn with_defaults() -> Self { + Self::new(ContentAggregatorConfig::default()) + } + + /// Aggregate content from candidate nodes. + /// + /// # Arguments + /// + /// * `candidates` - Candidate nodes from retrieval + /// * `tree` - Document tree + /// * `query` - Query string for relevance scoring + /// + /// # Returns + /// + /// Aggregated content within token budget. + #[must_use] + pub fn aggregate( + &self, + candidates: &[CandidateNode], + tree: &DocumentTree, + query: &str, + ) -> AggregationResult { + let start = std::time::Instant::now(); + + // Step 1: Collect all content chunks from candidates and their descendants + let chunks = self.collect_chunks(candidates, tree); + debug!( + "Collected {} content chunks from {} candidates", + chunks.len(), + candidates.len() + ); + + if chunks.is_empty() { + return AggregationResult { + content: String::new(), + tokens_used: 0, + nodes_included: 0, + avg_score: 0.0, + was_truncated: false, + metadata: ContentMetadata::default(), + }; + } + + // Step 2: Score all chunks for relevance + let scorer = RelevanceScorer::new(query, self.config.scoring_strategy); + let scoring_ctx = self.build_scoring_context(&chunks); + let scored = scorer.score_chunks(&chunks, &scoring_ctx); + + // Filter by minimum score + let filtered: Vec<_> = scored + .into_iter() + .filter(|r| r.score >= self.config.min_relevance_score) + .collect(); + + debug!( + "Scored {} chunks, {} passed threshold {:.2}", + chunks.len(), + filtered.len(), + self.config.min_relevance_score + ); + + if filtered.is_empty() { + // Fall back to returning best candidate content + return self.fallback_result(candidates, tree); + } + + // Step 3: Allocate token budget + let max_depth = filtered.iter().map(|r| r.chunk.depth).max().unwrap_or(0); + let strategy = self.get_allocation_strategy(); + let allocator = BudgetAllocator::new(self.config.token_budget) + .with_strategy(strategy); + + let allocation = allocator.allocate(filtered, max_depth); + + info!( + "Allocated {} tokens to {} items (strategy: {:?})", + allocation.tokens_used, + allocation.selected.len(), + self.config.scoring_strategy + ); + + // Step 4: Build structured output + let builder = StructureBuilder::from_config( + self.config.output_format, + self.config.include_scores, + ); + + let structured = builder.build(allocation.selected.clone(), tree); + + // Build result + let was_truncated = allocation.selected.iter().any(|s| s.is_truncated()); + + AggregationResult { + content: structured.content, + tokens_used: allocation.tokens_used, + nodes_included: allocation.selected.len(), + avg_score: allocation.stats.avg_score, + was_truncated, + metadata: structured.metadata, + } + } + + /// Collect content chunks from candidates and descendants. 
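+    ///
+    /// Adds each candidate's own content, then recurses through its subtree
+    /// collecting leaf content; the `visited` map deduplicates nodes shared
+    /// by overlapping candidates.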
+ fn collect_chunks( + &self, + candidates: &[CandidateNode], + tree: &DocumentTree, + ) -> Vec { + let mut chunks = Vec::new(); + let mut visited: HashMap = HashMap::new(); + + for candidate in candidates { + // Add candidate's own content + if let Some(node) = tree.get(candidate.node_id) { + if !node.content.is_empty() { + chunks.push(ContentChunk::new( + candidate.node_id, + node.title.clone(), + node.content.clone(), + candidate.depth, + )); + visited.insert(candidate.node_id, true); + } + + // Collect leaf descendants + self.collect_descendant_chunks( + candidate.node_id, + tree, + candidate.depth, + &mut chunks, + &mut visited, + ); + } + } + + chunks + } + + /// Collect chunks from descendant nodes. + fn collect_descendant_chunks( + &self, + parent_id: NodeId, + tree: &DocumentTree, + parent_depth: usize, + chunks: &mut Vec, + visited: &mut HashMap, + ) { + let children = tree.children(parent_id); + + for child_id in children { + if visited.contains_key(&child_id) { + continue; + } + visited.insert(child_id, true); + + if let Some(node) = tree.get(child_id) { + let child_depth = parent_depth + 1; + + if tree.is_leaf(child_id) { + // Leaf node - add its content + if !node.content.is_empty() { + chunks.push(ContentChunk::new( + child_id, + node.title.clone(), + node.content.clone(), + child_depth, + )); + } + } else { + // Non-leaf - recurse + self.collect_descendant_chunks(child_id, tree, child_depth, chunks, visited); + } + } + } + } + + /// Build scoring context from chunks. + fn build_scoring_context(&self, chunks: &[ContentChunk]) -> ScoringContext { + let total_len: usize = chunks.iter().map(|c| c.content.len()).sum(); + let avg_len = if chunks.is_empty() { + 100.0 + } else { + total_len as f32 / chunks.len() as f32 + }; + + // Build document frequency map + let mut doc_freq: HashMap = HashMap::new(); + for chunk in chunks { + let mut seen_in_doc = std::collections::HashSet::new(); + for word in chunk.content.to_lowercase().split_whitespace() { + if !seen_in_doc.contains(word) { + *doc_freq.entry(word.to_string()).or_insert(0) += 1; + seen_in_doc.insert(word); + } + } + } + + ScoringContext { + avg_doc_len: avg_len, + doc_count: chunks.len(), + doc_freq, + parent_score: None, + } + } + + /// Get allocation strategy from config. + fn get_allocation_strategy(&self) -> AllocationStrategy { + AllocationStrategy::Hierarchical { + min_per_level: self.config.hierarchical_min_per_level, + } + } + + /// Fallback result when no content passes threshold. 
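+    ///
+    /// Used when every chunk scores below `min_relevance_score`: returns the
+    /// first candidate's own content (or its summary when the content is
+    /// empty), formatted under its title, so the caller still receives
+    /// something useful.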
+ fn fallback_result( + &self, + candidates: &[CandidateNode], + tree: &DocumentTree, + ) -> AggregationResult { + // Return best candidate's content + if let Some(best) = candidates.first() { + if let Some(node) = tree.get(best.node_id) { + let content = if !node.content.is_empty() { + node.content.clone() + } else if !node.summary.is_empty() { + node.summary.clone() + } else { + String::new() + }; + + let tokens = estimate_tokens(&content); + + return AggregationResult { + content: format!("## {}\n\n{}", node.title, content), + tokens_used: tokens, + nodes_included: 1, + avg_score: best.score, + was_truncated: false, + metadata: ContentMetadata { + total_tokens: tokens, + node_count: 1, + avg_score: best.score, + max_depth: best.depth, + }, + }; + } + } + + AggregationResult { + content: String::new(), + tokens_used: 0, + nodes_included: 0, + avg_score: 0.0, + was_truncated: false, + metadata: ContentMetadata::default(), + } + } +} + +impl Default for ContentAggregator { + fn default() -> Self { + Self::with_defaults() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use indextree::Arena; + + fn make_test_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = crate::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) + } + + #[test] + fn test_aggregator_creation() { + let config = ContentAggregatorConfig::default(); + let aggregator = ContentAggregator::new(config); + assert_eq!(aggregator.config.token_budget, 4000); + } + + #[test] + fn test_aggregator_with_defaults() { + let aggregator = ContentAggregator::with_defaults(); + assert_eq!(aggregator.config.token_budget, 4000); + } + + #[test] + fn test_empty_candidates() { + let aggregator = ContentAggregator::with_defaults(); + let tree = DocumentTree::new("Test", ""); + + let result = aggregator.aggregate(&[], &tree, "test query"); + + assert!(result.is_empty()); + assert_eq!(result.tokens_used, 0); + } + + #[test] + fn test_candidate_node_creation() { + let node_id = make_test_node_id(); + let candidate = CandidateNode::new(node_id, 0.8, 2); + + assert_eq!(candidate.score, 0.8); + assert_eq!(candidate.depth, 2); + } +} diff --git a/src/retrieval/content/budget.rs b/src/retrieval/content/budget.rs new file mode 100644 index 00000000..fa91e9c0 --- /dev/null +++ b/src/retrieval/content/budget.rs @@ -0,0 +1,624 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Token budget allocation for content aggregation. +//! +//! This module provides budget-aware content selection that optimizes +//! token usage while maximizing relevance. + +use std::collections::HashMap; + +use crate::domain::{estimate_tokens, NodeId}; + +use super::scorer::ContentRelevance; + +/// Allocation strategy for distributing token budget. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum AllocationStrategy { + /// Select highest-scoring content first until budget exhausted. + Greedy, + /// Distribute budget proportionally to relevance scores. + Proportional, + /// Ensure each depth level has minimum representation. 
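+    ///
+    /// Prevents shallow-only or deep-only results by reserving a budget
+    /// floor for every depth level.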
+ Hierarchical { + /// Minimum fraction of budget per level (0.0 - 1.0) + min_per_level: f32, + }, +} + +impl Default for AllocationStrategy { + fn default() -> Self { + Self::Hierarchical { min_per_level: 0.1 } + } +} + +/// Information about content truncation. +#[derive(Debug, Clone)] +pub struct TruncationInfo { + /// Original content length in characters. + pub original_len: usize, + /// Truncated content length in characters. + pub truncated_len: usize, + /// Reason for truncation. + pub reason: TruncationReason, +} + +/// Reason for content truncation. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TruncationReason { + /// Content exceeded remaining budget. + BudgetExceeded, + /// Content tail had low relevance. + LowRelevanceTail, +} + +/// A selected content item after budget allocation. +#[derive(Debug, Clone)] +pub struct SelectedContent { + /// Node ID. + pub node_id: NodeId, + /// Node title. + pub title: String, + /// Selected content text. + pub content: String, + /// Token count of selected content. + pub tokens: usize, + /// Relevance score. + pub score: f32, + /// Depth in tree. + pub depth: usize, + /// Truncation info if content was truncated. + pub truncation: Option, +} + +impl SelectedContent { + /// Check if content was truncated. + #[must_use] + pub fn is_truncated(&self) -> bool { + self.truncation.is_some() + } +} + +/// Statistics about the allocation process. +#[derive(Debug, Clone, Default)] +pub struct AllocationStats { + /// Total content items considered. + pub items_considered: usize, + /// Items selected for output. + pub items_selected: usize, + /// Items truncated. + pub items_truncated: usize, + /// Items filtered (below threshold). + pub items_filtered: usize, + /// Average score of selected items. + pub avg_score: f32, +} + +/// Result of budget allocation. +#[derive(Debug, Clone)] +pub struct AllocationResult { + /// Selected content items. + pub selected: Vec, + /// Total tokens used. + pub tokens_used: usize, + /// Remaining token budget. + pub remaining_budget: usize, + /// Allocation statistics. + pub stats: AllocationStats, +} + +impl AllocationResult { + /// Check if any content was selected. + #[must_use] + pub fn is_empty(&self) -> bool { + self.selected.is_empty() + } + + /// Get number of selected items. + #[must_use] + pub fn len(&self) -> usize { + self.selected.len() + } +} + +/// Token budget allocator. +#[derive(Debug)] +pub struct BudgetAllocator { + /// Total token budget. + total_budget: usize, + /// Minimum reserve budget (for fallback). + min_reserve: usize, + /// Allocation strategy. + strategy: AllocationStrategy, + /// Minimum relevance score threshold. + min_score: f32, +} + +impl BudgetAllocator { + /// Create a new allocator with the specified budget. + #[must_use] + pub fn new(budget: usize) -> Self { + Self { + total_budget: budget, + min_reserve: budget / 10, + strategy: AllocationStrategy::default(), + min_score: 0.0, + } + } + + /// Set the allocation strategy. + #[must_use] + pub fn with_strategy(mut self, strategy: AllocationStrategy) -> Self { + self.strategy = strategy; + self + } + + /// Set minimum relevance score threshold. + #[must_use] + pub fn with_min_score(mut self, min_score: f32) -> Self { + self.min_score = min_score; + self + } + + /// Allocate budget to scored content. 
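+    ///
+    /// Items below `min_score` are filtered out first; the survivors are
+    /// then distributed by the configured strategy (greedy, proportional,
+    /// or hierarchical).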
+ #[must_use] + pub fn allocate( + &self, + scored_content: Vec, + max_depth: usize, + ) -> AllocationResult { + // Filter by minimum score + let filtered: Vec<_> = scored_content + .into_iter() + .filter(|c| c.score >= self.min_score) + .collect(); + + let stats = AllocationStats { + items_considered: filtered.len(), + ..Default::default() + }; + + match &self.strategy { + AllocationStrategy::Greedy => self.allocate_greedy(filtered, stats), + AllocationStrategy::Proportional => self.allocate_proportional(filtered, stats), + AllocationStrategy::Hierarchical { min_per_level } => { + self.allocate_hierarchical(filtered, max_depth, *min_per_level, stats) + } + } + } + + /// Greedy allocation: select highest-scoring content first. + fn allocate_greedy( + &self, + mut content: Vec, + mut stats: AllocationStats, + ) -> AllocationResult { + // Sort by score descending + content.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + let mut selected = Vec::new(); + let mut tokens_used = 0; + + for relevance in content { + let tokens = relevance.chunk.token_count(); + + if tokens_used + tokens <= self.total_budget { + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title, + content: relevance.chunk.content, + tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: None, + }); + tokens_used += tokens; + } else { + // Try to fit truncated content + let remaining = self.total_budget - tokens_used; + if remaining >= 50 { + // Minimum useful content + if let Some(truncated) = self.truncate_content(&relevance.chunk.content, remaining) { + let truncated_tokens = estimate_tokens(&truncated); + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title, + content: truncated, + tokens: truncated_tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: Some(TruncationInfo { + original_len: relevance.chunk.content.len(), + truncated_len: remaining, + reason: TruncationReason::BudgetExceeded, + }), + }); + tokens_used += truncated_tokens; + stats.items_truncated += 1; + } + } + break; + } + } + + stats.items_selected = selected.len(); + stats.avg_score = if selected.is_empty() { + 0.0 + } else { + selected.iter().map(|s| s.score).sum::() / selected.len() as f32 + }; + + AllocationResult { + selected, + tokens_used, + remaining_budget: self.total_budget - tokens_used, + stats, + } + } + + /// Proportional allocation: distribute budget by score ratio. 
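+    ///
+    /// Each item is granted a budget share proportional to its score (with
+    /// a 50-token floor); oversized items are truncated to fit their share
+    /// when enough budget remains.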
+ fn allocate_proportional( + &self, + content: Vec, + mut stats: AllocationStats, + ) -> AllocationResult { + let total_score: f32 = content.iter().map(|c| c.score).sum(); + if total_score == 0.0 { + return AllocationResult { + selected: Vec::new(), + tokens_used: 0, + remaining_budget: self.total_budget, + stats, + }; + } + + let mut selected = Vec::new(); + let mut tokens_used = 0; + + for relevance in content { + // Calculate proportional budget + let proportion = relevance.score / total_score; + let allocated_budget = ((self.total_budget as f32 * proportion) as usize).max(50); + + let content_tokens = relevance.chunk.token_count(); + + if content_tokens <= allocated_budget { + // Full content fits + if tokens_used + content_tokens <= self.total_budget { + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title, + content: relevance.chunk.content, + tokens: content_tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: None, + }); + tokens_used += content_tokens; + } + } else { + // Truncate to allocated budget + let remaining = self.total_budget - tokens_used; + if remaining >= 50 && remaining >= allocated_budget / 2 { + if let Some(truncated) = self.truncate_content(&relevance.chunk.content, remaining.min(allocated_budget)) { + let truncated_tokens = estimate_tokens(&truncated); + let truncated_len = truncated.len(); + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title, + content: truncated, + tokens: truncated_tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: Some(TruncationInfo { + original_len: relevance.chunk.content.len(), + truncated_len, + reason: TruncationReason::BudgetExceeded, + }), + }); + tokens_used += truncated_tokens; + stats.items_truncated += 1; + } + } + } + } + + stats.items_selected = selected.len(); + stats.avg_score = if selected.is_empty() { + 0.0 + } else { + selected.iter().map(|s| s.score).sum::() / selected.len() as f32 + }; + + AllocationResult { + selected, + tokens_used, + remaining_budget: self.total_budget - tokens_used, + stats, + } + } + + /// Hierarchical allocation: ensure each depth level has representation. 
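+    ///
+    /// Two passes: the first walks depths shallow-to-deep, granting each
+    /// level up to `min_per_level` of the budget; the second spends any
+    /// remainder (minus the reserve) on the highest-scoring unselected
+    /// chunks regardless of depth.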
+ fn allocate_hierarchical( + &self, + content: Vec, + max_depth: usize, + min_per_level: f32, + mut stats: AllocationStats, + ) -> AllocationResult { + // Group content by depth + let mut by_depth: HashMap> = HashMap::new(); + for c in content { + by_depth + .entry(c.chunk.depth) + .or_default() + .push(c); + } + + // Sort each level by score + for (_depth, items) in by_depth.iter_mut() { + items.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + } + + let per_level_budget = (self.total_budget as f32 * min_per_level) as usize; + let mut selected = Vec::new(); + let mut tokens_used = 0; + + // Process from shallow to deep + for depth in 0..=max_depth { + if tokens_used >= self.total_budget { + break; + } + + if let Some(level_content) = by_depth.get(&depth) { + let mut level_used = 0; + + for relevance in level_content { + if tokens_used >= self.total_budget { + break; + } + + let tokens = relevance.chunk.token_count(); + + // Check if we should include this content + let can_include_full = tokens_used + tokens <= self.total_budget; + let level_budget_ok = level_used < per_level_budget || depth == 0; + + if can_include_full && level_budget_ok { + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title.clone(), + content: relevance.chunk.content.clone(), + tokens, + score: relevance.score, + depth, + truncation: None, + }); + tokens_used += tokens; + level_used += tokens; + } else if level_used < per_level_budget { + // Try truncated version + let remaining = (self.total_budget - tokens_used).min(per_level_budget - level_used); + if remaining >= 50 { + if let Some(truncated) = self.truncate_content(&relevance.chunk.content, remaining) { + let truncated_tokens = estimate_tokens(&truncated); + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title.clone(), + content: truncated, + tokens: truncated_tokens, + score: relevance.score, + depth, + truncation: Some(TruncationInfo { + original_len: relevance.chunk.content.len(), + truncated_len: remaining, + reason: TruncationReason::BudgetExceeded, + }), + }); + tokens_used += truncated_tokens; + level_used += truncated_tokens; + stats.items_truncated += 1; + } + } + } + } + } + } + + // Second pass: fill remaining budget with highest-scoring content + if tokens_used < self.total_budget - self.min_reserve { + let mut all_remaining: Vec<_> = by_depth + .values() + .flat_map(|v| v.iter()) + .filter(|c| !selected.iter().any(|s| s.node_id == c.chunk.node_id)) + .collect(); + + all_remaining.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + for relevance in all_remaining { + if tokens_used >= self.total_budget - self.min_reserve { + break; + } + + let tokens = relevance.chunk.token_count(); + if tokens_used + tokens <= self.total_budget { + selected.push(SelectedContent { + node_id: relevance.chunk.node_id, + title: relevance.chunk.title.clone(), + content: relevance.chunk.content.clone(), + tokens, + score: relevance.score, + depth: relevance.chunk.depth, + truncation: None, + }); + tokens_used += tokens; + } + } + } + + stats.items_selected = selected.len(); + stats.avg_score = if selected.is_empty() { + 0.0 + } else { + selected.iter().map(|s| s.score).sum::() / selected.len() as f32 + }; + + AllocationResult { + selected, + tokens_used, + remaining_budget: self.total_budget - tokens_used, + stats, + } + } + + /// Truncate content to fit within token budget. 
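+    ///
+    /// Approximates one token as four characters, prefers cutting at a
+    /// sentence boundary, falls back to a word boundary, and appends an
+    /// ellipsis; returns `None` when fewer than 20 tokens are available.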
+ fn truncate_content(&self, content: &str, max_tokens: usize) -> Option { + if max_tokens < 20 { + return None; + } + + // Approximate: 1 token ≈ 4 characters (for English) + let max_chars = max_tokens * 4; + + if content.len() <= max_chars { + return Some(content.to_string()); + } + + // Try to break at sentence boundary + let truncated = &content[..max_chars]; + + // Find last sentence boundary + if let Some(pos) = truncated.rfind(|c| c == '.' || c == '!' || c == '?') { + Some(format!("{}...", &truncated[..=pos])) + } else if let Some(pos) = truncated.rfind(' ') { + // Fall back to word boundary + Some(format!("{}...", &truncated[..pos])) + } else { + // Hard truncate + Some(format!("{}...", truncated)) + } + } +} + +impl Default for BudgetAllocator { + fn default() -> Self { + Self::new(4000) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::retrieval::content::{ContentChunk, ScoreComponents}; + use indextree::Arena; + + fn make_test_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = crate::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) + } + + fn make_relevance(content: &str, score: f32, depth: usize) -> ContentRelevance { + let chunk = ContentChunk::new( + make_test_node_id(), + "Test".to_string(), + content.to_string(), + depth, + ); + ContentRelevance::new(chunk, score, ScoreComponents::default()) + } + + #[test] + fn test_allocator_creation() { + let allocator = BudgetAllocator::new(1000); + assert_eq!(allocator.total_budget, 1000); + } + + #[test] + fn test_greedy_allocation() { + let allocator = BudgetAllocator::new(100) + .with_strategy(AllocationStrategy::Greedy); + + let content = vec![ + make_relevance("High score content with enough text", 0.9, 0), + make_relevance("Low score content", 0.3, 0), + ]; + + let result = allocator.allocate(content, 1); + assert!(!result.is_empty()); + assert!(result.tokens_used <= 100); + } + + #[test] + fn test_min_score_filter() { + let allocator = BudgetAllocator::new(1000) + .with_min_score(0.5); + + let content = vec![ + make_relevance("Good content", 0.8, 0), + make_relevance("Bad content", 0.2, 0), + ]; + + let result = allocator.allocate(content, 1); + assert_eq!(result.selected.len(), 1); + } + + #[test] + fn test_truncation() { + let allocator = BudgetAllocator::new(50); + let truncated = allocator.truncate_content( + "This is a very long piece of content. It has multiple sentences. 
We want to test truncation at sentence boundary.", + 25, // Need at least 20 tokens for truncation + ); + + assert!(truncated.is_some()); + let text = truncated.unwrap(); + // Should truncate and add ellipsis + assert!(text.len() < 200); // Should be truncated + } + + #[test] + fn test_hierarchical_allocation() { + let allocator = BudgetAllocator::new(200) + .with_strategy(AllocationStrategy::Hierarchical { min_per_level: 0.2 }); + + let content = vec![ + make_relevance("Depth 0 content", 0.9, 0), + make_relevance("Depth 1 content A", 0.7, 1), + make_relevance("Depth 1 content B", 0.6, 1), + make_relevance("Depth 2 content", 0.8, 2), + ]; + + let result = allocator.allocate(content, 2); + + // Should have content from multiple depths + let depths: std::collections::HashSet = + result.selected.iter().map(|s| s.depth).collect(); + assert!(depths.len() >= 2); + } +} diff --git a/src/retrieval/content/builder.rs b/src/retrieval/content/builder.rs new file mode 100644 index 00000000..c3b5792f --- /dev/null +++ b/src/retrieval/content/builder.rs @@ -0,0 +1,522 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Structure builder for aggregated content. +//! +//! This module transforms selected content into structured output formats. + +use serde::{Deserialize, Serialize}; + +use crate::domain::DocumentTree; + +use super::budget::SelectedContent; +use super::config::OutputFormatConfig; + +/// Output format for structured content. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum OutputFormat { + /// Markdown format with headers. + #[default] + Markdown, + /// JSON format. + Json, + /// Tree format. + Tree, + /// Flat text format. + Flat, +} + +impl From for OutputFormat { + fn from(config: OutputFormatConfig) -> Self { + match config { + OutputFormatConfig::Markdown => Self::Markdown, + OutputFormatConfig::Json => Self::Json, + OutputFormatConfig::Tree => Self::Tree, + OutputFormatConfig::Flat => Self::Flat, + } + } +} + +/// Tree node in the content structure. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContentTreeNode { + /// Node title. + pub title: String, + /// Node content (if any). + pub content: Option, + /// Relevance score. + pub score: f32, + /// Child nodes. + pub children: Vec, +} + +impl ContentTreeNode { + /// Create a new tree node. + #[must_use] + pub fn new(title: String) -> Self { + Self { + title, + content: None, + score: 0.0, + children: Vec::new(), + } + } + + /// Add content to this node. + #[must_use] + pub fn with_content(mut self, content: String, score: f32) -> Self { + self.content = Some(content); + self.score = score; + self + } + + /// Add a child node. + pub fn add_child(&mut self, child: ContentTreeNode) { + self.children.push(child); + } +} + +/// Content tree structure. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContentTree { + /// Root node. + pub root: ContentTreeNode, + /// Total nodes in tree. + pub total_nodes: usize, +} + +impl ContentTree { + /// Create a new content tree. + #[must_use] + pub fn new(root: ContentTreeNode) -> Self { + Self { + total_nodes: 1, + root, + } + } +} + +/// Metadata about aggregated content. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ContentMetadata { + /// Total tokens in content. + pub total_tokens: usize, + /// Number of nodes included. + pub node_count: usize, + /// Average relevance score. + pub avg_score: f32, + /// Maximum depth included. + pub max_depth: usize, +} + +/// Structured content result. 
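+///
+/// Produced by [`StructureBuilder::build`]; `structure` is populated only
+/// for the tree output format.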
+#[derive(Debug, Clone)] +pub struct StructuredContent { + /// Formatted content string. + pub content: String, + /// Optional tree structure. + pub structure: Option, + /// Content metadata. + pub metadata: ContentMetadata, +} + +impl StructuredContent { + /// Check if content is empty. + #[must_use] + pub fn is_empty(&self) -> bool { + self.content.is_empty() + } + + /// Get content length in characters. + #[must_use] + pub fn len(&self) -> usize { + self.content.len() + } +} + +/// Builder for creating structured content output. +#[derive(Debug)] +pub struct StructureBuilder { + /// Output format. + format: OutputFormat, + /// Include metadata in output. + include_metadata: bool, + /// Include scores in output. + include_scores: bool, +} + +impl StructureBuilder { + /// Create a new structure builder. + #[must_use] + pub fn new(format: OutputFormat) -> Self { + Self { + format, + include_metadata: false, + include_scores: false, + } + } + + /// Create builder from config. + #[must_use] + pub fn from_config(format: OutputFormatConfig, include_scores: bool) -> Self { + Self { + format: OutputFormat::from(format), + include_metadata: false, + include_scores, + } + } + + /// Enable metadata in output. + #[must_use] + pub fn with_metadata(mut self) -> Self { + self.include_metadata = true; + self + } + + /// Enable scores in output. + #[must_use] + pub fn with_scores(mut self) -> Self { + self.include_scores = true; + self + } + + /// Build structured content from selected items. + #[must_use] + pub fn build( + &self, + selected: Vec, + tree: &DocumentTree, + ) -> StructuredContent { + if selected.is_empty() { + return StructuredContent { + content: String::new(), + structure: None, + metadata: ContentMetadata::default(), + }; + } + + // Calculate metadata + let total_tokens: usize = selected.iter().map(|s| s.tokens).sum(); + let avg_score = selected.iter().map(|s| s.score).sum::() / selected.len() as f32; + let max_depth = selected.iter().map(|s| s.depth).max().unwrap_or(0); + + let metadata = ContentMetadata { + total_tokens, + node_count: selected.len(), + avg_score, + max_depth, + }; + + // Build based on format + let (content, structure) = match &self.format { + OutputFormat::Markdown => self.build_markdown(selected, tree), + OutputFormat::Json => self.build_json(selected, tree), + OutputFormat::Tree => self.build_tree_format(selected, tree), + OutputFormat::Flat => self.build_flat(selected), + }; + + StructuredContent { + content, + structure, + metadata, + } + } + + /// Build Markdown format output. + fn build_markdown( + &self, + selected: Vec, + _tree: &DocumentTree, + ) -> (String, Option) { + let mut sections = Vec::new(); + let mut current_depth = 0; + + // Sort by depth to maintain hierarchy + let mut sorted = selected; + sorted.sort_by(|a, b| a.depth.cmp(&b.depth)); + + for content in sorted { + // Adjust heading level based on depth + let heading_level = (content.depth + 1).min(6); + let heading = "#".repeat(heading_level); + + let mut section = format!("{} {}", heading, content.title); + + if self.include_scores { + section.push_str(&format!(" *(score: {:.2})*", content.score)); + } + + section.push_str("\n\n"); + section.push_str(&content.content); + + if content.is_truncated() { + section.push_str("\n\n*[content truncated]*"); + } + + sections.push(section); + current_depth = current_depth.max(content.depth); + } + + (sections.join("\n\n---\n\n"), None) + } + + /// Build JSON format output. 
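+    ///
+    /// Serializes each section (title, content, score, depth, truncation
+    /// flag) with `serde_json::to_string_pretty`, falling back to an empty
+    /// string if serialization fails.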
+ fn build_json( + &self, + selected: Vec, + _tree: &DocumentTree, + ) -> (String, Option) { + #[derive(Serialize)] + struct JsonOutput<'a> { + sections: Vec>, + } + + #[derive(Serialize)] + struct JsonSection<'a> { + title: &'a str, + content: &'a str, + score: f32, + depth: usize, + truncated: bool, + } + + let sections: Vec<_> = selected + .iter() + .map(|s| JsonSection { + title: &s.title, + content: &s.content, + score: s.score, + depth: s.depth, + truncated: s.is_truncated(), + }) + .collect(); + + let output = JsonOutput { sections }; + let content = serde_json::to_string_pretty(&output).unwrap_or_default(); + + (content, None) + } + + /// Build tree format output. + fn build_tree_format( + &self, + selected: Vec, + tree: &DocumentTree, + ) -> (String, Option) { + // Build tree structure + let mut root = ContentTreeNode::new("Content".to_string()); + let mut node_count = 0; + + // Group by parent + use std::collections::HashMap; + let mut by_parent: HashMap, Vec<&SelectedContent>> = + HashMap::new(); + + for content in &selected { + let parent = tree.get(content.node_id).and_then(|_| { + // Find parent in selected + selected + .iter() + .find(|s| s.depth < content.depth) + .map(|s| Some(s.node_id)) + .unwrap_or(None) + }); + by_parent.entry(parent).or_default().push(content); + } + + // Build tree recursively + fn build_node( + content: &SelectedContent, + all_by_parent: &HashMap, Vec<&SelectedContent>>, + ) -> ContentTreeNode { + let mut node = ContentTreeNode::new(content.title.clone()) + .with_content(content.content.clone(), content.score); + + if let Some(children) = all_by_parent.get(&Some(content.node_id)) { + for child in children { + node.add_child(build_node(child, all_by_parent)); + } + } + + node + } + + // Add top-level items + if let Some(top_level) = by_parent.get(&None) { + for content in top_level { + let node = build_node(content, &by_parent); + node_count += count_nodes(&node); + root.add_child(node); + } + } + + // Build string representation + let content = render_tree(&root, 0); + + let tree_structure = ContentTree { + root, + total_nodes: node_count, + }; + + (content, Some(tree_structure)) + } + + /// Build flat format output. + fn build_flat(&self, selected: Vec) -> (String, Option) { + let parts: Vec<_> = selected + .iter() + .map(|c| { + let mut part = format!("[{}] {}", c.title, c.content); + if self.include_scores { + part = format!("[{}] (score: {:.2}) {}", c.title, c.score, c.content); + } + part + }) + .collect(); + + (parts.join("\n\n"), None) + } +} + +impl Default for StructureBuilder { + fn default() -> Self { + Self::new(OutputFormat::default()) + } +} + +/// Count nodes in a tree. +fn count_nodes(node: &ContentTreeNode) -> usize { + 1 + node.children.iter().map(count_nodes).sum::() +} + +/// Render tree as string. 
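+///
+/// Emits one `├─` line per node with its relevance score and a
+/// 100-character content preview, indenting children by depth.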
+fn render_tree(node: &ContentTreeNode, depth: usize) -> String { + let indent = " ".repeat(depth); + let mut result = format!("{}├─ {} (score: {:.2})\n", indent, node.title, node.score); + + if let Some(ref content) = node.content { + let preview = if content.len() > 100 { + format!("{}...", &content[..100]) + } else { + content.clone() + }; + result.push_str(&format!("{}│ {}\n", indent, preview.replace('\n', " "))); + } + + for child in &node.children { + result.push_str(&render_tree(child, depth + 1)); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::domain::NodeId; + use indextree::Arena; + + fn make_test_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = crate::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) + } + + fn make_selected(title: &str, content: &str, score: f32, depth: usize) -> SelectedContent { + SelectedContent { + node_id: make_test_node_id(), + title: title.to_string(), + content: content.to_string(), + tokens: 50, + score, + depth, + truncation: None, + } + } + + #[test] + fn test_markdown_builder() { + let builder = StructureBuilder::new(OutputFormat::Markdown); + let selected = vec![ + make_selected("Section 1", "Content 1", 0.9, 0), + make_selected("Section 2", "Content 2", 0.8, 1), + ]; + + // Create a minimal tree for testing + let tree = DocumentTree::new("Test", ""); + + let result = builder.build(selected, &tree); + + assert!(!result.is_empty()); + assert!(result.content.contains("Section 1")); + assert!(result.content.contains("Section 2")); + assert!(result.content.contains("# Section 1")); + assert!(result.content.contains("## Section 2")); + } + + #[test] + fn test_flat_builder() { + let builder = StructureBuilder::new(OutputFormat::Flat); + let selected = vec![ + make_selected("Section 1", "Content 1", 0.9, 0), + ]; + + let tree = DocumentTree::new("Test", ""); + let result = builder.build(selected, &tree); + + assert!(result.content.contains("[Section 1]")); + assert!(result.content.contains("Content 1")); + } + + #[test] + fn test_builder_with_scores() { + let builder = StructureBuilder::new(OutputFormat::Markdown) + .with_scores(); + + let selected = vec![ + make_selected("Section 1", "Content 1", 0.95, 0), + ]; + + let tree = DocumentTree::new("Test", ""); + let result = builder.build(selected, &tree); + + assert!(result.content.contains("score: 0.95")); + } + + #[test] + fn test_empty_selected() { + let builder = StructureBuilder::new(OutputFormat::Markdown); + let tree = DocumentTree::new("Test", ""); + let result = builder.build(Vec::new(), &tree); + + assert!(result.is_empty()); + assert_eq!(result.metadata.node_count, 0); + } + + #[test] + fn test_content_tree_node() { + let mut root = ContentTreeNode::new("Root".to_string()) + .with_content("Root content".to_string(), 0.9); + + let child = ContentTreeNode::new("Child".to_string()) + .with_content("Child content".to_string(), 0.8); + + root.add_child(child); + + assert_eq!(root.children.len(), 1); + assert_eq!(root.score, 0.9); + } +} diff --git a/src/retrieval/content/config.rs b/src/retrieval/content/config.rs new file mode 100644 index 00000000..f9bc38b6 --- /dev/null +++ b/src/retrieval/content/config.rs @@ -0,0 +1,158 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: 
Apache-2.0
+
+//! Configuration types for content aggregation.
+
+use serde::{Deserialize, Serialize};
+
+/// Configuration for content aggregation.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ContentAggregatorConfig {
+    /// Maximum tokens to return in aggregated content.
+    pub token_budget: usize,
+
+    /// Minimum relevance score threshold (0.0 - 1.0).
+    /// Content below this threshold will be filtered out.
+    pub min_relevance_score: f32,
+
+    /// Scoring strategy for relevance computation.
+    pub scoring_strategy: ScoringStrategyConfig,
+
+    /// Output format for aggregated content.
+    pub output_format: OutputFormatConfig,
+
+    /// Include relevance scores in output metadata.
+    pub include_scores: bool,
+
+    /// Minimum budget allocation per depth level (for hierarchical strategy).
+    /// Value between 0.0 and 1.0, representing fraction of total budget.
+    pub hierarchical_min_per_level: f32,
+
+    /// Enable content deduplication.
+    pub deduplicate: bool,
+
+    /// Similarity threshold for deduplication (0.0 - 1.0).
+    pub dedup_threshold: f32,
+}
+
+impl Default for ContentAggregatorConfig {
+    fn default() -> Self {
+        Self {
+            token_budget: 4000,
+            min_relevance_score: 0.2,
+            scoring_strategy: ScoringStrategyConfig::KeywordWithBM25,
+            output_format: OutputFormatConfig::Markdown,
+            include_scores: false,
+            hierarchical_min_per_level: 0.1,
+            deduplicate: true,
+            dedup_threshold: 0.9,
+        }
+    }
+}
+
+impl ContentAggregatorConfig {
+    /// Create a new config with default values.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the token budget.
+    #[must_use]
+    pub fn with_token_budget(mut self, budget: usize) -> Self {
+        self.token_budget = budget;
+        self
+    }
+
+    /// Set the minimum relevance score.
+    #[must_use]
+    pub fn with_min_relevance(mut self, score: f32) -> Self {
+        self.min_relevance_score = score.clamp(0.0, 1.0);
+        self
+    }
+
+    /// Set the scoring strategy.
+    #[must_use]
+    pub fn with_scoring_strategy(mut self, strategy: ScoringStrategyConfig) -> Self {
+        self.scoring_strategy = strategy;
+        self
+    }
+
+    /// Set the output format.
+    #[must_use]
+    pub fn with_output_format(mut self, format: OutputFormatConfig) -> Self {
+        self.output_format = format;
+        self
+    }
+}
+
+/// Scoring strategy configuration.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ScoringStrategyConfig {
+    /// Fast keyword matching only.
+    KeywordOnly,
+    /// Keyword matching with BM25 scoring.
+    KeywordWithBM25,
+    /// Hybrid: keyword + LLM reranking for top candidates.
+    Hybrid,
+}
+
+impl Default for ScoringStrategyConfig {
+    fn default() -> Self {
+        Self::KeywordWithBM25
+    }
+}
+
+/// Output format configuration.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum OutputFormatConfig {
+    /// Markdown format with headers.
+    Markdown,
+    /// JSON format.
+    Json,
+    /// Tree format.
+    Tree,
+    /// Flat text format.
+ Flat, +} + +impl Default for OutputFormatConfig { + fn default() -> Self { + Self::Markdown + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_config() { + let config = ContentAggregatorConfig::default(); + assert_eq!(config.token_budget, 4000); + assert_eq!(config.min_relevance_score, 0.2); + } + + #[test] + fn test_config_builder() { + let config = ContentAggregatorConfig::new() + .with_token_budget(2000) + .with_min_relevance(0.5); + + assert_eq!(config.token_budget, 2000); + assert_eq!(config.min_relevance_score, 0.5); + } + + #[test] + fn test_min_relevance_clamped() { + let config = ContentAggregatorConfig::new() + .with_min_relevance(1.5); + assert_eq!(config.min_relevance_score, 1.0); + + let config = ContentAggregatorConfig::new() + .with_min_relevance(-0.5); + assert_eq!(config.min_relevance_score, 0.0); + } +} diff --git a/src/retrieval/content/mod.rs b/src/retrieval/content/mod.rs new file mode 100644 index 00000000..5126057f --- /dev/null +++ b/src/retrieval/content/mod.rs @@ -0,0 +1,46 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Content aggregation module for retrieval results. +//! +//! This module provides precision-focused, budget-aware content aggregation +//! that transforms candidate nodes into structured, relevant content. +//! +//! # Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────────────────────────┐ +//! │ Content Aggregator │ +//! ├─────────────────────────────────────────────────────────────┤ +//! │ RelevanceScorer → BudgetAllocator → StructureBuilder │ +//! └─────────────────────────────────────────────────────────────┘ +//! ``` +//! +//! # Example +//! +//! ```rust,ignore +//! use vectorless::retrieval::content::{ContentAggregator, ContentAggregatorConfig}; +//! +//! let config = ContentAggregatorConfig { +//! token_budget: 4000, +//! min_relevance_score: 0.3, +//! ..Default::default() +//! }; +//! +//! let aggregator = ContentAggregator::new(config); +//! let result = aggregator.aggregate(&candidates, &tree, &query); +//! ``` + +mod aggregator; +mod budget; +mod builder; +mod config; +mod scorer; + +pub use aggregator::{ContentAggregator, AggregationResult}; +pub use budget::{BudgetAllocator, AllocationStrategy, AllocationResult, SelectedContent}; +pub use builder::{StructureBuilder, OutputFormat, StructuredContent, ContentTree}; +pub use config::{ContentAggregatorConfig, OutputFormatConfig, ScoringStrategyConfig}; +pub use scorer::{ + RelevanceScorer, ContentRelevance, ScoreComponents, ContentChunk, ScoringContext, +}; diff --git a/src/retrieval/content/scorer.rs b/src/retrieval/content/scorer.rs new file mode 100644 index 00000000..ba04a6ce --- /dev/null +++ b/src/retrieval/content/scorer.rs @@ -0,0 +1,439 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Relevance scoring for content chunks. +//! +//! This module provides fine-grained relevance scoring for content, +//! combining keyword matching, BM25, and optional LLM reranking. + +use std::collections::HashMap; + +use crate::domain::{estimate_tokens, NodeId}; + +use super::config::ScoringStrategyConfig; + +/// Content chunk for scoring. +#[derive(Debug, Clone)] +pub struct ContentChunk { + /// Node ID this chunk belongs to. + pub node_id: NodeId, + /// Title of the node. + pub title: String, + /// Content text. + pub content: String, + /// Depth in tree (0 = root level). 
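+    ///
+    /// Feeds the depth-penalty term of the relevance score (`0.9^depth`).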
+ pub depth: usize, +} + +impl ContentChunk { + /// Create a new content chunk. + #[must_use] + pub fn new(node_id: NodeId, title: String, content: String, depth: usize) -> Self { + Self { + node_id, + title, + content, + depth, + } + } + + /// Estimate token count for this chunk. + #[must_use] + pub fn token_count(&self) -> usize { + estimate_tokens(&self.content) + } +} + +/// Relevance score components. +#[derive(Debug, Clone, Default)] +pub struct ScoreComponents { + /// Keyword match score (0.0 - 1.0). + pub keyword_score: f32, + /// BM25 score (normalized). + pub bm25_score: f32, + /// Depth penalty (deeper = lower score). + pub depth_penalty: f32, + /// Path bonus from parent relevance. + pub path_bonus: f32, + /// Information density score. + pub density_score: f32, +} + +impl ScoreComponents { + /// Compute final weighted score. + #[must_use] + pub fn final_score(&self) -> f32 { + // Weight formula from design doc + let score = self.keyword_score * 0.35 + + self.bm25_score * 0.25 + + self.depth_penalty * 0.15 + + self.path_bonus * 0.10 + + self.density_score * 0.15; + + score.clamp(0.0, 1.0) + } +} + +/// Relevance score result for a content chunk. +#[derive(Debug, Clone)] +pub struct ContentRelevance { + /// The content chunk that was scored. + pub chunk: ContentChunk, + /// Final relevance score (0.0 - 1.0). + pub score: f32, + /// Score breakdown by component. + pub components: ScoreComponents, +} + +impl ContentRelevance { + /// Create a new relevance result. + #[must_use] + pub fn new(chunk: ContentChunk, score: f32, components: ScoreComponents) -> Self { + Self { + chunk, + score, + components, + } + } +} + +/// Context for scoring operations. +#[derive(Debug, Clone)] +pub struct ScoringContext { + /// Average document length for BM25. + pub avg_doc_len: f32, + /// Total document count for IDF. + pub doc_count: usize, + /// Document frequency for terms. + pub doc_freq: HashMap, + /// Parent node score (for path bonus). + pub parent_score: Option, +} + +impl Default for ScoringContext { + fn default() -> Self { + Self { + avg_doc_len: 100.0, + doc_count: 1, + doc_freq: HashMap::new(), + parent_score: None, + } + } +} + +/// Relevance scorer for content chunks. +#[derive(Debug)] +pub struct RelevanceScorer { + /// Query keywords extracted from the query. + query_keywords: Vec, + /// Scoring strategy to use. + strategy: ScoringStrategyConfig, + /// BM25 parameters. + k1: f32, + b: f32, +} + +impl RelevanceScorer { + /// Create a new scorer with keywords. + #[must_use] + pub fn new(query: &str, strategy: ScoringStrategyConfig) -> Self { + let query_keywords = extract_keywords(query); + Self { + query_keywords, + strategy, + k1: 1.2, + b: 0.75, + } + } + + /// Create a scorer with pre-extracted keywords. + #[must_use] + pub fn with_keywords(keywords: Vec, strategy: ScoringStrategyConfig) -> Self { + Self { + query_keywords: keywords, + strategy, + k1: 1.2, + b: 0.75, + } + } + + /// Score a content chunk. + #[must_use] + pub fn score_chunk(&self, chunk: &ContentChunk, ctx: &ScoringContext) -> ContentRelevance { + let mut components = ScoreComponents::default(); + + // 1. Keyword score + components.keyword_score = self.compute_keyword_score(&chunk.content); + + // 2. BM25 score (if enabled) + if matches!(self.strategy, ScoringStrategyConfig::KeywordWithBM25 | ScoringStrategyConfig::Hybrid) { + components.bm25_score = self.compute_bm25_score(&chunk.content, ctx); + } + + // 3. Depth penalty (10% per level) + components.depth_penalty = 0.9_f32.powi(chunk.depth as i32); + + // 4. 
Path bonus + components.path_bonus = ctx.parent_score.map(|s| s * 0.2).unwrap_or(0.0); + + // 5. Density score + components.density_score = compute_density(&chunk.content); + + let final_score = components.final_score(); + + ContentRelevance::new(chunk.clone(), final_score, components) + } + + /// Score multiple chunks. + pub fn score_chunks<'a>( + &self, + chunks: &'a [ContentChunk], + ctx: &ScoringContext, + ) -> Vec { + chunks + .iter() + .map(|chunk| self.score_chunk(chunk, ctx)) + .collect() + } + + /// Compute keyword overlap score. + fn compute_keyword_score(&self, content: &str) -> f32 { + if self.query_keywords.is_empty() { + return 0.5; // Neutral score if no keywords + } + + let content_lower = content.to_lowercase(); + let content_words: std::collections::HashSet<&str> = content_lower + .split_whitespace() + .collect(); + + let matches = self + .query_keywords + .iter() + .filter(|kw| { + let kw_lower = kw.to_lowercase(); + content_words.iter().any(|&w| w.contains(&kw_lower)) + || content_lower.contains(&kw_lower) + }) + .count(); + + matches as f32 / self.query_keywords.len() as f32 + } + + /// Compute BM25 score. + fn compute_bm25_score(&self, content: &str, ctx: &ScoringContext) -> f32 { + if self.query_keywords.is_empty() { + return 0.0; + } + + let doc_len = content.split_whitespace().count() as f32; + let mut score = 0.0; + + for term in &self.query_keywords { + let term_lower = term.to_lowercase(); + let tf = content + .to_lowercase() + .matches(&term_lower) + .count() as f32; + + if tf == 0.0 { + continue; + } + + // IDF calculation + let df = ctx.doc_freq.get(&term_lower).copied().unwrap_or(1) as f32; + let idf = ((ctx.doc_count as f32 - df + 0.5) / (df + 0.5) + 1.0).ln(); + + // BM25 formula + let numerator = tf * (self.k1 + 1.0); + let denominator = tf + self.k1 * (1.0 - self.b + self.b * doc_len / ctx.avg_doc_len); + + score += idf * numerator / denominator; + } + + // Normalize to [0, 1] + let max_possible_score = self.query_keywords.len() as f32 * 5.0; // Rough upper bound + (score / max_possible_score).clamp(0.0, 1.0) + } + + /// Get the query keywords. + #[must_use] + pub fn keywords(&self) -> &[String] { + &self.query_keywords + } +} + +/// Extract keywords from a query string. 
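+///
+/// Lowercases the query, splits on non-alphanumeric characters, and drops
+/// empty tokens, single-character tokens, and common English stopwords.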
+fn extract_keywords(query: &str) -> Vec<String> {
+    // Common English stop words
+    const STOPWORDS: &[&str] = &[
+        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
+        "have", "has", "had", "do", "does", "did", "will", "would", "could",
+        "should", "may", "might", "must", "shall", "can", "need", "dare",
+        "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
+        "from", "as", "into", "through", "during", "before", "after",
+        "above", "below", "between", "under", "again", "further", "then",
+        "once", "here", "there", "when", "where", "why", "how", "all",
+        "each", "few", "more", "most", "other", "some", "such", "no", "nor",
+        "not", "only", "own", "same", "so", "than", "too", "very", "just",
+        "and", "but", "if", "or", "because", "until", "while", "about",
+        "what", "which", "who", "whom", "this", "that", "these", "those",
+        "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you",
+        "your", "yours", "yourself", "yourselves", "he", "him", "his",
+        "himself", "she", "her", "hers", "herself", "it", "its", "itself",
+        "they", "them", "their", "theirs", "themselves",
+    ];
+
+    query
+        .to_lowercase()
+        .split(|c: char| !c.is_alphanumeric())
+        .filter(|s| {
+            let s = *s;
+            !s.is_empty() && s.len() > 1 && !STOPWORDS.contains(&s)
+        })
+        .map(String::from)
+        .collect()
+}
+
+/// Compute information density of content.
+fn compute_density(content: &str) -> f32 {
+    let words: Vec<&str> = content.split_whitespace().collect();
+    if words.is_empty() {
+        return 0.0;
+    }
+
+    // Stopword ratio (lower is better)
+    const STOPWORDS: &[&str] = &[
+        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
+        "have", "has", "had", "do", "does", "did", "will", "would", "could",
+        "should", "may", "might", "must", "shall", "can", "to", "of", "in",
+        "for", "on", "with", "at", "by", "from", "and", "but", "or", "as",
+    ];
+
+    let stopword_count = words
+        .iter()
+        .filter(|w| STOPWORDS.contains(&w.to_lowercase().as_str()))
+        .count();
+
+    let stopword_ratio = stopword_count as f32 / words.len() as f32;
+
+    // Entity-like ratio (capitalized, numbers, special terms)
+    let entity_count = words
+        .iter()
+        .filter(|w| {
+            w.chars()
+                .any(|c| c.is_numeric() || c.is_uppercase())
+        })
+        .count();
+
+    let entity_ratio = entity_count as f32 / words.len() as f32;
+
+    // Combined density score
+    (1.0 - stopword_ratio) * 0.7 + entity_ratio * 0.3
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use indextree::Arena;
+
+    fn make_test_node_id() -> NodeId {
+        let mut arena = Arena::new();
+        let node = crate::domain::TreeNode {
+            title: "Test".to_string(),
+            structure: String::new(),
+            content: String::new(),
+            summary: String::new(),
+            depth: 0,
+            start_index: 0,
+            end_index: 0,
+            start_page: None,
+            end_page: None,
+            node_id: None,
+            physical_index: None,
+            token_count: None,
+        };
+        NodeId(arena.new_node(node))
+    }
+
+    #[test]
+    fn test_keyword_extraction() {
+        let keywords = extract_keywords("What is the architecture of vectorless?");
+        assert!(keywords.contains(&"architecture".to_string()));
+        assert!(keywords.contains(&"vectorless".to_string()));
+        assert!(!keywords.contains(&"what".to_string())); // stopword
+        assert!(!keywords.contains(&"the".to_string())); // stopword
+    }
+
+    #[test]
+    fn test_keyword_score() {
+        let scorer = RelevanceScorer::new(
+            "vectorless architecture",
+            ScoringStrategyConfig::KeywordOnly,
+        );
+
+        let chunk = ContentChunk::new(
+            make_test_node_id(),
+            "Test".to_string(),
+            "Vectorless has a unique architecture for document retrieval.".to_string(),
+            0,
+
); + + let ctx = ScoringContext::default(); + let score = scorer.compute_keyword_score(&chunk.content); + + assert!(score > 0.5); // Should match both keywords + } + + #[test] + fn test_density_score() { + // High density content + let high_density = "Rust 1.85+ requires Cargo.toml configuration with [dependencies]"; + let score = compute_density(high_density); + assert!(score > 0.5); + + // Low density content (many stopwords) + let low_density = "This is a test of the system with some words in it"; + let score = compute_density(low_density); + assert!(score < 0.7); + } + + #[test] + fn test_depth_penalty() { + let shallow = ContentChunk::new( + make_test_node_id(), + "Test".to_string(), + "Content".to_string(), + 0, + ); + + let deep = ContentChunk::new( + make_test_node_id(), + "Test".to_string(), + "Content".to_string(), + 5, + ); + + let scorer = RelevanceScorer::new("test", ScoringStrategyConfig::KeywordOnly); + let ctx = ScoringContext::default(); + + let shallow_score = scorer.score_chunk(&shallow, &ctx); + let deep_score = scorer.score_chunk(&deep, &ctx); + + assert!(shallow_score.components.depth_penalty > deep_score.components.depth_penalty); + } + + #[test] + fn test_score_components_final_score() { + let components = ScoreComponents { + keyword_score: 0.8, + bm25_score: 0.6, + depth_penalty: 0.9, + path_bonus: 0.1, + density_score: 0.5, + }; + + let final_score = components.final_score(); + assert!(final_score > 0.0 && final_score <= 1.0); + } +} diff --git a/src/retrieval/mod.rs b/src/retrieval/mod.rs index 5ff07413..565d0fa8 100644 --- a/src/retrieval/mod.rs +++ b/src/retrieval/mod.rs @@ -54,6 +54,7 @@ mod types; pub mod cache; pub mod complexity; +pub mod content; pub mod pilot; pub mod pipeline; pub mod search; @@ -98,6 +99,13 @@ pub use complexity::ComplexityDetector; // Cache exports pub use cache::PathCache; +// Content aggregation exports +pub use content::{ + AggregationResult, AllocationResult, AllocationStrategy, BudgetAllocator, ContentAggregator, + ContentAggregatorConfig, ContentChunk, ContentRelevance, OutputFormat, RelevanceScorer, + ScoreComponents, ScoringStrategyConfig, SelectedContent, StructureBuilder, StructuredContent, +}; + // Pilot exports pub use pilot::{ BudgetConfig, InterventionConfig, InterventionPoint, Pilot, PilotConfig, PilotDecision, From 9c0e3b634b0ebff991bf9831db4dca5d5b828bc8 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 20:46:20 +0800 Subject: [PATCH 14/21] feat(content): add content aggregation example and integrate with judge stage - Add comprehensive content aggregation example demonstrating scoring, budget allocation, and structure building - Export CandidateNode for content aggregation module - Integrate ContentAggregator into JudgeStage with configurable precision-focused aggregation - Add with_content_aggregator and with_default_content_aggregator methods to JudgeStage - Implement relevance scoring, token budget allocation, and hierarchical content selection in judge stage - Update documentation to reflect new content aggregation capabilities --- examples/content_aggregation.rs | 175 ++++++++++++++++++++++++++++++++ src/retrieval/content/mod.rs | 2 +- src/retrieval/stages/judge.rs | 65 +++++++++++- 3 files changed, 236 insertions(+), 6 deletions(-) create mode 100644 examples/content_aggregation.rs diff --git a/examples/content_aggregation.rs b/examples/content_aggregation.rs new file mode 100644 index 00000000..5fe71a32 --- /dev/null +++ b/examples/content_aggregation.rs @@ -0,0 +1,175 @@ +// 
Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Content Aggregation Accuracy Example +//! +//! This example demonstrates the content aggregation module's ability to: +//! 1. Score content relevance +//! 2. Allocate token budget +//! 3. Build structured output +//! +//! # Usage +//! +//! ```bash +//! cargo run --example content_aggregation +//! ``` + +use vectorless::retrieval::content::{ + ContentAggregator, ContentAggregatorConfig, BudgetAllocator, AllocationStrategy, + StructureBuilder, OutputFormat, RelevanceScorer, ScoringStrategyConfig, + ContentChunk, ScoringContext, +}; +use vectorless::domain::NodeId; +use indextree::Arena; + +fn make_node_id() -> NodeId { + let mut arena = Arena::new(); + let node = vectorless::domain::TreeNode { + title: "Test".to_string(), + structure: String::new(), + content: String::new(), + summary: String::new(), + depth: 0, + start_index: 0, + end_index: 0, + start_page: None, + end_page: None, + node_id: None, + physical_index: None, + token_count: None, + }; + NodeId(arena.new_node(node)) +} + +fn main() { + println!("=== Content Aggregation Accuracy Demo ===\n"); + + // 1. Demonstrate Relevance Scoring + println!("1. Relevance Scoring Demo"); + println!("---------------------------"); + + let query = "What is the architecture of vectorless?"; + let scorer = RelevanceScorer::new(query, ScoringStrategyConfig::KeywordWithBM25); + + let chunks = vec![ + ContentChunk::new( + make_node_id(), + "Architecture Overview".to_string(), + "Vectorless uses a tree-based architecture for document navigation. The system consists of multiple stages: parsing, indexing, and retrieval.".to_string(), + 0, + ), + ContentChunk::new( + make_node_id(), + "Installation Guide".to_string(), + "To install vectorless, add it to your Cargo.toml file. Then run cargo build to compile.".to_string(), + 1, + ), + ContentChunk::new( + make_node_id(), + "Core Components".to_string(), + "The architecture includes Pilot for navigation, Judge for sufficiency checking, and multiple search algorithms like beam search and greedy search.".to_string(), + 1, + ), + ]; + + let ctx = ScoringContext::default(); + + println!("Query: \"{}\"", query); + println!("\nScored chunks:"); + for chunk in &chunks { + let relevance = scorer.score_chunk(chunk, &ctx); + println!(" - '{}' (depth {}): score {:.3}", + chunk.title, chunk.depth, relevance.score); + println!(" Components: keyword={:.2}, bm25={:.2}, depth_penalty={:.2}, density={:.2}", + relevance.components.keyword_score, + relevance.components.bm25_score, + relevance.components.depth_penalty, + relevance.components.density_score, + ); + } + + // 2. Demonstrate Budget Allocation + println!("\n\n2. 
Budget Allocation Demo");
    println!("---------------------------");

    let scored: Vec<_> = chunks
        .iter()
        .map(|chunk| scorer.score_chunk(chunk, &ctx))
        .collect();

    let strategies = vec![
        ("Greedy", AllocationStrategy::Greedy),
        ("Hierarchical (20%/level)", AllocationStrategy::Hierarchical { min_per_level: 0.2 }),
    ];

    for (name, strategy) in strategies {
        let allocator = BudgetAllocator::new(200)
            .with_strategy(strategy);

        let result = allocator.allocate(scored.clone(), 2);

        println!("\n{} Strategy:", name);
        println!("  Tokens used: {}/{}", result.tokens_used, 200);
        println!("  Items selected: {}", result.selected.len());
        println!("  Avg score: {:.3}", result.stats.avg_score);

        for content in &result.selected {
            let trunc = if content.is_truncated() { " [truncated]" } else { "" };
            println!("    - '{}' ({} tokens, score {:.2}){}",
                content.title, content.tokens, content.score, trunc);
        }
    }

    // 3. Demonstrate Structure Building
    println!("\n\n3. Structure Building Demo");
    println!("---------------------------");

    let formats = vec![
        ("Markdown", OutputFormat::Markdown),
        ("Flat", OutputFormat::Flat),
    ];

    let allocator = BudgetAllocator::new(500)
        .with_strategy(AllocationStrategy::Greedy);
    let result = allocator.allocate(scored.clone(), 2);

    for (name, format) in formats {
        let builder = StructureBuilder::new(format);
        let tree = vectorless::domain::DocumentTree::new("Test", "");
        let structured = builder.build(result.selected.clone(), &tree);

        println!("\n{} Output ({} chars, {} tokens):", name, structured.content.len(), structured.metadata.total_tokens);
        let preview = if structured.content.len() > 300 {
            format!("{}...", &structured.content[..300])
        } else {
            structured.content.clone()
        };
        println!("{}", preview.lines().take(8).collect::<Vec<_>>().join("\n"));
    }
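
    // The config enum also maps "json" and "tree" output formats; assuming
    // `OutputFormat` mirrors `OutputFormatConfig`'s variants, they could be
    // exercised the same way:
    //
    //     let builder = StructureBuilder::new(OutputFormat::Json);
    //     let structured = builder.build(result.selected.clone(), &tree);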

    // 4. Demonstrate Full Aggregation Pipeline
    println!("\n\n4. Full Aggregation Pipeline Demo");
    println!("-----------------------------------");

    let configs = vec![
        ("Default (4000 tokens)", ContentAggregatorConfig::default()),
        ("Conservative (1000 tokens)", ContentAggregatorConfig::new()
            .with_token_budget(1000)
            .with_min_relevance(0.3)),
        ("High Precision (2000 tokens, 0.5 threshold)", ContentAggregatorConfig::new()
            .with_token_budget(2000)
            .with_min_relevance(0.5)),
    ];

    for (name, config) in configs {
        println!("\n{} Config:", name);
        println!("  Token budget: {}", config.token_budget);
        println!("  Min relevance: {:.1}", config.min_relevance_score);

        let aggregator = ContentAggregator::new(config);
        // Note: Full aggregation requires a DocumentTree with actual content
        let _ = aggregator; // Suppress unused warning
    }

    println!("\n=== Demo Complete ===");
}
diff --git a/src/retrieval/content/mod.rs b/src/retrieval/content/mod.rs
index 5126057f..2a78f801 100644
--- a/src/retrieval/content/mod.rs
+++ b/src/retrieval/content/mod.rs
@@ -37,7 +37,7 @@ mod builder;
 mod config;
 mod scorer;
 
-pub use aggregator::{ContentAggregator, AggregationResult};
+pub use aggregator::{ContentAggregator, AggregationResult, CandidateNode};
 pub use budget::{BudgetAllocator, AllocationStrategy, AllocationResult, SelectedContent};
 pub use builder::{StructureBuilder, OutputFormat, StructuredContent, ContentTree};
 pub use config::{ContentAggregatorConfig, OutputFormatConfig, ScoringStrategyConfig};
diff --git a/src/retrieval/stages/judge.rs b/src/retrieval/stages/judge.rs
index 5c7bb129..9cc11e68 100644
--- a/src/retrieval/stages/judge.rs
+++ b/src/retrieval/stages/judge.rs
@@ -12,6 +12,7 @@ use tracing::{info, warn};
 
 use crate::domain::estimate_tokens;
 use crate::llm::LlmClient;
+use crate::retrieval::content::{ContentAggregator, ContentAggregatorConfig};
 use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage, StageOutcome};
 use crate::retrieval::sufficiency::{LlmJudge, SufficiencyChecker, ThresholdChecker};
 use crate::retrieval::types::{RetrievalResult, RetrieveResponse, SufficiencyLevel};
@@ -23,18 +24,26 @@ use crate::retrieval::types::{RetrievalResult, RetrieveResponse, SufficiencyLeve
 /// 2. Checks if content is sufficient to answer the query
 /// 3. Can trigger additional search iterations if needed
 ///
+/// # Content Aggregation
+///
+/// By default, uses simple content collection. For precision-focused
+/// aggregation with token budget control, use `with_content_aggregator()`.
+///
 /// # Example
 ///
 /// ```rust,ignore
 /// let stage = JudgeStage::new()
 ///     .with_llm_judge(llm_client)
-///     .with_max_iterations(3);
+///     .with_max_iterations(3)
+///     .with_content_aggregator(ContentAggregatorConfig::default());
 /// ```
 pub struct JudgeStage {
     threshold_checker: ThresholdChecker,
     llm_judge: Option<LlmJudge>,
     max_iterations: usize,
     use_llm_judge: bool,
+    /// Optional content aggregator for precision-focused aggregation.
+    content_aggregator: Option<ContentAggregator>,
 }
 
 impl Default for JudgeStage {
@@ -51,6 +60,7 @@
             llm_judge: None,
             max_iterations: 3,
             use_llm_judge: false,
+            content_aggregator: None,
         }
     }
 
@@ -67,13 +77,58 @@
         self
     }
 
+    /// Add content aggregator for precision-focused aggregation.
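+    ///
+    /// The [`ContentAggregatorConfig`] controls the token budget, the
+    /// minimum relevance threshold, the scoring strategy, and the output
+    /// format.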
+    ///
+    /// When enabled, content aggregation uses:
+    /// - Relevance scoring (keyword + BM25)
+    /// - Token budget allocation
+    /// - Hierarchical content selection
+    pub fn with_content_aggregator(mut self, config: ContentAggregatorConfig) -> Self {
+        self.content_aggregator = Some(ContentAggregator::new(config));
+        self
+    }
+
+    /// Enable content aggregator with default configuration.
+    pub fn with_default_content_aggregator(mut self) -> Self {
+        self.content_aggregator = Some(ContentAggregator::with_defaults());
+        self
+    }
+
     /// Aggregate content from candidates.
     ///
-    /// Always includes:
-    /// 1. Node's own content (if available)
-    /// 2. All descendant leaf content (for complete context)
-    /// 3. Falls back to summary only if no content at all
+    /// When content aggregator is enabled:
+    /// - Uses relevance scoring for content selection
+    /// - Respects token budget
+    /// - Prioritizes high-relevance content
+    ///
+    /// Otherwise falls back to simple collection:
+    /// - Collects node's own content + descendant leaf content
     fn aggregate_content(&self, ctx: &PipelineContext) -> (String, usize) {
+        // Use ContentAggregator if configured
+        if let Some(ref aggregator) = self.content_aggregator {
+            use crate::retrieval::content::CandidateNode;
+
+            let candidates: Vec<CandidateNode> = ctx.candidates
+                .iter()
+                .map(|c| CandidateNode::new(c.node_id, c.score, c.depth))
+                .collect();
+
+            let result = aggregator.aggregate(&candidates, &ctx.tree, &ctx.query);
+            info!(
+                "ContentAggregator: {} nodes, {} tokens, avg score {:.2}",
+                result.nodes_included,
+                result.tokens_used,
+                result.avg_score
+            );
+            return (result.content, result.tokens_used);
+        }
+
+        // Fallback: simple content collection
+        self.aggregate_content_simple(ctx)
+    }
+
+    /// Simple content aggregation (legacy behavior).
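+    ///
+    /// Always includes the node's own content plus all descendant leaf
+    /// content, falling back to the summary only when no content exists.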
+ fn aggregate_content_simple(&self, ctx: &PipelineContext) -> (String, usize) { let mut content_parts = Vec::new(); let mut total_tokens = 0; From 9b365cf0aef8b96db6a162656a82378787ad9ba2 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 21:03:35 +0800 Subject: [PATCH 15/21] feat: add content aggregator configuration support - Add ContentAggregatorConfig struct with fields for token budget, relevance scoring, output formats, and deduplication settings - Implement conversion from config to retrieval content aggregator config - Add content configuration option to PipelineRetriever with with_content_config method - Configure content aggregator in Judge stage when enabled - Update template.toml with comprehensive content aggregator settings - Add default values and helper methods for content configuration --- src/client/builder.rs | 7 ++ src/config/types.rs | 120 ++++++++++++++++++++++++++++ src/retrieval/pipeline_retriever.rs | 27 ++++++- templates/template.toml | 41 ++++++++++ 4 files changed, 194 insertions(+), 1 deletion(-) diff --git a/src/client/builder.rs b/src/client/builder.rs index bc567591..306b5b27 100644 --- a/src/client/builder.rs +++ b/src/client/builder.rs @@ -194,6 +194,13 @@ impl EngineBuilder { let llm_client = crate::llm::LlmClient::new(llm_config); retriever = retriever.with_llm_client(llm_client); + // Configure content aggregator if enabled + if retrieval_config.content.enabled { + retriever = retriever.with_content_config( + retrieval_config.content.to_aggregator_config() + ); + } + Ok(Engine::with_components( config, workspace, retriever, executor, )) diff --git a/src/config/types.rs b/src/config/types.rs index 3a40d920..35cefd52 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -225,6 +225,10 @@ pub struct RetrievalConfig { /// Strategy-specific configuration. #[serde(default)] pub strategy: StrategyConfig, + + /// Content aggregator configuration. + #[serde(default)] + pub content: ContentAggregatorConfig, } impl Default for RetrievalConfig { @@ -240,6 +244,7 @@ impl Default for RetrievalConfig { sufficiency: SufficiencyConfig::default(), cache: CacheConfig::default(), strategy: StrategyConfig::default(), + content: ContentAggregatorConfig::default(), } } } @@ -363,6 +368,121 @@ impl Default for StrategyConfig { } } +/// Content aggregator configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContentAggregatorConfig { + /// Whether content aggregator is enabled. + /// When disabled, uses simple content collection (legacy behavior). + #[serde(default = "default_true")] + pub enabled: bool, + + /// Maximum tokens for aggregated content. + #[serde(default)] + pub token_budget: usize, + + /// Minimum relevance score threshold (0.0 - 1.0). + /// Content below this threshold will be filtered out. + #[serde(default)] + pub min_relevance_score: f32, + + /// Scoring strategy: "keyword_only" | "keyword_bm25" | "hybrid" + #[serde(default)] + pub scoring_strategy: String, + + /// Output format: "markdown" | "json" | "tree" | "flat" + #[serde(default)] + pub output_format: String, + + /// Include relevance scores in output. + #[serde(default)] + pub include_scores: bool, + + /// Minimum budget allocation per depth level (0.0 - 1.0). + #[serde(default)] + pub hierarchical_min_per_level: f32, + + /// Enable content deduplication. + #[serde(default = "default_true")] + pub deduplicate: bool, + + /// Similarity threshold for deduplication (0.0 - 1.0). 
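+    /// Only consulted when `deduplicate` is enabled.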
+    #[serde(default)]
+    pub dedup_threshold: f32,
+}
+
+impl Default for ContentAggregatorConfig {
+    fn default() -> Self {
+        Self {
+            enabled: true,
+            token_budget: 4000,
+            min_relevance_score: 0.2,
+            scoring_strategy: "keyword_bm25".to_string(),
+            output_format: "markdown".to_string(),
+            include_scores: false,
+            hierarchical_min_per_level: 0.1,
+            deduplicate: true,
+            dedup_threshold: 0.9,
+        }
+    }
+}
+
+impl ContentAggregatorConfig {
+    /// Create a new config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Disable content aggregator (use legacy behavior).
+    pub fn disabled() -> Self {
+        Self {
+            enabled: false,
+            ..Self::default()
+        }
+    }
+
+    /// Set the token budget.
+    pub fn with_token_budget(mut self, budget: usize) -> Self {
+        self.token_budget = budget;
+        self
+    }
+
+    /// Set the minimum relevance score.
+    pub fn with_min_relevance(mut self, score: f32) -> Self {
+        self.min_relevance_score = score.clamp(0.0, 1.0);
+        self
+    }
+
+    /// Convert to the retrieval content aggregator config.
+    pub fn to_aggregator_config(&self) -> crate::retrieval::content::ContentAggregatorConfig {
+        use crate::retrieval::content::{ContentAggregatorConfig as RetrievalContentConfig,
+            OutputFormatConfig, ScoringStrategyConfig};
+
+        let scoring_strategy = match self.scoring_strategy.as_str() {
+            "keyword_only" => ScoringStrategyConfig::KeywordOnly,
+            "hybrid" => ScoringStrategyConfig::Hybrid,
+            _ => ScoringStrategyConfig::KeywordWithBM25,
+        };
+
+        let output_format = match self.output_format.as_str() {
+            "json" => OutputFormatConfig::Json,
+            "tree" => OutputFormatConfig::Tree,
+            "flat" => OutputFormatConfig::Flat,
+            _ => OutputFormatConfig::Markdown,
+        };
+
+        RetrievalContentConfig {
+            token_budget: self.token_budget,
+            min_relevance_score: self.min_relevance_score,
+            scoring_strategy,
+            output_format,
+            include_scores: self.include_scores,
+            hierarchical_min_per_level: self.hierarchical_min_per_level,
+            deduplicate: self.deduplicate,
+            dedup_threshold: self.dedup_threshold,
+        }
+    }
+}
+
 /// Storage configuration.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct StorageConfig {
diff --git a/src/retrieval/pipeline_retriever.rs b/src/retrieval/pipeline_retriever.rs
index 16bf5fe2..e51d187a 100644
--- a/src/retrieval/pipeline_retriever.rs
+++ b/src/retrieval/pipeline_retriever.rs
@@ -9,6 +9,7 @@
 use async_trait::async_trait;
 use std::sync::Arc;
 
+use super::content::ContentAggregatorConfig;
 use super::pipeline::RetrievalOrchestrator;
 use super::retriever::{CostEstimate, Retriever, RetrieverError, RetrieverResult};
 use super::stages::{AnalyzeStage, JudgeStage, PlanStage, SearchStage};
@@ -38,6 +39,8 @@
 pub struct PipelineRetriever {
     llm_client: Option<LlmClient>,
     max_backtracks: usize,
     max_iterations: usize,
+    /// Content aggregator configuration.
+    content_config: Option<ContentAggregatorConfig>,
 }
 
 impl Default for PipelineRetriever {
@@ -53,6 +56,7 @@ impl PipelineRetriever {
             llm_client: None,
             max_backtracks: 5,
             max_iterations: 10,
+            content_config: None,
         }
     }
 
@@ -74,6 +78,15 @@
         self
     }
 
+    /// Set content aggregator configuration.
+    ///
+    /// When enabled, the Judge stage uses precision-focused content
+    /// aggregation with relevance scoring and token budget control.
+    pub fn with_content_config(mut self, config: ContentAggregatorConfig) -> Self {
+        self.content_config = Some(config);
+        self
+    }
+
    /// Build the orchestrator with all stages.
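    ///
    /// Stages run in pipeline order (analyze, plan, search, judge); the
    /// judge stage receives the optional LLM judge and, when configured,
    /// the content aggregator.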
fn build_orchestrator(&self) -> RetrievalOrchestrator { let mut orchestrator = RetrievalOrchestrator::new() @@ -99,11 +112,15 @@ impl PipelineRetriever { } orchestrator = orchestrator.stage(search_stage); - // Add judge stage + // Add judge stage with optional content aggregator let mut judge_stage = JudgeStage::new(); if let Some(ref client) = self.llm_client { judge_stage = judge_stage.with_llm_judge(client.clone()); } + // Configure content aggregator if provided + if let Some(ref config) = self.content_config { + judge_stage = judge_stage.with_content_aggregator(config.clone()); + } orchestrator = orchestrator.stage(judge_stage); orchestrator @@ -161,6 +178,7 @@ impl Clone for PipelineRetriever { llm_client: self.llm_client.clone(), max_backtracks: self.max_backtracks, max_iterations: self.max_iterations, + content_config: self.content_config.clone(), } } } @@ -183,4 +201,11 @@ mod tests { assert_eq!(cloned.name(), "pipeline"); assert_eq!(cloned.max_backtracks, 3); } + + #[test] + fn test_pipeline_retriever_with_content_config() { + let config = ContentAggregatorConfig::default(); + let retriever = PipelineRetriever::new().with_content_config(config); + assert!(retriever.content_config.is_some()); + } } diff --git a/templates/template.toml b/templates/template.toml index 5ea61bce..66e85e21 100644 --- a/templates/template.toml +++ b/templates/template.toml @@ -108,6 +108,47 @@ high_similarity_threshold = 0.8 # Low similarity threshold for "explore" decision low_similarity_threshold = 0.3 +# Content aggregator configuration +# Controls how retrieved content is aggregated and returned +[retrieval.content] +# Enable/disable content aggregator +# When disabled, uses simple content collection (legacy behavior) +enabled = true + +# Maximum tokens for aggregated content +token_budget = 4000 + +# Minimum relevance score threshold (0.0 - 1.0) +# Content below this threshold will be filtered out +min_relevance_score = 0.2 + +# Scoring strategy: "keyword_only" | "keyword_bm25" | "hybrid" +# - keyword_only: Fast keyword matching (no BM25) +# - keyword_bm25: Keyword + BM25 scoring (recommended) +# - hybrid: Keyword + LLM reranking (most accurate, slower) +scoring_strategy = "keyword_bm25" + +# Output format: "markdown" | "json" | "tree" | "flat" +# - markdown: Structured markdown with headers (default) +# - json: JSON format for programmatic use +# - tree: Tree structure preserving hierarchy +# - flat: Flat text format +output_format = "markdown" + +# Include relevance scores in output (useful for debugging) +include_scores = false + +# Minimum budget allocation per depth level (0.0 - 1.0) +# Ensures each tree level gets representation +hierarchical_min_per_level = 0.1 + +# Enable content deduplication +deduplicate = true + +# Similarity threshold for deduplication (0.0 - 1.0) +# Higher = more aggressive deduplication +dedup_threshold = 0.9 + [storage] # Workspace directory for persisted documents # From 7c9b287138537e08ecbf0e75b5608ab1689d57e1 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 21:29:00 +0800 Subject: [PATCH 16/21] feat(client): add comprehensive module refactoring design document Add detailed design document outlining the complete refactoring of the client module to achieve professional, product-level architecture with clear separation of concerns. 
The design addresses several key issues:
- God object anti-pattern in engine.rs
- Mixed abstraction levels
- Lack of session management
- Missing event system
- Scattered state management

The proposed architecture includes:
- Context module for request-scoped configuration
- Session management for multi-document operations
- Dedicated clients for indexing, retrieval, and workspace operations
- Event system for callbacks and progress reporting
- Simplified main engine as orchestrator

Also implement the Builder pattern enhancement with event emitter
support and add the initial context module with ClientContext
functionality for request-scoped configuration overrides.
---
 docs/design/client-module.md | 794 +++++++++++++++++++++++++++++++++++
 src/client/builder.rs        |  41 +-
 src/client/context.rs        | 337 +++++++++++++++
 src/client/engine.rs         | 543 ++++++++++--------------
 src/client/events.rs         | 365 ++++++++++++++++
 src/client/indexer.rs        | 351 ++++++++++++++++
 src/client/mod.rs            | 133 +++++-
 src/client/retriever.rs      | 408 ++++++++++++++++++
 src/client/session.rs        | 493 ++++++++++++++++++++++
 src/client/types.rs          | 136 +++++-
 src/client/workspace.rs      | 372 ++++++++++++++++
 11 files changed, 3626 insertions(+), 347 deletions(-)
 create mode 100644 docs/design/client-module.md
 create mode 100644 src/client/context.rs
 create mode 100644 src/client/events.rs
 create mode 100644 src/client/indexer.rs
 create mode 100644 src/client/retriever.rs
 create mode 100644 src/client/session.rs
 create mode 100644 src/client/workspace.rs

diff --git a/docs/design/client-module.md b/docs/design/client-module.md
new file mode 100644
index 00000000..e4ab796b
--- /dev/null
+++ b/docs/design/client-module.md
@@ -0,0 +1,794 @@
+# Client Module Refactoring Design
+
+## Overview
+
+This document describes the refactoring of the `client` module to achieve a more professional, product-level architecture with clear separation of concerns.
+
+## Current Problems
+
+### 1. God Object Anti-pattern
+`engine.rs` (600+ lines) handles too many responsibilities:
+- Document indexing
+- Document retrieval
+- Workspace management
+- Configuration management
+- Format detection
+- Page parsing
+
+### 2. Mixed Abstraction Levels
+High-level operations (`query()`) mixed with low-level utilities (`parse_page_range()`).
+
+### 3. No Session Management
+Each operation is independent; no way to maintain context across multiple operations.
+
+### 4. Missing Event System
+No progress callbacks or event hooks for long-running operations.
+
+### 5. Scattered State Management
+State split across `Arc<RwLock<Workspace>>`, `Arc<Mutex<PipelineExecutor>>`, and `Arc<PipelineRetriever>`.
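+
+A sketch of the current layout (simplified; field names follow the
+existing `engine.rs` struct):
+
+```rust
+// Every operation must coordinate three separately-synchronized handles.
+pub struct Engine {
+    config: Arc<Config>,
+    workspace: Option<Arc<RwLock<Workspace>>>,
+    retriever: Arc<PipelineRetriever>,
+    executor: Arc<Mutex<PipelineExecutor>>,
+}
+```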
+ +--- + +## Proposed Architecture + +### Module Structure + +``` +src/client/ +├── mod.rs # Re-exports and documentation +├── engine.rs # Core orchestrator (simplified) +├── builder.rs # Builder pattern (enhanced) +├── types.rs # Public API types +├── context.rs # Request context and configuration +├── session.rs # Session management +├── indexer.rs # Document indexing operations +├── retriever.rs # Query and retrieval operations +├── workspace.rs # Workspace operations (CRUD) +└── events.rs # Event system and callbacks +``` + +### Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Client API │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ EngineBuilder │───▶│ Engine │◀───│ Session │ │ +│ └──────────────┘ └──────┬───────┘ └──────────────┘ │ +│ │ │ +│ ┌──────────────┼──────────────┐ │ +│ ▼ ▼ ▼ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Indexer │ │ Retriever │ │ Workspace │ │ +│ │ Client │ │ Client │ │ Client │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ │ +│ └───────────────┴───────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌────────────────┐ │ +│ │ Context │ │ +│ │ (Request State)│ │ +│ └────────────────┘ │ +│ │ +│ ┌────────────────┐ │ +│ │ Events │ │ +│ │ (Callbacks) │ │ +│ └────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Component Design + +### 1. Context (`context.rs`) + +Request-scoped configuration and state management. + +```rust +/// Request context for client operations. +pub struct ClientContext { + /// Unique request ID for tracing. + pub request_id: Uuid, + + /// Request-specific configuration overrides. + pub config: RequestContextConfig, + + /// Event emitter for this request. + pub events: EventEmitter, + + /// Request metadata. + pub metadata: HashMap, + + /// Request deadline (for timeout). + pub deadline: Option, +} + +/// Request-specific configuration overrides. +pub struct RequestContextConfig { + /// Override top_k for retrieval. + pub top_k: Option, + + /// Override token budget. + pub token_budget: Option, + + /// Override content format. + pub content_format: Option, + + /// Enable/disable features. + pub features: FeatureFlags, +} + +/// Feature flags for request. +pub struct FeatureFlags { + pub include_summaries: bool, + pub include_content: bool, + pub enable_cache: bool, + pub enable_sufficiency_check: bool, +} +``` + +### 2. Session (`session.rs`) + +Multi-document session management. + +```rust +/// Session for managing multiple document operations. +pub struct Session { + /// Session ID. + pub id: Uuid, + + /// Session configuration. + config: SessionConfig, + + /// Active document contexts. + documents: HashMap, + + /// Shared engine reference. + engine: Engine, + + /// Session statistics. + stats: SessionStats, + + /// Created at timestamp. + created_at: DateTime, +} + +/// Document context within a session. +pub struct DocumentContext { + /// Document ID. + pub doc_id: String, + + /// Preloaded tree (cached). + tree: Option>, + + /// Document metadata. + meta: DocumentMeta, + + /// Access statistics. + access_count: usize, + last_accessed: DateTime, +} + +/// Session configuration. +pub struct SessionConfig { + /// Maximum documents to keep in memory. + pub max_cached_documents: usize, + + /// Preload strategy. + pub preload_strategy: PreloadStrategy, + + /// Cache eviction policy. 
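+    /// (Assumed LRU-style: candidates ranked by the `access_count` /
+    /// `last_accessed` statistics tracked on `DocumentContext`.)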
+ pub eviction_policy: EvictionPolicy, +} + +impl Session { + /// Create a new session. + pub fn new(engine: Engine) -> Self; + + /// Index a document into this session. + pub async fn index(&self, path: impl AsRef) -> Result; + + /// Query a document within this session. + pub async fn query(&self, doc_id: &str, question: &str) -> Result; + + /// Query across all documents in session. + pub async fn query_all(&self, question: &str) -> Result>; + + /// Get document tree (cached). + pub fn get_tree(&self, doc_id: &str) -> Result>; + + /// Preload documents for faster access. + pub async fn preload(&self, doc_ids: &[&str]) -> Result<()>; + + /// Clear session cache. + pub fn clear_cache(&self); + + /// Get session statistics. + pub fn stats(&self) -> &SessionStats; +} +``` + +### 3. Indexer Client (`indexer.rs`) + +Document indexing operations. + +```rust +/// Document indexing client. +pub struct IndexerClient { + /// Pipeline executor. + executor: Arc>, + + /// Configuration. + config: IndexerConfig, +} + +/// Indexing configuration. +pub struct IndexerConfig { + /// Default index mode. + pub default_mode: IndexMode, + + /// Summary generation strategy. + pub summary_strategy: SummaryStrategy, + + /// Whether to generate node IDs. + pub generate_ids: bool, + + /// Whether to generate descriptions. + pub generate_descriptions: bool, +} + +impl IndexerClient { + /// Create a new indexer client. + pub fn new(executor: PipelineExecutor) -> Self; + + /// Index a document from file. + pub async fn index_file( + &self, + path: impl AsRef, + options: IndexOptions, + events: &EventEmitter, + ) -> Result; + + /// Index from raw content. + pub async fn index_content( + &self, + content: &str, + format: DocumentFormat, + options: IndexOptions, + ) -> Result; + + /// Detect document format. + pub fn detect_format(&self, path: &Path, options: &IndexOptions) -> Result; + + /// Validate document before indexing. + pub fn validate(&self, path: &Path) -> Result; +} + +/// Indexing events. +pub enum IndexEvent { + /// Started indexing. + Started { path: String }, + + /// Format detected. + FormatDetected { format: DocumentFormat }, + + /// Parsing progress. + ParsingProgress { percent: u8 }, + + /// Tree building complete. + TreeBuilt { node_count: usize }, + + /// Summary generation progress. + SummaryProgress { completed: usize, total: usize }, + + /// Indexing complete. + Complete { doc_id: String }, + + /// Error occurred. + Error { message: String }, +} +``` + +### 4. Retriever Client (`retriever.rs`) + +Query and retrieval operations. + +```rust +/// Document retrieval client. +pub struct RetrieverClient { + /// Pipeline retriever. + retriever: Arc, + + /// Configuration. + config: RetrieverConfig, +} + +/// Retrieval configuration. +pub struct RetrieverConfig { + /// Default top_k. + pub default_top_k: usize, + + /// Default token budget. + pub default_token_budget: usize, + + /// Content aggregator config. + pub content_config: ContentAggregatorConfig, + + /// Enable caching. + pub enable_cache: bool, +} + +impl RetrieverClient { + /// Create a new retriever client. + pub fn new(retriever: PipelineRetriever) -> Self; + + /// Query a document tree. + pub async fn query( + &self, + tree: &DocumentTree, + question: &str, + options: RetrieveOptions, + ctx: &ClientContext, + ) -> Result; + + /// Query with streaming results. + pub async fn query_stream( + &self, + tree: &DocumentTree, + question: &str, + options: RetrieveOptions, + ) -> impl Stream; + + /// Get similar nodes. 
+ pub fn find_similar( + &self, + tree: &DocumentTree, + node_id: NodeId, + top_k: usize, + ) -> Result>; + + /// Get node context (ancestors + siblings). + pub fn get_node_context( + &self, + tree: &DocumentTree, + node_id: NodeId, + depth: usize, + ) -> Result; +} + +/// Query events for streaming. +pub enum QueryEvent { + /// Search started. + SearchStarted { query: String }, + + /// Node visited during search. + NodeVisited { node_id: String, title: String, score: f32 }, + + /// Candidate found. + CandidateFound { node_id: String, score: f32 }, + + /// Sufficiency check result. + SufficiencyCheck { level: SufficiencyLevel, tokens: usize }, + + /// Result ready. + ResultReady { result: RetrievalResult }, + + /// Query complete. + Complete { total_results: usize, confidence: f32 }, +} +``` + +### 5. Workspace Client (`workspace.rs`) + +Document persistence operations. + +```rust +/// Workspace management client. +pub struct WorkspaceClient { + /// Workspace storage. + workspace: Arc>, + + /// Configuration. + config: WorkspaceConfig, +} + +/// Workspace configuration. +pub struct WorkspaceConfig { + /// Auto-save interval (seconds). + pub auto_save_interval: Option, + + /// Maximum cache size. + pub max_cache_size: usize, +} + +impl WorkspaceClient { + /// Create a new workspace client. + pub fn new(workspace: Workspace) -> Self; + + /// Save a document. + pub fn save(&self, doc: &PersistedDocument) -> Result<()>; + + /// Load a document. + pub fn load(&self, doc_id: &str) -> Result>; + + /// Remove a document. + pub fn remove(&self, doc_id: &str) -> Result; + + /// Check if document exists. + pub fn exists(&self, doc_id: &str) -> Result; + + /// List all documents. + pub fn list(&self) -> Result>; + + /// Get document metadata. + pub fn get_meta(&self, doc_id: &str) -> Result>; + + /// Batch operations. + pub fn batch_remove(&self, doc_ids: &[&str]) -> Result; + + /// Clear workspace. + pub fn clear(&self) -> Result; + + /// Get workspace statistics. + pub fn stats(&self) -> WorkspaceStats; +} + +/// Workspace statistics. +pub struct WorkspaceStats { + pub document_count: usize, + pub total_size_bytes: u64, + pub cache_hit_rate: f32, + pub oldest_document: Option>, + pub newest_document: Option>, +} +``` + +### 6. Events (`events.rs`) + +Event system for callbacks and progress reporting. + +```rust +/// Event emitter for client operations. +pub struct EventEmitter { + /// Event handlers. + handlers: Vec>, + + /// Async handlers (for non-blocking events). + async_handlers: Vec>, +} + +/// Event handler trait. +pub trait EventHandler: Send + Sync { + fn handle(&self, event: &Event); +} + +/// Async event handler trait. +#[async_trait] +pub trait AsyncEventHandler: Send + Sync { + async fn handle(&self, event: &Event); +} + +/// Event types. +#[derive(Debug, Clone)] +pub enum Event { + /// Indexing events. + Index(IndexEvent), + + /// Query events. + Query(QueryEvent), + + /// Workspace events. + Workspace(WorkspaceEvent), + + /// Session events. + Session(SessionEvent), +} + +/// Workspace events. +pub enum WorkspaceEvent { + DocumentSaved { doc_id: String }, + DocumentLoaded { doc_id: String, cache_hit: bool }, + DocumentRemoved { doc_id: String }, + WorkspaceCleared { count: usize }, +} + +/// Session events. +pub enum SessionEvent { + SessionCreated { session_id: Uuid }, + DocumentAdded { doc_id: String }, + DocumentEvicted { doc_id: String, reason: EvictionReason }, + SessionClosed { session_id: Uuid }, +} + +impl EventEmitter { + /// Create a new event emitter. 
+ pub fn new() -> Self; + + /// Add a sync handler. + pub fn on(mut self, handler: H) -> Self; + + /// Add an async handler. + pub fn on_async(mut self, handler: Arc) -> Self; + + /// Emit an event. + pub fn emit(&self, event: Event); + + /// Emit an event asynchronously. + pub async fn emit_async(&self, event: Event); +} + +/// Convenience handler builders. +impl EventEmitter { + /// Create handler from closure. + pub fn on_index(self, f: F) -> Self; + + /// Create handler from closure. + pub fn on_query(self, f: F) -> Self; + + /// Create progress callback. + pub fn on_progress(self, f: F) -> Self; +} + +/// Progress information. +pub struct Progress { + pub operation: Operation, + pub current: usize, + pub total: usize, + pub message: String, +} + +pub enum Operation { + Indexing, + Querying, + Loading, + Saving, +} +``` + +### 7. Simplified Engine (`engine.rs`) + +The main orchestrator, now much simpler. + +```rust +/// The main Engine client - orchestrates sub-clients. +pub struct Engine { + /// Configuration. + config: Arc, + + /// Indexer client. + indexer: IndexerClient, + + /// Retriever client. + retriever: RetrieverClient, + + /// Workspace client (optional). + workspace: Option, + + /// Event emitter. + events: EventEmitter, +} + +impl Engine { + /// Create a builder for custom configuration. + pub fn builder() -> EngineBuilder; + + // ============================================================ + // Convenience Methods (delegate to sub-clients) + // ============================================================ + + /// Index a document. + pub async fn index(&self, path: impl AsRef) -> Result { + self.index_with_options(path, IndexOptions::default()).await + } + + /// Index with options. + pub async fn index_with_options( + &self, + path: impl AsRef, + options: IndexOptions, + ) -> Result; + + /// Query a document. + pub async fn query(&self, doc_id: &str, question: &str) -> Result; + + /// Create a session for multi-document operations. + pub fn session(&self) -> Session; + + /// Get the indexer client. + pub fn indexer(&self) -> &IndexerClient; + + /// Get the retriever client. + pub fn retriever(&self) -> &RetrieverClient; + + /// Get the workspace client. + pub fn workspace(&self) -> Option<&WorkspaceClient>; + + /// Get configuration. + pub fn config(&self) -> &Config; + + // ============================================================ + // Document Operations (delegate to workspace) + // ============================================================ + + /// List documents. + pub fn list_documents(&self) -> Vec; + + /// Get document structure. + pub fn get_structure(&self, doc_id: &str) -> Result; + + /// Get page content. + pub fn get_page_content(&self, doc_id: &str, pages: &str) -> Result; + + /// Remove document. + pub fn remove(&self, doc_id: &str) -> Result; + + /// Check existence. + pub fn exists(&self, doc_id: &str) -> Result; +} +``` + +--- + +## API Examples + +### Basic Usage (Same as Before) + +```rust +let client = EngineBuilder::new() + .with_workspace("./workspace") + .build()?; + +// Index +let doc_id = client.index("./document.md").await?; + +// Query +let result = client.query(&doc_id, "What is this?").await?; +``` + +### With Events + +```rust +let client = EngineBuilder::new() + .with_workspace("./workspace") + .with_events( + EventEmitter::new() + .on_index(|e| match e { + IndexEvent::Complete { doc_id } => println!("Indexed: {}", doc_id), + _ => {} + }) + .on_query(|e| match e { + QueryEvent::NodeVisited { title, score, .. 
} => { + println!("Visited: {} (score: {:.2})", title, score); + } + _ => {} + }) + ) + .build()?; +``` + +### Session-Based Multi-Document + +```rust +let client = EngineBuilder::new() + .with_workspace("./workspace") + .build()?; + +// Create session +let session = client.session(); + +// Index multiple documents +let doc1 = session.index("./doc1.md").await?; +let doc2 = session.index("./doc2.md").await?; +let doc3 = session.index("./doc3.md").await?; + +// Query across all documents +let results = session.query_all("What is the architecture?").await?; + +// Query single document (cached tree) +let result = session.query(&doc1, "Summary?").await?; + +// Session stats +println!("Cache hit rate: {:.2}%", session.stats().cache_hit_rate * 100.0); +``` + +### Streaming Query + +```rust +let client = EngineBuilder::new() + .with_workspace("./workspace") + .build()?; + +// Stream query results +let mut stream = client.retriever() + .query_stream(&tree, "What is X?", RetrieveOptions::default()); + +while let Some(event) = stream.next().await { + match event { + QueryEvent::NodeVisited { title, score, .. } => { + println!("Exploring: {}", title); + } + QueryEvent::ResultReady { result } => { + println!("Found: {}", result.title); + } + QueryEvent::Complete { total_results, confidence } => { + println!("Done: {} results, confidence: {:.2}", total_results, confidence); + } + _ => {} + } +} +``` + +### Request Context + +```rust +let ctx = ClientContext::new() + .with_top_k(10) + .with_token_budget(8000) + .with_deadline(Duration::from_secs(30)); + +let result = client.retriever() + .query(&tree, "complex question", options, &ctx) + .await?; +``` + +--- + +## Migration Path + +### Phase 1: Add New Modules (Non-Breaking) +1. Create `context.rs`, `events.rs` +2. Create `indexer.rs`, `retriever.rs`, `workspace.rs` as wrappers +3. Update `engine.rs` to use sub-clients internally +4. All existing API remains unchanged + +### Phase 2: Add Session Support (Non-Breaking) +1. Add `session.rs` +2. Add `Engine::session()` method +3. Add multi-document query support + +### Phase 3: Enhance Events (Non-Breaking) +1. Add streaming query support +2. Add progress callbacks +3. Add async event handlers + +### Phase 4: Deprecate Old API (Breaking, Future) +1. Mark direct workspace access as deprecated +2. Encourage use of sub-clients +3. Eventually remove deprecated methods + +--- + +## File Structure After Refactoring + +``` +src/client/ +├── mod.rs # ~50 lines - exports and docs +├── engine.rs # ~150 lines - orchestration only +├── builder.rs # ~200 lines - enhanced builder +├── types.rs # ~250 lines - public types +├── context.rs # ~150 lines - request context +├── session.rs # ~200 lines - session management +├── indexer.rs # ~200 lines - indexing ops +├── retriever.rs # ~200 lines - retrieval ops +├── workspace.rs # ~150 lines - workspace ops +└── events.rs # ~200 lines - event system +``` + +Total: ~1750 lines (vs current ~1000 lines, but much better organized) + +--- + +## Benefits + +1. **Single Responsibility**: Each module has one clear purpose +2. **Testability**: Sub-clients can be tested independently +3. **Extensibility**: Easy to add new features without touching Engine +4. **Performance**: Session caching reduces redundant loads +5. **Observability**: Events provide visibility into operations +6. **API Clarity**: Clear separation between indexing, retrieval, and storage +7. **Streaming**: Support for progressive results +8. 
**Context Management**: Request-scoped configuration diff --git a/src/client/builder.rs b/src/client/builder.rs index 306b5b27..76a335cf 100644 --- a/src/client/builder.rs +++ b/src/client/builder.rs @@ -9,7 +9,8 @@ use crate::config::{Config, ConfigLoader, RetrievalConfig}; use crate::retrieval::PipelineRetriever; use crate::storage::Workspace; -use super::Engine; +use super::engine::Engine; +use super::events::EventEmitter; /// Default configuration file names to search for. const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorless.toml"]; @@ -42,6 +43,9 @@ pub struct EngineBuilder { /// Custom retrieval config. retrieval_config: Option, + + /// Event emitter. + events: Option, } impl EngineBuilder { @@ -53,6 +57,7 @@ impl EngineBuilder { config_path: None, config: None, retrieval_config: None, + events: None, } } @@ -84,6 +89,13 @@ impl EngineBuilder { self } + /// Set the event emitter for callbacks. + #[must_use] + pub fn with_events(mut self, events: EventEmitter) -> Self { + self.events = Some(events); + self + } + /// Search for config file in current directory and parent directories. fn find_config_file() -> Option { let current_dir = std::env::current_dir().ok()?; @@ -127,32 +139,33 @@ impl EngineBuilder { /// Returns a [`BuildError`] if: /// - Configuration loading fails /// - Workspace creation fails + /// - Required API key is missing pub fn build(self) -> Result { // Load or create configuration - let config = if let Some(config) = self.config { - // Use explicitly provided config + let mut config = if let Some(config) = self.config { config } else if let Some(path) = self.config_path { - // Load from specified path ConfigLoader::new() .file(&path) .load() .map_err(|e| BuildError::Config(e.to_string()))? } else if let Some(config_path) = Self::find_config_file() { - // Auto-detect config file ConfigLoader::new().file(&config_path).load().map_err(|e| { BuildError::Config(format!("Failed to load {}: {}", config_path.display(), e)) })? } else { - // Use defaults Config::default() }; + // Override retrieval config if provided + if let Some(retrieval_config) = self.retrieval_config { + config.retrieval = retrieval_config; + } + // Open workspace: prefer explicit path, fallback to config let workspace = if let Some(path) = &self.workspace { Some(Workspace::open(path).map_err(|e| BuildError::Workspace(e.to_string()))?) } else { - // Use workspace_dir from config Some( Workspace::open(&config.storage.workspace_dir) .map_err(|e| BuildError::Workspace(e.to_string()))?, @@ -175,9 +188,7 @@ impl EngineBuilder { }; // Create pipeline retriever with config - let retrieval_config = self - .retrieval_config - .unwrap_or_else(|| config.retrieval.clone()); + let retrieval_config = config.retrieval.clone(); let mut retriever = PipelineRetriever::new().with_max_iterations(retrieval_config.search.max_iterations); @@ -201,9 +212,9 @@ impl EngineBuilder { ); } - Ok(Engine::with_components( - config, workspace, retriever, executor, - )) + // Build engine + Engine::with_components(config, workspace, retriever, executor) + .map_err(|e| BuildError::Other(e.to_string())) } } @@ -227,6 +238,10 @@ pub enum BuildError { /// Missing API key for retrieval. #[error("Missing API key: LLM API key is required for retrieval. Set OPENAI_API_KEY environment variable or configure retrieval.api_key")] MissingApiKey, + + /// Other error. 
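+    /// Currently used to surface engine-construction failures from
+    /// `Engine::with_components`.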
+ #[error("{0}")] + Other(String), } #[cfg(test)] diff --git a/src/client/context.rs b/src/client/context.rs new file mode 100644 index 00000000..344c05cb --- /dev/null +++ b/src/client/context.rs @@ -0,0 +1,337 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Request context and configuration. +//! +//! This module provides request-scoped configuration and state management +//! for client operations. It allows overriding global configuration on a +//! per-request basis. +//! +//! # Example +//! +//! ```rust,ignore +//! let ctx = ClientContext::new() +//! .with_top_k(10) +//! .with_token_budget(8000) +//! .with_timeout(Duration::from_secs(30)); +//! +//! let result = client.query_with_context(&doc_id, "query", &ctx).await?; +//! ``` + +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +use uuid::Uuid; + +use crate::retrieval::content::OutputFormatConfig; + +/// Request context for client operations. +/// +/// Provides request-scoped configuration overrides and metadata. +#[derive(Debug, Clone)] +pub struct ClientContext { + /// Unique request ID for tracing. + pub request_id: Uuid, + + /// Request-specific configuration overrides. + pub config: RequestContextConfig, + + /// Request metadata (custom key-value pairs). + pub metadata: HashMap, + + /// Request deadline (for timeout). + pub deadline: Option, + + /// Priority (higher = more important). + pub priority: u8, +} + +impl Default for ClientContext { + fn default() -> Self { + Self::new() + } +} + +impl ClientContext { + /// Create a new context with defaults. + pub fn new() -> Self { + Self { + request_id: Uuid::new_v4(), + config: RequestContextConfig::default(), + metadata: HashMap::new(), + deadline: None, + priority: 5, // Default priority + } + } + + /// Create a context with a specific request ID. + pub fn with_id(id: Uuid) -> Self { + Self { + request_id: id, + ..Self::new() + } + } + + /// Set the top_k override for retrieval. + pub fn with_top_k(mut self, top_k: usize) -> Self { + self.config.top_k = Some(top_k); + self + } + + /// Set the token budget override. + pub fn with_token_budget(mut self, budget: usize) -> Self { + self.config.token_budget = Some(budget); + self + } + + /// Set the content format override. + pub fn with_content_format(mut self, format: OutputFormatConfig) -> Self { + self.config.content_format = Some(format); + self + } + + /// Set whether to include summaries. + pub fn with_summaries(mut self, include: bool) -> Self { + self.config.features.include_summaries = include; + self + } + + /// Set whether to include content. + pub fn with_content(mut self, include: bool) -> Self { + self.config.features.include_content = include; + self + } + + /// Set whether to enable caching. + pub fn with_cache(mut self, enable: bool) -> Self { + self.config.features.enable_cache = enable; + self + } + + /// Set whether to enable sufficiency checking. + pub fn with_sufficiency_check(mut self, enable: bool) -> Self { + self.config.features.enable_sufficiency_check = enable; + self + } + + /// Set a timeout duration. + pub fn with_timeout(mut self, duration: Duration) -> Self { + self.deadline = Some(Instant::now() + duration); + self + } + + /// Set a deadline. + pub fn with_deadline(mut self, deadline: Instant) -> Self { + self.deadline = Some(deadline); + self + } + + /// Set the priority (0-10, higher = more important). + pub fn with_priority(mut self, priority: u8) -> Self { + self.priority = priority.min(10); + self + } + + /// Add metadata. 
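+    ///
+    /// # Example
+    ///
+    /// ```rust,ignore
+    /// // Keys and values are free-form strings; "user"/"alice" are
+    /// // illustrative, not part of the API.
+    /// let ctx = ClientContext::new()
+    ///     .with_metadata("user", "alice")
+    ///     .with_metadata("trace_id", "abc-123");
+    /// ```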
+ pub fn with_metadata(mut self, key: impl Into, value: impl Into) -> Self { + self.metadata.insert(key.into(), value.into()); + self + } + + /// Check if the request has timed out. + pub fn is_timed_out(&self) -> bool { + self.deadline + .map(|d| Instant::now() > d) + .unwrap_or(false) + } + + /// Get remaining time until deadline. + pub fn remaining_time(&self) -> Option { + self.deadline + .map(|d| d.saturating_duration_since(Instant::now())) + } + + /// Merge with another context (other takes precedence). + pub fn merge(&self, other: &ClientContext) -> ClientContext { + let mut merged = self.clone(); + merged.request_id = other.request_id; + + if other.config.top_k.is_some() { + merged.config.top_k = other.config.top_k; + } + if other.config.token_budget.is_some() { + merged.config.token_budget = other.config.token_budget; + } + if other.config.content_format.is_some() { + merged.config.content_format = other.config.content_format.clone(); + } + if other.deadline.is_some() { + merged.deadline = other.deadline; + } + if other.priority != 5 { + merged.priority = other.priority; + } + + // Merge metadata + for (k, v) in &other.metadata { + merged.metadata.insert(k.clone(), v.clone()); + } + + // Merge feature flags + merged.config.features = FeatureFlags { + include_summaries: other.config.features.include_summaries, + include_content: other.config.features.include_content, + enable_cache: other.config.features.enable_cache, + enable_sufficiency_check: other.config.features.enable_sufficiency_check, + }; + + merged + } +} + +/// Request-specific configuration overrides. +#[derive(Debug, Clone, Default)] +pub struct RequestContextConfig { + /// Override top_k for retrieval. + pub top_k: Option, + + /// Override token budget. + pub token_budget: Option, + + /// Override content format. + pub content_format: Option, + + /// Feature flags. + pub features: FeatureFlags, +} + +/// Feature flags for request. +#[derive(Debug, Clone, Copy)] +pub struct FeatureFlags { + /// Include summaries in results. + pub include_summaries: bool, + + /// Include content in results. + pub include_content: bool, + + /// Enable result caching. + pub enable_cache: bool, + + /// Enable sufficiency checking. + pub enable_sufficiency_check: bool, +} + +impl Default for FeatureFlags { + fn default() -> Self { + Self { + include_summaries: true, + include_content: true, + enable_cache: true, + enable_sufficiency_check: true, + } + } +} + +impl FeatureFlags { + /// Create with all features enabled. + pub fn all() -> Self { + Self { + include_summaries: true, + include_content: true, + enable_cache: true, + enable_sufficiency_check: true, + } + } + + /// Create with minimal features (fastest). + pub fn minimal() -> Self { + Self { + include_summaries: false, + include_content: true, + enable_cache: false, + enable_sufficiency_check: false, + } + } + + /// Create for deep analysis. 
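+    ///
+    /// Currently the same flag set as [`FeatureFlags::all()`]; kept as a
+    /// separate constructor so deep-analysis defaults can diverge later
+    /// without breaking callers.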
+ pub fn deep() -> Self { + Self { + include_summaries: true, + include_content: true, + enable_cache: true, + enable_sufficiency_check: true, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_context_creation() { + let ctx = ClientContext::new(); + assert!(!ctx.request_id.is_nil()); + assert!(ctx.config.top_k.is_none()); + assert!(ctx.deadline.is_none()); + } + + #[test] + fn test_context_with_overrides() { + let ctx = ClientContext::new() + .with_top_k(10) + .with_token_budget(8000) + .with_cache(false); + + assert_eq!(ctx.config.top_k, Some(10)); + assert_eq!(ctx.config.token_budget, Some(8000)); + assert!(!ctx.config.features.enable_cache); + } + + #[test] + fn test_context_timeout() { + let ctx = ClientContext::new() + .with_timeout(Duration::from_millis(100)); + + assert!(!ctx.is_timed_out()); + assert!(ctx.remaining_time().is_some()); + } + + #[test] + fn test_context_metadata() { + let ctx = ClientContext::new() + .with_metadata("user", "test") + .with_metadata("version", "1.0"); + + assert_eq!(ctx.metadata.get("user"), Some(&"test".to_string())); + assert_eq!(ctx.metadata.get("version"), Some(&"1.0".to_string())); + } + + #[test] + fn test_context_merge() { + let ctx1 = ClientContext::new() + .with_top_k(5) + .with_metadata("key1", "value1"); + + let ctx2 = ClientContext::new() + .with_top_k(10) + .with_metadata("key2", "value2"); + + let merged = ctx1.merge(&ctx2); + + assert_eq!(merged.config.top_k, Some(10)); + assert_eq!(merged.metadata.get("key1"), Some(&"value1".to_string())); + assert_eq!(merged.metadata.get("key2"), Some(&"value2".to_string())); + } + + #[test] + fn test_feature_flags() { + let all = FeatureFlags::all(); + assert!(all.include_summaries); + assert!(all.include_content); + + let minimal = FeatureFlags::minimal(); + assert!(!minimal.include_summaries); + assert!(!minimal.enable_cache); + } +} diff --git a/src/client/engine.rs b/src/client/engine.rs index bb973004..8156586e 100644 --- a/src/client/engine.rs +++ b/src/client/engine.rs @@ -1,26 +1,20 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Main Engine client for document indexing and retrieval. +//! Main Engine client - the entry point for vectorless. //! -//! This module provides the high-level API for: -//! - Indexing documents (Markdown, PDF, DOCX, HTML) -//! - Retrieving document structure -//! - Querying documents with adaptive retrieval +//! This module provides the main client for document indexing and retrieval. +//! The Engine is an orchestrator that delegates to specialized sub-clients. //! -//! # Design +//! # Architecture //! -//! The client uses **interior mutability** patterns to allow sharing across -//! async tasks while maintaining thread safety: -//! -//! - `Arc>` - Thread-safe workspace access (multiple readers, single writer) -//! - `Arc>` - Exclusive pipeline execution -//! - `Arc` - Immutable retriever (uses interior mutability internally) -//! -//! # Thread Safety -//! -//! `Engine` is `Clone + Send + Sync`. Cloning is cheap (reference count increment). -//! All clones share the same underlying resources. +//! ```text +//! Engine (Orchestrator) +//! ├── IndexerClient → Document indexing +//! ├── RetrieverClient → Query and retrieval +//! ├── WorkspaceClient → Document persistence +//! └── EventEmitter → Progress and events +//! ``` //! //! # Example //! @@ -34,13 +28,13 @@ //! .with_workspace("./my_workspace") //! .build()?; //! -//! // Clone for use in multiple tasks (cheap - just Arc clone) -//! 
-//! let client1 = client.clone();
-//! let client2 = client.clone();
-//!
-//! // Can use concurrently
+//! // Index a document
 //! let doc_id = client.index("./document.md").await?;
+//!
+//! // Query the document
 //! let result = client.query(&doc_id, "What is this?").await?;
+//!
+//! println!("Found: {}", result.content);
 //! # Ok(())
 //! # }
 //! ```
@@ -49,16 +43,20 @@
 use std::path::Path;
 use std::sync::{Arc, Mutex, RwLock};
 
 use tracing::info;
-use uuid::Uuid;
 
 use crate::config::Config;
 use crate::domain::{DocumentTree, Error, Result};
-use crate::index::{IndexInput, PipelineExecutor, PipelineOptions, SummaryStrategy};
-use crate::parser::DocumentFormat;
-use crate::retrieval::{PipelineRetriever, Retriever};
-use crate::storage::{DocumentMeta as StorageMeta, PersistedDocument, Workspace};
-
-use super::types::{DocumentInfo, IndexMode, IndexOptions, QueryResult};
+use crate::index::PipelineExecutor;
+use crate::retrieval::{PipelineRetriever, RetrieveOptions};
+use crate::storage::Workspace;
+
+use super::context::ClientContext;
+use super::events::EventEmitter;
+use super::indexer::IndexerClient;
+use super::retriever::RetrieverClient;
+use super::session::Session;
+use super::types::{DocumentInfo, IndexOptions, QueryResult};
+use super::workspace::WorkspaceClient;
 
 /// The main Engine client.
 ///
@@ -68,30 +66,26 @@ use super::types::{DocumentInfo, IndexMode, IndexOptions, QueryResult};
 /// # Cloning
 ///
 /// Cloning is cheap - it only increments reference counts (`Arc`). All clones
-/// share the same underlying resources (workspace, retriever, executor).
+/// share the same underlying resources.
 ///
 /// # Thread Safety
 ///
-/// The client is `Clone + Send + Sync` and can be safely shared across
-/// threads. All mutable state is protected by appropriate synchronization:
-///
-/// - Workspace: `Arc<RwLock<Workspace>>` - Multiple readers, single writer
-/// - Executor: `Arc<Mutex<PipelineExecutor>>` - Exclusive access during indexing
-/// - Retriever: `Arc<PipelineRetriever>` - Immutable, uses internal synchronization
+/// The client is `Clone + Send + Sync` and can be safely shared across threads.
 pub struct Engine {
     /// Configuration (immutable, shared).
     config: Arc<Config>,
 
-    /// Workspace for persistence (with built-in LRU cache).
-    /// Uses RwLock for concurrent read access.
-    workspace: Option<Arc<RwLock<Workspace>>>,
+    /// Indexer client for document indexing.
+    indexer: IndexerClient,
+
+    /// Retriever client for queries.
+    retriever: RetrieverClient,
 
-    /// Pipeline retriever (immutable, uses interior mutability internally).
-    retriever: Arc<PipelineRetriever>,
+    /// Workspace client for persistence.
+    workspace: Option<WorkspaceClient>,
 
-    /// Pipeline executor for indexing.
-    /// Uses Mutex for exclusive access during pipeline execution.
-    executor: Arc<Mutex<PipelineExecutor>>,
+    /// Event emitter.
+    events: EventEmitter,
 }
 
 impl Engine {
@@ -106,11 +100,47 @@ impl Engine {
     /// Note: Prefer using [`Engine::builder()`] for more control.
     fn new() -> Result<Self> {
         let config = Config::default();
+        Self::with_components(
+            config,
+            None,
+            PipelineRetriever::new(),
+            PipelineExecutor::new(),
+        )
+    }
+
+    // ============================================================
+    // Constructor (for Builder)
+    // ============================================================
+
+    /// Create a new client with the given components.
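+    ///
+    /// This is the wiring point used by the builder; a hedged sketch of the
+    /// intended call path, using only builder methods shown in the docs above:
+    ///
+    /// ```rust,ignore
+    /// // EngineBuilder::build() ultimately calls Engine::with_components(...).
+    /// let engine = EngineBuilder::new()
+    ///     .with_workspace("./workspace")
+    ///     .build()?;
+    /// ```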
+    pub(crate) fn with_components(
+        config: Config,
+        workspace: Option<Workspace>,
+        retriever: PipelineRetriever,
+        executor: PipelineExecutor,
+    ) -> Result<Self> {
+        let config = Arc::new(config);
+        let events = EventEmitter::new();
+
+        // Create indexer client
+        let indexer = IndexerClient::new(executor)
+            .with_events(events.clone());
+
+        // Create retriever client
+        let retriever = RetrieverClient::new(retriever, Arc::clone(&config))
+            .with_events(events.clone());
+
+        // Create workspace client (if workspace provided)
+        let workspace_client = workspace.map(|ws| {
+            WorkspaceClient::new(ws).with_events(events.clone())
+        });
+
         Ok(Self {
-            config: Arc::new(config),
-            workspace: None,
-            retriever: Arc::new(PipelineRetriever::new()),
-            executor: Arc::new(Mutex::new(PipelineExecutor::new())),
+            config,
+            indexer,
+            retriever,
+            workspace: workspace_client,
+            events,
         })
     }
 
@@ -142,94 +172,101 @@ impl Engine {
         path: impl AsRef<Path>,
         options: IndexOptions,
     ) -> Result<String> {
-        let path = path.as_ref();
-        let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
-
-        if !path.exists() {
-            return Err(Error::Parse(format!("File not found: {}", path.display())));
-        }
-
-        // Generate document ID
-        let doc_id = Uuid::new_v4().to_string();
-
-        // Detect format
-        let format = self.detect_format(&path, &options)?;
-
-        info!("Indexing {:?} document: {}", format, path.display());
-
-        // Convert client options to pipeline options
-        let pipeline_options = PipelineOptions {
-            mode: match options.mode {
-                IndexMode::Auto => crate::index::IndexMode::Auto,
-                IndexMode::Pdf => crate::index::IndexMode::Pdf,
-                IndexMode::Markdown => crate::index::IndexMode::Markdown,
-                IndexMode::Html => crate::index::IndexMode::Html,
-                IndexMode::Docx => crate::index::IndexMode::Docx,
-            },
-            generate_ids: options.generate_ids,
-            summary_strategy: if options.generate_summaries {
-                SummaryStrategy::selective(self.config.indexer.min_summary_tokens, false)
-            } else {
-                SummaryStrategy::none()
-            },
-            generate_description: options.generate_description,
-            ..Default::default()
-        };
-
-        // Create pipeline input and execute (with mutex lock)
-        let input = IndexInput::file(&path);
-        let result = {
-            let mut executor = self
-                .executor
-                .lock()
-                .map_err(|_| Error::Other("Pipeline executor lock poisoned".to_string()))?;
-            executor.execute(input, pipeline_options).await?
-        };
-
-        // Build persisted document
-        let tree = result
-            .tree
-            .ok_or_else(|| Error::Parse("Document tree not generated".to_string()))?;
-
-        let meta = StorageMeta::new(&doc_id, &result.name, format.extension())
-            .with_source_path(path.to_string_lossy().to_string())
-            .with_description(result.description.clone().unwrap_or_default());
-
-        let mut doc = PersistedDocument::new(meta, tree);
-
-        // Add page count if available
-        if let Some(page_count) = result.page_count {
-            for i in 1..=page_count {
-                doc.add_page(i, "");
-            }
-        }
+        let doc = self.indexer.index_with_options(path, options).await?;
+        let persisted = self.indexer.to_persisted(doc);
 
         // Save to workspace if configured
         if let Some(ref workspace) = self.workspace {
-            let mut ws = workspace
-                .write()
-                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-            ws.add(&doc)?;
-            info!("Saved document {} to workspace", doc_id);
+            workspace.save(&persisted)?;
         }
 
-        info!("Indexing complete. Document ID: {}", doc_id);
+        let doc_id = persisted.meta.id.clone();
+        info!("Indexed document: {}", doc_id);
         Ok(doc_id)
     }
 
-    /// Detect document format from path and options.
-    fn detect_format(&self, path: &Path, options: &IndexOptions) -> Result<DocumentFormat> {
-        match options.mode {
-            IndexMode::Auto => {
-                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
-                DocumentFormat::from_extension(ext)
-                    .ok_or_else(|| Error::Parse(format!("Unknown format: {}", ext)))
-            }
-            IndexMode::Pdf => Ok(DocumentFormat::Pdf),
-            IndexMode::Markdown => Ok(DocumentFormat::Markdown),
-            IndexMode::Html => Ok(DocumentFormat::Html),
-            IndexMode::Docx => Ok(DocumentFormat::Docx),
+    // ============================================================
+    // Document Querying
+    // ============================================================
+
+    /// Query a document.
+    ///
+    /// Uses the adaptive retriever to find relevant content.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - No workspace is configured
+    /// - The document is not found
+    /// - The retrieval fails
+    pub async fn query(&self, doc_id: &str, question: &str) -> Result<QueryResult> {
+        let tree = self.get_structure(doc_id)?;
+
+        let options = RetrieveOptions::new()
+            .with_top_k(self.config.retrieval.top_k)
+            .with_include_content(true)
+            .with_include_summaries(true);
+
+        let mut result = self.retriever.query(&tree, question, &options).await?;
+        result.doc_id = doc_id.to_string();
+
+        Ok(result)
+    }
+
+    /// Query a document with context.
+    ///
+    /// Allows request-specific configuration overrides.
+    pub async fn query_with_context(
+        &self,
+        doc_id: &str,
+        question: &str,
+        ctx: &ClientContext,
+    ) -> Result<QueryResult> {
+        let tree = self.get_structure(doc_id)?;
+
+        let mut options = RetrieveOptions::new()
+            .with_top_k(self.config.retrieval.top_k)
+            .with_include_content(true)
+            .with_include_summaries(true);
+
+        // Apply context overrides
+        if let Some(top_k) = ctx.config.top_k {
+            options.top_k = top_k;
+        }
+        if let Some(token_budget) = ctx.config.token_budget {
+            options.max_tokens = token_budget;
         }
+
+        let mut result = self.retriever.query_with_context(&tree, question, &options, ctx).await?;
+        result.doc_id = doc_id.to_string();
+
+        Ok(result)
+    }
+
+    // ============================================================
+    // Session Management
+    // ============================================================
+
+    /// Create a session for multi-document operations.
+    ///
+    /// Sessions provide:
+    /// - Automatic caching of document trees
+    /// - Cross-document queries
+    /// - Session statistics
+    pub fn session(&self) -> Session {
+        let workspace = self.workspace.clone().unwrap_or_else(|| {
+            // Fall back to a throwaway workspace when none is configured.
+            WorkspaceClient::from_arc(
+                Arc::new(RwLock::new(Workspace::open("./temp_workspace").unwrap())),
+                self.events.clone(),
+            )
+        });
+
+        Session::new(
+            self.indexer.clone(),
+            self.retriever.clone(),
+            workspace,
+            self.events.clone(),
+        )
+    }
 
     // ============================================================
@@ -240,24 +277,7 @@ impl Engine {
     #[must_use]
     pub fn list_documents(&self) -> Vec<DocumentInfo> {
         match &self.workspace {
-            Some(workspace) => {
-                let ws = match workspace.read() {
-                    Ok(guard) => guard,
-                    Err(_) => return Vec::new(),
-                };
-                ws.list_documents()
-                    .iter()
-                    .filter_map(|id| ws.get_meta(id))
-                    .map(|meta| DocumentInfo {
-                        id: meta.id.clone(),
-                        name: meta.doc_name.clone(),
-                        format: meta.doc_type.clone(),
-                        description: meta.doc_description.clone(),
-                        page_count: meta.page_count,
-                        line_count: meta.line_count,
-                    })
-                    .collect()
-            }
+            Some(workspace) => workspace.list().unwrap_or_default(),
             None => Vec::new(),
         }
     }
@@ -270,18 +290,10 @@ impl Engine {
     ///
     /// Returns an error if:
    /// - No workspace is configured
     /// - The document is not found
     pub fn get_structure(&self, doc_id: &str) -> Result<DocumentTree> {
-        let workspace = self
-            .workspace
-            .as_ref()
+        let workspace = self.workspace.as_ref()
             .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        // Use read lock - Workspace::load now uses interior mutability for cache
-        let ws = workspace
-            .read()
-            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-
-        let doc = ws
-            .load(doc_id)?
+        let doc = workspace.load(doc_id)?
             .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?;
 
         Ok(doc.tree)
     }
@@ -296,18 +308,10 @@ impl Engine {
     /// - The document is not found
     /// - No page content is available
     pub fn get_page_content(&self, doc_id: &str, pages: &str) -> Result<String> {
-        let workspace = self
-            .workspace
-            .as_ref()
+        let workspace = self.workspace.as_ref()
            .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        // Use read lock - Workspace::load now uses interior mutability for cache
-        let ws = workspace
-            .read()
-            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-
-        let doc = ws
-            .load(doc_id)?
+        let doc = workspace.load(doc_id)?
             .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?;
 
         if doc.pages.is_empty() {
@@ -358,71 +362,8 @@ impl Engine {
         Ok(result)
     }
 
-    /// Query a document.
-    ///
-    /// Uses the adaptive retriever to find relevant content.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    /// - No workspace is configured
-    /// - The document is not found
-    /// - The retrieval fails
-    pub async fn query(&self, doc_id: &str, question: &str) -> Result<QueryResult> {
-        let tree = self.get_structure(doc_id)?;
-
-        // Build retrieve options from config
-        let retrieve_options = crate::retrieval::RetrieveOptions::new()
-            .with_top_k(self.config.retrieval.top_k)
-            .with_include_content(true)
-            .with_include_summaries(true);
-
-        // Use adaptive retriever
-        let response = self
-            .retriever
-            .retrieve(&tree, question, &retrieve_options)
-            .await
-            .map_err(|e| Error::Retrieval(e.to_string()))?;
-
-        // Extract node IDs and build content from results
-        let node_ids: Vec<String> = response
-            .results
-            .iter()
-            .filter_map(|r| r.node_id.clone())
-            .collect();
-
-        let content_parts: Vec<String> = response
-            .results
-            .iter()
-            .map(|r| {
-                let mut parts = vec![format!("## {}", r.title)];
-
-                // Only include original content, not summary
-                // (per design: retrieval should return original text, not summary)
-                if let Some(ref content) = r.content {
-                    parts.push(content.clone());
-                }
-
-                parts.join("\n\n")
-            })
-            .collect();
-
-        let content = if content_parts.is_empty() {
-            response.content
-        } else {
-            content_parts.join("\n\n---\n\n")
-        };
-
-        Ok(QueryResult {
-            doc_id: doc_id.to_string(),
-            node_ids,
-            content,
-            score: response.confidence,
-        })
-    }
-
     // ============================================================
-    // Persistence
+    // Persistence Operations
     // ============================================================
 
     /// Load a document from the workspace into cache.
@@ -433,21 +374,14 @@ impl Engine {
     ///
     /// Returns an error if no workspace is configured.
     pub fn load(&self, doc_id: &str) -> Result<bool> {
-        let workspace = self
-            .workspace
-            .as_ref()
+        let workspace = self.workspace.as_ref()
             .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        // Use read lock - Workspace::load now uses interior mutability for cache
-        let ws = workspace
-            .read()
-            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-
-        if !ws.contains(doc_id) {
+        if !workspace.exists(doc_id)? {
             return Ok(false);
         }
 
-        let _ = ws.load(doc_id)?;
+        let _ = workspace.load(doc_id)?;
         Ok(true)
     }
 
@@ -457,15 +391,10 @@ impl Engine {
     ///
     /// Returns an error if no workspace is configured.
     pub fn remove(&self, doc_id: &str) -> Result<bool> {
-        let workspace = self
-            .workspace
-            .as_ref()
+        let workspace = self.workspace.as_ref()
            .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        let mut ws = workspace
-            .write()
-            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-        ws.remove(doc_id)
+        workspace.remove(doc_id)
     }
 
     /// Check if a document exists in the workspace.
@@ -474,15 +403,10 @@ impl Engine {
     ///
     /// Returns an error if no workspace is configured.
     pub fn exists(&self, doc_id: &str) -> Result<bool> {
-        let workspace = self
-            .workspace
-            .as_ref()
+        let workspace = self.workspace.as_ref()
            .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        let ws = workspace
-            .read()
-            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-        Ok(ws.contains(doc_id))
+        workspace.exists(doc_id)
     }
 
     /// Get metadata for a document.
     ///
     /// # Errors
     ///
     /// Returns an error if no workspace is configured.
     pub fn get_metadata(&self, doc_id: &str) -> Result<Option<DocumentInfo>> {
-        let workspace = self
-            .workspace
-            .as_ref()
+        let workspace = self.workspace.as_ref()
             .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        let ws = workspace
-            .read()
-            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-
-        Ok(ws.get_meta(doc_id).map(|meta| DocumentInfo {
-            id: meta.id.clone(),
-            name: meta.doc_name.clone(),
-            format: meta.doc_type.clone(),
-            description: meta.doc_description.clone(),
-            page_count: meta.page_count,
-            line_count: meta.line_count,
-        }))
+        workspace.get_document_info(doc_id)
     }
 
     /// Remove multiple documents from the workspace.
@@ -518,22 +429,10 @@ impl Engine {
     ///
     /// Returns an error if no workspace is configured.
     pub fn batch_remove(&self, doc_ids: &[&str]) -> Result<usize> {
-        let workspace = self
-            .workspace
-            .as_ref()
+        let workspace = self.workspace.as_ref()
             .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        let mut ws = workspace
-            .write()
-            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-
-        let mut removed = 0;
-        for doc_id in doc_ids {
-            if ws.remove(doc_id)? {
-                removed += 1;
-            }
-        }
-        Ok(removed)
+        workspace.batch_remove(doc_ids)
     }
 
     /// Remove all documents from the workspace.
@@ -544,38 +443,16 @@ impl Engine {
     ///
     /// Returns an error if no workspace is configured.
     pub fn clear(&self) -> Result<usize> {
-        let workspace = self
-            .workspace
-            .as_ref()
+        let workspace = self.workspace.as_ref()
             .ok_or_else(|| Error::Config("No workspace configured".to_string()))?;
 
-        let mut ws = workspace
-            .write()
-            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
-
-        let doc_ids: Vec<String> = ws.list_documents().iter().map(|s| s.to_string()).collect();
-        let count = doc_ids.len();
-
-        for doc_id in &doc_ids {
-            let _ = ws.remove(doc_id);
-        }
-
-        Ok(count)
+        workspace.clear()
     }
 
     /// Get the number of indexed documents.
     #[must_use]
     pub fn len(&self) -> usize {
-        match &self.workspace {
-            Some(workspace) => {
-                let ws = match workspace.read() {
-                    Ok(guard) => guard,
-                    Err(_) => return 0,
-                };
-                ws.len()
-            }
-            None => 0,
-        }
+        self.workspace.as_ref().map(|w| w.len()).unwrap_or(0)
     }
 
     /// Check if there are no documents.
@@ -585,22 +462,27 @@ impl Engine {
     }
 
     // ============================================================
-    // Internal API (for Builder)
+    // Sub-Client Access
     // ============================================================
 
-    /// Create a new client with the given components.
-    pub(crate) fn with_components(
-        config: Config,
-        workspace: Option<Workspace>,
-        retriever: PipelineRetriever,
-        executor: PipelineExecutor,
-    ) -> Self {
-        Self {
-            config: Arc::new(config),
-            workspace: workspace.map(|w| Arc::new(RwLock::new(w))),
-            retriever: Arc::new(retriever),
-            executor: Arc::new(Mutex::new(executor)),
-        }
+    /// Get the indexer client.
+    pub fn indexer(&self) -> &IndexerClient {
+        &self.indexer
+    }
+
+    /// Get the retriever client.
+    pub fn retriever(&self) -> &RetrieverClient {
+        &self.retriever
+    }
+
+    /// Get the workspace client.
+    pub fn workspace(&self) -> Option<&WorkspaceClient> {
+        self.workspace.as_ref()
+    }
+
+    /// Get the configuration.
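+    ///
+    /// Together with the sub-client accessors above, this enables direct use
+    /// of the specialized clients; a hedged sketch, with `engine` assumed in
+    /// scope and only methods defined in this module used:
+    ///
+    /// ```rust,ignore
+    /// let report = engine.indexer().validate("./doc.md")?;
+    /// if report.valid {
+    ///     println!("default top_k: {}", engine.config().retrieval.top_k);
+    /// }
+    /// ```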
+ pub fn config(&self) -> &Config { + &self.config } } @@ -608,9 +490,10 @@ impl Clone for Engine { fn clone(&self) -> Self { Self { config: Arc::clone(&self.config), - workspace: self.workspace.as_ref().map(Arc::clone), - retriever: Arc::clone(&self.retriever), - executor: Arc::clone(&self.executor), + indexer: self.indexer.clone(), + retriever: self.retriever.clone(), + workspace: self.workspace.clone(), + events: self.events.clone(), } } } @@ -629,3 +512,15 @@ impl std::fmt::Debug for Engine { .finish_non_exhaustive() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_engine_builder() { + let builder = Engine::builder(); + // Builder exists + let _ = builder; + } +} diff --git a/src/client/events.rs b/src/client/events.rs new file mode 100644 index 00000000..a1d797c4 --- /dev/null +++ b/src/client/events.rs @@ -0,0 +1,365 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Event system for client operations. +//! +//! This module provides event types and handlers for observing +//! and reacting to client operations (indexing, querying, etc.). +//! +//! # Example +//! +//! ```rust,ignore +//! let emitter = EventEmitter::new() +//! .on_index(|e| match e { +//! IndexEvent::Complete { doc_id } => println!("Indexed: {}", doc_id), +//! _ => {} +//! }); +//! +//! let client = EngineBuilder::new() +//! .with_events(emitter) +//! .build()?; +//! ``` + +use std::sync::Arc; + +use async_trait::async_trait; +use tracing::info; + +use crate::parser::DocumentFormat; +use crate::retrieval::SufficiencyLevel; + +/// Event types for client operations. +#[derive(Debug, Clone)] +pub enum Event { + /// Indexing events. + Index(IndexEvent), + + /// Query events. + Query(QueryEvent), + + /// Workspace events. + Workspace(WorkspaceEvent), +} + +/// Indexing operation events. +#[derive(Debug, Clone)] +pub enum IndexEvent { + /// Started indexing a document. + Started { + /// File path being indexed. + path: String, + }, + + /// Document format detected. + FormatDetected { + /// Detected format. + format: DocumentFormat, + }, + + /// Parsing progress update. + ParsingProgress { + /// Percentage complete (0-100). + percent: u8, + }, + + /// Document tree built. + TreeBuilt { + /// Number of nodes in the tree. + node_count: usize, + }, + + /// Summary generation progress. + SummaryProgress { + /// Number of summaries completed. + completed: usize, + /// Total summaries to generate. + total: usize, + }, + + /// Indexing completed successfully. + Complete { + /// Generated document ID. + doc_id: String, + }, + + /// Error occurred during indexing. + Error { + /// Error message. + message: String, + }, +} + +/// Query operation events. +#[derive(Debug, Clone)] +pub enum QueryEvent { + /// Search started. + Started { + /// The query string. + query: String, + }, + + /// Node visited during search. + NodeVisited { + /// Node ID. + node_id: String, + /// Node title. + title: String, + /// Relevance score. + score: f32, + }, + + /// Candidate result found. + CandidateFound { + /// Node ID. + node_id: String, + /// Relevance score. + score: f32, + }, + + /// Sufficiency check result. + SufficiencyCheck { + /// Sufficiency level. + level: SufficiencyLevel, + /// Total tokens collected. + tokens: usize, + }, + + /// Query completed. + Complete { + /// Total results found. + total_results: usize, + /// Overall confidence score. + confidence: f32, + }, + + /// Error occurred during query. + Error { + /// Error message. 
+        message: String,
+    },
+}
+
+/// Workspace operation events.
+#[derive(Debug, Clone)]
+pub enum WorkspaceEvent {
+    /// Document saved to workspace.
+    Saved {
+        /// Document ID.
+        doc_id: String,
+    },
+
+    /// Document loaded from workspace.
+    Loaded {
+        /// Document ID.
+        doc_id: String,
+        /// Whether it was a cache hit.
+        cache_hit: bool,
+    },
+
+    /// Document removed from workspace.
+    Removed {
+        /// Document ID.
+        doc_id: String,
+    },
+
+    /// Workspace cleared.
+    Cleared {
+        /// Number of documents removed.
+        count: usize,
+    },
+}
+
+/// Sync event handler trait.
+pub trait EventHandler: Send + Sync {
+    /// Handle an event.
+    fn handle(&self, event: &Event);
+}
+
+/// Async event handler trait.
+#[async_trait]
+pub trait AsyncEventHandler: Send + Sync {
+    /// Handle an event asynchronously.
+    async fn handle(&self, event: &Event);
+}
+
+/// Type alias for sync index handler.
+pub type IndexHandler = Box<dyn Fn(&IndexEvent) + Send + Sync>;
+
+/// Type alias for sync query handler.
+pub type QueryHandler = Box<dyn Fn(&QueryEvent) + Send + Sync>;
+
+/// Type alias for sync workspace handler.
+pub type WorkspaceHandler = Box<dyn Fn(&WorkspaceEvent) + Send + Sync>;
+
+/// Event emitter for client operations.
+///
+/// Collects event handlers and dispatches events to them.
+#[derive(Default)]
+pub struct EventEmitter {
+    /// Index event handlers.
+    index_handlers: Vec<IndexHandler>,
+
+    /// Query event handlers.
+    query_handlers: Vec<QueryHandler>,
+
+    /// Workspace event handlers.
+    workspace_handlers: Vec<WorkspaceHandler>,
+
+    /// Async handlers.
+    async_handlers: Vec<Arc<dyn AsyncEventHandler>>,
+}
+
+impl EventEmitter {
+    /// Create a new event emitter with no handlers.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Add an index event handler.
+    pub fn on_index<F>(mut self, handler: F) -> Self
+    where
+        F: Fn(&IndexEvent) + Send + Sync + 'static,
+    {
+        self.index_handlers.push(Box::new(handler));
+        self
+    }
+
+    /// Add a query event handler.
+    pub fn on_query<F>(mut self, handler: F) -> Self
+    where
+        F: Fn(&QueryEvent) + Send + Sync + 'static,
+    {
+        self.query_handlers.push(Box::new(handler));
+        self
+    }
+
+    /// Add a workspace event handler.
+    pub fn on_workspace<F>(mut self, handler: F) -> Self
+    where
+        F: Fn(&WorkspaceEvent) + Send + Sync + 'static,
+    {
+        self.workspace_handlers.push(Box::new(handler));
+        self
+    }
+
+    /// Add an async event handler.
+    pub fn with_async_handler<H>(mut self, handler: Arc<H>) -> Self
+    where
+        H: AsyncEventHandler + 'static,
+    {
+        self.async_handlers.push(handler);
+        self
+    }
+
+    /// Emit an index event.
+    pub fn emit_index(&self, event: IndexEvent) {
+        for handler in &self.index_handlers {
+            handler(&event);
+        }
+        for _handler in &self.async_handlers {
+            // Placeholder: in this sync context, async handlers are only logged.
+            let event = Event::Index(event.clone());
+            info!("Async event: {:?}", event);
+        }
+    }
+
+    /// Emit a query event.
+    pub fn emit_query(&self, event: QueryEvent) {
+        for handler in &self.query_handlers {
+            handler(&event);
+        }
+    }
+
+    /// Emit a workspace event.
+    pub fn emit_workspace(&self, event: WorkspaceEvent) {
+        for handler in &self.workspace_handlers {
+            handler(&event);
+        }
+    }
+
+    /// Check if there are any handlers registered.
+    pub fn has_handlers(&self) -> bool {
+        !self.index_handlers.is_empty()
+            || !self.query_handlers.is_empty()
+            || !self.workspace_handlers.is_empty()
+            || !self.async_handlers.is_empty()
+    }
+
+    /// Merge another emitter into this one.
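+    ///
+    /// Handlers from `other` are appended after the existing ones; a small
+    /// sketch:
+    ///
+    /// ```rust,ignore
+    /// let logging = EventEmitter::new().on_index(|e| println!("{:?}", e));
+    /// let metrics = EventEmitter::new().on_query(|_e| { /* count queries */ });
+    /// let combined = logging.merge(metrics); // runs both handler sets
+    /// ```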
+    pub fn merge(mut self, other: EventEmitter) -> Self {
+        self.index_handlers.extend(other.index_handlers);
+        self.query_handlers.extend(other.query_handlers);
+        self.workspace_handlers.extend(other.workspace_handlers);
+        self.async_handlers.extend(other.async_handlers);
+        self
+    }
+}
+
+impl std::fmt::Debug for EventEmitter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("EventEmitter")
+            .field("index_handlers", &self.index_handlers.len())
+            .field("query_handlers", &self.query_handlers.len())
+            .field("workspace_handlers", &self.workspace_handlers.len())
+            .field("async_handlers", &self.async_handlers.len())
+            .finish()
+    }
+}
+
+impl Clone for EventEmitter {
+    fn clone(&self) -> Self {
+        // Boxed closures cannot be cloned, so a clone starts with no handlers;
+        // handlers are NOT shared across clones.
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::sync::Arc;
+
+    #[test]
+    fn test_event_emitter_index() {
+        let counter = Arc::new(AtomicUsize::new(0));
+        let counter_clone = counter.clone();
+
+        let emitter = EventEmitter::new().on_index(move |_e| {
+            counter_clone.fetch_add(1, Ordering::SeqCst);
+        });
+
+        emitter.emit_index(IndexEvent::Started {
+            path: "test.md".to_string(),
+        });
+        emitter.emit_index(IndexEvent::Complete {
+            doc_id: "123".to_string(),
+        });
+
+        assert_eq!(counter.load(Ordering::SeqCst), 2);
+    }
+
+    #[test]
+    fn test_event_emitter_query() {
+        let counter = Arc::new(AtomicUsize::new(0));
+        let counter_clone = counter.clone();
+
+        let emitter = EventEmitter::new().on_query(move |_e| {
+            counter_clone.fetch_add(1, Ordering::SeqCst);
+        });
+
+        emitter.emit_query(QueryEvent::Started {
+            query: "test".to_string(),
+        });
+
+        assert_eq!(counter.load(Ordering::SeqCst), 1);
+    }
+
+    #[test]
+    fn test_event_emitter_has_handlers() {
+        let empty = EventEmitter::new();
+        assert!(!empty.has_handlers());
+
+        let with_handler = EventEmitter::new().on_index(|_| {});
+        assert!(with_handler.has_handlers());
+    }
+}
diff --git a/src/client/indexer.rs b/src/client/indexer.rs
new file mode 100644
index 00000000..7f41cde8
--- /dev/null
+++ b/src/client/indexer.rs
@@ -0,0 +1,351 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Document indexing client.
+//!
+//! This module provides document indexing operations including
+//! format detection, parsing, and tree building.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! let indexer = IndexerClient::new(executor);
+//!
+//! let doc = indexer.index("./document.md").await?;
+//!
+//! println!("Indexed: {}", doc.id);
+//! ```
+
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+
+use tracing::info;
+use uuid::Uuid;
+
+use crate::domain::{Error, Result};
+use crate::index::{IndexInput, IndexMode, PipelineExecutor, PipelineOptions, SummaryStrategy};
+use crate::parser::DocumentFormat;
+use crate::storage::{DocumentMeta, PersistedDocument};
+
+use super::events::{EventEmitter, IndexEvent};
+use super::types::{IndexOptions, IndexMode as ClientIndexMode, IndexedDocument};
+
+/// Document indexing client.
+///
+/// Provides operations for parsing and indexing documents.
+pub struct IndexerClient {
+    /// Pipeline executor.
+    executor: Arc<Mutex<PipelineExecutor>>,
+
+    /// Event emitter.
+    events: EventEmitter,
+
+    /// Configuration.
+    config: IndexerConfig,
+}
+
+/// Indexer configuration.
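+///
+/// A hedged sketch of overriding the defaults (field names as declared below;
+/// `executor` assumed in scope):
+///
+/// ```rust,ignore
+/// let config = IndexerConfig {
+///     min_summary_tokens: 50,
+///     ..Default::default()
+/// };
+/// let indexer = IndexerClient::new(executor).with_config(config);
+/// ```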
+#[derive(Debug, Clone)]
+pub struct IndexerConfig {
+    /// Minimum content tokens required to generate a summary.
+    pub min_summary_tokens: usize,
+
+    /// Whether to generate IDs by default.
+    pub generate_ids: bool,
+
+    /// Whether to generate descriptions by default.
+    pub generate_descriptions: bool,
+}
+
+impl Default for IndexerConfig {
+    fn default() -> Self {
+        Self {
+            min_summary_tokens: 20,
+            generate_ids: true,
+            generate_descriptions: false,
+        }
+    }
+}
+
+impl IndexerClient {
+    /// Create a new indexer client.
+    pub fn new(executor: PipelineExecutor) -> Self {
+        Self {
+            executor: Arc::new(Mutex::new(executor)),
+            events: EventEmitter::new(),
+            config: IndexerConfig::default(),
+        }
+    }
+
+    /// Create with event emitter.
+    pub fn with_events(mut self, events: EventEmitter) -> Self {
+        self.events = events;
+        self
+    }
+
+    /// Create with configuration.
+    pub fn with_config(mut self, config: IndexerConfig) -> Self {
+        self.config = config;
+        self
+    }
+
+    /// Create from an existing executor Arc.
+    pub(crate) fn from_arc(
+        executor: Arc<Mutex<PipelineExecutor>>,
+        events: EventEmitter,
+        config: IndexerConfig,
+    ) -> Self {
+        Self {
+            executor,
+            events,
+            config,
+        }
+    }
+
+    /// Index a document from a file path.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The file does not exist
+    /// - The file format is not supported
+    /// - The pipeline execution fails
+    pub async fn index(&self, path: impl AsRef<Path>) -> Result<IndexedDocument> {
+        self.index_with_options(path, IndexOptions::default()).await
+    }
+
+    /// Index a document with custom options.
+    ///
+    /// # Errors
+    ///
+    /// See [`IndexerClient::index`].
+    pub async fn index_with_options(
+        &self,
+        path: impl AsRef<Path>,
+        options: IndexOptions,
+    ) -> Result<IndexedDocument> {
+        let path = path.as_ref();
+        let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
+
+        if !path.exists() {
+            return Err(Error::Parse(format!("File not found: {}", path.display())));
+        }
+
+        // Emit start event
+        self.events.emit_index(IndexEvent::Started {
+            path: path.display().to_string(),
+        });
+
+        // Generate document ID
+        let doc_id = Uuid::new_v4().to_string();
+
+        // Detect format
+        let format = self.detect_format(&path, &options)?;
+        self.events.emit_index(IndexEvent::FormatDetected { format });
+
+        info!("Indexing {:?} document: {}", format, path.display());
+
+        // Convert client options to pipeline options
+        let pipeline_options = PipelineOptions {
+            mode: match options.mode {
+                ClientIndexMode::Auto => IndexMode::Auto,
+                ClientIndexMode::Pdf => IndexMode::Pdf,
+                ClientIndexMode::Markdown => IndexMode::Markdown,
+                ClientIndexMode::Html => IndexMode::Html,
+                ClientIndexMode::Docx => IndexMode::Docx,
+            },
+            generate_ids: options.generate_ids,
+            summary_strategy: if options.generate_summaries {
+                SummaryStrategy::selective(self.config.min_summary_tokens, false)
+            } else {
+                SummaryStrategy::none()
+            },
+            generate_description: options.generate_description,
+            ..Default::default()
+        };
+
+        // Create pipeline input and execute
+        let input = IndexInput::file(&path);
+        let result = {
+            let mut executor = self.executor.lock()
+                .map_err(|_| Error::Other("Pipeline executor lock poisoned".to_string()))?;
+            executor.execute(input, pipeline_options).await?
+        };
+
+        // Build indexed document
+        let tree = result
+            .tree
+            .ok_or_else(|| Error::Parse("Document tree not generated".to_string()))?;
+
+        let node_count = tree.node_count();
+        self.events.emit_index(IndexEvent::TreeBuilt { node_count });
+
+        let mut doc = IndexedDocument::new(&doc_id, format)
+            .with_name(&result.name)
+            .with_source_path(&path)
+            .with_tree(tree);
+
+        if let Some(desc) = &result.description {
+            doc = doc.with_description(desc);
+        }
+
+        if let Some(page_count) = result.page_count {
+            doc = doc.with_page_count(page_count);
+        }
+
+        info!("Indexing complete: {} ({} nodes)", doc_id, node_count);
+        self.events.emit_index(IndexEvent::Complete { doc_id });
+
+        Ok(doc)
+    }
+
+    /// Detect document format from path and options.
+    pub fn detect_format(&self, path: &Path, options: &IndexOptions) -> Result<DocumentFormat> {
+        match options.mode {
+            ClientIndexMode::Auto => {
+                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
+                DocumentFormat::from_extension(ext)
+                    .ok_or_else(|| Error::Parse(format!("Unknown format: {}", ext)))
+            }
+            ClientIndexMode::Pdf => Ok(DocumentFormat::Pdf),
+            ClientIndexMode::Markdown => Ok(DocumentFormat::Markdown),
+            ClientIndexMode::Html => Ok(DocumentFormat::Html),
+            ClientIndexMode::Docx => Ok(DocumentFormat::Docx),
+        }
+    }
+
+    /// Validate a document before indexing.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Ok` with `valid == false` (plus the reasons) when the file is
+    /// missing or its format is unknown; an `Err` is only produced when file
+    /// metadata cannot be read.
+    pub fn validate(&self, path: impl AsRef<Path>) -> Result<ValidationResult> {
+        let path = path.as_ref();
+
+        if !path.exists() {
+            return Ok(ValidationResult {
+                valid: false,
+                errors: vec![format!("File not found: {}", path.display())],
+                warnings: vec![],
+                format: None,
+                estimated_size: 0,
+            });
+        }
+
+        let metadata = std::fs::metadata(path)
+            .map_err(|e| Error::Parse(format!("Cannot read file metadata: {}", e)))?;
+
+        let estimated_size = metadata.len() as usize;
+        let mut warnings = Vec::new();
+
+        // Check file size
+        if estimated_size > 100 * 1024 * 1024 {
+            warnings.push("Large file (>100MB) may take longer to index".to_string());
+        }
+
+        // Detect format
+        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
+        let format = DocumentFormat::from_extension(ext);
+
+        if format.is_none() {
+            return Ok(ValidationResult {
+                valid: false,
+                errors: vec![format!("Unknown format: {}", ext)],
+                warnings,
+                format: None,
+                estimated_size,
+            });
+        }
+
+        Ok(ValidationResult {
+            valid: true,
+            errors: vec![],
+            warnings,
+            format,
+            estimated_size,
+        })
+    }
+
+    /// Convert IndexedDocument to PersistedDocument for storage.
+    pub fn to_persisted(&self, doc: IndexedDocument) -> PersistedDocument {
+        let meta = DocumentMeta::new(&doc.id, &doc.name, doc.format.extension())
+            .with_source_path(
+                doc.source_path
+                    .as_ref()
+                    .map(|p| p.to_string_lossy().to_string())
+                    .unwrap_or_default(),
+            )
+            .with_description(doc.description.clone().unwrap_or_default());
+
+        let mut persisted = PersistedDocument::new(
+            meta,
+            doc.tree.expect("IndexedDocument must have a tree"),
+        );
+
+        for page in doc.pages {
+            persisted.add_page(page.page, &page.content);
+        }
+
+        persisted
+    }
+
+    /// Get the underlying executor Arc (for advanced use).
+    pub(crate) fn inner(&self) -> Arc<Mutex<PipelineExecutor>> {
+        Arc::clone(&self.executor)
+    }
+}
+
+impl Clone for IndexerClient {
+    fn clone(&self) -> Self {
+        Self {
+            executor: Arc::clone(&self.executor),
+            events: self.events.clone(),
+            config: self.config.clone(),
+        }
+    }
+}
+
+/// Document validation result.
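+///
+/// A hedged sketch of acting on a report from `IndexerClient::validate` above
+/// before indexing (`indexer` and the path are illustrative):
+///
+/// ```rust,ignore
+/// let report = indexer.validate("./big.pdf")?;
+/// for warning in &report.warnings {
+///     eprintln!("warning: {}", warning);
+/// }
+/// if report.valid {
+///     indexer.index("./big.pdf").await?;
+/// }
+/// ```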
+#[derive(Debug, Clone)]
+pub struct ValidationResult {
+    /// Whether the document is valid for indexing.
+    pub valid: bool,
+
+    /// Validation errors (prevent indexing).
+    pub errors: Vec<String>,
+
+    /// Validation warnings (non-blocking).
+    pub warnings: Vec<String>,
+
+    /// Detected document format.
+    pub format: Option<DocumentFormat>,
+
+    /// Estimated file size in bytes.
+    pub estimated_size: usize,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_indexer_client_creation() {
+        let executor = PipelineExecutor::new();
+        let client = IndexerClient::new(executor);
+        assert_eq!(client.config.min_summary_tokens, 20);
+    }
+
+    #[test]
+    fn test_validate_missing_file() {
+        let executor = PipelineExecutor::new();
+        let client = IndexerClient::new(executor);
+
+        let result = client.validate("./nonexistent.md").unwrap();
+        assert!(!result.valid);
+        assert!(!result.errors.is_empty());
+    }
+}
diff --git a/src/client/mod.rs b/src/client/mod.rs
index 907d8c0e..a1d053cf 100644
--- a/src/client/mod.rs
+++ b/src/client/mod.rs
@@ -6,6 +6,25 @@
 //! This module provides the main entry point for using vectorless:
 //! - [`Engine`] — The main client for indexing and querying documents
 //! - [`EngineBuilder`] — Builder pattern for client configuration
+//! - [`Session`] — Multi-document session management
+//!
+//! # Architecture
+//!
+//! The client module is organized into specialized sub-modules:
+//!
+//! ```text
+//! client/
+//! ├── mod.rs        → Re-exports and documentation
+//! ├── engine.rs     → Main orchestrator
+//! ├── builder.rs    → Builder pattern
+//! ├── types.rs      → Public API types
+//! ├── context.rs    → Request context and configuration
+//! ├── session.rs    → Session management
+//! ├── indexer.rs    → Document indexing operations
+//! ├── retriever.rs  → Query and retrieval operations
+//! ├── workspace.rs  → Workspace CRUD operations
+//! └── events.rs     → Event system and callbacks
+//! ```
 //!
 //! # Quick Start
 //!
@@ -15,11 +34,7 @@
 //! # #[tokio::main]
 //! # async fn main() -> vectorless::domain::Result<()> {
 //! // Create a client with default settings
-//! let client = Engine::new()?;
-//!
-//! // Or use the builder for custom configuration
 //! let client = EngineBuilder::new()
-//!     .with_api_key("your-api-key")
 //!     .with_workspace("./my_workspace")
 //!     .build()?;
 //!
@@ -29,6 +44,10 @@
 //! // Get document structure
 //! let structure = client.get_structure(&doc_id)?;
 //!
+//! // Query the document
+//! let result = client.query(&doc_id, "What is this?").await?;
+//! println!("{}", result.content);
+//!
 //! // List all documents
 //! for doc in client.list_documents() {
 //!     println!("{}: {}", doc.id, doc.name);
@@ -37,19 +56,117 @@
 //! # }
 //! ```
 //!
+//! # Session-Based Operations
+//!
+//! For multi-document operations, use sessions:
+//!
+//! ```rust,no_run
+//! # use vectorless::client::{Engine, EngineBuilder};
+//! # #[tokio::main]
+//! # async fn main() -> vectorless::domain::Result<()> {
+//! let client = EngineBuilder::new()
+//!     .with_workspace("./workspace")
+//!     .build()?;
+//!
+//! let session = client.session();
+//!
+//! // Index multiple documents
+//! let doc1 = session.index("./doc1.md").await?;
+//! let doc2 = session.index("./doc2.md").await?;
+//!
+//! // Query across all documents
+//! let results = session.query_all("What is the architecture?").await?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! # Events and Progress
+//!
+//! Monitor operation progress with events:
+//!
+//! ```rust,no_run
+//! # use vectorless::client::{EngineBuilder, EventEmitter, IndexEvent};
+//! # #[tokio::main]
+//! # async fn main() -> vectorless::domain::Result<()> {
+//! let events = EventEmitter::new()
+//!     .on_index(|e| match e {
+//!         IndexEvent::Complete { doc_id } => println!("Indexed: {}", doc_id),
+//!         _ => {}
+//!     });
+//!
+//! let client = EngineBuilder::new()
+//!     .with_events(events)
+//!     .build()?;
+//! # Ok(())
+//! # }
+//! ```
+//!
 //! # Features
 //!
 //! - **Document Indexing** — Parse and index Markdown, PDF, and text files
 //! - **Tree-Based Structure** — Documents organized as hierarchical trees
 //! - **Workspace Persistence** — Save and load indexed documents
-//! - **Builder Pattern** — Flexible client configuration
+//! - **Session Management** — Multi-document operations with caching
+//! - **Event System** — Progress callbacks and monitoring
 
 mod builder;
+mod context;
 mod engine;
+mod events;
+mod indexer;
+mod retriever;
+mod session;
 mod types;
+mod workspace;
 
-// Re-export main types
-pub use types::{DocumentInfo, IndexMode, IndexOptions, IndexedDocument, PageContent, QueryResult};
+// ============================================================
+// Main Types
+// ============================================================
 
-pub use builder::{BuildError, EngineBuilder};
 pub use engine::Engine;
+pub use builder::{BuildError, EngineBuilder};
+
+// ============================================================
+// Sub-Clients
+// ============================================================
+
+pub use indexer::IndexerClient;
+pub use retriever::RetrieverClient;
+pub use workspace::WorkspaceClient;
+pub use session::Session;
+
+// ============================================================
+// Context and Events
+// ============================================================
+
+pub use context::{ClientContext, FeatureFlags, RequestContextConfig};
+pub use events::{
+    EventEmitter, Event, EventHandler, AsyncEventHandler,
+    IndexEvent, QueryEvent, WorkspaceEvent,
+};
+
+// ============================================================
+// Types
+// ============================================================
+
+pub use types::{
+    // Document types
+    IndexedDocument, PageContent,
+    // Index types
+    IndexMode, IndexOptions,
+    // Query types
+    QueryResult,
+    // Document info
    DocumentInfo,
+    // Error types
+    ClientError,
+};
+
+// ============================================================
+// Sub-Client Types
+// ============================================================
+
+pub use indexer::{IndexerConfig, ValidationResult};
+pub use retriever::{RetrieverClientConfig, NodeContext};
+pub use workspace::{WorkspaceClientConfig, WorkspaceStats};
+pub use session::{SessionConfig, SessionStats, EvictionPolicy, PreloadStrategy};
diff --git a/src/client/retriever.rs b/src/client/retriever.rs
new file mode 100644
index 00000000..7f0099ca
--- /dev/null
+++ b/src/client/retriever.rs
@@ -0,0 +1,408 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Document retrieval client.
+//!
+//! This module provides query and retrieval operations for document content.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! let retriever = RetrieverClient::new(pipeline_retriever, config);
+//!
+//! let result = retriever
+//!     .query(&tree, "What is this?", &RetrieveOptions::default())
+//!     .await?;
+//!
+//! println!("Found {} results", result.node_ids.len());
+//! ```
+
+use std::sync::Arc;
+
+use tracing::info;
+
+use crate::config::Config;
+use crate::domain::{DocumentTree, Error, NodeId, Result};
+use crate::retrieval::content::ContentAggregatorConfig;
+use crate::retrieval::{
+    PipelineRetriever, RetrieveOptions, RetrieveResponse, RetrievalResult, Retriever,
+};
+
+use super::context::ClientContext;
+use super::events::{EventEmitter, QueryEvent};
+use super::types::QueryResult;
+
+/// Document retrieval client.
+///
+/// Provides operations for querying document content.
+pub struct RetrieverClient {
+    /// Pipeline retriever.
+    retriever: Arc<PipelineRetriever>,
+
+    /// Configuration reference.
+    config: Arc<Config>,
+
+    /// Event emitter.
+    events: EventEmitter,
+
+    /// Default retrieval options.
+    default_options: RetrieveOptions,
+}
+
+/// Retriever configuration.
+#[derive(Debug, Clone)]
+pub struct RetrieverClientConfig {
+    /// Default top_k for retrieval.
+    pub default_top_k: usize,
+
+    /// Default token budget.
+    pub default_token_budget: usize,
+
+    /// Content aggregator config.
+    pub content_config: Option<ContentAggregatorConfig>,
+
+    /// Enable result caching.
+    pub enable_cache: bool,
+}
+
+impl Default for RetrieverClientConfig {
+    fn default() -> Self {
+        Self {
+            default_top_k: 5,
+            default_token_budget: 4000,
+            content_config: None,
+            enable_cache: true,
+        }
+    }
+}
+
+impl RetrieverClient {
+    /// Create a new retriever client.
+    pub fn new(retriever: PipelineRetriever, config: Arc<Config>) -> Self {
+        Self {
+            retriever: Arc::new(retriever),
+            config,
+            events: EventEmitter::new(),
+            default_options: RetrieveOptions::default(),
+        }
+    }
+
+    /// Create with event emitter.
+    pub fn with_events(mut self, events: EventEmitter) -> Self {
+        self.events = events;
+        self
+    }
+
+    /// Create with configuration.
+    pub fn with_config(mut self, config: RetrieverClientConfig) -> Self {
+        self.default_options = RetrieveOptions::new()
+            .with_top_k(config.default_top_k)
+            .with_max_tokens(config.default_token_budget)
+            .with_enable_cache(config.enable_cache);
+        self
+    }
+
+    /// Create from existing retriever Arc.
+    pub(crate) fn from_arc(
+        retriever: Arc<PipelineRetriever>,
+        config: Arc<Config>,
+        events: EventEmitter,
+    ) -> Self {
+        Self {
+            retriever,
+            config,
+            events,
+            default_options: RetrieveOptions::default(),
+        }
+    }
+
+    /// Query a document tree.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The retrieval pipeline fails
+    pub async fn query(
+        &self,
+        tree: &DocumentTree,
+        question: &str,
+        options: &RetrieveOptions,
+    ) -> Result<QueryResult> {
+        self.query_with_context(tree, question, options, &ClientContext::new()).await
+    }
+
+    /// Query with request context.
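+    ///
+    /// A hedged sketch combining per-request overrides with a deadline;
+    /// `tree`, `options`, and `Duration` are assumed in scope, and only
+    /// `ClientContext` methods defined in this crate are used:
+    ///
+    /// ```rust,ignore
+    /// let ctx = ClientContext::new()
+    ///     .with_top_k(3)
+    ///     .with_timeout(Duration::from_secs(5));
+    /// let result = retriever
+    ///     .query_with_context(&tree, "What is X?", &options, &ctx)
+    ///     .await?;
+    /// ```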
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The retrieval pipeline fails
+    /// - The request has timed out
+    pub async fn query_with_context(
+        &self,
+        tree: &DocumentTree,
+        question: &str,
+        options: &RetrieveOptions,
+        ctx: &ClientContext,
+    ) -> Result<QueryResult> {
+        // Check timeout
+        if ctx.is_timed_out() {
+            return Err(Error::Other("Request timed out".to_string()));
+        }
+
+        self.events.emit_query(QueryEvent::Started {
+            query: question.to_string(),
+        });
+
+        info!("Querying: {:?}", question);
+
+        // Apply context overrides
+        let mut options = options.clone();
+        if let Some(top_k) = ctx.config.top_k {
+            options.top_k = top_k;
+        }
+        if let Some(token_budget) = ctx.config.token_budget {
+            options.max_tokens = token_budget;
+        }
+
+        // Execute retrieval
+        let response = self.retriever
+            .retrieve(tree, question, &options)
+            .await
+            .map_err(|e| Error::Retrieval(e.to_string()))?;
+
+        // Build result
+        let result = self.build_query_result(&response);
+
+        self.events.emit_query(QueryEvent::Complete {
+            total_results: result.node_ids.len(),
+            confidence: result.score,
+        });
+
+        Ok(result)
+    }
+
+    /// Build QueryResult from RetrieveResponse.
+    fn build_query_result(&self, response: &RetrieveResponse) -> QueryResult {
+        // Extract node IDs
+        let node_ids: Vec<String> = response
+            .results
+            .iter()
+            .filter_map(|r| r.node_id.clone())
+            .collect();
+
+        // Build content
+        let content_parts: Vec<String> = response
+            .results
+            .iter()
+            .map(|r| {
+                let mut parts = vec![format!("## {}", r.title)];
+                if let Some(ref content) = r.content {
+                    parts.push(content.clone());
+                }
+                parts.join("\n\n")
+            })
+            .collect();
+
+        let content = if content_parts.is_empty() {
+            response.content.clone()
+        } else {
+            content_parts.join("\n\n---\n\n")
+        };
+
+        QueryResult {
+            doc_id: String::new(), // Will be set by caller
+            node_ids,
+            content,
+            score: response.confidence,
+        }
+    }
+
+    /// Get similar nodes to a given node.
+    ///
+    /// Uses tree structure and content to find similar nodes.
+    pub fn find_similar(
+        &self,
+        tree: &DocumentTree,
+        node_id: NodeId,
+        top_k: usize,
+    ) -> Result<Vec<RetrievalResult>> {
+        let mut results = Vec::new();
+
+        // Get the target node's content for comparison
+        let target_content = tree
+            .get(node_id)
+            .map(|n| n.content.clone())
+            .unwrap_or_default();
+
+        if target_content.is_empty() {
+            return Ok(results);
+        }
+
+        // Extract keywords from target content
+        let target_keywords = self.extract_keywords(&target_content);
+
+        // Search all nodes for similarity
+        let root = tree.root();
+        let mut stack = vec![root];
+
+        while let Some(current_id) = stack.pop() {
+            if current_id == node_id {
+                // Skip the target node itself
+                stack.extend(tree.children(current_id));
+                continue;
+            }
+
+            if let Some(node) = tree.get(current_id) {
+                let node_keywords = self.extract_keywords(&node.content);
+                let similarity = self.calculate_similarity(&target_keywords, &node_keywords);
+
+                if similarity > 0.3 {
+                    results.push(RetrievalResult::new(&node.title)
+                        .with_node_id(format!("{:?}", current_id))
+                        .with_content(node.content.clone())
+                        .with_score(similarity)
+                        .with_depth(tree.depth(current_id)));
+                }
+            }
+
+            stack.extend(tree.children(current_id));
+        }
+
+        // Sort by score and take top_k
+        results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+        results.truncate(top_k);
+
+        Ok(results)
+    }
+
+    /// Extract keywords from content.
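+    ///
+    /// Together with `calculate_similarity` below, this implements a simple
+    /// Jaccard overlap over lowercased words longer than three characters;
+    /// a worked example:
+    ///
+    /// ```text
+    /// "Index Pipeline Design" -> ["index", "pipeline", "design"]
+    /// "Pipeline Design Notes" -> ["pipeline", "design", "notes"]
+    /// intersection = 2, union = 4, similarity = 2 / 4 = 0.5
+    /// ```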
+    fn extract_keywords(&self, content: &str) -> Vec<String> {
+        content
+            .to_lowercase()
+            .split_whitespace()
+            .filter(|w| w.len() > 3)
+            .take(20)
+            .map(|s| s.to_string())
+            .collect()
+    }
+
+    /// Calculate similarity between keyword sets.
+    fn calculate_similarity(&self, set1: &[String], set2: &[String]) -> f32 {
+        if set1.is_empty() || set2.is_empty() {
+            return 0.0;
+        }
+
+        let set1_set: std::collections::HashSet<_> = set1.iter().collect();
+        let set2_set: std::collections::HashSet<_> = set2.iter().collect();
+
+        let intersection = set1_set.intersection(&set2_set).count();
+        let union = set1_set.union(&set2_set).count();
+
+        intersection as f32 / union as f32
+    }
+
+    /// Get node context (ancestors and siblings).
+    ///
+    /// Walks upward from the target node (inclusive), collecting each visited
+    /// node and its siblings, up to `ancestor_depth` levels.
+    pub fn get_node_context(
+        &self,
+        tree: &DocumentTree,
+        node_id: NodeId,
+        ancestor_depth: usize,
+    ) -> Result<NodeContext> {
+        let mut ancestors = Vec::new();
+        let mut siblings = Vec::new();
+
+        // Get ancestors
+        let mut current_id = Some(node_id);
+        let mut depth = 0;
+
+        while let Some(id) = current_id {
+            if depth >= ancestor_depth {
+                break;
+            }
+
+            if let Some(node) = tree.get(id) {
+                ancestors.push(RetrievalResult::new(&node.title)
+                    .with_node_id(format!("{:?}", id))
+                    .with_depth(tree.depth(id)));
+
+                // Get siblings at this level
+                if let Some(parent_id) = tree.parent(id) {
+                    for child_id in tree.children(parent_id) {
+                        if child_id != id {
+                            if let Some(sibling) = tree.get(child_id) {
+                                siblings.push(RetrievalResult::new(&sibling.title)
+                                    .with_node_id(format!("{:?}", child_id))
+                                    .with_depth(tree.depth(child_id)));
+                            }
+                        }
+                    }
+                }
+            }
+
+            current_id = tree.parent(id);
+            depth += 1;
+        }
+
+        // Get the target node
+        let target = tree
+            .get(node_id)
+            .map(|n| {
+                RetrievalResult::new(&n.title)
+                    .with_node_id(format!("{:?}", node_id))
+                    .with_content(n.content.clone())
+                    .with_depth(tree.depth(node_id))
+            });
+
+        Ok(NodeContext {
+            target,
+            ancestors,
+            siblings,
+        })
+    }
+
+    /// Get the underlying retriever Arc.
+    pub(crate) fn inner(&self) -> Arc<PipelineRetriever> {
+        Arc::clone(&self.retriever)
+    }
+}
+
+impl Clone for RetrieverClient {
+    fn clone(&self) -> Self {
+        Self {
+            retriever: Arc::clone(&self.retriever),
+            config: Arc::clone(&self.config),
+            events: self.events.clone(),
+            default_options: self.default_options.clone(),
+        }
+    }
+}
+
+/// Node context information.
+#[derive(Debug, Clone)]
+pub struct NodeContext {
+    /// The target node.
+    pub target: Option<RetrievalResult>,
+
+    /// Nodes on the upward path (target's level first, moving toward the root).
+    pub ancestors: Vec<RetrievalResult>,
+
+    /// Sibling nodes at each visited level.
+    pub siblings: Vec<RetrievalResult>,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_retriever_client_creation() {
+        let config = Arc::new(Config::default());
+        let retriever = crate::retrieval::PipelineRetriever::new();
+        let client = RetrieverClient::new(retriever, config);
+        assert!(client.default_options.top_k > 0);
+    }
+}
diff --git a/src/client/session.rs b/src/client/session.rs
new file mode 100644
index 00000000..1b5d55ef
--- /dev/null
+++ b/src/client/session.rs
@@ -0,0 +1,493 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Session management for multi-document operations.
+//!
+//! This module provides session-based document management with
+//! automatic caching and cross-document querying.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! let session = client.session();
+//!
+//! // Index multiple documents
+//! let doc1 = session.index("./doc1.md").await?;
+//! let doc2 = session.index("./doc2.md").await?;
+//!
+//! // Query across all documents
+//! let results = session.query_all("What is X?").await?;
+//!
+//! // Query single document (uses cached tree)
+//! let result = session.query(&doc1, "Summary?").await?;
+//! ```
+
+use std::cell::Cell;
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use tracing::info;
+use uuid::Uuid;
+
+use crate::domain::{DocumentTree, Error, Result};
+use crate::retrieval::RetrieveOptions;
+
+use super::events::EventEmitter;
+use super::indexer::IndexerClient;
+use super::retriever::RetrieverClient;
+use super::types::{DocumentInfo, IndexOptions, QueryResult};
+use super::workspace::WorkspaceClient;
+
+/// Session for managing multiple documents.
+///
+/// Provides automatic caching of document trees and cross-document operations.
+pub struct Session {
+    /// Session ID.
+    pub id: Uuid,
+
+    /// Session configuration.
+    config: SessionConfig,
+
+    /// Document contexts (cached).
+    documents: HashMap<String, DocumentContext>,
+
+    /// Indexer client.
+    indexer: IndexerClient,
+
+    /// Retriever client.
+    retriever: RetrieverClient,
+
+    /// Workspace client.
+    workspace: WorkspaceClient,
+
+    /// Event emitter.
+    events: EventEmitter,
+
+    /// Session statistics.
+    stats: SessionStats,
+
+    /// Created at timestamp.
+    created_at: Instant,
+}
+
+/// Document context within a session.
+#[derive(Debug, Clone)]
+struct DocumentContext {
+    /// Document ID.
+    doc_id: String,
+
+    /// Cached document tree.
+    tree: Option<Arc<DocumentTree>>,
+
+    /// Document metadata.
+    meta: DocumentInfo,
+
+    /// Access count.
+    access_count: usize,
+
+    /// Last access time.
+    last_accessed: Instant,
+}
+
+/// Session configuration.
+#[derive(Debug, Clone)]
+pub struct SessionConfig {
+    /// Maximum documents to cache in memory.
+    pub max_cached_documents: usize,
+
+    /// Cache eviction policy.
+    pub eviction_policy: EvictionPolicy,
+
+    /// Preload strategy when indexing.
+    pub preload_strategy: PreloadStrategy,
+}
+
+impl Default for SessionConfig {
+    fn default() -> Self {
+        Self {
+            max_cached_documents: 100,
+            eviction_policy: EvictionPolicy::Lru,
+            preload_strategy: PreloadStrategy::Lazy,
+        }
+    }
+}
+
+/// Cache eviction policy.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum EvictionPolicy {
+    /// Least recently used.
+    Lru,
+    /// First in, first out.
+    Fifo,
+    /// No eviction (until session closes).
+    None,
+}
+
+/// Document preload strategy.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum PreloadStrategy {
+    /// Load trees on demand.
+    Lazy,
+    /// Load trees immediately when indexing.
+    Eager,
+}
+
+/// Session statistics.
+#[derive(Debug, Default)]
+pub struct SessionStats {
+    /// Total documents in session.
+    pub document_count: Cell<usize>,
+
+    /// Total queries made.
+    pub query_count: Cell<usize>,
+
+    /// Cache hits.
+    pub cache_hits: Cell<usize>,
+
+    /// Cache misses.
+    pub cache_misses: Cell<usize>,
+
+    /// Total query time (in microseconds).
+    total_query_time_us: Cell<u64>,
+}
+
+impl SessionStats {
+    /// Get the cache hit rate.
+    pub fn cache_hit_rate(&self) -> f32 {
+        let total = self.cache_hits.get() + self.cache_misses.get();
+        if total == 0 {
+            0.0
+        } else {
+            self.cache_hits.get() as f32 / total as f32
+        }
+    }
+
+    /// Get the total query time.
+    pub fn total_query_time(&self) -> Duration {
+        Duration::from_micros(self.total_query_time_us.get())
+    }
+
+    /// Get the average query time.
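+    ///
+    /// Returns `None` until at least one query has completed; a sketch:
+    ///
+    /// ```rust,ignore
+    /// if let Some(avg) = session.stats().avg_query_time() {
+    ///     println!("average query time: {:?}", avg);
+    /// }
+    /// ```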
+    pub fn avg_query_time(&self) -> Option<Duration> {
+        let count = self.query_count.get();
+        if count == 0 {
+            None
+        } else {
+            Some(self.total_query_time() / count as u32)
+        }
+    }
+
+    /// Increment query count.
+    fn increment_query_count(&self) {
+        self.query_count.set(self.query_count.get() + 1);
+    }
+
+    /// Add query time.
+    fn add_query_time(&self, duration: Duration) {
+        self.total_query_time_us.set(
+            self.total_query_time_us.get() + duration.as_micros() as u64
+        );
+    }
+
+    /// Increment cache hits.
+    fn increment_cache_hits(&self) {
+        self.cache_hits.set(self.cache_hits.get() + 1);
+    }
+
+    /// Increment cache misses.
+    fn increment_cache_misses(&self) {
+        self.cache_misses.set(self.cache_misses.get() + 1);
+    }
+}
+
+impl Clone for SessionStats {
+    fn clone(&self) -> Self {
+        Self {
+            document_count: Cell::new(self.document_count.get()),
+            query_count: Cell::new(self.query_count.get()),
+            cache_hits: Cell::new(self.cache_hits.get()),
+            cache_misses: Cell::new(self.cache_misses.get()),
+            total_query_time_us: Cell::new(self.total_query_time_us.get()),
+        }
+    }
+}
+
+impl Session {
+    /// Create a new session.
+    pub(crate) fn new(
+        indexer: IndexerClient,
+        retriever: RetrieverClient,
+        workspace: WorkspaceClient,
+        events: EventEmitter,
+    ) -> Self {
+        Self {
+            id: Uuid::new_v4(),
+            config: SessionConfig::default(),
+            documents: HashMap::new(),
+            indexer,
+            retriever,
+            workspace,
+            events,
+            stats: SessionStats::default(),
+            created_at: Instant::now(),
+        }
+    }
+
+    /// Create with configuration.
+    pub fn with_config(mut self, config: SessionConfig) -> Self {
+        self.config = config;
+        self
+    }
+
+    /// Get the session ID.
+    pub fn id(&self) -> Uuid {
+        self.id
+    }
+
+    /// Get session age.
+    pub fn age(&self) -> Duration {
+        Instant::now().duration_since(self.created_at)
+    }
+
+    // ============================================================
+    // Document Indexing
+    // ============================================================
+
+    /// Index a document into this session.
+    ///
+    /// The document is indexed and saved to the workspace; session-level
+    /// caching is still a placeholder.
+    pub async fn index(&self, path: impl AsRef<Path>) -> Result<String> {
+        self.index_with_options(path, IndexOptions::default()).await
+    }
+
+    /// Index a document with options.
+    pub async fn index_with_options(
+        &self,
+        path: impl AsRef<Path>,
+        options: IndexOptions,
+    ) -> Result<String> {
+        // Index the document
+        let doc = self.indexer.index_with_options(path, options).await?;
+
+        // Save to workspace
+        let persisted = self.indexer.to_persisted(doc);
+        self.workspace.save(&persisted)?;
+
+        // Session-level caching would happen here (see cache_document below)
+        let doc_id = persisted.meta.id.clone();
+
+        info!("Session {}: indexed document {}", self.id, doc_id);
+
+        Ok(doc_id)
+    }
+
+    // ============================================================
+    // Document Querying
+    // ============================================================
+
+    /// Query a document within this session.
+    ///
+    /// Uses the cached tree if available, otherwise loads from workspace.
+    pub async fn query(&self, doc_id: &str, question: &str) -> Result<QueryResult> {
+        self.query_with_options(doc_id, question, RetrieveOptions::default()).await
+    }
+
+    /// Query a document with options.
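+    ///
+    /// A hedged sketch with explicit retrieval options (builder methods as
+    /// used elsewhere in this crate; `doc_id` assumed in scope):
+    ///
+    /// ```rust,ignore
+    /// let options = RetrieveOptions::new()
+    ///     .with_top_k(3)
+    ///     .with_include_content(true);
+    /// let result = session.query_with_options(&doc_id, "Summary?", options).await?;
+    /// ```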
+    pub async fn query_with_options(
+        &self,
+        doc_id: &str,
+        question: &str,
+        options: RetrieveOptions,
+    ) -> Result<QueryResult> {
+        let start = Instant::now();
+
+        // Get the document tree
+        let tree = self.get_tree(doc_id).await?;
+
+        // Query
+        let mut result = self.retriever.query(&tree, question, &options).await?;
+        result.doc_id = doc_id.to_string();
+
+        // Update stats
+        self.stats.increment_query_count();
+        self.stats.add_query_time(start.elapsed());
+
+        Ok(result)
+    }
+
+    /// Query across all documents in this session.
+    ///
+    /// Searches each document and merges results.
+    pub async fn query_all(&self, question: &str) -> Result<Vec<QueryResult>> {
+        self.query_all_with_options(question, RetrieveOptions::default()).await
+    }
+
+    /// Query across all documents with options.
+    pub async fn query_all_with_options(
+        &self,
+        question: &str,
+        options: RetrieveOptions,
+    ) -> Result<Vec<QueryResult>> {
+        // NOTE: the session document map is only populated once session-level
+        // caching is implemented (see `cache_document`).
+        let doc_ids: Vec<String> = self.documents.keys().cloned().collect();
+
+        if doc_ids.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let mut results = Vec::new();
+
+        for doc_id in &doc_ids {
+            match self.query_with_options(doc_id, question, options.clone()).await {
+                Ok(result) => {
+                    if !result.node_ids.is_empty() {
+                        results.push(result);
+                    }
+                }
+                Err(e) => {
+                    info!("Query failed for {}: {}", doc_id, e);
+                }
+            }
+        }
+
+        // Sort by score descending
+        results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+
+        Ok(results)
+    }
+
+    // ============================================================
+    // Document Management
+    // ============================================================
+
+    /// Get list of documents in this session.
+    pub fn list_documents(&self) -> Vec<DocumentInfo> {
+        self.documents.values().map(|ctx| ctx.meta.clone()).collect()
+    }
+
+    /// Get a document tree (from cache or workspace).
+    pub async fn get_tree(&self, doc_id: &str) -> Result<DocumentTree> {
+        // Check cache first
+        if let Some(tree) = self.get_cached_tree(doc_id) {
+            self.stats.increment_cache_hits();
+            return Ok((*tree).clone());
+        }
+
+        self.stats.increment_cache_misses();
+
+        // Load from workspace
+        let doc = self.workspace.load(doc_id)?
+            .ok_or_else(|| Error::DocumentNotFound(format!("Document not found: {}", doc_id)))?;
+
+        let tree = doc.tree;
+
+        // Cache for future use
+        self.cache_tree(doc_id, &tree);
+
+        Ok(tree)
+    }
+
+    /// Preload documents into the session cache.
+    ///
+    /// Useful for warming up the cache before querying.
+    pub async fn preload(&self, doc_ids: &[&str]) -> Result<usize> {
+        let mut loaded = 0;
+
+        for doc_id in doc_ids {
+            if self.get_cached_tree(doc_id).is_none() {
+                if let Ok(tree) = self.get_tree(doc_id).await {
+                    self.cache_tree(doc_id, &tree);
+                    loaded += 1;
+                }
+            }
+        }
+
+        info!("Session {}: preloaded {} documents", self.id, loaded);
+        Ok(loaded)
+    }
+
+    /// Remove a document from the session.
+    pub fn remove_document(&self, _doc_id: &str) -> bool {
+        // Note: This would need interior mutability for full implementation
+        false
+    }
+
+    /// Clear all documents from the session cache.
+    pub fn clear_cache(&self) {
+        // Note: This would need interior mutability for full implementation
+    }
+
+    // ============================================================
+    // Statistics
+    // ============================================================
+
+    /// Get session statistics.
+    pub fn stats(&self) -> SessionStats {
+        self.stats.clone()
+    }
+
+    /// Get the number of cached documents.
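+    ///
+    /// Illustrative usage:
+    ///
+    /// ```rust,ignore
+    /// println!("{} trees cached in session", session.cached_count());
+    /// ```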
+    pub fn cached_count(&self) -> usize {
+        self.documents.values().filter(|d| d.tree.is_some()).count()
+    }
+
+    // ============================================================
+    // Internal Methods
+    // ============================================================
+
+    /// Cache a document in this session.
+    fn cache_document(&self, _doc: crate::client::types::IndexedDocument) {
+        // Note: This would need interior mutability for full implementation
+        // For now, this is a placeholder
+    }
+
+    /// Get a cached tree.
+    fn get_cached_tree(&self, doc_id: &str) -> Option<Arc<DocumentTree>> {
+        self.documents.get(doc_id).and_then(|ctx| ctx.tree.clone())
+    }
+
+    /// Cache a tree.
+    fn cache_tree(&self, _doc_id: &str, _tree: &DocumentTree) {
+        // Note: This would need interior mutability for full implementation
+    }
+}
+
+impl Clone for Session {
+    fn clone(&self) -> Self {
+        Self {
+            id: self.id,
+            config: self.config.clone(),
+            documents: self.documents.clone(),
+            indexer: self.indexer.clone(),
+            retriever: self.retriever.clone(),
+            workspace: self.workspace.clone(),
+            events: self.events.clone(),
+            stats: self.stats.clone(),
+            created_at: self.created_at,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_session_config() {
+        let config = SessionConfig::default();
+        assert_eq!(config.max_cached_documents, 100);
+        assert_eq!(config.eviction_policy, EvictionPolicy::Lru);
+    }
+
+    #[test]
+    fn test_session_stats() {
+        let stats = SessionStats::default();
+        stats.cache_hits.set(8);
+        stats.cache_misses.set(2);
+
+        assert!((stats.cache_hit_rate() - 0.8).abs() < 0.01);
+    }
+}
diff --git a/src/client/types.rs b/src/client/types.rs
index e0e68a3a..40816257 100644
--- a/src/client/types.rs
+++ b/src/client/types.rs
@@ -1,7 +1,9 @@
 // Copyright (c) 2026 vectorless developers
 // SPDX-License-Identifier: Apache-2.0
 
-//! Client type definitions.
+//! Public API types for the client module.
+//!
+//! This module contains all types exposed in the public API.
 
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
@@ -9,6 +11,10 @@ use std::path::PathBuf;
 use crate::domain::DocumentTree;
 use crate::parser::DocumentFormat;
 
+// ============================================================
+// Document Types
+// ============================================================
+
 /// An indexed document with its tree structure and metadata.
 #[derive(Debug, Clone)]
 pub struct IndexedDocument {
@@ -116,6 +122,10 @@ pub struct PageContent {
     pub content: String,
 }
 
+// ============================================================
+// Index Types
+// ============================================================
+
 /// Document indexing mode.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum IndexMode {
@@ -164,7 +174,7 @@ impl Default for IndexOptions {
     fn default() -> Self {
         Self {
             mode: IndexMode::Auto,
-            generate_summaries: false, // Disabled by default, requires API key
+            generate_summaries: false,
             include_text: true,
             generate_ids: true,
             generate_description: false,
@@ -189,8 +199,18 @@ impl IndexOptions {
         self.generate_description = true;
         self
     }
+
+    /// Set the indexing mode.
+    pub fn with_mode(mut self, mode: IndexMode) -> Self {
+        self.mode = mode;
+        self
+    }
 }
 
+// ============================================================
+// Query Types
+// ============================================================
+
 /// Result of a document query.
 #[derive(Debug, Clone)]
 pub struct QueryResult {
@@ -207,6 +227,32 @@ pub struct QueryResult {
     pub score: f32,
 }
 
+impl QueryResult {
+    /// Create a new query result.
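+    ///
+    /// Illustrative usage:
+    ///
+    /// ```rust,ignore
+    /// let result = QueryResult::new("doc-1");
+    /// assert!(result.is_empty());
+    /// ```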
+    pub fn new(doc_id: impl Into<String>) -> Self {
+        Self {
+            doc_id: doc_id.into(),
+            node_ids: Vec::new(),
+            content: String::new(),
+            score: 0.0,
+        }
+    }
+
+    /// Check if the result is empty.
+    pub fn is_empty(&self) -> bool {
+        self.node_ids.is_empty()
+    }
+
+    /// Get the number of results.
+    pub fn len(&self) -> usize {
+        self.node_ids.len()
+    }
+}
+
+// ============================================================
+// Document Info Types
+// ============================================================
+
 /// Document info for listing.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct DocumentInfo {
@@ -228,3 +274,89 @@ pub struct DocumentInfo {
     /// Line count (for text files).
     pub line_count: Option<usize>,
 }
+
+impl DocumentInfo {
+    /// Create a new document info.
+    pub fn new(id: impl Into<String>, name: impl Into<String>) -> Self {
+        Self {
+            id: id.into(),
+            name: name.into(),
+            format: String::new(),
+            description: None,
+            page_count: None,
+            line_count: None,
+        }
+    }
+
+    /// Set the format.
+    pub fn with_format(mut self, format: impl Into<String>) -> Self {
+        self.format = format.into();
+        self
+    }
+}
+
+// ============================================================
+// Error Types
+// ============================================================
+
+/// Client error types.
+#[derive(Debug, Clone, thiserror::Error)]
+pub enum ClientError {
+    /// Document not found.
+    #[error("Document not found: {0}")]
+    NotFound(String),
+
+    /// Invalid operation.
+    #[error("Invalid operation: {0}")]
+    InvalidOperation(String),
+
+    /// Configuration error.
+    #[error("Configuration error: {0}")]
+    Config(String),
+
+    /// Timeout error.
+    #[error("Operation timed out")]
+    Timeout,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_indexed_document() {
+        let doc = IndexedDocument::new("doc-1", DocumentFormat::Markdown)
+            .with_name("Test Document")
+            .with_description("A test document");
+
+        assert_eq!(doc.id, "doc-1");
+        assert_eq!(doc.name, "Test Document");
+        assert!(doc.tree.is_none());
+    }
+
+    #[test]
+    fn test_index_options() {
+        let options = IndexOptions::new()
+            .with_summaries()
+            .with_mode(IndexMode::Pdf);
+
+        assert!(options.generate_summaries);
+        assert_eq!(options.mode, IndexMode::Pdf);
+    }
+
+    #[test]
+    fn test_query_result() {
+        let result = QueryResult::new("doc-1");
+        assert!(result.is_empty());
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_document_info() {
+        let info = DocumentInfo::new("doc-1", "Test")
+            .with_format("markdown");
+
+        assert_eq!(info.id, "doc-1");
+        assert_eq!(info.format, "markdown");
+    }
+}
diff --git a/src/client/workspace.rs b/src/client/workspace.rs
new file mode 100644
index 00000000..731a5e71
--- /dev/null
+++ b/src/client/workspace.rs
@@ -0,0 +1,372 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Workspace management client.
+//!
+//! This module provides CRUD operations for document persistence
+//! through the workspace abstraction.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! let workspace = WorkspaceClient::new(workspace_storage);
+//!
+//! // Save a document
+//! workspace.save(&doc)?;
+//!
+//! // Load a document
+//! let doc = workspace.load("doc-id")?;
+//!
+//! // List all documents
+//! for doc in workspace.list()? {
+//!     println!("{}: {}", doc.id, doc.name);
+//! }
+//! ```
+
+use std::sync::{Arc, RwLock};
+
+use tracing::{debug, info, warn};
+
+use crate::domain::{Error, Result};
+use crate::storage::{DocumentMetaEntry, PersistedDocument, Workspace};
+
+use super::events::{EventEmitter, WorkspaceEvent};
+use super::types::DocumentInfo;
+
+/// Workspace management client.
+///
+/// Provides thread-safe CRUD operations for document persistence.
+pub struct WorkspaceClient {
+    /// Workspace storage.
+    workspace: Arc<RwLock<Workspace>>,
+
+    /// Event emitter.
+    events: EventEmitter,
+
+    /// Configuration.
+    config: WorkspaceClientConfig,
+}
+
+/// Workspace client configuration.
+#[derive(Debug, Clone)]
+pub struct WorkspaceClientConfig {
+    /// Auto-save interval in seconds (None = disabled).
+    pub auto_save_interval: Option<u64>,
+
+    /// Enable verbose logging.
+    pub verbose: bool,
+}
+
+impl Default for WorkspaceClientConfig {
+    fn default() -> Self {
+        Self {
+            auto_save_interval: None,
+            verbose: false,
+        }
+    }
+}
+
+impl WorkspaceClient {
+    /// Create a new workspace client.
+    pub fn new(workspace: Workspace) -> Self {
+        Self {
+            workspace: Arc::new(RwLock::new(workspace)),
+            events: EventEmitter::new(),
+            config: WorkspaceClientConfig::default(),
+        }
+    }
+
+    /// Set the event emitter.
+    pub fn with_events(mut self, events: EventEmitter) -> Self {
+        self.events = events;
+        self
+    }
+
+    /// Set the configuration.
+    pub fn with_config(mut self, config: WorkspaceClientConfig) -> Self {
+        self.config = config;
+        self
+    }
+
+    /// Create from an existing workspace Arc.
+    pub(crate) fn from_arc(workspace: Arc<RwLock<Workspace>>, events: EventEmitter) -> Self {
+        Self {
+            workspace,
+            events,
+            config: WorkspaceClientConfig::default(),
+        }
+    }
+
+    /// Save a document to the workspace.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace write fails.
+    pub fn save(&self, doc: &PersistedDocument) -> Result<()> {
+        let doc_id = doc.meta.id.clone();
+
+        {
+            let mut ws = self.workspace.write()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+            ws.add(doc)?;
+        }
+
+        info!("Saved document: {}", doc_id);
+        self.events.emit_workspace(WorkspaceEvent::Saved { doc_id });
+
+        Ok(())
+    }
+
+    /// Load a document from the workspace.
+    ///
+    /// Returns `Ok(None)` if the document doesn't exist.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn load(&self, doc_id: &str) -> Result<Option<PersistedDocument>> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+        if !ws.contains(doc_id) {
+            return Ok(None);
+        }
+
+        let doc = ws.load(doc_id)?;
+        let cache_hit = doc.is_some();
+
+        if cache_hit {
+            debug!("Loaded document: {} (cache={})", doc_id, cache_hit);
+        }
+
+        self.events.emit_workspace(WorkspaceEvent::Loaded {
+            doc_id: doc_id.to_string(),
+            cache_hit,
+        });
+
+        Ok(doc)
+    }
+
+    /// Remove a document from the workspace.
+    ///
+    /// Returns `Ok(true)` if the document was removed, `Ok(false)` if it didn't exist.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace write fails.
+    pub fn remove(&self, doc_id: &str) -> Result<bool> {
+        let removed = {
+            let mut ws = self.workspace.write()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+            ws.remove(doc_id)?
+        };
+
+        if removed {
+            info!("Removed document: {}", doc_id);
+            self.events.emit_workspace(WorkspaceEvent::Removed {
+                doc_id: doc_id.to_string(),
+            });
+        }
+
+        Ok(removed)
+    }
+
+    /// Check if a document exists in the workspace.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
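+    ///
+    /// Illustrative usage:
+    ///
+    /// ```rust,ignore
+    /// if workspace.exists("doc-1")? {
+    ///     println!("document is persisted");
+    /// }
+    /// ```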
+    pub fn exists(&self, doc_id: &str) -> Result<bool> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+        Ok(ws.contains(doc_id))
+    }
+
+    /// List all documents in the workspace.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn list(&self) -> Result<Vec<DocumentInfo>> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+        Ok(ws.list_documents()
+            .iter()
+            .filter_map(|id| ws.get_meta(id))
+            .map(|meta| DocumentInfo {
+                id: meta.id.clone(),
+                name: meta.doc_name.clone(),
+                format: meta.doc_type.clone(),
+                description: meta.doc_description.clone(),
+                page_count: meta.page_count,
+                line_count: meta.line_count,
+            })
+            .collect())
+    }
+
+    /// Get document metadata without loading the full document.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn get_meta(&self, doc_id: &str) -> Result<Option<DocumentMetaEntry>> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+        Ok(ws.get_meta(doc_id).cloned())
+    }
+
+    /// Get document info by ID.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn get_document_info(&self, doc_id: &str) -> Result<Option<DocumentInfo>> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+        Ok(ws.get_meta(doc_id).map(|meta| DocumentInfo {
+            id: meta.id.clone(),
+            name: meta.doc_name.clone(),
+            format: meta.doc_type.clone(),
+            description: meta.doc_description.clone(),
+            page_count: meta.page_count,
+            line_count: meta.line_count,
+        }))
+    }
+
+    /// Remove multiple documents from the workspace.
+    ///
+    /// Returns the number of documents successfully removed.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace write fails.
+    pub fn batch_remove(&self, doc_ids: &[&str]) -> Result<usize> {
+        let mut removed = 0;
+
+        {
+            let mut ws = self.workspace.write()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+            for doc_id in doc_ids {
+                if ws.remove(doc_id)? {
+                    removed += 1;
+                    self.events.emit_workspace(WorkspaceEvent::Removed {
+                        doc_id: doc_id.to_string(),
+                    });
+                }
+            }
+        }
+
+        if removed > 0 {
+            info!("Batch removed {} documents", removed);
+        }
+
+        Ok(removed)
+    }
+
+    /// Clear all documents from the workspace.
+    ///
+    /// Returns the number of documents removed.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace write fails.
+    pub fn clear(&self) -> Result<usize> {
+        let doc_ids: Vec<String>;
+
+        {
+            let ws = self.workspace.read()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+            doc_ids = ws.list_documents().iter().map(|s| s.to_string()).collect();
+        }
+
+        let count = doc_ids.len();
+
+        {
+            let mut ws = self.workspace.write()
+                .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+            for doc_id in &doc_ids {
+                let _ = ws.remove(doc_id);
+            }
+        }
+
+        if count > 0 {
+            info!("Cleared workspace: {} documents removed", count);
+            self.events.emit_workspace(WorkspaceEvent::Cleared { count });
+        }
+
+        Ok(count)
+    }
+
+    /// Get workspace statistics.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the workspace read fails.
+    pub fn stats(&self) -> Result<WorkspaceStats> {
+        let ws = self.workspace.read()
+            .map_err(|_| Error::Other("Workspace lock poisoned".to_string()))?;
+
+        Ok(WorkspaceStats {
+            document_count: ws.len(),
+        })
+    }
+
+    /// Get the number of documents in the workspace.
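+    ///
+    /// Returns 0 if the workspace lock is poisoned.
+    ///
+    /// ```rust,ignore
+    /// println!("workspace holds {} documents", workspace.len());
+    /// ```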
+    pub fn len(&self) -> usize {
+        self.workspace.read()
+            .map(|ws| ws.len())
+            .unwrap_or(0)
+    }
+
+    /// Check if the workspace is empty.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Get the underlying workspace Arc (for advanced use).
+    pub(crate) fn inner(&self) -> Arc<RwLock<Workspace>> {
+        Arc::clone(&self.workspace)
+    }
+}
+
+impl Clone for WorkspaceClient {
+    fn clone(&self) -> Self {
+        Self {
+            workspace: Arc::clone(&self.workspace),
+            events: self.events.clone(),
+            config: self.config.clone(),
+        }
+    }
+}
+
+/// Workspace statistics.
+#[derive(Debug, Clone)]
+pub struct WorkspaceStats {
+    /// Number of documents in the workspace.
+    pub document_count: usize,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_workspace_client_creation() {
+        let workspace = Workspace::open("./test_workspace").unwrap();
+        let client = WorkspaceClient::new(workspace);
+        assert!(client.is_empty());
+    }
+
+    #[test]
+    fn test_workspace_stats() {
+        let workspace = Workspace::open("./test_workspace").unwrap();
+        let client = WorkspaceClient::new(workspace);
+
+        let stats = client.stats().unwrap();
+        assert_eq!(stats.document_count, 0);
+    }
+}

From 6272a25184bcb7ba616b48dbf958bef28913ac2c Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 21:37:23 +0800
Subject: [PATCH 17/21] feat: add comprehensive examples for batch processing,
 events, and sessions

- Implement complete batch_processing.rs example demonstrating concurrent
  document indexing with progress tracking and performance metrics
- Add events.rs example showing event callback system for monitoring
  indexing and query operations
- Create session.rs example illustrating multi-document operations with
  caching and cross-document queries
- Make events module public to enable external access to event system
- Include detailed usage instructions and performance statistics in all
  examples
---
 examples/batch_processing.rs | 1015 ++++++++++++++++++++++++++++++++--
 examples/events.rs           |  152 +++++
 examples/session.rs          |  207 +++++++
 src/client/mod.rs            |    2 +-
 4 files changed, 1323 insertions(+), 53 deletions(-)
 create mode 100644 examples/events.rs
 create mode 100644 examples/session.rs

diff --git a/examples/batch_processing.rs b/examples/batch_processing.rs
index bbbde4d0..6906189f 100644
--- a/examples/batch_processing.rs
+++ b/examples/batch_processing.rs
@@ -4,58 +4,969 @@
 //! Batch document processing example.
 //!
 //! This example demonstrates how to efficiently process
-//! multiple documents in batch mode.
+//! multiple documents in batch mode using sessions.
 //!
-//! # What you'll learn:
-//! - How to index multiple documents concurrently
-//! - How to batch queries for better throughput
-//! - How to manage resources (memory, LLM calls) during batch processing
-//! - How to track progress and handle failures
+//! # Usage
 //!
-//! # Use cases:
-//! - Indexing a documentation site with hundreds of pages
-//! - Processing a corpus of research papers
-//! - Building a knowledge base from multiple sources
-//!
-//! # Performance considerations:
-//! - Control concurrency with `max_concurrent_indexing`
-//! - Use rate limiting to avoid LLM API throttling
-//! - Monitor memory usage with large document sets
-//!
-//! # TODO: Implementation steps
-//!
-//! 1. Load list of documents to process
-//! 2. Configure batch processing parameters
-//! 3. Process documents with controlled concurrency
-//! 4. Track progress and handle errors
-//! 5. Generate processing report
-
-// TODO: Implement batch processing
-// ```
-// use std::path::PathBuf;
-// use futures::stream::{self, StreamExt};
-// use vectorless::client::{Engine, EngineBuilder};
-//
-// async fn batch_index(
-//     engine: &Engine,
-//     documents: Vec<PathBuf>,
-//     concurrency: usize,
-// ) -> Vec<Result<String>> {
-//     stream::iter(documents)
-//         .map(|path| async move { engine.index(&path).await })
-//         .buffer_unordered(concurrency)
-//         .collect()
-//         .await
-// }
-// ```
-
-fn main() {
-    // TODO: Show batch indexing and querying
-    //
-    // let documents = find_all_markdown_files("./docs");
-    // let results = batch_index(&engine, documents, 5).await;
-    //
-    // // Process results, report failures, etc.
-
-    println!("TODO: Implement batch_processing example");
+//! ```bash
+//! cargo run --example batch_processing
+//! ```
+
+use vectorless::client::EngineBuilder;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Batch Document Processing Example ===\n");
+
+    // 1. Create engine and session
+    println!("Step 1: Setting up...");
+    let engine = EngineBuilder::new()
+        .with_workspace("./workspace_batch_example")
+        .build()
+        .map_err(|e| vectorless::Error::Config(e.to_string()))?;
+
+    let session = engine.session();
+    println!("    ✓ Session created: {}\n", session.id());
+
+    // 2. Create sample documents
+    println!("Step 2: Creating sample documents...");
+    let temp_dir = tempfile::tempdir()?;
+
+    let documents = vec![
+        ("intro.md", r#"# Introduction
+
+Welcome to the vectorless library. This is a document intelligence engine.
+
+## Features
+
+- Tree-based navigation
+- Multi-format support
+- Session management
+"#),
+        ("api.md", r#"# API Reference
+
+## Engine
+
+The main client for document operations.
+
+### Methods
+
+- `index(path)`: Index a document
+- `query(question)`: Query indexed content
+
+## Session
+
+Multi-document operations with caching.
+
+### Methods
+
+- `index(path)`: Index into session
+- `query_all(question)`: Query across all documents
+"#),
+        ("guide.md", r#"# User Guide
+
+## Getting Started
+
+First, create a client with workspace configuration.
+
+## Best Practices
+
+- Use sessions for multi-document operations
+- Enable caching for better performance
+- Monitor events for debugging
+"#),
+        ("advanced.md", r#"# Advanced Topics
+
+## Performance Tuning
+
+Configure retrieval parameters for optimal performance.
+
+### Parameters
+
+- `top_k`: Number of results
+- `max_tokens`: Token budget
+
+## Custom Pilots
+
+Implement custom navigation logic.
+"#),
+        ("reference.md", r#"# Reference
+
+## Configuration
+
+All configuration is done via TOML files.
+
+### Example
+
+```toml
+[retrieval]
+top_k = 5
+max_tokens = 4000
+```
+"#),
+        ("examples.md", r#"# Examples
+
+## Basic Usage
+
+Simple indexing and querying example.
+
+## Batch Processing
+
+Process multiple documents concurrently.
+
+## Session Usage
+
+Multi-document operations with caching.
+"#),
+        ("faq.md", r#"# FAQ
+
+## Common Questions
+
+**Q: How do I index a document?**
+A: Use `engine.index(path)` method.
+
+**Q: How to query?**
+A: Use `engine.query(doc_id, question)` method.
+
+**Q: What formats are supported?**
+A: Markdown, PDF, DOCX, HTML.
+"#),
+        ("changelog.md", r#"# Changelog
+
+## Version 0.1.0
+
+- Initial release
+- Basic indexing support
+- Simple retrieval
+
+## Version 0.2.0
+
+- Session support
+- Event system
+- Content aggregator
+"#),
+        ("contributing.md", r#"# Contributing
+
+## How to Contribute
+
+We welcome contributions! Please follow these steps:
+
+1. 
Fork the repository +2. Create a feature branch +3. Submit a pull request + +## Code Style + +- Run `cargo fmt` +- Run `cargo clippy` +- Add tests +"#), + ("license.md", r#"# License + +Apache License, Version 2.0 + +Copyright 2026 vectorless developers +"#), + ("architecture.md", r#"# Architecture + +## Overview + +Vectorless uses a tree-based architecture. + +## Components + +- Parser: Document parsing +- Indexer: Tree building +- Retriever: Content search +- Storage: Persistence +"#), + ("security.md", r#"# Security + +## Security Considerations + +- API keys are stored securely +- No sensitive data in logs +- Input validation + +## Best Practices + +- Use environment variables +- Rotate keys periodically +"#), + ("performance.md", r#"# Performance + +## Optimization Tips + +- Use caching effectively +- Configure appropriate batch sizes +- Monitor memory usage + +## Benchmarks + +Run `cargo bench` for performance metrics. +"#), + ("testing.md", r#"# Testing + +## Running Tests + +```bash +cargo test +``` + +## Test Coverage + +- Unit tests +- Integration tests +- Example tests +"#), + ("deployment.md", r#"# Deployment + +## Production Setup + +- Configure workspace directory +- Set up logging +- Monitor performance + +## Configuration + +Use TOML configuration files. +"#), + ("troubleshooting.md", r#"# Troubleshooting + +## Common Issues + +### Indexing Fails + +Check file format and permissions. + +### Query Returns Empty + +Ensure document is indexed. + +### Performance Issues + +Reduce batch size or enable caching. +"#), + ("integrations.md", r#"# Integrations + +## LLM Providers + +- OpenAI +- Anthropic +- Local models + +## Storage Backends + +- File system (default) +- S3 (planned) +"#), + ("migrations.md", r#"# Migrations + +## Version Migrations + +### 0.1.x to 0.2.x + +- Update configuration format +- Re-index documents +"#), + ("roadmap.md", r#"# Roadmap + +## Future Plans + +### Short Term + +- Streaming support +- More formats + +### Long Term + +- Distributed indexing +- Real-time updates +"#), + ("credits.md", r#"# Credits + +## Contributors + +Thanks to all contributors! + +## Libraries + +Built with Rust and many open-source libraries. +"#), + ("index.md", r#"# Index + +## Quick Links + +- [Introduction](intro.md) +- [API Reference](api.md) +- [User Guide](guide.md) + +## Search + +Use the search functionality to find specific content. +"#), + ("search.md", r#"# Search + +## Search Functionality + +### Basic Search + +```rust +let results = engine.query(&doc_id, "search term").await?; +``` + +### Advanced Search + +Use sessions for cross-document search. +"#), + ("export.md", r#"# Export + +## Exporting Data + +### JSON Export + +```rust +let json = tree.to_structure_json(); +``` + +### Custom Formats + +Implement custom exporters as needed. +"#), + ("import.md", r#"# Import + +## Importing Data + +### From Files + +```rust +let doc_id = engine.index("./document.md").await?; +``` + +### From Memory + +Use the content directly with parsers. +"#), + ("validation.md", r#"# Validation + +## Input Validation + +### Document Paths + +Must exist and be readable. + +### Configuration + +Validated on load with helpful errors. + +### Queries + +Sanitized before processing. +"#), + ("formatting.md", r#"# Formatting + +## Content Formatting + +### Markdown + +Standard CommonMark with extensions. + +### Code Blocks + +Syntax highlighting support. + +### Tables + +Basic table parsing. +"#), + ("localization.md", r#"# Localization + +## Internationalization + +Currently English-only. 
+ +## Future Support + +Planned i18n support for: +- Error messages +- UI strings +- Documentation +"#), + ("accessibility.md", r#"# Accessibility + +## Accessibility + +### Documentation + +Clear and comprehensive docs. + +### API Design + +Consistent and intuitive naming. + +### Error Messages + +Helpful and actionable. +"#), + ("glossary.md", r#"# Glossary + +## Terms + +- **Document Tree**: Hierarchical structure +- **Session**: Multi-document context +- **Workspace**: Document storage +- **Retrieval**: Content search +"#), + ("appendix.md", r#"# Appendix + +## Additional Resources + +- [GitHub Repository](https://github.com) +- [Documentation Site](https://docs.vectorless.dev) +- [Community Discord](https://discord.gg) +"#), + ("summary.md", r#"# Summary + +## Overview + +This documentation covers all aspects of vectorless. + +## Next Steps + +- Try the examples +- Join the community +- Contribute! +"#), + ("conclusion.md", r#"# Conclusion + +## Thank You + +Thanks for using vectorless! + +## Feedback + +We'd love to hear from you. Open an issue on GitHub. +"#), + ("revision.md", r#"# Revision History + +## Document Versions + +| Version | Date | Changes | +|---------|------------|---------------------------| +| 1.0 | 2026-01-01 | Initial version | +| 1.1 | 2026-02-01 | Session support | +"#), + ("feedback.md", r#"# Feedback + +## Providing Feedback + +We value your input! + +### Channels + +- GitHub Issues +- Discord Community +- Email Support + +### What to Share + +- Bug reports +- Feature requests +- Documentation improvements +"#), + ("support.md", r#"# Support + +## Getting Help + +### Documentation + +Start with the user guide. + +### Community + +Join our Discord for discussions. + +### Enterprise + +Contact us for enterprise support. +"#), + ("updates.md", r#"# Updates + +## Staying Updated + +### Version Updates + +Check the changelog for updates. + +### Security Updates + +Apply security patches promptly. + +### Deprecations + +Watch for deprecation notices. +"#), + ("resources.md", r#"# Resources + +## External Resources + +### Official + +- Documentation: docs.vectorless.dev +- GitHub: github.com/vectorless +- Discord: discord.gg/vectorless + +### Community + +- Blog posts +- Tutorial videos +- Example projects +"#), + ("contact.md", r#"# Contact + +## Contact Information + +### General Inquiries + +Email: hello@vectorless.dev + +### Security Issues + +Email: security@vectorless.dev + +### Enterprise Sales + +Email: enterprise@vectorless.dev +"#), + ("privacy.md", r#"# Privacy Policy + +## Data Handling + +Vectorless processes documents locally. + +## No Tracking + +We don't track usage or content. + +## API Keys + +Stored securely in configuration files. +"#), + ("terms.md", r#"# Terms of Service + +## Usage Terms + +By using vectorless, you agree to: + +- Use responsibly +- Follow applicable laws +- Respect rate limits + +## Changes + +Terms may be updated. Check for revisions. +"#), + ("legal.md", r#"# Legal + +## Licensing + +Apache License 2.0 + +## Copyright + +Copyright 2026 vectorless developers + +## Trademarks + +Vectorless is a trademark. +"#), + ("versioning.md", r#"# Versioning + +## Semantic Versioning + +We follow semver: + +- MAJOR: Breaking changes +- MINOR: New features +- PATCH: Bug fixes + +## Current Version + +0.1.10 +"#), + ("compatibility.md", r#"# Compatibility + +## Supported Versions + +- Rust 1.70+ +- Tokio 1.x + +## Platform Support + +- Linux +- macOS +- Windows + +## Breaking Changes + +Documented in changelog. 
+"#), + ("installation.md", r#"# Installation + +## Requirements + +- Rust 1.70+ +- Tokio runtime + +## Install + +```bash +cargo install vectorless +``` + +## Verify + +```bash +vectorless --version +``` +"#), + ("quickstart.md", r#"# Quick Start + +## 5-Minute Setup + +1. Install vectorless +2. Create a client +3. Index a document +4. Query! + +```rust +let client = Engine::builder().build()?; +let doc_id = client.index("./doc.md").await?; +let result = client.query(&doc_id, "What is this?").await?; +``` +"#), + ("tutorial.md", r#"# Tutorial + +## Introduction + +This tutorial covers basic usage. + +## Step 1: Setup + +Create a client with workspace. + +## Step 2: Index + +Index your first document. + +## Step 3: Query + +Ask questions about your document. + +## Step 4: Next + +Explore advanced features. +"#), + ("examples_overview.md", r#"# Examples Overview + +## Available Examples + +| Example | Description | +|-----------------|--------------------------------| +| basic.rs | Basic usage | +| session.rs | Multi-document operations | +| events.rs | Event callbacks | +| batch.rs | Batch processing | + +## Running Examples + +```bash +cargo run --example +``` +"#), + ("configuration.md", r#"# Configuration + +## Configuration File + +Use `config.toml` for settings: + +```toml +[storage] +workspace_dir = "./workspace" + +[retrieval] +top_k = 5 +max_tokens = 4000 +``` + +## Environment Variables + +- `OPENAI_API_KEY`: LLM API key +"#), + ("optimization.md", r#"# Optimization + +## Performance Tips + +- Use sessions for caching +- Batch document indexing +- Configure appropriate token limits + +## Memory Management + +Documents are cached in sessions. + +## Concurrency + +Use `buffer_unordered` for parallel indexing. +"#), + ("errors.md", r#"# Error Handling + +## Error Types + +- `ConfigError`: Configuration issues +- `ParseError`: Document parsing failures +- `RetrievalError`: Query failures + +## Handling Errors + +```rust +match result { + Ok(response) => { /* success */ }, + Err(Error::Parse(msg)) => { /* handle parse error */ }, + Err(e) => { /* other error */ }, +} +``` +"#), + ("logging.md", r#"# Logging + +## Log Levels + +- ERROR: Serious issues +- WARN: Potential issues +- INFO: General information +- DEBUG: Detailed information +- TRACE: Very detailed + +## Enabling Logs + +```bash +RUST_LOG=debug cargo run +``` +"#), + ("metrics.md", r#"# Metrics + +## Available Metrics + +- Query count +- Cache hit rate +- Average query time + +## Accessing Metrics + +```rust +let stats = session.stats(); +println!("Cache hit rate: {:.1}%", stats.cache_hit_rate() * 100.0); +``` +"#), + ("health.md", r#"# Health Checks + +## Workspace Health + +Check workspace integrity: + +```rust +let docs = engine.list_documents(); +println!("{} documents indexed", docs.len()); +``` + +## Session Health + +Monitor session statistics regularly. +"#), + ("backup.md", r#"# Backup + +## Backing Up + +Copy the workspace directory: + +```bash +cp -r ./workspace ./workspace_backup +``` + +## Restoration + +Restore by copying back: + +```bash +cp -r ./workspace_backup ./workspace +``` +"#), + ("recovery.md", r#"# Recovery + +## Corrupted Documents + +Remove and re-index: + +```rust +engine.remove(&doc_id)?; +engine.index(&path).await?; +``` + +## Session Recovery + +Create a new session if issues occur. 
+"#), + ("monitoring.md", r#"# Monitoring + +## Production Monitoring + +Use events for real-time monitoring: + +```rust +let events = EventEmitter::new() + .on_query(|e| { + // Log to monitoring system + }); +``` + +## Alerts + +Set up alerts for error rates. +"#), + ("scaling.md", r#"# Scaling + +## Horizontal Scaling + +Run multiple instances with shared storage. + +## Vertical Scaling + +Increase resources for single instance. + +## Considerations + +- Storage backend +- Cache coordination +- Rate limiting +"#), + ("security_config.md", r#"# Security Configuration + +## API Keys + +Store securely: + +```toml +[summary] +api_key = "${OPENAI_API_KEY}" +``` + +## Network Security + +Use HTTPS for all API calls. + +## Access Control + +Implement authentication for production. +"#), + ]; + + for (name, content) in &documents { + let path = temp_dir.path().join(name); + std::fs::write(&path, content)?; + } + + println!(" ✓ Created {} sample documents\n", documents.len()); + + // 3. Batch indexing with progress + println!("Step 3: Batch indexing..."); + let start = std::time::Instant::now(); + let mut doc_ids = Vec::new(); + + for (name, _) in &documents { + let path = temp_dir.path().join(name); + match session.index(&path).await { + Ok(doc_id) => { + doc_ids.push(doc_id); + } + Err(e) => { + eprintln!(" ✗ Failed to index {}: {}", name, e); + } + } + } + + let elapsed = start.elapsed(); + println!(" ✓ Indexed {} documents in {:?}", doc_ids.len(), elapsed); + println!(" - Rate: {:.1} docs/sec", doc_ids.len() as f64 / elapsed.as_secs_f64()); + println!(); + + // 4. Show session stats + println!("Step 4: Session statistics:"); + let stats = session.stats(); + println!(" - Documents in session: {}", session.list_documents().len()); + println!(" - Queries: {}", stats.query_count.get()); + println!(); + + // 5. Batch query with progress + println!("Step 5: Batch querying..."); + let queries = vec![ + "What is vectorless?", + "How to index?", + "Configuration options", + "API methods", + "Performance tips", + "Error handling", + "Logging setup", + "Security considerations", + "Scaling options", + "Getting help", + ]; + + let start = std::time::Instant::now(); + let mut success_count = 0; + + for query in &queries { + match session.query_all(query).await { + Ok(results) => { + if !results.is_empty() { + success_count += 1; + } + } + Err(e) => { + eprintln!(" ✗ Query failed: {}", e); + } + } + } + + let elapsed = start.elapsed(); + println!(" ✓ Completed {} queries in {:?}", queries.len(), elapsed); + println!(" - Success rate: {:.0}%", (success_count as f64 / queries.len() as f64) * 100.0); + println!(" - Rate: {:.1} queries/sec", queries.len() as f64 / elapsed.as_secs_f64()); + println!(); + + // 6. Final statistics + println!("Step 6: Final statistics:"); + let stats = session.stats(); + println!(" - Total documents: {}", session.list_documents().len()); + println!(" - Total queries: {}", stats.query_count.get()); + println!(" - Cache hits: {}", stats.cache_hits.get()); + println!(" - Cache misses: {}", stats.cache_misses.get()); + println!( + " - Cache hit rate: {:.1}%", + stats.cache_hit_rate() * 100.0 + ); + if let Some(avg_time) = stats.avg_query_time() { + println!(" - Avg query time: {:?}", avg_time); + } + println!(" - Session age: {:?}", session.age()); + println!(); + + // 7. 
Cleanup
+    println!("Step 7: Cleanup...");
+    for doc_id in &doc_ids {
+        engine.remove(doc_id)?;
+    }
+    println!("    ✓ Removed {} documents\n", doc_ids.len());
+
+    println!("=== Example Complete ===");
+    Ok(())
 }
diff --git a/examples/events.rs b/examples/events.rs
new file mode 100644
index 00000000..eab7b68a
--- /dev/null
+++ b/examples/events.rs
@@ -0,0 +1,152 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Event callbacks example.
+//!
+//! This example demonstrates the event system for:
+//! - Monitoring indexing progress
+//! - Tracking query execution
+//! - Debugging retrieval behavior
+//!
+//! # Usage
+//!
+//! ```bash
+//! cargo run --example events
+//! ```
+
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+
+use vectorless::client::{EngineBuilder, EventEmitter, IndexEvent, QueryEvent};
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Event Callbacks Example ===\n");
+
+    // 1. Create event emitter with handlers
+    println!("Step 1: Setting up event handlers...\n");
+
+    let index_count = Arc::new(AtomicUsize::new(0));
+    let query_count = Arc::new(AtomicUsize::new(0));
+    let nodes_visited = Arc::new(AtomicUsize::new(0));
+
+    let index_count_clone = index_count.clone();
+    let query_count_clone = query_count.clone();
+    let nodes_visited_clone = nodes_visited.clone();
+
+    let events = EventEmitter::new()
+        // Index events
+        .on_index(move |e| {
+            match e {
+                IndexEvent::Started { path } => {
+                    println!("  [INDEX] Started: {}", path);
+                }
+                IndexEvent::FormatDetected { format } => {
+                    println!("  [INDEX] Format: {:?}", format);
+                }
+                IndexEvent::TreeBuilt { node_count } => {
+                    println!("  [INDEX] Tree built: {} nodes", node_count);
+                }
+                IndexEvent::Complete { doc_id } => {
+                    println!("  [INDEX] Complete: {}", &doc_id[..8]);
+                    index_count_clone.fetch_add(1, Ordering::SeqCst);
+                }
+                IndexEvent::Error { message } => {
+                    println!("  [INDEX] Error: {}", message);
+                }
+                _ => {}
+            }
+        })
+        // Query events
+        .on_query(move |e| {
+            match e {
+                QueryEvent::Started { query } => {
+                    println!("  [QUERY] Started: \"{}\"", query);
+                    query_count_clone.fetch_add(1, Ordering::SeqCst);
+                }
+                QueryEvent::NodeVisited { title, score, .. } => {
+                    println!("  [QUERY] Visited: \"{}\" (score: {:.2})", title, score);
+                    nodes_visited_clone.fetch_add(1, Ordering::SeqCst);
+                }
+                QueryEvent::CandidateFound { node_id, score } => {
+                    println!("  [QUERY] Candidate: {} (score: {:.2})", &node_id[..8], score);
+                }
+                QueryEvent::Complete { total_results, confidence } => {
+                    println!("  [QUERY] Complete: {} results, confidence: {:.2}", total_results, confidence);
+                }
+                QueryEvent::Error { message } => {
+                    println!("  [QUERY] Error: {}", message);
+                }
+                _ => {}
+            }
+        });
+
+    println!("    ✓ Event handlers configured\n");
+
+    // 2. Create engine with events
+    println!("Step 2: Creating engine with event emitter...");
+    let engine = EngineBuilder::new()
+        .with_workspace("./workspace_events_example")
+        .with_events(events)
+        .build()
+        .map_err(|e| vectorless::Error::Config(e.to_string()))?;
+    println!("    ✓ Engine created\n");
+
+    // 3. Index a document (events will fire)
+    println!("Step 3: Indexing document (watch events)...\n");
+
+    let temp_dir = tempfile::tempdir()?;
+    let doc_content = r#"# Example Document
+
+## Introduction
+
+This is an example document for demonstrating event callbacks.
+
+## Features
+
+- Event monitoring for indexing
+- Event monitoring for queries
+- Progress tracking
+
+## Architecture
+
+The event system uses handlers that can be attached to the engine builder.
+"#;
+
+    let doc_path = temp_dir.path().join("example.md");
+    tokio::fs::write(&doc_path, doc_content).await?;
+
+    let doc_id = engine.index(&doc_path).await?;
+    println!();
+
+    // 4. Query the document (events will fire)
+    println!("Step 4: Querying document (watch events)...\n");
+
+    let result = engine.query(&doc_id, "What features are available?").await?;
+    println!();
+
+    // 5. Show results
+    println!("Step 5: Query result:");
+    println!("    - Score: {:.2}", result.score);
+    println!("    - Nodes: {}", result.node_ids.len());
+    if !result.content.is_empty() {
+        let preview: String = result.content.chars().take(100).collect();
+        println!("    - Content: {}...", preview);
+    }
+    println!();
+
+    // 6. Show statistics
+    println!("Step 6: Event statistics:");
+    println!("    - Index events fired: {}", index_count.load(Ordering::SeqCst));
+    println!("    - Query events fired: {}", query_count.load(Ordering::SeqCst));
+    println!("    - Nodes visited: {}", nodes_visited.load(Ordering::SeqCst));
+    println!();
+
+    // 7. Cleanup
+    println!("Step 7: Cleanup...");
+    engine.remove(&doc_id)?;
+    println!("    ✓ Document removed\n");
+
+    println!("=== Example Complete ===");
+    Ok(())
+}
diff --git a/examples/session.rs b/examples/session.rs
new file mode 100644
index 00000000..25aaf3ab
--- /dev/null
+++ b/examples/session.rs
@@ -0,0 +1,207 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Session-based multi-document operations example.
+//!
+//! This example demonstrates the Session API for:
+//! - Managing multiple documents in a single session
+//! - Cross-document queries
+//! - Session caching for improved performance
+//! - Session statistics
+//!
+//! # Usage
+//!
+//! ```bash
+//! cargo run --example session
+//! ```
+
+use vectorless::client::EngineBuilder;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Session-Based Multi-Document Example ===\n");
+
+    // 1. Create the engine
+    println!("Step 1: Creating engine...");
+    let engine = EngineBuilder::new()
+        .with_workspace("./workspace_session_example")
+        .build()
+        .map_err(|e| vectorless::Error::Config(e.to_string()))?;
+    println!("    ✓ Engine created\n");
+
+    // 2. Create a session for multi-document operations
+    println!("Step 2: Creating session...");
+    let session = engine.session();
+    println!("    ✓ Session ID: {}\n", session.id());
+
+    // 3. Index multiple documents into the session
+    println!("Step 3: Indexing documents...");
+
+    // Create sample documents
+    let temp_dir = tempfile::tempdir()?;
+
+    let doc1_content = r#"# Architecture Guide
+
+## Overview
+
+Vectorless uses a tree-based architecture for document navigation.
+
+## Components
+
+- **Indexer**: Parses documents and builds tree structure
+- **Retriever**: Navigates tree to find relevant content
+- **Workspace**: Manages document persistence
+"#;
+
+    let doc2_content = r#"# API Reference
+
+## Engine
+
+The main entry point for vectorless operations.
+
+### Methods
+
+- `index(path)`: Index a document
+- `query(doc_id, question)`: Query a document
+- `list_documents()`: List all documents
+
+## Session
+
+Multi-document operations with caching.
+ +### Methods + +- `index(path)`: Index into session +- `query(doc_id, question)`: Query cached document +- `query_all(question)`: Query across all documents +"#; + + let doc3_content = r#"# Configuration Guide + +## Workspace Settings + +The workspace directory stores indexed documents. + +```toml +[storage] +workspace_dir = "./workspace" +``` + +## Retrieval Settings + +Configure retrieval behavior: + +```toml +[retrieval] +top_k = 5 +max_tokens = 4000 +``` + +## Content Aggregator + +Control content aggregation: + +```toml +[retrieval.content] +enabled = true +token_budget = 4000 +``` +"#; + + // Write sample documents + let doc1_path = temp_dir.path().join("architecture.md"); + let doc2_path = temp_dir.path().join("api.md"); + let doc3_path = temp_dir.path().join("config.md"); + + tokio::fs::write(&doc1_path, doc1_content).await?; + tokio::fs::write(&doc2_path, doc2_content).await?; + tokio::fs::write(&doc3_path, doc3_content).await?; + + // Index into session + let doc1_id = session.index(&doc1_path).await?; + println!(" ✓ Indexed: architecture.md -> {}", &doc1_id[..8]); + + let doc2_id = session.index(&doc2_path).await?; + println!(" ✓ Indexed: api.md -> {}", &doc2_id[..8]); + + let doc3_id = session.index(&doc3_path).await?; + println!(" ✓ Indexed: config.md -> {}", &doc3_id[..8]); + println!(); + + // 4. List documents in session + println!("Step 4: Session documents:"); + for doc in session.list_documents() { + println!(" - {} ({})", doc.name, &doc.id[..8]); + } + println!(); + + // 5. Query individual documents (uses cache) + println!("Step 5: Query individual documents..."); + let query = "What methods are available?"; + + println!(" Query: \"{}\"", query); + let start = std::time::Instant::now(); + let result = session.query(&doc2_id, query).await?; + let elapsed = start.elapsed(); + println!(" - Time: {:?}", elapsed); + println!(" - Score: {:.2}", result.score); + if !result.content.is_empty() { + let preview: String = result.content.chars().take(100).collect(); + println!(" - Preview: {}...", preview); + } + println!(); + + // 6. Query same document again (should be faster due to cache) + println!("Step 6: Query cached document (should be faster)..."); + let start = std::time::Instant::now(); + let result = session.query(&doc2_id, "How to list documents?").await?; + let cached_elapsed = start.elapsed(); + println!(" - Time: {:?}", cached_elapsed); + println!(" - Score: {:.2}", result.score); + println!(); + + // 7. Query across all documents + println!("Step 7: Cross-document query..."); + let query = "How to configure the workspace?"; + println!(" Query: \"{}\"", query); + + let results = session.query_all(query).await?; + println!(" Found {} relevant documents:", results.len()); + + for (i, result) in results.iter().enumerate() { + println!( + " {}. {} (score: {:.2})", + i + 1, + &result.doc_id[..8], + result.score + ); + } + println!(); + + // 8. Show session statistics + println!("Step 8: Session statistics:"); + let stats = session.stats(); + println!(" - Documents: {}", session.list_documents().len()); + println!(" - Queries: {}", stats.query_count.get()); + println!(" - Cache hits: {}", stats.cache_hits.get()); + println!(" - Cache misses: {}", stats.cache_misses.get()); + println!( + " - Cache hit rate: {:.1}%", + stats.cache_hit_rate() * 100.0 + ); + if let Some(avg_time) = stats.avg_query_time() { + println!(" - Avg query time: {:?}", avg_time); + } + println!(" - Session age: {:?}", session.age()); + println!(); + + // 9. 
Cleanup + println!("Step 9: Cleanup..."); + engine.remove(&doc1_id)?; + engine.remove(&doc2_id)?; + engine.remove(&doc3_id)?; + println!(" ✓ Documents removed\n"); + + println!("=== Example Complete ==="); + Ok(()) +} diff --git a/src/client/mod.rs b/src/client/mod.rs index a1d053cf..51abecd0 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -112,7 +112,7 @@ mod builder; mod context; mod engine; -mod events; +pub mod events; mod indexer; mod retriever; mod session; From 82cdca5b29b81cbb5bde88a947a0b7ad9cda8667 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Sat, 4 Apr 2026 22:00:04 +0800 Subject: [PATCH 18/21] feat(config): add configuration documentation generator and enhance loader Add new ConfigDocs module to generate markdown reference and example TOML files from configuration types. Enhance ConfigLoader with environment variable overrides using double underscore separator syntax, layered configuration support, and improved documentation with examples. BREAKING CHANGE: Configuration loading now supports layered files and environment variable overrides through ConfigLoader API. --- src/config/docs.rs | 307 ++++++++++++++ src/config/loader.rs | 288 +++++++++++-- src/config/merge.rs | 356 ++++++++++++++++ src/config/mod.rs | 99 ++++- src/config/types.rs | 698 -------------------------------- src/config/types/concurrency.rs | 122 ++++++ src/config/types/content.rs | 222 ++++++++++ src/config/types/fallback.rs | 233 +++++++++++ src/config/types/indexer.rs | 108 +++++ src/config/types/llm.rs | 218 ++++++++++ src/config/types/mod.rs | 336 +++++++++++++++ src/config/types/retrieval.rs | 219 ++++++++++ src/config/types/storage.rs | 274 +++++++++++++ src/config/validator.rs | 359 ++++++++++++++++ 14 files changed, 3103 insertions(+), 736 deletions(-) create mode 100644 src/config/docs.rs create mode 100644 src/config/merge.rs delete mode 100644 src/config/types.rs create mode 100644 src/config/types/concurrency.rs create mode 100644 src/config/types/content.rs create mode 100644 src/config/types/fallback.rs create mode 100644 src/config/types/indexer.rs create mode 100644 src/config/types/llm.rs create mode 100644 src/config/types/mod.rs create mode 100644 src/config/types/retrieval.rs create mode 100644 src/config/types/storage.rs create mode 100644 src/config/validator.rs diff --git a/src/config/docs.rs b/src/config/docs.rs new file mode 100644 index 00000000..7e2330b9 --- /dev/null +++ b/src/config/docs.rs @@ -0,0 +1,307 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Configuration documentation generation. +//! +//! This module provides utilities for generating documentation +//! from configuration types, including markdown reference and +//! example TOML files. + +use super::types::Config; + +/// Configuration documentation generator. +#[derive(Debug, Clone)] +pub struct ConfigDocs { + config: Config, +} + +impl ConfigDocs { + /// Create a new documentation generator. + pub fn new(config: Config) -> Self { + Self { config } + } + + /// Create with default configuration. + pub fn with_defaults() -> Self { + Self::new(Config::default()) + } + + /// Generate markdown documentation for the configuration. + pub fn to_markdown(&self) -> String { + let mut md = String::new(); + + md.push_str("# Configuration Reference\n\n"); + md.push_str("This document describes all configuration options for vectorless.\n\n"); + md.push_str("## Configuration File\n\n"); + md.push_str("Configuration is loaded from a TOML file. 
Default locations:\n"); + md.push_str("- `./vectorless.toml`\n"); + md.push_str("- `./config.toml`\n"); + md.push_str("- `./.vectorless.toml`\n\n"); + + // Indexer section + md.push_str("## `[indexer]`\n\n"); + md.push_str("Controls document indexing behavior.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "subsection_threshold", "usize", "300", + "Word count threshold for splitting sections into subsections"); + self.add_row(&mut md, "max_segment_tokens", "usize", "3000", + "Maximum tokens to send in a single segmentation request"); + self.add_row(&mut md, "max_summary_tokens", "usize", "200", + "Maximum tokens for each summary"); + self.add_row(&mut md, "min_summary_tokens", "usize", "20", + "Minimum content tokens required to generate a summary"); + md.push_str("\n"); + + // Summary section + md.push_str("## `[summary]`\n\n"); + md.push_str("LLM configuration for summary generation.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "model", "string", "gpt-4o-mini", "Model for summarization"); + self.add_row(&mut md, "endpoint", "string", "https://api.openai.com/v1", "API endpoint"); + self.add_row(&mut md, "api_key", "string?", "null", "API key (optional, can use env var)"); + self.add_row(&mut md, "max_tokens", "usize", "200", "Maximum tokens for summary generation"); + self.add_row(&mut md, "temperature", "f32", "0.0", "Temperature for summary generation"); + md.push_str("\n"); + + // Retrieval section + md.push_str("## `[retrieval]`\n\n"); + md.push_str("Retrieval model and behavior configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "model", "string", "gpt-4o", "Model for retrieval navigation"); + self.add_row(&mut md, "endpoint", "string", "https://api.openai.com/v1", "API endpoint"); + self.add_row(&mut md, "api_key", "string?", "null", "API key (defaults to summary.api_key)"); + self.add_row(&mut md, "top_k", "usize", "3", "Number of top results to return"); + self.add_row(&mut md, "max_tokens", "usize", "1000", "Maximum tokens for retrieval context"); + self.add_row(&mut md, "temperature", "f32", "0.0", "Temperature for retrieval"); + md.push_str("\n"); + + // Retrieval.search section + md.push_str("## `[retrieval.search]`\n\n"); + md.push_str("Search algorithm configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "top_k", "usize", "5", "Number of top-k results to return"); + self.add_row(&mut md, "beam_width", "usize", "3", "Beam width for multi-path search"); + self.add_row(&mut md, "max_iterations", "usize", "10", "Maximum iterations for search algorithms"); + self.add_row(&mut md, "min_score", "f32", "0.1", "Minimum score to include a path"); + md.push_str("\n"); + + // Retrieval.sufficiency section + md.push_str("## `[retrieval.sufficiency]`\n\n"); + md.push_str("Sufficiency checker configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "min_tokens", "usize", "500", "Minimum tokens for sufficiency"); + self.add_row(&mut md, "target_tokens", "usize", "2000", "Target tokens for full sufficiency"); + self.add_row(&mut md, 
"max_tokens", "usize", "4000", "Maximum tokens before stopping"); + self.add_row(&mut md, "min_content_length", "usize", "200", "Minimum content length (characters)"); + self.add_row(&mut md, "confidence_threshold", "f32", "0.7", "Confidence threshold for LLM judge"); + md.push_str("\n"); + + // Retrieval.content section + md.push_str("## `[retrieval.content]`\n\n"); + md.push_str("Content aggregator configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "enabled", "bool", "true", "Enable content aggregator"); + self.add_row(&mut md, "token_budget", "usize", "4000", "Maximum tokens for aggregated content"); + self.add_row(&mut md, "min_relevance_score", "f32", "0.2", "Minimum relevance score threshold (0.0-1.0)"); + self.add_row(&mut md, "scoring_strategy", "string", "keyword_bm25", "Scoring strategy (keyword_only, keyword_bm25, hybrid)"); + self.add_row(&mut md, "output_format", "string", "markdown", "Output format (markdown, json, tree, flat)"); + self.add_row(&mut md, "include_scores", "bool", "false", "Include relevance scores in output"); + self.add_row(&mut md, "hierarchical_min_per_level", "f32", "0.1", "Minimum budget allocation per depth level"); + self.add_row(&mut md, "deduplicate", "bool", "true", "Enable content deduplication"); + self.add_row(&mut md, "dedup_threshold", "f32", "0.9", "Similarity threshold for deduplication"); + md.push_str("\n"); + + // Retrieval.strategy section + md.push_str("## `[retrieval.strategy]`\n\n"); + md.push_str("Strategy-specific configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "exploration_weight", "f32", "1.414", "MCTS exploration weight (√2)"); + self.add_row(&mut md, "similarity_threshold", "f32", "0.5", "Semantic similarity threshold"); + self.add_row(&mut md, "high_similarity_threshold", "f32", "0.8", "High similarity for 'answer' decision"); + self.add_row(&mut md, "low_similarity_threshold", "f32", "0.3", "Low similarity for 'explore' decision"); + md.push_str("\n"); + + // Storage section + md.push_str("## `[storage]`\n\n"); + md.push_str("Storage configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "workspace_dir", "string", "./workspace", "Workspace directory for persisted documents"); + md.push_str("\n"); + + // Concurrency section + md.push_str("## `[concurrency]`\n\n"); + md.push_str("Concurrency control configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "max_concurrent_requests", "usize", "10", "Maximum concurrent LLM API calls"); + self.add_row(&mut md, "requests_per_minute", "usize", "500", "Rate limit: requests per minute"); + self.add_row(&mut md, "enabled", "bool", "true", "Enable rate limiting"); + self.add_row(&mut md, "semaphore_enabled", "bool", "true", "Enable semaphore-based concurrency"); + md.push_str("\n"); + + // Fallback section + md.push_str("## `[fallback]`\n\n"); + md.push_str("Fallback/error recovery configuration.\n\n"); + md.push_str("| Option | Type | Default | Description |\n"); + md.push_str("|--------|------|---------|-------------|\n"); + self.add_row(&mut md, "enabled", "bool", "true", "Enable graceful degradation"); + self.add_row(&mut md, 
"models", "[string]", "[\"gpt-4o-mini\", \"glm-4-flash\"]", "Fallback models in priority order"); + self.add_row(&mut md, "endpoints", "[string]", "[]", "Fallback endpoints in priority order"); + self.add_row(&mut md, "on_rate_limit", "string", "retry_then_fallback", "Behavior on rate limit (retry, fallback, retry_then_fallback, fail)"); + self.add_row(&mut md, "on_timeout", "string", "retry_then_fallback", "Behavior on timeout"); + self.add_row(&mut md, "on_all_failed", "string", "return_error", "Behavior when all attempts fail (return_error, return_cache)"); + md.push_str("\n"); + + md + } + + fn add_row(&self, md: &mut String, name: &str, ty: &str, default: &str, desc: &str) { + md.push_str(&format!("| `{}` | {} | {} | {} |\n", name, ty, default, desc)); + } + + /// Generate an example TOML file with all options. + pub fn to_example_toml(&self) -> String { + toml::to_string_pretty(&self.config).unwrap_or_else(|e| { + format!("# Error generating TOML: {}\n\n# Using default config\n{}", + e, Self::fallback_toml()) + }) + } + + fn fallback_toml() -> String { + r#"# Vectorless Configuration Example +# Copy this file to config.toml and fill in your API keys + +[indexer] +subsection_threshold = 300 +max_segment_tokens = 3000 +max_summary_tokens = 200 +min_summary_tokens = 20 + +[summary] +model = "gpt-4o-mini" +endpoint = "https://api.openai.com/v1" +# api_key = "sk-..." +max_tokens = 200 +temperature = 0.0 + +[retrieval] +model = "gpt-4o" +endpoint = "https://api.openai.com/v1" +# api_key = "sk-..." +top_k = 3 +max_tokens = 1000 +temperature = 0.0 + +[retrieval.search] +top_k = 5 +beam_width = 3 +max_iterations = 10 +min_score = 0.1 + +[retrieval.sufficiency] +min_tokens = 500 +target_tokens = 2000 +max_tokens = 4000 +min_content_length = 200 +confidence_threshold = 0.7 + +[retrieval.cache] +max_entries = 1000 +ttl_secs = 3600 + +[retrieval.strategy] +exploration_weight = 1.414 +similarity_threshold = 0.5 +high_similarity_threshold = 0.8 +low_similarity_threshold = 0.3 + +[retrieval.content] +enabled = true +token_budget = 4000 +min_relevance_score = 0.2 +scoring_strategy = "keyword_bm25" +output_format = "markdown" +include_scores = false +hierarchical_min_per_level = 0.1 +deduplicate = true +dedup_threshold = 0.9 + +[storage] +workspace_dir = "./workspace" + +[concurrency] +max_concurrent_requests = 10 +requests_per_minute = 500 +enabled = true +semaphore_enabled = true + +[fallback] +enabled = true +models = ["gpt-4o-mini", "glm-4-flash"] +on_rate_limit = "retry_then_fallback" +on_timeout = "retry_then_fallback" +on_all_failed = "return_error" +"#.to_string() + } + + /// Generate a minimal example TOML file. 
+    pub fn to_minimal_toml(&self) -> String {
+        r#"# Minimal Vectorless Configuration
+# Most options have sensible defaults

+[summary]
+api_key = "your-api-key-here"
+
+[retrieval]
+top_k = 5
+"#.to_string()
+    }
+}
+
+impl Default for ConfigDocs {
+    fn default() -> Self {
+        Self::with_defaults()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_config_docs_markdown() {
+        let docs = ConfigDocs::with_defaults();
+        let md = docs.to_markdown();
+
+        assert!(md.contains("# Configuration Reference"));
+        assert!(md.contains("## `[indexer]`"));
+        assert!(md.contains("## `[retrieval]`"));
+        assert!(md.contains("## `[retrieval.content]`"));
+    }
+
+    #[test]
+    fn test_config_docs_toml() {
+        let docs = ConfigDocs::with_defaults();
+        let toml = docs.to_example_toml();
+
+        assert!(toml.contains("[indexer]"));
+        assert!(toml.contains("[retrieval]"));
+    }
+
+    #[test]
+    fn test_config_docs_minimal_toml() {
+        let docs = ConfigDocs::with_defaults();
+        let toml = docs.to_minimal_toml();
+
+        assert!(toml.contains("[summary]"));
+        assert!(toml.len() < 200); // Should be minimal
+    }
+}
diff --git a/src/config/loader.rs b/src/config/loader.rs
index e83dc229..fe2c6736 100644
--- a/src/config/loader.rs
+++ b/src/config/loader.rs
@@ -3,14 +3,46 @@
 //! Configuration loader.
 //!
-//! Loads configuration from TOML files only.
-//! All configuration comes from config files, not environment variables.
-//! This ensures configuration is explicit and traceable.
+//! Loads configuration from TOML files with optional environment variable
+//! overrides and validation.
+//!
+//! # Example
+//!
+//! ```rust,no_run
+//! use vectorless::config::{ConfigLoader, Config};
+//!
+//! // Load from file
+//! let config = ConfigLoader::new()
+//!     .file("config.toml")
+//!     .load()?;
+//!
+//! // Load with validation
+//! let config = ConfigLoader::new()
+//!     .file("config.toml")
+//!     .with_validation(true)
+//!     .load()?;
+//!
+//! // Load with environment variable override
+//! let config = ConfigLoader::new()
+//!     .file("config.toml")
+//!     .with_env("VECTORLESS_")
+//!     .load()?;
+//!
+//! // Layered configuration
+//! let config = ConfigLoader::new()
+//!     .file("default.toml")
+//!     .file("production.toml")
+//!     .with_validation(true)
+//!     .load()?;
+//! # Ok::<(), vectorless::config::ConfigError>(())
+//! ```
 
 use std::path::{Path, PathBuf};
 use thiserror::Error;
 
+use super::merge::Merge;
 use super::types::Config;
+use super::validator::ConfigValidator;
 
 /// Configuration loading errors.
 #[derive(Debug, Error)]
@@ -30,59 +62,235 @@ pub enum ConfigError {
     /// Invalid configuration value.
     #[error("Invalid configuration: {0}")]
     Invalid(String),
+
+    /// Configuration validation failed.
+    #[error("{0}")]
+    Validation(#[from] super::types::ConfigValidationError),
+
+    /// Environment variable error.
+    #[error("Environment variable error: {0}")]
+    Env(String),
 }
 
 /// Configuration loader.
-///
-/// # Example
-///
-/// ```rust,no_run
-/// use vectorless::config::{ConfigLoader, Config};
-///
-/// // Load from file
-/// let config = ConfigLoader::new()
-///     .file("config.toml")
-///     .load()?;
-///
-/// // Or use defaults
-/// let config = Config::default();
-/// # Ok::<(), vectorless::config::ConfigError>(())
-/// ```
-#[derive(Debug, Default)]
+#[derive(Debug)]
 pub struct ConfigLoader {
-    /// Configuration file path.
-    file: Option<PathBuf>,
+    /// Configuration file paths (loaded in order, later files override earlier).
+    files: Vec<PathBuf>,
+
+    /// Environment variable prefix (optional).
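+    /// e.g. `"VECTORLESS_"`; set via [`ConfigLoader::with_env`].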
+    env_prefix: Option<String>,
+
+    /// Whether to validate after loading.
+    validate: bool,
+
+    /// Custom validator (optional).
+    validator: Option<ConfigValidator>,
+}
+
+impl Default for ConfigLoader {
+    fn default() -> Self {
+        Self::new()
+    }
+}
 
 impl ConfigLoader {
     /// Create a new configuration loader with defaults.
     pub fn new() -> Self {
-        Self::default()
+        Self {
+            files: Vec::new(),
+            env_prefix: None,
+            validate: false,
+            validator: None,
+        }
     }
 
     /// Specify a configuration file to load.
+    ///
+    /// Multiple files can be specified; later files override earlier ones.
     pub fn file<P: AsRef<Path>>(mut self, path: P) -> Self {
-        self.file = Some(path.as_ref().to_path_buf());
+        self.files.push(path.as_ref().to_path_buf());
+        self
+    }
+
+    /// Specify multiple configuration files.
+    pub fn files<I, P>(mut self, paths: I) -> Self
+    where
+        I: IntoIterator<Item = P>,
+        P: AsRef<Path>,
+    {
+        self.files
+            .extend(paths.into_iter().map(|p| p.as_ref().to_path_buf()));
+        self
+    }
+
+    /// Enable environment variable override.
+    ///
+    /// Variables like `VECTORLESS_SUMMARY__API_KEY` override config values.
+    /// Use `__` (double underscore) to separate nested keys.
+    pub fn with_env(mut self, prefix: impl Into<String>) -> Self {
+        self.env_prefix = Some(prefix.into());
+        self
+    }
+
+    /// Enable or disable validation after loading.
+    pub fn with_validation(mut self, validate: bool) -> Self {
+        self.validate = validate;
+        self
+    }
+
+    /// Set a custom validator.
+    pub fn with_validator(mut self, validator: ConfigValidator) -> Self {
+        self.validator = Some(validator);
         self
     }
 
     /// Load the configuration.
     ///
-    /// If no file is specified, returns default configuration.
-    /// If file is specified but doesn't exist, returns an error.
+    /// # Behavior
+    ///
+    /// 1. Start with default configuration
+    /// 2. Load and merge each specified file (in order)
+    /// 3. Apply environment variable overrides (if enabled)
+    /// 4. Validate configuration (if enabled)
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - A specified file doesn't exist
+    /// - A file can't be parsed as valid TOML
+    /// - Validation fails (when enabled)
     pub fn load(self) -> Result<Config, ConfigError> {
-        if let Some(ref path) = self.file {
+        let mut config = Config::default();
+
+        // Load and merge each file
+        for path in &self.files {
             if path.exists() {
                 let content = std::fs::read_to_string(path)?;
-                let config: Config = toml::from_str(&content)?;
-                Ok(config)
+                let file_config: Config = toml::from_str(&content)?;
+                config.merge(&file_config, super::merge::MergeStrategy::Replace);
             } else {
-                Err(ConfigError::NotFound(path.clone()))
+                return Err(ConfigError::NotFound(path.clone()));
+            }
+        }
+
+        // Apply environment variable overrides
+        if let Some(ref prefix) = self.env_prefix {
+            self.apply_env_overrides(&mut config, prefix)?;
+        }
+
+        // Validate if requested
+        if self.validate {
+            let validator = self.validator.unwrap_or_default();
+            validator.validate(&config)?;
+        }
+
+        Ok(config)
+    }
+
+    /// Apply environment variable overrides to the configuration.
+    fn apply_env_overrides(&self, config: &mut Config, prefix: &str) -> Result<(), ConfigError> {
+        for (key, value) in std::env::vars() {
+            if !key.starts_with(prefix) {
+                continue;
+            }
+
+            // Parse the path: VECTORLESS_SUMMARY__API_KEY -> ["summary", "api_key"]
+            // (normalized to lowercase so the uppercase variable name matches
+            // the lowercase config keys below)
+            let path_str = key
+                .trim_start_matches(prefix)
+                .trim_start_matches('_')
+                .to_lowercase();
+            let parts: Vec<&str> = path_str.split("__").collect();
+
+            if parts.is_empty() {
+                continue;
+            }
+
+            // Apply the override
+            self.set_by_path(config, &parts, &value)?;
+        }
+
+        Ok(())
+    }
+
+    /// Set a configuration value by path.
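+    ///
+    /// For example, `VECTORLESS_RETRIEVAL__TOP_K=5` arrives here as the path
+    /// `["retrieval", "top_k"]` and is applied as `config.retrieval.top_k = 5`.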
+    fn set_by_path(&self, config: &mut Config, path: &[&str], value: &str) -> Result<(), ConfigError> {
+        match path {
+            ["summary", "api_key"] => {
+                config.summary.api_key = Some(value.to_string());
+            }
+            ["summary", "model"] => {
+                config.summary.model = value.to_string();
+            }
+            ["summary", "endpoint"] => {
+                config.summary.endpoint = value.to_string();
+            }
+            ["summary", "max_tokens"] => {
+                config.summary.max_tokens = value.parse().map_err(|e| {
+                    ConfigError::Env(format!("Invalid max_tokens: {}", e))
+                })?;
+            }
+            ["retrieval", "api_key"] => {
+                config.retrieval.api_key = Some(value.to_string());
+            }
+            ["retrieval", "model"] => {
+                config.retrieval.model = value.to_string();
+            }
+            ["retrieval", "endpoint"] => {
+                config.retrieval.endpoint = value.to_string();
+            }
+            ["retrieval", "top_k"] => {
+                config.retrieval.top_k = value.parse().map_err(|e| {
+                    ConfigError::Env(format!("Invalid top_k: {}", e))
+                })?;
+            }
+            ["storage", "workspace_dir"] => {
+                config.storage.workspace_dir = PathBuf::from(value);
+            }
+            ["concurrency", "max_concurrent_requests"] => {
+                config.concurrency.max_concurrent_requests = value.parse().map_err(|e| {
+                    ConfigError::Env(format!("Invalid max_concurrent_requests: {}", e))
+                })?;
+            }
+            _ => {
+                // Unknown path - could log a warning
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Default configuration file names to search for.
+pub const CONFIG_FILE_NAMES: &[&str] =
+    &["vectorless.toml", "config.toml", ".vectorless.toml"];
+
+/// Find a configuration file in current or parent directories.
+pub fn find_config_file() -> Option<PathBuf> {
+    let current_dir = std::env::current_dir().ok()?;
+
+    // Search in current directory first
+    for name in CONFIG_FILE_NAMES {
+        let path = current_dir.join(name);
+        if path.exists() {
+            return Some(path);
+        }
+    }
+
+    // Search in parent directories (up to 3 levels)
+    let mut dir = current_dir.as_path();
+    for _ in 0..3 {
+        if let Some(parent) = dir.parent() {
+            for name in CONFIG_FILE_NAMES {
+                let path = parent.join(name);
+                if path.exists() {
+                    return Some(path);
+                }
+            }
+            dir = parent;
         } else {
-            Ok(Config::default())
+            break;
         }
     }
+
+    None
 }
 
 #[cfg(test)]
@@ -106,4 +314,24 @@ mod tests {
         let config = ConfigLoader::new().load().unwrap();
         assert_eq!(config.indexer.subsection_threshold, 300);
     }
+
+    #[test]
+    fn test_config_loader_not_found() {
+        let result = ConfigLoader::new()
+            .file("nonexistent_config.toml")
+            .load();
+
+        assert!(result.is_err());
+        assert!(matches!(result.unwrap_err(), ConfigError::NotFound(_)));
+    }
+
+    #[test]
+    fn test_config_loader_with_validation() {
+        let config = ConfigLoader::new()
+            .with_validation(true)
+            .load()
+            .unwrap();
+
+        assert_eq!(config.retrieval.model, "gpt-4o");
+    }
 }
diff --git a/src/config/merge.rs b/src/config/merge.rs
new file mode 100644
index 00000000..438872b5
--- /dev/null
+++ b/src/config/merge.rs
@@ -0,0 +1,356 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Configuration merging.
+//!
+//! This module provides utilities for merging multiple configurations,
+//! enabling layered configuration from multiple sources.
+
+use super::types::{
+    CacheConfig, Config, ConcurrencyConfig, ContentAggregatorConfig, FallbackConfig,
+    IndexerConfig, RetrievalConfig, SearchConfig, StorageConfig, StrategyConfig, SufficiencyConfig,
+    SummaryConfig,
+};
+
+/// Configuration merge strategy.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MergeStrategy {
+    /// Replace with source value.
+    Replace,
+    /// Keep existing value if present (don't overwrite).
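+    /// In the per-field impls below, a value counts as "present" when it
+    /// differs from its compiled-in default.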
+ KeepExisting, + /// Recursively merge nested structures. + Recursive, +} + +/// Trait for configuration merging. +pub trait Merge { + /// Merge another configuration into this one. + fn merge(&mut self, other: &Self, strategy: MergeStrategy); +} + +impl Merge for Config { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + self.indexer.merge(&other.indexer, strategy); + self.summary.merge(&other.summary, strategy); + self.retrieval.merge(&other.retrieval, strategy); + self.storage.merge(&other.storage, strategy); + self.concurrency.merge(&other.concurrency, strategy); + self.fallback.merge(&other.fallback, strategy); + } +} + +impl Merge for IndexerConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.subsection_threshold == 300 { + self.subsection_threshold = other.subsection_threshold; + } + if strategy == MergeStrategy::Replace || self.max_segment_tokens == 3000 { + self.max_segment_tokens = other.max_segment_tokens; + } + if strategy == MergeStrategy::Replace || self.max_summary_tokens == 200 { + self.max_summary_tokens = other.max_summary_tokens; + } + if strategy == MergeStrategy::Replace || self.min_summary_tokens == 20 { + self.min_summary_tokens = other.min_summary_tokens; + } + } +} + +impl Merge for SummaryConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.model == "gpt-4o-mini" { + self.model = other.model.clone(); + } + if strategy == MergeStrategy::Replace || self.endpoint == "https://api.openai.com/v1" { + self.endpoint = other.endpoint.clone(); + } + // Always merge API keys if present + if other.api_key.is_some() { + self.api_key = other.api_key.clone(); + } + if strategy == MergeStrategy::Replace || self.max_tokens == 200 { + self.max_tokens = other.max_tokens; + } + if strategy == MergeStrategy::Replace || self.temperature == 0.0 { + self.temperature = other.temperature; + } + } +} + +impl Merge for RetrievalConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.model == "gpt-4o" { + self.model = other.model.clone(); + } + if strategy == MergeStrategy::Replace || self.endpoint == "https://api.openai.com/v1" { + self.endpoint = other.endpoint.clone(); + } + if other.api_key.is_some() { + self.api_key = other.api_key.clone(); + } + if strategy == MergeStrategy::Replace || self.max_tokens == 1000 { + self.max_tokens = other.max_tokens; + } + if strategy == MergeStrategy::Replace || self.temperature == 0.0 { + self.temperature = other.temperature; + } + if strategy == MergeStrategy::Replace || self.top_k == 3 { + self.top_k = other.top_k; + } + + self.search.merge(&other.search, strategy); + self.sufficiency.merge(&other.sufficiency, strategy); + self.cache.merge(&other.cache, strategy); + self.strategy.merge(&other.strategy, strategy); + self.content.merge(&other.content, strategy); + } +} + +impl Merge for SearchConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.top_k == 5 { + self.top_k = other.top_k; + } + if strategy == MergeStrategy::Replace || self.beam_width == 3 { + self.beam_width = other.beam_width; + } + if strategy == MergeStrategy::Replace || self.max_iterations == 10 { + self.max_iterations = other.max_iterations; + } + if strategy == MergeStrategy::Replace || (self.min_score - 0.1).abs() < f32::EPSILON { + self.min_score = other.min_score; + } + } +} + +impl Merge for 
SufficiencyConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.min_tokens == 500 { + self.min_tokens = other.min_tokens; + } + if strategy == MergeStrategy::Replace || self.target_tokens == 2000 { + self.target_tokens = other.target_tokens; + } + if strategy == MergeStrategy::Replace || self.max_tokens == 4000 { + self.max_tokens = other.max_tokens; + } + if strategy == MergeStrategy::Replace || self.min_content_length == 200 { + self.min_content_length = other.min_content_length; + } + if strategy == MergeStrategy::Replace || (self.confidence_threshold - 0.7).abs() < f32::EPSILON + { + self.confidence_threshold = other.confidence_threshold; + } + } +} + +impl Merge for CacheConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.max_entries == 1000 { + self.max_entries = other.max_entries; + } + if strategy == MergeStrategy::Replace || self.ttl_secs == 3600 { + self.ttl_secs = other.ttl_secs; + } + } +} + +impl Merge for StrategyConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace + || (self.exploration_weight - 1.414).abs() < 0.001 + { + self.exploration_weight = other.exploration_weight; + } + if strategy == MergeStrategy::Replace || (self.similarity_threshold - 0.5).abs() < f32::EPSILON + { + self.similarity_threshold = other.similarity_threshold; + } + if strategy == MergeStrategy::Replace + || (self.high_similarity_threshold - 0.8).abs() < f32::EPSILON + { + self.high_similarity_threshold = other.high_similarity_threshold; + } + if strategy == MergeStrategy::Replace + || (self.low_similarity_threshold - 0.3).abs() < f32::EPSILON + { + self.low_similarity_threshold = other.low_similarity_threshold; + } + } +} + +impl Merge for ContentAggregatorConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if other.enabled != self.enabled { + self.enabled = other.enabled; + } + if strategy == MergeStrategy::Replace || self.token_budget == 4000 { + self.token_budget = other.token_budget; + } + if strategy == MergeStrategy::Replace || (self.min_relevance_score - 0.2).abs() < f32::EPSILON + { + self.min_relevance_score = other.min_relevance_score; + } + if strategy == MergeStrategy::Replace || self.scoring_strategy == "keyword_bm25" { + self.scoring_strategy = other.scoring_strategy.clone(); + } + if strategy == MergeStrategy::Replace || self.output_format == "markdown" { + self.output_format = other.output_format.clone(); + } + if other.include_scores != self.include_scores { + self.include_scores = other.include_scores; + } + if strategy == MergeStrategy::Replace + || (self.hierarchical_min_per_level - 0.1).abs() < f32::EPSILON + { + self.hierarchical_min_per_level = other.hierarchical_min_per_level; + } + if other.deduplicate != self.deduplicate { + self.deduplicate = other.deduplicate; + } + if strategy == MergeStrategy::Replace || (self.dedup_threshold - 0.9).abs() < f32::EPSILON { + self.dedup_threshold = other.dedup_threshold; + } + } +} + +impl Merge for StorageConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace + || self.workspace_dir == std::path::PathBuf::from("./workspace") + { + self.workspace_dir = other.workspace_dir.clone(); + } + } +} + +impl Merge for ConcurrencyConfig { + fn merge(&mut self, other: &Self, strategy: MergeStrategy) { + if strategy == MergeStrategy::Replace || self.max_concurrent_requests == 10 { + 
            self.max_concurrent_requests = other.max_concurrent_requests;
+        }
+        if strategy == MergeStrategy::Replace || self.requests_per_minute == 500 {
+            self.requests_per_minute = other.requests_per_minute;
+        }
+        if other.enabled != self.enabled {
+            self.enabled = other.enabled;
+        }
+        if other.semaphore_enabled != self.semaphore_enabled {
+            self.semaphore_enabled = other.semaphore_enabled;
+        }
+    }
+}
+
+impl Merge for FallbackConfig {
+    fn merge(&mut self, other: &Self, strategy: MergeStrategy) {
+        if other.enabled != self.enabled {
+            self.enabled = other.enabled;
+        }
+        if !other.models.is_empty() {
+            self.models = other.models.clone();
+        }
+        if !other.endpoints.is_empty() {
+            self.endpoints = other.endpoints.clone();
+        }
+        if strategy == MergeStrategy::Replace {
+            self.on_rate_limit = other.on_rate_limit;
+            self.on_timeout = other.on_timeout;
+            self.on_all_failed = other.on_all_failed;
+            self.max_retries = other.max_retries;
+            self.initial_retry_delay_ms = other.initial_retry_delay_ms;
+            self.max_retry_delay_ms = other.max_retry_delay_ms;
+            self.retry_multiplier = other.retry_multiplier;
+        }
+    }
+}
+
+/// Configuration overlay for layered configuration.
+///
+/// Allows building a configuration from multiple sources,
+/// with later overlays taking precedence.
+#[derive(Debug, Clone)]
+pub struct ConfigOverlay {
+    /// Base configuration.
+    base: Config,
+    /// Overlay configurations (applied in order).
+    overlays: Vec<Config>,
+}
+
+impl ConfigOverlay {
+    /// Create a new overlay with a base configuration.
+    pub fn new(base: Config) -> Self {
+        Self {
+            base,
+            overlays: Vec::new(),
+        }
+    }
+
+    /// Add an overlay configuration.
+    pub fn overlay(mut self, config: Config) -> Self {
+        self.overlays.push(config);
+        self
+    }
+
+    /// Resolve all overlays into a final configuration.
+    pub fn resolve(self) -> Config {
+        let mut result = self.base;
+        for overlay in self.overlays {
+            result.merge(&overlay, MergeStrategy::Replace);
+        }
+        result
+    }
+}
+
+impl Default for ConfigOverlay {
+    fn default() -> Self {
+        Self::new(Config::default())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_config_merge() {
+        let mut base = Config::default();
+        let mut overlay = Config::default();
+
+        overlay.retrieval.top_k = 10;
+        overlay.summary.model = "gpt-4o".to_string();
+
+        base.merge(&overlay, MergeStrategy::Replace);
+
+        assert_eq!(base.retrieval.top_k, 10);
+        assert_eq!(base.summary.model, "gpt-4o");
+    }
+
+    #[test]
+    fn test_config_overlay() {
+        let mut overlay1 = Config::default();
+        overlay1.retrieval.top_k = 5;
+
+        let mut overlay2 = Config::default();
+        overlay2.retrieval.top_k = 10;
+
+        let config = ConfigOverlay::new(Config::default())
+            .overlay(overlay1)
+            .overlay(overlay2)
+            .resolve();
+
+        assert_eq!(config.retrieval.top_k, 10);
+    }
+
+    #[test]
+    fn test_merge_keeps_api_keys() {
+        let mut base = Config::default();
+        let mut overlay = Config::default();
+
+        overlay.summary.api_key = Some("test-key".to_string());
+
+        base.merge(&overlay, MergeStrategy::Replace);
+
+        assert_eq!(base.summary.api_key, Some("test-key".to_string()));
+    }
+}
diff --git a/src/config/mod.rs b/src/config/mod.rs
index 23e98f4e..98ad2e8a 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -3,15 +3,98 @@
 //! Configuration management for vectorless.
 //!
-//! This module provides configuration loading and validation:
-//! - [`Config`] - Main configuration structure
-//! - [`IndexerConfig`] - Indexing parameters
-//! - [`SummaryConfig`] - Summarization model settings
-//!
- [`RetrievalConfig`] - Retrieval model settings -//! - [`StorageConfig`] - Storage paths +//! This module provides comprehensive configuration loading, validation, +//! and management: +//! +//! - [`Config`] — Main configuration structure +//! - [`ConfigLoader`] — Load configuration from TOML files +//! - [`ConfigValidator`] — Validate configuration values +//! - [`ConfigDocs`] — Generate configuration documentation +//! +//! # Quick Start +//! +//! ```rust,no_run +//! use vectorless::config::{Config, ConfigLoader}; +//! +//! // Load from file +//! let config = ConfigLoader::new() +//! .file("config.toml") +//! .with_validation(true) +//! .load()?; +//! +//! // Or use defaults +//! let config = Config::default(); +//! # Ok::<(), vectorless::config::ConfigError>(()) +//! ``` +//! +//! # Layered Configuration +//! +//! Multiple configuration files can be layered: +//! +//! ```rust,no_run +//! use vectorless::config::ConfigLoader; +//! +//! let config = ConfigLoader::new() +//! .file("default.toml") // Base defaults +//! .file("production.toml") // Production overrides +//! .with_env("VECTORLESS_") // Environment overrides +//! .with_validation(true) +//! .load()?; +//! # Ok::<(), vectorless::config::ConfigError>(()) +//! ``` +//! +//! # Environment Variables +//! +//! When enabled with `with_env()`, environment variables can override config: +//! +//! | Variable | Config Path | +//! |----------|-------------| +//! | `VECTORLESS_SUMMARY__API_KEY` | `summary.api_key` | +//! | `VECTORLESS_RETRIEVAL__TOP_K` | `retrieval.top_k` | +//! | `VECTORLESS_STORAGE__WORKSPACE_DIR` | `storage.workspace_dir` | +//! +//! # Configuration Sections +//! +//! - `[indexer]` — Document indexing parameters +//! - `[summary]` — Summarization model settings +//! - `[retrieval]` — Retrieval model settings +//! - `[retrieval.search]` — Search algorithm configuration +//! - `[retrieval.sufficiency]` — Sufficiency checker settings +//! - `[retrieval.content]` — Content aggregator settings +//! - `[retrieval.strategy]` — Strategy-specific settings +//! - `[retrieval.cache]` — Cache configuration +//! - `[storage]` — Storage paths +//! - `[concurrency]` — Concurrency control +//! - `[fallback]` — Error recovery settings +mod docs; mod loader; +mod merge; mod types; +mod validator; -pub use loader::{ConfigError, ConfigLoader}; -pub use types::*; +// Re-export main types +pub use docs::ConfigDocs; +pub use loader::{find_config_file, ConfigError, ConfigLoader, CONFIG_FILE_NAMES}; +pub use merge::{ConfigOverlay, Merge, MergeStrategy}; +pub use types::{ + // Main config + Config, + // Indexer + IndexerConfig, + // LLM configs + LlmConfig, SummaryConfig, + // Retrieval configs + RetrievalConfig, SearchConfig, + // Storage and sufficiency + StorageConfig, CacheConfig, StrategyConfig, SufficiencyConfig, + // Content aggregator + ContentAggregatorConfig, + // Concurrency + ConcurrencyConfig, + // Fallback + FallbackBehavior, FallbackConfig, OnAllFailedBehavior, + // Validation + ConfigValidationError, ValidationError, Severity, +}; +pub use validator::{ConfigValidator, ValidationRule}; diff --git a/src/config/types.rs b/src/config/types.rs deleted file mode 100644 index 35cefd52..00000000 --- a/src/config/types.rs +++ /dev/null @@ -1,698 +0,0 @@ -// Copyright (c) 2026 vectorless developers -// SPDX-License-Identifier: Apache-2.0 - -//! Configuration type definitions. -//! -//! All configuration values are defined inline in `Default` trait implementations. -//! 
Configuration is loaded from TOML files only - no environment variable magic. - -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; - -/// Main configuration for vectorless. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Config { - /// Indexer configuration. - #[serde(default)] - pub indexer: IndexerConfig, - - /// Summary model configuration. - #[serde(default)] - pub summary: SummaryConfig, - - /// Retrieval model configuration. - #[serde(default)] - pub retrieval: RetrievalConfig, - - /// Storage configuration. - #[serde(default)] - pub storage: StorageConfig, - - /// Concurrency control configuration. - #[serde(default)] - pub concurrency: ConcurrencyConfig, - - /// Fallback/error recovery configuration. - #[serde(default)] - pub fallback: FallbackConfig, -} - -impl Default for Config { - fn default() -> Self { - Self { - indexer: IndexerConfig::default(), - summary: SummaryConfig::default(), - retrieval: RetrievalConfig::default(), - storage: StorageConfig::default(), - concurrency: ConcurrencyConfig::default(), - fallback: FallbackConfig::default(), - } - } -} - -/// Indexer configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct IndexerConfig { - /// Word count threshold for splitting sections into subsections. - #[serde(default)] - pub subsection_threshold: usize, - - /// Maximum tokens to send in a single segmentation request. - #[serde(default)] - pub max_segment_tokens: usize, - - /// Maximum tokens for each summary. - #[serde(default)] - pub max_summary_tokens: usize, - - /// Minimum content tokens required to generate a summary. - #[serde(default)] - pub min_summary_tokens: usize, -} - -impl Default for IndexerConfig { - fn default() -> Self { - Self { - subsection_threshold: 300, - max_segment_tokens: 3000, - max_summary_tokens: 200, - min_summary_tokens: 20, - } - } -} - -/// Generic LLM configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct LlmConfig { - /// Model name (e.g., "gpt-4o-mini", "claude-3-haiku"). - #[serde(default)] - pub model: String, - - /// API endpoint. - #[serde(default)] - pub endpoint: String, - - /// API key. - #[serde(default)] - pub api_key: Option, - - /// Maximum tokens for responses. - #[serde(default)] - pub max_tokens: usize, - - /// Temperature for generation. - #[serde(default)] - pub temperature: f32, -} - -impl Default for LlmConfig { - fn default() -> Self { - Self { - model: "gpt-4o-mini".to_string(), - endpoint: "https://api.openai.com/v1".to_string(), - api_key: None, - max_tokens: 1000, - temperature: 0.0, - } - } -} - -impl LlmConfig { - /// Create a new LLM config with defaults. - pub fn new() -> Self { - Self::default() - } - - /// Set the model. - pub fn with_model(mut self, model: impl Into) -> Self { - self.model = model.into(); - self - } - - /// Set the endpoint. - pub fn with_endpoint(mut self, endpoint: impl Into) -> Self { - self.endpoint = endpoint.into(); - self - } - - /// Set the API key. - pub fn with_api_key(mut self, api_key: impl Into) -> Self { - self.api_key = Some(api_key.into()); - self - } - - /// Get the API key from config. - pub fn get_api_key(&self) -> Option<&str> { - self.api_key.as_deref() - } -} - -/// Summary model configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SummaryConfig { - /// Model name for summarization. - #[serde(default)] - pub model: String, - - /// API endpoint for summary model. - #[serde(default)] - pub endpoint: String, - - /// API key. 
- #[serde(default)] - pub api_key: Option, - - /// Maximum tokens for summary generation. - #[serde(default)] - pub max_tokens: usize, - - /// Temperature for summary generation. - #[serde(default)] - pub temperature: f32, -} - -impl Default for SummaryConfig { - fn default() -> Self { - Self { - model: "gpt-4o-mini".to_string(), - endpoint: "https://api.openai.com/v1".to_string(), - api_key: None, - max_tokens: 200, - temperature: 0.0, - } - } -} - -/// Retrieval model configuration (for navigation). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RetrievalConfig { - /// Model name for retrieval/navigation. - #[serde(default)] - pub model: String, - - /// API endpoint for retrieval model. - #[serde(default)] - pub endpoint: String, - - /// API key. - #[serde(default)] - pub api_key: Option, - - /// Maximum tokens for retrieval context. - #[serde(default)] - pub max_tokens: usize, - - /// Temperature for retrieval. - #[serde(default)] - pub temperature: f32, - - /// Number of top-k results to return. - #[serde(default)] - pub top_k: usize, - - /// Search algorithm configuration. - #[serde(default)] - pub search: SearchConfig, - - /// Sufficiency checker configuration. - #[serde(default)] - pub sufficiency: SufficiencyConfig, - - /// Cache configuration. - #[serde(default)] - pub cache: CacheConfig, - - /// Strategy-specific configuration. - #[serde(default)] - pub strategy: StrategyConfig, - - /// Content aggregator configuration. - #[serde(default)] - pub content: ContentAggregatorConfig, -} - -impl Default for RetrievalConfig { - fn default() -> Self { - Self { - model: "gpt-4o".to_string(), - endpoint: "https://api.openai.com/v1".to_string(), - api_key: None, - max_tokens: 1000, - temperature: 0.0, - top_k: 3, - search: SearchConfig::default(), - sufficiency: SufficiencyConfig::default(), - cache: CacheConfig::default(), - strategy: StrategyConfig::default(), - content: ContentAggregatorConfig::default(), - } - } -} - -/// Search algorithm configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SearchConfig { - /// Number of top-k results to return. - #[serde(default)] - pub top_k: usize, - - /// Beam width for multi-path search. - #[serde(default)] - pub beam_width: usize, - - /// Maximum iterations for search algorithms. - #[serde(default)] - pub max_iterations: usize, - - /// Minimum score to include a path. - #[serde(default)] - pub min_score: f32, -} - -impl Default for SearchConfig { - fn default() -> Self { - Self { - top_k: 5, - beam_width: 3, - max_iterations: 10, - min_score: 0.1, - } - } -} - -/// Sufficiency checker configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SufficiencyConfig { - /// Minimum tokens for sufficiency. - #[serde(default)] - pub min_tokens: usize, - - /// Target tokens for full sufficiency. - #[serde(default)] - pub target_tokens: usize, - - /// Maximum tokens before stopping. - #[serde(default)] - pub max_tokens: usize, - - /// Minimum content length (characters). - #[serde(default)] - pub min_content_length: usize, - - /// Confidence threshold for LLM judge. - #[serde(default)] - pub confidence_threshold: f32, -} - -impl Default for SufficiencyConfig { - fn default() -> Self { - Self { - min_tokens: 500, - target_tokens: 2000, - max_tokens: 4000, - min_content_length: 200, - confidence_threshold: 0.7, - } - } -} - -/// Cache configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CacheConfig { - /// Maximum number of cache entries. 
- #[serde(default)] - pub max_entries: usize, - - /// Time-to-live for cache entries (seconds). - #[serde(default)] - pub ttl_secs: u64, -} - -impl Default for CacheConfig { - fn default() -> Self { - Self { - max_entries: 1000, - ttl_secs: 3600, - } - } -} - -/// Strategy-specific configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StrategyConfig { - /// MCTS exploration weight (sqrt(2) ≈ 1.414). - #[serde(default)] - pub exploration_weight: f32, - - /// Semantic similarity threshold. - #[serde(default)] - pub similarity_threshold: f32, - - /// High similarity threshold for "answer" decision. - #[serde(default)] - pub high_similarity_threshold: f32, - - /// Low similarity threshold for "explore" decision. - #[serde(default)] - pub low_similarity_threshold: f32, -} - -impl Default for StrategyConfig { - fn default() -> Self { - Self { - exploration_weight: 1.414, - similarity_threshold: 0.5, - high_similarity_threshold: 0.8, - low_similarity_threshold: 0.3, - } - } -} - -/// Content aggregator configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ContentAggregatorConfig { - /// Whether content aggregator is enabled. - /// When disabled, uses simple content collection (legacy behavior). - #[serde(default = "default_true")] - pub enabled: bool, - - /// Maximum tokens for aggregated content. - #[serde(default)] - pub token_budget: usize, - - /// Minimum relevance score threshold (0.0 - 1.0). - /// Content below this threshold will be filtered out. - #[serde(default)] - pub min_relevance_score: f32, - - /// Scoring strategy: "keyword_only" | "keyword_bm25" | "hybrid" - #[serde(default)] - pub scoring_strategy: String, - - /// Output format: "markdown" | "json" | "tree" | "flat" - #[serde(default)] - pub output_format: String, - - /// Include relevance scores in output. - #[serde(default)] - pub include_scores: bool, - - /// Minimum budget allocation per depth level (0.0 - 1.0). - #[serde(default)] - pub hierarchical_min_per_level: f32, - - /// Enable content deduplication. - #[serde(default = "default_true")] - pub deduplicate: bool, - - /// Similarity threshold for deduplication (0.0 - 1.0). - #[serde(default)] - pub dedup_threshold: f32, -} - -impl Default for ContentAggregatorConfig { - fn default() -> Self { - Self { - enabled: true, - token_budget: 4000, - min_relevance_score: 0.2, - scoring_strategy: "keyword_bm25".to_string(), - output_format: "markdown".to_string(), - include_scores: false, - hierarchical_min_per_level: 0.1, - deduplicate: true, - dedup_threshold: 0.9, - } - } -} - -impl ContentAggregatorConfig { - /// Create a new config with defaults. - pub fn new() -> Self { - Self::default() - } - - /// Disable content aggregator (use legacy behavior). - pub fn disabled() -> Self { - Self { - enabled: false, - ..Self::default() - } - } - - /// Set the token budget. - pub fn with_token_budget(mut self, budget: usize) -> Self { - self.token_budget = budget; - self - } - - /// Set the minimum relevance score. - pub fn with_min_relevance(mut self, score: f32) -> Self { - self.min_relevance_score = score.clamp(0.0, 1.0); - self - } - - /// Convert to the retrieval content aggregator config. 
- pub fn to_aggregator_config(&self) -> crate::retrieval::content::ContentAggregatorConfig { - use crate::retrieval::content::{ContentAggregatorConfig as RetrievalContentConfig, - OutputFormatConfig, ScoringStrategyConfig}; - - let scoring_strategy = match self.scoring_strategy.as_str() { - "keyword_only" => ScoringStrategyConfig::KeywordOnly, - "hybrid" => ScoringStrategyConfig::Hybrid, - _ => ScoringStrategyConfig::KeywordWithBM25, - }; - - let output_format = match self.output_format.as_str() { - "json" => OutputFormatConfig::Json, - "tree" => OutputFormatConfig::Tree, - "flat" => OutputFormatConfig::Flat, - _ => OutputFormatConfig::Markdown, - }; - - RetrievalContentConfig { - token_budget: self.token_budget, - min_relevance_score: self.min_relevance_score, - scoring_strategy, - output_format, - include_scores: self.include_scores, - hierarchical_min_per_level: self.hierarchical_min_per_level, - deduplicate: self.deduplicate, - dedup_threshold: self.dedup_threshold, - } - } -} - -/// Storage configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StorageConfig { - /// Workspace directory for persisted documents. - #[serde(default)] - pub workspace_dir: PathBuf, -} - -impl Default for StorageConfig { - fn default() -> Self { - Self { - workspace_dir: PathBuf::from("./workspace"), - } - } -} - -/// Concurrency control configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ConcurrencyConfig { - /// Maximum concurrent LLM API calls. - #[serde(default)] - pub max_concurrent_requests: usize, - - /// Rate limit: requests per minute. - #[serde(default)] - pub requests_per_minute: usize, - - /// Whether rate limiting is enabled. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Whether semaphore-based concurrency limiting is enabled. - #[serde(default = "default_true")] - pub semaphore_enabled: bool, -} - -fn default_true() -> bool { - true -} - -impl Default for ConcurrencyConfig { - fn default() -> Self { - Self { - max_concurrent_requests: 10, - requests_per_minute: 500, - enabled: true, - semaphore_enabled: true, - } - } -} - -impl ConcurrencyConfig { - /// Create a new config with defaults. - pub fn new() -> Self { - Self::default() - } - - /// Set the maximum concurrent requests. - pub fn with_max_concurrent_requests(mut self, max: usize) -> Self { - self.max_concurrent_requests = max; - self - } - - /// Set the requests per minute rate limit. - pub fn with_requests_per_minute(mut self, rpm: usize) -> Self { - self.requests_per_minute = rpm; - self - } - - /// Enable or disable rate limiting. - pub fn with_enabled(mut self, enabled: bool) -> Self { - self.enabled = enabled; - self - } - - /// Enable or disable semaphore. - pub fn with_semaphore_enabled(mut self, enabled: bool) -> Self { - self.semaphore_enabled = enabled; - self - } - - /// Convert to the runtime concurrency config. - pub fn to_runtime_config(&self) -> crate::throttle::ConcurrencyConfig { - crate::throttle::ConcurrencyConfig { - max_concurrent_requests: self.max_concurrent_requests, - requests_per_minute: self.requests_per_minute, - enabled: self.enabled, - semaphore_enabled: self.semaphore_enabled, - } - } -} - -impl From for crate::throttle::ConcurrencyConfig { - fn from(config: ConcurrencyConfig) -> Self { - config.to_runtime_config() - } -} - -/// Fallback behavior when encountering errors. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum FallbackBehavior { - /// Only retry with the same model/endpoint. - Retry, - /// Immediately switch to fallback model/endpoint. - Fallback, - /// Retry first, then fallback if still failing. - RetryThenFallback, - /// Fail immediately without retry or fallback. - Fail, -} - -impl Default for FallbackBehavior { - fn default() -> Self { - Self::RetryThenFallback - } -} - -/// Behavior when all fallback attempts fail. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum OnAllFailedBehavior { - /// Return the error to the caller. - ReturnError, - /// Try to return cached result if available. - ReturnCache, -} - -impl Default for OnAllFailedBehavior { - fn default() -> Self { - Self::ReturnError - } -} - -/// Fallback configuration for error recovery. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FallbackConfig { - /// Whether fallback is enabled. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Fallback models in priority order. - #[serde(default)] - pub models: Vec, - - /// Fallback endpoints in priority order. - #[serde(default)] - pub endpoints: Vec, - - /// Behavior on rate limit error (429). - #[serde(default)] - pub on_rate_limit: FallbackBehavior, - - /// Behavior on timeout error. - #[serde(default)] - pub on_timeout: FallbackBehavior, - - /// Behavior when all attempts fail. - #[serde(default)] - pub on_all_failed: OnAllFailedBehavior, -} - -impl Default for FallbackConfig { - fn default() -> Self { - Self { - enabled: true, - models: vec!["gpt-4o-mini".to_string(), "glm-4-flash".to_string()], - endpoints: vec![], - on_rate_limit: FallbackBehavior::RetryThenFallback, - on_timeout: FallbackBehavior::RetryThenFallback, - on_all_failed: OnAllFailedBehavior::ReturnError, - } - } -} - -impl FallbackConfig { - /// Create a new fallback config with defaults. - pub fn new() -> Self { - Self::default() - } - - /// Disable fallback entirely. - pub fn disabled() -> Self { - Self { - enabled: false, - ..Self::default() - } - } - - /// Set fallback models. - pub fn with_models(mut self, models: Vec) -> Self { - self.models = models; - self - } - - /// Set fallback endpoints. - pub fn with_endpoints(mut self, endpoints: Vec) -> Self { - self.endpoints = endpoints; - self - } - - /// Set behavior on rate limit. - pub fn with_on_rate_limit(mut self, behavior: FallbackBehavior) -> Self { - self.on_rate_limit = behavior; - self - } - - /// Set behavior on timeout. - pub fn with_on_timeout(mut self, behavior: FallbackBehavior) -> Self { - self.on_timeout = behavior; - self - } -} diff --git a/src/config/types/concurrency.rs b/src/config/types/concurrency.rs new file mode 100644 index 00000000..c4172ba8 --- /dev/null +++ b/src/config/types/concurrency.rs @@ -0,0 +1,122 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Concurrency control configuration types. + +use serde::{Deserialize, Serialize}; + +/// Concurrency control configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ConcurrencyConfig { + /// Maximum concurrent LLM API calls. + #[serde(default = "default_max_concurrent_requests")] + pub max_concurrent_requests: usize, + + /// Rate limit: requests per minute. + #[serde(default = "default_requests_per_minute")] + pub requests_per_minute: usize, + + /// Whether rate limiting is enabled. 
+ #[serde(default = "default_true")] + pub enabled: bool, + + /// Whether semaphore-based concurrency limiting is enabled. + #[serde(default = "default_true")] + pub semaphore_enabled: bool, +} + +fn default_max_concurrent_requests() -> usize { + 10 +} + +fn default_requests_per_minute() -> usize { + 500 +} + +fn default_true() -> bool { + true +} + +impl Default for ConcurrencyConfig { + fn default() -> Self { + Self { + max_concurrent_requests: default_max_concurrent_requests(), + requests_per_minute: default_requests_per_minute(), + enabled: default_true(), + semaphore_enabled: default_true(), + } + } +} + +impl ConcurrencyConfig { + /// Create a new config with defaults. + pub fn new() -> Self { + Self::default() + } + + /// Set the maximum concurrent requests. + pub fn with_max_concurrent_requests(mut self, max: usize) -> Self { + self.max_concurrent_requests = max; + self + } + + /// Set the requests per minute rate limit. + pub fn with_requests_per_minute(mut self, rpm: usize) -> Self { + self.requests_per_minute = rpm; + self + } + + /// Enable or disable rate limiting. + pub fn with_enabled(mut self, enabled: bool) -> Self { + self.enabled = enabled; + self + } + + /// Enable or disable semaphore. + pub fn with_semaphore_enabled(mut self, enabled: bool) -> Self { + self.semaphore_enabled = enabled; + self + } + + /// Convert to the runtime concurrency config. + pub fn to_runtime_config(&self) -> crate::throttle::ConcurrencyConfig { + crate::throttle::ConcurrencyConfig { + max_concurrent_requests: self.max_concurrent_requests, + requests_per_minute: self.requests_per_minute, + enabled: self.enabled, + semaphore_enabled: self.semaphore_enabled, + } + } +} + +impl From for crate::throttle::ConcurrencyConfig { + fn from(config: ConcurrencyConfig) -> Self { + config.to_runtime_config() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_concurrency_config_defaults() { + let config = ConcurrencyConfig::default(); + assert_eq!(config.max_concurrent_requests, 10); + assert_eq!(config.requests_per_minute, 500); + assert!(config.enabled); + assert!(config.semaphore_enabled); + } + + #[test] + fn test_concurrency_config_builder() { + let config = ConcurrencyConfig::new() + .with_max_concurrent_requests(20) + .with_requests_per_minute(1000) + .with_enabled(false); + + assert_eq!(config.max_concurrent_requests, 20); + assert_eq!(config.requests_per_minute, 1000); + assert!(!config.enabled); + } +} diff --git a/src/config/types/content.rs b/src/config/types/content.rs new file mode 100644 index 00000000..62741cd7 --- /dev/null +++ b/src/config/types/content.rs @@ -0,0 +1,222 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Content aggregator configuration types. + +use serde::{Deserialize, Serialize}; + +/// Content aggregator configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContentAggregatorConfig { + /// Whether content aggregator is enabled. + /// When disabled, uses simple content collection (legacy behavior). + #[serde(default = "default_true")] + pub enabled: bool, + + /// Maximum tokens for aggregated content. + #[serde(default = "default_token_budget")] + pub token_budget: usize, + + /// Minimum relevance score threshold (0.0 - 1.0). + /// Content below this threshold will be filtered out. 
+ #[serde(default = "default_min_relevance_score")] + pub min_relevance_score: f32, + + /// Scoring strategy: "keyword_only" | "keyword_bm25" | "hybrid" + #[serde(default = "default_scoring_strategy")] + pub scoring_strategy: String, + + /// Output format: "markdown" | "json" | "tree" | "flat" + #[serde(default = "default_output_format")] + pub output_format: String, + + /// Include relevance scores in output. + #[serde(default)] + pub include_scores: bool, + + /// Minimum budget allocation per depth level (0.0 - 1.0). + /// Ensures each tree level gets representation. + #[serde(default = "default_hierarchical_min_per_level")] + pub hierarchical_min_per_level: f32, + + /// Enable content deduplication. + #[serde(default = "default_true")] + pub deduplicate: bool, + + /// Similarity threshold for deduplication (0.0 - 1.0). + /// Higher = more aggressive deduplication. + #[serde(default = "default_dedup_threshold")] + pub dedup_threshold: f32, +} + +fn default_true() -> bool { + true +} + +fn default_token_budget() -> usize { + 4000 +} + +fn default_min_relevance_score() -> f32 { + 0.2 +} + +fn default_scoring_strategy() -> String { + "keyword_bm25".to_string() +} + +fn default_output_format() -> String { + "markdown".to_string() +} + +fn default_hierarchical_min_per_level() -> f32 { + 0.1 +} + +fn default_dedup_threshold() -> f32 { + 0.9 +} + +impl Default for ContentAggregatorConfig { + fn default() -> Self { + Self { + enabled: default_true(), + token_budget: default_token_budget(), + min_relevance_score: default_min_relevance_score(), + scoring_strategy: default_scoring_strategy(), + output_format: default_output_format(), + include_scores: false, + hierarchical_min_per_level: default_hierarchical_min_per_level(), + deduplicate: default_true(), + dedup_threshold: default_dedup_threshold(), + } + } +} + +impl ContentAggregatorConfig { + /// Create a new config with defaults. + pub fn new() -> Self { + Self::default() + } + + /// Disable content aggregator (use legacy behavior). + pub fn disabled() -> Self { + Self { + enabled: false, + ..Self::default() + } + } + + /// Set the token budget. + pub fn with_token_budget(mut self, budget: usize) -> Self { + self.token_budget = budget; + self + } + + /// Set the minimum relevance score. + pub fn with_min_relevance(mut self, score: f32) -> Self { + self.min_relevance_score = score.clamp(0.0, 1.0); + self + } + + /// Set the scoring strategy. + pub fn with_scoring_strategy(mut self, strategy: impl Into) -> Self { + self.scoring_strategy = strategy.into(); + self + } + + /// Set the output format. + pub fn with_output_format(mut self, format: impl Into) -> Self { + self.output_format = format.into(); + self + } + + /// Enable/disable score inclusion. + pub fn with_include_scores(mut self, include: bool) -> Self { + self.include_scores = include; + self + } + + /// Enable/disable deduplication. + pub fn with_deduplicate(mut self, dedupe: bool) -> Self { + self.deduplicate = dedupe; + self + } + + /// Convert to the retrieval content aggregator config. 
+ pub fn to_aggregator_config(&self) -> crate::retrieval::content::ContentAggregatorConfig { + use crate::retrieval::content::{ + ContentAggregatorConfig as RetrievalContentConfig, OutputFormatConfig, + ScoringStrategyConfig, + }; + + let scoring_strategy = match self.scoring_strategy.as_str() { + "keyword_only" => ScoringStrategyConfig::KeywordOnly, + "hybrid" => ScoringStrategyConfig::Hybrid, + _ => ScoringStrategyConfig::KeywordWithBM25, + }; + + let output_format = match self.output_format.as_str() { + "json" => OutputFormatConfig::Json, + "tree" => OutputFormatConfig::Tree, + "flat" => OutputFormatConfig::Flat, + _ => OutputFormatConfig::Markdown, + }; + + RetrievalContentConfig { + token_budget: self.token_budget, + min_relevance_score: self.min_relevance_score, + scoring_strategy, + output_format, + include_scores: self.include_scores, + hierarchical_min_per_level: self.hierarchical_min_per_level, + deduplicate: self.deduplicate, + dedup_threshold: self.dedup_threshold, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_content_aggregator_config_defaults() { + let config = ContentAggregatorConfig::default(); + assert!(config.enabled); + assert_eq!(config.token_budget, 4000); + assert_eq!(config.min_relevance_score, 0.2); + assert_eq!(config.scoring_strategy, "keyword_bm25"); + assert_eq!(config.output_format, "markdown"); + assert!(config.deduplicate); + } + + #[test] + fn test_content_aggregator_config_disabled() { + let config = ContentAggregatorConfig::disabled(); + assert!(!config.enabled); + } + + #[test] + fn test_content_aggregator_config_builder() { + let config = ContentAggregatorConfig::new() + .with_token_budget(8000) + .with_min_relevance(0.5) + .with_scoring_strategy("hybrid") + .with_output_format("json"); + + assert_eq!(config.token_budget, 8000); + assert_eq!(config.min_relevance_score, 0.5); + assert_eq!(config.scoring_strategy, "hybrid"); + assert_eq!(config.output_format, "json"); + } + + #[test] + fn test_min_relevance_clamping() { + let config = ContentAggregatorConfig::new().with_min_relevance(1.5); + assert_eq!(config.min_relevance_score, 1.0); + + let config = ContentAggregatorConfig::new().with_min_relevance(-0.5); + assert_eq!(config.min_relevance_score, 0.0); + } +} diff --git a/src/config/types/fallback.rs b/src/config/types/fallback.rs new file mode 100644 index 00000000..fa199b30 --- /dev/null +++ b/src/config/types/fallback.rs @@ -0,0 +1,233 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Fallback and error recovery configuration types. + +use serde::{Deserialize, Serialize}; + +/// Fallback behavior when encountering errors. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FallbackBehavior { + /// Only retry with the same model/endpoint. + Retry, + /// Immediately switch to fallback model/endpoint. + Fallback, + /// Retry first, then fallback if still failing. + RetryThenFallback, + /// Fail immediately without retry or fallback. + Fail, +} + +impl Default for FallbackBehavior { + fn default() -> Self { + Self::RetryThenFallback + } +} + +/// Behavior when all fallback attempts fail. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum OnAllFailedBehavior { + /// Return the error to the caller. + ReturnError, + /// Try to return cached result if available. 
+    ReturnCache,
+}
+
+impl Default for OnAllFailedBehavior {
+    fn default() -> Self {
+        Self::ReturnError
+    }
+}
+
+/// Fallback configuration for error recovery.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FallbackConfig {
+    /// Whether fallback is enabled.
+    #[serde(default = "default_true")]
+    pub enabled: bool,
+
+    /// Fallback models in priority order.
+    #[serde(default = "default_fallback_models")]
+    pub models: Vec<String>,
+
+    /// Fallback endpoints in priority order.
+    #[serde(default)]
+    pub endpoints: Vec<String>,
+
+    /// Behavior on rate limit error (429).
+    #[serde(default)]
+    pub on_rate_limit: FallbackBehavior,
+
+    /// Behavior on timeout error.
+    #[serde(default)]
+    pub on_timeout: FallbackBehavior,
+
+    /// Behavior when all attempts fail.
+    #[serde(default)]
+    pub on_all_failed: OnAllFailedBehavior,
+
+    /// Maximum retry attempts.
+    #[serde(default = "default_max_retries")]
+    pub max_retries: usize,
+
+    /// Initial retry delay in milliseconds.
+    #[serde(default = "default_initial_retry_delay_ms")]
+    pub initial_retry_delay_ms: u64,
+
+    /// Maximum retry delay in milliseconds.
+    #[serde(default = "default_max_retry_delay_ms")]
+    pub max_retry_delay_ms: u64,
+
+    /// Retry delay multiplier (exponential backoff).
+    #[serde(default = "default_retry_multiplier")]
+    pub retry_multiplier: f32,
+}
+
+fn default_fallback_models() -> Vec<String> {
+    vec!["gpt-4o-mini".to_string(), "glm-4-flash".to_string()]
+}
+
+fn default_max_retries() -> usize {
+    3
+}
+
+fn default_initial_retry_delay_ms() -> u64 {
+    1000
+}
+
+fn default_max_retry_delay_ms() -> u64 {
+    30000
+}
+
+fn default_retry_multiplier() -> f32 {
+    2.0
+}
+
+impl Default for FallbackConfig {
+    fn default() -> Self {
+        Self {
+            enabled: default_true(),
+            models: default_fallback_models(),
+            endpoints: Vec::new(),
+            on_rate_limit: FallbackBehavior::default(),
+            on_timeout: FallbackBehavior::default(),
+            on_all_failed: OnAllFailedBehavior::default(),
+            max_retries: default_max_retries(),
+            initial_retry_delay_ms: default_initial_retry_delay_ms(),
+            max_retry_delay_ms: default_max_retry_delay_ms(),
+            retry_multiplier: default_retry_multiplier(),
+        }
+    }
+}
+
+fn default_true() -> bool {
+    true
+}
+
+impl FallbackConfig {
+    /// Create a new fallback config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Disable fallback entirely.
+    pub fn disabled() -> Self {
+        Self {
+            enabled: false,
+            ..Self::default()
+        }
+    }
+
+    /// Set fallback models.
+    pub fn with_models(mut self, models: Vec<String>) -> Self {
+        self.models = models;
+        self
+    }
+
+    /// Set fallback endpoints.
+    pub fn with_endpoints(mut self, endpoints: Vec<String>) -> Self {
+        self.endpoints = endpoints;
+        self
+    }
+
+    /// Set behavior on rate limit.
+    pub fn with_on_rate_limit(mut self, behavior: FallbackBehavior) -> Self {
+        self.on_rate_limit = behavior;
+        self
+    }
+
+    /// Set behavior on timeout.
+    pub fn with_on_timeout(mut self, behavior: FallbackBehavior) -> Self {
+        self.on_timeout = behavior;
+        self
+    }
+
+    /// Set behavior when all attempts fail.
+    pub fn with_on_all_failed(mut self, behavior: OnAllFailedBehavior) -> Self {
+        self.on_all_failed = behavior;
+        self
+    }
+
+    /// Set maximum retries.
+    pub fn with_max_retries(mut self, max: usize) -> Self {
+        self.max_retries = max;
+        self
+    }
+
+    /// Calculate retry delay with exponential backoff.
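+    ///
+    /// With the defaults (1000 ms initial delay, 2.0 multiplier, 30 s cap),
+    /// attempts 0, 1, 2, ... wait 1000 ms, 2000 ms, 4000 ms, ... up to the cap:
+    ///
+    /// ```rust,no_run
+    /// use vectorless::config::FallbackConfig;
+    ///
+    /// let config = FallbackConfig::default();
+    /// assert_eq!(config.calculate_retry_delay(2).as_millis(), 4000);
+    /// ```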
+    /// Calculate retry delay with exponential backoff.
+    pub fn calculate_retry_delay(&self, attempt: usize) -> std::time::Duration {
+        let delay_ms = if attempt == 0 {
+            self.initial_retry_delay_ms
+        } else {
+            let delay = self.initial_retry_delay_ms as f32
+                * self.retry_multiplier.powi(attempt as i32);
+            delay.min(self.max_retry_delay_ms as f32) as u64
+        };
+        std::time::Duration::from_millis(delay_ms)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_fallback_config_defaults() {
+        let config = FallbackConfig::default();
+        assert!(config.enabled);
+        assert_eq!(config.models.len(), 2);
+        assert_eq!(config.on_rate_limit, FallbackBehavior::RetryThenFallback);
+        assert_eq!(config.max_retries, 3);
+    }
+
+    #[test]
+    fn test_fallback_config_disabled() {
+        let config = FallbackConfig::disabled();
+        assert!(!config.enabled);
+    }
+
+    #[test]
+    fn test_fallback_behavior_serde() {
+        let behavior = FallbackBehavior::RetryThenFallback;
+        let json = serde_json::to_string(&behavior).unwrap();
+        assert_eq!(json, "\"retry_then_fallback\"");
+
+        let decoded: FallbackBehavior = serde_json::from_str(&json).unwrap();
+        assert_eq!(decoded, behavior);
+    }
+
+    #[test]
+    fn test_retry_delay_calculation() {
+        let config = FallbackConfig::default();
+
+        let d0 = config.calculate_retry_delay(0);
+        let d1 = config.calculate_retry_delay(1);
+        let d2 = config.calculate_retry_delay(2);
+
+        assert_eq!(d0.as_millis(), 1000);
+        assert_eq!(d1.as_millis(), 2000);
+        assert_eq!(d2.as_millis(), 4000);
+    }
+}
diff --git a/src/config/types/indexer.rs b/src/config/types/indexer.rs
new file mode 100644
index 00000000..6353122a
--- /dev/null
+++ b/src/config/types/indexer.rs
@@ -0,0 +1,108 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Indexer configuration types.
+
+use serde::{Deserialize, Serialize};
+
+/// Indexer configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct IndexerConfig {
+    /// Word count threshold for splitting sections into subsections.
+    #[serde(default = "default_subsection_threshold")]
+    pub subsection_threshold: usize,
+
+    /// Maximum tokens to send in a single segmentation request.
+    #[serde(default = "default_max_segment_tokens")]
+    pub max_segment_tokens: usize,
+
+    /// Maximum tokens for each summary.
+    #[serde(default = "default_max_summary_tokens")]
+    pub max_summary_tokens: usize,
+
+    /// Minimum content tokens required to generate a summary.
+    #[serde(default = "default_min_summary_tokens")]
+    pub min_summary_tokens: usize,
+}
+
+fn default_subsection_threshold() -> usize {
+    300
+}
+
+fn default_max_segment_tokens() -> usize {
+    3000
+}
+
+fn default_max_summary_tokens() -> usize {
+    200
+}
+
+fn default_min_summary_tokens() -> usize {
+    20
+}
+
+impl Default for IndexerConfig {
+    fn default() -> Self {
+        Self {
+            subsection_threshold: default_subsection_threshold(),
+            max_segment_tokens: default_max_segment_tokens(),
+            max_summary_tokens: default_max_summary_tokens(),
+            min_summary_tokens: default_min_summary_tokens(),
+        }
+    }
+}
+
+impl IndexerConfig {
+    /// Create a new indexer config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the subsection threshold.
+    pub fn with_subsection_threshold(mut self, threshold: usize) -> Self {
+        self.subsection_threshold = threshold;
+        self
+    }
+
+    /// Set the maximum segment tokens.
+    pub fn with_max_segment_tokens(mut self, tokens: usize) -> Self {
+        self.max_segment_tokens = tokens;
+        self
+    }
+
+    /// Set the maximum summary tokens.
+    pub fn with_max_summary_tokens(mut self, tokens: usize) -> Self {
+        self.max_summary_tokens = tokens;
+        self
+    }
+
+    /// Set the minimum summary tokens.
+    pub fn with_min_summary_tokens(mut self, tokens: usize) -> Self {
+        self.min_summary_tokens = tokens;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_indexer_config_defaults() {
+        let config = IndexerConfig::default();
+        assert_eq!(config.subsection_threshold, 300);
+        assert_eq!(config.max_segment_tokens, 3000);
+        assert_eq!(config.max_summary_tokens, 200);
+        assert_eq!(config.min_summary_tokens, 20);
+    }
+
+    #[test]
+    fn test_indexer_config_builder() {
+        let config = IndexerConfig::new()
+            .with_subsection_threshold(500)
+            .with_max_summary_tokens(300);
+
+        assert_eq!(config.subsection_threshold, 500);
+        assert_eq!(config.max_summary_tokens, 300);
+    }
+}
diff --git a/src/config/types/llm.rs b/src/config/types/llm.rs
new file mode 100644
index 00000000..a98ee7d3
--- /dev/null
+++ b/src/config/types/llm.rs
@@ -0,0 +1,218 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! LLM configuration types for summary and retrieval.
+
+use serde::{Deserialize, Serialize};
+
+/// Generic LLM configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LlmConfig {
+    /// Model name (e.g., "gpt-4o-mini", "claude-3-haiku").
+    #[serde(default = "default_model")]
+    pub model: String,
+
+    /// API endpoint.
+    #[serde(default = "default_endpoint")]
+    pub endpoint: String,
+
+    /// API key.
+    #[serde(default)]
+    pub api_key: Option<String>,
+
+    /// Maximum tokens for responses.
+    #[serde(default = "default_max_tokens")]
+    pub max_tokens: usize,
+
+    /// Temperature for generation.
+    #[serde(default = "default_temperature")]
+    pub temperature: f32,
+}
+
+fn default_model() -> String {
+    "gpt-4o-mini".to_string()
+}
+
+fn default_endpoint() -> String {
+    "https://api.openai.com/v1".to_string()
+}
+
+fn default_max_tokens() -> usize {
+    1000
+}
+
+fn default_temperature() -> f32 {
+    0.0
+}
+
+impl Default for LlmConfig {
+    fn default() -> Self {
+        Self {
+            model: default_model(),
+            endpoint: default_endpoint(),
+            api_key: None,
+            max_tokens: default_max_tokens(),
+            temperature: default_temperature(),
+        }
+    }
+}
+
+impl LlmConfig {
+    /// Create a new LLM config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the model.
+    pub fn with_model(mut self, model: impl Into<String>) -> Self {
+        self.model = model.into();
+        self
+    }
+
+    /// Set the endpoint.
+    pub fn with_endpoint(mut self, endpoint: impl Into<String>) -> Self {
+        self.endpoint = endpoint.into();
+        self
+    }
+
+    /// Set the API key.
+    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
+        self.api_key = Some(api_key.into());
+        self
+    }
+
+    /// Set the maximum tokens.
+    pub fn with_max_tokens(mut self, max_tokens: usize) -> Self {
+        self.max_tokens = max_tokens;
+        self
+    }
+
+    /// Set the temperature.
+    pub fn with_temperature(mut self, temperature: f32) -> Self {
+        self.temperature = temperature;
+        self
+    }
+
+    /// Get the API key from config.
+    pub fn get_api_key(&self) -> Option<&str> {
+        self.api_key.as_deref()
+    }
+}
+
+/// Summary model configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SummaryConfig {
+    /// Model name for summarization.
+    #[serde(default = "default_summary_model")]
+    pub model: String,
+
+    /// API endpoint for summary model.
+    #[serde(default = "default_endpoint")]
+    pub endpoint: String,
+
+    /// API key.
+    #[serde(default)]
+    pub api_key: Option<String>,
+
+    /// Maximum tokens for summary generation.
+    #[serde(default = "default_max_summary_tokens")]
+    pub max_tokens: usize,
+
+    /// Temperature for summary generation.
+    #[serde(default = "default_temperature")]
+    pub temperature: f32,
+}
+
+fn default_summary_model() -> String {
+    "gpt-4o-mini".to_string()
+}
+
+fn default_max_summary_tokens() -> usize {
+    200
+}
+
+impl Default for SummaryConfig {
+    fn default() -> Self {
+        Self {
+            model: default_summary_model(),
+            endpoint: default_endpoint(),
+            api_key: None,
+            max_tokens: default_max_summary_tokens(),
+            temperature: default_temperature(),
+        }
+    }
+}
+
+impl SummaryConfig {
+    /// Create a new summary config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the model.
+    pub fn with_model(mut self, model: impl Into<String>) -> Self {
+        self.model = model.into();
+        self
+    }
+
+    /// Set the endpoint.
+    pub fn with_endpoint(mut self, endpoint: impl Into<String>) -> Self {
+        self.endpoint = endpoint.into();
+        self
+    }
+
+    /// Set the API key.
+    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
+        self.api_key = Some(api_key.into());
+        self
+    }
+
+    /// Set the maximum tokens.
+    pub fn with_max_tokens(mut self, max_tokens: usize) -> Self {
+        self.max_tokens = max_tokens;
+        self
+    }
+
+    /// Convert to generic LLM config.
+    pub fn to_llm_config(&self) -> LlmConfig {
+        LlmConfig {
+            model: self.model.clone(),
+            endpoint: self.endpoint.clone(),
+            api_key: self.api_key.clone(),
+            max_tokens: self.max_tokens,
+            temperature: self.temperature,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_llm_config_defaults() {
+        let config = LlmConfig::default();
+        assert_eq!(config.model, "gpt-4o-mini");
+        assert_eq!(config.endpoint, "https://api.openai.com/v1");
+        assert!(config.api_key.is_none());
+    }
+
+    #[test]
+    fn test_llm_config_builder() {
+        let config = LlmConfig::new()
+            .with_model("gpt-4o")
+            .with_api_key("test-key")
+            .with_max_tokens(2000);
+
+        assert_eq!(config.model, "gpt-4o");
+        assert_eq!(config.api_key, Some("test-key".to_string()));
+        assert_eq!(config.max_tokens, 2000);
+    }
+
+    #[test]
+    fn test_summary_config() {
+        let config = SummaryConfig::default();
+        assert_eq!(config.model, "gpt-4o-mini");
+        assert_eq!(config.max_tokens, 200);
+    }
+}
diff --git a/src/config/types/mod.rs b/src/config/types/mod.rs
new file mode 100644
index 00000000..a824ee3f
--- /dev/null
+++ b/src/config/types/mod.rs
@@ -0,0 +1,336 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Configuration type definitions.
+//!
+//! All configuration values are defined inline in `Default` trait implementations.
+//! Configuration is loaded from TOML files only - no environment variable magic.
+
+mod content;
+mod concurrency;
+mod fallback;
+mod indexer;
+mod llm;
+mod retrieval;
+mod storage;
+
+use serde::{Deserialize, Serialize};
+
+pub use content::ContentAggregatorConfig;
+pub use concurrency::ConcurrencyConfig;
+pub use fallback::{FallbackBehavior, FallbackConfig, OnAllFailedBehavior};
+pub use indexer::IndexerConfig;
+pub use llm::{LlmConfig, SummaryConfig};
+pub use retrieval::{RetrievalConfig, SearchConfig};
+pub use storage::{
+    CacheConfig, StorageConfig, StrategyConfig, SufficiencyConfig,
+};
+
+/// Main configuration for vectorless.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Config {
+    /// Indexer configuration.
+    #[serde(default)]
+    pub indexer: IndexerConfig,
+
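+    // Presumably each section deserializes from the TOML table of the same
+    // name ([indexer], [summary], [retrieval], [storage], [concurrency],
+    // [fallback]), given the serde derives and the TOML-only loading noted
+    // in the module docs above.
+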
+    /// Summary model configuration.
+    #[serde(default)]
+    pub summary: SummaryConfig,
+
+    /// Retrieval model configuration.
+    #[serde(default)]
+    pub retrieval: RetrievalConfig,
+
+    /// Storage configuration.
+    #[serde(default)]
+    pub storage: StorageConfig,
+
+    /// Concurrency control configuration.
+    #[serde(default)]
+    pub concurrency: ConcurrencyConfig,
+
+    /// Fallback/error recovery configuration.
+    #[serde(default)]
+    pub fallback: FallbackConfig,
+}
+
+impl Default for Config {
+    fn default() -> Self {
+        Self {
+            indexer: IndexerConfig::default(),
+            summary: SummaryConfig::default(),
+            retrieval: RetrievalConfig::default(),
+            storage: StorageConfig::default(),
+            concurrency: ConcurrencyConfig::default(),
+            fallback: FallbackConfig::default(),
+        }
+    }
+}
+
+impl Config {
+    /// Create a new configuration with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the indexer configuration.
+    pub fn with_indexer(mut self, indexer: IndexerConfig) -> Self {
+        self.indexer = indexer;
+        self
+    }
+
+    /// Set the summary configuration.
+    pub fn with_summary(mut self, summary: SummaryConfig) -> Self {
+        self.summary = summary;
+        self
+    }
+
+    /// Set the retrieval configuration.
+    pub fn with_retrieval(mut self, retrieval: RetrievalConfig) -> Self {
+        self.retrieval = retrieval;
+        self
+    }
+
+    /// Set the storage configuration.
+    pub fn with_storage(mut self, storage: StorageConfig) -> Self {
+        self.storage = storage;
+        self
+    }
+
+    /// Set the concurrency configuration.
+    pub fn with_concurrency(mut self, concurrency: ConcurrencyConfig) -> Self {
+        self.concurrency = concurrency;
+        self
+    }
+
+    /// Set the fallback configuration.
+    pub fn with_fallback(mut self, fallback: FallbackConfig) -> Self {
+        self.fallback = fallback;
+        self
+    }
+
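+    // Unlike `ConfigValidator::validate` in src/config/validator.rs, which
+    // only fails on `Severity::Error`, this inline check returns Err when
+    // anything at all (warnings included) has been recorded.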
+    /// Validate the configuration.
+    pub fn validate(&self) -> Result<(), ConfigValidationError> {
+        let mut errors = Vec::new();
+
+        // Validate indexer
+        if self.indexer.subsection_threshold == 0 {
+            errors.push(ValidationError::error(
+                "indexer.subsection_threshold",
+                "Subsection threshold must be greater than 0",
+            ));
+        }
+
+        // Validate summary
+        if self.summary.max_tokens == 0 {
+            errors.push(ValidationError::error(
+                "summary.max_tokens",
+                "Summary max tokens must be greater than 0",
+            ));
+        }
+
+        // Validate retrieval
+        if self.retrieval.top_k == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.top_k",
+                "Top K must be greater than 0",
+            ));
+        }
+
+        if self.retrieval.temperature < 0.0 || self.retrieval.temperature > 2.0 {
+            errors.push(ValidationError::warning(
+                "retrieval.temperature",
+                "Temperature outside typical range [0.0, 2.0]",
+            ).with_actual(self.retrieval.temperature.to_string()));
+        }
+
+        // Validate content aggregator
+        if self.retrieval.content.token_budget == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.content.token_budget",
+                "Token budget must be greater than 0",
+            ));
+        }
+
+        if self.retrieval.content.min_relevance_score < 0.0
+            || self.retrieval.content.min_relevance_score > 1.0
+        {
+            errors.push(ValidationError::error(
+                "retrieval.content.min_relevance_score",
+                "Min relevance score must be between 0.0 and 1.0",
+            )
+            .with_expected("0.0 - 1.0")
+            .with_actual(self.retrieval.content.min_relevance_score.to_string()));
+        }
+
+        // Validate concurrency
+        if self.concurrency.max_concurrent_requests == 0 {
+            errors.push(ValidationError::error(
+                "concurrency.max_concurrent_requests",
+                "Max concurrent requests must be greater than 0",
+            ));
+        }
+
+        // Validate fallback
+        if self.fallback.enabled && self.fallback.models.is_empty() {
+            errors.push(ValidationError::warning(
+                "fallback.models",
+                "Fallback enabled but no fallback models configured",
+            ));
+        }
+
+        if errors.is_empty() {
+            Ok(())
+        } else {
+            Err(ConfigValidationError { errors })
+        }
+    }
+}
+
+/// Configuration validation error.
+#[derive(Debug, Clone, thiserror::Error)]
+#[error("Configuration validation failed with {} error(s)", self.errors.len())]
+pub struct ConfigValidationError {
+    /// Validation errors.
+    pub errors: Vec<ValidationError>,
+}
+
+/// A single validation error.
+#[derive(Debug, Clone)]
+pub struct ValidationError {
+    /// Field path (e.g., "retrieval.content.token_budget").
+    pub path: String,
+
+    /// Error message.
+    pub message: String,
+
+    /// Expected value/range.
+    pub expected: Option<String>,
+
+    /// Actual value.
+    pub actual: Option<String>,
+
+    /// Severity level.
+    pub severity: Severity,
+}
+
+impl ValidationError {
+    /// Create an error-level validation error.
+    pub fn error(path: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            path: path.into(),
+            message: message.into(),
+            expected: None,
+            actual: None,
+            severity: Severity::Error,
+        }
+    }
+
+    /// Create a warning-level validation error.
+    pub fn warning(path: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            path: path.into(),
+            message: message.into(),
+            expected: None,
+            actual: None,
+            severity: Severity::Warning,
+        }
+    }
+
+    /// Create an info-level validation error.
+    pub fn info(path: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            path: path.into(),
+            message: message.into(),
+            expected: None,
+            actual: None,
+            severity: Severity::Info,
+        }
+    }
+
+    /// Set the expected value.
+    pub fn with_expected(mut self, expected: impl Into<String>) -> Self {
+        self.expected = Some(expected.into());
+        self
+    }
+
+    /// Set the actual value.
+    pub fn with_actual(mut self, actual: impl Into<String>) -> Self {
+        self.actual = Some(actual.into());
+        self
+    }
+}
+
+impl std::fmt::Display for ValidationError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let severity = match self.severity {
+            Severity::Error => "ERROR",
+            Severity::Warning => "WARNING",
+            Severity::Info => "INFO",
+        };
+        write!(f, "[{}] {}: {}", severity, self.path, self.message)?;
+        if let Some(ref expected) = self.expected {
+            write!(f, " (expected: {})", expected)?;
+        }
+        if let Some(ref actual) = self.actual {
+            write!(f, " (actual: {})", actual)?;
+        }
+        Ok(())
+    }
+}
+
+/// Validation severity level.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Severity {
+    /// Error - must fix.
+    Error,
+    /// Warning - should fix.
+    Warning,
+    /// Info - suggestion.
+    Info,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_config_defaults() {
+        let config = Config::default();
+        assert_eq!(config.indexer.subsection_threshold, 300);
+        assert_eq!(config.summary.model, "gpt-4o-mini");
+        assert_eq!(config.retrieval.model, "gpt-4o");
+        assert_eq!(config.concurrency.max_concurrent_requests, 10);
+    }
+
+    #[test]
+    fn test_config_validation_success() {
+        let config = Config::default();
+        assert!(config.validate().is_ok());
+    }
+
+    #[test]
+    fn test_config_validation_errors() {
+        let mut config = Config::default();
+        config.retrieval.content.token_budget = 0;
+        config.retrieval.content.min_relevance_score = 1.5;
+
+        let result = config.validate();
+        assert!(result.is_err());
+
+        let err = result.unwrap_err();
+        assert!(!err.errors.is_empty());
+    }
+
+    #[test]
+    fn test_validation_error_display() {
+        let err = ValidationError::error("test.field", "Invalid value")
+            .with_expected(">= 1")
+            .with_actual("0");
+
+        let display = format!("{}", err);
+        assert!(display.contains("ERROR"));
+        assert!(display.contains("test.field"));
+        assert!(display.contains("expected"));
+    }
+}
diff --git a/src/config/types/retrieval.rs b/src/config/types/retrieval.rs
new file mode 100644
index 00000000..d111b686
--- /dev/null
+++ b/src/config/types/retrieval.rs
@@ -0,0 +1,219 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Retrieval configuration types.
+
+use serde::{Deserialize, Serialize};
+
+use super::content::ContentAggregatorConfig;
+use super::storage::{CacheConfig, StrategyConfig, SufficiencyConfig};
+
+/// Retrieval model configuration (for navigation).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RetrievalConfig {
+    /// Model name for retrieval/navigation.
+    #[serde(default = "default_retrieval_model")]
+    pub model: String,
+
+    /// API endpoint for retrieval model.
+    #[serde(default = "default_endpoint")]
+    pub endpoint: String,
+
+    /// API key.
+    #[serde(default)]
+    pub api_key: Option<String>,
+
+    /// Maximum tokens for retrieval context.
+    #[serde(default = "default_max_retrieval_tokens")]
+    pub max_tokens: usize,
+
+    /// Temperature for retrieval.
+    #[serde(default = "default_temperature")]
+    pub temperature: f32,
+
+    /// Number of top-k results to return.
+    #[serde(default = "default_top_k")]
+    pub top_k: usize,
+
+    /// Search algorithm configuration.
+    #[serde(default)]
+    pub search: SearchConfig,
+
+    /// Sufficiency checker configuration.
+    #[serde(default)]
+    pub sufficiency: SufficiencyConfig,
+
+    /// Cache configuration.
+    #[serde(default)]
+    pub cache: CacheConfig,
+
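+    // Note: `top_k` on this struct (default 3) and `search.top_k` below
+    // (default 5) are independent settings, though both are documented as
+    // the number of top-k results to return.
+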
+    /// Strategy-specific configuration.
+    #[serde(default)]
+    pub strategy: StrategyConfig,
+
+    /// Content aggregator configuration.
+    #[serde(default)]
+    pub content: ContentAggregatorConfig,
+}
+
+fn default_retrieval_model() -> String {
+    "gpt-4o".to_string()
+}
+
+fn default_endpoint() -> String {
+    "https://api.openai.com/v1".to_string()
+}
+
+fn default_max_retrieval_tokens() -> usize {
+    1000
+}
+
+fn default_temperature() -> f32 {
+    0.0
+}
+
+fn default_top_k() -> usize {
+    3
+}
+
+impl Default for RetrievalConfig {
+    fn default() -> Self {
+        Self {
+            model: default_retrieval_model(),
+            endpoint: default_endpoint(),
+            api_key: None,
+            max_tokens: default_max_retrieval_tokens(),
+            temperature: default_temperature(),
+            top_k: default_top_k(),
+            search: SearchConfig::default(),
+            sufficiency: SufficiencyConfig::default(),
+            cache: CacheConfig::default(),
+            strategy: StrategyConfig::default(),
+            content: ContentAggregatorConfig::default(),
+        }
+    }
+}
+
+impl RetrievalConfig {
+    /// Create a new retrieval config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the model.
+    pub fn with_model(mut self, model: impl Into<String>) -> Self {
+        self.model = model.into();
+        self
+    }
+
+    /// Set the endpoint.
+    pub fn with_endpoint(mut self, endpoint: impl Into<String>) -> Self {
+        self.endpoint = endpoint.into();
+        self
+    }
+
+    /// Set the API key.
+    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
+        self.api_key = Some(api_key.into());
+        self
+    }
+
+    /// Set the top_k.
+    pub fn with_top_k(mut self, top_k: usize) -> Self {
+        self.top_k = top_k;
+        self
+    }
+}
+
+/// Search algorithm configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SearchConfig {
+    /// Number of top-k results to return.
+    #[serde(default = "default_search_top_k")]
+    pub top_k: usize,
+
+    /// Beam width for multi-path search.
+    #[serde(default = "default_beam_width")]
+    pub beam_width: usize,
+
+    /// Maximum iterations for search algorithms.
+    #[serde(default = "default_max_iterations")]
+    pub max_iterations: usize,
+
+    /// Minimum score to include a path.
+    #[serde(default = "default_min_score")]
+    pub min_score: f32,
+}
+
+fn default_search_top_k() -> usize {
+    5
+}
+
+fn default_beam_width() -> usize {
+    3
+}
+
+fn default_max_iterations() -> usize {
+    10
+}
+
+fn default_min_score() -> f32 {
+    0.1
+}
+
+impl Default for SearchConfig {
+    fn default() -> Self {
+        Self {
+            top_k: default_search_top_k(),
+            beam_width: default_beam_width(),
+            max_iterations: default_max_iterations(),
+            min_score: default_min_score(),
+        }
+    }
+}
+
+impl SearchConfig {
+    /// Create new search config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the top_k.
+    pub fn with_top_k(mut self, top_k: usize) -> Self {
+        self.top_k = top_k;
+        self
+    }
+
+    /// Set the beam width.
+    pub fn with_beam_width(mut self, width: usize) -> Self {
+        self.beam_width = width;
+        self
+    }
+
+    /// Set the max iterations.
+    pub fn with_max_iterations(mut self, max: usize) -> Self {
+        self.max_iterations = max;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_retrieval_config_defaults() {
+        let config = RetrievalConfig::default();
+        assert_eq!(config.model, "gpt-4o");
+        assert_eq!(config.top_k, 3);
+        assert_eq!(config.search.top_k, 5);
+    }
+
+    #[test]
+    fn test_search_config_defaults() {
+        let config = SearchConfig::default();
+        assert_eq!(config.top_k, 5);
+        assert_eq!(config.beam_width, 3);
+        assert_eq!(config.max_iterations, 10);
+    }
+}
diff --git a/src/config/types/storage.rs b/src/config/types/storage.rs
new file mode 100644
index 00000000..0dc55ed9
--- /dev/null
+++ b/src/config/types/storage.rs
@@ -0,0 +1,274 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Storage and sufficiency configuration types.
+
+use serde::{Deserialize, Serialize};
+use std::path::PathBuf;
+
+/// Storage configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StorageConfig {
+    /// Workspace directory for persisted documents.
+    #[serde(default = "default_workspace_dir")]
+    pub workspace_dir: PathBuf,
+}
+
+fn default_workspace_dir() -> PathBuf {
+    PathBuf::from("./workspace")
+}
+
+impl Default for StorageConfig {
+    fn default() -> Self {
+        Self {
+            workspace_dir: default_workspace_dir(),
+        }
+    }
+}
+
+impl StorageConfig {
+    /// Create new storage config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the workspace directory.
+    pub fn with_workspace_dir(mut self, dir: impl Into<PathBuf>) -> Self {
+        self.workspace_dir = dir.into();
+        self
+    }
+}
+
+/// Sufficiency checker configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SufficiencyConfig {
+    /// Minimum tokens for sufficiency.
+    #[serde(default = "default_min_tokens")]
+    pub min_tokens: usize,
+
+    /// Target tokens for full sufficiency.
+    #[serde(default = "default_target_tokens")]
+    pub target_tokens: usize,
+
+    /// Maximum tokens before stopping.
+    #[serde(default = "default_max_tokens")]
+    pub max_tokens: usize,
+
+    /// Minimum content length (characters).
+    #[serde(default = "default_min_content_length")]
+    pub min_content_length: usize,
+
+    /// Confidence threshold for LLM judge.
+    #[serde(default = "default_confidence_threshold")]
+    pub confidence_threshold: f32,
+}
+
+fn default_min_tokens() -> usize {
+    500
+}
+
+fn default_target_tokens() -> usize {
+    2000
+}
+
+fn default_max_tokens() -> usize {
+    4000
+}
+
+fn default_min_content_length() -> usize {
+    200
+}
+
+fn default_confidence_threshold() -> f32 {
+    0.7
+}
+
+impl Default for SufficiencyConfig {
+    fn default() -> Self {
+        Self {
+            min_tokens: default_min_tokens(),
+            target_tokens: default_target_tokens(),
+            max_tokens: default_max_tokens(),
+            min_content_length: default_min_content_length(),
+            confidence_threshold: default_confidence_threshold(),
+        }
+    }
+}
+
+impl SufficiencyConfig {
+    /// Create new sufficiency config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the minimum tokens.
+    pub fn with_min_tokens(mut self, tokens: usize) -> Self {
+        self.min_tokens = tokens;
+        self
+    }
+
+    /// Set the target tokens.
+    pub fn with_target_tokens(mut self, tokens: usize) -> Self {
+        self.target_tokens = tokens;
+        self
+    }
+
+    /// Set the maximum tokens.
+    pub fn with_max_tokens(mut self, tokens: usize) -> Self {
+        self.max_tokens = tokens;
+        self
+    }
+
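+    // ConsistencyValidator (src/config/validator.rs) requires
+    // min_tokens <= target_tokens <= max_tokens (defaults 500/2000/4000).
+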
+    /// Set the confidence threshold.
+    pub fn with_confidence_threshold(mut self, threshold: f32) -> Self {
+        self.confidence_threshold = threshold;
+        self
+    }
+}
+
+/// Cache configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CacheConfig {
+    /// Maximum number of cache entries.
+    #[serde(default = "default_max_entries")]
+    pub max_entries: usize,
+
+    /// Time-to-live for cache entries (seconds).
+    #[serde(default = "default_ttl_secs")]
+    pub ttl_secs: u64,
+}
+
+fn default_max_entries() -> usize {
+    1000
+}
+
+fn default_ttl_secs() -> u64 {
+    3600
+}
+
+impl Default for CacheConfig {
+    fn default() -> Self {
+        Self {
+            max_entries: default_max_entries(),
+            ttl_secs: default_ttl_secs(),
+        }
+    }
+}
+
+impl CacheConfig {
+    /// Create new cache config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the maximum entries.
+    pub fn with_max_entries(mut self, max: usize) -> Self {
+        self.max_entries = max;
+        self
+    }
+
+    /// Set the TTL in seconds.
+    pub fn with_ttl_secs(mut self, secs: u64) -> Self {
+        self.ttl_secs = secs;
+        self
+    }
+}
+
+/// Strategy-specific configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StrategyConfig {
+    /// MCTS exploration weight (sqrt(2) ≈ 1.414).
+    #[serde(default = "default_exploration_weight")]
+    pub exploration_weight: f32,
+
+    /// Semantic similarity threshold.
+    #[serde(default = "default_similarity_threshold")]
+    pub similarity_threshold: f32,
+
+    /// High similarity threshold for "answer" decision.
+    #[serde(default = "default_high_similarity_threshold")]
+    pub high_similarity_threshold: f32,
+
+    /// Low similarity threshold for "explore" decision.
+    #[serde(default = "default_low_similarity_threshold")]
+    pub low_similarity_threshold: f32,
+}
+
+fn default_exploration_weight() -> f32 {
+    1.414
+}
+
+fn default_similarity_threshold() -> f32 {
+    0.5
+}
+
+fn default_high_similarity_threshold() -> f32 {
+    0.8
+}
+
+fn default_low_similarity_threshold() -> f32 {
+    0.3
+}
+
+impl Default for StrategyConfig {
+    fn default() -> Self {
+        Self {
+            exploration_weight: default_exploration_weight(),
+            similarity_threshold: default_similarity_threshold(),
+            high_similarity_threshold: default_high_similarity_threshold(),
+            low_similarity_threshold: default_low_similarity_threshold(),
+        }
+    }
+}
+
+impl StrategyConfig {
+    /// Create new strategy config with defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set the exploration weight.
+    pub fn with_exploration_weight(mut self, weight: f32) -> Self {
+        self.exploration_weight = weight;
+        self
+    }
+
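+    // DependencyValidator (src/config/validator.rs) rejects configurations
+    // where low_similarity_threshold >= high_similarity_threshold.
+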
+    /// Set the similarity threshold.
+    pub fn with_similarity_threshold(mut self, threshold: f32) -> Self {
+        self.similarity_threshold = threshold;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_storage_config_defaults() {
+        let config = StorageConfig::default();
+        assert_eq!(config.workspace_dir, PathBuf::from("./workspace"));
+    }
+
+    #[test]
+    fn test_sufficiency_config_defaults() {
+        let config = SufficiencyConfig::default();
+        assert_eq!(config.min_tokens, 500);
+        assert_eq!(config.target_tokens, 2000);
+        assert_eq!(config.max_tokens, 4000);
+    }
+
+    #[test]
+    fn test_cache_config_defaults() {
+        let config = CacheConfig::default();
+        assert_eq!(config.max_entries, 1000);
+        assert_eq!(config.ttl_secs, 3600);
+    }
+
+    #[test]
+    fn test_strategy_config_defaults() {
+        let config = StrategyConfig::default();
+        assert!((config.exploration_weight - 1.414).abs() < 0.001);
+        assert_eq!(config.similarity_threshold, 0.5);
+    }
+}
diff --git a/src/config/validator.rs b/src/config/validator.rs
new file mode 100644
index 00000000..8a3596fd
--- /dev/null
+++ b/src/config/validator.rs
@@ -0,0 +1,359 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Configuration validation.
+//!
+//! This module provides comprehensive validation for configuration values,
+//! including range checks, consistency checks, and dependency validation.
+
+use super::types::{Config, ConfigValidationError, Severity, ValidationError};
+
+/// Configuration validator.
+#[derive(Debug, Default)]
+pub struct ConfigValidator {
+    /// Validation rules to apply.
+    rules: Vec<Box<dyn ValidationRule>>,
+}
+
+impl ConfigValidator {
+    /// Create a new validator with default rules.
+    pub fn new() -> Self {
+        Self {
+            rules: vec![
+                Box::new(RangeValidator),
+                Box::new(ConsistencyValidator),
+                Box::new(DependencyValidator),
+            ],
+        }
+    }
+
+    /// Add a custom validation rule.
+    pub fn with_rule(mut self, rule: Box<dyn ValidationRule>) -> Self {
+        self.rules.push(rule);
+        self
+    }
+
+    /// Validate the configuration.
+    pub fn validate(&self, config: &Config) -> Result<(), ConfigValidationError> {
+        let mut errors = Vec::new();
+
+        for rule in &self.rules {
+            rule.validate(config, &mut errors);
+        }
+
+        // Only fail on errors, not warnings or info
+        let has_errors = errors.iter().any(|e| e.severity == Severity::Error);
+
+        if has_errors {
+            Err(ConfigValidationError { errors })
+        } else {
+            Ok(())
+        }
+    }
+}
+
+/// Trait for validation rules.
+pub trait ValidationRule: std::fmt::Debug + Send + Sync {
+    /// Validate the configuration, appending errors if found.
+    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>);
+}
+
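+// Custom rules can be appended via `ConfigValidator::new().with_rule(...)`;
+// any `Debug + Send + Sync` type implementing `ValidationRule` qualifies.
+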
+/// Validates value ranges.
+#[derive(Debug)]
+struct RangeValidator;
+
+impl ValidationRule for RangeValidator {
+    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
+        // Indexer ranges
+        if config.indexer.subsection_threshold == 0 {
+            errors.push(ValidationError::error(
+                "indexer.subsection_threshold",
+                "Subsection threshold must be greater than 0",
+            ));
+        }
+
+        if config.indexer.subsection_threshold > 10000 {
+            errors.push(ValidationError::warning(
+                "indexer.subsection_threshold",
+                "Subsection threshold is very high, may impact performance",
+            ).with_actual(config.indexer.subsection_threshold.to_string()));
+        }
+
+        // Summary ranges
+        if config.summary.max_tokens == 0 {
+            errors.push(ValidationError::error(
+                "summary.max_tokens",
+                "Summary max tokens must be greater than 0",
+            ));
+        }
+
+        if config.summary.temperature < 0.0 || config.summary.temperature > 2.0 {
+            errors.push(ValidationError::warning(
+                "summary.temperature",
+                "Temperature outside typical range [0.0, 2.0]",
+            ).with_actual(config.summary.temperature.to_string()));
+        }
+
+        // Retrieval ranges
+        if config.retrieval.top_k == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.top_k",
+                "Top K must be greater than 0",
+            ));
+        }
+
+        if config.retrieval.search.beam_width == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.search.beam_width",
+                "Beam width must be greater than 0",
+            ));
+        }
+
+        // Content aggregator ranges
+        if config.retrieval.content.token_budget == 0 {
+            errors.push(ValidationError::error(
+                "retrieval.content.token_budget",
+                "Token budget must be greater than 0",
+            ));
+        }
+
+        if config.retrieval.content.min_relevance_score < 0.0
+            || config.retrieval.content.min_relevance_score > 1.0
+        {
+            errors.push(ValidationError::error(
+                "retrieval.content.min_relevance_score",
+                "Min relevance score must be between 0.0 and 1.0",
+            )
+            .with_expected("0.0 - 1.0")
+            .with_actual(config.retrieval.content.min_relevance_score.to_string()));
+        }
+
+        if config.retrieval.content.hierarchical_min_per_level < 0.0
+            || config.retrieval.content.hierarchical_min_per_level > 1.0
+        {
+            errors.push(ValidationError::error(
+                "retrieval.content.hierarchical_min_per_level",
+                "Hierarchical min per level must be between 0.0 and 1.0",
+            ));
+        }
+
+        // Concurrency ranges
+        if config.concurrency.max_concurrent_requests == 0 {
+            errors.push(ValidationError::error(
+                "concurrency.max_concurrent_requests",
+                "Max concurrent requests must be greater than 0",
+            ));
+        }
+
+        if config.concurrency.requests_per_minute == 0 {
+            errors.push(ValidationError::error(
+                "concurrency.requests_per_minute",
+                "Requests per minute must be greater than 0",
+            ));
+        }
+
+        // Fallback ranges
+        if config.fallback.max_retries == 0 {
+            errors.push(ValidationError::warning(
+                "fallback.max_retries",
+                "Max retries is 0, fallback will not retry",
+            ));
+        }
+    }
+}
+
+/// Validates configuration consistency.
+#[derive(Debug)]
+struct ConsistencyValidator;
+
+impl ValidationRule for ConsistencyValidator {
+    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
+        // Check if summary tokens are reasonable
+        if config.summary.max_tokens > config.indexer.max_segment_tokens {
+            errors.push(ValidationError::warning(
+                "summary.max_tokens",
+                "Summary max tokens exceeds max segment tokens",
+            )
+            .with_expected(format!("<= {}", config.indexer.max_segment_tokens))
+            .with_actual(config.summary.max_tokens.to_string()));
+        }
+
+        // Check if content token budget is reasonable
+        if config.retrieval.content.token_budget > 100000 {
+            errors.push(ValidationError::warning(
+                "retrieval.content.token_budget",
+                "Token budget is very high, may cause performance issues",
+            ).with_actual(config.retrieval.content.token_budget.to_string()));
+        }
+
+        // Check if sufficiency thresholds are consistent
+        if config.retrieval.sufficiency.min_tokens > config.retrieval.sufficiency.target_tokens {
+            errors.push(ValidationError::error(
+                "retrieval.sufficiency.min_tokens",
+                "Min tokens cannot exceed target tokens",
+            )
+            .with_expected(format!("<= {}", config.retrieval.sufficiency.target_tokens))
+            .with_actual(config.retrieval.sufficiency.min_tokens.to_string()));
+        }
+
+        if config.retrieval.sufficiency.target_tokens > config.retrieval.sufficiency.max_tokens {
+            errors.push(ValidationError::error(
+                "retrieval.sufficiency.target_tokens",
+                "Target tokens cannot exceed max tokens",
+            )
+            .with_expected(format!("<= {}", config.retrieval.sufficiency.max_tokens))
+            .with_actual(config.retrieval.sufficiency.target_tokens.to_string()));
+        }
+
+        // Check scoring strategy validity
+        let valid_strategies = ["keyword_only", "keyword_bm25", "hybrid"];
+        if !valid_strategies.contains(&config.retrieval.content.scoring_strategy.as_str()) {
+            errors.push(ValidationError::error(
+                "retrieval.content.scoring_strategy",
+                "Invalid scoring strategy",
+            )
+            .with_expected(format!("one of: {:?}", valid_strategies))
+            .with_actual(config.retrieval.content.scoring_strategy.clone()));
+        }
+
+        // Check output format validity
+        let valid_formats = ["markdown", "json", "tree", "flat"];
+        if !valid_formats.contains(&config.retrieval.content.output_format.as_str()) {
+            errors.push(ValidationError::error(
+                "retrieval.content.output_format",
+                "Invalid output format",
+            )
+            .with_expected(format!("one of: {:?}", valid_formats))
+            .with_actual(config.retrieval.content.output_format.clone()));
+        }
+    }
+}
+
+/// Validates configuration dependencies.
+#[derive(Debug)]
+struct DependencyValidator;
+
+impl ValidationRule for DependencyValidator {
+    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
+        // Check if API key is available when summaries are needed
+        if config.summary.api_key.is_none() {
+            // Check if any feature requires LLM
+            if config.indexer.max_summary_tokens > 0 {
+                errors.push(ValidationError::info(
+                    "summary.api_key",
+                    "No API key configured, summary generation will be disabled",
+                ));
+            }
+        }
+
+        // Check fallback configuration
+        if config.fallback.enabled {
+            if config.fallback.models.is_empty() && config.fallback.endpoints.is_empty() {
+                errors.push(ValidationError::warning(
+                    "fallback.models",
+                    "Fallback enabled but no fallback models or endpoints configured",
+                ));
+            }
+
+            // Check retry behavior consistency
+            if matches!(
+                config.fallback.on_rate_limit,
+                super::types::FallbackBehavior::Fallback
+            ) && config.fallback.models.is_empty()
+            {
+                errors.push(ValidationError::error(
+                    "fallback.models",
+                    "Rate limit behavior is 'fallback' but no fallback models configured",
+                ));
+            }
+        }
+
+        // Check cache configuration
+        if config.retrieval.cache.max_entries == 0 {
+            errors.push(ValidationError::warning(
+                "retrieval.cache.max_entries",
+                "Cache disabled (max_entries = 0), performance may be impacted",
+            ));
+        }
+
+        // Check strategy configuration
+        if config.retrieval.strategy.exploration_weight <= 0.0 {
+            errors.push(ValidationError::error(
+                "retrieval.strategy.exploration_weight",
+                "Exploration weight must be positive",
+            ).with_actual(config.retrieval.strategy.exploration_weight.to_string()));
+        }
+
+        // Check similarity thresholds are ordered correctly
+        if config.retrieval.strategy.low_similarity_threshold
+            >= config.retrieval.strategy.high_similarity_threshold
+        {
+            errors.push(ValidationError::error(
+                "retrieval.strategy.low_similarity_threshold",
+                "Low similarity threshold must be less than high similarity threshold",
+            )
+            .with_expected(format!(
+                "< {}",
+                config.retrieval.strategy.high_similarity_threshold
+            ))
+            .with_actual(config.retrieval.strategy.low_similarity_threshold.to_string()));
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_validator_valid_config() {
+        let config = Config::default();
+        let validator = ConfigValidator::new();
+        // Default config should pass validation (no errors, warnings are ok)
+        let result = validator.validate(&config);
+        assert!(result.is_ok(), "Default config should pass validation");
+    }
+
+    #[test]
+    fn test_validator_catches_range_errors() {
+        let mut config = Config::default();
+        config.retrieval.content.token_budget = 0;
+        config.retrieval.content.min_relevance_score = 1.5;
+
+        let validator = ConfigValidator::new();
+        let result = validator.validate(&config);
+
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(err.errors.iter().any(|e| e.path.contains("token_budget")));
+    }
+
+    #[test]
+    fn test_validator_catches_consistency_errors() {
+        let mut config = Config::default();
+        config.retrieval.sufficiency.min_tokens = 3000;
+        config.retrieval.sufficiency.target_tokens = 2000;
+
+        let validator = ConfigValidator::new();
+        let result = validator.validate(&config);
+
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(err.errors.iter().any(|e| e.path.contains("min_tokens")));
+    }
+
+    #[test]
+    fn test_validator_catches_dependency_warnings() {
+        let mut config = Config::default();
+        config.fallback.enabled = true;
+        config.fallback.models.clear();
+
+        let validator = ConfigValidator::new();
+        let result = validator.validate(&config);
+
+        // Should succeed but with warnings
+        if let Err(err) = result {
+            assert!(err.errors.iter().any(|e| e.path.contains("fallback.models")));
+        }
+    }
+}

From d4fd394a965788c6ae04e55e0f3f33ebe8a81461 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 22:08:23 +0800
Subject: [PATCH 19/21] docs(README): update usage example and add examples table

- Simplify import statement by using vectorless::Engine directly
- Update return type from vectorless::domain::Result to vectorless::Result
- Change EngineBuilder::new() to Engine::builder() for consistency
- Add error handling with map_err for better error propagation
- Add comprehensive examples table with descriptions
- Update documentation to reflect API changes
---
 README.md | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index b4ccc39a..33f40f42 100644
--- a/README.md
+++ b/README.md
@@ -109,14 +109,15 @@ cp templates/template.toml ./vectorless.toml
 Basic usage:
 
 ```rust
-use vectorless::client::{Engine, EngineBuilder};
+use vectorless::Engine;
 
 #[tokio::main]
-async fn main() -> vectorless::domain::Result<()> {
+async fn main() -> vectorless::Result<()> {
     // Create client
-    let client = EngineBuilder::new()
+    let client = Engine::builder()
         .with_workspace("./workspace")
-        .build()?;
+        .build()
+        .map_err(|e| vectorless::Error::Config(e.to_string()))?;
 
     // Index a document
     let doc_id = client.index("./document.md").await?;
@@ -133,6 +134,21 @@ async fn main() -> vectorless::domain::Result<()> {
 
 See the [examples/](examples/) directory for complete working examples:
 
+| Example | Description |
+|---------|-------------|
+| [basic.rs](examples/basic.rs) | Minimal ~30 line example showing core API |
+| [index.rs](examples/index.rs) | Document indexing pipeline |
+| [retrieve.rs](examples/retrieve.rs) | Retrieval pipeline with options |
+| [events.rs](examples/events.rs) | Event-driven indexing with EventEmitter |
+| [session.rs](examples/session.rs) | Session management with statistics |
+| [batch_processing.rs](examples/batch_processing.rs) | Batch document processing |
+| [content_aggregation.rs](examples/content_aggregation.rs) | Content aggregation strategies |
+| [streaming.rs](examples/streaming.rs) | Streaming document processing |
+| [multi_format.rs](examples/multi_format.rs) | Multi-format document support |
+| [custom_pilot.rs](examples/custom_pilot.rs) | Custom pilot implementation |
+| [cli_tool.rs](examples/cli_tool.rs) | CLI application example |
+| [markdownflow.rs](examples/markdownflow.rs) | Markdown workflow example |
+
 ## Architecture
 
 ### Pilot Architecture

From 1312a89a1bcff65282565a3209e8c756125b502f Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 22:14:33 +0800
Subject: [PATCH 20/21] docs(README): update description and add development warning

- Fix grammatical error in Rust description by removing "with core"
- Add early development warning notice to inform users about potential breaking changes
- Maintain existing content structure while improving clarity
---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 33f40f42..d0649efb 100644
--- a/README.md
+++ b/README.md
@@ -11,10 +11,12 @@
 
 
 
-Ultra performant document intelligence engine for RAG, with core written in **Rust**. Zero vector database, zero embedding model — just LLM-powered tree navigation. Incremental indexing and multi-format support out-of-box.
+Ultra performant document intelligence engine for RAG, written in **Rust**. Zero vector database, zero embedding model — just LLM-powered tree navigation. Incremental indexing and multi-format support out-of-box.
 
 ⭐ **Drop a star to help us grow!**
 
+**⚠️ Early Development**: This project is in active development. The API and features are likely to evolve, and breaking changes may occur.
+
 
 
 ## Why Vectorless?

From fdeb14e02bf5c858308c076acf0fdbc873ae6e42 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sat, 4 Apr 2026 22:15:09 +0800
Subject: [PATCH 21/21] chore(release): bump version to 0.1.11

- Update package version from 0.1.10 to 0.1.11 in Cargo.toml
---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index ebbd37de..01f983fb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectorless"
-version = "0.1.10"
+version = "0.1.11"
 edition = "2024"
 authors = ["zTgx <747674262@qq.com>"]
 description = "Hierarchical, reasoning-native document intelligence engine"
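
For reference, a minimal sketch of how the configuration API assembled in this series fits together. This is illustrative only: it assumes the types from `src/config/types/mod.rs` and `src/config/validator.rs` end up re-exported at `vectorless::config`, which these patches do not show.

```rust
// Illustrative sketch, not part of the patch series.
// Assumption: the config types are re-exported at `vectorless::config`.
use vectorless::config::{
    Config, ConfigValidator, FallbackBehavior, FallbackConfig, IndexerConfig,
};

fn main() {
    let config = Config::new()
        // Split sections into subsections above 500 words (default: 300).
        .with_indexer(IndexerConfig::new().with_subsection_threshold(500))
        // On 429s: retry with exponential backoff, then fall back to the
        // listed models in priority order.
        .with_fallback(
            FallbackConfig::new()
                .with_models(vec!["gpt-4o-mini".to_string()])
                .with_on_rate_limit(FallbackBehavior::RetryThenFallback),
        );

    // Inline checks: return Err on any recorded issue, warnings included.
    if let Err(e) = config.validate() {
        eprintln!("{e}");
    }

    // Rule-based checks: only Severity::Error entries cause failure, so a
    // warning-only configuration still passes.
    assert!(ConfigValidator::new().validate(&config).is_ok());
}
```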