diff --git a/README.md b/README.md
index 603adb21..e96095ad 100644
--- a/README.md
+++ b/README.md
@@ -163,6 +163,105 @@ async fn main() -> vectorless::Result<()> {
| **Feedback Learning** | Improves from user feedback over time |
| **Multi-turn Queries** | Handles complex questions with decomposition |
+## Configuration
+
+### Zero Configuration (Recommended)
+
+Just set `OPENAI_API_KEY` and you're ready to go:
+
+```bash
+export OPENAI_API_KEY="sk-..."
+```
+
+**Python**
+
+```python
+from vectorless import Engine
+
+# Uses OPENAI_API_KEY from environment
+engine = Engine(workspace="./data")
+```
+
+**Rust**
+
+```rust
+use vectorless::Engine;
+
+let client = Engine::builder()
+ .with_workspace("./workspace")
+ .build().await?;
+```
+
+### Environment Variables
+
+| Variable | Description |
+|----------|-------------|
+| `OPENAI_API_KEY` | LLM API key |
+| `VECTORLESS_MODEL` | Default model (e.g., `gpt-4o-mini`) |
+| `VECTORLESS_ENDPOINT` | API endpoint URL |
+| `VECTORLESS_WORKSPACE` | Workspace directory |
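+
+As a quick sketch (values illustrative, and assuming the Python bindings apply the same environment overrides as the Rust builder), the same settings can be staged from Python before the engine is created:
+
+```python
+import os
+
+from vectorless import Engine
+
+# Illustrative values; these would normally be exported in your shell
+os.environ.setdefault("OPENAI_API_KEY", "sk-...")
+os.environ.setdefault("VECTORLESS_MODEL", "gpt-4o-mini")
+os.environ.setdefault("VECTORLESS_WORKSPACE", "./workspace")
+
+# The engine reads all of these from the environment
+engine = Engine()
+```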
+
+### Advanced Configuration
+
+For fine-grained control, use a config file:
+
+```bash
+cp config.toml ./vectorless.toml
+```
+
+**Python**
+
+```python
+from vectorless import Engine
+
+# Use full configuration file
+engine = Engine(config_path="./vectorless.toml")
+
+# Or override specific settings
+engine = Engine(
+ config_path="./vectorless.toml",
+ model="gpt-4o", # Override model from config
+)
+```
+
+**Rust**
+
+```rust
+use vectorless::Engine;
+
+// Use full configuration file
+let client = Engine::builder()
+ .with_config_path("./vectorless.toml")
+ .build().await?;
+
+// Or override specific settings
+let client = Engine::builder()
+ .with_config_path("./vectorless.toml")
+ .with_model("gpt-4o", None) // Override model
+ .build().await?;
+```
+
+### Configuration Priority
+
+Configuration is applied in order; later sources override earlier ones:
+
+1. Default configuration
+2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`)
+3. Explicit config file (`config_path` / `with_config_path`)
+4. Environment variables
+5. Constructor/builder parameters (highest priority)
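+
+As a sketch of the precedence rules (file name and model values are illustrative): an environment variable beats the config file, and a constructor argument beats both:
+
+```python
+import os
+
+from vectorless import Engine
+
+os.environ["VECTORLESS_MODEL"] = "gpt-4o-mini"  # step 4 beats steps 1-3
+
+# The explicit `model` argument (step 5) wins over the env var and the file
+engine = Engine(config_path="./vectorless.toml", model="gpt-4o")
+```
+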
## Architecture
@@ -177,7 +276,7 @@ async fn main() -> vectorless::Result<()> {
## Examples
-See the [examples/](examples/) directory.
+See the [examples/](examples/) directory for more usage patterns.
## Contributing
diff --git a/docs/samples/sample.md b/docs/samples/sample.md
index 8868cd20..1ab9ce35 100644
--- a/docs/samples/sample.md
+++ b/docs/samples/sample.md
@@ -29,53 +29,4 @@ The core module provides fundamental types:
The parser module handles document parsing:
- `MarkdownParser` — Parse Markdown files
- `PdfParser` — Parse PDF files (planned)
-- `HtmlParser` — Parse HTML files (planned)
-
-## Usage Examples
-
-### Basic Usage
-
-```rust
-use vectorless::client::{Vectorless, VectorlessBuilder};
-
-let client = VectorlessBuilder::new()
- .with_workspace("./workspace")
- .build()?;
-
-let doc_id = client.index("./document.md").await?;
-```
-
-### Advanced Usage
-
-You can customize the retrieval process:
-
-```rust
-use vectorless::{LlmNavigator, RetrieveOptions};
-
-let retriever = LlmNavigator::with_defaults();
-let options = RetrieveOptions::new()
- .with_top_k(5)
- .with_min_score(0.5);
-
-let results = retriever.retrieve(&tree, "What is vectorless?", &options).await?;
-```
-
-## Configuration
-
-The library can be configured via TOML files or programmatically.
-
-### Configuration File
-
-```toml
-[summary]
-model = "gpt-4"
-max_tokens = 200
-
-[retrieval]
-model = "gpt-4"
-top_k = 3
-```
-
-## API Reference
-
-See the API documentation for detailed information about each function and type.
+- `HtmlParser` — Parse HTML files (planned)
\ No newline at end of file
diff --git a/examples/python/advanced/README.md b/examples/python/advanced/README.md
new file mode 100644
index 00000000..2d11afbc
--- /dev/null
+++ b/examples/python/advanced/README.md
@@ -0,0 +1,45 @@
+# Advanced Example - Full Configuration
+
+Use a configuration file for fine-grained control.
+
+## Setup
+
+```bash
+pip install vectorless
+
+# Copy the example config
+cp ../../../config.toml ./vectorless.toml
+
+# Edit to customize your settings
+vim vectorless.toml
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Configuration File Structure
+
+```toml
+[llm]
+api_key = "sk-..."
+
+[llm.summary]
+model = "gpt-4o-mini"
+max_tokens = 200
+
+[llm.retrieval]
+model = "gpt-4o"
+max_tokens = 100
+
+[retrieval]
+top_k = 5
+beam_width = 3
+max_iterations = 10
+
+[storage]
+workspace_dir = "./workspace"
+cache_size = 100
+```
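+
+A minimal sketch of how `main.py` consumes this file (the second call just shows that a constructor argument overrides the corresponding file entry):
+
+```python
+from vectorless import Engine
+
+# Load every setting from the TOML file
+engine = Engine(config_path="./vectorless.toml")
+
+# Keep the file but override a single field
+engine = Engine(config_path="./vectorless.toml", model="gpt-4o")
+```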
diff --git a/examples/python/advanced/main.py b/examples/python/advanced/main.py
new file mode 100644
index 00000000..d223ad02
--- /dev/null
+++ b/examples/python/advanced/main.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+Advanced example - Full Configuration File.
+
+This example demonstrates how to use a full configuration file
+for fine-grained control over all settings.
+
+Usage:
+ cp ../../../config.toml ./vectorless.toml
+ # Edit vectorless.toml to customize settings
+ python main.py
+"""
+
+import os
+from vectorless import Engine, IndexContext
+
+# Path to config file (relative to this script)
+CONFIG_PATH = "./vectorless.toml"
+WORKSPACE = "./workspace"
+
+
+def main():
+ print("=== Vectorless Advanced Example (Full Configuration) ===\n")
+
+ # Check if config file exists
+ if not os.path.exists(CONFIG_PATH):
+ print(f"Error: Config file not found: {CONFIG_PATH}")
+ print("\nCreate it by copying the example:")
+ print(f" cp ../../../config.toml {CONFIG_PATH}")
+ print("\nThen edit it to customize your settings.")
+ return
+
+ # Create engine with config file
+ engine = Engine(config_path=CONFIG_PATH)
+
+ print(f"✓ Engine created with config file: {CONFIG_PATH}\n")
+
+ # Index a document
+ content = """
+# System Documentation
+
+## Architecture
+
+The system consists of three main components:
+
+1. **Index Pipeline** - Parses documents and builds a navigable tree
+2. **Retrieval Pipeline** - Queries and retrieves relevant content
+3. **Pilot** - LLM-powered navigation guide
+
+## Configuration Options
+
+### LLM Settings
+- `model`: The LLM model to use (e.g., "gpt-4o", "gpt-4o-mini")
+- `endpoint`: API endpoint URL
+- `api_key`: Your API key
+- `temperature`: Generation temperature (0.0 for deterministic)
+
+### Retrieval Settings
+- `top_k`: Number of results to return
+- `max_iterations`: Maximum search iterations
+- `beam_width`: Beam width for multi-path search
+
+### Storage Settings
+- `workspace_dir`: Directory for persisted documents
+- `cache_size`: LRU cache size
+- `compression`: Enable/disable compression
+
+## Performance Tuning
+
+For faster retrieval:
+- Use a smaller model like gpt-4o-mini
+- Reduce max_iterations
+- Enable caching
+
+For higher accuracy:
+- Use a more capable model like gpt-4o
+- Increase beam_width
+- Enable multi-turn decomposition
+"""
+ ctx = IndexContext.from_content(content, name="system_docs", format="markdown")
+ doc_id = engine.index(ctx)
+ print(f"✓ Indexed: {doc_id}\n")
+
+ # Query examples
+ questions = [
+ "What are the main components?",
+ "How can I improve retrieval speed?",
+ "What settings are available?",
+ ]
+
+ for q in questions:
+ result = engine.query(doc_id, q)
+ print(f"Q: {q}")
+ print(f"A: {result.content[:150]}...")
+ print(f" Score: {result.score:.2f}\n")
+
+ # Cleanup
+ engine.remove(doc_id)
+ print("✓ Cleaned up")
+
+ # Print configuration info
+ print("\n" + "=" * 60)
+ print("Configuration Priority")
+ print("=" * 60)
+ print("""
+1. Default configuration
+2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)
+3. Explicit config file (config_path parameter)
+4. Environment variables (OPENAI_API_KEY, etc.)
+5. Constructor parameters (api_key, model, etc.)
+""")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/python/advanced/pyproject.toml b/examples/python/advanced/pyproject.toml
new file mode 100644
index 00000000..a85a964d
--- /dev/null
+++ b/examples/python/advanced/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-advanced-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python/basic/README.md b/examples/python/basic/README.md
new file mode 100644
index 00000000..e74975de
--- /dev/null
+++ b/examples/python/basic/README.md
@@ -0,0 +1,16 @@
+# Basic Example - Zero Configuration
+
+The simplest way to use Vectorless.
+
+## Setup
+
+```bash
+pip install vectorless
+export OPENAI_API_KEY="sk-..."
+```
+
+## Run
+
+```bash
+python main.py
+```
diff --git a/examples/python/basic/main.py b/examples/python/basic/main.py
new file mode 100644
index 00000000..4ae34b42
--- /dev/null
+++ b/examples/python/basic/main.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+Basic example - Zero Configuration.
+
+This example demonstrates the simplest way to use Vectorless.
+Just set the OPENAI_API_KEY environment variable and you're ready to go.
+
+Usage:
+ export OPENAI_API_KEY="sk-..."
+ python main.py
+"""
+
+import os
+import tempfile
+from vectorless import Engine, IndexContext
+
+
+def main():
+ print("=== Vectorless Basic Example (Zero Configuration) ===\n")
+
+ # Zero configuration: Just set OPENAI_API_KEY environment variable
+ with tempfile.TemporaryDirectory() as workspace:
+ engine = Engine(workspace=workspace)
+
+ print("✓ Engine created (using OPENAI_API_KEY from environment)\n")
+
+ # Index from text content
+ content = """
+# Technical Manual
+
+## Chapter 1: Introduction
+
+Vectorless is a library for querying structured documents using natural language.
+
+## Chapter 2: Installation
+
+Install with pip:
+```
+pip install vectorless
+```
+
+## Chapter 3: Usage
+
+```python
+from vectorless import Engine, IndexContext
+
+engine = Engine(workspace="./data")
+ctx = IndexContext.from_file("./report.pdf")
+doc_id = engine.index(ctx)
+
+result = engine.query(doc_id, "What is the total revenue?")
+print(result.content)
+```
+"""
+ ctx = IndexContext.from_content(content, name="manual", format="markdown")
+ doc_id = engine.index(ctx)
+ print(f"✓ Indexed: {doc_id}\n")
+
+ # Query
+ result = engine.query(doc_id, "How do I install vectorless?")
+ print("Query: How do I install vectorless?")
+ print(f"Score: {result.score:.2f}")
+ print(f"Result: {result.content[:200]}...\n")
+
+ # Cleanup
+ engine.remove(doc_id)
+ print("✓ Cleaned up")
+
+ print("\n=== Done ===")
+
+
+if __name__ == "__main__":
+ if not os.environ.get("OPENAI_API_KEY"):
+ print("Error: OPENAI_API_KEY environment variable not set.")
+ print("Set it with: export OPENAI_API_KEY='sk-...'")
+ exit(1)
+
+ main()
diff --git a/examples/python/basic/pyproject.toml b/examples/python/basic/pyproject.toml
new file mode 100644
index 00000000..d99ee750
--- /dev/null
+++ b/examples/python/basic/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-basic-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python/custom_config/README.md b/examples/python/custom_config/README.md
new file mode 100644
index 00000000..b743098a
--- /dev/null
+++ b/examples/python/custom_config/README.md
@@ -0,0 +1,48 @@
+# Custom Configuration Example
+
+Use your own API key, model, and endpoint.
+
+## Setup
+
+```bash
+pip install vectorless
+```
+
+## Configure
+
+Edit `main.py` and update the settings:
+
+```python
+API_KEY = "your-api-key"
+MODEL = "gpt-4o-mini" # or "deepseek-chat", "claude-3-5-sonnet", etc.
+ENDPOINT = "https://api.openai.com/v1" # or your custom endpoint
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Other Providers
+
+### DeepSeek
+```python
+API_KEY = "sk-..."
+MODEL = "deepseek-chat"
+ENDPOINT = "https://api.deepseek.com/v1"
+```
+
+### Azure OpenAI
+```python
+API_KEY = "your-azure-key"
+MODEL = "gpt-4o"
+ENDPOINT = "https://your-resource.openai.azure.com/openai/deployments/your-deployment"
+```
+
+### Local LLM (Ollama)
+```python
+API_KEY = None # Not needed
+MODEL = "llama3"
+ENDPOINT = "http://localhost:11434/v1"
+```
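+
+Whichever provider you choose, the three values are passed straight to the constructor. A sketch (key, model, and endpoint are the placeholders from the snippets above):
+
+```python
+from vectorless import Engine
+
+engine = Engine(
+    workspace="./workspace",
+    api_key=API_KEY,    # placeholder from the snippets above
+    model=MODEL,
+    endpoint=ENDPOINT,
+)
+```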
diff --git a/examples/python/custom_config/main.py b/examples/python/custom_config/main.py
new file mode 100644
index 00000000..d6e0bda4
--- /dev/null
+++ b/examples/python/custom_config/main.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+"""
+Custom configuration example - Using your own API key, model, and endpoint.
+
+This example demonstrates how to use custom LLM settings without a config file.
+Useful when you want to use different providers like DeepSeek, Azure OpenAI, etc.
+
+Usage:
+ python main.py
+"""
+
+import tempfile
+from vectorless import Engine, IndexContext
+
+# ============================================================
+# Configure your settings here
+# ============================================================
+API_KEY = "your-api-key"                  # Your API key
+MODEL = "gpt-4o-mini"                     # Model name
+ENDPOINT = "https://api.openai.com/v1"    # API endpoint
+WORKSPACE = "./workspace"                 # Workspace directory
+
+
+def main():
+ print("=== Vectorless Custom Configuration Example ===\n")
+
+ # Create engine with custom settings
+ engine = Engine(
+ workspace=WORKSPACE,
+ api_key=API_KEY,
+ model=MODEL,
+ endpoint=ENDPOINT,
+ )
+
+    print("✓ Engine created with custom settings")
+ print(f" Model: {MODEL}")
+ print(f" Endpoint: {ENDPOINT}\n")
+
+ # Index a document
+ content = """
+# Product Documentation
+
+## Overview
+This product helps you manage documents intelligently using LLM-powered navigation.
+
+## Features
+- Fast indexing with tree-based structure
+- Accurate retrieval using hybrid search
+- Easy to use Python and Rust APIs
+- Support for PDF, Markdown, HTML, and DOCX
+
+## Installation
+
+Install with pip:
+```bash
+pip install vectorless
+```
+
+## Quick Start
+
+```python
+from vectorless import Engine, IndexContext
+
+# Create engine
+engine = Engine(workspace="./data")
+
+# Index a document
+ctx = IndexContext.from_file("./report.pdf")
+doc_id = engine.index(ctx)
+
+# Query
+result = engine.query(doc_id, "What is the total revenue?")
+print(result.content)
+```
+
+## Configuration
+
+Vectorless supports multiple configuration methods:
+1. Zero configuration - just set OPENAI_API_KEY
+2. Custom settings - pass api_key, model, endpoint
+3. Full config file - use vectorless.toml
+"""
+ ctx = IndexContext.from_content(content, name="docs", format="markdown")
+ doc_id = engine.index(ctx)
+ print(f"✓ Indexed: {doc_id}\n")
+
+ # Check document info
+ docs = engine.list_docs()
+ print(f"Documents in workspace: {len(docs)}")
+ for d in docs:
+ print(f" - {d.name} (id: {d.id}, format: {d.format})")
+ print()
+
+ # Query
+ result = engine.query(doc_id, "How do I install the product?")
+ print("Query: How do I install the product?")
+ print(f"Score: {result.score:.2f}")
+ print(f"Result: {result.content}\n")
+
+ # Another query
+ result = engine.query(doc_id, "What features are available?")
+ print("Query: What features are available?")
+ print(f"Score: {result.score:.2f}")
+ print(f"Result: {result.content}\n")
+
+ # Cleanup
+ engine.remove(doc_id)
+ print("✓ Cleaned up")
+
+ print("\n=== Done ===")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/python/custom_config/pyproject.toml b/examples/python/custom_config/pyproject.toml
new file mode 100644
index 00000000..d316077d
--- /dev/null
+++ b/examples/python/custom_config/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-custom-config-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python_basic.py b/examples/python_basic.py
deleted file mode 100644
index e9801e79..00000000
--- a/examples/python_basic.py
+++ /dev/null
@@ -1,273 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) 2026 vectorless developers
-# SPDX-License-Identifier: Apache-2.0
-
-"""
-Basic example demonstrating the vectorless Python library.
-
-This example shows:
-1. Creating an Engine with workspace
-2. Indexing documents from different sources
-3. Querying indexed documents
-4. Managing documents (list, exists, remove)
-
-Prerequisites:
- pip install vectorless
- export OPENAI_API_KEY="sk-..."
-
-Usage:
- python python_basic.py
-"""
-
-import os
-import tempfile
-from pathlib import Path
-
-from vectorless import Engine, IndexContext, VectorlessError
-
-
-def main():
- # Create a temporary workspace for this example
- with tempfile.TemporaryDirectory() as workspace:
- print(f"Workspace: {workspace}")
- print()
-
- # ============================================================
- # 1. Create Engine
- # ============================================================
- print("=" * 60)
- print("1. Creating Engine")
- print("=" * 60)
-
- # Option A: Use OPENAI_API_KEY environment variable
- engine = Engine(workspace=workspace)
-
- # Option B: Explicit API key
- # engine = Engine(
- # workspace=workspace,
- # api_key="sk-...",
- # model="gpt-4o-mini", # optional
- # )
-
- print(f"Engine created successfully!")
- print(f"Initial document count: {engine.len()}")
- print()
-
- # ============================================================
- # 2. Index Documents
- # ============================================================
- print("=" * 60)
- print("2. Indexing Documents")
- print("=" * 60)
-
- # 2a. Index from text content (Markdown)
- markdown_content = """
-# Technical Manual
-
-## Chapter 1: Introduction
-
-This document describes the architecture of our system.
-
-## Chapter 2: Installation
-
-### System Requirements
-
-- Python 3.9+
-- Rust 1.75+
-
-### Steps
-
-1. Install dependencies
-2. Configure environment
-3. Run the application
-
-## Chapter 3: API Reference
-
-### Engine
-
-The main entry point for vectorless.
-
-```python
-engine = Engine(workspace="./data")
-```
-
-### IndexContext
-
-Context for indexing documents from various sources.
-"""
- ctx_md = IndexContext.from_text(
- markdown_content,
- name="technical_manual",
- format="markdown"
- )
- doc_id_md = engine.index(ctx_md)
- print(f"Indexed markdown document: {doc_id_md}")
-
- # 2b. Index from text content (HTML)
-    html_content = """
-<html>
-<head>
-    <title>Product Guide</title>
-</head>
-<body>
-    <h1>Product Guide</h1>
-    <h2>Getting Started</h2>
-    <p>Welcome to our product. This guide will help you get started.</p>
-    <h2>Features</h2>
-    <ul>
-        <li>Fast indexing</li>
-        <li>Accurate retrieval</li>
-        <li>Easy to use API</li>
-    </ul>
-</body>
-</html>
-"""
- ctx_html = IndexContext.from_text(
- html_content,
- name="product_guide",
- format="html"
- )
- doc_id_html = engine.index(ctx_html)
- print(f"Indexed HTML document: {doc_id_html}")
-
- # 2c. Index from text content (plain text)
- text_content = """
-Meeting Notes - Q4 Planning
-
-Date: 2024-01-15
-
-Attendees: Alice, Bob, Charlie
-
-Agenda:
-1. Review Q3 performance
-2. Set Q4 goals
-3. Resource allocation
-
-Key Decisions:
-- Increase marketing budget by 20%
-- Launch new product in March
-- Hire 5 additional engineers
-"""
- ctx_text = IndexContext.from_text(
- text_content,
- name="meeting_notes",
- format="text"
- )
- doc_id_text = engine.index(ctx_text)
- print(f"Indexed text document: {doc_id_text}")
-
- # 2d. Index from file (if you have actual files)
- # ctx_file = IndexContext.from_file("./report.pdf")
- # doc_id_file = engine.index(ctx_file)
- # print(f"Indexed file: {doc_id_file}")
-
- print(f"\nTotal documents indexed: {engine.len()}")
- print()
-
- # ============================================================
- # 3. List Documents
- # ============================================================
- print("=" * 60)
- print("3. Listing Documents")
- print("=" * 60)
-
- docs = engine.list_docs()
- for doc in docs:
- print(f" - {doc.name} (id: {doc.id}, format: {doc.format})")
- if doc.line_count:
- print(f" Lines: {doc.line_count}")
- print()
-
- # ============================================================
- # 4. Query Documents
- # ============================================================
- print("=" * 60)
- print("4. Querying Documents")
- print("=" * 60)
-
- # Query the technical manual
- questions = [
- "What are the system requirements?",
- "How do I create an Engine?",
- "What are the installation steps?",
- ]
-
- for question in questions:
- result = engine.query(doc_id_md, question)
- print(f"Q: {question}")
- print(f"A: {result.content[:200]}...")
- print(f" Score: {result.score:.2f}")
- print()
-
- # Query the meeting notes
- result = engine.query(doc_id_text, "What was decided about the marketing budget?")
- print(f"Q: What was decided about the marketing budget?")
- print(f"A: {result.content}")
- print(f" Score: {result.score:.2f}")
- print()
-
- # ============================================================
- # 5. Check Document Existence
- # ============================================================
- print("=" * 60)
- print("5. Checking Document Existence")
- print("=" * 60)
-
- print(f"Document {doc_id_md[:8]}... exists: {engine.exists(doc_id_md)}")
- print(f"Document 'nonexistent' exists: {engine.exists('nonexistent')}")
- print()
-
- # ============================================================
- # 6. Error Handling
- # ============================================================
- print("=" * 60)
- print("6. Error Handling")
- print("=" * 60)
-
- try:
- engine.query("nonexistent_doc_id", "question")
- except VectorlessError as e:
- print(f"Caught error: {e.message}")
- print(f"Error kind: {e.kind}")
- print()
-
- # ============================================================
- # 7. Remove Documents
- # ============================================================
- print("=" * 60)
- print("7. Removing Documents")
- print("=" * 60)
-
- # Remove the HTML document
- removed = engine.remove(doc_id_html)
- print(f"Removed {doc_id_html}: {removed}")
- print(f"Documents remaining: {engine.len()}")
-
- # Try to remove again (should return False)
- removed_again = engine.remove(doc_id_html)
- print(f"Remove again: {removed_again}")
- print()
-
- # ============================================================
- # 8. Clear All Documents
- # ============================================================
- print("=" * 60)
- print("8. Clearing All Documents")
- print("=" * 60)
-
- cleared_count = engine.clear()
- print(f"Cleared {cleared_count} documents")
- print(f"Final document count: {engine.len()}")
- print()
-
- print("=" * 60)
- print("Example completed successfully!")
- print("=" * 60)
-
-
-if __name__ == "__main__":
- # Check for API key
- if not os.environ.get("OPENAI_API_KEY"):
- print("Warning: OPENAI_API_KEY environment variable not set.")
- print("Some operations may fail without an API key.")
- print()
-
- main()
diff --git a/examples/rust/advanced.rs b/examples/rust/advanced.rs
new file mode 100644
index 00000000..946f619b
--- /dev/null
+++ b/examples/rust/advanced.rs
@@ -0,0 +1,68 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Advanced usage example - Full Configuration.
+//!
+//! This example demonstrates how to use a full configuration file
+//! for advanced use cases where you need fine-grained control.
+//!
+//! # Usage
+//!
+//! ```bash
+//! # First, copy the example config and edit it
+//! cp config.toml ./my_vectorless.toml
+//! # Edit my_vectorless.toml to customize settings
+//!
+//! cargo run --example advanced
+//! ```
+
+use vectorless::{Engine, IndexContext};
+
+#[tokio::main]
+async fn main() -> vectorless::Result<()> {
+ println!("=== Vectorless Advanced Example (Full Configuration) ===\n");
+
+ // Method 1: Use explicit config file path
+ // This loads all settings from the specified config file
+ let client = Engine::builder()
+ .with_config_path("./config.toml") // or "./my_vectorless.toml"
+ .build()
+ .await
+ .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
+
+ println!("✓ Client created with config file\n");
+
+ // Index a document
+ let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
+ println!("✓ Indexed: {}\n", doc_id);
+
+ // Query
+ let result = client.query(&doc_id, "What features does Vectorless provide?").await?;
+ println!("Query: What features does Vectorless provide?");
+ println!("Score: {:.2}", result.score);
+ if !result.content.is_empty() {
+ let preview: String = result.content.chars().take(200).collect();
+ println!("Result: {}...\n", preview);
+ }
+
+ // Cleanup
+ client.remove(&doc_id).await?;
+ println!("✓ Cleaned up");
+
+ println!("\n=== Configuration Options ===\n");
+ println!("Configuration Priority (later overrides earlier):");
+ println!(" 1. Default configuration");
+ println!(" 2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)");
+ println!(" 3. Explicit config file (with_config_path)");
+ println!(" 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)");
+ println!(" 5. Builder methods (with_openai, with_model, etc.)");
+ println!();
+ println!("Environment Variables:");
+ println!(" OPENAI_API_KEY - LLM API key");
+ println!(" VECTORLESS_MODEL - Default model name");
+ println!(" VECTORLESS_ENDPOINT - API endpoint URL");
+ println!(" VECTORLESS_WORKSPACE - Workspace directory");
+
+ println!("\n=== Done ===");
+ Ok(())
+}
diff --git a/examples/basic.rs b/examples/rust/basic.rs
similarity index 100%
rename from examples/basic.rs
rename to examples/rust/basic.rs
diff --git a/examples/batch_processing.rs b/examples/rust/batch_processing.rs
similarity index 100%
rename from examples/batch_processing.rs
rename to examples/rust/batch_processing.rs
diff --git a/examples/cli_tool.rs b/examples/rust/cli_tool.rs
similarity index 100%
rename from examples/cli_tool.rs
rename to examples/rust/cli_tool.rs
diff --git a/examples/content_aggregation.rs b/examples/rust/content_aggregation.rs
similarity index 100%
rename from examples/content_aggregation.rs
rename to examples/rust/content_aggregation.rs
diff --git a/examples/rust/custom_config.rs b/examples/rust/custom_config.rs
new file mode 100644
index 00000000..12eaedc4
--- /dev/null
+++ b/examples/rust/custom_config.rs
@@ -0,0 +1,92 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Custom configuration example - Using your own API key, model, and endpoint.
+//!
+//! This example demonstrates how to use custom LLM settings without a config file.
+//! Useful when you want to use different providers like Azure OpenAI, DeepSeek, etc.
+//!
+//! # Usage
+//!
+//! ```bash
+//! cargo run --example custom_config
+//! ```
+
+use vectorless::{Engine, IndexContext};
+
+#[tokio::main]
+async fn main() -> vectorless::Result<()> {
+ println!("=== Vectorless Custom Configuration Example ===\n");
+
+ // ============================================================
+ // Option 1: Use environment variables
+ // ============================================================
+ // Set these environment variables:
+ // - OPENAI_API_KEY or VECTORLESS_API_KEY
+ // - VECTORLESS_MODEL (optional, default: gpt-4o)
+ // - VECTORLESS_ENDPOINT (optional, default: OpenAI endpoint)
+ // - VECTORLESS_WORKSPACE (optional, default: ./workspace)
+
+ // ============================================================
+ // Option 2: Use builder methods (recommended for custom config)
+ // ============================================================
+
+ // Example: Use DeepSeek API
+ let client = Engine::builder()
+ .with_workspace("./workspace")
+ .with_model("deepseek-chat", Some("sk-your-deepseek-key".to_string()))
+ .with_endpoint("https://api.deepseek.com/v1")
+ .build()
+ .await
+ .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
+
+ println!("✓ Client created with custom settings\n");
+
+ // Index a document
+ let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
+ println!("✓ Indexed: {}\n", doc_id);
+
+ // Query
+ let result = client.query(&doc_id, "What is Vectorless?").await?;
+ println!("Query: What is Vectorless?");
+ println!("Score: {:.2}", result.score);
+ if !result.content.is_empty() {
+ let preview: String = result.content.chars().take(200).collect();
+ println!("Result: {}...\n", preview);
+ }
+
+ // Cleanup
+ client.remove(&doc_id).await?;
+ println!("✓ Cleaned up");
+
+ // ============================================================
+ // Other provider examples (commented out)
+ // ============================================================
+
+ // Azure OpenAI:
+ // let client = Engine::builder()
+ // .with_workspace("./workspace")
+ // .with_model("gpt-4o", Some("your-azure-key".to_string()))
+ // .with_endpoint("https://your-resource.openai.azure.com/openai/deployments/your-deployment")
+ // .build()
+ // .await?;
+
+ // Local LLM (e.g., Ollama with OpenAI-compatible API):
+ // let client = Engine::builder()
+ // .with_workspace("./workspace")
+ // .with_model("llama3", None) // No API key needed
+ // .with_endpoint("http://localhost:11434/v1")
+ // .build()
+ // .await?;
+
+ // Anthropic Claude (via OpenAI-compatible proxy):
+ // let client = Engine::builder()
+ // .with_workspace("./workspace")
+ // .with_model("claude-3-5-sonnet-20241022", Some("sk-ant-...".to_string()))
+ // .with_endpoint("https://api.anthropic.com/v1")
+ // .build()
+ // .await?;
+
+ println!("\n=== Done ===");
+ Ok(())
+}
diff --git a/examples/custom_pilot.rs b/examples/rust/custom_pilot.rs
similarity index 100%
rename from examples/custom_pilot.rs
rename to examples/rust/custom_pilot.rs
diff --git a/examples/events.rs b/examples/rust/events.rs
similarity index 100%
rename from examples/events.rs
rename to examples/rust/events.rs
diff --git a/examples/feedback_learning.rs b/examples/rust/feedback_learning.rs
similarity index 100%
rename from examples/feedback_learning.rs
rename to examples/rust/feedback_learning.rs
diff --git a/examples/html_parser.rs b/examples/rust/html_parser.rs
similarity index 100%
rename from examples/html_parser.rs
rename to examples/rust/html_parser.rs
diff --git a/examples/index.rs b/examples/rust/index.rs
similarity index 100%
rename from examples/index.rs
rename to examples/rust/index.rs
diff --git a/examples/markdownflow.rs b/examples/rust/markdownflow.rs
similarity index 100%
rename from examples/markdownflow.rs
rename to examples/rust/markdownflow.rs
diff --git a/examples/memo_cache.rs b/examples/rust/memo_cache.rs
similarity index 100%
rename from examples/memo_cache.rs
rename to examples/rust/memo_cache.rs
diff --git a/examples/multi_format.rs b/examples/rust/multi_format.rs
similarity index 100%
rename from examples/multi_format.rs
rename to examples/rust/multi_format.rs
diff --git a/examples/reference_following.rs b/examples/rust/reference_following.rs
similarity index 100%
rename from examples/reference_following.rs
rename to examples/rust/reference_following.rs
diff --git a/examples/retrieve.rs b/examples/rust/retrieve.rs
similarity index 100%
rename from examples/retrieve.rs
rename to examples/rust/retrieve.rs
diff --git a/examples/session.rs b/examples/rust/session.rs
similarity index 100%
rename from examples/session.rs
rename to examples/rust/session.rs
diff --git a/examples/storage_backend.rs b/examples/rust/storage_backend.rs
similarity index 100%
rename from examples/storage_backend.rs
rename to examples/rust/storage_backend.rs
diff --git a/examples/storage_compression.rs b/examples/rust/storage_compression.rs
similarity index 100%
rename from examples/storage_compression.rs
rename to examples/rust/storage_compression.rs
diff --git a/examples/storage_migration.rs b/examples/rust/storage_migration.rs
similarity index 100%
rename from examples/storage_migration.rs
rename to examples/rust/storage_migration.rs
diff --git a/examples/storage_workspace.rs b/examples/rust/storage_workspace.rs
similarity index 100%
rename from examples/storage_workspace.rs
rename to examples/rust/storage_workspace.rs
diff --git a/examples/strategy_cross_document.rs b/examples/rust/strategy_cross_document.rs
similarity index 100%
rename from examples/strategy_cross_document.rs
rename to examples/rust/strategy_cross_document.rs
diff --git a/examples/strategy_hybrid.rs b/examples/rust/strategy_hybrid.rs
similarity index 100%
rename from examples/strategy_hybrid.rs
rename to examples/rust/strategy_hybrid.rs
diff --git a/examples/strategy_page_range.rs b/examples/rust/strategy_page_range.rs
similarity index 100%
rename from examples/strategy_page_range.rs
rename to examples/rust/strategy_page_range.rs
diff --git a/examples/streaming.rs b/examples/rust/streaming.rs
similarity index 100%
rename from examples/streaming.rs
rename to examples/rust/streaming.rs
diff --git a/python/src/lib.rs b/python/src/lib.rs
index 72c2f061..fc2cf730 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -28,6 +28,11 @@ pub struct VectorlessError {
#[pymethods]
impl VectorlessError {
+ #[new]
+ fn new_py(message: String, kind: String) -> Self {
+ Self { message, kind }
+ }
+
#[getter]
fn message(&self) -> &str {
&self.message
@@ -96,7 +101,7 @@ fn to_py_err(e: RustError) -> PyErr {
/// # From bytes
/// ctx = IndexContext.from_bytes(data, name="doc", format="pdf")
/// ```
-#[pyclass]
+#[pyclass(name = "IndexContext")]
pub struct PyIndexContext {
inner: IndexContext,
}
@@ -134,7 +139,7 @@ impl PyIndexContext {
/// IndexContext for the content.
#[staticmethod]
#[pyo3(signature = (content, name=None, format="markdown"))]
-    fn from_text(content: String, name: Option<String>, format: &str) -> PyResult<Self> {
+    fn from_content(content: String, name: Option<String>, format: &str) -> PyResult<Self> {
let doc_format = parse_format(format)?;
let mut ctx = IndexContext::from_content(&content, doc_format);
if let Some(n) = name {
@@ -168,9 +173,8 @@ fn parse_format(format: &str) -> PyResult<DocumentFormat> {
"pdf" => Ok(DocumentFormat::Pdf),
"docx" | "doc" => Ok(DocumentFormat::Docx),
"html" | "htm" => Ok(DocumentFormat::Html),
- "text" | "txt" => Ok(DocumentFormat::Text),
_ => Err(PyErr::from(VectorlessError::new(
- format!("Unknown format: {}", format),
+ format!("Unknown format: {}. Supported: markdown, pdf, docx, html", format),
"config",
))),
}
@@ -181,7 +185,7 @@ fn parse_format(format: &str) -> PyResult<DocumentFormat> {
// ============================================================
/// Result of a document query.
-#[pyclass]
+#[pyclass(name = "QueryResult")]
pub struct PyQueryResult {
inner: QueryResult,
}
@@ -227,7 +231,7 @@ impl PyQueryResult {
// ============================================================
/// Information about an indexed document.
-#[pyclass]
+#[pyclass(name = "DocumentInfo")]
pub struct PyDocumentInfo {
inner: DocumentInfo,
}
@@ -284,7 +288,16 @@ impl PyDocumentInfo {
/// The main vectorless engine.
///
-/// Create an engine with a workspace directory:
+/// Configuration priority (later overrides earlier):
+/// 1. Default configuration
+/// 2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)
+/// 3. Explicit config file (config_path parameter)
+/// 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
+/// 5. Constructor parameters (api_key, model, endpoint) - highest priority
+///
+/// # Zero Configuration (Recommended)
+///
+/// Just set the OPENAI_API_KEY environment variable:
///
/// ```python
/// from vectorless import Engine
@@ -292,12 +305,18 @@ impl PyDocumentInfo {
/// engine = Engine(workspace="./data")
/// ```
///
-/// Or with an explicit API key:
+/// # With Custom Model
+///
+/// ```python
+/// engine = Engine(workspace="./data", model="gpt-4o-mini")
+/// ```
+///
+/// # With Full Config File (Advanced)
///
/// ```python
-/// engine = Engine(workspace="./data", api_key="sk-...")
+/// engine = Engine(config_path="./vectorless.toml")
/// ```
-#[pyclass]
+#[pyclass(name = "Engine")]
pub struct PyEngine {
    inner: Arc<Engine>,
rt: Runtime,
@@ -308,17 +327,26 @@ impl PyEngine {
/// Create a new Engine.
///
/// Args:
- /// workspace: Path to the workspace directory.
+ /// workspace: Path to the workspace directory (optional if config_path provides it).
+ /// config_path: Path to configuration file (optional, advanced usage).
/// api_key: Optional API key. If not provided, uses OPENAI_API_KEY env var.
- /// model: Optional model name. Default: "gpt-4o-mini".
+ /// model: Optional model name. Default: "gpt-4o".
/// endpoint: Optional API endpoint.
///
+ /// Configuration priority (later overrides earlier):
+ /// 1. Default configuration
+ /// 2. Auto-detected config file
+ /// 3. config_path parameter
+ /// 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
+ /// 5. Constructor parameters (api_key, model, endpoint)
+ ///
/// Raises:
/// VectorlessError: If engine creation fails.
#[new]
- #[pyo3(signature = (workspace, api_key=None, model=None, endpoint=None))]
+ #[pyo3(signature = (workspace=None, config_path=None, api_key=None, model=None, endpoint=None))]
fn new(
-        workspace: String,
+        workspace: Option<String>,
+        config_path: Option<String>,
         api_key: Option<String>,
         model: Option<String>,
         endpoint: Option<String>,
@@ -334,18 +362,31 @@ impl PyEngine {
let resolved_api_key = api_key.or_else(|| std::env::var("OPENAI_API_KEY").ok());
let engine = rt.block_on(async {
- let mut builder = EngineBuilder::new().with_workspace(&workspace);
+ let mut builder = EngineBuilder::new();
- if let Some(key) = resolved_api_key {
- builder = builder.with_openai(key);
+ // Set config path first (if provided)
+ if let Some(path) = &config_path {
+ builder = builder.with_config_path(path);
+ }
+
+ // Set workspace (if provided)
+ if let Some(ws) = &workspace {
+ builder = builder.with_workspace(ws);
}
- if let Some(m) = model {
- builder = builder.with_model(&m, None);
+        // Set model (pass None so an already-set API key is not overwritten)
+ if let Some(m) = &model {
+ builder = builder.with_model(m, None);
}
- if let Some(e) = endpoint {
- builder = builder.with_endpoint(&e);
+ // Set endpoint
+ if let Some(e) = &endpoint {
+ builder = builder.with_endpoint(e);
+ }
+
+ // Set API key last (this ensures it's not overwritten)
+ if let Some(key) = resolved_api_key {
+ builder = builder.with_openai(key);
}
builder.build().await
@@ -506,7 +547,7 @@ impl PyEngine {
/// print(result.content)
/// ```
#[pymodule]
-fn _vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
+fn vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<PyEngine>()?;
    m.add_class::<PyIndexContext>()?;
    m.add_class::<PyQueryResult>()?;
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 78f65624..fe9729b9 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -11,7 +11,104 @@ documentation = "https://docs.rs/vectorless"
keywords = ["rag", "document", "retrieval", "indexing", "llm"]
categories = ["text-processing", "data-structures", "algorithms"]
readme = "../README.md"
-exclude = ["samples/", "docs/", ".*"]
+exclude = ["docs/", ".*"]
+
+# Example directory configuration
+[[example]]
+name = "basic"
+path = "../examples/rust/basic.rs"
+
+[[example]]
+name = "advanced"
+path = "../examples/rust/advanced.rs"
+
+[[example]]
+name = "batch_processing"
+path = "../examples/rust/batch_processing.rs"
+
+[[example]]
+name = "cli_tool"
+path = "../examples/rust/cli_tool.rs"
+
+[[example]]
+name = "content_aggregation"
+path = "../examples/rust/content_aggregation.rs"
+
+[[example]]
+name = "custom_config"
+path = "../examples/rust/custom_config.rs"
+
+[[example]]
+name = "custom_pilot"
+path = "../examples/rust/custom_pilot.rs"
+
+[[example]]
+name = "events"
+path = "../examples/rust/events.rs"
+
+[[example]]
+name = "feedback_learning"
+path = "../examples/rust/feedback_learning.rs"
+
+[[example]]
+name = "html_parser"
+path = "../examples/rust/html_parser.rs"
+
+[[example]]
+name = "index"
+path = "../examples/rust/index.rs"
+
+[[example]]
+name = "markdownflow"
+path = "../examples/rust/markdownflow.rs"
+
+[[example]]
+name = "multi_format"
+path = "../examples/rust/multi_format.rs"
+
+[[example]]
+name = "reference_following"
+path = "../examples/rust/reference_following.rs"
+
+[[example]]
+name = "retrieve"
+path = "../examples/rust/retrieve.rs"
+
+[[example]]
+name = "session"
+path = "../examples/rust/session.rs"
+
+[[example]]
+name = "storage_backend"
+path = "../examples/rust/storage_backend.rs"
+
+[[example]]
+name = "storage_compression"
+path = "../examples/rust/storage_compression.rs"
+
+[[example]]
+name = "storage_migration"
+path = "../examples/rust/storage_migration.rs"
+
+[[example]]
+name = "storage_workspace"
+path = "../examples/rust/storage_workspace.rs"
+
+[[example]]
+name = "strategy_cross_document"
+path = "../examples/rust/strategy_cross_document.rs"
+
+[[example]]
+name = "strategy_hybrid"
+path = "../examples/rust/strategy_hybrid.rs"
+
+[[example]]
+name = "strategy_page_range"
+path = "../examples/rust/strategy_page_range.rs"
+
+[[example]]
+name = "streaming"
+path = "../examples/rust/streaming.rs"
[dependencies]
# Async runtime
diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs
index 61621f23..70c3682b 100644
--- a/rust/src/client/builder.rs
+++ b/rust/src/client/builder.rs
@@ -6,27 +6,67 @@
//! This module provides [`EngineBuilder`] for configuring and building
//! [`Engine`] instances with sensible defaults.
//!
-//! # Example
+//! # Configuration Priority
+//!
+//! Configuration is applied in this order (later overrides earlier):
+//! 1. Default configuration
+//! 2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`)
+//! 3. Explicit config file (`with_config_path`)
+//! 4. Environment variables (`OPENAI_API_KEY`, `VECTORLESS_MODEL`, etc.)
+//! 5. Builder methods (`with_openai`, `with_model`, etc.) - highest priority
+//!
+//! # Environment Variables
+//!
+//! | Variable | Description |
+//! |----------|-------------|
+//! | `OPENAI_API_KEY` | LLM API key |
+//! | `VECTORLESS_MODEL` | Default model name |
+//! | `VECTORLESS_ENDPOINT` | API endpoint URL |
+//! | `VECTORLESS_WORKSPACE` | Workspace directory |
+//!
+//! # Examples
+//!
+//! ## Zero Configuration (Recommended)
//!
//! ```rust,no_run
//! use vectorless::client::EngineBuilder;
//!
//! # #[tokio::main]
//! # async fn main() -> Result<(), vectorless::BuildError> {
-//! // Simple setup with workspace
+//! // Just set OPENAI_API_KEY environment variable
//! let engine = EngineBuilder::new()
-//! .with_workspace("./my_workspace")
-//! .with_openai(std::env::var("OPENAI_API_KEY").unwrap())
+//! .with_workspace("./data")
//! .build()
//! .await?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ## With Custom Model
+//!
+//! ```rust,no_run
+//! use vectorless::client::EngineBuilder;
//!
-//! // Advanced configuration
+//! # #[tokio::main]
+//! # async fn main() -> Result<(), vectorless::BuildError> {
//! let engine = EngineBuilder::new()
//! .with_workspace("./data")
-//! .with_model("gpt-4o", None)
-//! .with_endpoint("https://api.openai.com/v1")
-//! .with_top_k(10)
-//! .precise()
+//! .with_model("gpt-4o-mini", None) // Uses OPENAI_API_KEY from env
+//! .build()
+//! .await?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ## With Full Config File (Advanced)
+//!
+//! ```rust,no_run
+//! use vectorless::client::EngineBuilder;
+//!
+//! # #[tokio::main]
+//! # async fn main() -> Result<(), vectorless::BuildError> {
+//! let engine = EngineBuilder::new()
+//! .with_config_path("./vectorless.toml")
//! .build()
//! .await?;
//! # Ok(())
@@ -49,16 +89,25 @@ const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorless.toml"];
/// Builder for creating a [`Engine`] client.
///
/// The builder uses sensible defaults and automatically loads
-/// LLM configuration from environment variables or config files.
+/// configuration from config files and environment variables.
///
/// # Configuration Priority
///
-/// Configuration is loaded in this order (later overrides earlier):
+/// Configuration is applied in this order (later overrides earlier):
/// 1. Default configuration
-/// 2. Auto-detected config file
+/// 2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`)
/// 3. Explicit config file (`with_config_path`)
-/// 4. Custom config object (`with_config`)
-/// 5. Individual builder methods
+/// 4. Environment variables (`OPENAI_API_KEY`, `VECTORLESS_MODEL`, etc.)
+/// 5. Builder methods (`with_openai`, `with_model`, etc.) - highest priority
+///
+/// # Environment Variables
+///
+/// | Variable | Description |
+/// |----------|-------------|
+/// | `OPENAI_API_KEY` | LLM API key |
+/// | `VECTORLESS_MODEL` | Default model name |
+/// | `VECTORLESS_ENDPOINT` | API endpoint URL |
+/// | `VECTORLESS_WORKSPACE` | Workspace directory |
///
/// # Example
///
@@ -67,9 +116,9 @@ const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorless.toml"];
///
/// # #[tokio::main]
/// # async fn main() -> Result<(), vectorless::BuildError> {
+/// // Zero configuration - just set OPENAI_API_KEY environment variable
/// let client = EngineBuilder::new()
/// .with_workspace("./my_workspace")
-/// .with_openai(std::env::var("OPENAI_API_KEY").unwrap())
/// .build()
/// .await?;
/// # Ok(())
@@ -236,8 +285,8 @@ impl EngineBuilder {
/// Configure for OpenAI API.
///
- /// Uses `gpt-4o` model by default. Use [`with_model`](EngineBuilder::with_model)
- /// to specify a different model.
+    /// Sets the API key, and defaults the model to "gpt-4o" if no model has been set.
+ /// Use [`with_model`](EngineBuilder::with_model) before this to specify a different model.
///
/// # Example
///
@@ -256,7 +305,13 @@ impl EngineBuilder {
/// ```
#[must_use]
pub fn with_openai(self, api_key: impl Into) -> Self {
- self.with_model("gpt-4o", Some(api_key.into()))
+ let mut builder = self;
+ builder.api_key = Some(api_key.into());
+ // Only set default model if not already set
+ if builder.model.is_none() {
+ builder.model = Some("gpt-4o".to_string());
+ }
+ builder
}
/// Set the LLM model and optional API key.
@@ -284,7 +339,9 @@ impl EngineBuilder {
#[must_use]
pub fn with_model(mut self, model: impl Into, api_key: Option) -> Self {
self.model = Some(model.into());
- self.api_key = api_key;
+ if api_key.is_some() {
+ self.api_key = api_key;
+ }
self
}
@@ -357,6 +414,57 @@ impl EngineBuilder {
self
}
+ /// Apply environment variable overrides to a Config.
+ ///
+ /// This is used when a custom Config is provided via `with_config`
+ /// or when using default config without a config file.
+ fn apply_env_overrides(config: &mut Config) {
+ // OPENAI_API_KEY: Set API key for all LLM clients
+ if let Ok(api_key) = std::env::var("OPENAI_API_KEY") {
+ // Set default API key
+ config.llm.api_key = Some(api_key.clone());
+ // Override individual client API keys if not explicitly set
+ if config.llm.summary.api_key.is_none() {
+ config.llm.summary.api_key = Some(api_key.clone());
+ }
+ if config.llm.retrieval.api_key.is_none() {
+ config.llm.retrieval.api_key = Some(api_key.clone());
+ }
+ if config.llm.pilot.api_key.is_none() {
+            config.llm.pilot.api_key = Some(api_key.clone());
+        }
+        // Also set legacy config for backwards compatibility
+        if config.summary.api_key.is_none() {
+            config.summary.api_key = Some(api_key);
+ }
+ }
+
+ // VECTORLESS_MODEL: Set default model
+ if let Ok(model) = std::env::var("VECTORLESS_MODEL") {
+ config.llm.summary.model = model.clone();
+ config.llm.retrieval.model = model.clone();
+ config.llm.pilot.model = model.clone();
+ // Also set legacy config
+ config.summary.model = model.clone();
+ config.retrieval.model = model;
+ }
+
+ // VECTORLESS_ENDPOINT: Set API endpoint
+ if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") {
+ config.llm.summary.endpoint = endpoint.clone();
+ config.llm.retrieval.endpoint = endpoint.clone();
+ config.llm.pilot.endpoint = endpoint.clone();
+ // Also set legacy config
+ config.summary.endpoint = endpoint.clone();
+ config.retrieval.endpoint = endpoint;
+ }
+
+ // VECTORLESS_WORKSPACE: Set workspace directory
+ if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") {
+ config.storage.workspace_dir = PathBuf::from(workspace);
+ }
+ }
+
/// Search for config file in current directory and parent directories.
    fn find_config_file() -> Option<PathBuf> {
let current_dir = std::env::current_dir().ok()?;
@@ -414,8 +522,12 @@ impl EngineBuilder {
/// ```
    pub async fn build(self) -> Result<Engine, BuildError> {
// Load or create configuration
+ // ConfigLoader automatically applies environment variable overrides
let mut config = if let Some(config) = self.config {
- config
+ // Custom config - still apply env vars
+ let mut cfg = config;
+ Self::apply_env_overrides(&mut cfg);
+ cfg
} else if let Some(path) = self.config_path {
ConfigLoader::new()
.file(&path)
@@ -426,7 +538,10 @@ impl EngineBuilder {
BuildError::Config(format!("Failed to load {}: {}", config_path.display(), e))
})?
} else {
- Config::default()
+ // No config file - use defaults with env var overrides
+ let mut cfg = Config::default();
+ Self::apply_env_overrides(&mut cfg);
+ cfg
};
// Apply builder overrides to retrieval config
@@ -436,13 +551,24 @@ impl EngineBuilder {
// Apply individual overrides
if let Some(api_key) = self.api_key {
- config.retrieval.api_key = Some(api_key);
+ // Set API key for both retrieval and summary
+ config.retrieval.api_key = Some(api_key.clone());
+ config.summary.api_key = Some(api_key);
+ // Also set LLM pool config
+ if config.llm.summary.api_key.is_none() {
+ config.llm.summary.api_key = config.summary.api_key.clone();
+ }
+ if config.llm.retrieval.api_key.is_none() {
+ config.llm.retrieval.api_key = config.summary.api_key.clone();
+ }
}
if let Some(model) = self.model {
- config.retrieval.model = model;
+ config.retrieval.model = model.clone();
+ config.summary.model = model;
}
if let Some(endpoint) = self.endpoint {
- config.retrieval.endpoint = endpoint;
+ config.retrieval.endpoint = endpoint.clone();
+ config.summary.endpoint = endpoint;
}
if let Some(top_k) = self.top_k {
config.retrieval.top_k = top_k;
diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs
index 679dbea0..0ba0a847 100644
--- a/rust/src/client/indexer.rs
+++ b/rust/src/client/indexer.rs
@@ -259,17 +259,19 @@ impl IndexerClient {
options: &IndexOptions,
format: DocumentFormat,
) -> PipelineOptions {
PipelineOptions {
mode: match format {
DocumentFormat::Markdown => IndexMode::Markdown,
DocumentFormat::Pdf => IndexMode::Pdf,
DocumentFormat::Html => IndexMode::Html,
DocumentFormat::Docx => IndexMode::Docx,
- DocumentFormat::Text => IndexMode::Auto,
},
generate_ids: options.generate_ids,
summary_strategy: if options.generate_summaries {
- SummaryStrategy::selective(self.config.min_summary_tokens, false)
+                SummaryStrategy::full()
} else {
SummaryStrategy::none()
},
diff --git a/rust/src/client/types.rs b/rust/src/client/types.rs
index 31438a62..0cfcb065 100644
--- a/rust/src/client/types.rs
+++ b/rust/src/client/types.rs
@@ -174,7 +174,7 @@ impl Default for IndexOptions {
fn default() -> Self {
Self {
mode: IndexMode::Default,
- generate_summaries: false,
+ generate_summaries: true,
include_text: true,
generate_ids: true,
generate_description: false,
diff --git a/rust/src/config/loader.rs b/rust/src/config/loader.rs
index 33f8bb9f..e436a315 100644
--- a/rust/src/config/loader.rs
+++ b/rust/src/config/loader.rs
@@ -3,17 +3,33 @@
//! Configuration loader.
//!
-//! Loads configuration from TOML files with validation.
-//! All configuration must be explicit in the config file - no environment variables.
+//! Loads configuration from TOML files with environment variable overrides.
+//!
+//! # Configuration Priority
+//!
+//! Configuration is loaded in this order (later overrides earlier):
+//! 1. Default configuration
+//! 2. Config file (if found or specified)
+//! 3. Environment variables
+//!
+//! # Environment Variables
+//!
+//! | Variable | Description | Maps To |
+//! |----------|-------------|---------|
+//! | `OPENAI_API_KEY` | LLM API key | `llm.api_key` / `retrieval.api_key` |
+//! | `VECTORLESS_MODEL` | Default LLM model | `retrieval.model` |
+//! | `VECTORLESS_ENDPOINT` | LLM API endpoint | `retrieval.endpoint` |
+//! | `VECTORLESS_WORKSPACE` | Workspace directory | `storage.workspace_dir` |
//!
//! # Example
//!
//! ```rust,no_run
//! use vectorless::config::{ConfigLoader, Config};
//!
-//! // Load from file
+//! // Load from file with environment variable overrides
//! let config = ConfigLoader::new()
//! .file("config.toml")
+//! .with_env(true) // Enable environment variables (default: true)
//! .load()?;
//!
//! // Load with validation
@@ -73,6 +89,9 @@ pub struct ConfigLoader {
/// Custom validator (optional).
validator: Option,
+
+ /// Whether to apply environment variable overrides.
+ env_enabled: bool,
}
impl Default for ConfigLoader {
@@ -88,6 +107,7 @@ impl ConfigLoader {
files: Vec::new(),
validate: false,
validator: None,
+ env_enabled: true,
}
}
@@ -122,13 +142,68 @@ impl ConfigLoader {
self
}
+ /// Enable or disable environment variable overrides.
+ ///
+ /// When enabled (default), environment variables override config file values:
+ /// - `OPENAI_API_KEY` → sets API key for all LLM clients
+ /// - `VECTORLESS_MODEL` → sets default model
+ /// - `VECTORLESS_ENDPOINT` → sets API endpoint
+ /// - `VECTORLESS_WORKSPACE` → sets workspace directory
+ pub fn with_env(mut self, enabled: bool) -> Self {
+ self.env_enabled = enabled;
+ self
+ }
+
+ /// Apply environment variable overrides to configuration.
+ fn apply_env_overrides(&self, config: &mut Config) {
+ if !self.env_enabled {
+ return;
+ }
+
+ // OPENAI_API_KEY: Set API key for all LLM clients
+ if let Ok(api_key) = std::env::var("OPENAI_API_KEY") {
+ // Set default API key
+ config.llm.api_key = Some(api_key.clone());
+ // Override individual client API keys if not explicitly set
+ if config.llm.summary.api_key.is_none() {
+ config.llm.summary.api_key = Some(api_key.clone());
+ }
+ if config.llm.retrieval.api_key.is_none() {
+ config.llm.retrieval.api_key = Some(api_key.clone());
+ }
+ if config.llm.pilot.api_key.is_none() {
+ config.llm.pilot.api_key = Some(api_key);
+ }
+ }
+
+ // VECTORLESS_MODEL: Set default model
+ if let Ok(model) = std::env::var("VECTORLESS_MODEL") {
+ config.llm.summary.model = model.clone();
+ config.llm.retrieval.model = model.clone();
+ config.llm.pilot.model = model;
+ }
+
+ // VECTORLESS_ENDPOINT: Set API endpoint
+ if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") {
+ config.llm.summary.endpoint = endpoint.clone();
+ config.llm.retrieval.endpoint = endpoint.clone();
+ config.llm.pilot.endpoint = endpoint;
+ }
+
+ // VECTORLESS_WORKSPACE: Set workspace directory
+ if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") {
+ config.storage.workspace_dir = PathBuf::from(workspace);
+ }
+ }
+
/// Load the configuration.
///
/// # Behavior
///
/// 1. Start with default configuration
/// 2. Load and merge each specified file (in order)
- /// 3. Validate configuration (if enabled)
+ /// 3. Apply environment variable overrides (if enabled)
+ /// 4. Validate configuration (if enabled)
///
/// # Errors
///
@@ -150,6 +225,9 @@ impl ConfigLoader {
}
}
+ // Apply environment variable overrides
+ self.apply_env_overrides(&mut config);
+
// Validate if requested
if self.validate {
let validator = self.validator.unwrap_or_default();
diff --git a/rust/src/index/config.rs b/rust/src/index/config.rs
index f08b5968..f5cabebc 100644
--- a/rust/src/index/config.rs
+++ b/rust/src/index/config.rs
@@ -160,7 +160,7 @@ impl Default for PipelineOptions {
Self {
mode: IndexMode::Auto,
generate_ids: true,
- summary_strategy: SummaryStrategy::default(),
+ summary_strategy: SummaryStrategy::full(),
thinning: ThinningConfig::default(),
optimization: OptimizationConfig::default(),
generate_description: true,
diff --git a/rust/src/index/pipeline/context.rs b/rust/src/index/pipeline/context.rs
index ab9a462d..979839a8 100644
--- a/rust/src/index/pipeline/context.rs
+++ b/rust/src/index/pipeline/context.rs
@@ -51,7 +51,7 @@ impl IndexInput {
Self::Content {
content: content.into(),
name: String::new(),
- format: DocumentFormat::Text,
+ format: DocumentFormat::Markdown,
}
}
diff --git a/rust/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs
index 4fb29310..e848e832 100644
--- a/rust/src/index/stages/enhance.rs
+++ b/rust/src/index/stages/enhance.rs
@@ -101,6 +101,7 @@ impl IndexStage for EnhanceStage {
// Check if we need summaries
if !self.needs_summaries(ctx) {
info!(
"Summary generation skipped (strategy: {:?})",
ctx.options.summary_strategy
@@ -112,6 +113,7 @@ impl IndexStage for EnhanceStage {
let llm_client = match &self.llm_client {
Some(client) => client,
None => {
+ println!("[DEBUG] No LLM client configured, skipping summary generation");
warn!("No LLM client configured, skipping summary generation");
return Ok(StageResult::success("enhance"));
}
@@ -121,11 +123,13 @@ impl IndexStage for EnhanceStage {
let tree = match ctx.tree.as_mut() {
Some(t) => t,
None => {
+ println!("[DEBUG] No tree built, skipping enhance stage");
warn!("No tree built, skipping enhance stage");
return Ok(StageResult::success("enhance"));
}
};
+ println!("[DEBUG] Using summary strategy: {:?}", ctx.options.summary_strategy);
info!("Using summary strategy: {:?}", ctx.options.summary_strategy);
// Create summary generator with optional memo store
@@ -141,11 +145,14 @@ impl IndexStage for EnhanceStage {
let node_ids: Vec<crate::document::NodeId> = tree.traverse();
let total_nodes = node_ids.len();
+ println!("[DEBUG] Processing {} nodes for summary generation", total_nodes);
info!("Processing {} nodes for summary generation", total_nodes);
// Process nodes
let mut generated = 0;
let mut failed = 0;
+ let mut skipped_no_content = 0;
+ let mut skipped_tokens = 0;
let strategy = ctx.options.summary_strategy.clone();
for node_id in node_ids {
@@ -154,15 +161,18 @@ impl IndexStage for EnhanceStage {
Some(n) => n.clone(),
None => continue,
};
+ println!("[DEBUG] Evaluating node for summary: {} {}", node.title, node.content);
// Skip if no content
if node.content.is_empty() {
+ skipped_no_content += 1;
continue;
}
// Get token count and check if we should generate
let token_count = node.token_count.unwrap_or(0);
if !strategy.should_generate(tree, node_id, token_count) {
+ skipped_tokens += 1;
continue;
}
@@ -194,6 +204,9 @@ impl IndexStage for EnhanceStage {
}
// Generate summary (generator also has memoization built-in)
+ println!("[DEBUG] Calling LLM to generate summary for node: {} ({} tokens)", node.title, token_count);
+ println!("[DEBUG] Node content: {}", node.content);
+
match generator.generate(&node.title, &node.content).await {
Ok(summary) => {
if summary.is_empty() {
@@ -223,6 +236,8 @@ impl IndexStage for EnhanceStage {
let duration = start.elapsed().as_millis() as u64;
ctx.metrics.record_enhance(duration);
+ println!("[DEBUG] Generated {} summaries ({} failed, {} skipped no content, {} skipped tokens) in {}ms",
+ generated, failed, skipped_no_content, skipped_tokens, duration);
info!(
"Generated {} summaries ({} failed) in {}ms",
generated, failed, duration
diff --git a/rust/src/llm/executor.rs b/rust/src/llm/executor.rs
index 8ac193f1..2c157432 100644
--- a/rust/src/llm/executor.rs
+++ b/rust/src/llm/executor.rs
@@ -360,7 +360,7 @@ impl LlmExecutor {
ChatCompletionRequestUserMessage::from(truncated).into(),
])
.temperature(self.config.temperature)
- .max_tokens(tokens)
+ // .max_tokens(tokens)
.build()
} else {
CreateChatCompletionRequestArgs::default()
diff --git a/rust/src/parser/registry.rs b/rust/src/parser/registry.rs
index ef1cf416..4f3d0e8c 100644
--- a/rust/src/parser/registry.rs
+++ b/rust/src/parser/registry.rs
@@ -141,7 +141,7 @@ impl ParserRegistry {
/// For binary formats (PDF, DOCX), the parser handles the bytes directly.
pub async fn parse_bytes(&self, bytes: &[u8], format: DocumentFormat) -> Result {
match format {
- DocumentFormat::Markdown | DocumentFormat::Html | DocumentFormat::Text => {
+ DocumentFormat::Markdown | DocumentFormat::Html => {
// Text formats - convert to string first
let content = std::str::from_utf8(bytes)
.map_err(|e| Error::Parse(format!("Invalid UTF-8 content: {}", e)))?;
@@ -188,7 +188,6 @@ pub fn get_parser(format: DocumentFormat) -> Option> {
DocumentFormat::Pdf => Some(Box::new(PdfParser::new())),
DocumentFormat::Html => Some(Box::new(HtmlParser::new())),
DocumentFormat::Docx => Some(Box::new(super::docx::DocxParser::new())),
- DocumentFormat::Text => None, // TODO: Implement plain text parser
}
}
diff --git a/rust/src/parser/types.rs b/rust/src/parser/types.rs
index c8c5f000..2ccac2f7 100644
--- a/rust/src/parser/types.rs
+++ b/rust/src/parser/types.rs
@@ -21,8 +21,6 @@ pub enum DocumentFormat {
Html,
/// Word documents (.docx)
Docx,
- /// Plain text files (.txt)
- Text,
}
impl DocumentFormat {
@@ -33,7 +31,6 @@ impl DocumentFormat {
"pdf" => Some(Self::Pdf),
"html" | "htm" => Some(Self::Html),
"docx" => Some(Self::Docx),
- "txt" => Some(Self::Text),
_ => None,
}
}
@@ -45,7 +42,6 @@ impl DocumentFormat {
Self::Pdf => "pdf",
Self::Html => "html",
Self::Docx => "docx",
- Self::Text => "txt",
}
}
}
@@ -173,7 +169,7 @@ impl Default for DocumentMeta {
fn default() -> Self {
Self {
name: String::new(),
- format: DocumentFormat::Text,
+ format: DocumentFormat::Markdown,
page_count: None,
line_count: 0,
source_path: None,
diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs
index af0fd602..de41c052 100644
--- a/rust/src/retrieval/pilot/llm_pilot.rs
+++ b/rust/src/retrieval/pilot/llm_pilot.rs
@@ -286,7 +286,7 @@ impl LlmPilot {
&self,
point: InterventionPoint,
context: &super::builder::PilotContext,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
) -> PilotDecision {
// Check memo cache first
if let Some(ref store) = self.memo_store {
@@ -331,22 +331,29 @@ impl LlmPilot {
}
}
+ println!("[DEBUG] LlmPilot::call_llm() - point={:?}, estimated_tokens={}", point, prompt.estimated_tokens);
+ println!("[DEBUG] LlmPilot::call_llm() - SYSTEM PROMPT:\n{}", prompt.system);
+ println!("[DEBUG] LlmPilot::call_llm() - USER PROMPT:\n{}", prompt.user);
+ println!("[DEBUG] LlmPilot::call_llm() - candidates count: {}", candidates.len());
debug!(
"Calling LLM for {:?} point (estimated: {} tokens)",
point, prompt.estimated_tokens
);
// Make LLM call - use executor if available, otherwise use client directly
let result = if let Some(ref executor) = self.executor {
+ println!("[DEBUG] LlmPilot::call_llm() - using LlmExecutor");
// Use LlmExecutor for unified throttle/retry/fallback
executor.complete(&prompt.system, &prompt.user).await
} else {
+ println!("[DEBUG] LlmPilot::call_llm() - using direct client");
// Fallback to direct client call
self.client.complete(&prompt.system, &prompt.user).await
};
match result {
Ok(response) => {
+ println!("[DEBUG] LlmPilot::call_llm() - RAW LLM RESPONSE:\n{}", response);
// Record usage (estimate output tokens)
let output_tokens = self.estimate_tokens(&response);
self.budget
@@ -354,6 +361,10 @@ impl LlmPilot {
// Parse response
let mut decision = self.response_parser.parse(&response, candidates, point);
+ println!("[DEBUG] LlmPilot::call_llm() - PARSED DECISION: confidence={:.2}, ranked={}, direction={:?}, reasoning={}",
+ decision.confidence, decision.ranked_candidates.len(),
+ std::mem::discriminant(&decision.direction),
+ decision.reasoning.chars().take(100).collect::());
// Apply learner adjustment if available
if let Some(ref adj) = adjustment {
@@ -406,14 +417,14 @@ impl LlmPilot {
fn cached_value_to_decision(
&self,
value: crate::memo::PilotDecisionValue,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
let ranked = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| super::decision::RankedCandidate {
- node_id,
+ .map(|(i, c)| super::decision::RankedCandidate {
+ node_id: c.node_id,
score: if i == value.selected_idx { 1.0 } else { 0.5 / (i + 1) as f32 },
reason: None,
})
@@ -433,14 +444,14 @@ impl LlmPilot {
/// Create a default decision when LLM fails.
fn default_decision(
&self,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
let ranked = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| super::decision::RankedCandidate {
- node_id,
+ .map(|(i, c)| super::decision::RankedCandidate {
+ node_id: c.node_id,
score: 1.0 / (i + 1) as f32,
reason: None,
})
@@ -479,11 +490,13 @@ impl Pilot for LlmPilot {
fn should_intervene(&self, state: &SearchState<'_>) -> bool {
// Check mode
if !self.config.mode.uses_llm() {
+ println!("[DEBUG] LlmPilot::should_intervene() - mode doesn't use LLM");
return false;
}
// Check budget
if !self.has_budget() {
+ println!("[DEBUG] LlmPilot::should_intervene() - budget exhausted");
debug!("Budget exhausted, skipping intervention");
return false;
}
@@ -492,6 +505,8 @@ impl Pilot for LlmPilot {
// Condition 1: Fork point with enough candidates
if state.candidates.len() > intervention.fork_threshold {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: fork point with {} candidates (threshold={})",
+ state.candidates.len(), intervention.fork_threshold);
debug!(
"Intervening: fork point with {} candidates",
state.candidates.len()
@@ -501,12 +516,15 @@ impl Pilot for LlmPilot {
// Condition 2: Scores are too close (algorithm uncertain)
if self.scores_are_close(state) {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: scores are close (best={:.2})", state.best_score);
debug!("Intervening: scores are close");
return true;
}
// Condition 3: Low confidence (best score too low)
if intervention.is_low_confidence(state.best_score) {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: low confidence (best_score={:.2}, threshold={:.2})",
+ state.best_score, intervention.low_score_threshold);
debug!(
"Intervening: low confidence (best_score={:.2})",
state.best_score
@@ -516,31 +534,58 @@ impl Pilot for LlmPilot {
// Condition 4: Backtracking and guide_at_backtrack is enabled
if state.is_backtracking && self.config.guide_at_backtrack {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: backtracking");
debug!("Intervening: backtracking");
return true;
}
+ println!("[DEBUG] LlmPilot::should_intervene() - NO: candidates={}, best_score={:.2}",
+ state.candidates.len(), state.best_score);
false
}
async fn decide(&self, state: &SearchState<'_>) -> PilotDecision {
let point = self.get_intervention_point(state);
+ println!("[DEBUG] LlmPilot::decide() - intervention_point={:?}, candidates={}",
+ point, state.candidates.len());
// Build context
let context = self.context_builder.build(state);
+ // Build candidate info with titles
+ let candidate_info: Vec<super::parser::CandidateInfo> = state.candidates
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ state.tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
+
// Make LLM call
- self.call_llm(point, &context, state.candidates).await
+ let decision = self.call_llm(point, &context, &candidate_info).await;
+
+ println!("[DEBUG] LlmPilot::decide() - result: confidence={:.2}, direction={:?}, ranked={}",
+ decision.confidence, std::mem::discriminant(&decision.direction), decision.ranked_candidates.len());
+
+ decision
}
async fn guide_start(&self, tree: &DocumentTree, query: &str) -> Option {
+ println!("[DEBUG] LlmPilot::guide_start() called, query='{}'", query);
+
// Check if guide_at_start is enabled
if !self.config.guide_at_start {
+ println!("[DEBUG] LlmPilot::guide_start() - guide_at_start=false, skipping");
return None;
}
// Check budget
if !self.has_budget() {
+ println!("[DEBUG] LlmPilot::guide_start() - budget exhausted, skipping");
debug!("Budget exhausted, cannot guide start");
return None;
}
@@ -549,12 +594,41 @@ impl Pilot for LlmPilot {
let context = self.context_builder.build_start_context(tree, query);
// Get root's children as candidates
- let candidates = tree.children(tree.root());
+ let node_ids = tree.children(tree.root());
+ println!("[DEBUG] LlmPilot::guide_start() - {} root children candidates", node_ids.len());
+
+ // Build CandidateInfo with titles
+ let candidates: Vec<super::parser::CandidateInfo> = node_ids
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
// Make LLM call
+ println!("[DEBUG] LlmPilot::guide_start() - calling LLM...");
let decision = self
.call_llm(InterventionPoint::Start, &context, &candidates)
.await;
+
+ println!("[DEBUG] LlmPilot::guide_start() - LLM returned: confidence={:.2}, ranked_candidates={}, reasoning='{}'",
+ decision.confidence,
+ decision.ranked_candidates.len(),
+ decision.reasoning.chars().take(100).collect::());
+
+ // Debug: show top ranked candidates
+ for (i, rc) in decision.ranked_candidates.iter().enumerate().take(3) {
+ if let Some(node) = tree.get(rc.node_id) {
+ println!("[DEBUG] Ranked {}: node_id={:?}, score={:.3}, title='{}'",
+ i, rc.node_id, rc.score, node.title);
+ }
+ }
+
info!(
"Pilot start guidance: confidence={}, candidates={}",
decision.confidence,
@@ -580,9 +654,22 @@ impl Pilot for LlmPilot {
.context_builder
.build_backtrack_context(state, state.path);
+ // Build CandidateInfo
+ let candidates: Vec<super::parser::CandidateInfo> = state.candidates
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ state.tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
+
// Make LLM call
Some(
- self.call_llm(InterventionPoint::Backtrack, &context, state.candidates)
+ self.call_llm(InterventionPoint::Backtrack, &context, &candidates)
.await,
)
}
@@ -662,20 +749,6 @@ mod tests {
assert!(!pilot.has_budget());
}
- #[test]
- fn test_default_decision() {
- let client = LlmClient::for_model("gpt-4o-mini");
- let config = PilotConfig::default();
- let pilot = LlmPilot::new(client, config);
-
- let candidates = create_test_node_ids(2);
- let decision = pilot.default_decision(&candidates, InterventionPoint::Fork);
-
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.confidence, 0.0);
- assert!(decision.reasoning.contains("LLM"));
- }
-
#[test]
fn test_reset() {
let client = LlmClient::for_model("gpt-4o-mini");
diff --git a/rust/src/retrieval/pilot/parser.rs b/rust/src/retrieval/pilot/parser.rs
index 85954c82..651b1c3c 100644
--- a/rust/src/retrieval/pilot/parser.rs
+++ b/rust/src/retrieval/pilot/parser.rs
@@ -19,20 +19,86 @@ use crate::document::NodeId;
/// Parsed response from LLM.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
- /// Ranked candidates with scores.
+ /// Ranked candidates with scores (FORK format).
#[serde(default)]
pub ranked_candidates: Vec<CandidateScore>,
+ /// Entry points for START intervention (list of node titles).
+ #[serde(default)]
+ pub entry_points: Vec<String>,
+ /// Best entry points (alternative START format from LLM).
+ #[serde(default)]
+ pub best_entry_points: Vec<EntryPoint>,
+ /// Selected nodes (another alternative START format - list of titles).
+ #[serde(default)]
+ pub selected_nodes: Vec<String>,
+ /// Selected node (singular - some LLMs return this format).
+ #[serde(default)]
+ pub selected_node: Option<String>,
+ /// Recommended node (another singular format).
+ #[serde(default)]
+ pub recommended_node: Option<String>,
+ /// Analysis wrapper (some LLMs nest response in "analysis" object).
+ #[serde(default)]
+ pub analysis: Option<AnalysisWrapper>,
/// Recommended search direction.
#[serde(default)]
pub direction: DirectionResponse,
- /// Confidence level (0.0 - 1.0).
- #[serde(default = "default_confidence")]
+ /// Confidence level (0.0 - 1.0 or "high"/"medium"/"low").
+ #[serde(default = "default_confidence", deserialize_with = "deserialize_confidence")]
pub confidence: f32,
/// Reasoning for the decision.
#[serde(default)]
pub reasoning: String,
}
+/// Custom deserializer for confidence that accepts both float and string.
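+///
+/// For example, `"confidence": 0.9` and `"confidence": "high"` both deserialize
+/// to `0.9`, while `"confidence": "low"` maps to `0.3`.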
+fn deserialize_confidence<'de, D>(deserializer: D) -> Result
+where
+ D: serde::Deserializer<'de>,
+{
+ use serde::de::Error;
+
+ let value = serde_json::Value::deserialize(deserializer)?;
+ match value {
+ serde_json::Value::Number(n) => {
+ // Handle numeric value
+ Ok(n.as_f64().unwrap_or(0.5) as f32)
+ }
+ serde_json::Value::String(s) => {
+ // Handle string values like "high", "medium", "low"
+ let lower = s.to_lowercase();
+ let confidence = match lower.as_str() {
+ "high" | "very high" | "strong" => 0.9,
+ "medium" | "moderate" => 0.6,
+ "low" | "weak" => 0.3,
+ _ => 0.5, // default for unknown strings
+ };
+ Ok(confidence)
+ }
+ _ => Ok(0.5), // default for other types
+ }
+}
+
+/// Analysis wrapper for nested LLM responses.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AnalysisWrapper {
+ /// Query from analysis.
+ #[serde(default)]
+ pub query: Option<String>,
+ /// Intent detected.
+ #[serde(default)]
+ pub intent: Option<String>,
+ /// Selected node (singular).
+ #[serde(default)]
+ pub selected_node: Option<String>,
+ /// Selected nodes (plural).
+ #[serde(default)]
+ pub selected_nodes: Vec<String>,
+ /// Reasoning from analysis.
+ #[serde(default)]
+ pub reasoning: Option<String>,
+}
+
/// Candidate score from LLM response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateScore {
@@ -45,6 +111,48 @@ pub struct CandidateScore {
pub reason: Option<String>,
}
+/// Candidate info for title matching.
+#[derive(Debug, Clone)]
+pub struct CandidateInfo {
+ /// Node ID.
+ pub node_id: NodeId,
+ /// Title of the node.
+ pub title: String,
+ /// Index in the candidates list.
+ pub index: usize,
+}
+
+/// Entry point from START response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EntryPoint {
+ /// Node ID or index.
+ #[serde(default)]
+ pub node_id: Option<usize>,
+ /// Index of the candidate.
+ #[serde(default)]
+ pub index: Option<usize>,
+ /// Title of the entry point.
+ #[serde(default)]
+ pub title: Option<String>,
+ /// Relevance score (may be 1-5 or 0.0-1.0).
+ #[serde(default)]
+ pub relevance_score: Option<f32>,
+ /// Score (alternative field name).
+ #[serde(default)]
+ pub score: Option<f32>,
+}
+
+/// Top-3 candidate from the LLM locate-top-3 response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Top3Candidate {
+ /// Node ID copied verbatim from the TOC.
+ pub node_id: usize,
+ /// Relevance score (0.0-1.0).
+ pub relevance_score: f32,
+ /// Reason for the selection.
+ pub reason: String,
+}
+
/// Direction response from LLM.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
@@ -111,23 +219,29 @@ impl ResponseParser {
/// # Arguments
///
/// * `response` - Raw LLM response text
- /// * `candidates` - Original candidate NodeIds (for mapping indices)
+ /// * `candidates` - Candidate info with NodeId, title, and index
/// * `point` - The intervention point
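+ ///
+ /// Parsing falls back in order: JSON (optionally inside a fenced code block),
+ /// then regex extraction, then a uniform default decision.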
pub fn parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
+ println!("[DEBUG] ResponseParser::parse() - candidates.len()={}", candidates.len());
+
// Try JSON parse first
if let Some(decision) = self.try_json_parse(response, candidates, point) {
+ println!("[DEBUG] ResponseParser::parse() - JSON parse succeeded, ranked={}", decision.ranked_candidates.len());
return decision;
}
+ println!("[DEBUG] ResponseParser::parse() - JSON parse failed, trying regex...");
// Try regex extraction
if let Some(decision) = self.try_regex_parse(response, candidates, point) {
+ println!("[DEBUG] ResponseParser::parse() - Regex parse succeeded, ranked={}", decision.ranked_candidates.len());
return decision;
}
+ println!("[DEBUG] ResponseParser::parse() - Regex parse failed, using default decision");
// Return default decision
self.default_decision(candidates, point)
@@ -137,23 +251,34 @@ impl ResponseParser {
fn try_json_parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> Option<PilotDecision> {
// First, try to extract JSON from code blocks
let json_str = if let Some(caps) = self.json_block_regex.captures(response) {
- caps.get(1)?.as_str().trim().to_string()
+ let extracted = caps.get(1)?.as_str().trim().to_string();
+ println!("[DEBUG] ResponseParser::try_json_parse() - Found JSON in code block");
+ extracted
} else {
// Try to find raw JSON object
let start = response.find('{')?;
let end = response.rfind('}')? + 1;
- response[start..end].to_string()
+ let extracted = response[start..end].to_string();
+ println!("[DEBUG] ResponseParser::try_json_parse() - Found raw JSON (no code block)");
+ extracted
};
+ println!("[DEBUG] ResponseParser::try_json_parse() - Extracted JSON:\n{}", json_str);
+
// Parse JSON
- let llm_response: LlmResponse = match serde_json::from_str(&json_str) {
- Ok(r) => r,
+ let llm_response: LlmResponse = match serde_json::from_str::<LlmResponse>(&json_str) {
+ Ok(r) => {
+ tracing::debug!("ResponseParser::try_json_parse() - parsed OK, ranked_candidates={}", r.ranked_candidates.len());
+ r
+ },
Err(e) => {
+ println!("[DEBUG] ResponseParser::try_json_parse() - JSON parse FAILED: {}", e);
warn!("Failed to parse LLM response as JSON: {}", e);
return None;
}
@@ -167,7 +292,7 @@ impl ResponseParser {
fn try_regex_parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> Option<PilotDecision> {
// Extract confidence
@@ -226,7 +351,7 @@ impl ResponseParser {
fn extract_ranked_candidates(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
) -> Vec<RankedCandidate> {
let mut ranked = Vec::new();
@@ -245,7 +370,7 @@ impl ResponseParser {
if index < candidates.len() {
ranked.push(RankedCandidate {
- node_id: candidates[index],
+ node_id: candidates[index].node_id,
score: score.clamp(0.0, 1.0),
reason: None,
});
@@ -268,7 +393,7 @@ impl ResponseParser {
if let Ok(idx) = match_1.as_str().parse::<usize>() {
if idx < candidates.len() && seen.insert(idx) {
ranked.push(RankedCandidate {
- node_id: candidates[idx],
+ node_id: candidates[idx].node_id,
score: 1.0 - (ranked.len() as f32 * 0.1), // Decreasing scores
reason: None,
});
@@ -287,20 +412,28 @@ impl ResponseParser {
/// Convert LlmResponse to PilotDecision.
fn llm_response_to_decision(
&self,
- llm_response: LlmResponse,
- candidates: &[NodeId],
+ mut llm_response: LlmResponse,
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - point={:?}", point);
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - ranked_candidates.len()={}", llm_response.ranked_candidates.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - best_entry_points.len()={}", llm_response.best_entry_points.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - entry_points.len()={}", llm_response.entry_points.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - selected_nodes.len()={}", llm_response.selected_nodes.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - selected_node={:?}", llm_response.selected_node);
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - analysis={:?}", llm_response.analysis.as_ref().map(|a| (&a.selected_node, &a.selected_nodes)));
+
// Convert candidate scores to RankedCandidate
- let ranked_candidates: Vec<RankedCandidate> = llm_response
+ let mut ranked_candidates: Vec<RankedCandidate> = llm_response
.ranked_candidates
- .into_iter()
+ .iter()
.filter_map(|cs| {
if cs.index < candidates.len() {
Some(RankedCandidate {
- node_id: candidates[cs.index],
+ node_id: candidates[cs.index].node_id,
score: cs.score.clamp(0.0, 1.0),
- reason: cs.reason,
+ reason: cs.reason.clone(),
})
} else {
None
@@ -308,6 +441,154 @@ impl ResponseParser {
})
.collect();
+ // Handle START response format: best_entry_points, entry_points, or selected_nodes
+ if ranked_candidates.is_empty() {
+ // Try to convert best_entry_points (format: [{"node_id": 1, "title": "...", "relevance_score": 5}])
+ for entry in &llm_response.best_entry_points {
+ // Get index from either node_id or index field
+ // node_id is 1-indexed from LLM, convert to 0-indexed
+ let idx = if let Some(nid) = entry.node_id {
+ if nid > 0 { nid - 1 } else { nid }
+ } else if let Some(idx) = entry.index {
+ idx
+ } else {
+ continue; // Skip if no valid index
+ };
+
+ if idx < candidates.len() {
+ let raw = entry.relevance_score.or(entry.score).unwrap_or(0.5);
+ // Scores may arrive on a 1-5 scale or already in 0.0-1.0;
+ // only rescale values greater than 1.0
+ let score = if raw > 1.0 { raw / 5.0 } else { raw };
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidates[idx].node_id,
+ score: score.clamp(0.0, 1.0),
+ reason: entry.title.clone(),
+ });
+ tracing::debug!("ResponseParser - converted best_entry_point (idx={}, score={:.2})", idx, score);
+ }
+ }
+
+ // Try to convert selected_nodes (format: ["Project Documentation", "Overview"])
+ // Match by title
+ for selected_title in &llm_response.selected_nodes {
+ for candidate in candidates {
+ if Self::titles_match(selected_title, &candidate.title) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.9, // High score for title match
+ reason: Some(format!("Title match: {}", selected_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched selected_node '{}' to candidate '{}' (index={})",
+ selected_title, candidate.title, candidate.index);
+ break; // Only match once per selected_node
+ }
+ }
+ }
+
+ // Try to convert selected_node (singular - format: "Project Documentation")
+ if let Some(ref single_node) = llm_response.selected_node {
+ for candidate in candidates {
+ if Self::titles_match(single_node, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.9,
+ reason: Some(format!("Title match (singular): {}", single_node)),
+ });
+ println!("[DEBUG] ResponseParser - matched selected_node (singular) '{}' to candidate '{}' (index={})",
+ single_node, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Try to convert recommended_node (another singular format)
+ if let Some(ref recommended) = llm_response.recommended_node {
+ for candidate in candidates {
+ if Self::titles_match(recommended, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Recommended node: {}", recommended)),
+ });
+ println!("[DEBUG] ResponseParser - matched recommended_node '{}' to candidate '{}' (index={})",
+ recommended, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Try to extract from analysis wrapper if present
+ if let Some(ref analysis) = llm_response.analysis {
+ // Check analysis.selected_nodes (plural array)
+ for selected_title in &analysis.selected_nodes {
+ for candidate in candidates {
+ if Self::titles_match(selected_title, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Analysis selected_nodes: {}", selected_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched analysis.selected_nodes '{}' to candidate '{}' (index={})",
+ selected_title, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Check analysis.selected_node (singular)
+ if let Some(ref single_node) = analysis.selected_node {
+ for candidate in candidates {
+ if Self::titles_match(single_node, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Analysis selected_node: {}", single_node)),
+ });
+ println!("[DEBUG] ResponseParser - matched analysis.selected_node (singular) '{}' to candidate '{}' (index={})",
+ single_node, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Use analysis.reasoning if top-level reasoning is empty
+ if llm_response.reasoning.is_empty() {
+ if let Some(ref r) = analysis.reasoning {
+ llm_response.reasoning = r.clone();
+ }
+ }
+ }
+
+ // Try to convert entry_points (format: ["Node Title 1", "Node Title 2"])
+ for entry_title in &llm_response.entry_points {
+ for candidate in candidates {
+ if Self::titles_match(entry_title, &candidate.title) {
+ // Check if already added
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.8, // Slightly lower score for entry_points
+ reason: Some(format!("Entry point: {}", entry_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched entry_point '{}' to candidate '{}' (index={})",
+ entry_title, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+ }
+
// Convert direction
let direction = match llm_response.direction {
DirectionResponse::GoDeeper => SearchDirection::GoDeeper {
@@ -333,6 +614,8 @@ impl ResponseParser {
},
};
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - final ranked_candidates.len()={}", ranked_candidates.len());
+
PilotDecision {
ranked_candidates,
direction,
@@ -342,14 +625,41 @@ impl ResponseParser {
}
}
+ /// Check if two titles match (fuzzy matching).
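+ ///
+ /// Matches on, in order: exact equality, substring containment, then at
+ /// least 50% word overlap; e.g. "Architecture Overview" matches "overview".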
+ fn titles_match(llm_title: &str, candidate_title: &str) -> bool {
+ let llm_lower = llm_title.to_lowercase().trim().to_string();
+ let candidate_lower = candidate_title.to_lowercase().trim().to_string();
+
+ // Exact match
+ if llm_lower == candidate_lower {
+ return true;
+ }
+
+ // Contains match
+ if llm_lower.contains(&candidate_lower) || candidate_lower.contains(&llm_lower) {
+ return true;
+ }
+
+ // Word overlap match (at least 50% of words match)
+ let llm_words: std::collections::HashSet<&str> = llm_lower.split_whitespace().collect();
+ let candidate_words: std::collections::HashSet<&str> = candidate_lower.split_whitespace().collect();
+ let overlap = llm_words.intersection(&candidate_words).count();
+ let min_words = llm_words.len().min(candidate_words.len());
+ if min_words > 0 && overlap as f32 / min_words as f32 >= 0.5 {
+ return true;
+ }
+
+ false
+ }
+
/// Create a default decision when parsing fails.
- fn default_decision(&self, candidates: &[NodeId], point: InterventionPoint) -> PilotDecision {
+ fn default_decision(&self, candidates: &[CandidateInfo], point: InterventionPoint) -> PilotDecision {
// Score candidates uniformly
let ranked: Vec<RankedCandidate> = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| RankedCandidate {
- node_id,
+ .map(|(i, c)| RankedCandidate {
+ node_id: c.node_id,
score: 1.0 / (i + 1) as f32, // Decreasing scores
reason: None,
})
@@ -395,126 +705,4 @@ mod tests {
}
ids
}
-
- #[test]
- fn test_parse_json_response() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(3);
-
- let response = r#"{
- "ranked_candidates": [
- {"index": 1, "score": 0.9, "reason": "Best match"},
- {"index": 0, "score": 0.5}
- ],
- "direction": "go_deeper",
- "confidence": 0.85,
- "reasoning": "Candidate 1 is most relevant"
- }"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.ranked_candidates[0].node_id, candidates[1]);
- assert!((decision.confidence - 0.85).abs() < 0.01);
- assert!(matches!(
- decision.direction,
- SearchDirection::GoDeeper { .. }
- ));
- }
-
- #[test]
- fn test_parse_json_in_code_block() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- let response = r#"
-Here's my analysis:
-
-```json
-{
- "ranked_candidates": [{"index": 0, "score": 0.8}],
- "direction": "go_deeper",
- "confidence": 0.8,
- "reasoning": "Test"
-}
-```
-"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
- assert_eq!(decision.ranked_candidates.len(), 1);
- }
-
- #[test]
- fn test_parse_with_regex_fallback() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- // Non-JSON response with some structure
- let response = r#"
-I think candidate 0 is the best match.
-Confidence: 0.75
-Direction: go_deeper
-"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- // Should use regex extraction
- assert!((decision.confidence - 0.75).abs() < 0.01);
- }
-
- #[test]
- fn test_default_decision() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- let decision = parser.parse(
- "This is unparseable gibberish",
- &candidates,
- InterventionPoint::Fork,
- );
-
- // Should return default
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.confidence, 0.0);
- assert!(decision.reasoning.contains("parsing failed"));
- }
-
- #[test]
- fn test_confidence_clamping() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(1);
-
- let response = r#"{
- "ranked_candidates": [{"index": 0, "score": 1.5}],
- "confidence": 1.5,
- "direction": "go_deeper"
- }"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- // Confidence should be clamped to 1.0
- assert!((decision.confidence - 1.0).abs() < 0.01);
- }
-
- #[test]
- fn test_direction_conversion() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(1);
-
- let test_cases = vec![
- ("\"direction\": \"go_deeper\"", true),
- ("\"direction\": \"explore_siblings\"", true),
- ("\"direction\": \"backtrack\"", true),
- ("\"direction\": \"found_answer\"", true),
- ];
-
- for (dir_json, should_parse) in test_cases {
- let response = format!(
- r#"{{"ranked_candidates": [], "confidence": 0.5, {}}}"#,
- dir_json
- );
- let decision = parser.parse(&response, &candidates, InterventionPoint::Fork);
- assert!(should_parse, "Direction should parse correctly");
- }
- }
}
diff --git a/rust/src/retrieval/pilot/prompts/system_start.txt b/rust/src/retrieval/pilot/prompts/system_start.txt
index d3a65f49..086f9e96 100644
--- a/rust/src/retrieval/pilot/prompts/system_start.txt
+++ b/rust/src/retrieval/pilot/prompts/system_start.txt
@@ -1,11 +1,15 @@
-You are a document navigation assistant specialized in hierarchical document search.
+You are a document navigation assistant. Your task is to identify the best entry points for searching a hierarchical document based on a user query.
-Your task is to analyze a user's query and the document structure to identify the best starting points for search.
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown, no explanation, just the JSON object.
-Guidelines:
-- Identify sections that are most likely to contain the answer
-- Consider the query's domain, keywords, and intent
-- Prefer more specific sections over general ones when appropriate
-- Multiple entry points can be suggested if the query is ambiguous
+Your response must have this EXACT structure:
+{
+ "entry_points": ["list of node titles as strings"],
+ "reasoning": "explanation string",
+ "confidence": 0.85
+}
-You must respond in valid JSON format.
+Where:
+- entry_points: MUST be an array of node titles (strings) from the candidate list
+- reasoning: MUST be a string
+- confidence: MUST be a number (0.0 to 1.0), not a string like "high"
diff --git a/rust/src/retrieval/pilot/prompts/templates.rs b/rust/src/retrieval/pilot/prompts/templates.rs
index 5f9f75ff..50f4c3cc 100644
--- a/rust/src/retrieval/pilot/prompts/templates.rs
+++ b/rust/src/retrieval/pilot/prompts/templates.rs
@@ -248,52 +248,170 @@ pub mod fallback {
use super::*;
pub fn system_start() -> String {
- "You are a document navigation assistant. Help identify the best starting point for searching a hierarchical document.".to_string()
+ r#"You are a document navigation assistant. Help identify the best entry points for searching a hierarchical document.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks, no explanation. Just the JSON object.
+
+Your response must have this EXACT structure:
+{
+ "entry_points": ["Title 1", "Title 2"],
+ "reasoning": "Brief explanation",
+ "confidence": 0.85
+}
+
+Rules:
+- entry_points: Array of node title strings (from the candidates provided)
+- reasoning: String explaining your choice
+- confidence: Number between 0.0 and 1.0 (use a number, NOT "high"/"medium"/"low")"#.to_string()
}
pub fn user_start() -> String {
- r#"Given the following document structure and user query, identify the best entry points for search.
-
-{context}
+ r#"{context}
-Respond in JSON format with your analysis."#.to_string()
+Respond with ONLY the JSON object (no markdown, no explanation):
+{
+ "entry_points": ["list of node titles as strings"],
+ "reasoning": "your reasoning here",
+ "confidence": 0.85
+}"#.to_string()
}
pub fn system_fork() -> String {
- "You are a document navigation assistant. At each decision point, rank the candidate branches by their likelihood of containing the answer to the user's query.".to_string()
+ r#"You are a document navigation assistant. At each decision point, rank the candidate branches by their likelihood of containing the answer to the user's query.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "ranked_candidates": [
+ {"index": 0, "score": 0.9, "reason": "explanation"}
+ ],
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "overall explanation"
+}
+
+Rules:
+- ranked_candidates: Array of objects with index (number), score (0.0-1.0), reason (string)
+- direction: One of "go_deeper", "explore_siblings", "backtrack", "found_answer"
+- confidence: Number between 0.0 and 1.0 (NOT a string)"#.to_string()
}
pub fn user_fork() -> String {
- r#"Given the current search context and candidate branches, rank them by relevance.
-
-{context}
+ r#"{context}
-Respond in JSON format with ranked candidates."#
+Respond with ONLY the JSON object:
+{
+ "ranked_candidates": [
+ {"index": 0, "score": 0.9, "reason": "why this candidate"}
+ ],
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "overall explanation"
+}"#
.to_string()
}
pub fn system_backtrack() -> String {
- "You are a document navigation assistant. When a search path fails to find the answer, analyze why and suggest alternative branches to explore.".to_string()
+ r#"You are a document navigation assistant. When a search path fails to find the answer, analyze why and suggest alternative branches to explore.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "alternative_branches": [
+ {"index": 0, "score": 0.8, "reason": "explanation"}
+ ],
+ "direction": "backtrack",
+ "confidence": 0.85,
+ "reasoning": "why the original path failed"
+}"#.to_string()
}
pub fn user_backtrack() -> String {
- r#"The current search path did not find the answer. Analyze the failure and suggest alternatives.
-
-{context}
+ r#"{context}
-Respond in JSON format with alternative branches."#.to_string()
+Respond with ONLY the JSON object:
+{
+ "alternative_branches": [
+ {"index": 0, "score": 0.8, "reason": "why this alternative"}
+ ],
+ "direction": "backtrack",
+ "confidence": 0.85,
+ "reasoning": "why original path failed"
+}"#.to_string()
}
pub fn system_evaluate() -> String {
- "You are a document analysis assistant. Evaluate whether the current node contains the answer to the user's query.".to_string()
+ r#"You are a document analysis assistant. Evaluate whether the current node contains the answer to the user's query.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "relevance_score": 0.85,
+ "is_answer": false,
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "explanation"
+}"#.to_string()
}
pub fn user_evaluate() -> String {
- r#"Evaluate if this node contains the answer to the user's query.
+ r#"{context}
+
+Respond with ONLY the JSON object:
+{
+ "relevance_score": 0.85,
+ "is_answer": false,
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "explanation"
+}"#
+ .to_string()
+ }
-{context}
+ pub fn system_locate_top3() -> String {
+ r#"You are a document navigation assistant. Your task is to locate the most relevant sections in a document hierarchy for a user's query.
-Respond in JSON format with your evaluation."#
+CRITICAL INSTRUCTIONS:
+1. Analyze the user query carefully to understand the intent
+2. Examine the provided Table of Contents (TOC) with node IDs
+3. Select the TOP 3 most relevant nodes that would contain the answer
+4. You MUST respond with ONLY valid JSON. No markdown code blocks. No explanations outside JSON.
+
+Your response must have this EXACT structure:
+{
+ "reasoning": "Brief analysis of the query and why you selected these nodes",
+ "candidates": [
+ {"node_id": , "relevance_score": 0.95, "reason": "Why this node matches the query"},
+ {"node_id": , "relevance_score": 0.80, "reason": "Why this node is also relevant"},
+ {"node_id": , "relevance_score": 0.65, "reason": "Why this node might be relevant"}
+ ]
+}
+
+Rules:
+- node_id: MUST be a number from the provided TOC (copy exactly)
+- relevance_score: Number between 0.0 and 1.0 (higher = more relevant)
+- reason: Brief explanation for each selection
+- candidates: Must have exactly 3 items, ordered by relevance (highest first)
+- If fewer than 3 relevant nodes exist, use lower scores for less relevant ones"#.to_string()
+ }
+
+ pub fn user_locate_top3() -> String {
+ r#"{context}
+
+Based on the query and TOC above, select the TOP 3 most relevant nodes.
+
+Respond with ONLY the JSON object:
+{
+ "reasoning": "Your analysis here",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "explanation"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "explanation"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "explanation"}
+ ]
+}"#
.to_string()
}
}
@@ -337,3 +455,68 @@ impl EvaluatePrompt {
}
}
}
+
+impl LocateTop3Prompt {
+ /// Get template with fallback.
+ pub fn with_fallback() -> Self {
+ Self {
+ system: fallback::system_locate_top3(),
+ template: fallback::user_locate_top3(),
+ }
+ }
+}
+
+/// Prompt template for LOCATE_TOP3 intervention point.
+///
+/// Used at the start to directly locate top-3 relevant nodes from TOC:
+/// - Understand query intent
+/// - Identify top 3 most relevant nodes with confidence scores
+/// - Provide reasoning for each selection
+#[derive(Debug, Clone)]
+pub struct LocateTop3Prompt {
+ system: String,
+ template: String,
+}
+
+impl Default for LocateTop3Prompt {
+ fn default() -> Self {
+ Self::with_fallback()
+ }
+}
+
+impl LocateTop3Prompt {
+ /// Create a new locate top-3 prompt template.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Create with custom templates.
+ pub fn with_templates(system: String, template: String) -> Self {
+ Self { system, template }
+ }
+}
+
+impl PromptTemplate for LocateTop3Prompt {
+ fn system_prompt(&self) -> &str {
+ &self.system
+ }
+
+ fn user_prompt_template(&self) -> &str {
+ &self.template
+ }
+
+ fn intervention_point(&self) -> InterventionPoint {
+ InterventionPoint::Start
+ }
+
+ fn output_format_hint(&self) -> &str {
+ r#"{
+ "reasoning": "Overall analysis of the query and document structure",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "Why this node is relevant"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "Why this node is relevant"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "Why this node is relevant"}
+ ]
+}"#
+ }
+}
diff --git a/rust/src/retrieval/pilot/prompts/user_start.txt b/rust/src/retrieval/pilot/prompts/user_start.txt
index b091735e..df048df8 100644
--- a/rust/src/retrieval/pilot/prompts/user_start.txt
+++ b/rust/src/retrieval/pilot/prompts/user_start.txt
@@ -2,7 +2,16 @@ Analyze the following document structure and user query to identify the best ent
{context}
-Provide your response as a JSON object with:
-- entry_points: list of section titles to start searching from
-- reasoning: brief explanation of why these entry points
-- confidence: your confidence in this recommendation (0.0-1.0)
+IMPORTANT: You MUST respond with ONLY a JSON object in this EXACT format:
+{
+ "entry_points": ["Title 1", "Title 2"],
+ "reasoning": "Brief explanation of why these entry points",
+ "confidence": 0.85
+}
+
+Rules:
+- entry_points: Array of strings (node titles from the candidates above)
+- reasoning: String explaining your choice
+- confidence: Number between 0.0 and 1.0 (NOT a string like "high")
+
+Do NOT use any other field names. Use "entry_points" not "selected_node" or "recommended_node".
diff --git a/rust/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs
index 9bf02ae3..823abdba 100644
--- a/rust/src/retrieval/pipeline/context.rs
+++ b/rust/src/retrieval/pipeline/context.rs
@@ -209,6 +209,8 @@ pub struct PipelineContext {
pub keywords: Vec<String>,
/// Target sections from ToC matching.
pub target_sections: Vec,
+ /// Decomposed sub-queries (if query was decomposed).
+ pub decomposition: Option,
// ============ Plan Stage Output ============
/// Selected retrieval strategy.
@@ -268,6 +270,7 @@ impl PipelineContext {
complexity: None,
keywords: Vec::new(),
target_sections: Vec::new(),
+ decomposition: None,
selected_strategy: None,
selected_algorithm: None,
search_config: None,
diff --git a/rust/src/retrieval/pipeline_retriever.rs b/rust/src/retrieval/pipeline_retriever.rs
index 222b3a9b..377c4747 100644
--- a/rust/src/retrieval/pipeline_retriever.rs
+++ b/rust/src/retrieval/pipeline_retriever.rs
@@ -119,7 +119,7 @@ impl PipelineRetriever {
orchestrator = orchestrator.stage(plan_stage);
// Add search stage with Pilot for semantic navigation
- let mut search_stage = SearchStage::new();
+ let mut search_stage = SearchStage::new().with_llm_client(self.llm_client.clone());
if let Some(ref client) = self.llm_client {
// Create LLM-based Pilot for semantic navigation guidance
let mut pilot = LlmPilot::new(client.clone(), PilotConfig::default());
diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs
index 9fba59e9..51d700f4 100644
--- a/rust/src/retrieval/search/beam.rs
+++ b/rust/src/retrieval/search/beam.rs
@@ -126,21 +126,30 @@ impl SearchTree for BeamSearch {
let beam_width = config.beam_width.min(self.beam_width);
let mut visited: HashSet<NodeId> = HashSet::new();
+ println!("[DEBUG] BeamSearch: query='{}', beam_width={}, min_score={:.2}",
+ context.query, beam_width, config.min_score);
+
// Track Pilot interventions
let mut pilot_interventions = 0;
// Initialize with root's children
let root_children = tree.children(tree.root());
+ println!("[DEBUG] Root has {} children", root_children.len());
// Check if Pilot wants to guide the start
let initial_candidates = if let Some(p) = pilot {
+ println!("[DEBUG] BeamSearch: Pilot is available, name={}, guide_at_start={}",
+ p.name(), p.config().guide_at_start);
if p.config().guide_at_start {
+ println!("[DEBUG] BeamSearch: Calling pilot.guide_start()...");
if let Some(guidance) = p.guide_start(tree, &context.query).await {
debug!(
"Pilot provided start guidance with confidence {}",
guidance.confidence
);
pilot_interventions += 1;
+ println!("[DEBUG] BeamSearch: Pilot returned guidance! confidence={:.2}, candidates={}",
+ guidance.confidence, guidance.ranked_candidates.len());
// Use Pilot's ranked order if available
if guidance.has_candidates() {
@@ -151,15 +160,19 @@ impl SearchTree for BeamSearch {
&context.query,
)
} else {
+ println!("[DEBUG] BeamSearch: Guidance has no candidates, using algorithm scoring");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: pilot.guide_start() returned None");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: guide_at_start=false, skipping Pilot");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: No Pilot available");
self.score_candidates_with_query(tree, &root_children, &context.query)
};
@@ -168,6 +181,14 @@ impl SearchTree for BeamSearch {
.map(|(node_id, score)| SearchPath::from_node(node_id, score))
.collect();
+ // Debug: show initial scores
+ println!("[DEBUG] Initial {} candidates after scoring", current_beam.len());
+ for (i, path) in current_beam.iter().enumerate().take(5) {
+ if let Some(node) = tree.get(path.leaf.unwrap_or(tree.root())) {
+ println!("[DEBUG] Initial {}: score={:.3}, title='{}'", i, path.score, node.title);
+ }
+ }
+
// Keep top beam_width
current_beam.truncate(beam_width);
@@ -215,6 +236,7 @@ impl SearchTree for BeamSearch {
children.len()
);
+ println!("[DEBUG] BEAM SEARCH: Pilot intervening at decision point");
match p.decide(&state).await {
decision => {
pilot_interventions += 1;
diff --git a/rust/src/retrieval/search/greedy.rs b/rust/src/retrieval/search/greedy.rs
index b539cd23..a8ee80ed 100644
--- a/rust/src/retrieval/search/greedy.rs
+++ b/rust/src/retrieval/search/greedy.rs
@@ -144,6 +144,7 @@ impl SearchTree for GreedySearch {
children.len()
);
+ println!("[DEBUG] GREEDY SEARCH: Pilot intervening at decision point");
match p.decide(&state).await {
decision => {
pilot_interventions += 1;
diff --git a/rust/src/retrieval/search/scorer.rs b/rust/src/retrieval/search/scorer.rs
index f17bf118..5dbb9209 100644
--- a/rust/src/retrieval/search/scorer.rs
+++ b/rust/src/retrieval/search/scorer.rs
@@ -193,9 +193,9 @@ impl ScoringContext {
let total_score = title_score + summary_score + content_score;
- // Normalize to [0, 1] range
- let max_possible = self.query_terms.len() as f32 * 10.0; // Rough upper bound
- let normalized = (total_score / max_possible).clamp(0.0, 1.0);
+ // Normalize to [0, 1] range using sigmoid-like scaling
+ // This prevents over-penalization with few query terms
+ let normalized = (total_score / 3.0).tanh(); // 3.0 is a reasonable midpoint
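+ // For example: total_score 1.0 -> 0.32, 3.0 -> 0.76, 6.0 -> 0.96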
// Apply depth penalty
let depth_factor = 1.0 - (node.depth as f32 * self.depth_penalty).min(0.5);
diff --git a/rust/src/retrieval/stages/analyze.rs b/rust/src/retrieval/stages/analyze.rs
index 3eabca1f..8dd875e6 100644
--- a/rust/src/retrieval/stages/analyze.rs
+++ b/rust/src/retrieval/stages/analyze.rs
@@ -7,14 +7,16 @@
//! - Query complexity (Simple/Medium/Complex)
//! - Keywords for matching
//! - Target sections based on ToC matching
+//! - Query decomposition for complex queries
use async_trait::async_trait;
use tracing::info;
use crate::document::{DocumentTree, TocView};
use crate::retrieval::complexity::ComplexityDetector;
+use crate::retrieval::decompose::{DecompositionConfig, QueryDecomposer};
use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage, StageOutcome};
-// QueryComplexity is used in context
+use crate::llm::LlmClient;
/// Analyze Stage - analyzes queries for retrieval planning.
///
@@ -22,17 +24,25 @@ use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage,
/// 1. Detects query complexity (Simple/Medium/Complex)
/// 2. Extracts keywords for matching
/// 3. Matches target sections from ToC
+/// 4. Decomposes complex queries into sub-queries (if enabled)
///
/// # Example
///
/// ```rust,ignore
/// let stage = AnalyzeStage::new()
-/// .with_toc_matching(true);
+/// .with_toc_matching(true)
+/// .with_decomposition(true);
/// ```
pub struct AnalyzeStage {
complexity_detector: ComplexityDetector,
toc_view: TocView,
enable_toc_matching: bool,
+ /// Query decomposer for complex queries.
+ query_decomposer: Option,
+ /// Enable query decomposition.
+ enable_decomposition: bool,
+ /// Complexity threshold for triggering decomposition.
+ decomposition_threshold: f32,
}
impl Default for AnalyzeStage {
@@ -48,6 +58,9 @@ impl AnalyzeStage {
complexity_detector: ComplexityDetector::new(),
toc_view: TocView::new(),
enable_toc_matching: true,
+ query_decomposer: None,
+ enable_decomposition: false,
+ decomposition_threshold: 0.6,
}
}
@@ -57,6 +70,42 @@ impl AnalyzeStage {
self
}
+ /// Enable query decomposition with default configuration.
+ pub fn with_decomposition(mut self, enable: bool) -> Self {
+ self.enable_decomposition = enable;
+ if enable && self.query_decomposer.is_none() {
+ self.query_decomposer = Some(QueryDecomposer::new(DecompositionConfig::default()));
+ }
+ self
+ }
+
+ /// Enable query decomposition with custom configuration.
+ pub fn with_decomposition_config(mut self, config: DecompositionConfig) -> Self {
+ self.enable_decomposition = true;
+ self.query_decomposer = Some(QueryDecomposer::new(config));
+ self
+ }
+
+ /// Enable query decomposition with an LLM client.
+ pub fn with_llm_client(mut self, client: LlmClient) -> Self {
+ // Replaces any existing decomposer with a default-config one using this client
+ self.query_decomposer = Some(
+ QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client),
+ );
+ self.enable_decomposition = true;
+ self
+ }
+
+ /// Set complexity threshold for triggering decomposition.
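+ ///
+ /// A minimal usage sketch:
+ ///
+ /// ```rust,ignore
+ /// // Decompose only clearly complex queries
+ /// // (Simple/Medium/Complex map to 0.3/0.6/0.9 in this stage)
+ /// let stage = AnalyzeStage::new()
+ ///     .with_decomposition(true)
+ ///     .with_decomposition_threshold(0.8);
+ /// ```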
+ pub fn with_decomposition_threshold(mut self, threshold: f32) -> Self {
+ self.decomposition_threshold = threshold.clamp(0.0, 1.0);
+ self
+ }
+
/// Extract keywords from a query.
fn extract_keywords(&self, query: &str) -> Vec<String> {
// Simple keyword extraction:
@@ -182,7 +231,42 @@ impl RetrievalStage for AnalyzeStage {
info!("Target sections: {:?}", ctx.target_sections);
}
- // 4. Update metrics
+ // 4. Decompose query if enabled and complex enough
+ if self.enable_decomposition {
+ if let Some(ref decomposer) = self.query_decomposer {
+ let complexity_score = ctx.complexity
+ .as_ref()
+ .map(|c| match c {
+ crate::retrieval::types::QueryComplexity::Simple => 0.3,
+ crate::retrieval::types::QueryComplexity::Medium => 0.6,
+ crate::retrieval::types::QueryComplexity::Complex => 0.9,
+ })
+ .unwrap_or(0.5);
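+ // With the default threshold (0.6), Medium and Complex queries trigger decomposition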
+
+ if complexity_score >= self.decomposition_threshold {
+ info!("Decomposing query (complexity: {:.2})", complexity_score);
+ match decomposer.decompose(&ctx.query).await {
+ Ok(result) => {
+ if result.was_decomposed {
+ info!(
+ "Query decomposed into {} sub-queries",
+ result.sub_queries.len()
+ );
+ for (i, sq) in result.sub_queries.iter().enumerate() {
+ info!(" Sub-query {}: {} (priority: {})", i, sq.text, sq.priority);
+ }
+ }
+ ctx.decomposition = Some(result);
+ }
+ Err(e) => {
+ info!("Query decomposition failed: {}, continuing with original query", e);
+ }
+ }
+ }
+ }
+ }
+
+ // 5. Update metrics
- ctx.metrics.llm_calls += 0; // No LLM calls in this stage
+ ctx.metrics.llm_calls += 0; // Keyword/ToC analysis itself makes no direct LLM calls
Ok(StageOutcome::cont())
diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs
index 31e7f173..ad8858f2 100644
--- a/rust/src/retrieval/stages/evaluate.rs
+++ b/rust/src/retrieval/stages/evaluate.rs
@@ -283,6 +283,7 @@ impl EvaluateStage {
/// Calculate overall confidence score.
fn calculate_confidence(&self, ctx: &PipelineContext) -> f32 {
if ctx.candidates.is_empty() {
+ println!("[DEBUG] calculate_confidence: no candidates, returning 0.0");
return 0.0;
}
@@ -296,7 +297,10 @@ impl EvaluateStage {
SufficiencyLevel::Insufficient => 0.4,
};
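+ // e.g. avg_score 0.8 with an Insufficient factor (0.4) yields confidence 0.32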
- avg_score * sufficiency_factor
+ let confidence = avg_score * sufficiency_factor;
+ println!("[DEBUG] calculate_confidence: avg_score={:.3}, sufficiency={:?}, factor={:.1}, confidence={:.3}",
+ avg_score, ctx.sufficiency, sufficiency_factor, confidence);
+ confidence
}
}
@@ -325,6 +329,9 @@ impl RetrievalStage for EvaluateStage {
async fn execute(&self, ctx: &mut PipelineContext) -> crate::error::Result {
let start = std::time::Instant::now();
+ println!("[DEBUG] EvaluateStage: {} candidates, iteration {}",
+ ctx.candidates.len(), ctx.search_iterations);
+
info!(
"Judging sufficiency: {} candidates, iteration {}",
ctx.candidates.len(),
diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs
index 78f07c20..17006bdf 100644
--- a/rust/src/retrieval/stages/search.rs
+++ b/rust/src/retrieval/stages/search.rs
@@ -6,13 +6,21 @@
//! This stage executes the selected search algorithm using
//! the selected retrieval strategy. When a Pilot is provided,
//! it can provide semantic guidance at key decision points.
+//!
+//! # LLM-First Search
+//!
+//! When an LLM client is provided, the stage will first attempt to
+//! directly locate the top-3 most relevant nodes using the TOC,
+//! falling back to tree traversal algorithms (Beam/Greedy) only if
+//! LLM fails or returns insufficient results.
use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::{info, warn};
-use crate::document::DocumentTree;
-// LlmClient is used via strategy
+use crate::document::{DocumentTree, TocView};
+use crate::llm::LlmClient;
use crate::retrieval::RetrievalContext; // Legacy context
use crate::retrieval::pilot::Pilot;
use crate::retrieval::pipeline::{
@@ -57,6 +65,8 @@ pub struct SearchStage {
hybrid_strategy: Option>,
/// Pilot for navigation guidance (optional).
pilot: Option<Arc<dyn Pilot>>,
+ /// LLM client for direct TOC-based search (optional).
+ llm_client: Option<Arc<dyn LlmClient>>,
}
impl Default for SearchStage {
@@ -74,9 +84,20 @@ impl SearchStage {
semantic_strategy: None,
hybrid_strategy: None,
pilot: None,
+ llm_client: None,
}
}
+ /// Add LLM client for direct TOC-based search.
+ ///
+ /// When provided, the stage first attempts to locate relevant
+ /// nodes directly from the TOC, falling back to the tree traversal
+ /// algorithms only if the LLM call fails or returns no usable candidates.
+ pub fn with_llm_client(mut self, client: Option<Arc<dyn LlmClient>>) -> Self {
+ self.llm_client = client;
+ self
+ }
+
/// Add Pilot for semantic navigation guidance.
///
/// When provided, the search algorithm will consult the Pilot
@@ -210,6 +231,172 @@ impl SearchStage {
candidates
}
+
+ /// Build a flat TOC list for LLM consumption.
+ ///
+ /// Returns a formatted string with numbered entries:
+ /// ```text
+ /// [1] Title: "Overview"
+ /// Summary: "This section covers..."
+ /// [2] Title: "Architecture"
+ /// Summary: "The system architecture..."
+ /// ```
+ fn build_toc_for_llm(&self, tree: &DocumentTree) -> (String, Vec<crate::document::NodeId>) {
+ let mut entries = Vec::new();
+ let mut node_ids = Vec::new();
+
+ fn collect_entries(
+ tree: &DocumentTree,
+ node_id: crate::document::NodeId,
+ entries: &mut Vec<(usize, String, String)>,
+ node_ids: &mut Vec<crate::document::NodeId>,
+ index: &mut usize,
+ ) {
+ if let Some(node) = tree.get(node_id) {
+ let title = node.title.clone();
+ let summary = if node.summary.is_empty() {
+ "(no summary)".to_string()
+ } else {
+ node.summary.clone()
+ };
+ entries.push((*index, title, summary));
+ node_ids.push(node_id);
+ *index += 1;
+
+ for child_id in tree.children(node_id) {
+ collect_entries(tree, child_id, entries, node_ids, index);
+ }
+ }
+ }
+
+ collect_entries(tree, tree.root(), &mut entries, &mut node_ids, &mut 0);
+
+ let toc_str = entries
+ .iter()
+ .map(|(idx, title, summary)| {
+ format!("[{}] Title: \"{}\"\n Summary: \"{}\"", idx + 1, title, summary)
+ })
+ .collect::<Vec<_>>()
+ .join("\n\n");
+
+ (toc_str, node_ids)
+ }
+
+ /// Locate top candidates directly via LLM using TOC.
+ ///
+ /// This method bypasses tree traversal by asking the LLM to
+ /// directly identify the most relevant nodes from the TOC.
+ async fn locate_via_llm(
+ &self,
+ query: &str,
+ tree: &DocumentTree,
+ ) -> Option<Vec<CandidateNode>> {
+ let llm_client = self.llm_client.as_ref()?;
+ let (toc_str, node_ids) = self.build_toc_for_llm(tree);
+
+ if node_ids.is_empty() {
+ warn!("No nodes in tree for LLM search");
+ return None;
+ }
+
+ let system_prompt = r#"You are a document navigation assistant. Your task is to locate the most relevant sections in a document hierarchy for a user's query.
+
+CRITICAL INSTRUCTIONS:
+1. Analyze the user query carefully to understand the intent
+2. Examine the provided Table of Contents (TOC) with numbered entries
+3. Select the TOP 3 most relevant entries that would contain the answer
+4. You MUST respond with ONLY valid JSON. No markdown code blocks. No explanations outside JSON.
+
+Your response must have this EXACT structure:
+{
+ "reasoning": "Brief analysis of the query and why you selected these entries",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "Why this entry matches the query"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "Why this entry is also relevant"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "Why this entry might be relevant"}
+ ]
+}
+
+Rules:
+- node_id: MUST be a number from the provided TOC (the number in [N] brackets)
+- relevance_score: Number between 0.0 and 1.0 (higher = more relevant)
+- reason: Brief explanation for each selection
+- candidates: Must have exactly 3 items, ordered by relevance (highest first)"#;
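+
+ // The JSON contract requested above is mirrored by the `LlmLocateResponse`
+ // and `LlmLocateCandidate` structs at the bottom of this file; a parse
+ // failure simply falls back to tree traversal.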
+
+ let user_prompt = format!(
+ "USER QUERY: {}\n\nDOCUMENT TOC ({} entries):\n{}\n\nBased on the query and TOC above, select the TOP 3 most relevant entries.\n\nRespond with ONLY the JSON object:",
+ query,
+ node_ids.len(),
+ toc_str
+ );
+
+ info!("Attempting LLM-based search for query: '{}'", query);
+
+ match llm_client.complete(system_prompt, &user_prompt).await {
+ Ok(response) => {
+ // Parse JSON response
+ match serde_json::from_str::<LlmLocateResponse>(&response) {
+ Ok(llm_response) => {
+ let mut candidates = Vec::new();
+
+ for candidate in llm_response.candidates {
+ // node_id is 1-indexed from LLM, convert to 0-indexed
+ let idx = candidate.node_id.saturating_sub(1);
+ if idx < node_ids.len() {
+ let node_id = node_ids[idx];
+ if let Some(node) = tree.get(node_id) {
+ candidates.push(CandidateNode::new(
+ node_id,
+ candidate.relevance_score,
+ node.depth,
+ tree.is_leaf(node_id),
+ ));
+ info!(
+ "LLM selected: [{}] '{}' (score: {:.2})",
+ candidate.node_id, node.title, candidate.relevance_score
+ );
+ }
+ }
+ }
+
+ if candidates.is_empty() {
+ warn!("LLM returned no valid candidates");
+ return None;
+ }
+
+ println!("LLM search found {} candidates", candidates.len());
+ println!("LLM candidates content: {:?}", candidates);
+ Some(candidates)
+ }
+ Err(e) => {
+ warn!("Failed to parse LLM response as JSON: {}", e);
+ warn!("Raw response: {}", response);
+ None
+ }
+ }
+ }
+ Err(e) => {
+ warn!("LLM call failed: {}", e);
+ None
+ }
+ }
+ }
+}
+
+/// LLM response for locate query.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct LlmLocateResponse {
+ reasoning: String,
+ candidates: Vec,
+}
+
+/// A candidate from LLM locate response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct LlmLocateCandidate {
+ node_id: usize,
+ relevance_score: f32,
+ reason: String,
+}
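+
+// Shape check (illustrative addition): verifies the structs above accept
+// the JSON layout the system prompt requests from the LLM.
+#[cfg(test)]
+mod llm_locate_response_tests {
+ use super::*;
+
+ #[test]
+ fn parses_prompted_json_shape() {
+ let raw = r#"{
+ "reasoning": "query is about configuration",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "direct match"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "related section"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "background"}
+ ]
+ }"#;
+ let parsed: LlmLocateResponse =
+ serde_json::from_str(raw).expect("prompted JSON shape should deserialize");
+ assert_eq!(parsed.candidates.len(), 3);
+ assert!(parsed.candidates[0].relevance_score >= parsed.candidates[1].relevance_score);
+ }
+}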
#[async_trait]
@@ -245,6 +432,9 @@ impl RetrievalStage for SearchStage {
// Reset Pilot state for new query
if let Some(ref pilot) = self.pilot {
pilot.reset();
+ println!("[DEBUG] SearchStage: Pilot is available, is_active={}", pilot.is_active());
+ } else {
+ println!("[DEBUG] SearchStage: No Pilot available");
}
info!(
@@ -261,6 +451,29 @@ impl RetrievalStage for SearchStage {
// Increment search iteration
ctx.increment_search_iteration();
+ // === Try LLM-first search (direct TOC-based location) ===
+ if self.llm_client.is_some() {
+ info!("Attempting LLM-first search for query: '{}'", ctx.query);
+
+ if let Some(candidates) = self.locate_via_llm(&ctx.query, &ctx.tree).await {
+ if !candidates.is_empty() {
+ ctx.candidates = candidates;
+ ctx.metrics.search_time_ms += start.elapsed().as_millis() as u64;
+ ctx.metrics.nodes_visited += ctx.candidates.len();
+ ctx.metrics.llm_calls += 1;
+
+ info!(
+ "LLM-first search found {} candidates (skipped tree traversal)",
+ ctx.candidates.len()
+ );
+
+ return Ok(StageOutcome::cont());
+ }
+ }
+
+ info!("LLM-first search returned no results, falling back to tree traversal");
+ }
+
// Build search config for search algorithms
let search_config = SearchAlgConfig {
top_k: config.beam_width * 2,
@@ -270,48 +483,158 @@ impl RetrievalStage for SearchStage {
leaf_only: false,
};
- // Create legacy context for search algorithms
- let legacy_ctx = RetrievalContext::new(
- &ctx.query,
- ctx.options.max_tokens,
- ctx.options.sufficiency_check,
- );
-
// Get Pilot reference (or None if not available)
let pilot_ref: Option<&dyn Pilot> = self.pilot.as_deref();
+ println!("[DEBUG] SearchStage: pilot_ref is {}", if pilot_ref.is_some() { "Some" } else { "None" });
+
+ // === Check for decomposition ===
+ if let Some(ref decomposition) = ctx.decomposition {
+ if decomposition.was_decomposed && decomposition.is_multi_turn() {
+ info!("Processing {} decomposed sub-queries", decomposition.sub_queries.len());
+
+ let mut all_paths = Vec::new();
+ let mut all_candidates = Vec::new();
+ let mut total_pilot_interventions = 0u64;
+
+ // Process each sub-query in execution order
+ let order = decomposition.execution_order();
+ for sub_idx in order {
+ let sub_query = &decomposition.sub_queries[sub_idx];
+ info!("Processing sub-query : {}", sub_query.text);
+
+ // Create legacy context for this sub-query
+ let legacy_ctx = RetrievalContext::new(
+ &sub_query.text,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search for sub-query: algorithm={:?}, top_k={}, beam_width={}",
+ algorithm, search_config.top_k, search_config.beam_width);
+
+ // Execute search for this sub-query
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ // Use beam search as fallback for now
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
+
+ let sub_path_count = result.paths.len();
+ all_candidates.extend(self.extract_candidates(&result.paths, &ctx.tree));
+ all_paths.extend(result.paths);
+ total_pilot_interventions += result.pilot_interventions as u64;
+
+ info!("Sub-query '{}' found {} paths", sub_query.text, sub_path_count);
+ }
- // Execute search based on algorithm with Pilot
- let result = match algorithm {
- SearchAlgorithm::Greedy => {
- let search = GreedySearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
- }
- SearchAlgorithm::Beam => {
- let search = BeamSearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
- }
- SearchAlgorithm::Mcts => {
- // Use beam search as fallback for now
- let search = BeamSearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
+ // Merge results
+ ctx.search_paths = all_paths;
+ ctx.candidates = all_candidates;
+
+ info!(
+ "Search complete: {} total candidates from {} sub-queries (pilot interventions: {})",
+ ctx.candidates.len(),
+ decomposition.sub_queries.len(),
+ total_pilot_interventions
+ );
+ } else {
+ // Single query (not decomposed, or only one sub-query): process normally
+ let legacy_ctx = RetrievalContext::new(
+ &ctx.query,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}",
+ algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score);
+
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ // Use beam search as fallback for now
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
+
+ ctx.search_paths = result.paths;
+ ctx.candidates = self.extract_candidates(&ctx.search_paths, &ctx.tree);
+
+ info!(
+ "Search found {} paths (pilot interventions: {})",
+ ctx.search_paths.len(),
+ result.pilot_interventions
+ );
}
- };
+ } else {
+ // No decomposition available; process the original query
+ let legacy_ctx = RetrievalContext::new(
+ &ctx.query,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}",
+ algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score);
+
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ // Use beam search as fallback for now
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
- info!(
- "Search found {} paths (pilot interventions: {})",
- result.paths.len(),
- result.pilot_interventions
- );
+ ctx.search_paths = result.paths;
+ ctx.candidates = self.extract_candidates(&ctx.search_paths, &ctx.tree);
+
+ info!(
+ "Search found {} paths (pilot interventions: {})",
+ ctx.search_paths.len(),
+ result.pilot_interventions
+ );
+ }
+
+ // Debug output
+ debug!("Search found {} total paths, {} candidates", ctx.search_paths.len(), ctx.candidates.len());
+ for (i, path) in ctx.search_paths.iter().enumerate().take(5) {
+ if let Some(leaf_id) = path.leaf {
+ if let Some(node) = ctx.tree.get(leaf_id) {
+ debug!("Path {}: score={:.3}, title='{}', content_len={}",
+ i, path.score, node.title, node.content.len());
+ }
+ }
+ }
- // Update context with results
- ctx.search_paths = result.paths.clone();
- ctx.candidates = self.extract_candidates(&result.paths, &ctx.tree);
+ // Debug output
+ debug!("Extracted {} candidates", ctx.candidates.len());
+ for (i, c) in ctx.candidates.iter().enumerate().take(5) {
+ if let Some(node) = ctx.tree.get(c.node_id) {
+ debug!("Candidate {}: score={:.3}, title='{}'",
+ i, c.score, node.title);
+ }
+ }
// Update metrics
ctx.metrics.search_time_ms += start.elapsed().as_millis() as u64;