diff --git a/README.md b/README.md
index 603adb21..e96095ad 100644
--- a/README.md
+++ b/README.md
@@ -163,6 +163,105 @@ async fn main() -> vectorless::Result<()> {
| **Feedback Learning** | Improves from user feedback over time |
| **Multi-turn Queries** | Handles complex questions with decomposition |
+## Configuration
+
+### Zero Configuration (Recommended)
+
+Just set `OPENAI_API_KEY` and you're ready to go:
+
+```bash
+export OPENAI_API_KEY="sk-..."
+```
+
+**Python**
+
+```python
+from vectorless import Engine
+
+# Uses OPENAI_API_KEY from environment
+engine = Engine(workspace="./data")
+```
+
+**Rust**
+
+```rust
+use vectorless::Engine;
+
+let client = Engine::builder()
+ .with_workspace("./workspace")
+ .build().await?;
+```
+
+### Environment Variables
+
+| Variable | Description |
+|----------|-------------|
+| `OPENAI_API_KEY` | LLM API key |
+| `VECTORLESS_MODEL` | Default model (e.g., `gpt-4o-mini`) |
+| `VECTORLESS_ENDPOINT` | API endpoint URL |
+| `VECTORLESS_WORKSPACE` | Workspace directory |
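+
+As a quick sketch (values illustrative, and assuming the Python bindings apply the same environment overrides as the Rust builder), the same settings can be staged from Python before the engine is created:
+
+```python
+import os
+
+from vectorless import Engine
+
+# Illustrative values; these would normally be exported in your shell
+os.environ.setdefault("OPENAI_API_KEY", "sk-...")
+os.environ.setdefault("VECTORLESS_MODEL", "gpt-4o-mini")
+os.environ.setdefault("VECTORLESS_WORKSPACE", "./workspace")
+
+# The engine reads all of these from the environment
+engine = Engine()
+```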
+
+### Advanced Configuration
+
+For fine-grained control, use a config file:
+
+```bash
+cp config.toml ./vectorless.toml
+```
+
+**Python**
+
+```python
+from vectorless import Engine
+
+# Use full configuration file
+engine = Engine(config_path="./vectorless.toml")
+
+# Or override specific settings
+engine = Engine(
+ config_path="./vectorless.toml",
+ model="gpt-4o", # Override model from config
+)
+```
+
+**Rust**
+
+```rust
+use vectorless::Engine;
+
+// Use full configuration file
+let client = Engine::builder()
+ .with_config_path("./vectorless.toml")
+ .build().await?;
+
+// Or override specific settings
+let client = Engine::builder()
+ .with_config_path("./vectorless.toml")
+ .with_model("gpt-4o", None) // Override model
+ .build().await?;
+```
+
+### Configuration Priority
+
+Configuration is applied in order; later sources override earlier ones:
+
+1. Default configuration
+2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`)
+3. Explicit config file (`config_path` / `with_config_path`)
+4. Environment variables
+5. Constructor/builder parameters (highest priority)
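+
+As a sketch of the precedence rules (file name and model values are illustrative): an environment variable beats the config file, and a constructor argument beats both:
+
+```python
+import os
+
+from vectorless import Engine
+
+os.environ["VECTORLESS_MODEL"] = "gpt-4o-mini"  # step 4 beats steps 1-3
+
+# The explicit `model` argument (step 5) wins over the env var and the file
+engine = Engine(config_path="./vectorless.toml", model="gpt-4o")
+```
+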
## Architecture
@@ -177,7 +276,7 @@ async fn main() -> vectorless::Result<()> {
## Examples
-See the [examples/](examples/) directory.
+See the [examples/](examples/) directory for more usage patterns.
## Contributing
diff --git a/docs/samples/sample.md b/docs/samples/sample.md
index 8868cd20..1ab9ce35 100644
--- a/docs/samples/sample.md
+++ b/docs/samples/sample.md
@@ -29,53 +29,4 @@ The core module provides fundamental types:
The parser module handles document parsing:
- `MarkdownParser` — Parse Markdown files
- `PdfParser` — Parse PDF files (planned)
-- `HtmlParser` — Parse HTML files (planned)
-
-## Usage Examples
-
-### Basic Usage
-
-```rust
-use vectorless::client::{Vectorless, VectorlessBuilder};
-
-let client = VectorlessBuilder::new()
- .with_workspace("./workspace")
- .build()?;
-
-let doc_id = client.index("./document.md").await?;
-```
-
-### Advanced Usage
-
-You can customize the retrieval process:
-
-```rust
-use vectorless::{LlmNavigator, RetrieveOptions};
-
-let retriever = LlmNavigator::with_defaults();
-let options = RetrieveOptions::new()
- .with_top_k(5)
- .with_min_score(0.5);
-
-let results = retriever.retrieve(&tree, "What is vectorless?", &options).await?;
-```
-
-## Configuration
-
-The library can be configured via TOML files or programmatically.
-
-### Configuration File
-
-```toml
-[summary]
-model = "gpt-4"
-max_tokens = 200
-
-[retrieval]
-model = "gpt-4"
-top_k = 3
-```
-
-## API Reference
-
-See the API documentation for detailed information about each function and type.
+- `HtmlParser` — Parse HTML files (planned)
\ No newline at end of file
diff --git a/examples/python/advanced/README.md b/examples/python/advanced/README.md
new file mode 100644
index 00000000..2d11afbc
--- /dev/null
+++ b/examples/python/advanced/README.md
@@ -0,0 +1,45 @@
+# Advanced Example - Full Configuration
+
+Use a configuration file for fine-grained control.
+
+## Setup
+
+```bash
+pip install vectorless
+
+# Copy the example config
+cp ../../../config.toml ./vectorless.toml
+
+# Edit to customize your settings
+vim vectorless.toml
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Configuration File Structure
+
+```toml
+[llm]
+api_key = "sk-..."
+
+[llm.summary]
+model = "gpt-4o-mini"
+max_tokens = 200
+
+[llm.retrieval]
+model = "gpt-4o"
+max_tokens = 100
+
+[retrieval]
+top_k = 5
+beam_width = 3
+max_iterations = 10
+
+[storage]
+workspace_dir = "./workspace"
+cache_size = 100
+```
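+
+A minimal sketch of how `main.py` consumes this file (the second call just shows that a constructor argument overrides the corresponding file entry):
+
+```python
+from vectorless import Engine
+
+# Load every setting from the TOML file
+engine = Engine(config_path="./vectorless.toml")
+
+# Keep the file but override a single field
+engine = Engine(config_path="./vectorless.toml", model="gpt-4o")
+```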
diff --git a/examples/python/advanced/main.py b/examples/python/advanced/main.py
new file mode 100644
index 00000000..d223ad02
--- /dev/null
+++ b/examples/python/advanced/main.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+Advanced example - Full Configuration File.
+
+This example demonstrates how to use a full configuration file
+for fine-grained control over all settings.
+
+Usage:
+ cp ../../../config.toml ./vectorless.toml
+ # Edit vectorless.toml to customize settings
+ python main.py
+"""
+
+import os
+from vectorless import Engine, IndexContext
+
+# Path to config file (relative to this script)
+CONFIG_PATH = "./vectorless.toml"
+WORKSPACE = "./workspace"
+
+
+def main():
+ print("=== Vectorless Advanced Example (Full Configuration) ===\n")
+
+ # Check if config file exists
+ if not os.path.exists(CONFIG_PATH):
+ print(f"Error: Config file not found: {CONFIG_PATH}")
+ print("\nCreate it by copying the example:")
+ print(f" cp ../../../config.toml {CONFIG_PATH}")
+ print("\nThen edit it to customize your settings.")
+ return
+
+ # Create engine with config file
+ engine = Engine(config_path=CONFIG_PATH)
+
+ print(f"✓ Engine created with config file: {CONFIG_PATH}\n")
+
+ # Index a document
+ content = """
+# System Documentation
+
+## Architecture
+
+The system consists of three main components:
+
+1. **Index Pipeline** - Parses documents and builds a navigable tree
+2. **Retrieval Pipeline** - Queries and retrieves relevant content
+3. **Pilot** - LLM-powered navigation guide
+
+## Configuration Options
+
+### LLM Settings
+- `model`: The LLM model to use (e.g., "gpt-4o", "gpt-4o-mini")
+- `endpoint`: API endpoint URL
+- `api_key`: Your API key
+- `temperature`: Generation temperature (0.0 for deterministic)
+
+### Retrieval Settings
+- `top_k`: Number of results to return
+- `max_iterations`: Maximum search iterations
+- `beam_width`: Beam width for multi-path search
+
+### Storage Settings
+- `workspace_dir`: Directory for persisted documents
+- `cache_size`: LRU cache size
+- `compression`: Enable/disable compression
+
+## Performance Tuning
+
+For faster retrieval:
+- Use a smaller model like gpt-4o-mini
+- Reduce max_iterations
+- Enable caching
+
+For higher accuracy:
+- Use a more capable model like gpt-4o
+- Increase beam_width
+- Enable multi-turn decomposition
+"""
+ ctx = IndexContext.from_content(content, name="system_docs", format="markdown")
+ doc_id = engine.index(ctx)
+ print(f"✓ Indexed: {doc_id}\n")
+
+ # Query examples
+ questions = [
+ "What are the main components?",
+ "How can I improve retrieval speed?",
+ "What settings are available?",
+ ]
+
+ for q in questions:
+ result = engine.query(doc_id, q)
+ print(f"Q: {q}")
+ print(f"A: {result.content[:150]}...")
+ print(f" Score: {result.score:.2f}\n")
+
+ # Cleanup
+ engine.remove(doc_id)
+ print("✓ Cleaned up")
+
+ # Print configuration info
+ print("\n" + "=" * 60)
+ print("Configuration Priority")
+ print("=" * 60)
+ print("""
+1. Default configuration
+2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)
+3. Explicit config file (config_path parameter)
+4. Environment variables (OPENAI_API_KEY, etc.)
+5. Constructor parameters (api_key, model, etc.)
+""")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/python/advanced/pyproject.toml b/examples/python/advanced/pyproject.toml
new file mode 100644
index 00000000..a85a964d
--- /dev/null
+++ b/examples/python/advanced/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-advanced-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python/basic/README.md b/examples/python/basic/README.md
new file mode 100644
index 00000000..e74975de
--- /dev/null
+++ b/examples/python/basic/README.md
@@ -0,0 +1,16 @@
+# Basic Example - Zero Configuration
+
+The simplest way to use Vectorless.
+
+## Setup
+
+```bash
+pip install vectorless
+export OPENAI_API_KEY="sk-..."
+```
+
+## Run
+
+```bash
+python main.py
+```
diff --git a/examples/python/basic/main.py b/examples/python/basic/main.py
new file mode 100644
index 00000000..4ae34b42
--- /dev/null
+++ b/examples/python/basic/main.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+Basic example - Zero Configuration.
+
+This example demonstrates the simplest way to use Vectorless.
+Just set the OPENAI_API_KEY environment variable and you're ready to go.
+
+Usage:
+ export OPENAI_API_KEY="sk-..."
+ python main.py
+"""
+
+import os
+import tempfile
+from vectorless import Engine, IndexContext
+
+
+def main():
+ print("=== Vectorless Basic Example (Zero Configuration) ===\n")
+
+ # Zero configuration: Just set OPENAI_API_KEY environment variable
+ with tempfile.TemporaryDirectory() as workspace:
+ engine = Engine(workspace=workspace)
+
+ print("✓ Engine created (using OPENAI_API_KEY from environment)\n")
+
+ # Index from text content
+ content = """
+# Technical Manual
+
+## Chapter 1: Introduction
+
+Vectorless is a library for querying structured documents using natural language.
+
+## Chapter 2: Installation
+
+Install with pip:
+```
+pip install vectorless
+```
+
+## Chapter 3: Usage
+
+```python
+from vectorless import Engine, IndexContext
+
+engine = Engine(workspace="./data")
+ctx = IndexContext.from_file("./report.pdf")
+doc_id = engine.index(ctx)
+
+result = engine.query(doc_id, "What is the total revenue?")
+print(result.content)
+```
+"""
+ ctx = IndexContext.from_content(content, name="manual", format="markdown")
+ doc_id = engine.index(ctx)
+ print(f"✓ Indexed: {doc_id}\n")
+
+ # Query
+ result = engine.query(doc_id, "How do I install vectorless?")
+ print("Query: How do I install vectorless?")
+ print(f"Score: {result.score:.2f}")
+ print(f"Result: {result.content[:200]}...\n")
+
+ # Cleanup
+ engine.remove(doc_id)
+ print("✓ Cleaned up")
+
+ print("\n=== Done ===")
+
+
+if __name__ == "__main__":
+ if not os.environ.get("OPENAI_API_KEY"):
+ print("Error: OPENAI_API_KEY environment variable not set.")
+ print("Set it with: export OPENAI_API_KEY='sk-...'")
+ exit(1)
+
+ main()
diff --git a/examples/python/basic/pyproject.toml b/examples/python/basic/pyproject.toml
new file mode 100644
index 00000000..d99ee750
--- /dev/null
+++ b/examples/python/basic/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-basic-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python/custom_config/README.md b/examples/python/custom_config/README.md
new file mode 100644
index 00000000..b743098a
--- /dev/null
+++ b/examples/python/custom_config/README.md
@@ -0,0 +1,48 @@
+# Custom Configuration Example
+
+Use your own API key, model, and endpoint.
+
+## Setup
+
+```bash
+pip install vectorless
+```
+
+## Configure
+
+Edit `main.py` and update the settings:
+
+```python
+API_KEY = "your-api-key"
+MODEL = "gpt-4o-mini" # or "deepseek-chat", "claude-3-5-sonnet", etc.
+ENDPOINT = "https://api.openai.com/v1" # or your custom endpoint
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Other Providers
+
+### DeepSeek
+```python
+API_KEY = "sk-..."
+MODEL = "deepseek-chat"
+ENDPOINT = "https://api.deepseek.com/v1"
+```
+
+### Azure OpenAI
+```python
+API_KEY = "your-azure-key"
+MODEL = "gpt-4o"
+ENDPOINT = "https://your-resource.openai.azure.com/openai/deployments/your-deployment"
+```
+
+### Local LLM (Ollama)
+```python
+API_KEY = None # Not needed
+MODEL = "llama3"
+ENDPOINT = "http://localhost:11434/v1"
+```
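+
+Whichever provider you choose, the three values are passed straight to the constructor. A sketch (key, model, and endpoint are the placeholders from the snippets above):
+
+```python
+from vectorless import Engine
+
+engine = Engine(
+    workspace="./workspace",
+    api_key=API_KEY,    # placeholder from the snippets above
+    model=MODEL,
+    endpoint=ENDPOINT,
+)
+```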
diff --git a/examples/python/custom_config/main.py b/examples/python/custom_config/main.py
new file mode 100644
index 00000000..d6e0bda4
--- /dev/null
+++ b/examples/python/custom_config/main.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+"""
+Custom configuration example - Using your own API key, model, and endpoint.
+
+This example demonstrates how to use custom LLM settings without a config file.
+Useful when you want to use different providers like DeepSeek, Azure OpenAI, etc.
+
+Usage:
+ python main.py
+"""
+
+import tempfile
+from vectorless import Engine, IndexContext
+
+# ============================================================
+# Configure your settings here
+# ============================================================
+API_KEY = "your-api-key"                  # Your API key
+MODEL = "gpt-4o-mini"                     # Model name
+ENDPOINT = "https://api.openai.com/v1"    # API endpoint
+WORKSPACE = "./workspace"                 # Workspace directory
+
+
+def main():
+ print("=== Vectorless Custom Configuration Example ===\n")
+
+ # Create engine with custom settings
+ engine = Engine(
+ workspace=WORKSPACE,
+ api_key=API_KEY,
+ model=MODEL,
+ endpoint=ENDPOINT,
+ )
+
+    print("✓ Engine created with custom settings")
+ print(f" Model: {MODEL}")
+ print(f" Endpoint: {ENDPOINT}\n")
+
+ # Index a document
+ content = """
+# Product Documentation
+
+## Overview
+This product helps you manage documents intelligently using LLM-powered navigation.
+
+## Features
+- Fast indexing with tree-based structure
+- Accurate retrieval using hybrid search
+- Easy to use Python and Rust APIs
+- Support for PDF, Markdown, HTML, and DOCX
+
+## Installation
+
+Install with pip:
+```bash
+pip install vectorless
+```
+
+## Quick Start
+
+```python
+from vectorless import Engine, IndexContext
+
+# Create engine
+engine = Engine(workspace="./data")
+
+# Index a document
+ctx = IndexContext.from_file("./report.pdf")
+doc_id = engine.index(ctx)
+
+# Query
+result = engine.query(doc_id, "What is the total revenue?")
+print(result.content)
+```
+
+## Configuration
+
+Vectorless supports multiple configuration methods:
+1. Zero configuration - just set OPENAI_API_KEY
+2. Custom settings - pass api_key, model, endpoint
+3. Full config file - use vectorless.toml
+"""
+ ctx = IndexContext.from_content(content, name="docs", format="markdown")
+ doc_id = engine.index(ctx)
+ print(f"✓ Indexed: {doc_id}\n")
+
+ # Check document info
+ docs = engine.list_docs()
+ print(f"Documents in workspace: {len(docs)}")
+ for d in docs:
+ print(f" - {d.name} (id: {d.id}, format: {d.format})")
+ print()
+
+ # Query
+ result = engine.query(doc_id, "How do I install the product?")
+ print("Query: How do I install the product?")
+ print(f"Score: {result.score:.2f}")
+ print(f"Result: {result.content}\n")
+
+ # Another query
+ result = engine.query(doc_id, "What features are available?")
+ print("Query: What features are available?")
+ print(f"Score: {result.score:.2f}")
+ print(f"Result: {result.content}\n")
+
+ # Cleanup
+ engine.remove(doc_id)
+ print("✓ Cleaned up")
+
+ print("\n=== Done ===")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/python/custom_config/pyproject.toml b/examples/python/custom_config/pyproject.toml
new file mode 100644
index 00000000..d316077d
--- /dev/null
+++ b/examples/python/custom_config/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-custom-config-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python_basic.py b/examples/python_basic.py
deleted file mode 100644
index e9801e79..00000000
--- a/examples/python_basic.py
+++ /dev/null
@@ -1,273 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) 2026 vectorless developers
-# SPDX-License-Identifier: Apache-2.0
-
-"""
-Basic example demonstrating the vectorless Python library.
-
-This example shows:
-1. Creating an Engine with workspace
-2. Indexing documents from different sources
-3. Querying indexed documents
-4. Managing documents (list, exists, remove)
-
-Prerequisites:
- pip install vectorless
- export OPENAI_API_KEY="sk-..."
-
-Usage:
- python python_basic.py
-"""
-
-import os
-import tempfile
-from pathlib import Path
-
-from vectorless import Engine, IndexContext, VectorlessError
-
-
-def main():
- # Create a temporary workspace for this example
- with tempfile.TemporaryDirectory() as workspace:
- print(f"Workspace: {workspace}")
- print()
-
- # ============================================================
- # 1. Create Engine
- # ============================================================
- print("=" * 60)
- print("1. Creating Engine")
- print("=" * 60)
-
- # Option A: Use OPENAI_API_KEY environment variable
- engine = Engine(workspace=workspace)
-
- # Option B: Explicit API key
- # engine = Engine(
- # workspace=workspace,
- # api_key="sk-...",
- # model="gpt-4o-mini", # optional
- # )
-
- print(f"Engine created successfully!")
- print(f"Initial document count: {engine.len()}")
- print()
-
- # ============================================================
- # 2. Index Documents
- # ============================================================
- print("=" * 60)
- print("2. Indexing Documents")
- print("=" * 60)
-
- # 2a. Index from text content (Markdown)
- markdown_content = """
-# Technical Manual
-
-## Chapter 1: Introduction
-
-This document describes the architecture of our system.
-
-## Chapter 2: Installation
-
-### System Requirements
-
-- Python 3.9+
-- Rust 1.75+
-
-### Steps
-
-1. Install dependencies
-2. Configure environment
-3. Run the application
-
-## Chapter 3: API Reference
-
-### Engine
-
-The main entry point for vectorless.
-
-```python
-engine = Engine(workspace="./data")
-```
-
-### IndexContext
-
-Context for indexing documents from various sources.
-"""
- ctx_md = IndexContext.from_text(
- markdown_content,
- name="technical_manual",
- format="markdown"
- )
- doc_id_md = engine.index(ctx_md)
- print(f"Indexed markdown document: {doc_id_md}")
-
- # 2b. Index from text content (HTML)
-    html_content = """
-<html>
-<head>
-    <title>Product Guide</title>
-</head>
-<body>
-    <h1>Product Guide</h1>
-    <h2>Getting Started</h2>
-    <p>Welcome to our product. This guide will help you get started.</p>
-    <h2>Features</h2>
-    <ul>
-        <li>Fast indexing</li>
-        <li>Accurate retrieval</li>
-        <li>Easy to use API</li>
-    </ul>
-</body>
-</html>
-"""
- ctx_html = IndexContext.from_text(
- html_content,
- name="product_guide",
- format="html"
- )
- doc_id_html = engine.index(ctx_html)
- print(f"Indexed HTML document: {doc_id_html}")
-
- # 2c. Index from text content (plain text)
- text_content = """
-Meeting Notes - Q4 Planning
-
-Date: 2024-01-15
-
-Attendees: Alice, Bob, Charlie
-
-Agenda:
-1. Review Q3 performance
-2. Set Q4 goals
-3. Resource allocation
-
-Key Decisions:
-- Increase marketing budget by 20%
-- Launch new product in March
-- Hire 5 additional engineers
-"""
- ctx_text = IndexContext.from_text(
- text_content,
- name="meeting_notes",
- format="text"
- )
- doc_id_text = engine.index(ctx_text)
- print(f"Indexed text document: {doc_id_text}")
-
- # 2d. Index from file (if you have actual files)
- # ctx_file = IndexContext.from_file("./report.pdf")
- # doc_id_file = engine.index(ctx_file)
- # print(f"Indexed file: {doc_id_file}")
-
- print(f"\nTotal documents indexed: {engine.len()}")
- print()
-
- # ============================================================
- # 3. List Documents
- # ============================================================
- print("=" * 60)
- print("3. Listing Documents")
- print("=" * 60)
-
- docs = engine.list_docs()
- for doc in docs:
- print(f" - {doc.name} (id: {doc.id}, format: {doc.format})")
- if doc.line_count:
- print(f" Lines: {doc.line_count}")
- print()
-
- # ============================================================
- # 4. Query Documents
- # ============================================================
- print("=" * 60)
- print("4. Querying Documents")
- print("=" * 60)
-
- # Query the technical manual
- questions = [
- "What are the system requirements?",
- "How do I create an Engine?",
- "What are the installation steps?",
- ]
-
- for question in questions:
- result = engine.query(doc_id_md, question)
- print(f"Q: {question}")
- print(f"A: {result.content[:200]}...")
- print(f" Score: {result.score:.2f}")
- print()
-
- # Query the meeting notes
- result = engine.query(doc_id_text, "What was decided about the marketing budget?")
- print(f"Q: What was decided about the marketing budget?")
- print(f"A: {result.content}")
- print(f" Score: {result.score:.2f}")
- print()
-
- # ============================================================
- # 5. Check Document Existence
- # ============================================================
- print("=" * 60)
- print("5. Checking Document Existence")
- print("=" * 60)
-
- print(f"Document {doc_id_md[:8]}... exists: {engine.exists(doc_id_md)}")
- print(f"Document 'nonexistent' exists: {engine.exists('nonexistent')}")
- print()
-
- # ============================================================
- # 6. Error Handling
- # ============================================================
- print("=" * 60)
- print("6. Error Handling")
- print("=" * 60)
-
- try:
- engine.query("nonexistent_doc_id", "question")
- except VectorlessError as e:
- print(f"Caught error: {e.message}")
- print(f"Error kind: {e.kind}")
- print()
-
- # ============================================================
- # 7. Remove Documents
- # ============================================================
- print("=" * 60)
- print("7. Removing Documents")
- print("=" * 60)
-
- # Remove the HTML document
- removed = engine.remove(doc_id_html)
- print(f"Removed {doc_id_html}: {removed}")
- print(f"Documents remaining: {engine.len()}")
-
- # Try to remove again (should return False)
- removed_again = engine.remove(doc_id_html)
- print(f"Remove again: {removed_again}")
- print()
-
- # ============================================================
- # 8. Clear All Documents
- # ============================================================
- print("=" * 60)
- print("8. Clearing All Documents")
- print("=" * 60)
-
- cleared_count = engine.clear()
- print(f"Cleared {cleared_count} documents")
- print(f"Final document count: {engine.len()}")
- print()
-
- print("=" * 60)
- print("Example completed successfully!")
- print("=" * 60)
-
-
-if __name__ == "__main__":
- # Check for API key
- if not os.environ.get("OPENAI_API_KEY"):
- print("Warning: OPENAI_API_KEY environment variable not set.")
- print("Some operations may fail without an API key.")
- print()
-
- main()
diff --git a/examples/rust/advanced.rs b/examples/rust/advanced.rs
new file mode 100644
index 00000000..946f619b
--- /dev/null
+++ b/examples/rust/advanced.rs
@@ -0,0 +1,68 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Advanced usage example - Full Configuration.
+//!
+//! This example demonstrates how to use a full configuration file
+//! for advanced use cases where you need fine-grained control.
+//!
+//! # Usage
+//!
+//! ```bash
+//! # First, copy the example config and edit it
+//! cp config.toml ./my_vectorless.toml
+//! # Edit my_vectorless.toml to customize settings
+//!
+//! cargo run --example advanced
+//! ```
+
+use vectorless::{Engine, IndexContext};
+
+#[tokio::main]
+async fn main() -> vectorless::Result<()> {
+ println!("=== Vectorless Advanced Example (Full Configuration) ===\n");
+
+ // Method 1: Use explicit config file path
+ // This loads all settings from the specified config file
+ let client = Engine::builder()
+ .with_config_path("./config.toml") // or "./my_vectorless.toml"
+ .build()
+ .await
+ .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
+
+ println!("✓ Client created with config file\n");
+
+ // Index a document
+ let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
+ println!("✓ Indexed: {}\n", doc_id);
+
+ // Query
+ let result = client.query(&doc_id, "What features does Vectorless provide?").await?;
+ println!("Query: What features does Vectorless provide?");
+ println!("Score: {:.2}", result.score);
+ if !result.content.is_empty() {
+ let preview: String = result.content.chars().take(200).collect();
+ println!("Result: {}...\n", preview);
+ }
+
+ // Cleanup
+ client.remove(&doc_id).await?;
+ println!("✓ Cleaned up");
+
+ println!("\n=== Configuration Options ===\n");
+ println!("Configuration Priority (later overrides earlier):");
+ println!(" 1. Default configuration");
+ println!(" 2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)");
+ println!(" 3. Explicit config file (with_config_path)");
+ println!(" 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)");
+ println!(" 5. Builder methods (with_openai, with_model, etc.)");
+ println!();
+ println!("Environment Variables:");
+ println!(" OPENAI_API_KEY - LLM API key");
+ println!(" VECTORLESS_MODEL - Default model name");
+ println!(" VECTORLESS_ENDPOINT - API endpoint URL");
+ println!(" VECTORLESS_WORKSPACE - Workspace directory");
+
+ println!("\n=== Done ===");
+ Ok(())
+}
diff --git a/examples/basic.rs b/examples/rust/basic.rs
similarity index 100%
rename from examples/basic.rs
rename to examples/rust/basic.rs
diff --git a/examples/batch_processing.rs b/examples/rust/batch_processing.rs
similarity index 100%
rename from examples/batch_processing.rs
rename to examples/rust/batch_processing.rs
diff --git a/examples/cli_tool.rs b/examples/rust/cli_tool.rs
similarity index 100%
rename from examples/cli_tool.rs
rename to examples/rust/cli_tool.rs
diff --git a/examples/content_aggregation.rs b/examples/rust/content_aggregation.rs
similarity index 100%
rename from examples/content_aggregation.rs
rename to examples/rust/content_aggregation.rs
diff --git a/examples/rust/custom_config.rs b/examples/rust/custom_config.rs
new file mode 100644
index 00000000..12eaedc4
--- /dev/null
+++ b/examples/rust/custom_config.rs
@@ -0,0 +1,92 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Custom configuration example - Using your own API key, model, and endpoint.
+//!
+//! This example demonstrates how to use custom LLM settings without a config file.
+//! Useful when you want to use different providers like Azure OpenAI, DeepSeek, etc.
+//!
+//! # Usage
+//!
+//! ```bash
+//! cargo run --example custom_config
+//! ```
+
+use vectorless::{Engine, IndexContext};
+
+#[tokio::main]
+async fn main() -> vectorless::Result<()> {
+ println!("=== Vectorless Custom Configuration Example ===\n");
+
+ // ============================================================
+ // Option 1: Use environment variables
+ // ============================================================
+ // Set these environment variables:
+ // - OPENAI_API_KEY or VECTORLESS_API_KEY
+ // - VECTORLESS_MODEL (optional, default: gpt-4o)
+ // - VECTORLESS_ENDPOINT (optional, default: OpenAI endpoint)
+ // - VECTORLESS_WORKSPACE (optional, default: ./workspace)
+
+ // ============================================================
+ // Option 2: Use builder methods (recommended for custom config)
+ // ============================================================
+
+ // Example: Use DeepSeek API
+ let client = Engine::builder()
+ .with_workspace("./workspace")
+ .with_model("deepseek-chat", Some("sk-your-deepseek-key".to_string()))
+ .with_endpoint("https://api.deepseek.com/v1")
+ .build()
+ .await
+ .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?;
+
+ println!("✓ Client created with custom settings\n");
+
+ // Index a document
+ let doc_id = client.index(IndexContext::from_path("./README.md")).await?;
+ println!("✓ Indexed: {}\n", doc_id);
+
+ // Query
+ let result = client.query(&doc_id, "What is Vectorless?").await?;
+ println!("Query: What is Vectorless?");
+ println!("Score: {:.2}", result.score);
+ if !result.content.is_empty() {
+ let preview: String = result.content.chars().take(200).collect();
+ println!("Result: {}...\n", preview);
+ }
+
+ // Cleanup
+ client.remove(&doc_id).await?;
+ println!("✓ Cleaned up");
+
+ // ============================================================
+ // Other provider examples (commented out)
+ // ============================================================
+
+ // Azure OpenAI:
+ // let client = Engine::builder()
+ // .with_workspace("./workspace")
+ // .with_model("gpt-4o", Some("your-azure-key".to_string()))
+ // .with_endpoint("https://your-resource.openai.azure.com/openai/deployments/your-deployment")
+ // .build()
+ // .await?;
+
+ // Local LLM (e.g., Ollama with OpenAI-compatible API):
+ // let client = Engine::builder()
+ // .with_workspace("./workspace")
+ // .with_model("llama3", None) // No API key needed
+ // .with_endpoint("http://localhost:11434/v1")
+ // .build()
+ // .await?;
+
+ // Anthropic Claude (via OpenAI-compatible proxy):
+ // let client = Engine::builder()
+ // .with_workspace("./workspace")
+ // .with_model("claude-3-5-sonnet-20241022", Some("sk-ant-...".to_string()))
+ // .with_endpoint("https://api.anthropic.com/v1")
+ // .build()
+ // .await?;
+
+ println!("\n=== Done ===");
+ Ok(())
+}
diff --git a/examples/custom_pilot.rs b/examples/rust/custom_pilot.rs
similarity index 100%
rename from examples/custom_pilot.rs
rename to examples/rust/custom_pilot.rs
diff --git a/examples/events.rs b/examples/rust/events.rs
similarity index 100%
rename from examples/events.rs
rename to examples/rust/events.rs
diff --git a/examples/feedback_learning.rs b/examples/rust/feedback_learning.rs
similarity index 100%
rename from examples/feedback_learning.rs
rename to examples/rust/feedback_learning.rs
diff --git a/examples/html_parser.rs b/examples/rust/html_parser.rs
similarity index 100%
rename from examples/html_parser.rs
rename to examples/rust/html_parser.rs
diff --git a/examples/index.rs b/examples/rust/index.rs
similarity index 100%
rename from examples/index.rs
rename to examples/rust/index.rs
diff --git a/examples/markdownflow.rs b/examples/rust/markdownflow.rs
similarity index 100%
rename from examples/markdownflow.rs
rename to examples/rust/markdownflow.rs
diff --git a/examples/memo_cache.rs b/examples/rust/memo_cache.rs
similarity index 100%
rename from examples/memo_cache.rs
rename to examples/rust/memo_cache.rs
diff --git a/examples/multi_format.rs b/examples/rust/multi_format.rs
similarity index 100%
rename from examples/multi_format.rs
rename to examples/rust/multi_format.rs
diff --git a/examples/reference_following.rs b/examples/rust/reference_following.rs
similarity index 100%
rename from examples/reference_following.rs
rename to examples/rust/reference_following.rs
diff --git a/examples/retrieve.rs b/examples/rust/retrieve.rs
similarity index 100%
rename from examples/retrieve.rs
rename to examples/rust/retrieve.rs
diff --git a/examples/session.rs b/examples/rust/session.rs
similarity index 100%
rename from examples/session.rs
rename to examples/rust/session.rs
diff --git a/examples/storage_backend.rs b/examples/rust/storage_backend.rs
similarity index 100%
rename from examples/storage_backend.rs
rename to examples/rust/storage_backend.rs
diff --git a/examples/storage_compression.rs b/examples/rust/storage_compression.rs
similarity index 100%
rename from examples/storage_compression.rs
rename to examples/rust/storage_compression.rs
diff --git a/examples/storage_migration.rs b/examples/rust/storage_migration.rs
similarity index 100%
rename from examples/storage_migration.rs
rename to examples/rust/storage_migration.rs
diff --git a/examples/storage_workspace.rs b/examples/rust/storage_workspace.rs
similarity index 100%
rename from examples/storage_workspace.rs
rename to examples/rust/storage_workspace.rs
diff --git a/examples/strategy_cross_document.rs b/examples/rust/strategy_cross_document.rs
similarity index 100%
rename from examples/strategy_cross_document.rs
rename to examples/rust/strategy_cross_document.rs
diff --git a/examples/strategy_hybrid.rs b/examples/rust/strategy_hybrid.rs
similarity index 100%
rename from examples/strategy_hybrid.rs
rename to examples/rust/strategy_hybrid.rs
diff --git a/examples/strategy_page_range.rs b/examples/rust/strategy_page_range.rs
similarity index 100%
rename from examples/strategy_page_range.rs
rename to examples/rust/strategy_page_range.rs
diff --git a/examples/streaming.rs b/examples/rust/streaming.rs
similarity index 100%
rename from examples/streaming.rs
rename to examples/rust/streaming.rs
diff --git a/python/src/lib.rs b/python/src/lib.rs
index 72c2f061..fc2cf730 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -28,6 +28,11 @@ pub struct VectorlessError {
#[pymethods]
impl VectorlessError {
+ #[new]
+ fn new_py(message: String, kind: String) -> Self {
+ Self { message, kind }
+ }
+
#[getter]
fn message(&self) -> &str {
&self.message
@@ -96,7 +101,7 @@ fn to_py_err(e: RustError) -> PyErr {
/// # From bytes
/// ctx = IndexContext.from_bytes(data, name="doc", format="pdf")
/// ```
-#[pyclass]
+#[pyclass(name = "IndexContext")]
pub struct PyIndexContext {
inner: IndexContext,
}
@@ -134,7 +139,7 @@ impl PyIndexContext {
/// IndexContext for the content.
#[staticmethod]
#[pyo3(signature = (content, name=None, format="markdown"))]
-    fn from_text(content: String, name: Option<String>, format: &str) -> PyResult<Self> {
+    fn from_content(content: String, name: Option<String>, format: &str) -> PyResult<Self> {
let doc_format = parse_format(format)?;
let mut ctx = IndexContext::from_content(&content, doc_format);
if let Some(n) = name {
@@ -168,9 +173,8 @@ fn parse_format(format: &str) -> PyResult<DocumentFormat> {
"pdf" => Ok(DocumentFormat::Pdf),
"docx" | "doc" => Ok(DocumentFormat::Docx),
"html" | "htm" => Ok(DocumentFormat::Html),
- "text" | "txt" => Ok(DocumentFormat::Text),
_ => Err(PyErr::from(VectorlessError::new(
- format!("Unknown format: {}", format),
+ format!("Unknown format: {}. Supported: markdown, pdf, docx, html", format),
"config",
))),
}
@@ -181,7 +185,7 @@ fn parse_format(format: &str) -> PyResult<DocumentFormat> {
// ============================================================
/// Result of a document query.
-#[pyclass]
+#[pyclass(name = "QueryResult")]
pub struct PyQueryResult {
inner: QueryResult,
}
@@ -227,7 +231,7 @@ impl PyQueryResult {
// ============================================================
/// Information about an indexed document.
-#[pyclass]
+#[pyclass(name = "DocumentInfo")]
pub struct PyDocumentInfo {
inner: DocumentInfo,
}
@@ -284,7 +288,16 @@ impl PyDocumentInfo {
/// The main vectorless engine.
///
-/// Create an engine with a workspace directory:
+/// Configuration priority (later overrides earlier):
+/// 1. Default configuration
+/// 2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)
+/// 3. Explicit config file (config_path parameter)
+/// 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
+/// 5. Constructor parameters (api_key, model, endpoint) - highest priority
+///
+/// # Zero Configuration (Recommended)
+///
+/// Just set the OPENAI_API_KEY environment variable:
///
/// ```python
/// from vectorless import Engine
@@ -292,12 +305,18 @@ impl PyDocumentInfo {
/// engine = Engine(workspace="./data")
/// ```
///
-/// Or with an explicit API key:
+/// # With Custom Model
+///
+/// ```python
+/// engine = Engine(workspace="./data", model="gpt-4o-mini")
+/// ```
+///
+/// # With Full Config File (Advanced)
///
/// ```python
-/// engine = Engine(workspace="./data", api_key="sk-...")
+/// engine = Engine(config_path="./vectorless.toml")
/// ```
-#[pyclass]
+#[pyclass(name = "Engine")]
pub struct PyEngine {
    inner: Arc<Engine>,
rt: Runtime,
@@ -308,17 +327,26 @@ impl PyEngine {
/// Create a new Engine.
///
/// Args:
- /// workspace: Path to the workspace directory.
+ /// workspace: Path to the workspace directory (optional if config_path provides it).
+ /// config_path: Path to configuration file (optional, advanced usage).
/// api_key: Optional API key. If not provided, uses OPENAI_API_KEY env var.
- /// model: Optional model name. Default: "gpt-4o-mini".
+ /// model: Optional model name. Default: "gpt-4o".
/// endpoint: Optional API endpoint.
///
+ /// Configuration priority (later overrides earlier):
+ /// 1. Default configuration
+ /// 2. Auto-detected config file
+ /// 3. config_path parameter
+ /// 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)
+ /// 5. Constructor parameters (api_key, model, endpoint)
+ ///
/// Raises:
/// VectorlessError: If engine creation fails.
#[new]
- #[pyo3(signature = (workspace, api_key=None, model=None, endpoint=None))]
+ #[pyo3(signature = (workspace=None, config_path=None, api_key=None, model=None, endpoint=None))]
fn new(
-        workspace: String,
+        workspace: Option<String>,
+        config_path: Option<String>,
         api_key: Option<String>,
         model: Option<String>,
         endpoint: Option<String>,
@@ -334,18 +362,31 @@ impl PyEngine {
let resolved_api_key = api_key.or_else(|| std::env::var("OPENAI_API_KEY").ok());
let engine = rt.block_on(async {
- let mut builder = EngineBuilder::new().with_workspace(&workspace);
+ let mut builder = EngineBuilder::new();
- if let Some(key) = resolved_api_key {
- builder = builder.with_openai(key);
+ // Set config path first (if provided)
+ if let Some(path) = &config_path {
+ builder = builder.with_config_path(path);
+ }
+
+ // Set workspace (if provided)
+ if let Some(ws) = &workspace {
+ builder = builder.with_workspace(ws);
}
- if let Some(m) = model {
- builder = builder.with_model(&m, None);
+        // Set model (pass None so an already-set API key is not overwritten)
+ if let Some(m) = &model {
+ builder = builder.with_model(m, None);
}
- if let Some(e) = endpoint {
- builder = builder.with_endpoint(&e);
+ // Set endpoint
+ if let Some(e) = &endpoint {
+ builder = builder.with_endpoint(e);
+ }
+
+ // Set API key last (this ensures it's not overwritten)
+ if let Some(key) = resolved_api_key {
+ builder = builder.with_openai(key);
}
builder.build().await
@@ -506,7 +547,7 @@ impl PyEngine {
/// print(result.content)
/// ```
#[pymodule]
-fn _vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
+fn vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<PyEngine>()?;
    m.add_class::<PyIndexContext>()?;
    m.add_class::<PyQueryResult>()?;
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 78f65624..fe9729b9 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -11,7 +11,104 @@ documentation = "https://docs.rs/vectorless"
keywords = ["rag", "document", "retrieval", "indexing", "llm"]
categories = ["text-processing", "data-structures", "algorithms"]
readme = "../README.md"
-exclude = ["samples/", "docs/", ".*"]
+exclude = ["docs/", ".*"]
+
+# Example directory configuration
+[[example]]
+name = "basic"
+path = "../examples/rust/basic.rs"
+
+[[example]]
+name = "advanced"
+path = "../examples/rust/advanced.rs"
+
+[[example]]
+name = "batch_processing"
+path = "../examples/rust/batch_processing.rs"
+
+[[example]]
+name = "cli_tool"
+path = "../examples/rust/cli_tool.rs"
+
+[[example]]
+name = "content_aggregation"
+path = "../examples/rust/content_aggregation.rs"
+
+[[example]]
+name = "custom_config"
+path = "../examples/rust/custom_config.rs"
+
+[[example]]
+name = "custom_pilot"
+path = "../examples/rust/custom_pilot.rs"
+
+[[example]]
+name = "events"
+path = "../examples/rust/events.rs"
+
+[[example]]
+name = "feedback_learning"
+path = "../examples/rust/feedback_learning.rs"
+
+[[example]]
+name = "html_parser"
+path = "../examples/rust/html_parser.rs"
+
+[[example]]
+name = "index"
+path = "../examples/rust/index.rs"
+
+[[example]]
+name = "markdownflow"
+path = "../examples/rust/markdownflow.rs"
+
+[[example]]
+name = "multi_format"
+path = "../examples/rust/multi_format.rs"
+
+[[example]]
+name = "reference_following"
+path = "../examples/rust/reference_following.rs"
+
+[[example]]
+name = "retrieve"
+path = "../examples/rust/retrieve.rs"
+
+[[example]]
+name = "session"
+path = "../examples/rust/session.rs"
+
+[[example]]
+name = "storage_backend"
+path = "../examples/rust/storage_backend.rs"
+
+[[example]]
+name = "storage_compression"
+path = "../examples/rust/storage_compression.rs"
+
+[[example]]
+name = "storage_migration"
+path = "../examples/rust/storage_migration.rs"
+
+[[example]]
+name = "storage_workspace"
+path = "../examples/rust/storage_workspace.rs"
+
+[[example]]
+name = "strategy_cross_document"
+path = "../examples/rust/strategy_cross_document.rs"
+
+[[example]]
+name = "strategy_hybrid"
+path = "../examples/rust/strategy_hybrid.rs"
+
+[[example]]
+name = "strategy_page_range"
+path = "../examples/rust/strategy_page_range.rs"
+
+[[example]]
+name = "streaming"
+path = "../examples/rust/streaming.rs"
[dependencies]
# Async runtime
diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs
index 61621f23..70c3682b 100644
--- a/rust/src/client/builder.rs
+++ b/rust/src/client/builder.rs
@@ -6,27 +6,67 @@
//! This module provides [`EngineBuilder`] for configuring and building
//! [`Engine`] instances with sensible defaults.
//!
-//! # Example
+//! # Configuration Priority
+//!
+//! Configuration is applied in this order (later overrides earlier):
+//! 1. Default configuration
+//! 2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`)
+//! 3. Explicit config file (`with_config_path`)
+//! 4. Environment variables (`OPENAI_API_KEY`, `VECTORLESS_MODEL`, etc.)
+//! 5. Builder methods (`with_openai`, `with_model`, etc.) - highest priority
+//!
+//! # Environment Variables
+//!
+//! | Variable | Description |
+//! |----------|-------------|
+//! | `OPENAI_API_KEY` | LLM API key |
+//! | `VECTORLESS_MODEL` | Default model name |
+//! | `VECTORLESS_ENDPOINT` | API endpoint URL |
+//! | `VECTORLESS_WORKSPACE` | Workspace directory |
+//!
+//! # Examples
+//!
+//! ## Zero Configuration (Recommended)
//!
//! ```rust,no_run
//! use vectorless::client::EngineBuilder;
//!
//! # #[tokio::main]
//! # async fn main() -> Result<(), vectorless::BuildError> {
-//! // Simple setup with workspace
+//! // Just set OPENAI_API_KEY environment variable
//! let engine = EngineBuilder::new()
-//! .with_workspace("./my_workspace")
-//! .with_openai(std::env::var("OPENAI_API_KEY").unwrap())
+//! .with_workspace("./data")
//! .build()
//! .await?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ## With Custom Model
+//!
+//! ```rust,no_run
+//! use vectorless::client::EngineBuilder;
//!
-//! // Advanced configuration
+//! # #[tokio::main]
+//! # async fn main() -> Result<(), vectorless::BuildError> {
//! let engine = EngineBuilder::new()
//! .with_workspace("./data")
-//! .with_model("gpt-4o", None)
-//! .with_endpoint("https://api.openai.com/v1")
-//! .with_top_k(10)
-//! .precise()
+//! .with_model("gpt-4o-mini", None) // Uses OPENAI_API_KEY from env
+//! .build()
+//! .await?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ## With Full Config File (Advanced)
+//!
+//! ```rust,no_run
+//! use vectorless::client::EngineBuilder;
+//!
+//! # #[tokio::main]
+//! # async fn main() -> Result<(), vectorless::BuildError> {
+//! let engine = EngineBuilder::new()
+//! .with_config_path("./vectorless.toml")
//! .build()
//! .await?;
//! # Ok(())
@@ -49,16 +89,25 @@ const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorless.toml"];
/// Builder for creating a [`Engine`] client.
///
/// The builder uses sensible defaults and automatically loads
-/// LLM configuration from environment variables or config files.
+/// configuration from config files and environment variables.
///
/// # Configuration Priority
///
-/// Configuration is loaded in this order (later overrides earlier):
+/// Configuration is applied in this order (later overrides earlier):
/// 1. Default configuration
-/// 2. Auto-detected config file
+/// 2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`)
/// 3. Explicit config file (`with_config_path`)
-/// 4. Custom config object (`with_config`)
-/// 5. Individual builder methods
+/// 4. Environment variables (`OPENAI_API_KEY`, `VECTORLESS_MODEL`, etc.)
+/// 5. Builder methods (`with_openai`, `with_model`, etc.) - highest priority
+///
+/// # Environment Variables
+///
+/// | Variable | Description |
+/// |----------|-------------|
+/// | `OPENAI_API_KEY` | LLM API key |
+/// | `VECTORLESS_MODEL` | Default model name |
+/// | `VECTORLESS_ENDPOINT` | API endpoint URL |
+/// | `VECTORLESS_WORKSPACE` | Workspace directory |
///
/// # Example
///
@@ -67,9 +116,9 @@ const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorless.toml"];
///
/// # #[tokio::main]
/// # async fn main() -> Result<(), vectorless::BuildError> {
+/// // Zero configuration - just set OPENAI_API_KEY environment variable
/// let client = EngineBuilder::new()
/// .with_workspace("./my_workspace")
-/// .with_openai(std::env::var("OPENAI_API_KEY").unwrap())
/// .build()
/// .await?;
/// # Ok(())
@@ -236,8 +285,8 @@ impl EngineBuilder {
/// Configure for OpenAI API.
///
- /// Uses `gpt-4o` model by default. Use [`with_model`](EngineBuilder::with_model)
- /// to specify a different model.
+    /// Sets the API key, and defaults the model to "gpt-4o" if no model has been set.
+ /// Use [`with_model`](EngineBuilder::with_model) before this to specify a different model.
///
/// # Example
///
@@ -256,7 +305,13 @@ impl EngineBuilder {
/// ```
#[must_use]
pub fn with_openai(self, api_key: impl Into) -> Self {
- self.with_model("gpt-4o", Some(api_key.into()))
+ let mut builder = self;
+ builder.api_key = Some(api_key.into());
+ // Only set default model if not already set
+ if builder.model.is_none() {
+ builder.model = Some("gpt-4o".to_string());
+ }
+ builder
}
/// Set the LLM model and optional API key.
@@ -284,7 +339,9 @@ impl EngineBuilder {
#[must_use]
pub fn with_model(mut self, model: impl Into, api_key: Option) -> Self {
self.model = Some(model.into());
- self.api_key = api_key;
+ if api_key.is_some() {
+ self.api_key = api_key;
+ }
self
}
@@ -357,6 +414,57 @@ impl EngineBuilder {
self
}
+ /// Apply environment variable overrides to a Config.
+ ///
+ /// This is used when a custom Config is provided via `with_config`
+ /// or when using default config without a config file.
+ fn apply_env_overrides(config: &mut Config) {
+ // OPENAI_API_KEY: Set API key for all LLM clients
+ if let Ok(api_key) = std::env::var("OPENAI_API_KEY") {
+ // Set default API key
+ config.llm.api_key = Some(api_key.clone());
+ // Override individual client API keys if not explicitly set
+ if config.llm.summary.api_key.is_none() {
+ config.llm.summary.api_key = Some(api_key.clone());
+ }
+ if config.llm.retrieval.api_key.is_none() {
+ config.llm.retrieval.api_key = Some(api_key.clone());
+ }
+ if config.llm.pilot.api_key.is_none() {
+            config.llm.pilot.api_key = Some(api_key.clone());
+        }
+        // Also set legacy config for backwards compatibility
+        if config.summary.api_key.is_none() {
+            config.summary.api_key = Some(api_key);
+ }
+ }
+
+ // VECTORLESS_MODEL: Set default model
+ if let Ok(model) = std::env::var("VECTORLESS_MODEL") {
+ config.llm.summary.model = model.clone();
+ config.llm.retrieval.model = model.clone();
+ config.llm.pilot.model = model.clone();
+ // Also set legacy config
+ config.summary.model = model.clone();
+ config.retrieval.model = model;
+ }
+
+ // VECTORLESS_ENDPOINT: Set API endpoint
+ if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") {
+ config.llm.summary.endpoint = endpoint.clone();
+ config.llm.retrieval.endpoint = endpoint.clone();
+ config.llm.pilot.endpoint = endpoint.clone();
+ // Also set legacy config
+ config.summary.endpoint = endpoint.clone();
+ config.retrieval.endpoint = endpoint;
+ }
+
+ // VECTORLESS_WORKSPACE: Set workspace directory
+ if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") {
+ config.storage.workspace_dir = PathBuf::from(workspace);
+ }
+ }
+
/// Search for config file in current directory and parent directories.
    fn find_config_file() -> Option<PathBuf> {
let current_dir = std::env::current_dir().ok()?;
@@ -414,8 +522,12 @@ impl EngineBuilder {
/// ```
    pub async fn build(self) -> Result<Engine, BuildError> {
// Load or create configuration
+ // ConfigLoader automatically applies environment variable overrides
let mut config = if let Some(config) = self.config {
- config
+ // Custom config - still apply env vars
+ let mut cfg = config;
+ Self::apply_env_overrides(&mut cfg);
+ cfg
} else if let Some(path) = self.config_path {
ConfigLoader::new()
.file(&path)
@@ -426,7 +538,10 @@ impl EngineBuilder {
BuildError::Config(format!("Failed to load {}: {}", config_path.display(), e))
})?
} else {
- Config::default()
+ // No config file - use defaults with env var overrides
+ let mut cfg = Config::default();
+ Self::apply_env_overrides(&mut cfg);
+ cfg
};
// Apply builder overrides to retrieval config
@@ -436,13 +551,24 @@ impl EngineBuilder {
// Apply individual overrides
if let Some(api_key) = self.api_key {
- config.retrieval.api_key = Some(api_key);
+ // Set API key for both retrieval and summary
+ config.retrieval.api_key = Some(api_key.clone());
+ config.summary.api_key = Some(api_key);
+ // Also set LLM pool config
+ if config.llm.summary.api_key.is_none() {
+ config.llm.summary.api_key = config.summary.api_key.clone();
+ }
+ if config.llm.retrieval.api_key.is_none() {
+ config.llm.retrieval.api_key = config.summary.api_key.clone();
+ }
}
if let Some(model) = self.model {
- config.retrieval.model = model;
+ config.retrieval.model = model.clone();
+ config.summary.model = model;
}
if let Some(endpoint) = self.endpoint {
- config.retrieval.endpoint = endpoint;
+ config.retrieval.endpoint = endpoint.clone();
+ config.summary.endpoint = endpoint;
}
if let Some(top_k) = self.top_k {
config.retrieval.top_k = top_k;
diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs
index 679dbea0..0ba0a847 100644
--- a/rust/src/client/indexer.rs
+++ b/rust/src/client/indexer.rs
@@ -259,17 +259,19 @@ impl IndexerClient {
options: &IndexOptions,
format: DocumentFormat,
) -> PipelineOptions {
PipelineOptions {
mode: match format {
DocumentFormat::Markdown => IndexMode::Markdown,
DocumentFormat::Pdf => IndexMode::Pdf,
DocumentFormat::Html => IndexMode::Html,
DocumentFormat::Docx => IndexMode::Docx,
- DocumentFormat::Text => IndexMode::Auto,
},
generate_ids: options.generate_ids,
summary_strategy: if options.generate_summaries {
- SummaryStrategy::selective(self.config.min_summary_tokens, false)
+                SummaryStrategy::full()
} else {
SummaryStrategy::none()
},
diff --git a/rust/src/client/types.rs b/rust/src/client/types.rs
index 31438a62..0cfcb065 100644
--- a/rust/src/client/types.rs
+++ b/rust/src/client/types.rs
@@ -174,7 +174,7 @@ impl Default for IndexOptions {
fn default() -> Self {
Self {
mode: IndexMode::Default,
- generate_summaries: false,
+ generate_summaries: true,
include_text: true,
generate_ids: true,
generate_description: false,
diff --git a/rust/src/config/loader.rs b/rust/src/config/loader.rs
index 33f8bb9f..e436a315 100644
--- a/rust/src/config/loader.rs
+++ b/rust/src/config/loader.rs
@@ -3,17 +3,33 @@
//! Configuration loader.
//!
-//! Loads configuration from TOML files with validation.
-//! All configuration must be explicit in the config file - no environment variables.
+//! Loads configuration from TOML files with environment variable overrides.
+//!
+//! # Configuration Priority
+//!
+//! Configuration is loaded in this order (later overrides earlier):
+//! 1. Default configuration
+//! 2. Config file (if found or specified)
+//! 3. Environment variables
+//!
+//! # Environment Variables
+//!
+//! | Variable | Description | Maps To |
+//! |----------|-------------|---------|
+//! | `OPENAI_API_KEY` | LLM API key | `llm.api_key` / `retrieval.api_key` |
+//! | `VECTORLESS_MODEL` | Default LLM model | `retrieval.model` |
+//! | `VECTORLESS_ENDPOINT` | LLM API endpoint | `retrieval.endpoint` |
+//! | `VECTORLESS_WORKSPACE` | Workspace directory | `storage.workspace_dir` |
//!
//! # Example
//!
//! ```rust,no_run
//! use vectorless::config::{ConfigLoader, Config};
//!
-//! // Load from file
+//! // Load from file with environment variable overrides
//! let config = ConfigLoader::new()
//! .file("config.toml")
+//! .with_env(true) // Enable environment variables (default: true)
//! .load()?;
//!
//! // Load with validation
@@ -73,6 +89,9 @@ pub struct ConfigLoader {
/// Custom validator (optional).
validator: Option,
+
+ /// Whether to apply environment variable overrides.
+ env_enabled: bool,
}
impl Default for ConfigLoader {
@@ -88,6 +107,7 @@ impl ConfigLoader {
files: Vec::new(),
validate: false,
validator: None,
+ env_enabled: true,
}
}
@@ -122,13 +142,68 @@ impl ConfigLoader {
self
}
+ /// Enable or disable environment variable overrides.
+ ///
+ /// When enabled (default), environment variables override config file values:
+ /// - `OPENAI_API_KEY` → sets API key for all LLM clients
+ /// - `VECTORLESS_MODEL` → sets default model
+ /// - `VECTORLESS_ENDPOINT` → sets API endpoint
+ /// - `VECTORLESS_WORKSPACE` → sets workspace directory
+ pub fn with_env(mut self, enabled: bool) -> Self {
+ self.env_enabled = enabled;
+ self
+ }
+
+ /// Apply environment variable overrides to configuration.
+ fn apply_env_overrides(&self, config: &mut Config) {
+ if !self.env_enabled {
+ return;
+ }
+
+ // OPENAI_API_KEY: Set API key for all LLM clients
+ if let Ok(api_key) = std::env::var("OPENAI_API_KEY") {
+ // Set default API key
+ config.llm.api_key = Some(api_key.clone());
+ // Override individual client API keys if not explicitly set
+ if config.llm.summary.api_key.is_none() {
+ config.llm.summary.api_key = Some(api_key.clone());
+ }
+ if config.llm.retrieval.api_key.is_none() {
+ config.llm.retrieval.api_key = Some(api_key.clone());
+ }
+ if config.llm.pilot.api_key.is_none() {
+ config.llm.pilot.api_key = Some(api_key);
+ }
+ }
+
+ // VECTORLESS_MODEL: Set default model
+ if let Ok(model) = std::env::var("VECTORLESS_MODEL") {
+ config.llm.summary.model = model.clone();
+ config.llm.retrieval.model = model.clone();
+ config.llm.pilot.model = model;
+ }
+
+ // VECTORLESS_ENDPOINT: Set API endpoint
+ if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") {
+ config.llm.summary.endpoint = endpoint.clone();
+ config.llm.retrieval.endpoint = endpoint.clone();
+ config.llm.pilot.endpoint = endpoint;
+ }
+
+ // VECTORLESS_WORKSPACE: Set workspace directory
+ if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") {
+ config.storage.workspace_dir = PathBuf::from(workspace);
+ }
+ }
+
/// Load the configuration.
///
/// # Behavior
///
/// 1. Start with default configuration
/// 2. Load and merge each specified file (in order)
- /// 3. Validate configuration (if enabled)
+ /// 3. Apply environment variable overrides (if enabled)
+ /// 4. Validate configuration (if enabled)
///
/// # Errors
///
@@ -150,6 +225,9 @@ impl ConfigLoader {
}
}
+ // Apply environment variable overrides
+ self.apply_env_overrides(&mut config);
+
// Validate if requested
if self.validate {
let validator = self.validator.unwrap_or_default();
diff --git a/rust/src/index/config.rs b/rust/src/index/config.rs
index f08b5968..f5cabebc 100644
--- a/rust/src/index/config.rs
+++ b/rust/src/index/config.rs
@@ -160,7 +160,7 @@ impl Default for PipelineOptions {
Self {
mode: IndexMode::Auto,
generate_ids: true,
- summary_strategy: SummaryStrategy::default(),
+ summary_strategy: SummaryStrategy::full(),
thinning: ThinningConfig::default(),
optimization: OptimizationConfig::default(),
generate_description: true,
diff --git a/rust/src/index/pipeline/context.rs b/rust/src/index/pipeline/context.rs
index ab9a462d..979839a8 100644
--- a/rust/src/index/pipeline/context.rs
+++ b/rust/src/index/pipeline/context.rs
@@ -51,7 +51,7 @@ impl IndexInput {
Self::Content {
content: content.into(),
name: String::new(),
- format: DocumentFormat::Text,
+ format: DocumentFormat::Markdown,
}
}
diff --git a/rust/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs
index 4fb29310..e848e832 100644
--- a/rust/src/index/stages/enhance.rs
+++ b/rust/src/index/stages/enhance.rs
@@ -101,6 +101,7 @@ impl IndexStage for EnhanceStage {
// Check if we need summaries
if !self.needs_summaries(ctx) {
info!(
"Summary generation skipped (strategy: {:?})",
ctx.options.summary_strategy
@@ -112,6 +113,7 @@ impl IndexStage for EnhanceStage {
let llm_client = match &self.llm_client {
Some(client) => client,
None => {
+ println!("[DEBUG] No LLM client configured, skipping summary generation");
warn!("No LLM client configured, skipping summary generation");
return Ok(StageResult::success("enhance"));
}
@@ -121,11 +123,13 @@ impl IndexStage for EnhanceStage {
let tree = match ctx.tree.as_mut() {
Some(t) => t,
None => {
+ println!("[DEBUG] No tree built, skipping enhance stage");
warn!("No tree built, skipping enhance stage");
return Ok(StageResult::success("enhance"));
}
};
+ println!("[DEBUG] Using summary strategy: {:?}", ctx.options.summary_strategy);
info!("Using summary strategy: {:?}", ctx.options.summary_strategy);
// Create summary generator with optional memo store
@@ -141,11 +145,14 @@ impl IndexStage for EnhanceStage {
let node_ids: Vec<crate::document::NodeId> = tree.traverse();
let total_nodes = node_ids.len();
+ println!("[DEBUG] Processing {} nodes for summary generation", total_nodes);
info!("Processing {} nodes for summary generation", total_nodes);
// Process nodes
let mut generated = 0;
let mut failed = 0;
+ let mut skipped_no_content = 0;
+ let mut skipped_tokens = 0;
let strategy = ctx.options.summary_strategy.clone();
for node_id in node_ids {
@@ -154,15 +161,18 @@ impl IndexStage for EnhanceStage {
Some(n) => n.clone(),
None => continue,
};
+ println!("[DEBUG] Evaluating node for summary: {} {}", node.title, node.content);
// Skip if no content
if node.content.is_empty() {
+ skipped_no_content += 1;
continue;
}
// Get token count and check if we should generate
let token_count = node.token_count.unwrap_or(0);
if !strategy.should_generate(tree, node_id, token_count) {
+ skipped_tokens += 1;
continue;
}
@@ -194,6 +204,9 @@ impl IndexStage for EnhanceStage {
}
// Generate summary (generator also has memoization built-in)
+ println!("[DEBUG] Calling LLM to generate summary for node: {} ({} tokens)", node.title, token_count);
+ println!("[DEBUG] Node content: {}", node.content);
+
match generator.generate(&node.title, &node.content).await {
Ok(summary) => {
if summary.is_empty() {
@@ -223,6 +236,8 @@ impl IndexStage for EnhanceStage {
let duration = start.elapsed().as_millis() as u64;
ctx.metrics.record_enhance(duration);
+ println!("[DEBUG] Generated {} summaries ({} failed, {} skipped no content, {} skipped tokens) in {}ms",
+ generated, failed, skipped_no_content, skipped_tokens, duration);
info!(
"Generated {} summaries ({} failed) in {}ms",
generated, failed, duration
diff --git a/rust/src/llm/executor.rs b/rust/src/llm/executor.rs
index 8ac193f1..2c157432 100644
--- a/rust/src/llm/executor.rs
+++ b/rust/src/llm/executor.rs
@@ -360,7 +360,7 @@ impl LlmExecutor {
ChatCompletionRequestUserMessage::from(truncated).into(),
])
.temperature(self.config.temperature)
- .max_tokens(tokens)
+ // .max_tokens(tokens)
.build()
} else {
CreateChatCompletionRequestArgs::default()
diff --git a/rust/src/parser/registry.rs b/rust/src/parser/registry.rs
index ef1cf416..4f3d0e8c 100644
--- a/rust/src/parser/registry.rs
+++ b/rust/src/parser/registry.rs
@@ -141,7 +141,7 @@ impl ParserRegistry {
/// For binary formats (PDF, DOCX), the parser handles the bytes directly.
pub async fn parse_bytes(&self, bytes: &[u8], format: DocumentFormat) -> Result {
match format {
- DocumentFormat::Markdown | DocumentFormat::Html | DocumentFormat::Text => {
+ DocumentFormat::Markdown | DocumentFormat::Html => {
// Text formats - convert to string first
let content = std::str::from_utf8(bytes)
.map_err(|e| Error::Parse(format!("Invalid UTF-8 content: {}", e)))?;
@@ -188,7 +188,6 @@ pub fn get_parser(format: DocumentFormat) -> Option> {
DocumentFormat::Pdf => Some(Box::new(PdfParser::new())),
DocumentFormat::Html => Some(Box::new(HtmlParser::new())),
DocumentFormat::Docx => Some(Box::new(super::docx::DocxParser::new())),
- DocumentFormat::Text => None, // TODO: Implement plain text parser
}
}
diff --git a/rust/src/parser/types.rs b/rust/src/parser/types.rs
index c8c5f000..2ccac2f7 100644
--- a/rust/src/parser/types.rs
+++ b/rust/src/parser/types.rs
@@ -21,8 +21,6 @@ pub enum DocumentFormat {
Html,
/// Word documents (.docx)
Docx,
- /// Plain text files (.txt)
- Text,
}
impl DocumentFormat {
@@ -33,7 +31,6 @@ impl DocumentFormat {
"pdf" => Some(Self::Pdf),
"html" | "htm" => Some(Self::Html),
"docx" => Some(Self::Docx),
- "txt" => Some(Self::Text),
_ => None,
}
}
@@ -45,7 +42,6 @@ impl DocumentFormat {
Self::Pdf => "pdf",
Self::Html => "html",
Self::Docx => "docx",
- Self::Text => "txt",
}
}
}
@@ -173,7 +169,7 @@ impl Default for DocumentMeta {
fn default() -> Self {
Self {
name: String::new(),
- format: DocumentFormat::Text,
+ format: DocumentFormat::Markdown,
page_count: None,
line_count: 0,
source_path: None,
diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs
index af0fd602..de41c052 100644
--- a/rust/src/retrieval/pilot/llm_pilot.rs
+++ b/rust/src/retrieval/pilot/llm_pilot.rs
@@ -286,7 +286,7 @@ impl LlmPilot {
&self,
point: InterventionPoint,
context: &super::builder::PilotContext,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
) -> PilotDecision {
// Check memo cache first
if let Some(ref store) = self.memo_store {
@@ -331,22 +331,29 @@ impl LlmPilot {
}
}
+ println!("[DEBUG] LlmPilot::call_llm() - point={:?}, estimated_tokens={}", point, prompt.estimated_tokens);
+ println!("[DEBUG] LlmPilot::call_llm() - SYSTEM PROMPT:\n{}", prompt.system);
+ println!("[DEBUG] LlmPilot::call_llm() - USER PROMPT:\n{}", prompt.user);
+ println!("[DEBUG] LlmPilot::call_llm() - candidates count: {}", candidates.len());
debug!(
"Calling LLM for {:?} point (estimated: {} tokens)",
point, prompt.estimated_tokens
);
// Make LLM call - use executor if available, otherwise use client directly
let result = if let Some(ref executor) = self.executor {
+ println!("[DEBUG] LlmPilot::call_llm() - using LlmExecutor");
// Use LlmExecutor for unified throttle/retry/fallback
executor.complete(&prompt.system, &prompt.user).await
} else {
+ println!("[DEBUG] LlmPilot::call_llm() - using direct client");
// Fallback to direct client call
self.client.complete(&prompt.system, &prompt.user).await
};
match result {
Ok(response) => {
+ println!("[DEBUG] LlmPilot::call_llm() - RAW LLM RESPONSE:\n{}", response);
// Record usage (estimate output tokens)
let output_tokens = self.estimate_tokens(&response);
self.budget
@@ -354,6 +361,10 @@ impl LlmPilot {
// Parse response
let mut decision = self.response_parser.parse(&response, candidates, point);
+ println!("[DEBUG] LlmPilot::call_llm() - PARSED DECISION: confidence={:.2}, ranked={}, direction={:?}, reasoning={}",
+ decision.confidence, decision.ranked_candidates.len(),
+ std::mem::discriminant(&decision.direction),
+ decision.reasoning.chars().take(100).collect::());
// Apply learner adjustment if available
if let Some(ref adj) = adjustment {
@@ -406,14 +417,14 @@ impl LlmPilot {
fn cached_value_to_decision(
&self,
value: crate::memo::PilotDecisionValue,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
let ranked = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| super::decision::RankedCandidate {
- node_id,
+ .map(|(i, c)| super::decision::RankedCandidate {
+ node_id: c.node_id,
score: if i == value.selected_idx { 1.0 } else { 0.5 / (i + 1) as f32 },
reason: None,
})
@@ -433,14 +444,14 @@ impl LlmPilot {
/// Create a default decision when LLM fails.
fn default_decision(
&self,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
let ranked = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| super::decision::RankedCandidate {
- node_id,
+ .map(|(i, c)| super::decision::RankedCandidate {
+ node_id: c.node_id,
score: 1.0 / (i + 1) as f32,
reason: None,
})
@@ -479,11 +490,13 @@ impl Pilot for LlmPilot {
fn should_intervene(&self, state: &SearchState<'_>) -> bool {
// Check mode
if !self.config.mode.uses_llm() {
+ println!("[DEBUG] LlmPilot::should_intervene() - mode doesn't use LLM");
return false;
}
// Check budget
if !self.has_budget() {
+ println!("[DEBUG] LlmPilot::should_intervene() - budget exhausted");
debug!("Budget exhausted, skipping intervention");
return false;
}
@@ -492,6 +505,8 @@ impl Pilot for LlmPilot {
// Condition 1: Fork point with enough candidates
if state.candidates.len() > intervention.fork_threshold {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: fork point with {} candidates (threshold={})",
+ state.candidates.len(), intervention.fork_threshold);
debug!(
"Intervening: fork point with {} candidates",
state.candidates.len()
@@ -501,12 +516,15 @@ impl Pilot for LlmPilot {
// Condition 2: Scores are too close (algorithm uncertain)
if self.scores_are_close(state) {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: scores are close (best={:.2})", state.best_score);
debug!("Intervening: scores are close");
return true;
}
// Condition 3: Low confidence (best score too low)
if intervention.is_low_confidence(state.best_score) {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: low confidence (best_score={:.2}, threshold={:.2})",
+ state.best_score, intervention.low_score_threshold);
debug!(
"Intervening: low confidence (best_score={:.2})",
state.best_score
@@ -516,31 +534,58 @@ impl Pilot for LlmPilot {
// Condition 4: Backtracking and guide_at_backtrack is enabled
if state.is_backtracking && self.config.guide_at_backtrack {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: backtracking");
debug!("Intervening: backtracking");
return true;
}
+ println!("[DEBUG] LlmPilot::should_intervene() - NO: candidates={}, best_score={:.2}",
+ state.candidates.len(), state.best_score);
false
}
async fn decide(&self, state: &SearchState<'_>) -> PilotDecision {
let point = self.get_intervention_point(state);
+ println!("[DEBUG] LlmPilot::decide() - intervention_point={:?}, candidates={}",
+ point, state.candidates.len());
// Build context
let context = self.context_builder.build(state);
+ // Build candidate info with titles
+ let candidate_info: Vec<super::parser::CandidateInfo> = state.candidates
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ state.tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
+
// Make LLM call
- self.call_llm(point, &context, state.candidates).await
+ let decision = self.call_llm(point, &context, &candidate_info).await;
+
+ println!("[DEBUG] LlmPilot::decide() - result: confidence={:.2}, direction={:?}, ranked={}",
+ decision.confidence, std::mem::discriminant(&decision.direction), decision.ranked_candidates.len());
+
+ decision
}
async fn guide_start(&self, tree: &DocumentTree, query: &str) -> Option {
+ println!("[DEBUG] LlmPilot::guide_start() called, query='{}'", query);
+
// Check if guide_at_start is enabled
if !self.config.guide_at_start {
+ println!("[DEBUG] LlmPilot::guide_start() - guide_at_start=false, skipping");
return None;
}
// Check budget
if !self.has_budget() {
+ println!("[DEBUG] LlmPilot::guide_start() - budget exhausted, skipping");
debug!("Budget exhausted, cannot guide start");
return None;
}
@@ -549,12 +594,41 @@ impl Pilot for LlmPilot {
let context = self.context_builder.build_start_context(tree, query);
// Get root's children as candidates
- let candidates = tree.children(tree.root());
+ let node_ids = tree.children(tree.root());
+ println!("[DEBUG] LlmPilot::guide_start() - {} root children candidates", node_ids.len());
+
+ // Build CandidateInfo with titles
+ let candidates: Vec<super::parser::CandidateInfo> = node_ids
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
// Make LLM call
+ println!("[DEBUG] LlmPilot::guide_start() - calling LLM...");
let decision = self
.call_llm(InterventionPoint::Start, &context, &candidates)
.await;
+
+ println!("[DEBUG] LlmPilot::guide_start() - LLM returned: confidence={:.2}, ranked_candidates={}, reasoning='{}'",
+ decision.confidence,
+ decision.ranked_candidates.len(),
+ decision.reasoning.chars().take(100).collect::());
+
+ // Debug: show top ranked candidates
+ for (i, rc) in decision.ranked_candidates.iter().enumerate().take(3) {
+ if let Some(node) = tree.get(rc.node_id) {
+ println!("[DEBUG] Ranked {}: node_id={:?}, score={:.3}, title='{}'",
+ i, rc.node_id, rc.score, node.title);
+ }
+ }
+
info!(
"Pilot start guidance: confidence={}, candidates={}",
decision.confidence,
@@ -580,9 +654,22 @@ impl Pilot for LlmPilot {
.context_builder
.build_backtrack_context(state, state.path);
+ // Build CandidateInfo
+ let candidates: Vec<super::parser::CandidateInfo> = state.candidates
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ state.tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
+
// Make LLM call
Some(
- self.call_llm(InterventionPoint::Backtrack, &context, state.candidates)
+ self.call_llm(InterventionPoint::Backtrack, &context, &candidates)
.await,
)
}
@@ -662,20 +749,6 @@ mod tests {
assert!(!pilot.has_budget());
}
- #[test]
- fn test_default_decision() {
- let client = LlmClient::for_model("gpt-4o-mini");
- let config = PilotConfig::default();
- let pilot = LlmPilot::new(client, config);
-
- let candidates = create_test_node_ids(2);
- let decision = pilot.default_decision(&candidates, InterventionPoint::Fork);
-
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.confidence, 0.0);
- assert!(decision.reasoning.contains("LLM"));
- }
-
#[test]
fn test_reset() {
let client = LlmClient::for_model("gpt-4o-mini");
diff --git a/rust/src/retrieval/pilot/parser.rs b/rust/src/retrieval/pilot/parser.rs
index 85954c82..651b1c3c 100644
--- a/rust/src/retrieval/pilot/parser.rs
+++ b/rust/src/retrieval/pilot/parser.rs
@@ -19,20 +19,86 @@ use crate::document::NodeId;
/// Parsed response from LLM.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
- /// Ranked candidates with scores.
+ /// Ranked candidates with scores (FORK format).
#[serde(default)]
pub ranked_candidates: Vec<CandidateScore>,
+ /// Entry points for START intervention (list of node titles).
+ #[serde(default)]
+ pub entry_points: Vec<String>,
+ /// Best entry points (alternative START format from LLM).
+ #[serde(default)]
+ pub best_entry_points: Vec<EntryPoint>,
+ /// Selected nodes (another alternative START format - list of titles).
+ #[serde(default)]
+ pub selected_nodes: Vec<String>,
+ /// Selected node (singular - some LLMs return this format).
+ #[serde(default)]
+ pub selected_node: Option<String>,
+ /// Recommended node (another singular format).
+ #[serde(default)]
+ pub recommended_node: Option<String>,
+ /// Analysis wrapper (some LLMs nest response in "analysis" object).
+ #[serde(default)]
+ pub analysis: Option<AnalysisWrapper>,
/// Recommended search direction.
#[serde(default)]
pub direction: DirectionResponse,
- /// Confidence level (0.0 - 1.0).
- #[serde(default = "default_confidence")]
+ /// Confidence level (0.0 - 1.0 or "high"/"medium"/"low").
+ #[serde(default = "default_confidence", deserialize_with = "deserialize_confidence")]
pub confidence: f32,
/// Reasoning for the decision.
#[serde(default)]
pub reasoning: String,
}
+/// Custom deserializer for confidence that accepts both float and string.
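+///
+/// For example, `"confidence": 0.9` and `"confidence": "high"` both deserialize
+/// to `0.9`, while `"confidence": "low"` maps to `0.3`.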
+fn deserialize_confidence<'de, D>(deserializer: D) -> Result
+where
+ D: serde::Deserializer<'de>,
+{
+ use serde::de::Error;
+
+ let value = serde_json::Value::deserialize(deserializer)?;
+ match value {
+ serde_json::Value::Number(n) => {
+ // Handle numeric value
+ Ok(n.as_f64().unwrap_or(0.5) as f32)
+ }
+ serde_json::Value::String(s) => {
+ // Handle string values like "high", "medium", "low"
+ let lower = s.to_lowercase();
+ let confidence = match lower.as_str() {
+ "high" | "very high" | "strong" => 0.9,
+ "medium" | "moderate" => 0.6,
+ "low" | "weak" => 0.3,
+ _ => 0.5, // default for unknown strings
+ };
+ Ok(confidence)
+ }
+ _ => Ok(0.5), // default for other types
+ }
+}
+
+/// Analysis wrapper for nested LLM responses.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AnalysisWrapper {
+ /// Query from analysis.
+ #[serde(default)]
+ pub query: Option<String>,
+ /// Intent detected.
+ #[serde(default)]
+ pub intent: Option<String>,
+ /// Selected node (singular).
+ #[serde(default)]
+ pub selected_node: Option<String>,
+ /// Selected nodes (plural).
+ #[serde(default)]
+ pub selected_nodes: Vec<String>,
+ /// Reasoning from analysis.
+ #[serde(default)]
+ pub reasoning: Option<String>,
+}
+
/// Candidate score from LLM response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateScore {
@@ -45,6 +111,48 @@ pub struct CandidateScore {
pub reason: Option<String>,
}
+/// Candidate info for title matching.
+#[derive(Debug, Clone)]
+pub struct CandidateInfo {
+ /// Node ID.
+ pub node_id: NodeId,
+ /// Title of the node.
+ pub title: String,
+ /// Index in the candidates list.
+ pub index: usize,
+}
+
+/// Entry point from START response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EntryPoint {
+ /// Node ID or index.
+ #[serde(default)]
+ pub node_id: Option<usize>,
+ /// Index of the candidate.
+ #[serde(default)]
+ pub index: Option<usize>,
+ /// Title of the entry point.
+ #[serde(default)]
+ pub title: Option<String>,
+ /// Relevance score (may be 1-5 or 0.0-1.0).
+ #[serde(default)]
+ pub relevance_score: Option<f32>,
+ /// Score (alternative field name).
+ #[serde(default)]
+ pub score: Option<f32>,
+}
+
+/// Top-3 candidate from the LLM locate-top-3 response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Top3Candidate {
+ /// Node ID copied verbatim from the TOC.
+ pub node_id: usize,
+ /// Relevance score (0.0-1.0).
+ pub relevance_score: f32,
+ /// Reason for the selection.
+ pub reason: String,
+}
+
/// Direction response from LLM.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
@@ -111,23 +219,29 @@ impl ResponseParser {
/// # Arguments
///
/// * `response` - Raw LLM response text
- /// * `candidates` - Original candidate NodeIds (for mapping indices)
+ /// * `candidates` - Candidate info with NodeId, title, and index
/// * `point` - The intervention point
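+ ///
+ /// Parsing falls back in order: JSON (optionally inside a fenced code block),
+ /// then regex extraction, then a uniform default decision.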
pub fn parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
+ println!("[DEBUG] ResponseParser::parse() - candidates.len()={}", candidates.len());
+
// Try JSON parse first
if let Some(decision) = self.try_json_parse(response, candidates, point) {
+ println!("[DEBUG] ResponseParser::parse() - JSON parse succeeded, ranked={}", decision.ranked_candidates.len());
return decision;
}
+ println!("[DEBUG] ResponseParser::parse() - JSON parse failed, trying regex...");
// Try regex extraction
if let Some(decision) = self.try_regex_parse(response, candidates, point) {
+ println!("[DEBUG] ResponseParser::parse() - Regex parse succeeded, ranked={}", decision.ranked_candidates.len());
return decision;
}
+ println!("[DEBUG] ResponseParser::parse() - Regex parse failed, using default decision");
// Return default decision
self.default_decision(candidates, point)
@@ -137,23 +251,34 @@ impl ResponseParser {
fn try_json_parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> Option<PilotDecision> {
// First, try to extract JSON from code blocks
let json_str = if let Some(caps) = self.json_block_regex.captures(response) {
- caps.get(1)?.as_str().trim().to_string()
+ let extracted = caps.get(1)?.as_str().trim().to_string();
+ println!("[DEBUG] ResponseParser::try_json_parse() - Found JSON in code block");
+ extracted
} else {
// Try to find raw JSON object
let start = response.find('{')?;
let end = response.rfind('}')? + 1;
- response[start..end].to_string()
+ let extracted = response[start..end].to_string();
+ println!("[DEBUG] ResponseParser::try_json_parse() - Found raw JSON (no code block)");
+ extracted
};
+ println!("[DEBUG] ResponseParser::try_json_parse() - Extracted JSON:\n{}", json_str);
+
// Parse JSON
- let llm_response: LlmResponse = match serde_json::from_str(&json_str) {
- Ok(r) => r,
+ let llm_response: LlmResponse = match serde_json::from_str::<LlmResponse>(&json_str) {
+ Ok(r) => {
+ tracing::debug!("ResponseParser::try_json_parse() - parsed OK, ranked_candidates={}", r.ranked_candidates.len());
+ r
+ },
Err(e) => {
+ println!("[DEBUG] ResponseParser::try_json_parse() - JSON parse FAILED: {}", e);
warn!("Failed to parse LLM response as JSON: {}", e);
return None;
}
@@ -167,7 +292,7 @@ impl ResponseParser {
fn try_regex_parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> Option<PilotDecision> {
// Extract confidence
@@ -226,7 +351,7 @@ impl ResponseParser {
fn extract_ranked_candidates(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
) -> Vec<RankedCandidate> {
let mut ranked = Vec::new();
@@ -245,7 +370,7 @@ impl ResponseParser {
if index < candidates.len() {
ranked.push(RankedCandidate {
- node_id: candidates[index],
+ node_id: candidates[index].node_id,
score: score.clamp(0.0, 1.0),
reason: None,
});
@@ -268,7 +393,7 @@ impl ResponseParser {
if let Ok(idx) = match_1.as_str().parse::<usize>() {
if idx < candidates.len() && seen.insert(idx) {
ranked.push(RankedCandidate {
- node_id: candidates[idx],
+ node_id: candidates[idx].node_id,
score: 1.0 - (ranked.len() as f32 * 0.1), // Decreasing scores
reason: None,
});
@@ -287,20 +412,28 @@ impl ResponseParser {
/// Convert LlmResponse to PilotDecision.
fn llm_response_to_decision(
&self,
- llm_response: LlmResponse,
- candidates: &[NodeId],
+ mut llm_response: LlmResponse,
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - point={:?}", point);
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - ranked_candidates.len()={}", llm_response.ranked_candidates.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - best_entry_points.len()={}", llm_response.best_entry_points.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - entry_points.len()={}", llm_response.entry_points.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - selected_nodes.len()={}", llm_response.selected_nodes.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - selected_node={:?}", llm_response.selected_node);
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - analysis={:?}", llm_response.analysis.as_ref().map(|a| (&a.selected_node, &a.selected_nodes)));
+
// Convert candidate scores to RankedCandidate
- let ranked_candidates: Vec<RankedCandidate> = llm_response
+ let mut ranked_candidates: Vec<RankedCandidate> = llm_response
.ranked_candidates
- .into_iter()
+ .iter()
.filter_map(|cs| {
if cs.index < candidates.len() {
Some(RankedCandidate {
- node_id: candidates[cs.index],
+ node_id: candidates[cs.index].node_id,
score: cs.score.clamp(0.0, 1.0),
- reason: cs.reason,
+ reason: cs.reason.clone(),
})
} else {
None
@@ -308,6 +441,154 @@ impl ResponseParser {
})
.collect();
+ // Handle START response format: best_entry_points, entry_points, or selected_nodes
+ if ranked_candidates.is_empty() {
+ // Try to convert best_entry_points (format: [{"node_id": 1, "title": "...", "relevance_score": 5}])
+ for entry in &llm_response.best_entry_points {
+ // Get index from either node_id or index field
+ // node_id is 1-indexed from LLM, convert to 0-indexed
+ let idx = if let Some(nid) = entry.node_id {
+ if nid > 0 { nid - 1 } else { nid }
+ } else if let Some(idx) = entry.index {
+ idx
+ } else {
+ continue; // Skip if no valid index
+ };
+
+ if idx < candidates.len() {
+ let raw = entry.relevance_score.or(entry.score).unwrap_or(0.5);
+ // Scores may arrive on a 1-5 scale or already in 0.0-1.0;
+ // only rescale values greater than 1.0
+ let score = if raw > 1.0 { raw / 5.0 } else { raw };
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidates[idx].node_id,
+ score: score.clamp(0.0, 1.0),
+ reason: entry.title.clone(),
+ });
+ tracing::debug!("ResponseParser - converted best_entry_point (idx={}, score={:.2})", idx, score);
+ }
+ }
+
+ // Try to convert selected_nodes (format: ["Project Documentation", "Overview"])
+ // Match by title
+ for selected_title in &llm_response.selected_nodes {
+ for candidate in candidates {
+ if Self::titles_match(selected_title, &candidate.title) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.9, // High score for title match
+ reason: Some(format!("Title match: {}", selected_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched selected_node '{}' to candidate '{}' (index={})",
+ selected_title, candidate.title, candidate.index);
+ break; // Only match once per selected_node
+ }
+ }
+ }
+
+ // Try to convert selected_node (singular - format: "Project Documentation")
+ if let Some(ref single_node) = llm_response.selected_node {
+ for candidate in candidates {
+ if Self::titles_match(single_node, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.9,
+ reason: Some(format!("Title match (singular): {}", single_node)),
+ });
+ println!("[DEBUG] ResponseParser - matched selected_node (singular) '{}' to candidate '{}' (index={})",
+ single_node, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Try to convert recommended_node (another singular format)
+ if let Some(ref recommended) = llm_response.recommended_node {
+ for candidate in candidates {
+ if Self::titles_match(recommended, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Recommended node: {}", recommended)),
+ });
+ println!("[DEBUG] ResponseParser - matched recommended_node '{}' to candidate '{}' (index={})",
+ recommended, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Try to extract from analysis wrapper if present
+ if let Some(ref analysis) = llm_response.analysis {
+ // Check analysis.selected_nodes (plural array)
+ for selected_title in &analysis.selected_nodes {
+ for candidate in candidates {
+ if Self::titles_match(selected_title, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Analysis selected_nodes: {}", selected_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched analysis.selected_nodes '{}' to candidate '{}' (index={})",
+ selected_title, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Check analysis.selected_node (singular)
+ if let Some(ref single_node) = analysis.selected_node {
+ for candidate in candidates {
+ if Self::titles_match(single_node, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Analysis selected_node: {}", single_node)),
+ });
+ println!("[DEBUG] ResponseParser - matched analysis.selected_node (singular) '{}' to candidate '{}' (index={})",
+ single_node, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Use analysis.reasoning if top-level reasoning is empty
+ if llm_response.reasoning.is_empty() {
+ if let Some(ref r) = analysis.reasoning {
+ llm_response.reasoning = r.clone();
+ }
+ }
+ }
+
+ // Try to convert entry_points (format: ["Node Title 1", "Node Title 2"])
+ for entry_title in &llm_response.entry_points {
+ for candidate in candidates {
+ if Self::titles_match(entry_title, &candidate.title) {
+ // Check if already added
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.8, // Slightly lower score for entry_points
+ reason: Some(format!("Entry point: {}", entry_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched entry_point '{}' to candidate '{}' (index={})",
+ entry_title, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+ }
+
// Convert direction
let direction = match llm_response.direction {
DirectionResponse::GoDeeper => SearchDirection::GoDeeper {
@@ -333,6 +614,8 @@ impl ResponseParser {
},
};
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - final ranked_candidates.len()={}", ranked_candidates.len());
+
PilotDecision {
ranked_candidates,
direction,
@@ -342,14 +625,41 @@ impl ResponseParser {
}
}
+ /// Check if two titles match (fuzzy matching).
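+ ///
+ /// Matches on, in order: exact equality, substring containment, then at
+ /// least 50% word overlap; e.g. "Architecture Overview" matches "overview".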
+ fn titles_match(llm_title: &str, candidate_title: &str) -> bool {
+ let llm_lower = llm_title.to_lowercase().trim().to_string();
+ let candidate_lower = candidate_title.to_lowercase().trim().to_string();
+
+ // Exact match
+ if llm_lower == candidate_lower {
+ return true;
+ }
+
+ // Contains match
+ if llm_lower.contains(&candidate_lower) || candidate_lower.contains(&llm_lower) {
+ return true;
+ }
+
+ // Word overlap match (at least 50% of words match)
+ let llm_words: std::collections::HashSet<&str> = llm_lower.split_whitespace().collect();
+ let candidate_words: std::collections::HashSet<&str> = candidate_lower.split_whitespace().collect();
+ let overlap = llm_words.intersection(&candidate_words).count();
+ let min_words = llm_words.len().min(candidate_words.len());
+ if min_words > 0 && overlap as f32 / min_words as f32 >= 0.5 {
+ return true;
+ }
+
+ false
+ }
+
/// Create a default decision when parsing fails.
- fn default_decision(&self, candidates: &[NodeId], point: InterventionPoint) -> PilotDecision {
+ fn default_decision(&self, candidates: &[CandidateInfo], point: InterventionPoint) -> PilotDecision {
// Score candidates uniformly
let ranked: Vec<RankedCandidate> = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| RankedCandidate {
- node_id,
+ .map(|(i, c)| RankedCandidate {
+ node_id: c.node_id,
score: 1.0 / (i + 1) as f32, // Decreasing scores
reason: None,
})
@@ -395,126 +705,4 @@ mod tests {
}
ids
}
-
- #[test]
- fn test_parse_json_response() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(3);
-
- let response = r#"{
- "ranked_candidates": [
- {"index": 1, "score": 0.9, "reason": "Best match"},
- {"index": 0, "score": 0.5}
- ],
- "direction": "go_deeper",
- "confidence": 0.85,
- "reasoning": "Candidate 1 is most relevant"
- }"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.ranked_candidates[0].node_id, candidates[1]);
- assert!((decision.confidence - 0.85).abs() < 0.01);
- assert!(matches!(
- decision.direction,
- SearchDirection::GoDeeper { .. }
- ));
- }
-
- #[test]
- fn test_parse_json_in_code_block() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- let response = r#"
-Here's my analysis:
-
-```json
-{
- "ranked_candidates": [{"index": 0, "score": 0.8}],
- "direction": "go_deeper",
- "confidence": 0.8,
- "reasoning": "Test"
-}
-```
-"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
- assert_eq!(decision.ranked_candidates.len(), 1);
- }
-
- #[test]
- fn test_parse_with_regex_fallback() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- // Non-JSON response with some structure
- let response = r#"
-I think candidate 0 is the best match.
-Confidence: 0.75
-Direction: go_deeper
-"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- // Should use regex extraction
- assert!((decision.confidence - 0.75).abs() < 0.01);
- }
-
- #[test]
- fn test_default_decision() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- let decision = parser.parse(
- "This is unparseable gibberish",
- &candidates,
- InterventionPoint::Fork,
- );
-
- // Should return default
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.confidence, 0.0);
- assert!(decision.reasoning.contains("parsing failed"));
- }
-
- #[test]
- fn test_confidence_clamping() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(1);
-
- let response = r#"{
- "ranked_candidates": [{"index": 0, "score": 1.5}],
- "confidence": 1.5,
- "direction": "go_deeper"
- }"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- // Confidence should be clamped to 1.0
- assert!((decision.confidence - 1.0).abs() < 0.01);
- }
-
- #[test]
- fn test_direction_conversion() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(1);
-
- let test_cases = vec![
- ("\"direction\": \"go_deeper\"", true),
- ("\"direction\": \"explore_siblings\"", true),
- ("\"direction\": \"backtrack\"", true),
- ("\"direction\": \"found_answer\"", true),
- ];
-
- for (dir_json, should_parse) in test_cases {
- let response = format!(
- r#"{{"ranked_candidates": [], "confidence": 0.5, {}}}"#,
- dir_json
- );
- let decision = parser.parse(&response, &candidates, InterventionPoint::Fork);
- assert!(should_parse, "Direction should parse correctly");
- }
- }
}
diff --git a/rust/src/retrieval/pilot/prompts/system_start.txt b/rust/src/retrieval/pilot/prompts/system_start.txt
index d3a65f49..086f9e96 100644
--- a/rust/src/retrieval/pilot/prompts/system_start.txt
+++ b/rust/src/retrieval/pilot/prompts/system_start.txt
@@ -1,11 +1,15 @@
-You are a document navigation assistant specialized in hierarchical document search.
+You are a document navigation assistant. Your task is to identify the best entry points for searching a hierarchical document based on a user query.
-Your task is to analyze a user's query and the document structure to identify the best starting points for search.
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown, no explanation, just the JSON object.
-Guidelines:
-- Identify sections that are most likely to contain the answer
-- Consider the query's domain, keywords, and intent
-- Prefer more specific sections over general ones when appropriate
-- Multiple entry points can be suggested if the query is ambiguous
+Your response must have this EXACT structure:
+{
+ "entry_points": ["list of node titles as strings"],
+ "reasoning": "explanation string",
+ "confidence": 0.85
+}
-You must respond in valid JSON format.
+Where:
+- entry_points: MUST be an array of node titles (strings) from the candidate list
+- reasoning: MUST be a string
+- confidence: MUST be a number (0.0 to 1.0), not a string like "high"
diff --git a/rust/src/retrieval/pilot/prompts/templates.rs b/rust/src/retrieval/pilot/prompts/templates.rs
index 5f9f75ff..50f4c3cc 100644
--- a/rust/src/retrieval/pilot/prompts/templates.rs
+++ b/rust/src/retrieval/pilot/prompts/templates.rs
@@ -248,52 +248,170 @@ pub mod fallback {
use super::*;
pub fn system_start() -> String {
- "You are a document navigation assistant. Help identify the best starting point for searching a hierarchical document.".to_string()
+ r#"You are a document navigation assistant. Help identify the best entry points for searching a hierarchical document.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks, no explanation. Just the JSON object.
+
+Your response must have this EXACT structure:
+{
+ "entry_points": ["Title 1", "Title 2"],
+ "reasoning": "Brief explanation",
+ "confidence": 0.85
+}
+
+Rules:
+- entry_points: Array of node title strings (from the candidates provided)
+- reasoning: String explaining your choice
+- confidence: Number between 0.0 and 1.0 (use a number, NOT "high"/"medium"/"low")"#.to_string()
}
pub fn user_start() -> String {
- r#"Given the following document structure and user query, identify the best entry points for search.
-
-{context}
+ r#"{context}
-Respond in JSON format with your analysis."#.to_string()
+Respond with ONLY the JSON object (no markdown, no explanation):
+{
+ "entry_points": ["list of node titles as strings"],
+ "reasoning": "your reasoning here",
+ "confidence": 0.85
+}"#.to_string()
}
pub fn system_fork() -> String {
- "You are a document navigation assistant. At each decision point, rank the candidate branches by their likelihood of containing the answer to the user's query.".to_string()
+ r#"You are a document navigation assistant. At each decision point, rank the candidate branches by their likelihood of containing the answer to the user's query.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "ranked_candidates": [
+ {"index": 0, "score": 0.9, "reason": "explanation"}
+ ],
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "overall explanation"
+}
+
+Rules:
+- ranked_candidates: Array of objects with index (number), score (0.0-1.0), reason (string)
+- direction: One of "go_deeper", "explore_siblings", "backtrack", "found_answer"
+- confidence: Number between 0.0 and 1.0 (NOT a string)"#.to_string()
}
pub fn user_fork() -> String {
- r#"Given the current search context and candidate branches, rank them by relevance.
-
-{context}
+ r#"{context}
-Respond in JSON format with ranked candidates."#
+Respond with ONLY the JSON object:
+{
+ "ranked_candidates": [
+ {"index": 0, "score": 0.9, "reason": "why this candidate"}
+ ],
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "overall explanation"
+}"#
.to_string()
}
pub fn system_backtrack() -> String {
- "You are a document navigation assistant. When a search path fails to find the answer, analyze why and suggest alternative branches to explore.".to_string()
+ r#"You are a document navigation assistant. When a search path fails to find the answer, analyze why and suggest alternative branches to explore.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "alternative_branches": [
+ {"index": 0, "score": 0.8, "reason": "explanation"}
+ ],
+ "direction": "backtrack",
+ "confidence": 0.85,
+ "reasoning": "why the original path failed"
+}"#.to_string()
}
pub fn user_backtrack() -> String {
- r#"The current search path did not find the answer. Analyze the failure and suggest alternatives.
-
-{context}
+ r#"{context}
-Respond in JSON format with alternative branches."#.to_string()
+Respond with ONLY the JSON object:
+{
+ "alternative_branches": [
+ {"index": 0, "score": 0.8, "reason": "why this alternative"}
+ ],
+ "direction": "backtrack",
+ "confidence": 0.85,
+ "reasoning": "why original path failed"
+}"#.to_string()
}
pub fn system_evaluate() -> String {
- "You are a document analysis assistant. Evaluate whether the current node contains the answer to the user's query.".to_string()
+ r#"You are a document analysis assistant. Evaluate whether the current node contains the answer to the user's query.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "relevance_score": 0.85,
+ "is_answer": false,
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "explanation"
+}"#.to_string()
}
pub fn user_evaluate() -> String {
- r#"Evaluate if this node contains the answer to the user's query.
+ r#"{context}
+
+Respond with ONLY the JSON object:
+{
+ "relevance_score": 0.85,
+ "is_answer": false,
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "explanation"
+}"#
+ .to_string()
+ }
-{context}
+ pub fn system_locate_top3() -> String {
+ r#"You are a document navigation assistant. Your task is to locate the most relevant sections in a document hierarchy for a user's query.
-Respond in JSON format with your evaluation."#
+CRITICAL INSTRUCTIONS:
+1. Analyze the user query carefully to understand the intent
+2. Examine the provided Table of Contents (TOC) with node IDs
+3. Select the TOP 3 most relevant nodes that would contain the answer
+4. You MUST respond with ONLY valid JSON. No markdown code blocks. No explanations outside JSON.
+
+Your response must have this EXACT structure:
+{
+ "reasoning": "Brief analysis of the query and why you selected these nodes",
+ "candidates": [
+ {"node_id": , "relevance_score": 0.95, "reason": "Why this node matches the query"},
+ {"node_id": , "relevance_score": 0.80, "reason": "Why this node is also relevant"},
+ {"node_id": , "relevance_score": 0.65, "reason": "Why this node might be relevant"}
+ ]
+}
+
+Rules:
+- node_id: MUST be a number from the provided TOC (copy exactly)
+- relevance_score: Number between 0.0 and 1.0 (higher = more relevant)
+- reason: Brief explanation for each selection
+- candidates: Must have exactly 3 items, ordered by relevance (highest first)
+- If fewer than 3 relevant nodes exist, use lower scores for less relevant ones"#.to_string()
+ }
+
+ pub fn user_locate_top3() -> String {
+ r#"{context}
+
+Based on the query and TOC above, select the TOP 3 most relevant nodes.
+
+Respond with ONLY the JSON object:
+{
+ "reasoning": "Your analysis here",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "explanation"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "explanation"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "explanation"}
+ ]
+}"#
.to_string()
}
}
@@ -337,3 +455,68 @@ impl EvaluatePrompt {
}
}
}
+
+impl LocateTop3Prompt {
+ /// Get template with fallback.
+ pub fn with_fallback() -> Self {
+ Self {
+ system: fallback::system_locate_top3(),
+ template: fallback::user_locate_top3(),
+ }
+ }
+}
+
+/// Prompt template for LOCATE_TOP3 intervention point.
+///
+/// Used at the start to directly locate top-3 relevant nodes from TOC:
+/// - Understand query intent
+/// - Identify top 3 most relevant nodes with confidence scores
+/// - Provide reasoning for each selection
+#[derive(Debug, Clone)]
+pub struct LocateTop3Prompt {
+ system: String,
+ template: String,
+}
+
+impl Default for LocateTop3Prompt {
+ fn default() -> Self {
+ Self::with_fallback()
+ }
+}
+
+impl LocateTop3Prompt {
+ /// Create a new locate top-3 prompt template.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Create with custom templates.
+ pub fn with_templates(system: String, template: String) -> Self {
+ Self { system, template }
+ }
+}
+
+impl PromptTemplate for LocateTop3Prompt {
+ fn system_prompt(&self) -> &str {
+ &self.system
+ }
+
+ fn user_prompt_template(&self) -> &str {
+ &self.template
+ }
+
+ fn intervention_point(&self) -> InterventionPoint {
+ InterventionPoint::Start
+ }
+
+ fn output_format_hint(&self) -> &str {
+ r#"{
+ "reasoning": "Overall analysis of the query and document structure",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "Why this node is relevant"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "Why this node is relevant"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "Why this node is relevant"}
+ ]
+}"#
+ }
+}
diff --git a/rust/src/retrieval/pilot/prompts/user_start.txt b/rust/src/retrieval/pilot/prompts/user_start.txt
index b091735e..df048df8 100644
--- a/rust/src/retrieval/pilot/prompts/user_start.txt
+++ b/rust/src/retrieval/pilot/prompts/user_start.txt
@@ -2,7 +2,16 @@ Analyze the following document structure and user query to identify the best ent
{context}
-Provide your response as a JSON object with:
-- entry_points: list of section titles to start searching from
-- reasoning: brief explanation of why these entry points
-- confidence: your confidence in this recommendation (0.0-1.0)
+IMPORTANT: You MUST respond with ONLY a JSON object in this EXACT format:
+{
+ "entry_points": ["Title 1", "Title 2"],
+ "reasoning": "Brief explanation of why these entry points",
+ "confidence": 0.85
+}
+
+Rules:
+- entry_points: Array of strings (node titles from the candidates above)
+- reasoning: String explaining your choice
+- confidence: Number between 0.0 and 1.0 (NOT a string like "high")
+
+Do NOT use any other field names. Use "entry_points" not "selected_node" or "recommended_node".
diff --git a/rust/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs
index 9bf02ae3..823abdba 100644
--- a/rust/src/retrieval/pipeline/context.rs
+++ b/rust/src/retrieval/pipeline/context.rs
@@ -209,6 +209,8 @@ pub struct PipelineContext {
pub keywords: Vec<String>,
/// Target sections from ToC matching.
pub target_sections: Vec,
+ /// Decomposed sub-queries (if query was decomposed).
+ pub decomposition: Option,
// ============ Plan Stage Output ============
/// Selected retrieval strategy.
@@ -268,6 +270,7 @@ impl PipelineContext {
complexity: None,
keywords: Vec::new(),
target_sections: Vec::new(),
+ decomposition: None,
selected_strategy: None,
selected_algorithm: None,
search_config: None,
diff --git a/rust/src/retrieval/pipeline_retriever.rs b/rust/src/retrieval/pipeline_retriever.rs
index 222b3a9b..377c4747 100644
--- a/rust/src/retrieval/pipeline_retriever.rs
+++ b/rust/src/retrieval/pipeline_retriever.rs
@@ -119,7 +119,7 @@ impl PipelineRetriever {
orchestrator = orchestrator.stage(plan_stage);
// Add search stage with Pilot for semantic navigation
- let mut search_stage = SearchStage::new();
+ let mut search_stage = SearchStage::new().with_llm_client(self.llm_client.clone());
if let Some(ref client) = self.llm_client {
// Create LLM-based Pilot for semantic navigation guidance
let mut pilot = LlmPilot::new(client.clone(), PilotConfig::default());
diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs
index 9fba59e9..51d700f4 100644
--- a/rust/src/retrieval/search/beam.rs
+++ b/rust/src/retrieval/search/beam.rs
@@ -126,21 +126,30 @@ impl SearchTree for BeamSearch {
let beam_width = config.beam_width.min(self.beam_width);
let mut visited: HashSet<NodeId> = HashSet::new();
+ println!("[DEBUG] BeamSearch: query='{}', beam_width={}, min_score={:.2}",
+ context.query, beam_width, config.min_score);
+
// Track Pilot interventions
let mut pilot_interventions = 0;
// Initialize with root's children
let root_children = tree.children(tree.root());
+ println!("[DEBUG] Root has {} children", root_children.len());
// Check if Pilot wants to guide the start
let initial_candidates = if let Some(p) = pilot {
+ println!("[DEBUG] BeamSearch: Pilot is available, name={}, guide_at_start={}",
+ p.name(), p.config().guide_at_start);
if p.config().guide_at_start {
+ println!("[DEBUG] BeamSearch: Calling pilot.guide_start()...");
if let Some(guidance) = p.guide_start(tree, &context.query).await {
debug!(
"Pilot provided start guidance with confidence {}",
guidance.confidence
);
pilot_interventions += 1;
+ println!("[DEBUG] BeamSearch: Pilot returned guidance! confidence={:.2}, candidates={}",
+ guidance.confidence, guidance.ranked_candidates.len());
// Use Pilot's ranked order if available
if guidance.has_candidates() {
@@ -151,15 +160,19 @@ impl SearchTree for BeamSearch {
&context.query,
)
} else {
+ println!("[DEBUG] BeamSearch: Guidance has no candidates, using algorithm scoring");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: pilot.guide_start() returned None");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: guide_at_start=false, skipping Pilot");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: No Pilot available");
self.score_candidates_with_query(tree, &root_children, &context.query)
};
@@ -168,6 +181,14 @@ impl SearchTree for BeamSearch {
.map(|(node_id, score)| SearchPath::from_node(node_id, score))
.collect();
+ // Debug: show initial scores
+ println!("[DEBUG] Initial {} candidates after scoring", current_beam.len());
+ for (i, path) in current_beam.iter().enumerate().take(5) {
+ if let Some(node) = tree.get(path.leaf.unwrap_or(tree.root())) {
+ println!("[DEBUG] Initial {}: score={:.3}, title='{}'", i, path.score, node.title);
+ }
+ }
+
// Keep top beam_width
current_beam.truncate(beam_width);
@@ -215,6 +236,7 @@ impl SearchTree for BeamSearch {
children.len()
);
+ println!("[DEBUG] BEAM SEARCH: Pilot intervening at decision point");
match p.decide(&state).await {
decision => {
pilot_interventions += 1;
diff --git a/rust/src/retrieval/search/greedy.rs b/rust/src/retrieval/search/greedy.rs
index b539cd23..a8ee80ed 100644
--- a/rust/src/retrieval/search/greedy.rs
+++ b/rust/src/retrieval/search/greedy.rs
@@ -144,6 +144,7 @@ impl SearchTree for GreedySearch {
children.len()
);
+ println!("[DEBUG] GREEDY SEARCH: Pilot intervening at decision point");
match p.decide(&state).await {
decision => {
pilot_interventions += 1;
diff --git a/rust/src/retrieval/search/scorer.rs b/rust/src/retrieval/search/scorer.rs
index f17bf118..5dbb9209 100644
--- a/rust/src/retrieval/search/scorer.rs
+++ b/rust/src/retrieval/search/scorer.rs
@@ -193,9 +193,9 @@ impl ScoringContext {
let total_score = title_score + summary_score + content_score;
- // Normalize to [0, 1] range
- let max_possible = self.query_terms.len() as f32 * 10.0; // Rough upper bound
- let normalized = (total_score / max_possible).clamp(0.0, 1.0);
+ // Normalize to [0, 1] range using sigmoid-like scaling
+ // This prevents over-penalization with few query terms
+ let normalized = (total_score / 3.0).tanh(); // 3.0 is a reasonable midpoint
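+ // For example: total_score 1.0 -> 0.32, 3.0 -> 0.76, 6.0 -> 0.96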
// Apply depth penalty
let depth_factor = 1.0 - (node.depth as f32 * self.depth_penalty).min(0.5);
diff --git a/rust/src/retrieval/stages/analyze.rs b/rust/src/retrieval/stages/analyze.rs
index 3eabca1f..8dd875e6 100644
--- a/rust/src/retrieval/stages/analyze.rs
+++ b/rust/src/retrieval/stages/analyze.rs
@@ -7,14 +7,16 @@
//! - Query complexity (Simple/Medium/Complex)
//! - Keywords for matching
//! - Target sections based on ToC matching
+//! - Query decomposition for complex queries
use async_trait::async_trait;
use tracing::info;
use crate::document::{DocumentTree, TocView};
use crate::retrieval::complexity::ComplexityDetector;
+use crate::retrieval::decompose::{DecompositionConfig, QueryDecomposer};
use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage, StageOutcome};
-// QueryComplexity is used in context
+use crate::llm::LlmClient;
/// Analyze Stage - analyzes queries for retrieval planning.
///
@@ -22,17 +24,25 @@ use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage,
/// 1. Detects query complexity (Simple/Medium/Complex)
/// 2. Extracts keywords for matching
/// 3. Matches target sections from ToC
+/// 4. Decomposes complex queries into sub-queries (if enabled)
///
/// # Example
///
/// ```rust,ignore
/// let stage = AnalyzeStage::new()
-/// .with_toc_matching(true);
+/// .with_toc_matching(true)
+/// .with_decomposition(true);
/// ```
pub struct AnalyzeStage {
complexity_detector: ComplexityDetector,
toc_view: TocView,
enable_toc_matching: bool,
+ /// Query decomposer for complex queries.
+ query_decomposer: Option,
+ /// Enable query decomposition.
+ enable_decomposition: bool,
+ /// Complexity threshold for triggering decomposition.
+ decomposition_threshold: f32,
}
impl Default for AnalyzeStage {
@@ -48,6 +58,9 @@ impl AnalyzeStage {
complexity_detector: ComplexityDetector::new(),
toc_view: TocView::new(),
enable_toc_matching: true,
+ query_decomposer: None,
+ enable_decomposition: false,
+ decomposition_threshold: 0.6,
}
}
@@ -57,6 +70,42 @@ impl AnalyzeStage {
self
}
+ /// Enable query decomposition with default configuration.
+ pub fn with_decomposition(mut self, enable: bool) -> Self {
+ self.enable_decomposition = enable;
+ if enable && self.query_decomposer.is_none() {
+ self.query_decomposer = Some(QueryDecomposer::new(DecompositionConfig::default()));
+ }
+ self
+ }
+
+ /// Enable query decomposition with custom configuration.
+ pub fn with_decomposition_config(mut self, config: DecompositionConfig) -> Self {
+ self.enable_decomposition = true;
+ self.query_decomposer = Some(QueryDecomposer::new(config));
+ self
+ }
+
+ /// Enable query decomposition with an LLM client.
+ pub fn with_llm_client(mut self, client: LlmClient) -> Self {
+ // Replaces any existing decomposer with a default-config one using this client
+ self.query_decomposer = Some(
+ QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client),
+ );
+ self.enable_decomposition = true;
+ self
+ }
+
+ /// Set complexity threshold for triggering decomposition.
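+ ///
+ /// A minimal usage sketch:
+ ///
+ /// ```rust,ignore
+ /// // Decompose only clearly complex queries
+ /// // (Simple/Medium/Complex map to 0.3/0.6/0.9 in this stage)
+ /// let stage = AnalyzeStage::new()
+ ///     .with_decomposition(true)
+ ///     .with_decomposition_threshold(0.8);
+ /// ```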
+ pub fn with_decomposition_threshold(mut self, threshold: f32) -> Self {
+ self.decomposition_threshold = threshold.clamp(0.0, 1.0);
+ self
+ }
+
/// Extract keywords from a query.
fn extract_keywords(&self, query: &str) -> Vec<String> {
// Simple keyword extraction:
@@ -182,7 +231,42 @@ impl RetrievalStage for AnalyzeStage {
info!("Target sections: {:?}", ctx.target_sections);
}
- // 4. Update metrics
+ // 4. Decompose query if enabled and complex enough
+ if self.enable_decomposition {
+ if let Some(ref decomposer) = self.query_decomposer {
+ let complexity_score = ctx.complexity
+ .as_ref()
+ .map(|c| match c {
+ crate::retrieval::types::QueryComplexity::Simple => 0.3,
+ crate::retrieval::types::QueryComplexity::Medium => 0.6,
+ crate::retrieval::types::QueryComplexity::Complex => 0.9,
+ })
+ .unwrap_or(0.5);
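+ // With the default threshold (0.6), Medium and Complex queries trigger decomposition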
+
+ if complexity_score >= self.decomposition_threshold {
+ info!("Decomposing query (complexity: {:.2})", complexity_score);
+ match decomposer.decompose(&ctx.query).await {
+ Ok(result) => {
+ if result.was_decomposed {
+ info!(
+ "Query decomposed into {} sub-queries",
+ result.sub_queries.len()
+ );
+ for (i, sq) in result.sub_queries.iter().enumerate() {
+ info!(" Sub-query {}: {} (priority: {})", i, sq.text, sq.priority);
+ }
+ }
+ ctx.decomposition = Some(result);
+ }
+ Err(e) => {
+ info!("Query decomposition failed: {}, continuing with original query", e);
+ }
+ }
+ }
+ }
+ }
+
+ // 5. Update metrics
- ctx.metrics.llm_calls += 0; // No LLM calls in this stage
+ ctx.metrics.llm_calls += 0; // Keyword/ToC analysis itself makes no direct LLM calls
Ok(StageOutcome::cont())
diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs
index 31e7f173..ad8858f2 100644
--- a/rust/src/retrieval/stages/evaluate.rs
+++ b/rust/src/retrieval/stages/evaluate.rs
@@ -283,6 +283,7 @@ impl EvaluateStage {
/// Calculate overall confidence score.
fn calculate_confidence(&self, ctx: &PipelineContext) -> f32 {
if ctx.candidates.is_empty() {
+ println!("[DEBUG] calculate_confidence: no candidates, returning 0.0");
return 0.0;
}
@@ -296,7 +297,10 @@ impl EvaluateStage {
SufficiencyLevel::Insufficient => 0.4,
};
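+ // e.g. avg_score 0.8 with an Insufficient factor (0.4) yields confidence 0.32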
- avg_score * sufficiency_factor
+ let confidence = avg_score * sufficiency_factor;
+ println!("[DEBUG] calculate_confidence: avg_score={:.3}, sufficiency={:?}, factor={:.1}, confidence={:.3}",
+ avg_score, ctx.sufficiency, sufficiency_factor, confidence);
+ confidence
}
}
@@ -325,6 +329,9 @@ impl RetrievalStage for EvaluateStage {
async fn execute(&self, ctx: &mut PipelineContext) -> crate::error::Result {
let start = std::time::Instant::now();
+ println!("[DEBUG] EvaluateStage: {} candidates, iteration {}",
+ ctx.candidates.len(), ctx.search_iterations);
+
info!(
"Judging sufficiency: {} candidates, iteration {}",
ctx.candidates.len(),
diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs
index 78f07c20..17006bdf 100644
--- a/rust/src/retrieval/stages/search.rs
+++ b/rust/src/retrieval/stages/search.rs
@@ -6,13 +6,21 @@
//! This stage executes the selected search algorithm using
//! the selected retrieval strategy. When a Pilot is provided,
//! it can provide semantic guidance at key decision points.
+//!
+//! # LLM-First Search
+//!
+//! When an LLM client is provided, the stage will first attempt to
+//! directly locate the top-3 most relevant nodes using the TOC,
+//! falling back to tree traversal algorithms (Beam/Greedy) only if
+//! LLM fails or returns insufficient results.
use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::{info, warn};
-use crate::document::DocumentTree;
-// LlmClient is used via strategy
+use crate::document::{DocumentTree, TocView};
+use crate::llm::LlmClient;
use crate::retrieval::RetrievalContext; // Legacy context
use crate::retrieval::pilot::Pilot;
use crate::retrieval::pipeline::{
@@ -57,6 +65,8 @@ pub struct SearchStage {
hybrid_strategy: Option>,
/// Pilot for navigation guidance (optional).
pilot: Option<Arc<dyn Pilot>>,
+ /// LLM client for direct TOC-based search (optional).
+ llm_client: Option<Arc<dyn LlmClient>>,
}
impl Default for SearchStage {
@@ -74,9 +84,20 @@ impl SearchStage {
semantic_strategy: None,
hybrid_strategy: None,
pilot: None,
+ llm_client: None,
}
}
+ /// Add LLM client for direct TOC-based search.
+ ///
+ /// When provided, the stage first attempts to locate relevant
+ /// nodes directly from the TOC, falling back to the tree traversal
+ /// algorithms only if the LLM call fails or returns no usable candidates.
+ pub fn with_llm_client(mut self, client: Option<Arc<dyn LlmClient>>) -> Self {
+ self.llm_client = client;
+ self
+ }
+
/// Add Pilot for semantic navigation guidance.
///
/// When provided, the search algorithm will consult the Pilot
@@ -210,6 +231,172 @@ impl SearchStage {
candidates
}
+
+ /// Build a flat TOC list for LLM consumption.
+ ///
+ /// Returns a formatted string with numbered entries:
+ /// ```text
+ /// [1] Title: "Overview"
+ /// Summary: "This section covers..."
+ /// [2] Title: "Architecture"
+ /// Summary: "The system architecture..."
+ /// ```
+ fn build_toc_for_llm(&self, tree: &DocumentTree) -> (String, Vec<crate::document::NodeId>) {
+ let mut entries = Vec::new();
+ let mut node_ids = Vec::new();
+
+ fn collect_entries(
+ tree: &DocumentTree,
+ node_id: crate::document::NodeId,
+ entries: &mut Vec<(usize, String, String)>,
+ node_ids: &mut Vec<crate::document::NodeId>,
+ index: &mut usize,
+ ) {
+ if let Some(node) = tree.get(node_id) {
+ let title = node.title.clone();
+ let summary = if node.summary.is_empty() {
+ "(no summary)".to_string()
+ } else {
+ node.summary.clone()
+ };
+ entries.push((*index, title, summary));
+ node_ids.push(node_id);
+ *index += 1;
+
+ for child_id in tree.children(node_id) {
+ collect_entries(tree, child_id, entries, node_ids, index);
+ }
+ }
+ }
+
+ collect_entries(tree, tree.root(), &mut entries, &mut node_ids, &mut 0);
+
+ let toc_str = entries
+ .iter()
+ .map(|(idx, title, summary)| {
+ format!("[{}] Title: \"{}\"\n Summary: \"{}\"", idx + 1, title, summary)
+ })
+ .collect::<Vec<_>>()
+ .join("\n\n");
+
+ (toc_str, node_ids)
+ }
+
+ /// Locate top candidates directly via LLM using TOC.
+ ///
+ /// This method bypasses tree traversal by asking the LLM to
+ /// directly identify the most relevant nodes from the TOC.
+ async fn locate_via_llm(
+ &self,
+ query: &str,
+ tree: &DocumentTree,
+ ) -> Option<Vec<CandidateNode>> {
+ let llm_client = self.llm_client.as_ref()?;
+ let (toc_str, node_ids) = self.build_toc_for_llm(tree);
+
+ if node_ids.is_empty() {
+ warn!("No nodes in tree for LLM search");
+ return None;
+ }
+
+ let system_prompt = r#"You are a document navigation assistant. Your task is to locate the most relevant sections in a document hierarchy for a user's query.
+
+CRITICAL INSTRUCTIONS:
+1. Analyze the user query carefully to understand the intent
+2. Examine the provided Table of Contents (TOC) with numbered entries
+3. Select the TOP 3 most relevant entries that would contain the answer
+4. You MUST respond with ONLY valid JSON. No markdown code blocks. No explanations outside JSON.
+
+Your response must have this EXACT structure:
+{
+ "reasoning": "Brief analysis of the query and why you selected these entries",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "Why this entry matches the query"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "Why this entry is also relevant"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "Why this entry might be relevant"}
+ ]
+}
+
+Rules:
+- node_id: MUST be a number from the provided TOC (the number in [N] brackets)
+- relevance_score: Number between 0.0 and 1.0 (higher = more relevant)
+- reason: Brief explanation for each selection
+- candidates: Must have exactly 3 items, ordered by relevance (highest first)"#;
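+
+ // The JSON contract requested above is mirrored by the `LlmLocateResponse`
+ // and `LlmLocateCandidate` structs at the bottom of this file; a parse
+ // failure simply falls back to tree traversal.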
+
+ let user_prompt = format!(
+ "USER QUERY: {}\n\nDOCUMENT TOC ({} entries):\n{}\n\nBased on the query and TOC above, select the TOP 3 most relevant entries.\n\nRespond with ONLY the JSON object:",
+ query,
+ node_ids.len(),
+ toc_str
+ );
+
+ info!("Attempting LLM-based search for query: '{}'", query);
+
+ match llm_client.complete(system_prompt, &user_prompt).await {
+ Ok(response) => {
+ // Parse JSON response
+ match serde_json::from_str::<LlmLocateResponse>(&response) {
+ Ok(llm_response) => {
+ let mut candidates = Vec::new();
+
+ for candidate in llm_response.candidates {
+ // node_id is 1-indexed from LLM, convert to 0-indexed
+ let idx = candidate.node_id.saturating_sub(1);
+ if idx < node_ids.len() {
+ let node_id = node_ids[idx];
+ if let Some(node) = tree.get(node_id) {
+ candidates.push(CandidateNode::new(
+ node_id,
+ candidate.relevance_score,
+ node.depth,
+ tree.is_leaf(node_id),
+ ));
+ info!(
+ "LLM selected: [{}] '{}' (score: {:.2})",
+ candidate.node_id, node.title, candidate.relevance_score
+ );
+ }
+ }
+ }
+
+ if candidates.is_empty() {
+ warn!("LLM returned no valid candidates");
+ return None;
+ }
+
+ println!("LLM search found {} candidates", candidates.len());
+ println!("LLM candidates content: {:?}", candidates);
+ Some(candidates)
+ }
+ Err(e) => {
+ warn!("Failed to parse LLM response as JSON: {}", e);
+ warn!("Raw response: {}", response);
+ None
+ }
+ }
+ }
+ Err(e) => {
+ warn!("LLM call failed: {}", e);
+ None
+ }
+ }
+ }
+}
+
+/// LLM response for locate query.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct LlmLocateResponse {
+ reasoning: String,
+ candidates: Vec,
+}
+
+/// A candidate from LLM locate response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct LlmLocateCandidate {
+ node_id: usize,
+ relevance_score: f32,
+ reason: String,
+}
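+
+// Shape check (illustrative addition): verifies the structs above accept
+// the JSON layout the system prompt requests from the LLM.
+#[cfg(test)]
+mod llm_locate_response_tests {
+ use super::*;
+
+ #[test]
+ fn parses_prompted_json_shape() {
+ let raw = r#"{
+ "reasoning": "query is about configuration",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "direct match"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "related section"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "background"}
+ ]
+ }"#;
+ let parsed: LlmLocateResponse =
+ serde_json::from_str(raw).expect("prompted JSON shape should deserialize");
+ assert_eq!(parsed.candidates.len(), 3);
+ assert!(parsed.candidates[0].relevance_score >= parsed.candidates[1].relevance_score);
+ }
+}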
#[async_trait]
@@ -245,6 +432,9 @@ impl RetrievalStage for SearchStage {
// Reset Pilot state for new query
if let Some(ref pilot) = self.pilot {
pilot.reset();
+ println!("[DEBUG] SearchStage: Pilot is available, is_active={}", pilot.is_active());
+ } else {
+ println!("[DEBUG] SearchStage: No Pilot available");
}
info!(
@@ -261,6 +451,29 @@ impl RetrievalStage for SearchStage {
// Increment search iteration
ctx.increment_search_iteration();
+ // === Try LLM-first search (direct TOC-based location) ===
+ if self.llm_client.is_some() {
+ info!("Attempting LLM-first search for query: '{}'", ctx.query);
+
+ if let Some(candidates) = self.locate_via_llm(&ctx.query, &ctx.tree).await {
+ if !candidates.is_empty() {
+ ctx.candidates = candidates;
+ ctx.metrics.search_time_ms += start.elapsed().as_millis() as u64;
+ ctx.metrics.nodes_visited += ctx.candidates.len();
+ ctx.metrics.llm_calls += 1;
+
+ info!(
+ "LLM-first search found {} candidates (skipped tree traversal)",
+ ctx.candidates.len()
+ );
+
+ return Ok(StageOutcome::cont());
+ }
+ }
+
+ info!("LLM-first search returned no results, falling back to tree traversal");
+ }
+
// Build search config for search algorithms
let search_config = SearchAlgConfig {
top_k: config.beam_width * 2,
@@ -270,48 +483,158 @@ impl RetrievalStage for SearchStage {
leaf_only: false,
};
- // Create legacy context for search algorithms
- let legacy_ctx = RetrievalContext::new(
- &ctx.query,
- ctx.options.max_tokens,
- ctx.options.sufficiency_check,
- );
-
// Get Pilot reference (or None if not available)
let pilot_ref: Option<&dyn Pilot> = self.pilot.as_deref();
+ println!("[DEBUG] SearchStage: pilot_ref is {}", if pilot_ref.is_some() { "Some" } else { "None" });
+
+ // === Check for decomposition ===
+ if let Some(ref decomposition) = ctx.decomposition {
+ if decomposition.was_decomposed && decomposition.is_multi_turn() {
+ info!("Processing {} decomposed sub-queries", decomposition.sub_queries.len());
+
+ let mut all_paths = Vec::new();
+ let mut all_candidates = Vec::new();
+ let mut total_pilot_interventions = 0u64;
+
+ // Process each sub-query in execution order
+ let order = decomposition.execution_order();
+ for sub_idx in order {
+ let sub_query = &decomposition.sub_queries[sub_idx];
+ info!("Processing sub-query : {}", sub_query.text);
+
+ // Create legacy context for this sub-query
+ let legacy_ctx = RetrievalContext::new(
+ &sub_query.text,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search for sub-query: algorithm={:?}, top_k={}, beam_width={}",
+ algorithm, search_config.top_k, search_config.beam_width);
+
+ // Execute search for this sub-query
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ // Use beam search as fallback for now
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
+
+ let sub_path_count = result.paths.len();
+ all_candidates.extend(self.extract_candidates(&result.paths, &ctx.tree));
+ all_paths.extend(result.paths);
+ total_pilot_interventions += result.pilot_interventions as u64;
+
+ info!("Sub-query '{}' found {} paths", sub_query.text, sub_path_count);
+ }
- // Execute search based on algorithm with Pilot
- let result = match algorithm {
- SearchAlgorithm::Greedy => {
- let search = GreedySearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
- }
- SearchAlgorithm::Beam => {
- let search = BeamSearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
- }
- SearchAlgorithm::Mcts => {
- // Use beam search as fallback for now
- let search = BeamSearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
+ // Merge results
+ ctx.search_paths = all_paths;
+ ctx.candidates = all_candidates;
+
+ info!(
+ "Search complete: {} total candidates from {} sub-queries (pilot interventions: {})",
+ ctx.candidates.len(),
+ decomposition.sub_queries.len(),
+ total_pilot_interventions
+ );
+ } else {
+ // Single query (not decomposed, or only one sub-query): process normally
+ let legacy_ctx = RetrievalContext::new(
+ &ctx.query,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}",
+ algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score);
+
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ // Use beam search as fallback for now
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
+
+ ctx.search_paths = result.paths;
+ ctx.candidates = self.extract_candidates(&ctx.search_paths, &ctx.tree);
+
+ info!(
+ "Search found {} paths (pilot interventions: {})",
+ ctx.search_paths.len(),
+ result.pilot_interventions
+ );
}
- };
+ } else {
+ // No decomposition available; process the original query
+ let legacy_ctx = RetrievalContext::new(
+ &ctx.query,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}",
+ algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score);
+
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ // Use beam search as fallback for now
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
- info!(
- "Search found {} paths (pilot interventions: {})",
- result.paths.len(),
- result.pilot_interventions
- );
+ ctx.search_paths = result.paths;
+ ctx.candidates = self.extract_candidates(&ctx.search_paths, &ctx.tree);
+
+ info!(
+ "Search found {} paths (pilot interventions: {})",
+ ctx.search_paths.len(),
+ result.pilot_interventions
+ );
+ }
+
+ // Debug output
+ debug!("Search found {} total paths, {} candidates", ctx.search_paths.len(), ctx.candidates.len());
+ for (i, path) in ctx.search_paths.iter().enumerate().take(5) {
+ if let Some(leaf_id) = path.leaf {
+ if let Some(node) = ctx.tree.get(leaf_id) {
+ debug!("Path {}: score={:.3}, title='{}', content_len={}",
+ i, path.score, node.title, node.content.len());
+ }
+ }
+ }
- // Update context with results
- ctx.search_paths = result.paths.clone();
- ctx.candidates = self.extract_candidates(&result.paths, &ctx.tree);
+ // Debug output
+ debug!("Extracted {} candidates", ctx.candidates.len());
+ for (i, c) in ctx.candidates.iter().enumerate().take(5) {
+ if let Some(node) = ctx.tree.get(c.node_id) {
+ debug!("Candidate {}: score={:.3}, title='{}'",
+ i, c.score, node.title);
+ }
+ }
// Update metrics
ctx.metrics.search_time_ms += start.elapsed().as_millis() as u64;