,
+
+ /// Whether to apply environment variable overrides.
+ env_enabled: bool,
}
impl Default for ConfigLoader {
@@ -88,6 +107,7 @@ impl ConfigLoader {
files: Vec::new(),
validate: false,
validator: None,
+ env_enabled: true,
}
}
@@ -122,13 +142,68 @@ impl ConfigLoader {
self
}
+ /// Enable or disable environment variable overrides.
+ ///
+ /// When enabled (default), environment variables override config file values:
+ /// - `OPENAI_API_KEY` → sets API key for all LLM clients
+ /// - `VECTORLESS_MODEL` → sets default model
+ /// - `VECTORLESS_ENDPOINT` → sets API endpoint
+ /// - `VECTORLESS_WORKSPACE` → sets workspace directory
+ pub fn with_env(mut self, enabled: bool) -> Self {
+ self.env_enabled = enabled;
+ self
+ }
+
+ /// Apply environment variable overrides to configuration.
+ fn apply_env_overrides(&self, config: &mut Config) {
+ if !self.env_enabled {
+ return;
+ }
+
+ // OPENAI_API_KEY: Set API key for all LLM clients
+ if let Ok(api_key) = std::env::var("OPENAI_API_KEY") {
+ // Set default API key
+ config.llm.api_key = Some(api_key.clone());
+ // Override individual client API keys if not explicitly set
+ if config.llm.summary.api_key.is_none() {
+ config.llm.summary.api_key = Some(api_key.clone());
+ }
+ if config.llm.retrieval.api_key.is_none() {
+ config.llm.retrieval.api_key = Some(api_key.clone());
+ }
+ if config.llm.pilot.api_key.is_none() {
+ config.llm.pilot.api_key = Some(api_key);
+ }
+ }
+
+ // VECTORLESS_MODEL: Set default model
+ if let Ok(model) = std::env::var("VECTORLESS_MODEL") {
+ config.llm.summary.model = model.clone();
+ config.llm.retrieval.model = model.clone();
+ config.llm.pilot.model = model;
+ }
+
+ // VECTORLESS_ENDPOINT: Set API endpoint
+ if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") {
+ config.llm.summary.endpoint = endpoint.clone();
+ config.llm.retrieval.endpoint = endpoint.clone();
+ config.llm.pilot.endpoint = endpoint;
+ }
+
+ // VECTORLESS_WORKSPACE: Set workspace directory
+ if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") {
+ config.storage.workspace_dir = PathBuf::from(workspace);
+ }
+ }
+
/// Load the configuration.
///
/// # Behavior
///
/// 1. Start with default configuration
/// 2. Load and merge each specified file (in order)
- /// 3. Validate configuration (if enabled)
+ /// 3. Apply environment variable overrides (if enabled)
+ /// 4. Validate configuration (if enabled)
///
/// # Errors
///
@@ -150,6 +225,9 @@ impl ConfigLoader {
}
}
+ // Apply environment variable overrides
+ self.apply_env_overrides(&mut config);
+
// Validate if requested
if self.validate {
let validator = self.validator.unwrap_or_default();
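With this patch, the loader resolves configuration in file-merge → environment-override → validation order. A minimal usage sketch of the behavior added above (only `ConfigLoader`'s `Default` impl, `with_env`, `load`, and `config.storage.workspace_dir` appear in the diff; the import path and the rest are illustrative assumptions):

```rust
use std::path::PathBuf;
use vectorless::config::ConfigLoader; // module path assumed

fn demo() -> Result<(), Box<dyn std::error::Error>> {
    // With overrides enabled (the default), the environment wins over file values.
    std::env::set_var("VECTORLESS_WORKSPACE", "/tmp/vectorless");
    let config = ConfigLoader::default().load()?;
    assert_eq!(config.storage.workspace_dir, PathBuf::from("/tmp/vectorless"));

    // Opting out restores file/default values (assuming no file sets this path).
    let config = ConfigLoader::default().with_env(false).load()?;
    assert_ne!(config.storage.workspace_dir, PathBuf::from("/tmp/vectorless"));
    Ok(())
}
```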
From 21eedaba1df1bf0c3633cff8acf33eaa05267174 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Wed, 8 Apr 2026 11:57:27 +0800
Subject: [PATCH 2/5] refactor(examples): restructure Python examples with
dedicated directories
- Remove old advanced.py and custom_config.py files
- Add new example structure with dedicated directories for basic,
advanced, and custom_config examples
- Each example now has its own README.md, main.py, and pyproject.toml
- Update basic example to use from_content instead of from_text
- Add proper configuration examples and documentation
fix(python): improve IndexContext and Engine class naming
- Rename from_text to from_content for better semantic meaning
- Add explicit class names for Python bindings (IndexContext,
QueryResult, DocumentInfo, Engine)
- Update error handling with proper constructor
- Fix module name from _vectorless to vectorless
refactor(rust): update document format handling and builder logic
- Remove Text format support temporarily
- Set default format to Markdown instead of Text
- Fix EngineBuilder to properly handle API key and model precedence
- Update with_openai to not override existing model configuration
- Improve documentation for builder methods
---
examples/python/advanced.py | 118 --------
examples/python/advanced/README.md | 45 +++
examples/python/advanced/main.py | 115 ++++++++
examples/python/advanced/pyproject.toml | 11 +
examples/python/basic/README.md | 16 ++
examples/python/{basic.py => basic/main.py} | 10 +-
examples/python/basic/pyproject.toml | 11 +
examples/python/custom_config.py | 107 --------
examples/python/custom_config/README.md | 48 ++++
examples/python/custom_config/main.py | 114 ++++++++
examples/python/custom_config/pyproject.toml | 11 +
examples/python/python_basic.py | 273 -------------------
python/src/lib.rs | 32 ++-
rust/Cargo.toml | 2 +-
rust/src/client/builder.rs | 16 +-
rust/src/client/indexer.rs | 1 -
rust/src/index/pipeline/context.rs | 2 +-
rust/src/parser/registry.rs | 3 +-
rust/src/parser/types.rs | 6 +-
19 files changed, 408 insertions(+), 533 deletions(-)
delete mode 100644 examples/python/advanced.py
create mode 100644 examples/python/advanced/README.md
create mode 100644 examples/python/advanced/main.py
create mode 100644 examples/python/advanced/pyproject.toml
create mode 100644 examples/python/basic/README.md
rename examples/python/{basic.py => basic/main.py} (87%)
create mode 100644 examples/python/basic/pyproject.toml
delete mode 100644 examples/python/custom_config.py
create mode 100644 examples/python/custom_config/README.md
create mode 100644 examples/python/custom_config/main.py
create mode 100644 examples/python/custom_config/pyproject.toml
delete mode 100644 examples/python/python_basic.py
diff --git a/examples/python/advanced.py b/examples/python/advanced.py
deleted file mode 100644
index c86c00b3..00000000
--- a/examples/python/advanced.py
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) 2026 vectorless developers
-# SPDX-License-Identifier: Apache-2.0
-
-"""
-Advanced usage example - Full Configuration.
-
-This example demonstrates how to use a full configuration file
-for advanced use cases where you need fine-grained control.
-
-Usage:
- # First, copy the example config
- cp config.toml ./my_vectorless.toml
-
- # Edit my_vectorless.toml to customize settings
-
- # Run the example
- python advanced.py
-"""
-
-import os
-import tempfile
-from vectorless import Engine, IndexContext
-
-
-def main():
- print("=== Vectorless Advanced Example (Full Configuration) ===\n")
-
- # Method 1: Use explicit config file path
- # This loads all settings from the specified config file
- engine = Engine(config_path="./config.toml")
-
- print("✓ Engine created with config file\n")
-
- # Index a document
- content = """
-# Product Documentation
-
-## Overview
-
-This is a comprehensive guide for our product.
-
-## Configuration
-
-The system supports multiple configuration methods:
-
-### 1. Zero Configuration
-Just set OPENAI_API_KEY environment variable.
-
-### 2. Environment Variables
-- VECTORLESS_MODEL: Set default model
-- VECTORLESS_ENDPOINT: Set API endpoint
-- VECTORLESS_WORKSPACE: Set workspace directory
-
-### 3. Config File
-Create a vectorless.toml file with full configuration.
-
-## API Reference
-
-### Engine
-The main entry point for vectorless.
-
-### IndexContext
-Context for indexing documents from various sources.
-"""
- ctx = IndexContext.from_text(content, name="docs", format="markdown")
- doc_id = engine.index(ctx)
- print(f"✓ Indexed: {doc_id}\n")
-
- # Query
- result = engine.query(doc_id, "What configuration methods are available?")
- print("Query: What configuration methods are available?")
- print(f"Score: {result.score:.2f}")
- print(f"Result: {result.content[:200]}...\n")
-
- # Cleanup
- engine.remove(doc_id)
- print("✓ Cleaned up")
-
- print("\n" + "=" * 60)
- print("Configuration Options")
- print("=" * 60)
- print()
- print("Configuration Priority (later overrides earlier):")
- print(" 1. Default configuration")
- print(" 2. Auto-detected config file (vectorless.toml, config.toml)")
- print(" 3. Explicit config file (config_path parameter)")
- print(" 4. Environment variables")
- print(" 5. Constructor parameters (api_key, model, etc.)")
- print()
- print("Environment Variables:")
- print(" OPENAI_API_KEY - LLM API key")
- print(" VECTORLESS_MODEL - Default model name")
- print(" VECTORLESS_ENDPOINT - API endpoint URL")
- print(" VECTORLESS_WORKSPACE - Workspace directory")
- print()
- print("Usage Examples:")
- print()
- print("# Zero configuration (recommended for beginners)")
- print('engine = Engine(workspace="./data")')
- print()
- print("# With custom model")
- print('engine = Engine(workspace="./data", model="gpt-4o-mini")')
- print()
- print("# With full config file (advanced)")
- print('engine = Engine(config_path="./vectorless.toml")')
- print()
- print("# Override config with parameters")
- print('engine = Engine(')
- print(' config_path="./vectorless.toml",')
- print(' model="gpt-4o", # Override model from config')
- print(')')
-
- print("\n=== Done ===")
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/python/advanced/README.md b/examples/python/advanced/README.md
new file mode 100644
index 00000000..2d11afbc
--- /dev/null
+++ b/examples/python/advanced/README.md
@@ -0,0 +1,45 @@
+# Advanced Example - Full Configuration
+
+Use a configuration file for fine-grained control.
+
+## Setup
+
+```bash
+pip install vectorless
+
+# Copy the example config
+cp ../../../config.toml ./vectorless.toml
+
+# Edit to customize your settings
+vim vectorless.toml
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Configuration File Structure
+
+```toml
+[llm]
+api_key = "sk-..."
+
+[llm.summary]
+model = "gpt-4o-mini"
+max_tokens = 200
+
+[llm.retrieval]
+model = "gpt-4o"
+max_tokens = 100
+
+[retrieval]
+top_k = 5
+beam_width = 3
+max_iterations = 10
+
+[storage]
+workspace_dir = "./workspace"
+cache_size = 100
+```
diff --git a/examples/python/advanced/main.py b/examples/python/advanced/main.py
new file mode 100644
index 00000000..d223ad02
--- /dev/null
+++ b/examples/python/advanced/main.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+Advanced example - Full Configuration File.
+
+This example demonstrates how to use a full configuration file
+for fine-grained control over all settings.
+
+Usage:
+ cp ../../../config.toml ./vectorless.toml
+ # Edit vectorless.toml to customize settings
+ python main.py
+"""
+
+import os
+from vectorless import Engine, IndexContext
+
+# Path to config file (relative to this script)
+CONFIG_PATH = "./vectorless.toml"
+WORKSPACE = "./workspace"
+
+
+def main():
+ print("=== Vectorless Advanced Example (Full Configuration) ===\n")
+
+ # Check if config file exists
+ if not os.path.exists(CONFIG_PATH):
+ print(f"Error: Config file not found: {CONFIG_PATH}")
+ print("\nCreate it by copying the example:")
+ print(f" cp ../../../config.toml {CONFIG_PATH}")
+ print("\nThen edit it to customize your settings.")
+ return
+
+ # Create engine with config file
+ engine = Engine(config_path=CONFIG_PATH)
+
+ print(f"✓ Engine created with config file: {CONFIG_PATH}\n")
+
+ # Index a document
+ content = """
+# System Documentation
+
+## Architecture
+
+The system consists of three main components:
+
+1. **Index Pipeline** - Parses documents and builds a navigable tree
+2. **Retrieval Pipeline** - Queries and retrieves relevant content
+3. **Pilot** - LLM-powered navigation guide
+
+## Configuration Options
+
+### LLM Settings
+- `model`: The LLM model to use (e.g., "gpt-4o", "gpt-4o-mini")
+- `endpoint`: API endpoint URL
+- `api_key`: Your API key
+- `temperature`: Generation temperature (0.0 for deterministic)
+
+### Retrieval Settings
+- `top_k`: Number of results to return
+- `max_iterations`: Maximum search iterations
+- `beam_width`: Beam width for multi-path search
+
+### Storage Settings
+- `workspace_dir`: Directory for persisted documents
+- `cache_size`: LRU cache size
+- `compression`: Enable/disable compression
+
+## Performance Tuning
+
+For faster retrieval:
+- Use a smaller model like gpt-4o-mini
+- Reduce max_iterations
+- Enable caching
+
+For higher accuracy:
+- Use a more capable model like gpt-4o
+- Increase beam_width
+- Enable multi-turn decomposition
+"""
+ ctx = IndexContext.from_content(content, name="system_docs", format="markdown")
+ doc_id = engine.index(ctx)
+ print(f"✓ Indexed: {doc_id}\n")
+
+ # Query examples
+ questions = [
+ "What are the main components?",
+ "How can I improve retrieval speed?",
+ "What settings are available?",
+ ]
+
+ for q in questions:
+ result = engine.query(doc_id, q)
+ print(f"Q: {q}")
+ print(f"A: {result.content[:150]}...")
+ print(f" Score: {result.score:.2f}\n")
+
+ # Cleanup
+ engine.remove(doc_id)
+ print("✓ Cleaned up")
+
+ # Print configuration info
+ print("\n" + "=" * 60)
+ print("Configuration Priority")
+ print("=" * 60)
+ print("""
+1. Default configuration
+2. Auto-detected config file (vectorless.toml, config.toml)
+3. Explicit config file (config_path parameter)
+4. Environment variables (OPENAI_API_KEY, etc.)
+5. Constructor parameters (api_key, model, etc.)
+""")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/python/advanced/pyproject.toml b/examples/python/advanced/pyproject.toml
new file mode 100644
index 00000000..a85a964d
--- /dev/null
+++ b/examples/python/advanced/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-advanced-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python/basic/README.md b/examples/python/basic/README.md
new file mode 100644
index 00000000..e74975de
--- /dev/null
+++ b/examples/python/basic/README.md
@@ -0,0 +1,16 @@
+# Basic Example - Zero Configuration
+
+The simplest way to use Vectorless.
+
+## Setup
+
+```bash
+pip install vectorless
+export OPENAI_API_KEY="sk-..."
+```
+
+## Run
+
+```bash
+python main.py
+```
diff --git a/examples/python/basic.py b/examples/python/basic/main.py
similarity index 87%
rename from examples/python/basic.py
rename to examples/python/basic/main.py
index 04567eab..4ae34b42 100644
--- a/examples/python/basic.py
+++ b/examples/python/basic/main.py
@@ -1,16 +1,13 @@
#!/usr/bin/env python3
-# Copyright (c) 2026 vectorless developers
-# SPDX-License-Identifier: Apache-2.0
-
"""
-Basic usage example - Zero Configuration.
+Basic example - Zero Configuration.
This example demonstrates the simplest way to use Vectorless.
Just set OPENAI_API_KEY environment variable and you're ready to go.
Usage:
export OPENAI_API_KEY="sk-..."
- python basic.py
+ python main.py
"""
import os
@@ -22,7 +19,6 @@ def main():
print("=== Vectorless Basic Example (Zero Configuration) ===\n")
# Zero configuration: Just set OPENAI_API_KEY environment variable
- # The engine will automatically use it.
with tempfile.TemporaryDirectory() as workspace:
engine = Engine(workspace=workspace)
@@ -56,7 +52,7 @@ def main():
print(result.content)
```
"""
- ctx = IndexContext.from_text(content, name="manual", format="markdown")
+ ctx = IndexContext.from_content(content, name="manual", format="markdown")
doc_id = engine.index(ctx)
print(f"✓ Indexed: {doc_id}\n")
diff --git a/examples/python/basic/pyproject.toml b/examples/python/basic/pyproject.toml
new file mode 100644
index 00000000..d99ee750
--- /dev/null
+++ b/examples/python/basic/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-basic-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python/custom_config.py b/examples/python/custom_config.py
deleted file mode 100644
index 3feb6a2e..00000000
--- a/examples/python/custom_config.py
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) 2026 vectorless developers
-# SPDX-License-Identifier: Apache-2.0
-
-"""
-Custom configuration example - Using your own API key, model, and endpoint.
-
-This example demonstrates how to use custom LLM settings without a config file.
-Useful when you want to use different providers like Azure OpenAI, DeepSeek, etc.
-
-Usage:
- python custom_config.py
-"""
-
-import os
-import tempfile
-from vectorless import Engine, IndexContext
-
-
-def main():
- print("=== Vectorless Custom Configuration Example ===\n")
-
- # ============================================================
- # Option 1: Use environment variables
- # ============================================================
- # Set these environment variables:
- # - OPENAI_API_KEY or VECTORLESS_API_KEY
- # - VECTORLESS_MODEL (optional)
- # - VECTORLESS_ENDPOINT (optional)
-
- # ============================================================
- # Option 2: Use constructor parameters (recommended for custom config)
- # ============================================================
-
- with tempfile.TemporaryDirectory() as workspace:
- # Example: Use DeepSeek API
- engine = Engine(
- workspace=workspace,
- api_key="sk-your-deepseek-key", # Your API key
- model="deepseek-chat", # Model name
- endpoint="https://api.deepseek.com/v1", # API endpoint
- )
-
- print("✓ Engine created with custom settings\n")
-
- # Index a document
- content = """
-# Product Documentation
-
-## Overview
-This product helps you manage documents intelligently.
-
-## Features
-- Fast indexing
-- Accurate retrieval
-- Easy to use API
-
-## Installation
-Install with pip: pip install vectorless
-"""
- ctx = IndexContext.from_text(content, name="docs", format="markdown")
- doc_id = engine.index(ctx)
- print(f"✓ Indexed: {doc_id}\n")
-
- # Query
- result = engine.query(doc_id, "How do I install the product?")
- print("Query: How do I install the product?")
- print(f"Score: {result.score:.2f}")
- print(f"Result: {result.content[:200]}...\n")
-
- # Cleanup
- engine.remove(doc_id)
- print("✓ Cleaned up")
-
- # ============================================================
- # Other provider examples (commented out)
- # ============================================================
-
- # Azure OpenAI:
- # engine = Engine(
- # workspace="./data",
- # api_key="your-azure-key",
- # model="gpt-4o",
- # endpoint="https://your-resource.openai.azure.com/openai/deployments/your-deployment",
- # )
-
- # Local LLM (e.g., Ollama with OpenAI-compatible API):
- # engine = Engine(
- # workspace="./data",
- # model="llama3",
- # endpoint="http://localhost:11434/v1",
- # # No api_key needed for local LLM
- # )
-
- # Anthropic Claude (via OpenAI-compatible proxy):
- # engine = Engine(
- # workspace="./data",
- # api_key="sk-ant-...",
- # model="claude-3-5-sonnet-20241022",
- # endpoint="https://api.anthropic.com/v1",
- # )
-
- print("\n=== Done ===")
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/python/custom_config/README.md b/examples/python/custom_config/README.md
new file mode 100644
index 00000000..b743098a
--- /dev/null
+++ b/examples/python/custom_config/README.md
@@ -0,0 +1,48 @@
+# Custom Configuration Example
+
+Use your own API key, model, and endpoint.
+
+## Setup
+
+```bash
+pip install vectorless
+```
+
+## Configure
+
+Edit `main.py` and update the settings:
+
+```python
+API_KEY = "your-api-key"
+MODEL = "gpt-4o-mini" # or "deepseek-chat", "claude-3-5-sonnet", etc.
+ENDPOINT = "https://api.openai.com/v1" # or your custom endpoint
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Other Providers
+
+### DeepSeek
+```python
+API_KEY = "sk-..."
+MODEL = "deepseek-chat"
+ENDPOINT = "https://api.deepseek.com/v1"
+```
+
+### Azure OpenAI
+```python
+API_KEY = "your-azure-key"
+MODEL = "gpt-4o"
+ENDPOINT = "https://your-resource.openai.azure.com/openai/deployments/your-deployment"
+```
+
+### Local LLM (Ollama)
+```python
+API_KEY = None # Not needed
+MODEL = "llama3"
+ENDPOINT = "http://localhost:11434/v1"
+```
diff --git a/examples/python/custom_config/main.py b/examples/python/custom_config/main.py
new file mode 100644
index 00000000..d6e0bda4
--- /dev/null
+++ b/examples/python/custom_config/main.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+"""
+Custom configuration example - Using your own API key, model, and endpoint.
+
+This example demonstrates how to use custom LLM settings without a config file.
+Useful when you want to use different providers like DeepSeek, Azure OpenAI, etc.
+
+Usage:
+ python main.py
+"""
+
+import tempfile
+from vectorless import Engine, IndexContext
+
+# ============================================================
+# Configure your settings here
+# ============================================================
+API_KEY = "sk-or-v1-xxxx" # Your API key
+MODEL = "google/gemini-3-flash-preview" # Model name
+ENDPOINT = "https://api/v1" # API endpoint
+WORKSPACE = "./workspace" # Workspace directory
+
+
+def main():
+ print("=== Vectorless Custom Configuration Example ===\n")
+
+ # Create engine with custom settings
+ engine = Engine(
+ workspace=WORKSPACE,
+ api_key=API_KEY,
+ model=MODEL,
+ endpoint=ENDPOINT,
+ )
+
+ print(f"✓ Engine created with custom settings")
+ print(f" Model: {MODEL}")
+ print(f" Endpoint: {ENDPOINT}\n")
+
+ # Index a document
+ content = """
+# Product Documentation
+
+## Overview
+This product helps you manage documents intelligently using LLM-powered navigation.
+
+## Features
+- Fast indexing with tree-based structure
+- Accurate retrieval using hybrid search
+- Easy to use Python and Rust APIs
+- Support for PDF, Markdown, HTML, and DOCX
+
+## Installation
+
+Install with pip:
+```bash
+pip install vectorless
+```
+
+## Quick Start
+
+```python
+from vectorless import Engine, IndexContext
+
+# Create engine
+engine = Engine(workspace="./data")
+
+# Index a document
+ctx = IndexContext.from_file("./report.pdf")
+doc_id = engine.index(ctx)
+
+# Query
+result = engine.query(doc_id, "What is the total revenue?")
+print(result.content)
+```
+
+## Configuration
+
+Vectorless supports multiple configuration methods:
+1. Zero configuration - just set OPENAI_API_KEY
+2. Custom settings - pass api_key, model, endpoint
+3. Full config file - use vectorless.toml
+"""
+ ctx = IndexContext.from_content(content, name="docs", format="markdown")
+ doc_id = engine.index(ctx)
+ print(f"✓ Indexed: {doc_id}\n")
+
+ # Check document info
+ docs = engine.list_docs()
+ print(f"Documents in workspace: {len(docs)}")
+ for d in docs:
+ print(f" - {d.name} (id: {d.id}, format: {d.format})")
+ print()
+
+ # Query
+ result = engine.query(doc_id, "How do I install the product?")
+ print("Query: How do I install the product?")
+ print(f"Score: {result.score:.2f}")
+ print(f"Result: {result.content}\n")
+
+ # Another query
+ result = engine.query(doc_id, "What features are available?")
+ print("Query: What features are available?")
+ print(f"Score: {result.score:.2f}")
+ print(f"Result: {result.content}\n")
+
+ # Cleanup
+ engine.remove(doc_id)
+ print("✓ Cleaned up")
+
+ print("\n=== Done ===")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/python/custom_config/pyproject.toml b/examples/python/custom_config/pyproject.toml
new file mode 100644
index 00000000..d316077d
--- /dev/null
+++ b/examples/python/custom_config/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vectorless-custom-config-example"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+ "vectorless",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/python/python_basic.py b/examples/python/python_basic.py
deleted file mode 100644
index e9801e79..00000000
--- a/examples/python/python_basic.py
+++ /dev/null
@@ -1,273 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) 2026 vectorless developers
-# SPDX-License-Identifier: Apache-2.0
-
-"""
-Basic example demonstrating the vectorless Python library.
-
-This example shows:
-1. Creating an Engine with workspace
-2. Indexing documents from different sources
-3. Querying indexed documents
-4. Managing documents (list, exists, remove)
-
-Prerequisites:
- pip install vectorless
- export OPENAI_API_KEY="sk-..."
-
-Usage:
- python python_basic.py
-"""
-
-import os
-import tempfile
-from pathlib import Path
-
-from vectorless import Engine, IndexContext, VectorlessError
-
-
-def main():
- # Create a temporary workspace for this example
- with tempfile.TemporaryDirectory() as workspace:
- print(f"Workspace: {workspace}")
- print()
-
- # ============================================================
- # 1. Create Engine
- # ============================================================
- print("=" * 60)
- print("1. Creating Engine")
- print("=" * 60)
-
- # Option A: Use OPENAI_API_KEY environment variable
- engine = Engine(workspace=workspace)
-
- # Option B: Explicit API key
- # engine = Engine(
- # workspace=workspace,
- # api_key="sk-...",
- # model="gpt-4o-mini", # optional
- # )
-
- print(f"Engine created successfully!")
- print(f"Initial document count: {engine.len()}")
- print()
-
- # ============================================================
- # 2. Index Documents
- # ============================================================
- print("=" * 60)
- print("2. Indexing Documents")
- print("=" * 60)
-
- # 2a. Index from text content (Markdown)
- markdown_content = """
-# Technical Manual
-
-## Chapter 1: Introduction
-
-This document describes the architecture of our system.
-
-## Chapter 2: Installation
-
-### System Requirements
-
-- Python 3.9+
-- Rust 1.75+
-
-### Steps
-
-1. Install dependencies
-2. Configure environment
-3. Run the application
-
-## Chapter 3: API Reference
-
-### Engine
-
-The main entry point for vectorless.
-
-```python
-engine = Engine(workspace="./data")
-```
-
-### IndexContext
-
-Context for indexing documents from various sources.
-"""
- ctx_md = IndexContext.from_text(
- markdown_content,
- name="technical_manual",
- format="markdown"
- )
- doc_id_md = engine.index(ctx_md)
- print(f"Indexed markdown document: {doc_id_md}")
-
- # 2b. Index from text content (HTML)
- html_content = """
-<!DOCTYPE html>
-<html>
-<head><title>Product Guide</title></head>
-<body>
-    <h1>Product Guide</h1>
-    <h2>Getting Started</h2>
-    <p>Welcome to our product. This guide will help you get started.</p>
-    <h2>Features</h2>
-    <ul>
-        <li>Fast indexing</li>
-        <li>Accurate retrieval</li>
-        <li>Easy to use API</li>
-    </ul>
-</body>
-</html>
-"""
- ctx_html = IndexContext.from_text(
- html_content,
- name="product_guide",
- format="html"
- )
- doc_id_html = engine.index(ctx_html)
- print(f"Indexed HTML document: {doc_id_html}")
-
- # 2c. Index from text content (plain text)
- text_content = """
-Meeting Notes - Q4 Planning
-
-Date: 2024-01-15
-
-Attendees: Alice, Bob, Charlie
-
-Agenda:
-1. Review Q3 performance
-2. Set Q4 goals
-3. Resource allocation
-
-Key Decisions:
-- Increase marketing budget by 20%
-- Launch new product in March
-- Hire 5 additional engineers
-"""
- ctx_text = IndexContext.from_text(
- text_content,
- name="meeting_notes",
- format="text"
- )
- doc_id_text = engine.index(ctx_text)
- print(f"Indexed text document: {doc_id_text}")
-
- # 2d. Index from file (if you have actual files)
- # ctx_file = IndexContext.from_file("./report.pdf")
- # doc_id_file = engine.index(ctx_file)
- # print(f"Indexed file: {doc_id_file}")
-
- print(f"\nTotal documents indexed: {engine.len()}")
- print()
-
- # ============================================================
- # 3. List Documents
- # ============================================================
- print("=" * 60)
- print("3. Listing Documents")
- print("=" * 60)
-
- docs = engine.list_docs()
- for doc in docs:
- print(f" - {doc.name} (id: {doc.id}, format: {doc.format})")
- if doc.line_count:
- print(f" Lines: {doc.line_count}")
- print()
-
- # ============================================================
- # 4. Query Documents
- # ============================================================
- print("=" * 60)
- print("4. Querying Documents")
- print("=" * 60)
-
- # Query the technical manual
- questions = [
- "What are the system requirements?",
- "How do I create an Engine?",
- "What are the installation steps?",
- ]
-
- for question in questions:
- result = engine.query(doc_id_md, question)
- print(f"Q: {question}")
- print(f"A: {result.content[:200]}...")
- print(f" Score: {result.score:.2f}")
- print()
-
- # Query the meeting notes
- result = engine.query(doc_id_text, "What was decided about the marketing budget?")
- print(f"Q: What was decided about the marketing budget?")
- print(f"A: {result.content}")
- print(f" Score: {result.score:.2f}")
- print()
-
- # ============================================================
- # 5. Check Document Existence
- # ============================================================
- print("=" * 60)
- print("5. Checking Document Existence")
- print("=" * 60)
-
- print(f"Document {doc_id_md[:8]}... exists: {engine.exists(doc_id_md)}")
- print(f"Document 'nonexistent' exists: {engine.exists('nonexistent')}")
- print()
-
- # ============================================================
- # 6. Error Handling
- # ============================================================
- print("=" * 60)
- print("6. Error Handling")
- print("=" * 60)
-
- try:
- engine.query("nonexistent_doc_id", "question")
- except VectorlessError as e:
- print(f"Caught error: {e.message}")
- print(f"Error kind: {e.kind}")
- print()
-
- # ============================================================
- # 7. Remove Documents
- # ============================================================
- print("=" * 60)
- print("7. Removing Documents")
- print("=" * 60)
-
- # Remove the HTML document
- removed = engine.remove(doc_id_html)
- print(f"Removed {doc_id_html}: {removed}")
- print(f"Documents remaining: {engine.len()}")
-
- # Try to remove again (should return False)
- removed_again = engine.remove(doc_id_html)
- print(f"Remove again: {removed_again}")
- print()
-
- # ============================================================
- # 8. Clear All Documents
- # ============================================================
- print("=" * 60)
- print("8. Clearing All Documents")
- print("=" * 60)
-
- cleared_count = engine.clear()
- print(f"Cleared {cleared_count} documents")
- print(f"Final document count: {engine.len()}")
- print()
-
- print("=" * 60)
- print("Example completed successfully!")
- print("=" * 60)
-
-
-if __name__ == "__main__":
- # Check for API key
- if not os.environ.get("OPENAI_API_KEY"):
- print("Warning: OPENAI_API_KEY environment variable not set.")
- print("Some operations may fail without an API key.")
- print()
-
- main()
diff --git a/python/src/lib.rs b/python/src/lib.rs
index 4d0f4d59..fc2cf730 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -28,6 +28,11 @@ pub struct VectorlessError {
#[pymethods]
impl VectorlessError {
+ #[new]
+ fn new_py(message: String, kind: String) -> Self {
+ Self { message, kind }
+ }
+
#[getter]
fn message(&self) -> &str {
&self.message
@@ -96,7 +101,7 @@ fn to_py_err(e: RustError) -> PyErr {
/// # From bytes
/// ctx = IndexContext.from_bytes(data, name="doc", format="pdf")
/// ```
-#[pyclass]
+#[pyclass(name = "IndexContext")]
pub struct PyIndexContext {
inner: IndexContext,
}
@@ -134,7 +139,7 @@ impl PyIndexContext {
/// IndexContext for the content.
#[staticmethod]
#[pyo3(signature = (content, name=None, format="markdown"))]
- fn from_text(content: String, name: Option<String>, format: &str) -> PyResult<Self> {
+ fn from_content(content: String, name: Option<String>, format: &str) -> PyResult<Self> {
let doc_format = parse_format(format)?;
let mut ctx = IndexContext::from_content(&content, doc_format);
if let Some(n) = name {
@@ -168,9 +173,8 @@ fn parse_format(format: &str) -> PyResult<DocumentFormat> {
"pdf" => Ok(DocumentFormat::Pdf),
"docx" | "doc" => Ok(DocumentFormat::Docx),
"html" | "htm" => Ok(DocumentFormat::Html),
- "text" | "txt" => Ok(DocumentFormat::Text),
_ => Err(PyErr::from(VectorlessError::new(
- format!("Unknown format: {}", format),
+ format!("Unknown format: {}. Supported: markdown, pdf, docx, html", format),
"config",
))),
}
@@ -181,7 +185,7 @@ fn parse_format(format: &str) -> PyResult<DocumentFormat> {
// ============================================================
/// Result of a document query.
-#[pyclass]
+#[pyclass(name = "QueryResult")]
pub struct PyQueryResult {
inner: QueryResult,
}
@@ -227,7 +231,7 @@ impl PyQueryResult {
// ============================================================
/// Information about an indexed document.
-#[pyclass]
+#[pyclass(name = "DocumentInfo")]
pub struct PyDocumentInfo {
inner: DocumentInfo,
}
@@ -312,7 +316,7 @@ impl PyDocumentInfo {
/// ```python
/// engine = Engine(config_path="./vectorless.toml")
/// ```
-#[pyclass]
+#[pyclass(name = "Engine")]
pub struct PyEngine {
inner: Arc<Engine>,
rt: Runtime,
@@ -370,12 +374,7 @@ impl PyEngine {
builder = builder.with_workspace(ws);
}
- // Set API key
- if let Some(key) = resolved_api_key {
- builder = builder.with_openai(key);
- }
-
- // Set model
+ // Set model first (without overriding api_key)
if let Some(m) = &model {
builder = builder.with_model(m, None);
}
@@ -385,6 +384,11 @@ impl PyEngine {
builder = builder.with_endpoint(e);
}
+ // Set API key last (this ensures it's not overwritten)
+ if let Some(key) = resolved_api_key {
+ builder = builder.with_openai(key);
+ }
+
builder.build().await
});
@@ -543,7 +547,7 @@ impl PyEngine {
/// print(result.content)
/// ```
#[pymodule]
-fn _vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
+fn vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyIndexContext>()?;
m.add_class::<PyQueryResult>()?;
m.add_class::<PyDocumentInfo>()?;
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 91a28947..fe9729b9 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -11,7 +11,7 @@ documentation = "https://docs.rs/vectorless"
keywords = ["rag", "document", "retrieval", "indexing", "llm"]
categories = ["text-processing", "data-structures", "algorithms"]
readme = "../README.md"
-exclude = ["samples/", "docs/", ".*"]
+exclude = ["docs/", ".*"]
# Example directory configuration
[[example]]
diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs
index 85ecc2c7..ced541c2 100644
--- a/rust/src/client/builder.rs
+++ b/rust/src/client/builder.rs
@@ -285,8 +285,8 @@ impl EngineBuilder {
/// Configure for OpenAI API.
///
- /// Uses `gpt-4o` model by default. Use [`with_model`](EngineBuilder::with_model)
- /// to specify a different model.
+ /// Sets the API key, and defaults the model to "gpt-4o" only if none has been set.
+ /// Call [`with_model`](EngineBuilder::with_model) before this to use a different model.
///
/// # Example
///
@@ -305,7 +305,13 @@ impl EngineBuilder {
/// ```
#[must_use]
pub fn with_openai(self, api_key: impl Into<String>) -> Self {
- self.with_model("gpt-4o", Some(api_key.into()))
+ let mut builder = self;
+ builder.api_key = Some(api_key.into());
+ // Only set default model if not already set
+ if builder.model.is_none() {
+ builder.model = Some("gpt-4o".to_string());
+ }
+ builder
}
/// Set the LLM model and optional API key.
@@ -333,7 +339,9 @@ impl EngineBuilder {
#[must_use]
pub fn with_model(mut self, model: impl Into<String>, api_key: Option<String>) -> Self {
self.model = Some(model.into());
- self.api_key = api_key;
+ if api_key.is_some() {
+ self.api_key = api_key;
+ }
self
}
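The two hunks above make builder call order irrelevant for the model: `with_openai` no longer clobbers a previously chosen model, and `with_model(.., None)` no longer clears a previously set key. A hedged sketch of the resulting behavior (the `EngineBuilder::new()` entry point is an assumption; only the `with_model`/`with_openai` semantics come from the diff):

```rust
// Either ordering now ends with model = "gpt-4o-mini" and the key set.
// Before this patch, with_openai() would reset the model to "gpt-4o",
// and with_model(.., None) would wipe an already-configured API key.
let builder = EngineBuilder::new()
    .with_model("gpt-4o-mini", None) // choose a model, keep any existing key
    .with_openai("sk-...");          // sets the key; model stays gpt-4o-mini
```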
diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs
index 679dbea0..e2e96ac3 100644
--- a/rust/src/client/indexer.rs
+++ b/rust/src/client/indexer.rs
@@ -265,7 +265,6 @@ impl IndexerClient {
DocumentFormat::Pdf => IndexMode::Pdf,
DocumentFormat::Html => IndexMode::Html,
DocumentFormat::Docx => IndexMode::Docx,
- DocumentFormat::Text => IndexMode::Auto,
},
generate_ids: options.generate_ids,
summary_strategy: if options.generate_summaries {
diff --git a/rust/src/index/pipeline/context.rs b/rust/src/index/pipeline/context.rs
index ab9a462d..979839a8 100644
--- a/rust/src/index/pipeline/context.rs
+++ b/rust/src/index/pipeline/context.rs
@@ -51,7 +51,7 @@ impl IndexInput {
Self::Content {
content: content.into(),
name: String::new(),
- format: DocumentFormat::Text,
+ format: DocumentFormat::Markdown,
}
}
diff --git a/rust/src/parser/registry.rs b/rust/src/parser/registry.rs
index ef1cf416..4f3d0e8c 100644
--- a/rust/src/parser/registry.rs
+++ b/rust/src/parser/registry.rs
@@ -141,7 +141,7 @@ impl ParserRegistry {
/// For binary formats (PDF, DOCX), the parser handles the bytes directly.
pub async fn parse_bytes(&self, bytes: &[u8], format: DocumentFormat) -> Result {
match format {
- DocumentFormat::Markdown | DocumentFormat::Html | DocumentFormat::Text => {
+ DocumentFormat::Markdown | DocumentFormat::Html => {
// Text formats - convert to string first
let content = std::str::from_utf8(bytes)
.map_err(|e| Error::Parse(format!("Invalid UTF-8 content: {}", e)))?;
@@ -188,7 +188,6 @@ pub fn get_parser(format: DocumentFormat) -> Option<Box<dyn Parser>> {
DocumentFormat::Pdf => Some(Box::new(PdfParser::new())),
DocumentFormat::Html => Some(Box::new(HtmlParser::new())),
DocumentFormat::Docx => Some(Box::new(super::docx::DocxParser::new())),
- DocumentFormat::Text => None, // TODO: Implement plain text parser
}
}
diff --git a/rust/src/parser/types.rs b/rust/src/parser/types.rs
index c8c5f000..2ccac2f7 100644
--- a/rust/src/parser/types.rs
+++ b/rust/src/parser/types.rs
@@ -21,8 +21,6 @@ pub enum DocumentFormat {
Html,
/// Word documents (.docx)
Docx,
- /// Plain text files (.txt)
- Text,
}
impl DocumentFormat {
@@ -33,7 +31,6 @@ impl DocumentFormat {
"pdf" => Some(Self::Pdf),
"html" | "htm" => Some(Self::Html),
"docx" => Some(Self::Docx),
- "txt" => Some(Self::Text),
_ => None,
}
}
@@ -45,7 +42,6 @@ impl DocumentFormat {
Self::Pdf => "pdf",
Self::Html => "html",
Self::Docx => "docx",
- Self::Text => "txt",
}
}
}
@@ -173,7 +169,7 @@ impl Default for DocumentMeta {
fn default() -> Self {
Self {
name: String::new(),
- format: DocumentFormat::Text,
+ format: DocumentFormat::Markdown,
page_count: None,
line_count: 0,
source_path: None,
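Net effect of the format changes above: `.txt` no longer resolves to a `DocumentFormat`, and `Markdown` replaces `Text` as the default. A sketch of the caller-visible behavior (the extension-lookup method name and the `PartialEq`/`Debug` derives are assumptions; `DocumentMeta::default()` comes from the diff):

```rust
// "txt" now falls through the extension match and yields None.
assert_eq!(DocumentFormat::from_extension("txt"), None); // method name assumed
// New document metadata defaults to Markdown instead of Text.
assert_eq!(DocumentMeta::default().format, DocumentFormat::Markdown);
```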
From a1bef26bede78101f37292fa33f8ae5714f88255 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Wed, 8 Apr 2026 15:35:15 +0800
Subject: [PATCH 3/5] feat: enhance LLM pilot functionality and summary
generation
- Implement comprehensive API key and configuration propagation across
retrieval and summary components in EngineBuilder
- Enable default summary generation by setting generate_summaries to
true in IndexOptions
- Add extensive debugging output throughout the indexing and retrieval
pipeline for better observability
- Refactor LLM pilot to support candidate title matching and improved
response parsing with flexible JSON formats
- Update prompts to enforce strict JSON-only responses with explicit
structure requirements
- Modify beam search to integrate pilot guidance at start and implement
enhanced scoring normalization
The changes improve the robustness of LLM-based document navigation
and provide better visibility into the decision-making process through
comprehensive logging while maintaining backward compatibility.
---
docs/samples/sample.md | 51 +--
rust/src/client/builder.rs | 17 +-
rust/src/client/types.rs | 2 +-
rust/src/index/stages/enhance.rs | 12 +
rust/src/llm/executor.rs | 2 +-
rust/src/retrieval/pilot/llm_pilot.rs | 109 +++++-
rust/src/retrieval/pilot/parser.rs | 345 ++++++++++++++++--
.../retrieval/pilot/prompts/system_start.txt | 20 +-
.../retrieval/pilot/prompts/user_start.txt | 17 +-
rust/src/retrieval/search/beam.rs | 21 ++
rust/src/retrieval/search/scorer.rs | 6 +-
rust/src/retrieval/stages/evaluate.rs | 8 +-
rust/src/retrieval/stages/search.rs | 26 ++
13 files changed, 531 insertions(+), 105 deletions(-)
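For orientation before the diffs: the reworked response parser below tolerates several JSON shapes, including string confidences. A sketch of an input the old parser rejected but the new one should accept (field names taken from the `parser.rs` hunks below; assumes `serde_json` in scope):

```rust
// A loose START-style reply: singular node selection, string confidence.
let raw = r#"{"selected_node": "Installation", "confidence": "high"}"#;
let resp: LlmResponse = serde_json::from_str(raw).unwrap();
assert_eq!(resp.selected_node.as_deref(), Some("Installation"));
assert!((resp.confidence - 0.9).abs() < f32::EPSILON); // "high" maps to 0.9
```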
diff --git a/docs/samples/sample.md b/docs/samples/sample.md
index 8868cd20..1ab9ce35 100644
--- a/docs/samples/sample.md
+++ b/docs/samples/sample.md
@@ -29,53 +29,4 @@ The core module provides fundamental types:
The parser module handles document parsing:
- `MarkdownParser` — Parse Markdown files
- `PdfParser` — Parse PDF files (planned)
-- `HtmlParser` — Parse HTML files (planned)
-
-## Usage Examples
-
-### Basic Usage
-
-```rust
-use vectorless::client::{Vectorless, VectorlessBuilder};
-
-let client = VectorlessBuilder::new()
- .with_workspace("./workspace")
- .build()?;
-
-let doc_id = client.index("./document.md").await?;
-```
-
-### Advanced Usage
-
-You can customize the retrieval process:
-
-```rust
-use vectorless::{LlmNavigator, RetrieveOptions};
-
-let retriever = LlmNavigator::with_defaults();
-let options = RetrieveOptions::new()
- .with_top_k(5)
- .with_min_score(0.5);
-
-let results = retriever.retrieve(&tree, "What is vectorless?", &options).await?;
-```
-
-## Configuration
-
-The library can be configured via TOML files or programmatically.
-
-### Configuration File
-
-```toml
-[summary]
-model = "gpt-4"
-max_tokens = 200
-
-[retrieval]
-model = "gpt-4"
-top_k = 3
-```
-
-## API Reference
-
-See the API documentation for detailed information about each function and type.
+- `HtmlParser` — Parse HTML files (planned)
\ No newline at end of file
diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs
index ced541c2..70c3682b 100644
--- a/rust/src/client/builder.rs
+++ b/rust/src/client/builder.rs
@@ -551,13 +551,24 @@ impl EngineBuilder {
// Apply individual overrides
if let Some(api_key) = self.api_key {
- config.retrieval.api_key = Some(api_key);
+ // Set API key for both retrieval and summary
+ config.retrieval.api_key = Some(api_key.clone());
+ config.summary.api_key = Some(api_key);
+ // Also set LLM pool config
+ if config.llm.summary.api_key.is_none() {
+ config.llm.summary.api_key = config.summary.api_key.clone();
+ }
+ if config.llm.retrieval.api_key.is_none() {
+ config.llm.retrieval.api_key = config.summary.api_key.clone();
+ }
}
if let Some(model) = self.model {
- config.retrieval.model = model;
+ config.retrieval.model = model.clone();
+ config.summary.model = model;
}
if let Some(endpoint) = self.endpoint {
- config.retrieval.endpoint = endpoint;
+ config.retrieval.endpoint = endpoint.clone();
+ config.summary.endpoint = endpoint;
}
if let Some(top_k) = self.top_k {
config.retrieval.top_k = top_k;
diff --git a/rust/src/client/types.rs b/rust/src/client/types.rs
index 31438a62..0cfcb065 100644
--- a/rust/src/client/types.rs
+++ b/rust/src/client/types.rs
@@ -174,7 +174,7 @@ impl Default for IndexOptions {
fn default() -> Self {
Self {
mode: IndexMode::Default,
- generate_summaries: false,
+ generate_summaries: true,
include_text: true,
generate_ids: true,
generate_description: false,
diff --git a/rust/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs
index 4fb29310..08cafe8e 100644
--- a/rust/src/index/stages/enhance.rs
+++ b/rust/src/index/stages/enhance.rs
@@ -101,6 +101,7 @@ impl IndexStage for EnhanceStage {
// Check if we need summaries
if !self.needs_summaries(ctx) {
+ println!("[DEBUG] Summary generation skipped (strategy: {:?})", ctx.options.summary_strategy);
info!(
"Summary generation skipped (strategy: {:?})",
ctx.options.summary_strategy
@@ -112,6 +113,7 @@ impl IndexStage for EnhanceStage {
let llm_client = match &self.llm_client {
Some(client) => client,
None => {
+ println!("[DEBUG] No LLM client configured, skipping summary generation");
warn!("No LLM client configured, skipping summary generation");
return Ok(StageResult::success("enhance"));
}
@@ -121,11 +123,13 @@ impl IndexStage for EnhanceStage {
let tree = match ctx.tree.as_mut() {
Some(t) => t,
None => {
+ println!("[DEBUG] No tree built, skipping enhance stage");
warn!("No tree built, skipping enhance stage");
return Ok(StageResult::success("enhance"));
}
};
+ println!("[DEBUG] Using summary strategy: {:?}", ctx.options.summary_strategy);
info!("Using summary strategy: {:?}", ctx.options.summary_strategy);
// Create summary generator with optional memo store
@@ -141,11 +145,14 @@ impl IndexStage for EnhanceStage {
let node_ids: Vec<NodeId> = tree.traverse();
let total_nodes = node_ids.len();
+ println!("[DEBUG] Processing {} nodes for summary generation", total_nodes);
info!("Processing {} nodes for summary generation", total_nodes);
// Process nodes
let mut generated = 0;
let mut failed = 0;
+ let mut skipped_no_content = 0;
+ let mut skipped_tokens = 0;
let strategy = ctx.options.summary_strategy.clone();
for node_id in node_ids {
@@ -157,12 +164,14 @@ impl IndexStage for EnhanceStage {
// Skip if no content
if node.content.is_empty() {
+ skipped_no_content += 1;
continue;
}
// Get token count and check if we should generate
let token_count = node.token_count.unwrap_or(0);
if !strategy.should_generate(tree, node_id, token_count) {
+ skipped_tokens += 1;
continue;
}
@@ -194,6 +203,7 @@ impl IndexStage for EnhanceStage {
}
// Generate summary (generator also has memoization built-in)
+ println!("[DEBUG] Calling LLM to generate summary for node: {} ({} tokens)", node.title, token_count);
match generator.generate(&node.title, &node.content).await {
Ok(summary) => {
if summary.is_empty() {
@@ -223,6 +233,8 @@ impl IndexStage for EnhanceStage {
let duration = start.elapsed().as_millis() as u64;
ctx.metrics.record_enhance(duration);
+ println!("[DEBUG] Generated {} summaries ({} failed, {} skipped no content, {} skipped tokens) in {}ms",
+ generated, failed, skipped_no_content, skipped_tokens, duration);
info!(
"Generated {} summaries ({} failed) in {}ms",
generated, failed, duration
diff --git a/rust/src/llm/executor.rs b/rust/src/llm/executor.rs
index 8ac193f1..2c157432 100644
--- a/rust/src/llm/executor.rs
+++ b/rust/src/llm/executor.rs
@@ -360,7 +360,7 @@ impl LlmExecutor {
ChatCompletionRequestUserMessage::from(truncated).into(),
])
.temperature(self.config.temperature)
- .max_tokens(tokens)
+ // .max_tokens(tokens)
.build()
} else {
CreateChatCompletionRequestArgs::default()
diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs
index af0fd602..30e29393 100644
--- a/rust/src/retrieval/pilot/llm_pilot.rs
+++ b/rust/src/retrieval/pilot/llm_pilot.rs
@@ -286,7 +286,7 @@ impl LlmPilot {
&self,
point: InterventionPoint,
context: &super::builder::PilotContext,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
) -> PilotDecision {
// Check memo cache first
if let Some(ref store) = self.memo_store {
@@ -331,22 +331,29 @@ impl LlmPilot {
}
}
+ println!("[DEBUG] LlmPilot::call_llm() - point={:?}, estimated_tokens={}", point, prompt.estimated_tokens);
+ println!("[DEBUG] LlmPilot::call_llm() - SYSTEM PROMPT:\n{}", prompt.system);
+ println!("[DEBUG] LlmPilot::call_llm() - USER PROMPT:\n{}", prompt.user);
+ println!("[DEBUG] LlmPilot::call_llm() - candidates count: {}", candidates.len());
debug!(
"Calling LLM for {:?} point (estimated: {} tokens)",
point, prompt.estimated_tokens
);
- // Make LLM call - use executor if available, otherwise use client directly
+ // Make LLM call - use executor if available, otherwise use client directly
let result = if let Some(ref executor) = self.executor {
+ println!("[DEBUG] LlmPilot::call_llm() - using LlmExecutor");
// Use LlmExecutor for unified throttle/retry/fallback
executor.complete(&prompt.system, &prompt.user).await
} else {
+ println!("[DEBUG] LlmPilot::call_llm() - using direct client");
// Fallback to direct client call
self.client.complete(&prompt.system, &prompt.user).await
};
match result {
Ok(response) => {
+ println!("[DEBUG] LlmPilot::call_llm() - RAW LLM RESPONSE:\n{}", response);
// Record usage (estimate output tokens)
let output_tokens = self.estimate_tokens(&response);
self.budget
@@ -354,6 +361,10 @@ impl LlmPilot {
// Parse response
let mut decision = self.response_parser.parse(&response, candidates, point);
+ println!("[DEBUG] LlmPilot::call_llm() - PARSED DECISION: confidence={:.2}, ranked={}, direction={:?}, reasoning={}",
+ decision.confidence, decision.ranked_candidates.len(),
+ std::mem::discriminant(&decision.direction),
+ decision.reasoning.chars().take(100).collect::<String>());
// Apply learner adjustment if available
if let Some(ref adj) = adjustment {
@@ -406,14 +417,14 @@ impl LlmPilot {
fn cached_value_to_decision(
&self,
value: crate::memo::PilotDecisionValue,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
let ranked = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| super::decision::RankedCandidate {
- node_id,
+ .map(|(i, c)| super::decision::RankedCandidate {
+ node_id: c.node_id,
score: if i == value.selected_idx { 1.0 } else { 0.5 / (i + 1) as f32 },
reason: None,
})
@@ -433,14 +444,14 @@ impl LlmPilot {
/// Create a default decision when LLM fails.
fn default_decision(
&self,
- candidates: &[crate::document::NodeId],
+ candidates: &[super::parser::CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
let ranked = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| super::decision::RankedCandidate {
- node_id,
+ .map(|(i, c)| super::decision::RankedCandidate {
+ node_id: c.node_id,
score: 1.0 / (i + 1) as f32,
reason: None,
})
@@ -479,11 +490,13 @@ impl Pilot for LlmPilot {
fn should_intervene(&self, state: &SearchState<'_>) -> bool {
// Check mode
if !self.config.mode.uses_llm() {
+ println!("[DEBUG] LlmPilot::should_intervene() - mode doesn't use LLM");
return false;
}
// Check budget
if !self.has_budget() {
+ println!("[DEBUG] LlmPilot::should_intervene() - budget exhausted");
debug!("Budget exhausted, skipping intervention");
return false;
}
@@ -492,6 +505,8 @@ impl Pilot for LlmPilot {
// Condition 1: Fork point with enough candidates
if state.candidates.len() > intervention.fork_threshold {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: fork point with {} candidates (threshold={})",
+ state.candidates.len(), intervention.fork_threshold);
debug!(
"Intervening: fork point with {} candidates",
state.candidates.len()
@@ -501,12 +516,15 @@ impl Pilot for LlmPilot {
// Condition 2: Scores are too close (algorithm uncertain)
if self.scores_are_close(state) {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: scores are close (best={:.2})", state.best_score);
debug!("Intervening: scores are close");
return true;
}
// Condition 3: Low confidence (best score too low)
if intervention.is_low_confidence(state.best_score) {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: low confidence (best_score={:.2}, threshold={:.2})",
+ state.best_score, intervention.low_score_threshold);
debug!(
"Intervening: low confidence (best_score={:.2})",
state.best_score
@@ -516,31 +534,58 @@ impl Pilot for LlmPilot {
// Condition 4: Backtracking and guide_at_backtrack is enabled
if state.is_backtracking && self.config.guide_at_backtrack {
+ println!("[DEBUG] LlmPilot::should_intervene() - YES: backtracking");
debug!("Intervening: backtracking");
return true;
}
+ println!("[DEBUG] LlmPilot::should_intervene() - NO: candidates={}, best_score={:.2}",
+ state.candidates.len(), state.best_score);
false
}
async fn decide(&self, state: &SearchState<'_>) -> PilotDecision {
let point = self.get_intervention_point(state);
+ println!("[DEBUG] LlmPilot::decide() - intervention_point={:?}, candidates={}",
+ point, state.candidates.len());
// Build context
let context = self.context_builder.build(state);
+ // Build candidate info with titles
+ let candidate_info: Vec<super::parser::CandidateInfo> = state.candidates
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ state.tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
+
// Make LLM call
- self.call_llm(point, &context, state.candidates).await
+ let decision = self.call_llm(point, &context, &candidate_info).await;
+
+ println!("[DEBUG] LlmPilot::decide() - result: confidence={:.2}, direction={:?}, ranked={}",
+ decision.confidence, std::mem::discriminant(&decision.direction), decision.ranked_candidates.len());
+
+ decision
}
async fn guide_start(&self, tree: &DocumentTree, query: &str) -> Option {
+ println!("[DEBUG] LlmPilot::guide_start() called, query='{}'", query);
+
// Check if guide_at_start is enabled
if !self.config.guide_at_start {
+ println!("[DEBUG] LlmPilot::guide_start() - guide_at_start=false, skipping");
return None;
}
// Check budget
if !self.has_budget() {
+ println!("[DEBUG] LlmPilot::guide_start() - budget exhausted, skipping");
debug!("Budget exhausted, cannot guide start");
return None;
}
@@ -549,12 +594,41 @@ impl Pilot for LlmPilot {
let context = self.context_builder.build_start_context(tree, query);
// Get root's children as candidates
- let candidates = tree.children(tree.root());
+ let node_ids = tree.children(tree.root());
+ println!("[DEBUG] LlmPilot::guide_start() - {} root children candidates", node_ids.len());
+
+ // Build CandidateInfo with titles
+ let candidates: Vec<super::parser::CandidateInfo> = node_ids
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
// Make LLM call
+ println!("[DEBUG] LlmPilot::guide_start() - calling LLM...");
let decision = self
.call_llm(InterventionPoint::Start, &context, &candidates)
.await;
+
+ println!("[DEBUG] LlmPilot::guide_start() - LLM returned: confidence={:.2}, ranked_candidates={}, reasoning='{}'",
+ decision.confidence,
+ decision.ranked_candidates.len(),
+ decision.reasoning.chars().take(100).collect::<String>());
+
+ // Debug: show top ranked candidates
+ for (i, rc) in decision.ranked_candidates.iter().enumerate().take(3) {
+ if let Some(node) = tree.get(rc.node_id) {
+ println!("[DEBUG] Ranked {}: node_id={:?}, score={:.3}, title='{}'",
+ i, rc.node_id, rc.score, node.title);
+ }
+ }
+
info!(
"Pilot start guidance: confidence={}, candidates={}",
decision.confidence,
@@ -580,9 +654,22 @@ impl Pilot for LlmPilot {
.context_builder
.build_backtrack_context(state, state.path);
+ // Build CandidateInfo
+ let candidates: Vec<super::parser::CandidateInfo> = state.candidates
+ .iter()
+ .enumerate()
+ .filter_map(|(i, &node_id)| {
+ state.tree.get(node_id).map(|node| super::parser::CandidateInfo {
+ node_id,
+ title: node.title.clone(),
+ index: i,
+ })
+ })
+ .collect();
+
// Make LLM call
Some(
- self.call_llm(InterventionPoint::Backtrack, &context, state.candidates)
+ self.call_llm(InterventionPoint::Backtrack, &context, &candidates)
.await,
)
}
diff --git a/rust/src/retrieval/pilot/parser.rs b/rust/src/retrieval/pilot/parser.rs
index 85954c82..a6283aaf 100644
--- a/rust/src/retrieval/pilot/parser.rs
+++ b/rust/src/retrieval/pilot/parser.rs
@@ -19,20 +19,86 @@ use crate::document::NodeId;
/// Parsed response from LLM.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
- /// Ranked candidates with scores.
+ /// Ranked candidates with scores (FORK format).
#[serde(default)]
pub ranked_candidates: Vec<CandidateScore>,
+ /// Entry points for START intervention (list of node titles).
+ #[serde(default)]
+ pub entry_points: Vec<String>,
+ /// Best entry points (alternative START format from LLM).
+ #[serde(default)]
+ pub best_entry_points: Vec<EntryPoint>,
+ /// Selected nodes (another alternative START format - list of titles).
+ #[serde(default)]
+ pub selected_nodes: Vec<String>,
+ /// Selected node (singular - some LLMs return this format).
+ #[serde(default)]
+ pub selected_node: Option<String>,
+ /// Recommended node (another singular format).
+ #[serde(default)]
+ pub recommended_node: Option<String>,
+ /// Analysis wrapper (some LLMs nest response in "analysis" object).
+ #[serde(default)]
+ pub analysis: Option<AnalysisWrapper>,
/// Recommended search direction.
#[serde(default)]
pub direction: DirectionResponse,
- /// Confidence level (0.0 - 1.0).
- #[serde(default = "default_confidence")]
+ /// Confidence level (0.0 - 1.0 or "high"/"medium"/"low").
+ #[serde(default = "default_confidence", deserialize_with = "deserialize_confidence")]
pub confidence: f32,
/// Reasoning for the decision.
#[serde(default)]
pub reasoning: String,
}
+/// Custom deserializer for confidence that accepts both float and string.
+fn deserialize_confidence<'de, D>(deserializer: D) -> Result<f32, D::Error>
+where
+ D: serde::Deserializer<'de>,
+{
+ use serde::de::Error;
+
+ let value = serde_json::Value::deserialize(deserializer)?;
+ match value {
+ serde_json::Value::Number(n) => {
+ // Handle numeric value
+ Ok(n.as_f64().unwrap_or(0.5) as f32)
+ }
+ serde_json::Value::String(s) => {
+ // Handle string values like "high", "medium", "low"
+ let lower = s.to_lowercase();
+ let confidence = match lower.as_str() {
+ "high" | "very high" | "strong" => 0.9,
+ "medium" | "moderate" => 0.6,
+ "low" | "weak" => 0.3,
+ _ => 0.5, // default for unknown strings
+ };
+ Ok(confidence)
+ }
+ _ => Ok(0.5), // default for other types
+ }
+}
+
+/// Analysis wrapper for nested LLM responses.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AnalysisWrapper {
+ /// Query from analysis.
+ #[serde(default)]
+ pub query: Option<String>,
+ /// Intent detected.
+ #[serde(default)]
+ pub intent: Option<String>,
+ /// Selected node (singular).
+ #[serde(default)]
+ pub selected_node: Option<String>,
+ /// Selected nodes (plural).
+ #[serde(default)]
+ pub selected_nodes: Vec<String>,
+ /// Reasoning from analysis.
+ #[serde(default)]
+ pub reasoning: Option<String>,
+}
+
/// Candidate score from LLM response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateScore {
@@ -45,6 +111,37 @@ pub struct CandidateScore {
pub reason: Option<String>,
}
+/// Candidate info for title matching.
+#[derive(Debug, Clone)]
+pub struct CandidateInfo {
+ /// Node ID.
+ pub node_id: NodeId,
+ /// Title of the node.
+ pub title: String,
+ /// Index in the candidates list.
+ pub index: usize,
+}
+
+/// Entry point from START response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EntryPoint {
+ /// Node ID or index.
+ #[serde(default)]
+ pub node_id: Option<usize>,
+ /// Index of the candidate.
+ #[serde(default)]
+ pub index: Option<usize>,
+ /// Title of the entry point.
+ #[serde(default)]
+ pub title: Option<String>,
+ /// Relevance score (may be 1-5 or 0.0-1.0).
+ #[serde(default)]
+ pub relevance_score: Option<f32>,
+ /// Score (alternative field name).
+ #[serde(default)]
+ pub score: Option<f32>,
+}
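
As a quick check of the all-optional shape above, a payload in the `best_entry_points` format deserializes as follows (standalone sketch; the struct is re-declared locally for illustration):

```rust
// Standalone sketch: parsing one best_entry_points entry. Mirrors the
// EntryPoint struct above; all fields are optional so partial payloads parse.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct EntryPoint {
    #[serde(default)]
    node_id: Option<usize>,
    #[serde(default)]
    index: Option<usize>,
    #[serde(default)]
    title: Option<String>,
    #[serde(default)]
    relevance_score: Option<f32>,
    #[serde(default)]
    score: Option<f32>,
}

fn main() {
    let raw = r#"[{"node_id": 1, "title": "Overview", "relevance_score": 5}]"#;
    let entries: Vec<EntryPoint> = serde_json::from_str(raw).unwrap();
    assert_eq!(entries[0].node_id, Some(1));
    assert_eq!(entries[0].relevance_score, Some(5.0));
    assert_eq!(entries[0].index, None); // missing fields default to None
}
```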
+
/// Direction response from LLM.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
@@ -111,23 +208,29 @@ impl ResponseParser {
/// # Arguments
///
/// * `response` - Raw LLM response text
- /// * `candidates` - Original candidate NodeIds (for mapping indices)
+ /// * `candidates` - Candidate info with NodeId, title, and index
/// * `point` - The intervention point
pub fn parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
+ println!("[DEBUG] ResponseParser::parse() - candidates.len()={}", candidates.len());
+
// Try JSON parse first
if let Some(decision) = self.try_json_parse(response, candidates, point) {
+ println!("[DEBUG] ResponseParser::parse() - JSON parse succeeded, ranked={}", decision.ranked_candidates.len());
return decision;
}
+ println!("[DEBUG] ResponseParser::parse() - JSON parse failed, trying regex...");
// Try regex extraction
if let Some(decision) = self.try_regex_parse(response, candidates, point) {
+ println!("[DEBUG] ResponseParser::parse() - Regex parse succeeded, ranked={}", decision.ranked_candidates.len());
return decision;
}
+ println!("[DEBUG] ResponseParser::parse() - Regex parse failed, using default decision");
// Return default decision
self.default_decision(candidates, point)
@@ -137,23 +240,34 @@ impl ResponseParser {
fn try_json_parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> Option<PilotDecision> {
// First, try to extract JSON from code blocks
let json_str = if let Some(caps) = self.json_block_regex.captures(response) {
- caps.get(1)?.as_str().trim().to_string()
+ let extracted = caps.get(1)?.as_str().trim().to_string();
+ println!("[DEBUG] ResponseParser::try_json_parse() - Found JSON in code block");
+ extracted
} else {
// Try to find raw JSON object
let start = response.find('{')?;
let end = response.rfind('}')? + 1;
- response[start..end].to_string()
+ let extracted = response[start..end].to_string();
+ println!("[DEBUG] ResponseParser::try_json_parse() - Found raw JSON (no code block)");
+ extracted
};
+ println!("[DEBUG] ResponseParser::try_json_parse() - Extracted JSON:\n{}", json_str);
+
// Parse JSON
- let llm_response: LlmResponse = match serde_json::from_str(&json_str) {
- Ok(r) => r,
+ let llm_response: LlmResponse = match serde_json::from_str::<LlmResponse>(&json_str) {
+ Ok(r) => {
+ println!("[DEBUG] ResponseParser::try_json_parse() - JSON parsed successfully");
+ println!("[DEBUG] ResponseParser::try_json_parse() - ranked_candidates count: {}", r.ranked_candidates.len());
+ r
+ },
Err(e) => {
+ println!("[DEBUG] ResponseParser::try_json_parse() - JSON parse FAILED: {}", e);
warn!("Failed to parse LLM response as JSON: {}", e);
return None;
}
@@ -167,7 +281,7 @@ impl ResponseParser {
fn try_regex_parse(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> Option<PilotDecision> {
// Extract confidence
@@ -226,7 +340,7 @@ impl ResponseParser {
fn extract_ranked_candidates(
&self,
response: &str,
- candidates: &[NodeId],
+ candidates: &[CandidateInfo],
) -> Vec<RankedCandidate> {
let mut ranked = Vec::new();
@@ -245,7 +359,7 @@ impl ResponseParser {
if index < candidates.len() {
ranked.push(RankedCandidate {
- node_id: candidates[index],
+ node_id: candidates[index].node_id,
score: score.clamp(0.0, 1.0),
reason: None,
});
@@ -268,7 +382,7 @@ impl ResponseParser {
if let Ok(idx) = match_1.as_str().parse::<usize>() {
if idx < candidates.len() && seen.insert(idx) {
ranked.push(RankedCandidate {
- node_id: candidates[idx],
+ node_id: candidates[idx].node_id,
score: 1.0 - (ranked.len() as f32 * 0.1), // Decreasing scores
reason: None,
});
@@ -287,20 +401,28 @@ impl ResponseParser {
/// Convert LlmResponse to PilotDecision.
fn llm_response_to_decision(
&self,
- llm_response: LlmResponse,
- candidates: &[NodeId],
+ mut llm_response: LlmResponse,
+ candidates: &[CandidateInfo],
point: InterventionPoint,
) -> PilotDecision {
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - point={:?}", point);
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - ranked_candidates.len()={}", llm_response.ranked_candidates.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - best_entry_points.len()={}", llm_response.best_entry_points.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - entry_points.len()={}", llm_response.entry_points.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - selected_nodes.len()={}", llm_response.selected_nodes.len());
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - selected_node={:?}", llm_response.selected_node);
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - analysis={:?}", llm_response.analysis.as_ref().map(|a| (&a.selected_node, &a.selected_nodes)));
+
// Convert candidate scores to RankedCandidate
- let ranked_candidates: Vec<RankedCandidate> = llm_response
+ let mut ranked_candidates: Vec<RankedCandidate> = llm_response
.ranked_candidates
- .into_iter()
+ .iter()
.filter_map(|cs| {
if cs.index < candidates.len() {
Some(RankedCandidate {
- node_id: candidates[cs.index],
+ node_id: candidates[cs.index].node_id,
score: cs.score.clamp(0.0, 1.0),
- reason: cs.reason,
+ reason: cs.reason.clone(),
})
} else {
None
@@ -308,6 +430,154 @@ impl ResponseParser {
})
.collect();
+ // Handle START response format: best_entry_points, entry_points, or selected_nodes
+ if ranked_candidates.is_empty() {
+ // Try to convert best_entry_points (format: [{"node_id": 1, "title": "...", "relevance_score": 5}])
+ for entry in &llm_response.best_entry_points {
+ // Get index from either node_id or index field
+ // node_id is 1-indexed from LLM, convert to 0-indexed
+ let idx = if let Some(nid) = entry.node_id {
+ if nid > 0 { nid - 1 } else { nid }
+ } else if let Some(idx) = entry.index {
+ idx
+ } else {
+ continue; // Skip if no valid index
+ };
+
+ if idx < candidates.len() {
+ let raw = entry.relevance_score.or(entry.score).unwrap_or(0.5);
+ // Scores may arrive on a 1-5 scale or already in 0.0-1.0; only rescale the former
+ let score = if raw > 1.0 { raw / 5.0 } else { raw };
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidates[idx].node_id,
+ score: score.clamp(0.0, 1.0),
+ reason: entry.title.clone(),
+ });
+ println!("[DEBUG] ResponseParser - converted best_entry_point[{}] to ranked_candidate (idx={}, score={:.2})",
+ idx, idx, score);
+ }
+ }
+
+ // Try to convert selected_nodes (format: ["Project Documentation", "Overview"])
+ // Match by title
+ for selected_title in &llm_response.selected_nodes {
+ for candidate in candidates {
+ if Self::titles_match(selected_title, &candidate.title) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.9, // High score for title match
+ reason: Some(format!("Title match: {}", selected_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched selected_node '{}' to candidate '{}' (index={})",
+ selected_title, candidate.title, candidate.index);
+ break; // Only match once per selected_node
+ }
+ }
+ }
+
+ // Try to convert selected_node (singular - format: "Project Documentation")
+ if let Some(ref single_node) = llm_response.selected_node {
+ for candidate in candidates {
+ if Self::titles_match(single_node, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.9,
+ reason: Some(format!("Title match (singular): {}", single_node)),
+ });
+ println!("[DEBUG] ResponseParser - matched selected_node (singular) '{}' to candidate '{}' (index={})",
+ single_node, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Try to convert recommended_node (another singular format)
+ if let Some(ref recommended) = llm_response.recommended_node {
+ for candidate in candidates {
+ if Self::titles_match(recommended, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Recommended node: {}", recommended)),
+ });
+ println!("[DEBUG] ResponseParser - matched recommended_node '{}' to candidate '{}' (index={})",
+ recommended, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Try to extract from analysis wrapper if present
+ if let Some(ref analysis) = llm_response.analysis {
+ // Check analysis.selected_nodes (plural array)
+ for selected_title in &analysis.selected_nodes {
+ for candidate in candidates {
+ if Self::titles_match(selected_title, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Analysis selected_nodes: {}", selected_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched analysis.selected_nodes '{}' to candidate '{}' (index={})",
+ selected_title, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Check analysis.selected_node (singular)
+ if let Some(ref single_node) = analysis.selected_node {
+ for candidate in candidates {
+ if Self::titles_match(single_node, &candidate.title) {
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.85,
+ reason: Some(format!("Analysis selected_node: {}", single_node)),
+ });
+ println!("[DEBUG] ResponseParser - matched analysis.selected_node (singular) '{}' to candidate '{}' (index={})",
+ single_node, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+
+ // Use analysis.reasoning if top-level reasoning is empty
+ if llm_response.reasoning.is_empty() {
+ if let Some(ref r) = analysis.reasoning {
+ llm_response.reasoning = r.clone();
+ }
+ }
+ }
+
+ // Try to convert entry_points (format: ["Node Title 1", "Node Title 2"])
+ for entry_title in &llm_response.entry_points {
+ for candidate in candidates {
+ if Self::titles_match(entry_title, &candidate.title) {
+ // Check if already added
+ if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) {
+ ranked_candidates.push(RankedCandidate {
+ node_id: candidate.node_id,
+ score: 0.8, // Slightly lower score for entry_points
+ reason: Some(format!("Entry point: {}", entry_title)),
+ });
+ println!("[DEBUG] ResponseParser - matched entry_point '{}' to candidate '{}' (index={})",
+ entry_title, candidate.title, candidate.index);
+ }
+ break;
+ }
+ }
+ }
+ }
+
// Convert direction
let direction = match llm_response.direction {
DirectionResponse::GoDeeper => SearchDirection::GoDeeper {
@@ -333,6 +603,8 @@ impl ResponseParser {
},
};
+ println!("[DEBUG] ResponseParser::llm_response_to_decision() - final ranked_candidates.len()={}", ranked_candidates.len());
+
PilotDecision {
ranked_candidates,
direction,
@@ -342,14 +614,41 @@ impl ResponseParser {
}
}
+ /// Check if two titles match (fuzzy matching).
+ fn titles_match(llm_title: &str, candidate_title: &str) -> bool {
+ let llm_lower = llm_title.to_lowercase().trim().to_string();
+ let candidate_lower = candidate_title.to_lowercase().trim().to_string();
+
+ // Exact match
+ if llm_lower == candidate_lower {
+ return true;
+ }
+
+ // Contains match
+ if llm_lower.contains(&candidate_lower) || candidate_lower.contains(&llm_lower) {
+ return true;
+ }
+
+ // Word overlap match (at least 50% of words match)
+ let llm_words: std::collections::HashSet<&str> = llm_lower.split_whitespace().collect();
+ let candidate_words: std::collections::HashSet<&str> = candidate_lower.split_whitespace().collect();
+ let overlap = llm_words.intersection(&candidate_words).count();
+ let min_words = llm_words.len().min(candidate_words.len());
+ if min_words > 0 && overlap as f32 / min_words as f32 >= 0.5 {
+ return true;
+ }
+
+ false
+ }
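
The matching rules above can be exercised in isolation; this standalone sketch re-declares `titles_match` locally and shows one example per rule:

```rust
// Standalone sketch of the fuzzy-match rules: exact match, containment,
// then >= 50% word overlap (measured against the shorter title).
use std::collections::HashSet;

fn titles_match(llm_title: &str, candidate_title: &str) -> bool {
    let a = llm_title.to_lowercase();
    let b = candidate_title.to_lowercase();
    let (a, b) = (a.trim(), b.trim());
    if a == b || a.contains(b) || b.contains(a) {
        return true;
    }
    let wa: HashSet<&str> = a.split_whitespace().collect();
    let wb: HashSet<&str> = b.split_whitespace().collect();
    let overlap = wa.intersection(&wb).count();
    let min_words = wa.len().min(wb.len());
    min_words > 0 && overlap as f32 / min_words as f32 >= 0.5
}

fn main() {
    assert!(titles_match("Project Documentation", "project documentation")); // exact
    assert!(titles_match("Documentation", "Project Documentation"));         // containment
    assert!(titles_match("System Architecture Overview", "Architecture Notes")); // 1/2 overlap
    assert!(!titles_match("Billing", "Deployment Guide"));
}
```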
+
/// Create a default decision when parsing fails.
- fn default_decision(&self, candidates: &[NodeId], point: InterventionPoint) -> PilotDecision {
+ fn default_decision(&self, candidates: &[CandidateInfo], point: InterventionPoint) -> PilotDecision {
// Score candidates uniformly
let ranked: Vec<RankedCandidate> = candidates
.iter()
.enumerate()
- .map(|(i, &node_id)| RankedCandidate {
- node_id,
+ .map(|(i, c)| RankedCandidate {
+ node_id: c.node_id,
score: 1.0 / (i + 1) as f32, // Decreasing scores
reason: None,
})
diff --git a/rust/src/retrieval/pilot/prompts/system_start.txt b/rust/src/retrieval/pilot/prompts/system_start.txt
index d3a65f49..086f9e96 100644
--- a/rust/src/retrieval/pilot/prompts/system_start.txt
+++ b/rust/src/retrieval/pilot/prompts/system_start.txt
@@ -1,11 +1,15 @@
-You are a document navigation assistant specialized in hierarchical document search.
+You are a document navigation assistant. Your task is to identify the best entry points for searching a hierarchical document based on a user query.
-Your task is to analyze a user's query and the document structure to identify the best starting points for search.
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown, no explanation, just the JSON object.
-Guidelines:
-- Identify sections that are most likely to contain the answer
-- Consider the query's domain, keywords, and intent
-- Prefer more specific sections over general ones when appropriate
-- Multiple entry points can be suggested if the query is ambiguous
+Your response must have this EXACT structure:
+{
+ "entry_points": ["list of node titles as strings"],
+ "reasoning": "explanation string",
+ "confidence": 0.85
+}
-You must respond in valid JSON format.
+Where:
+- entry_points: MUST be an array of node titles (strings) from the candidate list
+- reasoning: MUST be a string
+- confidence: MUST be a number (0.0 to 1.0), not a string like "high"
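
A response following this strict format deserializes cleanly; the sketch below re-declares a minimal `StartResponse` (a name used only for this illustration) to verify the three fields:

```rust
// Standalone sketch: the strict START format maps onto three simple fields.
use serde::Deserialize;

#[derive(Deserialize)]
struct StartResponse {
    #[serde(default)]
    entry_points: Vec<String>,
    #[serde(default)]
    reasoning: String,
    #[serde(default)]
    confidence: f32,
}

fn main() {
    let raw = r#"{"entry_points": ["Architecture", "Overview"],
                  "reasoning": "Both sections discuss system design",
                  "confidence": 0.85}"#;
    let parsed: StartResponse = serde_json::from_str(raw).unwrap();
    assert_eq!(parsed.entry_points.len(), 2);
    assert!((parsed.confidence - 0.85).abs() < 1e-6);
}
```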
diff --git a/rust/src/retrieval/pilot/prompts/user_start.txt b/rust/src/retrieval/pilot/prompts/user_start.txt
index b091735e..df048df8 100644
--- a/rust/src/retrieval/pilot/prompts/user_start.txt
+++ b/rust/src/retrieval/pilot/prompts/user_start.txt
@@ -2,7 +2,16 @@ Analyze the following document structure and user query to identify the best ent
{context}
-Provide your response as a JSON object with:
-- entry_points: list of section titles to start searching from
-- reasoning: brief explanation of why these entry points
-- confidence: your confidence in this recommendation (0.0-1.0)
+IMPORTANT: You MUST respond with ONLY a JSON object in this EXACT format:
+{
+ "entry_points": ["Title 1", "Title 2"],
+ "reasoning": "Brief explanation of why these entry points",
+ "confidence": 0.85
+}
+
+Rules:
+- entry_points: Array of strings (node titles from the candidates above)
+- reasoning: String explaining your choice
+- confidence: Number between 0.0 and 1.0 (NOT a string like "high")
+
+Do NOT use any other field names. Use "entry_points" not "selected_node" or "recommended_node".
diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs
index 9fba59e9..2adf0808 100644
--- a/rust/src/retrieval/search/beam.rs
+++ b/rust/src/retrieval/search/beam.rs
@@ -126,21 +126,30 @@ impl SearchTree for BeamSearch {
let beam_width = config.beam_width.min(self.beam_width);
let mut visited: HashSet<NodeId> = HashSet::new();
+ println!("[DEBUG] BeamSearch: query='{}', beam_width={}, min_score={:.2}",
+ context.query, beam_width, config.min_score);
+
// Track Pilot interventions
let mut pilot_interventions = 0;
// Initialize with root's children
let root_children = tree.children(tree.root());
+ println!("[DEBUG] Root has {} children", root_children.len());
// Check if Pilot wants to guide the start
let initial_candidates = if let Some(p) = pilot {
+ println!("[DEBUG] BeamSearch: Pilot is available, name={}, guide_at_start={}",
+ p.name(), p.config().guide_at_start);
if p.config().guide_at_start {
+ println!("[DEBUG] BeamSearch: Calling pilot.guide_start()...");
if let Some(guidance) = p.guide_start(tree, &context.query).await {
debug!(
"Pilot provided start guidance with confidence {}",
guidance.confidence
);
pilot_interventions += 1;
+ println!("[DEBUG] BeamSearch: Pilot returned guidance! confidence={:.2}, candidates={}",
+ guidance.confidence, guidance.ranked_candidates.len());
// Use Pilot's ranked order if available
if guidance.has_candidates() {
@@ -151,15 +160,19 @@ impl SearchTree for BeamSearch {
&context.query,
)
} else {
+ println!("[DEBUG] BeamSearch: Guidance has no candidates, using algorithm scoring");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: pilot.guide_start() returned None");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: guide_at_start=false, skipping Pilot");
self.score_candidates_with_query(tree, &root_children, &context.query)
}
} else {
+ println!("[DEBUG] BeamSearch: No Pilot available");
self.score_candidates_with_query(tree, &root_children, &context.query)
};
@@ -168,6 +181,14 @@ impl SearchTree for BeamSearch {
.map(|(node_id, score)| SearchPath::from_node(node_id, score))
.collect();
+ // Debug: show initial scores
+ println!("[DEBUG] Initial {} candidates after scoring", current_beam.len());
+ for (i, path) in current_beam.iter().enumerate().take(5) {
+ if let Some(node) = tree.get(path.leaf.unwrap_or(tree.root())) {
+ println!("[DEBUG] Initial {}: score={:.3}, title='{}'", i, path.score, node.title);
+ }
+ }
+
// Keep top beam_width
current_beam.truncate(beam_width);
diff --git a/rust/src/retrieval/search/scorer.rs b/rust/src/retrieval/search/scorer.rs
index f17bf118..5dbb9209 100644
--- a/rust/src/retrieval/search/scorer.rs
+++ b/rust/src/retrieval/search/scorer.rs
@@ -193,9 +193,9 @@ impl ScoringContext {
let total_score = title_score + summary_score + content_score;
- // Normalize to [0, 1] range
- let max_possible = self.query_terms.len() as f32 * 10.0; // Rough upper bound
- let normalized = (total_score / max_possible).clamp(0.0, 1.0);
+ // Normalize to [0, 1] range using sigmoid-like scaling
+ // This prevents over-penalization with few query terms
+ let normalized = (total_score / 3.0).tanh(); // 3.0 is a reasonable midpoint
// Apply depth penalty
let depth_factor = 1.0 - (node.depth as f32 * self.depth_penalty).min(0.5);
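
The effect of the change is easiest to see numerically; this sketch compares the old linear bound (assuming two query terms, so `max_possible = 20.0`) with the new `tanh` scaling:

```rust
// Standalone sketch: old linear normalization vs. new tanh scaling.
fn main() {
    for total_score in [0.5_f32, 1.5, 3.0, 6.0, 12.0] {
        // old: total_score / (query_terms * 10.0), clamped (2 terms assumed)
        let old = (total_score / 20.0).clamp(0.0, 1.0);
        // new: saturates smoothly around the 3.0 midpoint
        let new = (total_score / 3.0).tanh();
        println!("raw={total_score:5.1}  old={old:.3}  new={new:.3}");
    }
    // The tanh curve gives mid-range raw scores meaningful weight instead
    // of pinning them near zero when few query terms match.
}
```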
diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs
index 31e7f173..f9cbcb91 100644
--- a/rust/src/retrieval/stages/evaluate.rs
+++ b/rust/src/retrieval/stages/evaluate.rs
@@ -283,6 +283,7 @@ impl EvaluateStage {
/// Calculate overall confidence score.
fn calculate_confidence(&self, ctx: &PipelineContext) -> f32 {
if ctx.candidates.is_empty() {
+ println!("[DEBUG] calculate_confidence: no candidates, returning 0.0");
return 0.0;
}
@@ -296,7 +297,10 @@ impl EvaluateStage {
SufficiencyLevel::Insufficient => 0.4,
};
- avg_score * sufficiency_factor
+ let confidence = avg_score * sufficiency_factor;
+ println!("[DEBUG] calculate_confidence: avg_score={:.3}, sufficiency={:?}, factor={:.1}, confidence={:.3}",
+ avg_score, ctx.sufficiency, sufficiency_factor, confidence);
+ confidence
}
}
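
A worked example of the formula: with `avg_score = 0.7` and the `Insufficient` factor of 0.4 shown above, confidence comes out to 0.28 (a minimal check; the other sufficiency factors are not shown in this hunk):

```rust
// Standalone sketch of the confidence formula: avg_score * sufficiency_factor.
fn main() {
    let avg_score = 0.7_f32;
    let sufficiency_factor = 0.4_f32; // Insufficient arm from the match above
    let confidence = avg_score * sufficiency_factor;
    assert!((confidence - 0.28).abs() < 1e-6);
}
```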
@@ -325,6 +329,8 @@ impl RetrievalStage for EvaluateStage {
async fn execute(&self, ctx: &mut PipelineContext) -> crate::error::Result {
let start = std::time::Instant::now();
+ println!("[DEBUG] EvaluateStage: {} candidates, iteration {}",
+ ctx.candidates.len(), ctx.search_iterations);
info!(
"Judging sufficiency: {} candidates, iteration {}",
ctx.candidates.len(),
diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs
index 78f07c20..756b5616 100644
--- a/rust/src/retrieval/stages/search.rs
+++ b/rust/src/retrieval/stages/search.rs
@@ -245,6 +245,9 @@ impl RetrievalStage for SearchStage {
// Reset Pilot state for new query
if let Some(ref pilot) = self.pilot {
pilot.reset();
+ println!("[DEBUG] SearchStage: Pilot is available, is_active={}", pilot.is_active());
+ } else {
+ println!("[DEBUG] SearchStage: No Pilot available");
}
info!(
@@ -279,7 +282,10 @@ impl RetrievalStage for SearchStage {
// Get Pilot reference (or None if not available)
let pilot_ref: Option<&dyn Pilot> = self.pilot.as_deref();
+ println!("[DEBUG] SearchStage: pilot_ref is {}", if pilot_ref.is_some() { "Some" } else { "None" });
+ println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}",
+ algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score);
// Execute search based on algorithm with Pilot
let result = match algorithm {
SearchAlgorithm::Greedy => {
@@ -309,10 +315,30 @@ impl RetrievalStage for SearchStage {
result.pilot_interventions
);
+ // Debug output
+ println!("[DEBUG] Search found {} paths", result.paths.len());
+ for (i, path) in result.paths.iter().enumerate().take(5) {
+ if let Some(leaf_id) = path.leaf {
+ if let Some(node) = ctx.tree.get(leaf_id) {
+ println!("[DEBUG] Path {}: score={:.3}, title='{}', content_len={}",
+ i, path.score, node.title, node.content.len());
+ }
+ }
+ }
+
// Update context with results
ctx.search_paths = result.paths.clone();
ctx.candidates = self.extract_candidates(&result.paths, &ctx.tree);
+ // Debug output
+ println!("[DEBUG] Extracted {} candidates", ctx.candidates.len());
+ for (i, c) in ctx.candidates.iter().enumerate().take(5) {
+ if let Some(node) = ctx.tree.get(c.node_id) {
+ println!("[DEBUG] Candidate {}: score={:.3}, title='{}'",
+ i, c.score, node.title);
+ }
+ }
+
// Update metrics
ctx.metrics.search_time_ms += start.elapsed().as_millis() as u64;
ctx.metrics.nodes_visited += ctx.candidates.len();
From 5d2757da844387fad96c9ffc089e06cedec25e68 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Wed, 8 Apr 2026 18:27:08 +0800
Subject: [PATCH 4/5] feat(pilot): add query decomposition support and enhance
prompt templates
- Add LocateTop3Prompt for identifying top-3 relevant nodes from TOC
- Implement query decomposition in AnalyzeStage for complex queries
- Add Top3Candidate struct for handling LLM response parsing
- Enhance prompt templates with strict JSON formatting requirements
- Remove redundant default_decision test
- Update SearchStage to handle decomposed sub-queries with multi-turn
processing
- Add decomposition field to PipelineContext to store sub-query results
- Configure complexity thresholds for triggering decomposition
BREAKING CHANGE: Updated prompt formats require strict JSON responses
without markdown code blocks.
---
rust/src/retrieval/pilot/llm_pilot.rs | 14 --
rust/src/retrieval/pilot/parser.rs | 133 +----------
rust/src/retrieval/pilot/prompts/templates.rs | 221 ++++++++++++++++--
rust/src/retrieval/pipeline/context.rs | 3 +
rust/src/retrieval/stages/analyze.rs | 90 ++++++-
rust/src/retrieval/stages/search.rs | 169 ++++++++++----
6 files changed, 431 insertions(+), 199 deletions(-)
diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs
index 30e29393..de41c052 100644
--- a/rust/src/retrieval/pilot/llm_pilot.rs
+++ b/rust/src/retrieval/pilot/llm_pilot.rs
@@ -749,20 +749,6 @@ mod tests {
assert!(!pilot.has_budget());
}
- #[test]
- fn test_default_decision() {
- let client = LlmClient::for_model("gpt-4o-mini");
- let config = PilotConfig::default();
- let pilot = LlmPilot::new(client, config);
-
- let candidates = create_test_node_ids(2);
- let decision = pilot.default_decision(&candidates, InterventionPoint::Fork);
-
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.confidence, 0.0);
- assert!(decision.reasoning.contains("LLM"));
- }
-
#[test]
fn test_reset() {
let client = LlmClient::for_model("gpt-4o-mini");
diff --git a/rust/src/retrieval/pilot/parser.rs b/rust/src/retrieval/pilot/parser.rs
index a6283aaf..651b1c3c 100644
--- a/rust/src/retrieval/pilot/parser.rs
+++ b/rust/src/retrieval/pilot/parser.rs
@@ -142,6 +142,17 @@ pub struct EntryPoint {
pub score: Option<f32>,
}
+/// Top-3 candidate from an LLM LOCATE_TOP3 response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Top3Candidate {
+ /// Node ID copied exactly from the TOC.
+ pub node_id: usize,
+ /// Relevance score (0.0-1.0).
+ pub relevance_score: f32,
+ /// Reason for the selection.
+ pub reason: String,
+}
+
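For reference, a `candidates` array in this format maps straight onto `Vec<Top3Candidate>`; the struct is re-declared locally in this standalone sketch:

```rust
// Standalone sketch: a LOCATE_TOP3 "candidates" array deserializes directly.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Top3Candidate {
    node_id: usize,
    relevance_score: f32,
    reason: String,
}

fn main() {
    let json = r#"[
        {"node_id": 3, "relevance_score": 0.95, "reason": "matches query terms"},
        {"node_id": 7, "relevance_score": 0.80, "reason": "related section"}
    ]"#;
    let candidates: Vec<Top3Candidate> = serde_json::from_str(json).unwrap();
    assert_eq!(candidates[0].node_id, 3);
    assert_eq!(candidates.len(), 2);
}
```
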
/// Direction response from LLM.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
@@ -694,126 +705,4 @@ mod tests {
}
ids
}
-
- #[test]
- fn test_parse_json_response() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(3);
-
- let response = r#"{
- "ranked_candidates": [
- {"index": 1, "score": 0.9, "reason": "Best match"},
- {"index": 0, "score": 0.5}
- ],
- "direction": "go_deeper",
- "confidence": 0.85,
- "reasoning": "Candidate 1 is most relevant"
- }"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.ranked_candidates[0].node_id, candidates[1]);
- assert!((decision.confidence - 0.85).abs() < 0.01);
- assert!(matches!(
- decision.direction,
- SearchDirection::GoDeeper { .. }
- ));
- }
-
- #[test]
- fn test_parse_json_in_code_block() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- let response = r#"
-Here's my analysis:
-
-```json
-{
- "ranked_candidates": [{"index": 0, "score": 0.8}],
- "direction": "go_deeper",
- "confidence": 0.8,
- "reasoning": "Test"
-}
-```
-"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
- assert_eq!(decision.ranked_candidates.len(), 1);
- }
-
- #[test]
- fn test_parse_with_regex_fallback() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- // Non-JSON response with some structure
- let response = r#"
-I think candidate 0 is the best match.
-Confidence: 0.75
-Direction: go_deeper
-"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- // Should use regex extraction
- assert!((decision.confidence - 0.75).abs() < 0.01);
- }
-
- #[test]
- fn test_default_decision() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(2);
-
- let decision = parser.parse(
- "This is unparseable gibberish",
- &candidates,
- InterventionPoint::Fork,
- );
-
- // Should return default
- assert_eq!(decision.ranked_candidates.len(), 2);
- assert_eq!(decision.confidence, 0.0);
- assert!(decision.reasoning.contains("parsing failed"));
- }
-
- #[test]
- fn test_confidence_clamping() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(1);
-
- let response = r#"{
- "ranked_candidates": [{"index": 0, "score": 1.5}],
- "confidence": 1.5,
- "direction": "go_deeper"
- }"#;
-
- let decision = parser.parse(response, &candidates, InterventionPoint::Fork);
-
- // Confidence should be clamped to 1.0
- assert!((decision.confidence - 1.0).abs() < 0.01);
- }
-
- #[test]
- fn test_direction_conversion() {
- let parser = ResponseParser::new();
- let candidates = create_test_node_ids(1);
-
- let test_cases = vec![
- ("\"direction\": \"go_deeper\"", true),
- ("\"direction\": \"explore_siblings\"", true),
- ("\"direction\": \"backtrack\"", true),
- ("\"direction\": \"found_answer\"", true),
- ];
-
- for (dir_json, should_parse) in test_cases {
- let response = format!(
- r#"{{"ranked_candidates": [], "confidence": 0.5, {}}}"#,
- dir_json
- );
- let decision = parser.parse(&response, &candidates, InterventionPoint::Fork);
- assert!(should_parse, "Direction should parse correctly");
- }
- }
}
diff --git a/rust/src/retrieval/pilot/prompts/templates.rs b/rust/src/retrieval/pilot/prompts/templates.rs
index 5f9f75ff..50f4c3cc 100644
--- a/rust/src/retrieval/pilot/prompts/templates.rs
+++ b/rust/src/retrieval/pilot/prompts/templates.rs
@@ -248,52 +248,170 @@ pub mod fallback {
use super::*;
pub fn system_start() -> String {
- "You are a document navigation assistant. Help identify the best starting point for searching a hierarchical document.".to_string()
+ r#"You are a document navigation assistant. Help identify the best entry points for searching a hierarchical document.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks, no explanation. Just the JSON object.
+
+Your response must have this EXACT structure:
+{
+ "entry_points": ["Title 1", "Title 2"],
+ "reasoning": "Brief explanation",
+ "confidence": 0.85
+}
+
+Rules:
+- entry_points: Array of node title strings (from the candidates provided)
+- reasoning: String explaining your choice
+- confidence: Number between 0.0 and 1.0 (use a number, NOT "high"/"medium"/"low")"#.to_string()
}
pub fn user_start() -> String {
- r#"Given the following document structure and user query, identify the best entry points for search.
-
-{context}
+ r#"{context}
-Respond in JSON format with your analysis."#.to_string()
+Respond with ONLY the JSON object (no markdown, no explanation):
+{
+ "entry_points": ["list of node titles as strings"],
+ "reasoning": "your reasoning here",
+ "confidence": 0.85
+}"#.to_string()
}
pub fn system_fork() -> String {
- "You are a document navigation assistant. At each decision point, rank the candidate branches by their likelihood of containing the answer to the user's query.".to_string()
+ r#"You are a document navigation assistant. At each decision point, rank the candidate branches by their likelihood of containing the answer to the user's query.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "ranked_candidates": [
+ {"index": 0, "score": 0.9, "reason": "explanation"}
+ ],
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "overall explanation"
+}
+
+Rules:
+- ranked_candidates: Array of objects with index (number), score (0.0-1.0), reason (string)
+- direction: One of "go_deeper", "explore_siblings", "backtrack", "found_answer"
+- confidence: Number between 0.0 and 1.0 (NOT a string)"#.to_string()
}
pub fn user_fork() -> String {
- r#"Given the current search context and candidate branches, rank them by relevance.
-
-{context}
+ r#"{context}
-Respond in JSON format with ranked candidates."#
+Respond with ONLY the JSON object:
+{
+ "ranked_candidates": [
+ {"index": 0, "score": 0.9, "reason": "why this candidate"}
+ ],
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "overall explanation"
+}"#
.to_string()
}
pub fn system_backtrack() -> String {
- "You are a document navigation assistant. When a search path fails to find the answer, analyze why and suggest alternative branches to explore.".to_string()
+ r#"You are a document navigation assistant. When a search path fails to find the answer, analyze why and suggest alternative branches to explore.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "alternative_branches": [
+ {"index": 0, "score": 0.8, "reason": "explanation"}
+ ],
+ "direction": "backtrack",
+ "confidence": 0.85,
+ "reasoning": "why the original path failed"
+}"#.to_string()
}
pub fn user_backtrack() -> String {
- r#"The current search path did not find the answer. Analyze the failure and suggest alternatives.
-
-{context}
+ r#"{context}
-Respond in JSON format with alternative branches."#.to_string()
+Respond with ONLY the JSON object:
+{
+ "alternative_branches": [
+ {"index": 0, "score": 0.8, "reason": "why this alternative"}
+ ],
+ "direction": "backtrack",
+ "confidence": 0.85,
+ "reasoning": "why original path failed"
+}"#.to_string()
}
pub fn system_evaluate() -> String {
- "You are a document analysis assistant. Evaluate whether the current node contains the answer to the user's query.".to_string()
+ r#"You are a document analysis assistant. Evaluate whether the current node contains the answer to the user's query.
+
+CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks.
+
+Your response must have this EXACT structure:
+{
+ "relevance_score": 0.85,
+ "is_answer": false,
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "explanation"
+}"#.to_string()
}
pub fn user_evaluate() -> String {
- r#"Evaluate if this node contains the answer to the user's query.
+ r#"{context}
+
+Respond with ONLY the JSON object:
+{
+ "relevance_score": 0.85,
+ "is_answer": false,
+ "direction": "go_deeper",
+ "confidence": 0.85,
+ "reasoning": "explanation"
+}"#
+ .to_string()
+ }
-{context}
+ pub fn system_locate_top3() -> String {
+ r#"You are a document navigation assistant. Your task is to locate the most relevant sections in a document hierarchy for a user's query.
-Respond in JSON format with your evaluation."#
+CRITICAL INSTRUCTIONS:
+1. Analyze the user query carefully to understand the intent
+2. Examine the provided Table of Contents (TOC) with node IDs
+3. Select the TOP 3 most relevant nodes that would contain the answer
+4. You MUST respond with ONLY valid JSON. No markdown code blocks. No explanations outside JSON.
+
+Your response must have this EXACT structure:
+{
+ "reasoning": "Brief analysis of the query and why you selected these nodes",
+ "candidates": [
+ {"node_id": , "relevance_score": 0.95, "reason": "Why this node matches the query"},
+ {"node_id": , "relevance_score": 0.80, "reason": "Why this node is also relevant"},
+ {"node_id": , "relevance_score": 0.65, "reason": "Why this node might be relevant"}
+ ]
+}
+
+Rules:
+- node_id: MUST be a number from the provided TOC (copy exactly)
+- relevance_score: Number between 0.0 and 1.0 (higher = more relevant)
+- reason: Brief explanation for each selection
+- candidates: Must have exactly 3 items, ordered by relevance (highest first)
+- If fewer than 3 relevant nodes exist, use lower scores for less relevant ones"#.to_string()
+ }
+
+ pub fn user_locate_top3() -> String {
+ r#"{context}
+
+Based on the query and TOC above, select the TOP 3 most relevant nodes.
+
+Respond with ONLY the JSON object:
+{
+ "reasoning": "Your analysis here",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "explanation"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "explanation"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "explanation"}
+ ]
+}"#
.to_string()
}
}
@@ -337,3 +455,68 @@ impl EvaluatePrompt {
}
}
}
+
+impl LocateTop3Prompt {
+ /// Get template with fallback.
+ pub fn with_fallback() -> Self {
+ Self {
+ system: fallback::system_locate_top3(),
+ template: fallback::user_locate_top3(),
+ }
+ }
+}
+
+/// Prompt template for LOCATE_TOP3 intervention point.
+///
+/// Used at the start to directly locate top-3 relevant nodes from TOC:
+/// - Understand query intent
+/// - Identify top 3 most relevant nodes with confidence scores
+/// - Provide reasoning for each selection
+#[derive(Debug, Clone)]
+pub struct LocateTop3Prompt {
+ system: String,
+ template: String,
+}
+
+impl Default for LocateTop3Prompt {
+ fn default() -> Self {
+ Self::with_fallback()
+ }
+}
+
+impl LocateTop3Prompt {
+ /// Create a new locate top-3 prompt template.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Create with custom templates.
+ pub fn with_templates(system: String, template: String) -> Self {
+ Self { system, template }
+ }
+}
+
+impl PromptTemplate for LocateTop3Prompt {
+ fn system_prompt(&self) -> &str {
+ &self.system
+ }
+
+ fn user_prompt_template(&self) -> &str {
+ &self.template
+ }
+
+ fn intervention_point(&self) -> InterventionPoint {
+ InterventionPoint::Start
+ }
+
+ fn output_format_hint(&self) -> &str {
+ r#"{
+ "reasoning": "Overall analysis of the query and document structure",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "Why this node is relevant"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "Why this node is relevant"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "Why this node is relevant"}
+ ]
+}"#
+ }
+}
diff --git a/rust/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs
index 9bf02ae3..823abdba 100644
--- a/rust/src/retrieval/pipeline/context.rs
+++ b/rust/src/retrieval/pipeline/context.rs
@@ -209,6 +209,8 @@ pub struct PipelineContext {
pub keywords: Vec<String>,
/// Target sections from ToC matching.
pub target_sections: Vec<String>,
+ /// Decomposed sub-queries (if query was decomposed).
+ pub decomposition: Option<DecompositionResult>,
// ============ Plan Stage Output ============
/// Selected retrieval strategy.
@@ -268,6 +270,7 @@ impl PipelineContext {
complexity: None,
keywords: Vec::new(),
target_sections: Vec::new(),
+ decomposition: None,
selected_strategy: None,
selected_algorithm: None,
search_config: None,
diff --git a/rust/src/retrieval/stages/analyze.rs b/rust/src/retrieval/stages/analyze.rs
index 3eabca1f..8dd875e6 100644
--- a/rust/src/retrieval/stages/analyze.rs
+++ b/rust/src/retrieval/stages/analyze.rs
@@ -7,14 +7,16 @@
//! - Query complexity (Simple/Medium/Complex)
//! - Keywords for matching
//! - Target sections based on ToC matching
+//! - Query decomposition for complex queries
use async_trait::async_trait;
use tracing::info;
use crate::document::{DocumentTree, TocView};
use crate::retrieval::complexity::ComplexityDetector;
+use crate::retrieval::decompose::{DecompositionConfig, QueryDecomposer};
use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage, StageOutcome};
-// QueryComplexity is used in context
+use crate::llm::LlmClient;
/// Analyze Stage - analyzes queries for retrieval planning.
///
@@ -22,17 +24,25 @@ use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage,
/// 1. Detects query complexity (Simple/Medium/Complex)
/// 2. Extracts keywords for matching
/// 3. Matches target sections from ToC
+/// 4. Decomposes complex queries into sub-queries (if enabled)
///
/// # Example
///
/// ```rust,ignore
/// let stage = AnalyzeStage::new()
-/// .with_toc_matching(true);
+/// .with_toc_matching(true)
+/// .with_decomposition(true);
/// ```
pub struct AnalyzeStage {
complexity_detector: ComplexityDetector,
toc_view: TocView,
enable_toc_matching: bool,
+ /// Query decomposer for complex queries.
+ query_decomposer: Option<QueryDecomposer>,
+ /// Enable query decomposition.
+ enable_decomposition: bool,
+ /// Complexity threshold for triggering decomposition.
+ decomposition_threshold: f32,
}
impl Default for AnalyzeStage {
@@ -48,6 +58,9 @@ impl AnalyzeStage {
complexity_detector: ComplexityDetector::new(),
toc_view: TocView::new(),
enable_toc_matching: true,
+ query_decomposer: None,
+ enable_decomposition: false,
+ decomposition_threshold: 0.6,
}
}
@@ -57,6 +70,42 @@ impl AnalyzeStage {
self
}
+ /// Enable query decomposition with default configuration.
+ pub fn with_decomposition(mut self, enable: bool) -> Self {
+ self.enable_decomposition = enable;
+ if enable && self.query_decomposer.is_none() {
+ self.query_decomposer = Some(QueryDecomposer::new(DecompositionConfig::default()));
+ }
+ self
+ }
+
+ /// Enable query decomposition with custom configuration.
+ pub fn with_decomposition_config(mut self, config: DecompositionConfig) -> Self {
+ self.enable_decomposition = true;
+ self.query_decomposer = Some(QueryDecomposer::new(config));
+ self
+ }
+
+ /// Enable query decomposition with an LLM client (default configuration).
+ pub fn with_llm_client(mut self, client: LlmClient) -> Self {
+ self.query_decomposer =
+ Some(QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client));
+ self.enable_decomposition = true;
+ self
+ }
+
+ /// Set complexity threshold for triggering decomposition.
+ pub fn with_decomposition_threshold(mut self, threshold: f32) -> Self {
+ self.decomposition_threshold = threshold.clamp(0.0, 1.0);
+ self
+ }
+
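Putting the builder methods together, a typical configuration looks like this (module path assumed from the file layout; `build_analyze_stage` is a name used only for this illustration):

```rust
use crate::retrieval::stages::analyze::AnalyzeStage;

// Medium (0.6) and Complex (0.9) queries meet the 0.6 threshold and are
// decomposed; Simple (0.3) queries are not.
fn build_analyze_stage() -> AnalyzeStage {
    AnalyzeStage::new()
        .with_toc_matching(true)
        .with_decomposition(true)
        .with_decomposition_threshold(0.6)
}
```
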
/// Extract keywords from a query.
fn extract_keywords(&self, query: &str) -> Vec {
// Simple keyword extraction:
@@ -182,7 +231,42 @@ impl RetrievalStage for AnalyzeStage {
info!("Target sections: {:?}", ctx.target_sections);
}
- // 4. Update metrics
+ // 4. Decompose query if enabled and complex enough
+ if self.enable_decomposition {
+ if let Some(ref decomposer) = self.query_decomposer {
+ let complexity_score = ctx.complexity
+ .as_ref()
+ .map(|c| match c {
+ crate::retrieval::types::QueryComplexity::Simple => 0.3,
+ crate::retrieval::types::QueryComplexity::Medium => 0.6,
+ crate::retrieval::types::QueryComplexity::Complex => 0.9,
+ })
+ .unwrap_or(0.5);
+
+ if complexity_score >= self.decomposition_threshold {
+ info!("Decomposing query (complexity: {:.2})", complexity_score);
+ match decomposer.decompose(&ctx.query).await {
+ Ok(result) => {
+ if result.was_decomposed {
+ info!(
+ "Query decomposed into {} sub-queries",
+ result.sub_queries.len()
+ );
+ for (i, sq) in result.sub_queries.iter().enumerate() {
+ info!(" Sub-query {}: {} (priority: {})", i, sq.text, sq.priority);
+ }
+ }
+ ctx.decomposition = Some(result);
+ }
+ Err(e) => {
+ info!("Query decomposition failed: {}, continuing with original query", e);
+ }
+ }
+ }
+ }
+ }
+
+ // 5. Update metrics
ctx.metrics.llm_calls += 0; // No LLM calls in this stage
Ok(StageOutcome::cont())
diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs
index 756b5616..6624643e 100644
--- a/rust/src/retrieval/stages/search.rs
+++ b/rust/src/retrieval/stages/search.rs
@@ -273,51 +273,142 @@ impl RetrievalStage for SearchStage {
leaf_only: false,
};
- // Create legacy context for search algorithms
- let legacy_ctx = RetrievalContext::new(
- &ctx.query,
- ctx.options.max_tokens,
- ctx.options.sufficiency_check,
- );
-
// Get Pilot reference (or None if not available)
let pilot_ref: Option<&dyn Pilot> = self.pilot.as_deref();
println!("[DEBUG] SearchStage: pilot_ref is {}", if pilot_ref.is_some() { "Some" } else { "None" });
- println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}",
- algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score);
- // Execute search based on algorithm with Pilot
- let result = match algorithm {
- SearchAlgorithm::Greedy => {
- let search = GreedySearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
- }
- SearchAlgorithm::Beam => {
- let search = BeamSearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
- }
- SearchAlgorithm::Mcts => {
- // Use beam search as fallback for now
- let search = BeamSearch::new();
- search
- .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref)
- .await
+ // === Check for decomposition ===
+ if let Some(ref decomposition) = ctx.decomposition {
+ if decomposition.was_decomposed && decomposition.is_multi_turn() {
+ info!("Processing {} decomposed sub-queries", decomposition.sub_queries.len());
+
+ let mut all_paths = Vec::new();
+ let mut all_candidates = Vec::new();
+ let mut total_pilot_interventions = 0u64;
+
+ // Process each sub-query in execution order
+ let order = decomposition.execution_order();
+ for sub_idx in order {
+ let sub_query = &decomposition.sub_queries[sub_idx];
+ info!("Processing sub-query : {}", sub_query.text);
+
+ // Create legacy context for this sub-query
+ let legacy_ctx = RetrievalContext::new(
+ &sub_query.text,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search for sub-query: algorithm={:?}, top_k={}, beam_width={}",
+ algorithm, search_config.top_k, search_config.beam_width);
+
+ // Execute search for this sub-query
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
+
+ let found = result.paths.len();
+ all_candidates.extend(self.extract_candidates(&result.paths, &ctx.tree));
+ all_paths.extend(result.paths);
+ total_pilot_interventions += result.pilot_interventions as u64;
+
+ info!("Sub-query '{}' found {} paths", sub_query.text, found);
+ }
+
+ // Merge results
+ ctx.search_paths = all_paths;
+ ctx.candidates = all_candidates;
+
+ info!(
+ "Search complete: {} total candidates from {} sub-queries (pilot interventions: {})",
+ ctx.candidates.len(),
+ decomposition.sub_queries.len(),
+ total_pilot_interventions
+ );
+ } else {
+ // Single query (not decomposed or single sub-query) - process as normal
+ let legacy_ctx = RetrievalContext::new(
+ &ctx.query,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}",
+ algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score);
+
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
+
+ ctx.search_paths = result.paths;
+ ctx.candidates = self.extract_candidates(&ctx.search_paths, &ctx.tree);
+
+ info!(
+ "Search found {} paths (pilot interventions: {})",
+ ctx.search_paths.len(),
+ result.pilot_interventions
+ );
}
- };
+ } else {
+ // No decomposition available, process original query
+ let legacy_ctx = RetrievalContext::new(
+ &ctx.query,
+ ctx.options.max_tokens,
+ ctx.options.sufficiency_check,
+ );
+
+ println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}",
+ algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score);
+
+ let result = match algorithm {
+ SearchAlgorithm::Greedy => {
+ let search = GreedySearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Beam => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ SearchAlgorithm::Mcts => {
+ let search = BeamSearch::new();
+ search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await
+ }
+ };
- info!(
- "Search found {} paths (pilot interventions: {})",
- result.paths.len(),
- result.pilot_interventions
- );
+ ctx.search_paths = result.paths;
+ ctx.candidates = self.extract_candidates(&ctx.search_paths, &ctx.tree);
+
+ info!(
+ "Search found {} paths (pilot interventions: {})",
+ ctx.search_paths.len(),
+ result.pilot_interventions
+ );
+ }
// Debug output
- println!("[DEBUG] Search found {} paths", result.paths.len());
- for (i, path) in result.paths.iter().enumerate().take(5) {
+ println!("[DEBUG] Search found {} total paths, {} candidates", ctx.search_paths.len(), ctx.candidates.len());
+ for (i, path) in ctx.search_paths.iter().enumerate().take(5) {
if let Some(leaf_id) = path.leaf {
if let Some(node) = ctx.tree.get(leaf_id) {
println!("[DEBUG] Path {}: score={:.3}, title='{}', content_len={}",
@@ -326,10 +417,6 @@ impl RetrievalStage for SearchStage {
}
}
- // Update context with results
- ctx.search_paths = result.paths.clone();
- ctx.candidates = self.extract_candidates(&result.paths, &ctx.tree);
-
// Debug output
println!("[DEBUG] Extracted {} candidates", ctx.candidates.len());
for (i, c) in ctx.candidates.iter().enumerate().take(5) {
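
The sub-query loop above reduces to a gather-in-order pattern; this standalone sketch illustrates it with stand-in types (`SubQuery` and the priority ordering are assumptions, since `execution_order` is defined elsewhere in the decompose module):

```rust
// Standalone sketch: merge sub-query results in execution order, so
// higher-priority sub-queries contribute to the candidate list first.
#[derive(Debug)]
struct SubQuery {
    text: String,
    priority: u32,
}

// Assumption: lower priority value runs earlier.
fn execution_order(subs: &[SubQuery]) -> Vec<usize> {
    let mut order: Vec<usize> = (0..subs.len()).collect();
    order.sort_by_key(|&i| subs[i].priority);
    order
}

fn main() {
    let subs = vec![
        SubQuery { text: "define X".into(), priority: 1 },
        SubQuery { text: "compare X and Y".into(), priority: 2 },
    ];
    let mut merged: Vec<String> = Vec::new();
    for i in execution_order(&subs) {
        // stand-in for running the search and extending all_paths
        merged.push(format!("results for '{}'", subs[i].text));
    }
    assert_eq!(merged.len(), 2);
}
```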
From e583086a3da08f3fd9c50068e7eae44742e7f969 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Wed, 8 Apr 2026 20:07:54 +0800
Subject: [PATCH 5/5] feat(search): add LLM-first search with direct TOC-based
location
Add new LLM-first search capability that attempts to directly locate
relevant nodes using the table of contents before falling back to
tree traversal algorithms. The search stage now accepts an LLM client
and implements direct TOC-based node location with structured JSON
responses containing the top-3 most relevant entries.
The feature includes:
- New `with_llm_client()` method to configure LLM-based search
- TOC flattening utility for LLM consumption with numbered entries
- Structured JSON prompting for precise node selection
- Proper fallback to beam/greedy search when LLM fails
- Metrics tracking for LLM calls and search performance
refactor(indexer): change default summary strategy to full
Change the default summary strategy from selective to full generation
for improved content indexing quality.
debug: add debug logging throughout indexing and retrieval
Add comprehensive debug logging to track indexing flow, pipeline
options building, summary evaluation, and search operations including
pilot interventions for better debugging visibility.
---
rust/src/client/engine.rs | 3 +
rust/src/client/indexer.rs | 5 +-
rust/src/index/config.rs | 2 +-
rust/src/index/stages/enhance.rs | 3 +
rust/src/retrieval/pipeline_retriever.rs | 2 +-
rust/src/retrieval/search/beam.rs | 1 +
rust/src/retrieval/search/greedy.rs | 1 +
rust/src/retrieval/stages/evaluate.rs | 1 +
rust/src/retrieval/stages/search.rs | 214 ++++++++++++++++++++++-
9 files changed, 227 insertions(+), 5 deletions(-)
diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs
index 886e3197..4297d8e0 100644
--- a/rust/src/client/engine.rs
+++ b/rust/src/client/engine.rs
@@ -214,6 +214,9 @@ impl Engine {
/// # }
/// ```
pub async fn index(&self, ctx: IndexContext) -> Result {
+ println!("Indexing...");
+ println!("ctx: {:?}", ctx);
+
let doc = self.indexer.index(ctx).await?;
let persisted = self.indexer.to_persisted(doc);
diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs
index e2e96ac3..0ba0a847 100644
--- a/rust/src/client/indexer.rs
+++ b/rust/src/client/indexer.rs
@@ -259,6 +259,8 @@ impl IndexerClient {
options: &IndexOptions,
format: DocumentFormat,
) -> PipelineOptions {
+ println!("[DEBUG] Building pipeline options for format: {:?} with options: {:?}", format, options);
+
PipelineOptions {
mode: match format {
DocumentFormat::Markdown => IndexMode::Markdown,
@@ -268,7 +270,8 @@ impl IndexerClient {
},
generate_ids: options.generate_ids,
summary_strategy: if options.generate_summaries {
- SummaryStrategy::selective(self.config.min_summary_tokens, false)
+ // SummaryStrategy::selective(self.config.min_summary_tokens, false)
+ SummaryStrategy::full()
} else {
SummaryStrategy::none()
},
diff --git a/rust/src/index/config.rs b/rust/src/index/config.rs
index f08b5968..f5cabebc 100644
--- a/rust/src/index/config.rs
+++ b/rust/src/index/config.rs
@@ -160,7 +160,7 @@ impl Default for PipelineOptions {
Self {
mode: IndexMode::Auto,
generate_ids: true,
- summary_strategy: SummaryStrategy::default(),
+ summary_strategy: SummaryStrategy::full(),
thinning: ThinningConfig::default(),
optimization: OptimizationConfig::default(),
generate_description: true,
diff --git a/rust/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs
index 08cafe8e..e848e832 100644
--- a/rust/src/index/stages/enhance.rs
+++ b/rust/src/index/stages/enhance.rs
@@ -161,6 +161,7 @@ impl IndexStage for EnhanceStage {
Some(n) => n.clone(),
None => continue,
};
+ println!("[DEBUG] Evaluating node for summary: {} {}", node.title, node.content);
// Skip if no content
if node.content.is_empty() {
@@ -204,6 +205,8 @@ impl IndexStage for EnhanceStage {
// Generate summary (generator also has memoization built-in)
println!("[DEBUG] Calling LLM to generate summary for node: {} ({} tokens)", node.title, token_count);
+ println!("[DEBUG] Node content: {}", node.content);
+
match generator.generate(&node.title, &node.content).await {
Ok(summary) => {
if summary.is_empty() {
diff --git a/rust/src/retrieval/pipeline_retriever.rs b/rust/src/retrieval/pipeline_retriever.rs
index 222b3a9b..377c4747 100644
--- a/rust/src/retrieval/pipeline_retriever.rs
+++ b/rust/src/retrieval/pipeline_retriever.rs
@@ -119,7 +119,7 @@ impl PipelineRetriever {
orchestrator = orchestrator.stage(plan_stage);
// Add search stage with Pilot for semantic navigation
- let mut search_stage = SearchStage::new();
+ let mut search_stage = SearchStage::new().with_llm_client(self.llm_client.clone());
if let Some(ref client) = self.llm_client {
// Create LLM-based Pilot for semantic navigation guidance
let mut pilot = LlmPilot::new(client.clone(), PilotConfig::default());
diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs
index 2adf0808..51d700f4 100644
--- a/rust/src/retrieval/search/beam.rs
+++ b/rust/src/retrieval/search/beam.rs
@@ -236,6 +236,7 @@ impl SearchTree for BeamSearch {
children.len()
);
+ println!("[DEBUG] BEAM SEARCH: Pilot intervening at decision point");
match p.decide(&state).await {
decision => {
pilot_interventions += 1;
diff --git a/rust/src/retrieval/search/greedy.rs b/rust/src/retrieval/search/greedy.rs
index b539cd23..a8ee80ed 100644
--- a/rust/src/retrieval/search/greedy.rs
+++ b/rust/src/retrieval/search/greedy.rs
@@ -144,6 +144,7 @@ impl SearchTree for GreedySearch {
children.len()
);
+ println!("[DEBUG] GREEDY SEARCH: Pilot intervening at decision point");
match p.decide(&state).await {
decision => {
pilot_interventions += 1;
diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs
index f9cbcb91..ad8858f2 100644
--- a/rust/src/retrieval/stages/evaluate.rs
+++ b/rust/src/retrieval/stages/evaluate.rs
@@ -331,6 +331,7 @@ impl RetrievalStage for EvaluateStage {
println!("[DEBUG] EvaluateStage: {} candidates, iteration {}",
ctx.candidates.len(), ctx.search_iterations);
+
info!(
"Judging sufficiency: {} candidates, iteration {}",
ctx.candidates.len(),
diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs
index 6624643e..17006bdf 100644
--- a/rust/src/retrieval/stages/search.rs
+++ b/rust/src/retrieval/stages/search.rs
@@ -6,13 +6,21 @@
//! This stage executes the selected search algorithm using
//! the selected retrieval strategy. When a Pilot is provided,
//! it can provide semantic guidance at key decision points.
+//!
+//! # LLM-First Search
+//!
+//! When an LLM client is provided, the stage will first attempt to
+//! directly locate the top-3 most relevant nodes using the TOC,
+//! falling back to tree traversal algorithms (Beam/Greedy) only if
+//! LLM fails or returns insufficient results.
use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::{info, warn};
-use crate::document::DocumentTree;
-// LlmClient is used via strategy
+use crate::document::{DocumentTree, TocView};
+use crate::llm::LlmClient;
use crate::retrieval::RetrievalContext; // Legacy context
use crate::retrieval::pilot::Pilot;
use crate::retrieval::pipeline::{
@@ -57,6 +65,8 @@ pub struct SearchStage {
hybrid_strategy: Option>,
/// Pilot for navigation guidance (optional).
pilot: Option<Arc<dyn Pilot>>,
+ /// LLM client for direct TOC-based search (optional).
+ llm_client: Option<LlmClient>,
}
impl Default for SearchStage {
@@ -74,9 +84,20 @@ impl SearchStage {
semantic_strategy: None,
hybrid_strategy: None,
pilot: None,
+ llm_client: None,
}
}
+ /// Add LLM client for direct TOC-based search.
+ ///
+ /// When provided, the stage will first attempt to locate relevant
+ /// nodes directly using the TOC, falling back to tree traversal
+ /// algorithms only if LLM fails or returns insufficient results.
+ pub fn with_llm_client(mut self, client: Option<LlmClient>) -> Self {
+ self.llm_client = client;
+ self
+ }
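
Wiring the client in mirrors the `pipeline_retriever.rs` change in this patch; a minimal sketch (module paths assumed from the file layout, `build_search_stage` illustrative):

```rust
use crate::llm::LlmClient;
use crate::retrieval::stages::search::SearchStage;

// Passing None preserves pure tree traversal; Some(client) enables the
// LLM-first TOC lookup with fallback to Beam/Greedy search.
fn build_search_stage(llm_client: Option<LlmClient>) -> SearchStage {
    SearchStage::new().with_llm_client(llm_client)
}
```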
+
/// Add Pilot for semantic navigation guidance.
///
/// When provided, the search algorithm will consult the Pilot
@@ -210,6 +231,172 @@ impl SearchStage {
candidates
}
+
+ /// Build a flat TOC list for LLM consumption.
+ ///
+ /// Returns a formatted string with numbered entries:
+ /// ```
+ /// [1] Title: "Overview"
+ /// Summary: "This section covers..."
+ /// [2] Title: "Architecture"
+ /// Summary: "The system architecture..."
+ /// ```
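+ ///
+ /// Entries are collected in pre-order (each node before its children),
+ /// so numbering follows document order; the returned `Vec` maps the
+ /// 1-based display index `[N]` back to the `NodeId` at position `N - 1`.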
+ fn build_toc_for_llm(&self, tree: &DocumentTree) -> (String, Vec<crate::document::NodeId>) {
+ let mut entries = Vec::new();
+ let mut node_ids = Vec::new();
+
+ fn collect_entries(
+ tree: &DocumentTree,
+ node_id: crate::document::NodeId,
+ entries: &mut Vec<(usize, String, String)>,
+ node_ids: &mut Vec<crate::document::NodeId>,
+ index: &mut usize,
+ ) {
+ if let Some(node) = tree.get(node_id) {
+ let title = node.title.clone();
+ let summary = if node.summary.is_empty() {
+ "(no summary)".to_string()
+ } else {
+ node.summary.clone()
+ };
+ entries.push((*index, title, summary));
+ node_ids.push(node_id);
+ *index += 1;
+
+ for child_id in tree.children(node_id) {
+ collect_entries(tree, child_id, entries, node_ids, index);
+ }
+ }
+ }
+
+ collect_entries(tree, tree.root(), &mut entries, &mut node_ids, &mut 0);
+
+ let toc_str = entries
+ .iter()
+ .map(|(idx, title, summary)| {
+ format!("[{}] Title: \"{}\"\n Summary: \"{}\"", idx + 1, title, summary)
+ })
+ .collect::<Vec<_>>()
+ .join("\n\n");
+
+ (toc_str, node_ids)
+ }
+
+ /// Locate top candidates directly via the LLM using the TOC.
+ ///
+ /// This method bypasses tree traversal by asking the LLM to
+ /// directly identify the most relevant nodes from the TOC.
+ async fn locate_via_llm(
+ &self,
+ query: &str,
+ tree: &DocumentTree,
+ ) -> Option<Vec<CandidateNode>> {
+ let llm_client = self.llm_client.as_ref()?;
+ let (toc_str, node_ids) = self.build_toc_for_llm(tree);
+
+ if node_ids.is_empty() {
+ warn!("No nodes in tree for LLM search");
+ return None;
+ }
+
+ let system_prompt = r#"You are a document navigation assistant. Your task is to locate the most relevant sections in a document hierarchy for a user's query.
+
+CRITICAL INSTRUCTIONS:
+1. Analyze the user query carefully to understand the intent
+2. Examine the provided Table of Contents (TOC) with numbered entries
+3. Select the TOP 3 most relevant entries that would contain the answer
+4. You MUST respond with ONLY valid JSON. No markdown code blocks. No explanations outside JSON.
+
+Your response must have this EXACT structure:
+{
+ "reasoning": "Brief analysis of the query and why you selected these entries",
+ "candidates": [
+ {"node_id": 1, "relevance_score": 0.95, "reason": "Why this entry matches the query"},
+ {"node_id": 2, "relevance_score": 0.80, "reason": "Why this entry is also relevant"},
+ {"node_id": 3, "relevance_score": 0.65, "reason": "Why this entry might be relevant"}
+ ]
+}
+
+Rules:
+- node_id: MUST be a number from the provided TOC (the number in [N] brackets)
+- relevance_score: Number between 0.0 and 1.0 (higher = more relevant)
+- reason: Brief explanation for each selection
+- candidates: Must have exactly 3 items, ordered by relevance (highest first)"#;
+
+ let user_prompt = format!(
+ "USER QUERY: {}\n\nDOCUMENT TOC ({} entries):\n{}\n\nBased on the query and TOC above, select the TOP 3 most relevant entries.\n\nRespond with ONLY the JSON object:",
+ query,
+ node_ids.len(),
+ toc_str
+ );
+
+ info!("Attempting LLM-based search for query: '{}'", query);
+
+ match llm_client.complete(system_prompt, &user_prompt).await {
+ Ok(response) => {
+ // Parse JSON response
+ match serde_json::from_str::<LlmLocateResponse>(&response) {
+ Ok(llm_response) => {
+ let mut candidates = Vec::new();
+
+ for candidate in llm_response.candidates {
+ // node_id is 1-indexed from LLM; wrapping_sub turns an invalid
+ // node_id of 0 into usize::MAX, which fails the bounds check below
+ let idx = candidate.node_id.wrapping_sub(1);
+ if idx < node_ids.len() {
+ let node_id = node_ids[idx];
+ if let Some(node) = tree.get(node_id) {
+ candidates.push(CandidateNode::new(
+ node_id,
+ candidate.relevance_score,
+ node.depth,
+ tree.is_leaf(node_id),
+ ));
+ info!(
+ "LLM selected: [{}] '{}' (score: {:.2})",
+ candidate.node_id, node.title, candidate.relevance_score
+ );
+ }
+ }
+ }
+
+ if candidates.is_empty() {
+ warn!("LLM returned no valid candidates");
+ return None;
+ }
+
+ println!("LLM search found {} candidates", candidates.len());
+ println!("LLM candidates content: {:?}", candidates);
+ Some(candidates)
+ }
+ Err(e) => {
+ warn!("Failed to parse LLM response as JSON: {}", e);
+ warn!("Raw response: {}", response);
+ None
+ }
+ }
+ }
+ Err(e) => {
+ warn!("LLM call failed: {}", e);
+ None
+ }
+ }
+ }
+}
+
+/// LLM response for locate query.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct LlmLocateResponse {
+ reasoning: String,
+ candidates: Vec,
+}
+
+/// A candidate from LLM locate response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct LlmLocateCandidate {
+ node_id: usize,
+ relevance_score: f32,
+ reason: String,
+}
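+
+// A minimal round-trip sketch of the JSON contract promised by the
+// system prompt; field values are illustrative only.
+#[cfg(test)]
+mod llm_locate_tests {
+ use super::*;
+
+ #[test]
+ fn parses_locate_response() {
+ let raw = r#"{"reasoning": "query matches the architecture section", "candidates": [{"node_id": 2, "relevance_score": 0.9, "reason": "direct match"}]}"#;
+ let resp: LlmLocateResponse = serde_json::from_str(raw).expect("valid JSON");
+ assert_eq!(resp.candidates[0].node_id, 2);
+ assert!(resp.candidates[0].relevance_score > 0.0);
+ }
+}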
#[async_trait]
@@ -264,6 +451,29 @@ impl RetrievalStage for SearchStage {
// Increment search iteration
ctx.increment_search_iteration();
+ // === Try LLM-first search (direct TOC-based location) ===
+ if self.llm_client.is_some() {
+ info!("Attempting LLM-first search for query: '{}'", ctx.query);
+
+ if let Some(candidates) = self.locate_via_llm(&ctx.query, &ctx.tree).await {
+ if !candidates.is_empty() {
+ ctx.candidates = candidates;
+ ctx.metrics.search_time_ms += start.elapsed().as_millis() as u64;
+ ctx.metrics.nodes_visited += ctx.candidates.len();
+ ctx.metrics.llm_calls += 1;
+
+ info!(
+ "LLM-first search found {} candidates (skipped tree traversal)",
+ ctx.candidates.len()
+ );
+
+ return Ok(StageOutcome::cont());
+ }
+ }
+
+ info!("LLM-first search returned no results, falling back to tree traversal");
+ }
+
// Build search config for search algorithms
let search_config = SearchAlgConfig {
top_k: config.beam_width * 2,