diff --git a/README.md b/README.md index 603adb21..e96095ad 100644 --- a/README.md +++ b/README.md @@ -163,6 +163,105 @@ async fn main() -> vectorless::Result<()> { | **Feedback Learning** | Improves from user feedback over time | | **Multi-turn Queries** | Handles complex questions with decomposition | +## Configuration + +### Zero Configuration (Recommended) + +Just set `OPENAI_API_KEY` and you're ready to go: + +```bash +export OPENAI_API_KEY="sk-..." +``` + +
+<details>
+<summary>Python</summary>
+
+```python
+from vectorless import Engine
+
+# Uses OPENAI_API_KEY from environment
+engine = Engine(workspace="./data")
+```
+
+</details>
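+
+As a quick end-to-end sketch of the zero-configuration flow in Python (index a file, ask a question, clean up), using the method names that appear in this PR's Python examples; `./report.md` is only a placeholder path:
+
+```python
+from vectorless import Engine, IndexContext
+
+# Uses OPENAI_API_KEY from the environment
+engine = Engine(workspace="./data")
+
+# Index a local Markdown file and ask a question about it
+ctx = IndexContext.from_file("./report.md")
+doc_id = engine.index(ctx)
+
+result = engine.query(doc_id, "What is this document about?")
+print(result.content, result.score)
+
+# Remove the document when done
+engine.remove(doc_id)
+```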
+ +
+<details>
+<summary>Rust</summary>
+
+```rust
+use vectorless::Engine;
+
+let client = Engine::builder()
+    .with_workspace("./workspace")
+    .build().await?;
+```
+
+</details>
+ +### Environment Variables + +| Variable | Description | +|----------|-------------| +| `OPENAI_API_KEY` | LLM API key | +| `VECTORLESS_MODEL` | Default model (e.g., `gpt-4o-mini`) | +| `VECTORLESS_ENDPOINT` | API endpoint URL | +| `VECTORLESS_WORKSPACE` | Workspace directory | + +### Advanced Configuration + +For fine-grained control, use a config file: + +```bash +cp config.toml ./vectorless.toml +``` + +
+<details>
+<summary>Python</summary>
+
+```python
+from vectorless import Engine
+
+# Use full configuration file
+engine = Engine(config_path="./vectorless.toml")
+
+# Or override specific settings
+engine = Engine(
+    config_path="./vectorless.toml",
+    model="gpt-4o",  # Override model from config
+)
+```
+
+</details>
+ +
+<details>
+<summary>Rust</summary>
+
+```rust
+use vectorless::Engine;
+
+// Use full configuration file
+let client = Engine::builder()
+    .with_config_path("./vectorless.toml")
+    .build().await?;
+
+// Or override specific settings
+let client = Engine::builder()
+    .with_config_path("./vectorless.toml")
+    .with_model("gpt-4o", None) // Override model
+    .build().await?;
+```
+
+</details>
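+
+To illustrate the precedence rules summarized below: explicit constructor or builder arguments win over both environment variables and the config file. A minimal Python sketch (the model names are placeholders):
+
+```python
+import os
+from vectorless import Engine
+
+os.environ["VECTORLESS_MODEL"] = "gpt-4o-mini"  # env var sets a default model
+
+# The config file may also name a model; the explicit argument below
+# still takes precedence over both the file and the environment.
+engine = Engine(config_path="./vectorless.toml", model="gpt-4o")
+```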
+ +### Configuration Priority + +Later overrides earlier: + +1. Default configuration +2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`) +3. Explicit config file (`config_path` / `with_config_path`) +4. Environment variables +5. Constructor/builder parameters (highest priority) ## Architecture @@ -177,7 +276,7 @@ async fn main() -> vectorless::Result<()> { ## Examples -See the [examples/](examples/) directory. +See the [examples/](examples/) directory for more usage patterns. ## Contributing diff --git a/docs/samples/sample.md b/docs/samples/sample.md index 8868cd20..1ab9ce35 100644 --- a/docs/samples/sample.md +++ b/docs/samples/sample.md @@ -29,53 +29,4 @@ The core module provides fundamental types: The parser module handles document parsing: - `MarkdownParser` — Parse Markdown files - `PdfParser` — Parse PDF files (planned) -- `HtmlParser` — Parse HTML files (planned) - -## Usage Examples - -### Basic Usage - -```rust -use vectorless::client::{Vectorless, VectorlessBuilder}; - -let client = VectorlessBuilder::new() - .with_workspace("./workspace") - .build()?; - -let doc_id = client.index("./document.md").await?; -``` - -### Advanced Usage - -You can customize the retrieval process: - -```rust -use vectorless::{LlmNavigator, RetrieveOptions}; - -let retriever = LlmNavigator::with_defaults(); -let options = RetrieveOptions::new() - .with_top_k(5) - .with_min_score(0.5); - -let results = retriever.retrieve(&tree, "What is vectorless?", &options).await?; -``` - -## Configuration - -The library can be configured via TOML files or programmatically. - -### Configuration File - -```toml -[summary] -model = "gpt-4" -max_tokens = 200 - -[retrieval] -model = "gpt-4" -top_k = 3 -``` - -## API Reference - -See the API documentation for detailed information about each function and type. +- `HtmlParser` — Parse HTML files (planned) \ No newline at end of file diff --git a/examples/python/advanced/README.md b/examples/python/advanced/README.md new file mode 100644 index 00000000..2d11afbc --- /dev/null +++ b/examples/python/advanced/README.md @@ -0,0 +1,45 @@ +# Advanced Example - Full Configuration + +Use a configuration file for fine-grained control. + +## Setup + +```bash +pip install vectorless + +# Copy the example config +cp ../../../config.toml ./vectorless.toml + +# Edit to customize your settings +vim vectorless.toml +``` + +## Run + +```bash +python main.py +``` + +## Configuration File Structure + +```toml +[llm] +api_key = "sk-..." + +[llm.summary] +model = "gpt-4o-mini" +max_tokens = 200 + +[llm.retrieval] +model = "gpt-4o" +max_tokens = 100 + +[retrieval] +top_k = 5 +beam_width = 3 +max_iterations = 10 + +[storage] +workspace_dir = "./workspace" +cache_size = 100 +``` diff --git a/examples/python/advanced/main.py b/examples/python/advanced/main.py new file mode 100644 index 00000000..d223ad02 --- /dev/null +++ b/examples/python/advanced/main.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +""" +Advanced example - Full Configuration File. + +This example demonstrates how to use a full configuration file +for fine-grained control over all settings. 
+ +Usage: + cp ../../../config.toml ./vectorless.toml + # Edit vectorless.toml to customize settings + python main.py +""" + +import os +from vectorless import Engine, IndexContext + +# Path to config file (relative to this script) +CONFIG_PATH = "./vectorless.toml" +WORKSPACE = "./workspace" + + +def main(): + print("=== Vectorless Advanced Example (Full Configuration) ===\n") + + # Check if config file exists + if not os.path.exists(CONFIG_PATH): + print(f"Error: Config file not found: {CONFIG_PATH}") + print("\nCreate it by copying the example:") + print(f" cp ../../../config.toml {CONFIG_PATH}") + print("\nThen edit it to customize your settings.") + return + + # Create engine with config file + engine = Engine(config_path=CONFIG_PATH) + + print(f"✓ Engine created with config file: {CONFIG_PATH}\n") + + # Index a document + content = """ +# System Documentation + +## Architecture + +The system consists of three main components: + +1. **Index Pipeline** - Parses documents and builds a navigable tree +2. **Retrieval Pipeline** - Queries and retrieves relevant content +3. **Pilot** - LLM-powered navigation guide + +## Configuration Options + +### LLM Settings +- `model`: The LLM model to use (e.g., "gpt-4o", "gpt-4o-mini") +- `endpoint`: API endpoint URL +- `api_key`: Your API key +- `temperature`: Generation temperature (0.0 for deterministic) + +### Retrieval Settings +- `top_k`: Number of results to return +- `max_iterations`: Maximum search iterations +- `beam_width`: Beam width for multi-path search + +### Storage Settings +- `workspace_dir`: Directory for persisted documents +- `cache_size`: LRU cache size +- `compression`: Enable/disable compression + +## Performance Tuning + +For faster retrieval: +- Use a smaller model like gpt-4o-mini +- Reduce max_iterations +- Enable caching + +For higher accuracy: +- Use a more capable model like gpt-4o +- Increase beam_width +- Enable multi-turn decomposition +""" + ctx = IndexContext.from_content(content, name="system_docs", format="markdown") + doc_id = engine.index(ctx) + print(f"✓ Indexed: {doc_id}\n") + + # Query examples + questions = [ + "What are the main components?", + "How can I improve retrieval speed?", + "What settings are available?", + ] + + for q in questions: + result = engine.query(doc_id, q) + print(f"Q: {q}") + print(f"A: {result.content[:150]}...") + print(f" Score: {result.score:.2f}\n") + + # Cleanup + engine.remove(doc_id) + print("✓ Cleaned up") + + # Print configuration info + print("\n" + "=" * 60) + print("Configuration Priority") + print("=" * 60) + print(""" +1. Default configuration +2. Auto-detected config file (vectorless.toml, config.toml) +3. Explicit config file (config_path parameter) +4. Environment variables (OPENAI_API_KEY, etc.) +5. Constructor parameters (api_key, model, etc.) 
+""") + + +if __name__ == "__main__": + main() diff --git a/examples/python/advanced/pyproject.toml b/examples/python/advanced/pyproject.toml new file mode 100644 index 00000000..a85a964d --- /dev/null +++ b/examples/python/advanced/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "vectorless-advanced-example" +version = "0.1.0" +requires-python = ">=3.9" +dependencies = [ + "vectorless", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/examples/python/basic/README.md b/examples/python/basic/README.md new file mode 100644 index 00000000..e74975de --- /dev/null +++ b/examples/python/basic/README.md @@ -0,0 +1,16 @@ +# Basic Example - Zero Configuration + +The simplest way to use Vectorless. + +## Setup + +```bash +pip install vectorless +export OPENAI_API_KEY="sk-..." +``` + +## Run + +```bash +python main.py +``` diff --git a/examples/python/basic/main.py b/examples/python/basic/main.py new file mode 100644 index 00000000..4ae34b42 --- /dev/null +++ b/examples/python/basic/main.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Basic example - Zero Configuration. + +This example demonstrates the simplest way to use Vectorless. +Just set OPENAI_API_KEY environment variable and you're ready to go. + +Usage: + export OPENAI_API_KEY="sk-..." + python main.py +""" + +import os +import tempfile +from vectorless import Engine, IndexContext + + +def main(): + print("=== Vectorless Basic Example (Zero Configuration) ===\n") + + # Zero configuration: Just set OPENAI_API_KEY environment variable + with tempfile.TemporaryDirectory() as workspace: + engine = Engine(workspace=workspace) + + print("✓ Engine created (using OPENAI_API_KEY from environment)\n") + + # Index from text content + content = """ +# Technical Manual + +## Chapter 1: Introduction + +Vectorless is a library for querying structured documents using natural language. 
+ +## Chapter 2: Installation + +Install with pip: +``` +pip install vectorless +``` + +## Chapter 3: Usage + +```python +from vectorless import Engine, IndexContext + +engine = Engine(workspace="./data") +ctx = IndexContext.from_file("./report.pdf") +doc_id = engine.index(ctx) + +result = engine.query(doc_id, "What is the total revenue?") +print(result.content) +``` +""" + ctx = IndexContext.from_content(content, name="manual", format="markdown") + doc_id = engine.index(ctx) + print(f"✓ Indexed: {doc_id}\n") + + # Query + result = engine.query(doc_id, "How do I install vectorless?") + print("Query: How do I install vectorless?") + print(f"Score: {result.score:.2f}") + print(f"Result: {result.content[:200]}...\n") + + # Cleanup + engine.remove(doc_id) + print("✓ Cleaned up") + + print("\n=== Done ===") + + +if __name__ == "__main__": + if not os.environ.get("OPENAI_API_KEY"): + print("Error: OPENAI_API_KEY environment variable not set.") + print("Set it with: export OPENAI_API_KEY='sk-...'") + exit(1) + + main() diff --git a/examples/python/basic/pyproject.toml b/examples/python/basic/pyproject.toml new file mode 100644 index 00000000..d99ee750 --- /dev/null +++ b/examples/python/basic/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "vectorless-basic-example" +version = "0.1.0" +requires-python = ">=3.9" +dependencies = [ + "vectorless", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/examples/python/custom_config/README.md b/examples/python/custom_config/README.md new file mode 100644 index 00000000..b743098a --- /dev/null +++ b/examples/python/custom_config/README.md @@ -0,0 +1,48 @@ +# Custom Configuration Example + +Use your own API key, model, and endpoint. + +## Setup + +```bash +pip install vectorless +``` + +## Configure + +Edit `main.py` and update the settings: + +```python +API_KEY = "your-api-key" +MODEL = "gpt-4o-mini" # or "deepseek-chat", "claude-3-5-sonnet", etc. +ENDPOINT = "https://api.openai.com/v1" # or your custom endpoint +``` + +## Run + +```bash +python main.py +``` + +## Other Providers + +### DeepSeek +```python +API_KEY = "sk-..." +MODEL = "deepseek-chat" +ENDPOINT = "https://api.deepseek.com/v1" +``` + +### Azure OpenAI +```python +API_KEY = "your-azure-key" +MODEL = "gpt-4o" +ENDPOINT = "https://your-resource.openai.azure.com/openai/deployments/your-deployment" +``` + +### Local LLM (Ollama) +```python +API_KEY = None # Not needed +MODEL = "llama3" +ENDPOINT = "http://localhost:11434/v1" +``` diff --git a/examples/python/custom_config/main.py b/examples/python/custom_config/main.py new file mode 100644 index 00000000..d6e0bda4 --- /dev/null +++ b/examples/python/custom_config/main.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +Custom configuration example - Using your own API key, model, and endpoint. + +This example demonstrates how to use custom LLM settings without a config file. +Useful when you want to use different providers like DeepSeek, Azure OpenAI, etc. 
+ +Usage: + python main.py +""" + +import tempfile +from vectorless import Engine, IndexContext + +# ============================================================ +# Configure your settings here +# ============================================================ +API_KEY = "sk-or-v1-xxxx" # Your API key +MODEL = "google/gemini-3-flash-preview" # Model name +ENDPOINT = "https://api/v1" # API endpoint +WORKSPACE = "./workspace" # Workspace directory + + +def main(): + print("=== Vectorless Custom Configuration Example ===\n") + + # Create engine with custom settings + engine = Engine( + workspace=WORKSPACE, + api_key=API_KEY, + model=MODEL, + endpoint=ENDPOINT, + ) + + print(f"✓ Engine created with custom settings") + print(f" Model: {MODEL}") + print(f" Endpoint: {ENDPOINT}\n") + + # Index a document + content = """ +# Product Documentation + +## Overview +This product helps you manage documents intelligently using LLM-powered navigation. + +## Features +- Fast indexing with tree-based structure +- Accurate retrieval using hybrid search +- Easy to use Python and Rust APIs +- Support for PDF, Markdown, HTML, and DOCX + +## Installation + +Install with pip: +```bash +pip install vectorless +``` + +## Quick Start + +```python +from vectorless import Engine, IndexContext + +# Create engine +engine = Engine(workspace="./data") + +# Index a document +ctx = IndexContext.from_file("./report.pdf") +doc_id = engine.index(ctx) + +# Query +result = engine.query(doc_id, "What is the total revenue?") +print(result.content) +``` + +## Configuration + +Vectorless supports multiple configuration methods: +1. Zero configuration - just set OPENAI_API_KEY +2. Custom settings - pass api_key, model, endpoint +3. Full config file - use vectorless.toml +""" + ctx = IndexContext.from_content(content, name="docs", format="markdown") + doc_id = engine.index(ctx) + print(f"✓ Indexed: {doc_id}\n") + + # Check document info + docs = engine.list_docs() + print(f"Documents in workspace: {len(docs)}") + for d in docs: + print(f" - {d.name} (id: {d.id}, format: {d.format})") + print() + + # Query + result = engine.query(doc_id, "How do I install the product?") + print("Query: How do I install the product?") + print(f"Score: {result.score:.2f}") + print(f"Result: {result.content}\n") + + # Another query + result = engine.query(doc_id, "What features are available?") + print("Query: What features are available?") + print(f"Score: {result.score:.2f}") + print(f"Result: {result.content}\n") + + # Cleanup + engine.remove(doc_id) + print("✓ Cleaned up") + + print("\n=== Done ===") + + +if __name__ == "__main__": + main() diff --git a/examples/python/custom_config/pyproject.toml b/examples/python/custom_config/pyproject.toml new file mode 100644 index 00000000..d316077d --- /dev/null +++ b/examples/python/custom_config/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "vectorless-custom-config-example" +version = "0.1.0" +requires-python = ">=3.9" +dependencies = [ + "vectorless", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/examples/python_basic.py b/examples/python_basic.py deleted file mode 100644 index e9801e79..00000000 --- a/examples/python_basic.py +++ /dev/null @@ -1,273 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2026 vectorless developers -# SPDX-License-Identifier: Apache-2.0 - -""" -Basic example demonstrating the vectorless Python library. - -This example shows: -1. Creating an Engine with workspace -2. Indexing documents from different sources -3. 
Querying indexed documents -4. Managing documents (list, exists, remove) - -Prerequisites: - pip install vectorless - export OPENAI_API_KEY="sk-..." - -Usage: - python python_basic.py -""" - -import os -import tempfile -from pathlib import Path - -from vectorless import Engine, IndexContext, VectorlessError - - -def main(): - # Create a temporary workspace for this example - with tempfile.TemporaryDirectory() as workspace: - print(f"Workspace: {workspace}") - print() - - # ============================================================ - # 1. Create Engine - # ============================================================ - print("=" * 60) - print("1. Creating Engine") - print("=" * 60) - - # Option A: Use OPENAI_API_KEY environment variable - engine = Engine(workspace=workspace) - - # Option B: Explicit API key - # engine = Engine( - # workspace=workspace, - # api_key="sk-...", - # model="gpt-4o-mini", # optional - # ) - - print(f"Engine created successfully!") - print(f"Initial document count: {engine.len()}") - print() - - # ============================================================ - # 2. Index Documents - # ============================================================ - print("=" * 60) - print("2. Indexing Documents") - print("=" * 60) - - # 2a. Index from text content (Markdown) - markdown_content = """ -# Technical Manual - -## Chapter 1: Introduction - -This document describes the architecture of our system. - -## Chapter 2: Installation - -### System Requirements - -- Python 3.9+ -- Rust 1.75+ - -### Steps - -1. Install dependencies -2. Configure environment -3. Run the application - -## Chapter 3: API Reference - -### Engine - -The main entry point for vectorless. - -```python -engine = Engine(workspace="./data") -``` - -### IndexContext - -Context for indexing documents from various sources. -""" - ctx_md = IndexContext.from_text( - markdown_content, - name="technical_manual", - format="markdown" - ) - doc_id_md = engine.index(ctx_md) - print(f"Indexed markdown document: {doc_id_md}") - - # 2b. Index from text content (HTML) - html_content = """ - -Product Guide - -

-<h1>Product Guide</h1>
-
-<h2>Getting Started</h2>
-
-<p>Welcome to our product. This guide will help you get started.</p>
-
-<h2>Features</h2>
- - - -""" - ctx_html = IndexContext.from_text( - html_content, - name="product_guide", - format="html" - ) - doc_id_html = engine.index(ctx_html) - print(f"Indexed HTML document: {doc_id_html}") - - # 2c. Index from text content (plain text) - text_content = """ -Meeting Notes - Q4 Planning - -Date: 2024-01-15 - -Attendees: Alice, Bob, Charlie - -Agenda: -1. Review Q3 performance -2. Set Q4 goals -3. Resource allocation - -Key Decisions: -- Increase marketing budget by 20% -- Launch new product in March -- Hire 5 additional engineers -""" - ctx_text = IndexContext.from_text( - text_content, - name="meeting_notes", - format="text" - ) - doc_id_text = engine.index(ctx_text) - print(f"Indexed text document: {doc_id_text}") - - # 2d. Index from file (if you have actual files) - # ctx_file = IndexContext.from_file("./report.pdf") - # doc_id_file = engine.index(ctx_file) - # print(f"Indexed file: {doc_id_file}") - - print(f"\nTotal documents indexed: {engine.len()}") - print() - - # ============================================================ - # 3. List Documents - # ============================================================ - print("=" * 60) - print("3. Listing Documents") - print("=" * 60) - - docs = engine.list_docs() - for doc in docs: - print(f" - {doc.name} (id: {doc.id}, format: {doc.format})") - if doc.line_count: - print(f" Lines: {doc.line_count}") - print() - - # ============================================================ - # 4. Query Documents - # ============================================================ - print("=" * 60) - print("4. Querying Documents") - print("=" * 60) - - # Query the technical manual - questions = [ - "What are the system requirements?", - "How do I create an Engine?", - "What are the installation steps?", - ] - - for question in questions: - result = engine.query(doc_id_md, question) - print(f"Q: {question}") - print(f"A: {result.content[:200]}...") - print(f" Score: {result.score:.2f}") - print() - - # Query the meeting notes - result = engine.query(doc_id_text, "What was decided about the marketing budget?") - print(f"Q: What was decided about the marketing budget?") - print(f"A: {result.content}") - print(f" Score: {result.score:.2f}") - print() - - # ============================================================ - # 5. Check Document Existence - # ============================================================ - print("=" * 60) - print("5. Checking Document Existence") - print("=" * 60) - - print(f"Document {doc_id_md[:8]}... exists: {engine.exists(doc_id_md)}") - print(f"Document 'nonexistent' exists: {engine.exists('nonexistent')}") - print() - - # ============================================================ - # 6. Error Handling - # ============================================================ - print("=" * 60) - print("6. Error Handling") - print("=" * 60) - - try: - engine.query("nonexistent_doc_id", "question") - except VectorlessError as e: - print(f"Caught error: {e.message}") - print(f"Error kind: {e.kind}") - print() - - # ============================================================ - # 7. Remove Documents - # ============================================================ - print("=" * 60) - print("7. 
Removing Documents") - print("=" * 60) - - # Remove the HTML document - removed = engine.remove(doc_id_html) - print(f"Removed {doc_id_html}: {removed}") - print(f"Documents remaining: {engine.len()}") - - # Try to remove again (should return False) - removed_again = engine.remove(doc_id_html) - print(f"Remove again: {removed_again}") - print() - - # ============================================================ - # 8. Clear All Documents - # ============================================================ - print("=" * 60) - print("8. Clearing All Documents") - print("=" * 60) - - cleared_count = engine.clear() - print(f"Cleared {cleared_count} documents") - print(f"Final document count: {engine.len()}") - print() - - print("=" * 60) - print("Example completed successfully!") - print("=" * 60) - - -if __name__ == "__main__": - # Check for API key - if not os.environ.get("OPENAI_API_KEY"): - print("Warning: OPENAI_API_KEY environment variable not set.") - print("Some operations may fail without an API key.") - print() - - main() diff --git a/examples/rust/advanced.rs b/examples/rust/advanced.rs new file mode 100644 index 00000000..946f619b --- /dev/null +++ b/examples/rust/advanced.rs @@ -0,0 +1,68 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Advanced usage example - Full Configuration. +//! +//! This example demonstrates how to use a full configuration file +//! for advanced use cases where you need fine-grained control. +//! +//! # Usage +//! +//! ```bash +//! # First, copy the example config and edit it +//! cp config.toml ./my_vectorless.toml +//! # Edit my_vectorless.toml to customize settings +//! +//! cargo run --example advanced +//! ``` + +use vectorless::{Engine, IndexContext}; + +#[tokio::main] +async fn main() -> vectorless::Result<()> { + println!("=== Vectorless Advanced Example (Full Configuration) ===\n"); + + // Method 1: Use explicit config file path + // This loads all settings from the specified config file + let client = Engine::builder() + .with_config_path("./config.toml") // or "./my_vectorless.toml" + .build() + .await + .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?; + + println!("✓ Client created with config file\n"); + + // Index a document + let doc_id = client.index(IndexContext::from_path("./README.md")).await?; + println!("✓ Indexed: {}\n", doc_id); + + // Query + let result = client.query(&doc_id, "What features does Vectorless provide?").await?; + println!("Query: What features does Vectorless provide?"); + println!("Score: {:.2}", result.score); + if !result.content.is_empty() { + let preview: String = result.content.chars().take(200).collect(); + println!("Result: {}...\n", preview); + } + + // Cleanup + client.remove(&doc_id).await?; + println!("✓ Cleaned up"); + + println!("\n=== Configuration Options ===\n"); + println!("Configuration Priority (later overrides earlier):"); + println!(" 1. Default configuration"); + println!(" 2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml)"); + println!(" 3. Explicit config file (with_config_path)"); + println!(" 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.)"); + println!(" 5. 
Builder methods (with_openai, with_model, etc.)"); + println!(); + println!("Environment Variables:"); + println!(" OPENAI_API_KEY - LLM API key"); + println!(" VECTORLESS_MODEL - Default model name"); + println!(" VECTORLESS_ENDPOINT - API endpoint URL"); + println!(" VECTORLESS_WORKSPACE - Workspace directory"); + + println!("\n=== Done ==="); + Ok(()) +} diff --git a/examples/basic.rs b/examples/rust/basic.rs similarity index 100% rename from examples/basic.rs rename to examples/rust/basic.rs diff --git a/examples/batch_processing.rs b/examples/rust/batch_processing.rs similarity index 100% rename from examples/batch_processing.rs rename to examples/rust/batch_processing.rs diff --git a/examples/cli_tool.rs b/examples/rust/cli_tool.rs similarity index 100% rename from examples/cli_tool.rs rename to examples/rust/cli_tool.rs diff --git a/examples/content_aggregation.rs b/examples/rust/content_aggregation.rs similarity index 100% rename from examples/content_aggregation.rs rename to examples/rust/content_aggregation.rs diff --git a/examples/rust/custom_config.rs b/examples/rust/custom_config.rs new file mode 100644 index 00000000..12eaedc4 --- /dev/null +++ b/examples/rust/custom_config.rs @@ -0,0 +1,92 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Custom configuration example - Using your own API key, model, and endpoint. +//! +//! This example demonstrates how to use custom LLM settings without a config file. +//! Useful when you want to use different providers like Azure OpenAI, DeepSeek, etc. +//! +//! # Usage +//! +//! ```bash +//! cargo run --example custom_config +//! ``` + +use vectorless::{Engine, IndexContext}; + +#[tokio::main] +async fn main() -> vectorless::Result<()> { + println!("=== Vectorless Custom Configuration Example ===\n"); + + // ============================================================ + // Option 1: Use environment variables + // ============================================================ + // Set these environment variables: + // - OPENAI_API_KEY or VECTORLESS_API_KEY + // - VECTORLESS_MODEL (optional, default: gpt-4o) + // - VECTORLESS_ENDPOINT (optional, default: OpenAI endpoint) + // - VECTORLESS_WORKSPACE (optional, default: ./workspace) + + // ============================================================ + // Option 2: Use builder methods (recommended for custom config) + // ============================================================ + + // Example: Use DeepSeek API + let client = Engine::builder() + .with_workspace("./workspace") + .with_model("deepseek-chat", Some("sk-your-deepseek-key".to_string())) + .with_endpoint("https://api.deepseek.com/v1") + .build() + .await + .map_err(|e: vectorless::BuildError| vectorless::Error::Config(e.to_string()))?; + + println!("✓ Client created with custom settings\n"); + + // Index a document + let doc_id = client.index(IndexContext::from_path("./README.md")).await?; + println!("✓ Indexed: {}\n", doc_id); + + // Query + let result = client.query(&doc_id, "What is Vectorless?").await?; + println!("Query: What is Vectorless?"); + println!("Score: {:.2}", result.score); + if !result.content.is_empty() { + let preview: String = result.content.chars().take(200).collect(); + println!("Result: {}...\n", preview); + } + + // Cleanup + client.remove(&doc_id).await?; + println!("✓ Cleaned up"); + + // ============================================================ + // Other provider examples (commented out) + // ============================================================ + + // 
Azure OpenAI: + // let client = Engine::builder() + // .with_workspace("./workspace") + // .with_model("gpt-4o", Some("your-azure-key".to_string())) + // .with_endpoint("https://your-resource.openai.azure.com/openai/deployments/your-deployment") + // .build() + // .await?; + + // Local LLM (e.g., Ollama with OpenAI-compatible API): + // let client = Engine::builder() + // .with_workspace("./workspace") + // .with_model("llama3", None) // No API key needed + // .with_endpoint("http://localhost:11434/v1") + // .build() + // .await?; + + // Anthropic Claude (via OpenAI-compatible proxy): + // let client = Engine::builder() + // .with_workspace("./workspace") + // .with_model("claude-3-5-sonnet-20241022", Some("sk-ant-...".to_string())) + // .with_endpoint("https://api.anthropic.com/v1") + // .build() + // .await?; + + println!("\n=== Done ==="); + Ok(()) +} diff --git a/examples/custom_pilot.rs b/examples/rust/custom_pilot.rs similarity index 100% rename from examples/custom_pilot.rs rename to examples/rust/custom_pilot.rs diff --git a/examples/events.rs b/examples/rust/events.rs similarity index 100% rename from examples/events.rs rename to examples/rust/events.rs diff --git a/examples/feedback_learning.rs b/examples/rust/feedback_learning.rs similarity index 100% rename from examples/feedback_learning.rs rename to examples/rust/feedback_learning.rs diff --git a/examples/html_parser.rs b/examples/rust/html_parser.rs similarity index 100% rename from examples/html_parser.rs rename to examples/rust/html_parser.rs diff --git a/examples/index.rs b/examples/rust/index.rs similarity index 100% rename from examples/index.rs rename to examples/rust/index.rs diff --git a/examples/markdownflow.rs b/examples/rust/markdownflow.rs similarity index 100% rename from examples/markdownflow.rs rename to examples/rust/markdownflow.rs diff --git a/examples/memo_cache.rs b/examples/rust/memo_cache.rs similarity index 100% rename from examples/memo_cache.rs rename to examples/rust/memo_cache.rs diff --git a/examples/multi_format.rs b/examples/rust/multi_format.rs similarity index 100% rename from examples/multi_format.rs rename to examples/rust/multi_format.rs diff --git a/examples/reference_following.rs b/examples/rust/reference_following.rs similarity index 100% rename from examples/reference_following.rs rename to examples/rust/reference_following.rs diff --git a/examples/retrieve.rs b/examples/rust/retrieve.rs similarity index 100% rename from examples/retrieve.rs rename to examples/rust/retrieve.rs diff --git a/examples/session.rs b/examples/rust/session.rs similarity index 100% rename from examples/session.rs rename to examples/rust/session.rs diff --git a/examples/storage_backend.rs b/examples/rust/storage_backend.rs similarity index 100% rename from examples/storage_backend.rs rename to examples/rust/storage_backend.rs diff --git a/examples/storage_compression.rs b/examples/rust/storage_compression.rs similarity index 100% rename from examples/storage_compression.rs rename to examples/rust/storage_compression.rs diff --git a/examples/storage_migration.rs b/examples/rust/storage_migration.rs similarity index 100% rename from examples/storage_migration.rs rename to examples/rust/storage_migration.rs diff --git a/examples/storage_workspace.rs b/examples/rust/storage_workspace.rs similarity index 100% rename from examples/storage_workspace.rs rename to examples/rust/storage_workspace.rs diff --git a/examples/strategy_cross_document.rs b/examples/rust/strategy_cross_document.rs similarity index 100% rename 
from examples/strategy_cross_document.rs rename to examples/rust/strategy_cross_document.rs diff --git a/examples/strategy_hybrid.rs b/examples/rust/strategy_hybrid.rs similarity index 100% rename from examples/strategy_hybrid.rs rename to examples/rust/strategy_hybrid.rs diff --git a/examples/strategy_page_range.rs b/examples/rust/strategy_page_range.rs similarity index 100% rename from examples/strategy_page_range.rs rename to examples/rust/strategy_page_range.rs diff --git a/examples/streaming.rs b/examples/rust/streaming.rs similarity index 100% rename from examples/streaming.rs rename to examples/rust/streaming.rs diff --git a/python/src/lib.rs b/python/src/lib.rs index 72c2f061..fc2cf730 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -28,6 +28,11 @@ pub struct VectorlessError { #[pymethods] impl VectorlessError { + #[new] + fn new_py(message: String, kind: String) -> Self { + Self { message, kind } + } + #[getter] fn message(&self) -> &str { &self.message @@ -96,7 +101,7 @@ fn to_py_err(e: RustError) -> PyErr { /// # From bytes /// ctx = IndexContext.from_bytes(data, name="doc", format="pdf") /// ``` -#[pyclass] +#[pyclass(name = "IndexContext")] pub struct PyIndexContext { inner: IndexContext, } @@ -134,7 +139,7 @@ impl PyIndexContext { /// IndexContext for the content. #[staticmethod] #[pyo3(signature = (content, name=None, format="markdown"))] - fn from_text(content: String, name: Option, format: &str) -> PyResult { + fn from_content(content: String, name: Option, format: &str) -> PyResult { let doc_format = parse_format(format)?; let mut ctx = IndexContext::from_content(&content, doc_format); if let Some(n) = name { @@ -168,9 +173,8 @@ fn parse_format(format: &str) -> PyResult { "pdf" => Ok(DocumentFormat::Pdf), "docx" | "doc" => Ok(DocumentFormat::Docx), "html" | "htm" => Ok(DocumentFormat::Html), - "text" | "txt" => Ok(DocumentFormat::Text), _ => Err(PyErr::from(VectorlessError::new( - format!("Unknown format: {}", format), + format!("Unknown format: {}. Supported: markdown, pdf, docx, html", format), "config", ))), } @@ -181,7 +185,7 @@ fn parse_format(format: &str) -> PyResult { // ============================================================ /// Result of a document query. -#[pyclass] +#[pyclass(name = "QueryResult")] pub struct PyQueryResult { inner: QueryResult, } @@ -227,7 +231,7 @@ impl PyQueryResult { // ============================================================ /// Information about an indexed document. -#[pyclass] +#[pyclass(name = "DocumentInfo")] pub struct PyDocumentInfo { inner: DocumentInfo, } @@ -284,7 +288,16 @@ impl PyDocumentInfo { /// The main vectorless engine. /// -/// Create an engine with a workspace directory: +/// Configuration priority (later overrides earlier): +/// 1. Default configuration +/// 2. Auto-detected config file (vectorless.toml, config.toml, .vectorless.toml) +/// 3. Explicit config file (config_path parameter) +/// 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.) +/// 5. 
Constructor parameters (api_key, model, endpoint) - highest priority +/// +/// # Zero Configuration (Recommended) +/// +/// Just set OPENAI_API_KEY environment variable: /// /// ```python /// from vectorless import Engine @@ -292,12 +305,18 @@ impl PyDocumentInfo { /// engine = Engine(workspace="./data") /// ``` /// -/// Or with an explicit API key: +/// # With Custom Model +/// +/// ```python +/// engine = Engine(workspace="./data", model="gpt-4o-mini") +/// ``` +/// +/// # With Full Config File (Advanced) /// /// ```python -/// engine = Engine(workspace="./data", api_key="sk-...") +/// engine = Engine(config_path="./vectorless.toml") /// ``` -#[pyclass] +#[pyclass(name = "Engine")] pub struct PyEngine { inner: Arc, rt: Runtime, @@ -308,17 +327,26 @@ impl PyEngine { /// Create a new Engine. /// /// Args: - /// workspace: Path to the workspace directory. + /// workspace: Path to the workspace directory (optional if config_path provides it). + /// config_path: Path to configuration file (optional, advanced usage). /// api_key: Optional API key. If not provided, uses OPENAI_API_KEY env var. - /// model: Optional model name. Default: "gpt-4o-mini". + /// model: Optional model name. Default: "gpt-4o". /// endpoint: Optional API endpoint. /// + /// Configuration priority (later overrides earlier): + /// 1. Default configuration + /// 2. Auto-detected config file + /// 3. config_path parameter + /// 4. Environment variables (OPENAI_API_KEY, VECTORLESS_MODEL, etc.) + /// 5. Constructor parameters (api_key, model, endpoint) + /// /// Raises: /// VectorlessError: If engine creation fails. #[new] - #[pyo3(signature = (workspace, api_key=None, model=None, endpoint=None))] + #[pyo3(signature = (workspace=None, config_path=None, api_key=None, model=None, endpoint=None))] fn new( - workspace: String, + workspace: Option, + config_path: Option, api_key: Option, model: Option, endpoint: Option, @@ -334,18 +362,31 @@ impl PyEngine { let resolved_api_key = api_key.or_else(|| std::env::var("OPENAI_API_KEY").ok()); let engine = rt.block_on(async { - let mut builder = EngineBuilder::new().with_workspace(&workspace); + let mut builder = EngineBuilder::new(); - if let Some(key) = resolved_api_key { - builder = builder.with_openai(key); + // Set config path first (if provided) + if let Some(path) = &config_path { + builder = builder.with_config_path(path); + } + + // Set workspace (if provided) + if let Some(ws) = &workspace { + builder = builder.with_workspace(ws); } - if let Some(m) = model { - builder = builder.with_model(&m, None); + // Set model first (without overriding api_key) + if let Some(m) = &model { + builder = builder.with_model(m, None); } - if let Some(e) = endpoint { - builder = builder.with_endpoint(&e); + // Set endpoint + if let Some(e) = &endpoint { + builder = builder.with_endpoint(e); + } + + // Set API key last (this ensures it's not overwritten) + if let Some(key) = resolved_api_key { + builder = builder.with_openai(key); } builder.build().await @@ -506,7 +547,7 @@ impl PyEngine { /// print(result.content) /// ``` #[pymodule] -fn _vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { +fn vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 78f65624..fe9729b9 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -11,7 +11,104 @@ documentation = "https://docs.rs/vectorless" keywords = ["rag", "document", "retrieval", "indexing", "llm"] categories = 
["text-processing", "data-structures", "algorithms"] readme = "../README.md" -exclude = ["samples/", "docs/", ".*"] +exclude = ["docs/", ".*"] + +# Example directory configuration +[[example]] +name = "basic" +path = "../examples/rust/basic.rs" + +[[example]] +name = "advanced" +path = "../examples/rust/advanced.rs" + +[[example]] +name = "batch_processing" +path = "../examples/rust/batch_processing.rs" + +[[example]] +name = "cli_tool" +path = "../examples/rust/cli_tool.rs" + +[[example]] +name = "content_aggregation" +path = "../examples/rust/content_aggregation.rs" + +[[example]] +name = "custom_config" +path = "../examples/rust/custom_config.rs" + +[[example]] +name = "custom_pilot" +path = "../examples/rust/custom_pilot.rs" + +[[example]] +name = "events" +path = "../examples/rust/events.rs" + +[[example]] +name = "feedback_learning" +path = "../examples/rust/feedback_learning.rs" + +[[example]] +name = "html_parser" +path = "../examples/rust/html_parser.rs" + +[[example]] +name = "index" +path = "../examples/rust/index.rs" + +[[example]] +name = "markdownflow" +path = "../examples/rust/markdownflow.rs" + +[[example]] +name = "multi_format" +path = "../examples/rust/multi_format.rs" + +[[example]] +name = "reference_following" +path = "../examples/rust/reference_following.rs" + +[[example]] +name = "retrieve" +path = "../examples/rust/retrieve.rs" + +[[example]] +name = "session" +path = "../examples/rust/session.rs" + +[[example]] +name = "storage_backend" +path = "../examples/rust/storage_backend.rs" + +[[example]] +name = "storage_compression" +path = "../examples/rust/storage_compression.rs" + +[[example]] +name = "storage_migration" +path = "../examples/rust/storage_migration.rs" + +[[example]] +name = "storage_workspace" +path = "../examples/rust/storage_workspace.rs" + +[[example]] +name = "strategy_cross_document" +path = "../examples/rust/strategy_cross_document.rs" + +[[example]] +name = "strategy_hybrid" +path = "../examples/rust/strategy_hybrid.rs" + +[[example]] +name = "strategy_page_range" +path = "../examples/rust/strategy_page_range.rs" + +[[example]] +name = "streaming" +path = "../examples/rust/streaming.rs" [dependencies] # Async runtime diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index 61621f23..70c3682b 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -6,27 +6,67 @@ //! This module provides [`EngineBuilder`] for configuring and building //! [`Engine`] instances with sensible defaults. //! -//! # Example +//! # Configuration Priority +//! +//! Configuration is applied in this order (later overrides earlier): +//! 1. Default configuration +//! 2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`) +//! 3. Explicit config file (`with_config_path`) +//! 4. Environment variables (`OPENAI_API_KEY`, `VECTORLESS_MODEL`, etc.) +//! 5. Builder methods (`with_openai`, `with_model`, etc.) - highest priority +//! +//! # Environment Variables +//! +//! | Variable | Description | +//! |----------|-------------| +//! | `OPENAI_API_KEY` | LLM API key | +//! | `VECTORLESS_MODEL` | Default model name | +//! | `VECTORLESS_ENDPOINT` | API endpoint URL | +//! | `VECTORLESS_WORKSPACE` | Workspace directory | +//! +//! # Examples +//! +//! ## Zero Configuration (Recommended) //! //! ```rust,no_run //! use vectorless::client::EngineBuilder; //! //! # #[tokio::main] //! # async fn main() -> Result<(), vectorless::BuildError> { -//! // Simple setup with workspace +//! 
// Just set OPENAI_API_KEY environment variable //! let engine = EngineBuilder::new() -//! .with_workspace("./my_workspace") -//! .with_openai(std::env::var("OPENAI_API_KEY").unwrap()) +//! .with_workspace("./data") //! .build() //! .await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! ## With Custom Model +//! +//! ```rust,no_run +//! use vectorless::client::EngineBuilder; //! -//! // Advanced configuration +//! # #[tokio::main] +//! # async fn main() -> Result<(), vectorless::BuildError> { //! let engine = EngineBuilder::new() //! .with_workspace("./data") -//! .with_model("gpt-4o", None) -//! .with_endpoint("https://api.openai.com/v1") -//! .with_top_k(10) -//! .precise() +//! .with_model("gpt-4o-mini", None) // Uses OPENAI_API_KEY from env +//! .build() +//! .await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! ## With Full Config File (Advanced) +//! +//! ```rust,no_run +//! use vectorless::client::EngineBuilder; +//! +//! # #[tokio::main] +//! # async fn main() -> Result<(), vectorless::BuildError> { +//! let engine = EngineBuilder::new() +//! .with_config_path("./vectorless.toml") //! .build() //! .await?; //! # Ok(()) @@ -49,16 +89,25 @@ const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorl /// Builder for creating a [`Engine`] client. /// /// The builder uses sensible defaults and automatically loads -/// LLM configuration from environment variables or config files. +/// configuration from config files and environment variables. /// /// # Configuration Priority /// -/// Configuration is loaded in this order (later overrides earlier): +/// Configuration is applied in this order (later overrides earlier): /// 1. Default configuration -/// 2. Auto-detected config file +/// 2. Auto-detected config file (`vectorless.toml`, `config.toml`, `.vectorless.toml`) /// 3. Explicit config file (`with_config_path`) -/// 4. Custom config object (`with_config`) -/// 5. Individual builder methods +/// 4. Environment variables (`OPENAI_API_KEY`, `VECTORLESS_MODEL`, etc.) +/// 5. Builder methods (`with_openai`, `with_model`, etc.) - highest priority +/// +/// # Environment Variables +/// +/// | Variable | Description | +/// |----------|-------------| +/// | `OPENAI_API_KEY` | LLM API key | +/// | `VECTORLESS_MODEL` | Default model name | +/// | `VECTORLESS_ENDPOINT` | API endpoint URL | +/// | `VECTORLESS_WORKSPACE` | Workspace directory | /// /// # Example /// @@ -67,9 +116,9 @@ const CONFIG_FILE_NAMES: &[&str] = &["vectorless.toml", "config.toml", ".vectorl /// /// # #[tokio::main] /// # async fn main() -> Result<(), vectorless::BuildError> { +/// // Zero configuration - just set OPENAI_API_KEY environment variable /// let client = EngineBuilder::new() /// .with_workspace("./my_workspace") -/// .with_openai(std::env::var("OPENAI_API_KEY").unwrap()) /// .build() /// .await?; /// # Ok(()) @@ -236,8 +285,8 @@ impl EngineBuilder { /// Configure for OpenAI API. /// - /// Uses `gpt-4o` model by default. Use [`with_model`](EngineBuilder::with_model) - /// to specify a different model. + /// Sets the API key and optionally the model to "gpt-4o" if not already set. + /// Use [`with_model`](EngineBuilder::with_model) before this to specify a different model. 
/// /// # Example /// @@ -256,7 +305,13 @@ impl EngineBuilder { /// ``` #[must_use] pub fn with_openai(self, api_key: impl Into) -> Self { - self.with_model("gpt-4o", Some(api_key.into())) + let mut builder = self; + builder.api_key = Some(api_key.into()); + // Only set default model if not already set + if builder.model.is_none() { + builder.model = Some("gpt-4o".to_string()); + } + builder } /// Set the LLM model and optional API key. @@ -284,7 +339,9 @@ impl EngineBuilder { #[must_use] pub fn with_model(mut self, model: impl Into, api_key: Option) -> Self { self.model = Some(model.into()); - self.api_key = api_key; + if api_key.is_some() { + self.api_key = api_key; + } self } @@ -357,6 +414,57 @@ impl EngineBuilder { self } + /// Apply environment variable overrides to a Config. + /// + /// This is used when a custom Config is provided via `with_config` + /// or when using default config without a config file. + fn apply_env_overrides(config: &mut Config) { + // OPENAI_API_KEY: Set API key for all LLM clients + if let Ok(api_key) = std::env::var("OPENAI_API_KEY") { + // Set default API key + config.llm.api_key = Some(api_key.clone()); + // Override individual client API keys if not explicitly set + if config.llm.summary.api_key.is_none() { + config.llm.summary.api_key = Some(api_key.clone()); + } + if config.llm.retrieval.api_key.is_none() { + config.llm.retrieval.api_key = Some(api_key.clone()); + } + if config.llm.pilot.api_key.is_none() { + config.llm.pilot.api_key = Some(api_key); + } + // Also set legacy config for backwards compatibility + if config.summary.api_key.is_none() { + config.summary.api_key = Some(std::env::var("OPENAI_API_KEY").unwrap()); + } + } + + // VECTORLESS_MODEL: Set default model + if let Ok(model) = std::env::var("VECTORLESS_MODEL") { + config.llm.summary.model = model.clone(); + config.llm.retrieval.model = model.clone(); + config.llm.pilot.model = model.clone(); + // Also set legacy config + config.summary.model = model.clone(); + config.retrieval.model = model; + } + + // VECTORLESS_ENDPOINT: Set API endpoint + if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") { + config.llm.summary.endpoint = endpoint.clone(); + config.llm.retrieval.endpoint = endpoint.clone(); + config.llm.pilot.endpoint = endpoint.clone(); + // Also set legacy config + config.summary.endpoint = endpoint.clone(); + config.retrieval.endpoint = endpoint; + } + + // VECTORLESS_WORKSPACE: Set workspace directory + if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") { + config.storage.workspace_dir = PathBuf::from(workspace); + } + } + /// Search for config file in current directory and parent directories. fn find_config_file() -> Option { let current_dir = std::env::current_dir().ok()?; @@ -414,8 +522,12 @@ impl EngineBuilder { /// ``` pub async fn build(self) -> Result { // Load or create configuration + // ConfigLoader automatically applies environment variable overrides let mut config = if let Some(config) = self.config { - config + // Custom config - still apply env vars + let mut cfg = config; + Self::apply_env_overrides(&mut cfg); + cfg } else if let Some(path) = self.config_path { ConfigLoader::new() .file(&path) @@ -426,7 +538,10 @@ impl EngineBuilder { BuildError::Config(format!("Failed to load {}: {}", config_path.display(), e)) })? 
} else { - Config::default() + // No config file - use defaults with env var overrides + let mut cfg = Config::default(); + Self::apply_env_overrides(&mut cfg); + cfg }; // Apply builder overrides to retrieval config @@ -436,13 +551,24 @@ impl EngineBuilder { // Apply individual overrides if let Some(api_key) = self.api_key { - config.retrieval.api_key = Some(api_key); + // Set API key for both retrieval and summary + config.retrieval.api_key = Some(api_key.clone()); + config.summary.api_key = Some(api_key); + // Also set LLM pool config + if config.llm.summary.api_key.is_none() { + config.llm.summary.api_key = config.summary.api_key.clone(); + } + if config.llm.retrieval.api_key.is_none() { + config.llm.retrieval.api_key = config.summary.api_key.clone(); + } } if let Some(model) = self.model { - config.retrieval.model = model; + config.retrieval.model = model.clone(); + config.summary.model = model; } if let Some(endpoint) = self.endpoint { - config.retrieval.endpoint = endpoint; + config.retrieval.endpoint = endpoint.clone(); + config.summary.endpoint = endpoint; } if let Some(top_k) = self.top_k { config.retrieval.top_k = top_k; diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index 886e3197..4297d8e0 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -214,6 +214,9 @@ impl Engine { /// # } /// ``` pub async fn index(&self, ctx: IndexContext) -> Result { + println!("Indexing..."); + println!("ctx: {:?}", ctx); + let doc = self.indexer.index(ctx).await?; let persisted = self.indexer.to_persisted(doc); diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs index 679dbea0..0ba0a847 100644 --- a/rust/src/client/indexer.rs +++ b/rust/src/client/indexer.rs @@ -259,17 +259,19 @@ impl IndexerClient { options: &IndexOptions, format: DocumentFormat, ) -> PipelineOptions { + println!("[DEBUG] Building pipeline options for format: {:?} with options: {:?}", format, options); + PipelineOptions { mode: match format { DocumentFormat::Markdown => IndexMode::Markdown, DocumentFormat::Pdf => IndexMode::Pdf, DocumentFormat::Html => IndexMode::Html, DocumentFormat::Docx => IndexMode::Docx, - DocumentFormat::Text => IndexMode::Auto, }, generate_ids: options.generate_ids, summary_strategy: if options.generate_summaries { - SummaryStrategy::selective(self.config.min_summary_tokens, false) + // SummaryStrategy::selective(self.config.min_summary_tokens, false) + SummaryStrategy::full() } else { SummaryStrategy::none() }, diff --git a/rust/src/client/types.rs b/rust/src/client/types.rs index 31438a62..0cfcb065 100644 --- a/rust/src/client/types.rs +++ b/rust/src/client/types.rs @@ -174,7 +174,7 @@ impl Default for IndexOptions { fn default() -> Self { Self { mode: IndexMode::Default, - generate_summaries: false, + generate_summaries: true, include_text: true, generate_ids: true, generate_description: false, diff --git a/rust/src/config/loader.rs b/rust/src/config/loader.rs index 33f8bb9f..e436a315 100644 --- a/rust/src/config/loader.rs +++ b/rust/src/config/loader.rs @@ -3,17 +3,33 @@ //! Configuration loader. //! -//! Loads configuration from TOML files with validation. -//! All configuration must be explicit in the config file - no environment variables. +//! Loads configuration from TOML files with environment variable overrides. +//! +//! # Configuration Priority +//! +//! Configuration is loaded in this order (later overrides earlier): +//! 1. Default configuration +//! 2. Config file (if found or specified) +//! 3. Environment variables +//! +//! 
# Environment Variables +//! +//! | Variable | Description | Maps To | +//! |----------|-------------|---------| +//! | `OPENAI_API_KEY` | LLM API key | `llm.api_key` / `retrieval.api_key` | +//! | `VECTORLESS_MODEL` | Default LLM model | `retrieval.model` | +//! | `VECTORLESS_ENDPOINT` | LLM API endpoint | `retrieval.endpoint` | +//! | `VECTORLESS_WORKSPACE` | Workspace directory | `storage.workspace_dir` | //! //! # Example //! //! ```rust,no_run //! use vectorless::config::{ConfigLoader, Config}; //! -//! // Load from file +//! // Load from file with environment variable overrides //! let config = ConfigLoader::new() //! .file("config.toml") +//! .with_env(true) // Enable environment variables (default: true) //! .load()?; //! //! // Load with validation @@ -73,6 +89,9 @@ pub struct ConfigLoader { /// Custom validator (optional). validator: Option, + + /// Whether to apply environment variable overrides. + env_enabled: bool, } impl Default for ConfigLoader { @@ -88,6 +107,7 @@ impl ConfigLoader { files: Vec::new(), validate: false, validator: None, + env_enabled: true, } } @@ -122,13 +142,68 @@ impl ConfigLoader { self } + /// Enable or disable environment variable overrides. + /// + /// When enabled (default), environment variables override config file values: + /// - `OPENAI_API_KEY` → sets API key for all LLM clients + /// - `VECTORLESS_MODEL` → sets default model + /// - `VECTORLESS_ENDPOINT` → sets API endpoint + /// - `VECTORLESS_WORKSPACE` → sets workspace directory + pub fn with_env(mut self, enabled: bool) -> Self { + self.env_enabled = enabled; + self + } + + /// Apply environment variable overrides to configuration. + fn apply_env_overrides(&self, config: &mut Config) { + if !self.env_enabled { + return; + } + + // OPENAI_API_KEY: Set API key for all LLM clients + if let Ok(api_key) = std::env::var("OPENAI_API_KEY") { + // Set default API key + config.llm.api_key = Some(api_key.clone()); + // Override individual client API keys if not explicitly set + if config.llm.summary.api_key.is_none() { + config.llm.summary.api_key = Some(api_key.clone()); + } + if config.llm.retrieval.api_key.is_none() { + config.llm.retrieval.api_key = Some(api_key.clone()); + } + if config.llm.pilot.api_key.is_none() { + config.llm.pilot.api_key = Some(api_key); + } + } + + // VECTORLESS_MODEL: Set default model + if let Ok(model) = std::env::var("VECTORLESS_MODEL") { + config.llm.summary.model = model.clone(); + config.llm.retrieval.model = model.clone(); + config.llm.pilot.model = model; + } + + // VECTORLESS_ENDPOINT: Set API endpoint + if let Ok(endpoint) = std::env::var("VECTORLESS_ENDPOINT") { + config.llm.summary.endpoint = endpoint.clone(); + config.llm.retrieval.endpoint = endpoint.clone(); + config.llm.pilot.endpoint = endpoint; + } + + // VECTORLESS_WORKSPACE: Set workspace directory + if let Ok(workspace) = std::env::var("VECTORLESS_WORKSPACE") { + config.storage.workspace_dir = PathBuf::from(workspace); + } + } + /// Load the configuration. /// /// # Behavior /// /// 1. Start with default configuration /// 2. Load and merge each specified file (in order) - /// 3. Validate configuration (if enabled) + /// 3. Apply environment variable overrides (if enabled) + /// 4. 
Validate configuration (if enabled) /// /// # Errors /// @@ -150,6 +225,9 @@ impl ConfigLoader { } } + // Apply environment variable overrides + self.apply_env_overrides(&mut config); + // Validate if requested if self.validate { let validator = self.validator.unwrap_or_default(); diff --git a/rust/src/index/config.rs b/rust/src/index/config.rs index f08b5968..f5cabebc 100644 --- a/rust/src/index/config.rs +++ b/rust/src/index/config.rs @@ -160,7 +160,7 @@ impl Default for PipelineOptions { Self { mode: IndexMode::Auto, generate_ids: true, - summary_strategy: SummaryStrategy::default(), + summary_strategy: SummaryStrategy::full(), thinning: ThinningConfig::default(), optimization: OptimizationConfig::default(), generate_description: true, diff --git a/rust/src/index/pipeline/context.rs b/rust/src/index/pipeline/context.rs index ab9a462d..979839a8 100644 --- a/rust/src/index/pipeline/context.rs +++ b/rust/src/index/pipeline/context.rs @@ -51,7 +51,7 @@ impl IndexInput { Self::Content { content: content.into(), name: String::new(), - format: DocumentFormat::Text, + format: DocumentFormat::Markdown, } } diff --git a/rust/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs index 4fb29310..e848e832 100644 --- a/rust/src/index/stages/enhance.rs +++ b/rust/src/index/stages/enhance.rs @@ -101,6 +101,7 @@ impl IndexStage for EnhanceStage { // Check if we need summaries if !self.needs_summaries(ctx) { + println!("[DEBUG] Summary generation skipped (strategy: {:?})", ctx.options.summary_strategy); info!( "Summary generation skipped (strategy: {:?})", ctx.options.summary_strategy @@ -112,6 +113,7 @@ impl IndexStage for EnhanceStage { let llm_client = match &self.llm_client { Some(client) => client, None => { + println!("[DEBUG] No LLM client configured, skipping summary generation"); warn!("No LLM client configured, skipping summary generation"); return Ok(StageResult::success("enhance")); } @@ -121,11 +123,13 @@ impl IndexStage for EnhanceStage { let tree = match ctx.tree.as_mut() { Some(t) => t, None => { + println!("[DEBUG] No tree built, skipping enhance stage"); warn!("No tree built, skipping enhance stage"); return Ok(StageResult::success("enhance")); } }; + println!("[DEBUG] Using summary strategy: {:?}", ctx.options.summary_strategy); info!("Using summary strategy: {:?}", ctx.options.summary_strategy); // Create summary generator with optional memo store @@ -141,11 +145,14 @@ impl IndexStage for EnhanceStage { let node_ids: Vec = tree.traverse(); let total_nodes = node_ids.len(); + println!("[DEBUG] Processing {} nodes for summary generation", total_nodes); info!("Processing {} nodes for summary generation", total_nodes); // Process nodes let mut generated = 0; let mut failed = 0; + let mut skipped_no_content = 0; + let mut skipped_tokens = 0; let strategy = ctx.options.summary_strategy.clone(); for node_id in node_ids { @@ -154,15 +161,18 @@ impl IndexStage for EnhanceStage { Some(n) => n.clone(), None => continue, }; + println!("[DEBUG] Evaluating node for summary: {} {}", node.title, node.content); // Skip if no content if node.content.is_empty() { + skipped_no_content += 1; continue; } // Get token count and check if we should generate let token_count = node.token_count.unwrap_or(0); if !strategy.should_generate(tree, node_id, token_count) { + skipped_tokens += 1; continue; } @@ -194,6 +204,9 @@ impl IndexStage for EnhanceStage { } // Generate summary (generator also has memoization built-in) + println!("[DEBUG] Calling LLM to generate summary for node: {} ({} tokens)", 
node.title, token_count); + println!("[DEBUG] Node content: {}", node.content); + match generator.generate(&node.title, &node.content).await { Ok(summary) => { if summary.is_empty() { @@ -223,6 +236,8 @@ impl IndexStage for EnhanceStage { let duration = start.elapsed().as_millis() as u64; ctx.metrics.record_enhance(duration); + println!("[DEBUG] Generated {} summaries ({} failed, {} skipped no content, {} skipped tokens) in {}ms", + generated, failed, skipped_no_content, skipped_tokens, duration); info!( "Generated {} summaries ({} failed) in {}ms", generated, failed, duration diff --git a/rust/src/llm/executor.rs b/rust/src/llm/executor.rs index 8ac193f1..2c157432 100644 --- a/rust/src/llm/executor.rs +++ b/rust/src/llm/executor.rs @@ -360,7 +360,7 @@ impl LlmExecutor { ChatCompletionRequestUserMessage::from(truncated).into(), ]) .temperature(self.config.temperature) - .max_tokens(tokens) + // .max_tokens(tokens) .build() } else { CreateChatCompletionRequestArgs::default() diff --git a/rust/src/parser/registry.rs b/rust/src/parser/registry.rs index ef1cf416..4f3d0e8c 100644 --- a/rust/src/parser/registry.rs +++ b/rust/src/parser/registry.rs @@ -141,7 +141,7 @@ impl ParserRegistry { /// For binary formats (PDF, DOCX), the parser handles the bytes directly. pub async fn parse_bytes(&self, bytes: &[u8], format: DocumentFormat) -> Result { match format { - DocumentFormat::Markdown | DocumentFormat::Html | DocumentFormat::Text => { + DocumentFormat::Markdown | DocumentFormat::Html => { // Text formats - convert to string first let content = std::str::from_utf8(bytes) .map_err(|e| Error::Parse(format!("Invalid UTF-8 content: {}", e)))?; @@ -188,7 +188,6 @@ pub fn get_parser(format: DocumentFormat) -> Option> { DocumentFormat::Pdf => Some(Box::new(PdfParser::new())), DocumentFormat::Html => Some(Box::new(HtmlParser::new())), DocumentFormat::Docx => Some(Box::new(super::docx::DocxParser::new())), - DocumentFormat::Text => None, // TODO: Implement plain text parser } } diff --git a/rust/src/parser/types.rs b/rust/src/parser/types.rs index c8c5f000..2ccac2f7 100644 --- a/rust/src/parser/types.rs +++ b/rust/src/parser/types.rs @@ -21,8 +21,6 @@ pub enum DocumentFormat { Html, /// Word documents (.docx) Docx, - /// Plain text files (.txt) - Text, } impl DocumentFormat { @@ -33,7 +31,6 @@ impl DocumentFormat { "pdf" => Some(Self::Pdf), "html" | "htm" => Some(Self::Html), "docx" => Some(Self::Docx), - "txt" => Some(Self::Text), _ => None, } } @@ -45,7 +42,6 @@ impl DocumentFormat { Self::Pdf => "pdf", Self::Html => "html", Self::Docx => "docx", - Self::Text => "txt", } } } @@ -173,7 +169,7 @@ impl Default for DocumentMeta { fn default() -> Self { Self { name: String::new(), - format: DocumentFormat::Text, + format: DocumentFormat::Markdown, page_count: None, line_count: 0, source_path: None, diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs index af0fd602..de41c052 100644 --- a/rust/src/retrieval/pilot/llm_pilot.rs +++ b/rust/src/retrieval/pilot/llm_pilot.rs @@ -286,7 +286,7 @@ impl LlmPilot { &self, point: InterventionPoint, context: &super::builder::PilotContext, - candidates: &[crate::document::NodeId], + candidates: &[super::parser::CandidateInfo], ) -> PilotDecision { // Check memo cache first if let Some(ref store) = self.memo_store { @@ -331,22 +331,29 @@ impl LlmPilot { } } + println!("[DEBUG] LlmPilot::call_llm() - point={:?}, estimated_tokens={}", point, prompt.estimated_tokens); + println!("[DEBUG] LlmPilot::call_llm() - SYSTEM PROMPT:\n{}", 
prompt.system); + println!("[DEBUG] LlmPilot::call_llm() - USER PROMPT:\n{}", prompt.user); + println!("[DEBUG] LlmPilot::call_llm() - candidates count: {}", candidates.len()); debug!( "Calling LLM for {:?} point (estimated: {} tokens)", point, prompt.estimated_tokens ); - // Make LLM call - use executor if available, otherwise use client directly + // Make LLM call -use executor if available, otherwise use client directly let result = if let Some(ref executor) = self.executor { + println!("[DEBUG] LlmPilot::call_llm() - using LlmExecutor"); // Use LlmExecutor for unified throttle/retry/fallback executor.complete(&prompt.system, &prompt.user).await } else { + println!("[DEBUG] LlmPilot::call_llm() - using direct client"); // Fallback to direct client call self.client.complete(&prompt.system, &prompt.user).await }; match result { Ok(response) => { + println!("[DEBUG] LlmPilot::call_llm() - RAW LLM RESPONSE:\n{}", response); // Record usage (estimate output tokens) let output_tokens = self.estimate_tokens(&response); self.budget @@ -354,6 +361,10 @@ impl LlmPilot { // Parse response let mut decision = self.response_parser.parse(&response, candidates, point); + println!("[DEBUG] LlmPilot::call_llm() - PARSED DECISION: confidence={:.2}, ranked={}, direction={:?}, reasoning={}", + decision.confidence, decision.ranked_candidates.len(), + std::mem::discriminant(&decision.direction), + decision.reasoning.chars().take(100).collect::()); // Apply learner adjustment if available if let Some(ref adj) = adjustment { @@ -406,14 +417,14 @@ impl LlmPilot { fn cached_value_to_decision( &self, value: crate::memo::PilotDecisionValue, - candidates: &[crate::document::NodeId], + candidates: &[super::parser::CandidateInfo], point: InterventionPoint, ) -> PilotDecision { let ranked = candidates .iter() .enumerate() - .map(|(i, &node_id)| super::decision::RankedCandidate { - node_id, + .map(|(i, c)| super::decision::RankedCandidate { + node_id: c.node_id, score: if i == value.selected_idx { 1.0 } else { 0.5 / (i + 1) as f32 }, reason: None, }) @@ -433,14 +444,14 @@ impl LlmPilot { /// Create a default decision when LLM fails. 
fn default_decision( &self, - candidates: &[crate::document::NodeId], + candidates: &[super::parser::CandidateInfo], point: InterventionPoint, ) -> PilotDecision { let ranked = candidates .iter() .enumerate() - .map(|(i, &node_id)| super::decision::RankedCandidate { - node_id, + .map(|(i, c)| super::decision::RankedCandidate { + node_id: c.node_id, score: 1.0 / (i + 1) as f32, reason: None, }) @@ -479,11 +490,13 @@ impl Pilot for LlmPilot { fn should_intervene(&self, state: &SearchState<'_>) -> bool { // Check mode if !self.config.mode.uses_llm() { + println!("[DEBUG] LlmPilot::should_intervene() - mode doesn't use LLM"); return false; } // Check budget if !self.has_budget() { + println!("[DEBUG] LlmPilot::should_intervene() - budget exhausted"); debug!("Budget exhausted, skipping intervention"); return false; } @@ -492,6 +505,8 @@ impl Pilot for LlmPilot { // Condition 1: Fork point with enough candidates if state.candidates.len() > intervention.fork_threshold { + println!("[DEBUG] LlmPilot::should_intervene() - YES: fork point with {} candidates (threshold={})", + state.candidates.len(), intervention.fork_threshold); debug!( "Intervening: fork point with {} candidates", state.candidates.len() @@ -501,12 +516,15 @@ impl Pilot for LlmPilot { // Condition 2: Scores are too close (algorithm uncertain) if self.scores_are_close(state) { + println!("[DEBUG] LlmPilot::should_intervene() - YES: scores are close (best={:.2})", state.best_score); debug!("Intervening: scores are close"); return true; } // Condition 3: Low confidence (best score too low) if intervention.is_low_confidence(state.best_score) { + println!("[DEBUG] LlmPilot::should_intervene() - YES: low confidence (best_score={:.2}, threshold={:.2})", + state.best_score, intervention.low_score_threshold); debug!( "Intervening: low confidence (best_score={:.2})", state.best_score @@ -516,31 +534,58 @@ impl Pilot for LlmPilot { // Condition 4: Backtracking and guide_at_backtrack is enabled if state.is_backtracking && self.config.guide_at_backtrack { + println!("[DEBUG] LlmPilot::should_intervene() - YES: backtracking"); debug!("Intervening: backtracking"); return true; } + println!("[DEBUG] LlmPilot::should_intervene() - NO: candidates={}, best_score={:.2}", + state.candidates.len(), state.best_score); false } async fn decide(&self, state: &SearchState<'_>) -> PilotDecision { let point = self.get_intervention_point(state); + println!("[DEBUG] LlmPilot::decide() - intervention_point={:?}, candidates={}", + point, state.candidates.len()); // Build context let context = self.context_builder.build(state); + // Build candidate info with titles + let candidate_info: Vec = state.candidates + .iter() + .enumerate() + .filter_map(|(i, &node_id)| { + state.tree.get(node_id).map(|node| super::parser::CandidateInfo { + node_id, + title: node.title.clone(), + index: i, + }) + }) + .collect(); + // Make LLM call - self.call_llm(point, &context, state.candidates).await + let decision = self.call_llm(point, &context, &candidate_info).await; + + println!("[DEBUG] LlmPilot::decide() - result: confidence={:.2}, direction={:?}, ranked={}", + decision.confidence, std::mem::discriminant(&decision.direction), decision.ranked_candidates.len()); + + decision } async fn guide_start(&self, tree: &DocumentTree, query: &str) -> Option { + println!("[DEBUG] LlmPilot::guide_start() called, query='{}'", query); + // Check if guide_at_start is enabled if !self.config.guide_at_start { + println!("[DEBUG] LlmPilot::guide_start() - guide_at_start=false, skipping"); return 
None; } // Check budget if !self.has_budget() { + println!("[DEBUG] LlmPilot::guide_start() - budget exhausted, skipping"); debug!("Budget exhausted, cannot guide start"); return None; } @@ -549,12 +594,41 @@ impl Pilot for LlmPilot { let context = self.context_builder.build_start_context(tree, query); // Get root's children as candidates - let candidates = tree.children(tree.root()); + let node_ids = tree.children(tree.root()); + println!("[DEBUG] LlmPilot::guide_start() - {} root children candidates", node_ids.len()); + + // Build CandidateInfo with titles + let candidates: Vec = node_ids + .iter() + .enumerate() + .filter_map(|(i, &node_id)| { + tree.get(node_id).map(|node| super::parser::CandidateInfo { + node_id, + title: node.title.clone(), + index: i, + }) + }) + .collect(); // Make LLM call + println!("[DEBUG] LlmPilot::guide_start() - calling LLM..."); let decision = self .call_llm(InterventionPoint::Start, &context, &candidates) .await; + + println!("[DEBUG] LlmPilot::guide_start() - LLM returned: confidence={:.2}, ranked_candidates={}, reasoning='{}'", + decision.confidence, + decision.ranked_candidates.len(), + decision.reasoning.chars().take(100).collect::()); + + // Debug: show top ranked candidates + for (i, rc) in decision.ranked_candidates.iter().enumerate().take(3) { + if let Some(node) = tree.get(rc.node_id) { + println!("[DEBUG] Ranked {}: node_id={:?}, score={:.3}, title='{}'", + i, rc.node_id, rc.score, node.title); + } + } + info!( "Pilot start guidance: confidence={}, candidates={}", decision.confidence, @@ -580,9 +654,22 @@ impl Pilot for LlmPilot { .context_builder .build_backtrack_context(state, state.path); + // Build CandidateInfo + let candidates: Vec = state.candidates + .iter() + .enumerate() + .filter_map(|(i, &node_id)| { + state.tree.get(node_id).map(|node| super::parser::CandidateInfo { + node_id, + title: node.title.clone(), + index: i, + }) + }) + .collect(); + // Make LLM call Some( - self.call_llm(InterventionPoint::Backtrack, &context, state.candidates) + self.call_llm(InterventionPoint::Backtrack, &context, &candidates) .await, ) } @@ -662,20 +749,6 @@ mod tests { assert!(!pilot.has_budget()); } - #[test] - fn test_default_decision() { - let client = LlmClient::for_model("gpt-4o-mini"); - let config = PilotConfig::default(); - let pilot = LlmPilot::new(client, config); - - let candidates = create_test_node_ids(2); - let decision = pilot.default_decision(&candidates, InterventionPoint::Fork); - - assert_eq!(decision.ranked_candidates.len(), 2); - assert_eq!(decision.confidence, 0.0); - assert!(decision.reasoning.contains("LLM")); - } - #[test] fn test_reset() { let client = LlmClient::for_model("gpt-4o-mini"); diff --git a/rust/src/retrieval/pilot/parser.rs b/rust/src/retrieval/pilot/parser.rs index 85954c82..651b1c3c 100644 --- a/rust/src/retrieval/pilot/parser.rs +++ b/rust/src/retrieval/pilot/parser.rs @@ -19,20 +19,86 @@ use crate::document::NodeId; /// Parsed response from LLM. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LlmResponse { - /// Ranked candidates with scores. + /// Ranked candidates with scores (FORK format). #[serde(default)] pub ranked_candidates: Vec, + /// Entry points for START intervention (list of node titles). + #[serde(default)] + pub entry_points: Vec, + /// Best entry points (alternative START format from LLM). + #[serde(default)] + pub best_entry_points: Vec, + /// Selected nodes (another alternative START format - list of titles). 
+ #[serde(default)] + pub selected_nodes: Vec, + /// Selected node (singular - some LLMs return this format). + #[serde(default)] + pub selected_node: Option, + /// Recommended node (another singular format). + #[serde(default)] + pub recommended_node: Option, + /// Analysis wrapper (some LLMs nest response in "analysis" object). + #[serde(default)] + pub analysis: Option, /// Recommended search direction. #[serde(default)] pub direction: DirectionResponse, - /// Confidence level (0.0 - 1.0). - #[serde(default = "default_confidence")] + /// Confidence level (0.0 - 1.0 or "high"/"medium"/"low"). + #[serde(default = "default_confidence", deserialize_with = "deserialize_confidence")] pub confidence: f32, /// Reasoning for the decision. #[serde(default)] pub reasoning: String, } +/// Custom deserializer for confidence that accepts both float and string. +fn deserialize_confidence<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + use serde::de::Error; + + let value = serde_json::Value::deserialize(deserializer)?; + match value { + serde_json::Value::Number(n) => { + // Handle numeric value + Ok(n.as_f64().unwrap_or(0.5) as f32) + } + serde_json::Value::String(s) => { + // Handle string values like "high", "medium", "low" + let lower = s.to_lowercase(); + let confidence = match lower.as_str() { + "high" | "very high" | "strong" => 0.9, + "medium" | "moderate" => 0.6, + "low" | "weak" => 0.3, + _ => 0.5, // default for unknown strings + }; + Ok(confidence) + } + _ => Ok(0.5), // default for other types + } +} + +/// Analysis wrapper for nested LLM responses. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AnalysisWrapper { + /// Query from analysis. + #[serde(default)] + pub query: Option, + /// Intent detected. + #[serde(default)] + pub intent: Option, + /// Selected node (singular). + #[serde(default)] + pub selected_node: Option, + /// Selected nodes (plural). + #[serde(default)] + pub selected_nodes: Vec, + /// Reasoning from analysis. + #[serde(default)] + pub reasoning: Option, +} + /// Candidate score from LLM response. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CandidateScore { @@ -45,6 +111,48 @@ pub struct CandidateScore { pub reason: Option, } +/// Candidate info for title matching. +#[derive(Debug, Clone)] +pub struct CandidateInfo { + /// Node ID. + pub node_id: NodeId, + /// Title of the node. + pub title: String, + /// Index in the candidates list. + pub index: usize, +} + +/// Entry point from START response. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EntryPoint { + /// Node ID or index. + #[serde(default)] + pub node_id: Option, + /// Index of the candidate. + #[serde(default)] + pub index: Option, + /// Title of the entry point. + #[serde(default)] + pub title: Option, + /// Relevance score (may be 1-5 or 0.0-1.0). + #[serde(default)] + pub relevance_score: Option, + /// Score (alternative field name). + #[serde(default)] + pub score: Option, +} + +/// Top-3 candidate from the LLM LOCATE_TOP3 response. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Top3Candidate { + /// Node ID from the TOC (copied exactly). + pub node_id: usize, + /// Relevance score (0.0-1.0). + pub relevance_score: f32, + /// Reason for the selection. + pub reason: String, +} +
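The custom `deserialize_confidence` above exists because some models answer with `"confidence": "high"` rather than a number. As a minimal standalone sketch of that tolerant parsing (not part of the patch; it assumes only the `serde_json` crate):

```rust
// Standalone sketch: normalize a JSON confidence value that may be either a
// number or a descriptive string, mirroring the tolerant deserializer above.
fn confidence_from_value(v: &serde_json::Value) -> f32 {
    match v {
        serde_json::Value::Number(n) => n.as_f64().unwrap_or(0.5) as f32,
        serde_json::Value::String(s) => match s.to_lowercase().as_str() {
            "high" | "very high" | "strong" => 0.9,
            "medium" | "moderate" => 0.6,
            "low" | "weak" => 0.3,
            _ => 0.5, // unknown strings fall back to a neutral value
        },
        _ => 0.5, // any other JSON type falls back as well
    }
}

fn main() {
    let numeric = serde_json::json!({ "confidence": 0.85 });
    let textual = serde_json::json!({ "confidence": "high" });
    assert!((confidence_from_value(&numeric["confidence"]) - 0.85).abs() < 1e-3);
    assert!((confidence_from_value(&textual["confidence"]) - 0.9).abs() < 1e-3);
}
```

/// Direction response from LLM.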
#[derive(Debug, Clone, Serialize, Deserialize, Default)] #[serde(rename_all = "snake_case")] @@ -111,23 +219,29 @@ impl ResponseParser { /// # Arguments /// /// * `response` - Raw LLM response text - /// * `candidates` - Original candidate NodeIds (for mapping indices) + /// * `candidates` - Candidate info with NodeId, title, and index /// * `point` - The intervention point pub fn parse( &self, response: &str, - candidates: &[NodeId], + candidates: &[CandidateInfo], point: InterventionPoint, ) -> PilotDecision { + println!("[DEBUG] ResponseParser::parse() - candidates.len()={}", candidates.len()); + // Try JSON parse first if let Some(decision) = self.try_json_parse(response, candidates, point) { + println!("[DEBUG] ResponseParser::parse() - JSON parse succeeded, ranked={}", decision.ranked_candidates.len()); return decision; } + println!("[DEBUG] ResponseParser::parse() - JSON parse failed, trying regex..."); // Try regex extraction if let Some(decision) = self.try_regex_parse(response, candidates, point) { + println!("[DEBUG] ResponseParser::parse() - Regex parse succeeded, ranked={}", decision.ranked_candidates.len()); return decision; } + println!("[DEBUG] ResponseParser::parse() - Regex parse failed, using default decision"); // Return default decision self.default_decision(candidates, point) @@ -137,23 +251,34 @@ impl ResponseParser { fn try_json_parse( &self, response: &str, - candidates: &[NodeId], + candidates: &[CandidateInfo], point: InterventionPoint, ) -> Option { // First, try to extract JSON from code blocks let json_str = if let Some(caps) = self.json_block_regex.captures(response) { - caps.get(1)?.as_str().trim().to_string() + let extracted = caps.get(1)?.as_str().trim().to_string(); + println!("[DEBUG] ResponseParser::try_json_parse() - Found JSON in code block"); + extracted } else { // Try to find raw JSON object let start = response.find('{')?; let end = response.rfind('}')? 
+ 1; - response[start..end].to_string() + let extracted = response[start..end].to_string(); + println!("[DEBUG] ResponseParser::try_json_parse() - Found raw JSON (no code block)"); + extracted }; + println!("[DEBUG] ResponseParser::try_json_parse() - Extracted JSON:\n{}", json_str); + // Parse JSON - let llm_response: LlmResponse = match serde_json::from_str(&json_str) { - Ok(r) => r, + let llm_response: LlmResponse = match serde_json::from_str::(&json_str) { + Ok(r) => { + println!("[DEBUG] ResponseParser::try_json_parse() - JSON parsed successfully"); + println!("[DEBUG] ResponseParser::try_json_parse() - ranked_candidates count: {}", r.ranked_candidates.len()); + r + }, Err(e) => { + println!("[DEBUG] ResponseParser::try_json_parse() - JSON parse FAILED: {}", e); warn!("Failed to parse LLM response as JSON: {}", e); return None; } @@ -167,7 +292,7 @@ impl ResponseParser { fn try_regex_parse( &self, response: &str, - candidates: &[NodeId], + candidates: &[CandidateInfo], point: InterventionPoint, ) -> Option { // Extract confidence @@ -226,7 +351,7 @@ impl ResponseParser { fn extract_ranked_candidates( &self, response: &str, - candidates: &[NodeId], + candidates: &[CandidateInfo], ) -> Vec { let mut ranked = Vec::new(); @@ -245,7 +370,7 @@ impl ResponseParser { if index < candidates.len() { ranked.push(RankedCandidate { - node_id: candidates[index], + node_id: candidates[index].node_id, score: score.clamp(0.0, 1.0), reason: None, }); @@ -268,7 +393,7 @@ impl ResponseParser { if let Ok(idx) = match_1.as_str().parse::() { if idx < candidates.len() && seen.insert(idx) { ranked.push(RankedCandidate { - node_id: candidates[idx], + node_id: candidates[idx].node_id, score: 1.0 - (ranked.len() as f32 * 0.1), // Decreasing scores reason: None, }); @@ -287,20 +412,28 @@ impl ResponseParser { /// Convert LlmResponse to PilotDecision. 
fn llm_response_to_decision( &self, - llm_response: LlmResponse, - candidates: &[NodeId], + mut llm_response: LlmResponse, + candidates: &[CandidateInfo], point: InterventionPoint, ) -> PilotDecision { + println!("[DEBUG] ResponseParser::llm_response_to_decision() - point={:?}", point); + println!("[DEBUG] ResponseParser::llm_response_to_decision() - ranked_candidates.len()={}", llm_response.ranked_candidates.len()); + println!("[DEBUG] ResponseParser::llm_response_to_decision() - best_entry_points.len()={}", llm_response.best_entry_points.len()); + println!("[DEBUG] ResponseParser::llm_response_to_decision() - entry_points.len()={}", llm_response.entry_points.len()); + println!("[DEBUG] ResponseParser::llm_response_to_decision() - selected_nodes.len()={}", llm_response.selected_nodes.len()); + println!("[DEBUG] ResponseParser::llm_response_to_decision() - selected_node={:?}", llm_response.selected_node); + println!("[DEBUG] ResponseParser::llm_response_to_decision() - analysis={:?}", llm_response.analysis.as_ref().map(|a| (&a.selected_node, &a.selected_nodes))); + // Convert candidate scores to RankedCandidate - let ranked_candidates: Vec = llm_response + let mut ranked_candidates: Vec = llm_response .ranked_candidates - .into_iter() + .iter() .filter_map(|cs| { if cs.index < candidates.len() { Some(RankedCandidate { - node_id: candidates[cs.index], + node_id: candidates[cs.index].node_id, score: cs.score.clamp(0.0, 1.0), - reason: cs.reason, + reason: cs.reason.clone(), }) } else { None @@ -308,6 +441,154 @@ impl ResponseParser { }) .collect(); + // Handle START response format: best_entry_points, entry_points, or selected_nodes + if ranked_candidates.is_empty() { + // Try to convert best_entry_points (format: [{"node_id": 1, "title": "...", "relevance_score": 5}]) + for entry in &llm_response.best_entry_points { + // Get index from either node_id or index field + // node_id is 1-indexed from LLM, convert to 0-indexed + let idx = if let Some(nid) = entry.node_id { + if nid > 0 { nid - 1 } else { nid } + } else if let Some(idx) = entry.index { + idx + } else { + continue; // Skip if no valid index + }; + + if idx < candidates.len() { + let score = entry.relevance_score + .or(entry.score) + .unwrap_or(0.5) + / 5.0; // Normalize 1-5 scale to 0.0-1.0 + ranked_candidates.push(RankedCandidate { + node_id: candidates[idx].node_id, + score: score.clamp(0.0, 1.0), + reason: entry.title.clone(), + }); + println!("[DEBUG] ResponseParser - converted best_entry_point[{}] to ranked_candidate (idx={}, score={:.2})", + idx, idx, score); + } + } + + // Try to convert selected_nodes (format: ["Project Documentation", "Overview"]) + // Match by title + for selected_title in &llm_response.selected_nodes { + for candidate in candidates { + if Self::titles_match(selected_title, &candidate.title) { + ranked_candidates.push(RankedCandidate { + node_id: candidate.node_id, + score: 0.9, // High score for title match + reason: Some(format!("Title match: {}", selected_title)), + }); + println!("[DEBUG] ResponseParser - matched selected_node '{}' to candidate '{}' (index={})", + selected_title, candidate.title, candidate.index); + break; // Only match once per selected_node + } + } + } + + // Try to convert selected_node (singular - format: "Project Documentation") + if let Some(ref single_node) = llm_response.selected_node { + for candidate in candidates { + if Self::titles_match(single_node, &candidate.title) { + if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) { + 
ranked_candidates.push(RankedCandidate { + node_id: candidate.node_id, + score: 0.9, + reason: Some(format!("Title match (singular): {}", single_node)), + }); + println!("[DEBUG] ResponseParser - matched selected_node (singular) '{}' to candidate '{}' (index={})", + single_node, candidate.title, candidate.index); + } + break; + } + } + } + + // Try to convert recommended_node (another singular format) + if let Some(ref recommended) = llm_response.recommended_node { + for candidate in candidates { + if Self::titles_match(recommended, &candidate.title) { + if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) { + ranked_candidates.push(RankedCandidate { + node_id: candidate.node_id, + score: 0.85, + reason: Some(format!("Recommended node: {}", recommended)), + }); + println!("[DEBUG] ResponseParser - matched recommended_node '{}' to candidate '{}' (index={})", + recommended, candidate.title, candidate.index); + } + break; + } + } + } + + // Try to extract from analysis wrapper if present + if let Some(ref analysis) = llm_response.analysis { + // Check analysis.selected_nodes (plural array) + for selected_title in &analysis.selected_nodes { + for candidate in candidates { + if Self::titles_match(selected_title, &candidate.title) { + if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) { + ranked_candidates.push(RankedCandidate { + node_id: candidate.node_id, + score: 0.85, + reason: Some(format!("Analysis selected_nodes: {}", selected_title)), + }); + println!("[DEBUG] ResponseParser - matched analysis.selected_nodes '{}' to candidate '{}' (index={})", + selected_title, candidate.title, candidate.index); + } + break; + } + } + } + + // Check analysis.selected_node (singular) + if let Some(ref single_node) = analysis.selected_node { + for candidate in candidates { + if Self::titles_match(single_node, &candidate.title) { + if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) { + ranked_candidates.push(RankedCandidate { + node_id: candidate.node_id, + score: 0.85, + reason: Some(format!("Analysis selected_node: {}", single_node)), + }); + println!("[DEBUG] ResponseParser - matched analysis.selected_node (singular) '{}' to candidate '{}' (index={})", + single_node, candidate.title, candidate.index); + } + break; + } + } + } + + // Use analysis.reasoning if top-level reasoning is empty + if llm_response.reasoning.is_empty() { + if let Some(ref r) = analysis.reasoning { + llm_response.reasoning = r.clone(); + } + } + } + + // Try to convert entry_points (format: ["Node Title 1", "Node Title 2"]) + for entry_title in &llm_response.entry_points { + for candidate in candidates { + if Self::titles_match(entry_title, &candidate.title) { + // Check if already added + if !ranked_candidates.iter().any(|rc| rc.node_id == candidate.node_id) { + ranked_candidates.push(RankedCandidate { + node_id: candidate.node_id, + score: 0.8, // Slightly lower score for entry_points + reason: Some(format!("Entry point: {}", entry_title)), + }); + println!("[DEBUG] ResponseParser - matched entry_point '{}' to candidate '{}' (index={})", + entry_title, candidate.title, candidate.index); + } + break; + } + } + } + } + // Convert direction let direction = match llm_response.direction { DirectionResponse::GoDeeper => SearchDirection::GoDeeper { @@ -333,6 +614,8 @@ impl ResponseParser { }, }; + println!("[DEBUG] ResponseParser::llm_response_to_decision() - final ranked_candidates.len()={}", ranked_candidates.len()); + PilotDecision { ranked_candidates, direction, @@ -342,14 
+625,41 @@ impl ResponseParser { } } + /// Check if two titles match (fuzzy matching). + fn titles_match(llm_title: &str, candidate_title: &str) -> bool { + let llm_lower = llm_title.to_lowercase().trim().to_string(); + let candidate_lower = candidate_title.to_lowercase().trim().to_string(); + + // Exact match + if llm_lower == candidate_lower { + return true; + } + + // Contains match + if llm_lower.contains(&candidate_lower) || candidate_lower.contains(&llm_lower) { + return true; + } + + // Word overlap match (at least 50% of words match) + let llm_words: std::collections::HashSet<&str> = llm_lower.split_whitespace().collect(); + let candidate_words: std::collections::HashSet<&str> = candidate_lower.split_whitespace().collect(); + let overlap = llm_words.intersection(&candidate_words).count(); + let min_words = llm_words.len().min(candidate_words.len()); + if min_words > 0 && overlap as f32 / min_words as f32 >= 0.5 { + return true; + } + + false + } + /// Create a default decision when parsing fails. - fn default_decision(&self, candidates: &[NodeId], point: InterventionPoint) -> PilotDecision { + fn default_decision(&self, candidates: &[CandidateInfo], point: InterventionPoint) -> PilotDecision { // Score candidates uniformly let ranked: Vec = candidates .iter() .enumerate() - .map(|(i, &node_id)| RankedCandidate { - node_id, + .map(|(i, c)| RankedCandidate { + node_id: c.node_id, score: 1.0 / (i + 1) as f32, // Decreasing scores reason: None, }) @@ -395,126 +705,4 @@ mod tests { } ids } - - #[test] - fn test_parse_json_response() { - let parser = ResponseParser::new(); - let candidates = create_test_node_ids(3); - - let response = r#"{ - "ranked_candidates": [ - {"index": 1, "score": 0.9, "reason": "Best match"}, - {"index": 0, "score": 0.5} - ], - "direction": "go_deeper", - "confidence": 0.85, - "reasoning": "Candidate 1 is most relevant" - }"#; - - let decision = parser.parse(response, &candidates, InterventionPoint::Fork); - - assert_eq!(decision.ranked_candidates.len(), 2); - assert_eq!(decision.ranked_candidates[0].node_id, candidates[1]); - assert!((decision.confidence - 0.85).abs() < 0.01); - assert!(matches!( - decision.direction, - SearchDirection::GoDeeper { .. } - )); - } - - #[test] - fn test_parse_json_in_code_block() { - let parser = ResponseParser::new(); - let candidates = create_test_node_ids(2); - - let response = r#" -Here's my analysis: - -```json -{ - "ranked_candidates": [{"index": 0, "score": 0.8}], - "direction": "go_deeper", - "confidence": 0.8, - "reasoning": "Test" -} -``` -"#; - - let decision = parser.parse(response, &candidates, InterventionPoint::Fork); - assert_eq!(decision.ranked_candidates.len(), 1); - } - - #[test] - fn test_parse_with_regex_fallback() { - let parser = ResponseParser::new(); - let candidates = create_test_node_ids(2); - - // Non-JSON response with some structure - let response = r#" -I think candidate 0 is the best match. 
-Confidence: 0.75 -Direction: go_deeper -"#; - - let decision = parser.parse(response, &candidates, InterventionPoint::Fork); - - // Should use regex extraction - assert!((decision.confidence - 0.75).abs() < 0.01); - } - - #[test] - fn test_default_decision() { - let parser = ResponseParser::new(); - let candidates = create_test_node_ids(2); - - let decision = parser.parse( - "This is unparseable gibberish", - &candidates, - InterventionPoint::Fork, - ); - - // Should return default - assert_eq!(decision.ranked_candidates.len(), 2); - assert_eq!(decision.confidence, 0.0); - assert!(decision.reasoning.contains("parsing failed")); - } - - #[test] - fn test_confidence_clamping() { - let parser = ResponseParser::new(); - let candidates = create_test_node_ids(1); - - let response = r#"{ - "ranked_candidates": [{"index": 0, "score": 1.5}], - "confidence": 1.5, - "direction": "go_deeper" - }"#; - - let decision = parser.parse(response, &candidates, InterventionPoint::Fork); - - // Confidence should be clamped to 1.0 - assert!((decision.confidence - 1.0).abs() < 0.01); - } - - #[test] - fn test_direction_conversion() { - let parser = ResponseParser::new(); - let candidates = create_test_node_ids(1); - - let test_cases = vec![ - ("\"direction\": \"go_deeper\"", true), - ("\"direction\": \"explore_siblings\"", true), - ("\"direction\": \"backtrack\"", true), - ("\"direction\": \"found_answer\"", true), - ]; - - for (dir_json, should_parse) in test_cases { - let response = format!( - r#"{{"ranked_candidates": [], "confidence": 0.5, {}}}"#, - dir_json - ); - let decision = parser.parse(&response, &candidates, InterventionPoint::Fork); - assert!(should_parse, "Direction should parse correctly"); - } - } } diff --git a/rust/src/retrieval/pilot/prompts/system_start.txt index d3a65f49..086f9e96 100644 --- a/rust/src/retrieval/pilot/prompts/system_start.txt +++ b/rust/src/retrieval/pilot/prompts/system_start.txt @@ -1,11 +1,15 @@ -You are a document navigation assistant specialized in hierarchical document search. +You are a document navigation assistant. Your task is to identify the best entry points for searching a hierarchical document based on a user query. -Your task is to analyze a user's query and the document structure to identify the best starting points for search. +CRITICAL: You MUST respond with ONLY valid JSON. No markdown, no explanation, just the JSON object. -Guidelines: -- Identify sections that are most likely to contain the answer -- Consider the query's domain, keywords, and intent -- Prefer more specific sections over general ones when appropriate -- Multiple entry points can be suggested if the query is ambiguous +Your response must have this EXACT structure: +{ + "entry_points": ["list of node titles as strings"], + "reasoning": "explanation string", + "confidence": 0.85 +} -You must respond in valid JSON format. +Where: +- entry_points: MUST be an array of node titles (strings) from the candidate list +- reasoning: MUST be a string +- confidence: MUST be a number (0.0 to 1.0), not a string like "high" diff --git a/rust/src/retrieval/pilot/prompts/templates.rs index 5f9f75ff..50f4c3cc 100644 --- a/rust/src/retrieval/pilot/prompts/templates.rs +++ b/rust/src/retrieval/pilot/prompts/templates.rs @@ -248,52 +248,170 @@ pub mod fallback { use super::*; pub fn system_start() -> String { - "You are a document navigation assistant.
Help identify the best starting point for searching a hierarchical document.".to_string() + r#"You are a document navigation assistant. Help identify the best entry points for searching a hierarchical document. + +CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks, No explanation. Just the JSON object. + +Your response must have this EXACT structure: +{ + "entry_points": ["Title 1", "Title 2"], + "reasoning": "Brief explanation", + "confidence": 0.85 +} + +Rules: +- entry_points: Array of node title strings (from the candidates provided) +- reasoning: String explaining your choice +- confidence: Number between 0.0 and 1.0 (use a number, NOT "high"/"medium"/"low")"#.to_string() } pub fn user_start() -> String { - r#"Given the following document structure and user query, identify the best entry points for search. - -{context} + r#"{context} -Respond in JSON format with your analysis."#.to_string() +Respond with ONLY the JSON object (no markdown, no explanation): +{ + "entry_points": ["list of node titles as strings"], + "reasoning": "your reasoning here", + "confidence": 0.85 +}"#.to_string() } pub fn system_fork() -> String { - "You are a document navigation assistant. At each decision point, rank the candidate branches by their likelihood of containing the answer to the user's query.".to_string() + r#"You are a document navigation assistant. At each decision point, rank the candidate branches by their likelihood of containing the answer to the user's query. + +CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks. + +Your response must have this EXACT structure: +{ + "ranked_candidates": [ + {"index": 0, "score": 0.9, "reason": "explanation"} + ], + "direction": "go_deeper", + "confidence": 0.85, + "reasoning": "overall explanation" +} + +Rules: +- ranked_candidates: Array of objects with index (number), score (0.0-1.0), reason (string) +- direction: One of "go_deeper", "explore_siblings", "backtrack", "found_answer" +- confidence: Number between 0.0 and 1.0 (NOT a string)"#.to_string() } pub fn user_fork() -> String { - r#"Given the current search context and candidate branches, rank them by relevance. - -{context} + r#"{context} -Respond in JSON format with ranked candidates."# +Respond with ONLY the JSON object: +{ + "ranked_candidates": [ + {"index": 0, "score": 0.9, "reason": "why this candidate"} + ], + "direction": "go_deeper", + "confidence": 0.85, + "reasoning": "overall explanation" +}"# .to_string() } pub fn system_backtrack() -> String { - "You are a document navigation assistant. When a search path fails to find the answer, analyze why and suggest alternative branches to explore.".to_string() + r#"You are a document navigation assistant. When a search path fails to find the answer, analyze why and suggest alternative branches to explore. + +CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks. + +Your response must have this EXACT structure: +{ + "alternative_branches": [ + {"index": 0, "score": 0.8, "reason": "explanation"} + ], + "direction": "backtrack", + "confidence": 0.85, + "reasoning": "why the original path failed" +}"#.to_string() } pub fn user_backtrack() -> String { - r#"The current search path did not find the answer. Analyze the failure and suggest alternatives. 
- -{context} + r#"{context} -Respond in JSON format with alternative branches."#.to_string() +Respond with ONLY the JSON object: +{ + "alternative_branches": [ + {"index": 0, "score": 0.8, "reason": "why this alternative"} + ], + "direction": "backtrack", + "confidence": 0.85, + "reasoning": "why original path failed" +}"#.to_string() } pub fn system_evaluate() -> String { - "You are a document analysis assistant. Evaluate whether the current node contains the answer to the user's query.".to_string() + r#"You are a document analysis assistant. Evaluate whether the current node contains the answer to the user's query. + +CRITICAL: You MUST respond with ONLY valid JSON. No markdown code blocks. + +Your response must have this EXACT structure: +{ + "relevance_score": 0.85, + "is_answer": false, + "direction": "go_deeper", + "confidence": 0.85, + "reasoning": "explanation" +}"#.to_string() } pub fn user_evaluate() -> String { - r#"Evaluate if this node contains the answer to the user's query. + r#"{context} + +Respond with ONLY the JSON object: +{ + "relevance_score": 0.85, + "is_answer": false, + "direction": "go_deeper", + "confidence": 0.85, + "reasoning": "explanation" +}"# + .to_string() + } -{context} + pub fn system_locate_top3() -> String { + r#"You are a document navigation assistant. Your task is to locate the most relevant sections in a document hierarchy for a user's query. -Respond in JSON format with your evaluation."# +CRITICAL INSTRUCTIONS: +1. Analyze the user query carefully to understand the intent +2. Examine the provided Table of Contents (TOC) with node IDs +3. Select the TOP 3 most relevant nodes that would contain the answer +4. You MUST respond with ONLY valid JSON. No markdown code blocks. No explanations outside JSON. + +Your response must have this EXACT structure: +{ + "reasoning": "Brief analysis of the query and why you selected these nodes", + "candidates": [ + {"node_id": , "relevance_score": 0.95, "reason": "Why this node matches the query"}, + {"node_id": , "relevance_score": 0.80, "reason": "Why this node is also relevant"}, + {"node_id": , "relevance_score": 0.65, "reason": "Why this node might be relevant"} + ] +} + +Rules: +- node_id: MUST be a number from the provided TOC (copy exactly) +- relevance_score: Number between 0.0 and 1.0 (higher = more relevant) +- reason: Brief explanation for each selection +- candidates: Must have exactly 3 items, ordered by relevance (highest first) +- If fewer than 3 relevant nodes exist, use lower scores for less relevant ones"#.to_string() + } + + pub fn user_locate_top3() -> String { + r#"{context} + +Based on the query and TOC above, select the TOP 3 most relevant nodes. + +Respond with ONLY the JSON object: +{ + "reasoning": "Your analysis here", + "candidates": [ + {"node_id": 1, "relevance_score": 0.95, "reason": "explanation"}, + {"node_id": 2, "relevance_score": 0.80, "reason": "explanation"}, + {"node_id": 3, "relevance_score": 0.65, "reason": "explanation"} + ] +}"# .to_string() } } @@ -337,3 +455,68 @@ impl EvaluatePrompt { } } } + +impl LocateTop3Prompt { + /// Get template with fallback. + pub fn with_fallback() -> Self { + Self { + system: fallback::system_locate_top3(), + template: fallback::user_locate_top3(), + } + } +} + +/// Prompt template for LOCATE_TOP3 intervention point. 
+/// +/// Used at the start to directly locate top-3 relevant nodes from TOC: +/// - Understand query intent +/// - Identify top 3 most relevant nodes with confidence scores +/// - Provide reasoning for each selection +#[derive(Debug, Clone)] +pub struct LocateTop3Prompt { + system: String, + template: String, +} + +impl Default for LocateTop3Prompt { + fn default() -> Self { + Self::with_fallback() + } +} + +impl LocateTop3Prompt { + /// Create a new locate top-3 prompt template. + pub fn new() -> Self { + Self::default() + } + + /// Create with custom templates. + pub fn with_templates(system: String, template: String) -> Self { + Self { system, template } + } +} + +impl PromptTemplate for LocateTop3Prompt { + fn system_prompt(&self) -> &str { + &self.system + } + + fn user_prompt_template(&self) -> &str { + &self.template + } + + fn intervention_point(&self) -> InterventionPoint { + InterventionPoint::Start + } + + fn output_format_hint(&self) -> &str { + r#"{ + "reasoning": "Overall analysis of the query and document structure", + "candidates": [ + {"node_id": 1, "relevance_score": 0.95, "reason": "Why this node is relevant"}, + {"node_id": 2, "relevance_score": 0.80, "reason": "Why this node is relevant"}, + {"node_id": 3, "relevance_score": 0.65, "reason": "Why this node is relevant"} + ] +}"# + } +} diff --git a/rust/src/retrieval/pilot/prompts/user_start.txt b/rust/src/retrieval/pilot/prompts/user_start.txt index b091735e..df048df8 100644 --- a/rust/src/retrieval/pilot/prompts/user_start.txt +++ b/rust/src/retrieval/pilot/prompts/user_start.txt @@ -2,7 +2,16 @@ Analyze the following document structure and user query to identify the best ent {context} -Provide your response as a JSON object with: -- entry_points: list of section titles to start searching from -- reasoning: brief explanation of why these entry points -- confidence: your confidence in this recommendation (0.0-1.0) +IMPORTANT: You MUST respond with ONLY a JSON object in this EXACT format: +{ + "entry_points": ["Title 1", "Title 2"], + "reasoning": "Brief explanation of why these entry points", + "confidence": 0.85 +} + +Rules: +- entry_points: Array of strings (node titles from the candidates above) +- reasoning: String explaining your choice +- confidence: Number between 0.0 and 1.0 (NOT a string like "high") + +Do NOT use any other field names. Use "entry_points" not "selected_node" or "recommended_node". diff --git a/rust/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs index 9bf02ae3..823abdba 100644 --- a/rust/src/retrieval/pipeline/context.rs +++ b/rust/src/retrieval/pipeline/context.rs @@ -209,6 +209,8 @@ pub struct PipelineContext { pub keywords: Vec, /// Target sections from ToC matching. pub target_sections: Vec, + /// Decomposed sub-queries (if query was decomposed). + pub decomposition: Option, // ============ Plan Stage Output ============ /// Selected retrieval strategy. 
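The new `decomposition` field carries whatever the Analyze stage produced when it split a complex query; the `DecompositionResult` type itself is defined elsewhere in the crate and is not shown in this patch. A rough, hypothetical mirror of the shape implied by how the field is used later in this diff (`was_decomposed`, `sub_queries` with `text`/`priority`, `is_multi_turn()`, `execution_order()`):

```rust
// Hypothetical mirror of the decomposition result consumed by SearchStage
// below; the real type lives in retrieval::decompose and may differ.
#[derive(Debug, Clone)]
struct SubQuery {
    text: String,
    priority: u32,
}

#[derive(Debug, Clone)]
struct DecompositionSketch {
    was_decomposed: bool,
    sub_queries: Vec<SubQuery>,
}

impl DecompositionSketch {
    /// More than one sub-query implies a multi-turn search.
    fn is_multi_turn(&self) -> bool {
        self.was_decomposed && self.sub_queries.len() > 1
    }

    /// Order of execution, assuming higher-priority sub-queries run first.
    fn execution_order(&self) -> Vec<usize> {
        let mut order: Vec<usize> = (0..self.sub_queries.len()).collect();
        order.sort_by_key(|&i| std::cmp::Reverse(self.sub_queries[i].priority));
        order
    }
}
```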
@@ -268,6 +270,7 @@ impl PipelineContext { complexity: None, keywords: Vec::new(), target_sections: Vec::new(), + decomposition: None, selected_strategy: None, selected_algorithm: None, search_config: None, diff --git a/rust/src/retrieval/pipeline_retriever.rs b/rust/src/retrieval/pipeline_retriever.rs index 222b3a9b..377c4747 100644 --- a/rust/src/retrieval/pipeline_retriever.rs +++ b/rust/src/retrieval/pipeline_retriever.rs @@ -119,7 +119,7 @@ impl PipelineRetriever { orchestrator = orchestrator.stage(plan_stage); // Add search stage with Pilot for semantic navigation - let mut search_stage = SearchStage::new(); + let mut search_stage = SearchStage::new().with_llm_client(self.llm_client.clone()); if let Some(ref client) = self.llm_client { // Create LLM-based Pilot for semantic navigation guidance let mut pilot = LlmPilot::new(client.clone(), PilotConfig::default()); diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs index 9fba59e9..51d700f4 100644 --- a/rust/src/retrieval/search/beam.rs +++ b/rust/src/retrieval/search/beam.rs @@ -126,21 +126,30 @@ impl SearchTree for BeamSearch { let beam_width = config.beam_width.min(self.beam_width); let mut visited: HashSet = HashSet::new(); + println!("[DEBUG] BeamSearch: query='{}', beam_width={}, min_score={:.2}", + context.query, beam_width, config.min_score); + // Track Pilot interventions let mut pilot_interventions = 0; // Initialize with root's children let root_children = tree.children(tree.root()); + println!("[DEBUG] Root has {} children", root_children.len()); // Check if Pilot wants to guide the start let initial_candidates = if let Some(p) = pilot { + println!("[DEBUG] BeamSearch: Pilot is available, name={}, guide_at_start={}", + p.name(), p.config().guide_at_start); if p.config().guide_at_start { + println!("[DEBUG] BeamSearch: Calling pilot.guide_start()..."); if let Some(guidance) = p.guide_start(tree, &context.query).await { debug!( "Pilot provided start guidance with confidence {}", guidance.confidence ); pilot_interventions += 1; + println!("[DEBUG] BeamSearch: Pilot returned guidance! 
confidence={:.2}, candidates={}", + guidance.confidence, guidance.ranked_candidates.len()); // Use Pilot's ranked order if available if guidance.has_candidates() { @@ -151,15 +160,19 @@ impl SearchTree for BeamSearch { &context.query, ) } else { + println!("[DEBUG] BeamSearch: Guidance has no candidates, using algorithm scoring"); self.score_candidates_with_query(tree, &root_children, &context.query) } } else { + println!("[DEBUG] BeamSearch: pilot.guide_start() returned None"); self.score_candidates_with_query(tree, &root_children, &context.query) } } else { + println!("[DEBUG] BeamSearch: guide_at_start=false, skipping Pilot"); self.score_candidates_with_query(tree, &root_children, &context.query) } } else { + println!("[DEBUG] BeamSearch: No Pilot available"); self.score_candidates_with_query(tree, &root_children, &context.query) }; @@ -168,6 +181,14 @@ impl SearchTree for BeamSearch { .map(|(node_id, score)| SearchPath::from_node(node_id, score)) .collect(); + // Debug: show initial scores + println!("[DEBUG] Initial {} candidates after scoring", current_beam.len()); + for (i, path) in current_beam.iter().enumerate().take(5) { + if let Some(node) = tree.get(path.leaf.unwrap_or(tree.root())) { + println!("[DEBUG] Initial {}: score={:.3}, title='{}'", i, path.score, node.title); + } + } + // Keep top beam_width current_beam.truncate(beam_width); @@ -215,6 +236,7 @@ impl SearchTree for BeamSearch { children.len() ); + println!("[DEBUG] BEAM SEARCH: Pilot intervening at decision point"); match p.decide(&state).await { decision => { pilot_interventions += 1; diff --git a/rust/src/retrieval/search/greedy.rs b/rust/src/retrieval/search/greedy.rs index b539cd23..a8ee80ed 100644 --- a/rust/src/retrieval/search/greedy.rs +++ b/rust/src/retrieval/search/greedy.rs @@ -144,6 +144,7 @@ impl SearchTree for GreedySearch { children.len() ); + println!("[DEBUG] GREEDY SEARCH: Pilot intervening at decision point"); match p.decide(&state).await { decision => { pilot_interventions += 1; diff --git a/rust/src/retrieval/search/scorer.rs b/rust/src/retrieval/search/scorer.rs index f17bf118..5dbb9209 100644 --- a/rust/src/retrieval/search/scorer.rs +++ b/rust/src/retrieval/search/scorer.rs @@ -193,9 +193,9 @@ impl ScoringContext { let total_score = title_score + summary_score + content_score; - // Normalize to [0, 1] range - let max_possible = self.query_terms.len() as f32 * 10.0; // Rough upper bound - let normalized = (total_score / max_possible).clamp(0.0, 1.0); + // Normalize to [0, 1] range using sigmoid-like scaling + // This prevents over-penalization with few query terms + let normalized = (total_score / 3.0).tanh(); // 3.0 is a reasonable midpoint // Apply depth penalty let depth_factor = 1.0 - (node.depth as f32 * self.depth_penalty).min(0.5); diff --git a/rust/src/retrieval/stages/analyze.rs b/rust/src/retrieval/stages/analyze.rs index 3eabca1f..8dd875e6 100644 --- a/rust/src/retrieval/stages/analyze.rs +++ b/rust/src/retrieval/stages/analyze.rs @@ -7,14 +7,16 @@ //! - Query complexity (Simple/Medium/Complex) //! - Keywords for matching //! - Target sections based on ToC matching +//! 
- Query decomposition for complex queries use async_trait::async_trait; use tracing::info; use crate::document::{DocumentTree, TocView}; use crate::retrieval::complexity::ComplexityDetector; +use crate::retrieval::decompose::{DecompositionConfig, QueryDecomposer}; use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage, StageOutcome}; -// QueryComplexity is used in context +use crate::llm::LlmClient; /// Analyze Stage - analyzes queries for retrieval planning. /// @@ -22,17 +24,25 @@ use crate::retrieval::pipeline::{FailurePolicy, PipelineContext, RetrievalStage, /// 1. Detects query complexity (Simple/Medium/Complex) /// 2. Extracts keywords for matching /// 3. Matches target sections from ToC +/// 4. Decomposes complex queries into sub-queries (if enabled) /// /// # Example /// /// ```rust,ignore /// let stage = AnalyzeStage::new() -/// .with_toc_matching(true); +/// .with_toc_matching(true) +/// .with_decomposition(true); /// ``` pub struct AnalyzeStage { complexity_detector: ComplexityDetector, toc_view: TocView, enable_toc_matching: bool, + /// Query decomposer for complex queries. + query_decomposer: Option, + /// Enable query decomposition. + enable_decomposition: bool, + /// Complexity threshold for triggering decomposition. + decomposition_threshold: f32, } impl Default for AnalyzeStage { @@ -48,6 +58,9 @@ impl AnalyzeStage { complexity_detector: ComplexityDetector::new(), toc_view: TocView::new(), enable_toc_matching: true, + query_decomposer: None, + enable_decomposition: false, + decomposition_threshold: 0.6, } } @@ -57,6 +70,42 @@ impl AnalyzeStage { self } + /// Enable query decomposition with default configuration. + pub fn with_decomposition(mut self, enable: bool) -> Self { + self.enable_decomposition = enable; + if enable && self.query_decomposer.is_none() { + self.query_decomposer = Some(QueryDecomposer::new(DecompositionConfig::default())); + } + self + } + + /// Enable query decomposition with custom configuration. + pub fn with_decomposition_config(mut self, config: DecompositionConfig) -> Self { + self.enable_decomposition = true; + self.query_decomposer = Some(QueryDecomposer::new(config)); + self + } + + /// Enable query decomposition with LLM client. + pub fn with_llm_client(mut self, client: crate::llm::LlmClient) -> Self { + if self.query_decomposer.is_none() { + self.query_decomposer = Some( + QueryDecomposer::new(DecompositionConfig::default()) + .with_llm_client(client), + ); + } else if let Some(ref mut decomposer) = self.query_decomposer { + *decomposer = QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client); + } + self.enable_decomposition = true; + self + } + + /// Set complexity threshold for triggering decomposition. + pub fn with_decomposition_threshold(mut self, threshold: f32) -> Self { + self.decomposition_threshold = threshold.clamp(0.0, 1.0); + self + } + /// Extract keywords from a query. fn extract_keywords(&self, query: &str) -> Vec { // Simple keyword extraction: @@ -182,7 +231,42 @@ impl RetrievalStage for AnalyzeStage { info!("Target sections: {:?}", ctx.target_sections); } - // 4. Update metrics + // 4. 
Decompose query if enabled and complex enough + if self.enable_decomposition { + if let Some(ref decomposer) = self.query_decomposer { + let complexity_score = ctx.complexity + .as_ref() + .map(|c| match c { + crate::retrieval::types::QueryComplexity::Simple => 0.3, + crate::retrieval::types::QueryComplexity::Medium => 0.6, + crate::retrieval::types::QueryComplexity::Complex => 0.9, + }) + .unwrap_or(0.5); + + if complexity_score >= self.decomposition_threshold { + info!("Decomposing query (complexity: {:.2})", complexity_score); + match decomposer.decompose(&ctx.query).await { + Ok(result) => { + if result.was_decomposed { + info!( + "Query decomposed into {} sub-queries", + result.sub_queries.len() + ); + for (i, sq) in result.sub_queries.iter().enumerate() { + info!(" Sub-query {}: {} (priority: {})", i, sq.text, sq.priority); + } + } + ctx.decomposition = Some(result); + } + Err(e) => { + info!("Query decomposition failed: {}, continuing with original query", e); + } + } + } + } + } + + // 5. Update metrics ctx.metrics.llm_calls += 0; // No LLM calls in this stage Ok(StageOutcome::cont()) diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs index 31e7f173..ad8858f2 100644 --- a/rust/src/retrieval/stages/evaluate.rs +++ b/rust/src/retrieval/stages/evaluate.rs @@ -283,6 +283,7 @@ impl EvaluateStage { /// Calculate overall confidence score. fn calculate_confidence(&self, ctx: &PipelineContext) -> f32 { if ctx.candidates.is_empty() { + println!("[DEBUG] calculate_confidence: no candidates, returning 0.0"); return 0.0; } @@ -296,7 +297,10 @@ impl EvaluateStage { SufficiencyLevel::Insufficient => 0.4, }; - avg_score * sufficiency_factor + let confidence = avg_score * sufficiency_factor; + println!("[DEBUG] calculate_confidence: avg_score={:.3}, sufficiency={:?}, factor={:.1}, confidence={:.3}", + avg_score, ctx.sufficiency, sufficiency_factor, confidence); + confidence } } @@ -325,6 +329,9 @@ impl RetrievalStage for EvaluateStage { async fn execute(&self, ctx: &mut PipelineContext) -> crate::error::Result { let start = std::time::Instant::now(); + println!("[DEBUG] EvaluateStage: {} candidates, iteration {}", + ctx.candidates.len(), ctx.search_iterations); + info!( "Judging sufficiency: {} candidates, iteration {}", ctx.candidates.len(), diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs index 78f07c20..17006bdf 100644 --- a/rust/src/retrieval/stages/search.rs +++ b/rust/src/retrieval/stages/search.rs @@ -6,13 +6,21 @@ //! This stage executes the selected search algorithm using //! the selected retrieval strategy. When a Pilot is provided, //! it can provide semantic guidance at key decision points. +//! +//! # LLM-First Search +//! +//! When an LLM client is provided, the stage will first attempt to +//! directly locate the top-3 most relevant nodes using the TOC, +//! falling back to tree traversal algorithms (Beam/Greedy) only if +//! LLM fails or returns insufficient results. use async_trait::async_trait; +use serde::{Deserialize, Serialize}; use std::sync::Arc; use tracing::{info, warn}; -use crate::document::DocumentTree; -// LlmClient is used via strategy +use crate::document::{DocumentTree, TocView}; +use crate::llm::LlmClient; use crate::retrieval::RetrievalContext; // Legacy context use crate::retrieval::pilot::Pilot; use crate::retrieval::pipeline::{ @@ -57,6 +65,8 @@ pub struct SearchStage { hybrid_strategy: Option>, /// Pilot for navigation guidance (optional). 
pilot: Option>, + /// LLM client for direct TOC-based search (optional). + llm_client: Option, } impl Default for SearchStage { @@ -74,9 +84,20 @@ impl SearchStage { semantic_strategy: None, hybrid_strategy: None, pilot: None, + llm_client: None, } } + /// Add LLM client for direct TOC-based search. + /// + /// When provided, the stage will first attempt to locate relevant + /// nodes directly using the TOC, falling back to tree traversal + /// algorithms only if LLM fails or returns insufficient results. + pub fn with_llm_client(mut self, client: Option) -> Self { + self.llm_client = client; + self + } + /// Add Pilot for semantic navigation guidance. /// /// When provided, the search algorithm will consult the Pilot @@ -210,6 +231,172 @@ impl SearchStage { candidates } + + /// Build a flat TOC list for LLM consumption. + /// + /// Returns a formatted string with numbered entries: + /// ``` + /// [1] Title: "Overview" + /// Summary: "This section covers..." + /// [2] Title: "Architecture" + /// Summary: "The system architecture..." + /// ``` + fn build_toc_for_llm(&self, tree: &DocumentTree) -> (String, Vec) { + let toc_view = TocView::new(); + let mut entries = Vec::new(); + let mut node_ids = Vec::new(); + + fn collect_entries( + tree: &DocumentTree, + node_id: crate::document::NodeId, + entries: &mut Vec<(usize, String, String)>, + node_ids: &mut Vec, + index: &mut usize, + ) { + if let Some(node) = tree.get(node_id) { + let title = node.title.clone(); + let summary = if node.summary.is_empty() { + "(no summary)".to_string() + } else { + node.summary.clone() + }; + entries.push((*index, title, summary)); + node_ids.push(node_id); + *index += 1; + + for child_id in tree.children(node_id) { + collect_entries(tree, child_id, entries, node_ids, index); + } + } + } + + collect_entries(tree, tree.root(), &mut entries, &mut node_ids, &mut 0); + + let toc_str = entries + .iter() + .map(|(idx, title, summary)| { + format!("[{}] Title: \"{}\"\n Summary: \"{}\"", idx + 1, title, summary) + }) + .collect::>() + .join("\n\n"); + + (toc_str, node_ids) + } + + /// Locate top candidates directly via LLM using TOC. + /// + /// This method bypasses tree traversal by asking the LLM to + /// directly identify the most relevant nodes from the TOC. + async fn locate_via_llm( + &self, + query: &str, + tree: &DocumentTree, + ) -> Option> { + let llm_client = self.llm_client.as_ref()?; + let (toc_str, node_ids) = self.build_toc_for_llm(tree); + + if node_ids.is_empty() { + warn!("No nodes in tree for LLM search"); + return None; + } + + let system_prompt = r#"You are a document navigation assistant. Your task is to locate the most relevant sections in a document hierarchy for a user's query. + +CRITICAL INSTRUCTIONS: +1. Analyze the user query carefully to understand the intent +2. Examine the provided Table of Contents (TOC) with numbered entries +3. Select the TOP 3 most relevant entries that would contain the answer +4. You MUST respond with ONLY valid JSON. No markdown code blocks. No explanations outside JSON. 
+ +Your response must have this EXACT structure: +{ + "reasoning": "Brief analysis of the query and why you selected these entries", + "candidates": [ + {"node_id": 1, "relevance_score": 0.95, "reason": "Why this entry matches the query"}, + {"node_id": 2, "relevance_score": 0.80, "reason": "Why this entry is also relevant"}, + {"node_id": 3, "relevance_score": 0.65, "reason": "Why this entry might be relevant"} + ] +} + +Rules: +- node_id: MUST be a number from the provided TOC (the number in [N] brackets) +- relevance_score: Number between 0.0 and 1.0 (higher = more relevant) +- reason: Brief explanation for each selection +- candidates: Must have exactly 3 items, ordered by relevance (highest first)"#; + + let user_prompt = format!( + "USER QUERY: {}\n\nDOCUMENT TOC ({} entries):\n{}\n\nBased on the query and TOC above, select the TOP 3 most relevant entries.\n\nRespond with ONLY the JSON object:", + query, + node_ids.len(), + toc_str + ); + + info!("Attempting LLM-based search for query: '{}'", query); + + match llm_client.complete(system_prompt, &user_prompt).await { + Ok(response) => { + // Parse JSON response + match serde_json::from_str::(&response) { + Ok(llm_response) => { + let mut candidates = Vec::new(); + + for candidate in llm_response.candidates { + // node_id is 1-indexed from LLM, convert to 0-indexed + let idx = candidate.node_id.saturating_sub(1); + if idx < node_ids.len() { + let node_id = node_ids[idx]; + if let Some(node) = tree.get(node_id) { + candidates.push(CandidateNode::new( + node_id, + candidate.relevance_score, + node.depth, + tree.is_leaf(node_id), + )); + info!( + "LLM selected: [{}] '{}' (score: {:.2})", + candidate.node_id, node.title, candidate.relevance_score + ); + } + } + } + + if candidates.is_empty() { + warn!("LLM returned no valid candidates"); + return None; + } + + println!("LLM search found {} candidates", candidates.len()); + println!("LLM candidates content: {:?}", candidates); + Some(candidates) + } + Err(e) => { + warn!("Failed to parse LLM response as JSON: {}", e); + warn!("Raw response: {}", response); + None + } + } + } + Err(e) => { + warn!("LLM call failed: {}", e); + None + } + } + } +} + +/// LLM response for locate query. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct LlmLocateResponse { + reasoning: String, + candidates: Vec, +} + +/// A candidate from LLM locate response. 
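+///
+/// For illustration only (the values are hypothetical), one element of the
+/// `candidates` array required by the system prompt above deserializes into
+/// this struct:
+///
+/// ```json
+/// {"node_id": 2, "relevance_score": 0.80, "reason": "Section describes the architecture"}
+/// ```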
+#[derive(Debug, Clone, Serialize, Deserialize)] +struct LlmLocateCandidate { + node_id: usize, + relevance_score: f32, + reason: String, } #[async_trait] @@ -245,6 +432,9 @@ impl RetrievalStage for SearchStage { // Reset Pilot state for new query if let Some(ref pilot) = self.pilot { pilot.reset(); + println!("[DEBUG] SearchStage: Pilot is available, is_active={}", pilot.is_active()); + } else { + println!("[DEBUG] SearchStage: No Pilot available"); } info!( @@ -261,6 +451,29 @@ impl RetrievalStage for SearchStage { // Increment search iteration ctx.increment_search_iteration(); + // === Try LLM-first search (direct TOC-based location) === + if self.llm_client.is_some() { + info!("Attempting LLM-first search for query: '{}'", ctx.query); + + if let Some(candidates) = self.locate_via_llm(&ctx.query, &ctx.tree).await { + if !candidates.is_empty() { + ctx.candidates = candidates; + ctx.metrics.search_time_ms += start.elapsed().as_millis() as u64; + ctx.metrics.nodes_visited += ctx.candidates.len(); + ctx.metrics.llm_calls += 1; + + info!( + "LLM-first search found {} candidates (skipped tree traversal)", + ctx.candidates.len() + ); + + return Ok(StageOutcome::cont()); + } + } + + info!("LLM-first search returned no results, falling back to tree traversal"); + } + // Build search config for search algorithms let search_config = SearchAlgConfig { top_k: config.beam_width * 2, @@ -270,48 +483,158 @@ impl RetrievalStage for SearchStage { leaf_only: false, }; - // Create legacy context for search algorithms - let legacy_ctx = RetrievalContext::new( - &ctx.query, - ctx.options.max_tokens, - ctx.options.sufficiency_check, - ); - // Get Pilot reference (or None if not available) let pilot_ref: Option<&dyn Pilot> = self.pilot.as_deref(); + println!("[DEBUG] SearchStage: pilot_ref is {}", if pilot_ref.is_some() { "Some" } else { "None" }); + + // === Check for decomposition === + if let Some(ref decomposition) = ctx.decomposition { + if decomposition.was_decomposed && decomposition.is_multi_turn() { + info!("Processing {} decomposed sub-queries", decomposition.sub_queries.len()); + + let mut all_paths = Vec::new(); + let mut all_candidates = Vec::new(); + let mut total_pilot_interventions = 0u64; + + // Process each sub-query in execution order + let order = decomposition.execution_order(); + for sub_idx in order { + let sub_query = &decomposition.sub_queries[sub_idx]; + info!("Processing sub-query : {}", sub_query.text); + + // Create legacy context for this sub-query + let legacy_ctx = RetrievalContext::new( + &sub_query.text, + ctx.options.max_tokens, + ctx.options.sufficiency_check, + ); + + println!("[DEBUG] SearchStage: Starting search for sub-query: algorithm={:?}, top_k={}, beam_width={}", + algorithm, search_config.top_k, search_config.beam_width); + + // Execute search for this sub-query + let result = match algorithm { + SearchAlgorithm::Greedy => { + let search = GreedySearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + SearchAlgorithm::Beam => { + let search = BeamSearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + SearchAlgorithm::Mcts => { + let search = BeamSearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + }; + + all_candidates.extend(self.extract_candidates(&result.paths, &ctx.tree)); + all_paths.extend(result.paths); + total_pilot_interventions += result.pilot_interventions as u64; + + info!("Sub-query '{}' found {} paths", sub_query.text, all_paths.len()); 
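+
+                    // Note: all_paths / all_candidates accumulate across sub-queries,
+                    // so the count logged above is the running total, not only the
+                    // paths found for this sub-query.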
+ } - // Execute search based on algorithm with Pilot - let result = match algorithm { - SearchAlgorithm::Greedy => { - let search = GreedySearch::new(); - search - .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref) - .await - } - SearchAlgorithm::Beam => { - let search = BeamSearch::new(); - search - .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref) - .await - } - SearchAlgorithm::Mcts => { - // Use beam search as fallback for now - let search = BeamSearch::new(); - search - .search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref) - .await + // Merge results + ctx.search_paths = all_paths; + ctx.candidates = all_candidates; + + info!( + "Search complete: {} total candidates from {} sub-queries (pilot interventions: {})", + ctx.candidates.len(), + decomposition.sub_queries.len(), + total_pilot_interventions + ); + } else { + // Single query (not decomposed or single sub-query) - process as normal + let legacy_ctx = RetrievalContext::new( + &ctx.query, + ctx.options.max_tokens, + ctx.options.sufficiency_check, + ); + + println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}", + algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score); + + let result = match algorithm { + SearchAlgorithm::Greedy => { + let search = GreedySearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + SearchAlgorithm::Beam => { + let search = BeamSearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + SearchAlgorithm::Mcts => { + let search = BeamSearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + }; + + ctx.search_paths = result.paths; + ctx.candidates = self.extract_candidates(&ctx.search_paths, &ctx.tree); + + info!( + "Search found {} paths (pilot interventions: {})", + ctx.search_paths.len(), + result.pilot_interventions + ); } - }; + } else { + // No decomposition available, process original query + let legacy_ctx = RetrievalContext::new( + &ctx.query, + ctx.options.max_tokens, + ctx.options.sufficiency_check, + ); + + println!("[DEBUG] SearchStage: Starting search with algorithm={:?}, top_k={}, beam_width={}, max_iterations={}, min_score={:.2}", + algorithm, search_config.top_k, search_config.beam_width, search_config.max_iterations, search_config.min_score); + + let result = match algorithm { + SearchAlgorithm::Greedy => { + let search = GreedySearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + SearchAlgorithm::Beam => { + let search = BeamSearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + SearchAlgorithm::Mcts => { + let search = BeamSearch::new(); + search.search(&ctx.tree, &legacy_ctx, &search_config, pilot_ref).await + } + }; - info!( - "Search found {} paths (pilot interventions: {})", - result.paths.len(), - result.pilot_interventions - ); + ctx.search_paths = result.paths; + ctx.candidates = self.extract_candidates(&ctx.search_paths, &ctx.tree); + + info!( + "Search found {} paths (pilot interventions: {})", + ctx.search_paths.len(), + result.pilot_interventions + ); + } + + // Debug output + println!("[DEBUG] Search found {} total paths, {} candidates", ctx.search_paths.len(), ctx.candidates.len()); + for (i, path) in ctx.search_paths.iter().enumerate().take(5) { + if let Some(leaf_id) = path.leaf { + if let Some(node) = ctx.tree.get(leaf_id) { + 
println!("[DEBUG] Path {}: score={:.3}, title='{}', content_len={}", + i, path.score, node.title, node.content.len()); + } + } + } - // Update context with results - ctx.search_paths = result.paths.clone(); - ctx.candidates = self.extract_candidates(&result.paths, &ctx.tree); + // Debug output + println!("[DEBUG] Extracted {} candidates", ctx.candidates.len()); + for (i, c) in ctx.candidates.iter().enumerate().take(5) { + if let Some(node) = ctx.tree.get(c.node_id) { + println!("[DEBUG] Candidate {}: score={:.3}, title='{}'", + i, c.score, node.title); + } + } // Update metrics ctx.metrics.search_time_ms += start.elapsed().as_millis() as u64;