From d5795ef64387b953675f4ccba932b8dcbbcc73b8 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 7 Apr 2026 17:33:55 +0800 Subject: [PATCH 1/6] feat: add Python bindings and convert to multi-language workspace - Add Python bindings using PyO3 for the vectorless Rust library - Convert monorepo structure from single Rust crate to multi-language workspace - Add pyproject.toml, setup configuration for Python package distribution - Include comprehensive Python documentation and examples in README - Add basic integration tests for Python bindings covering core functionality - Update .gitignore to include Python-specific files and directories - Maintain existing Rust functionality while enabling cross-language usage --- .gitignore | 30 +- Cargo.toml | 128 +---- README.md | 26 +- examples/python_basic.py | 273 +++++++++ pyproject.toml | 65 +++ python/Cargo.toml | 16 + python/README.md | 221 ++++++++ python/src/lib.rs | 520 ++++++++++++++++++ python/tests/test_basic.py | 221 ++++++++ python/vectorless/__init__.py | 38 ++ rust/Cargo.toml | 125 +++++ {src => rust/src}/client/builder.rs | 0 {src => rust/src}/client/context.rs | 0 {src => rust/src}/client/engine.rs | 0 {src => rust/src}/client/events.rs | 0 {src => rust/src}/client/index_context.rs | 0 {src => rust/src}/client/indexer.rs | 0 {src => rust/src}/client/mod.rs | 0 {src => rust/src}/client/retriever.rs | 0 {src => rust/src}/client/session.rs | 0 {src => rust/src}/client/types.rs | 0 {src => rust/src}/client/workspace.rs | 0 {src => rust/src}/config/docs.rs | 0 {src => rust/src}/config/loader.rs | 0 {src => rust/src}/config/merge.rs | 0 {src => rust/src}/config/mod.rs | 0 {src => rust/src}/config/types/concurrency.rs | 0 {src => rust/src}/config/types/content.rs | 0 {src => rust/src}/config/types/fallback.rs | 0 {src => rust/src}/config/types/indexer.rs | 0 {src => rust/src}/config/types/llm.rs | 0 {src => rust/src}/config/types/llm_pool.rs | 0 {src => rust/src}/config/types/metrics.rs | 0 {src => 
rust/src}/config/types/mod.rs | 0 {src => rust/src}/config/types/retrieval.rs | 0 {src => rust/src}/config/types/storage.rs | 0 {src => rust/src}/config/validator.rs | 0 {src => rust/src}/document/mod.rs | 0 {src => rust/src}/document/node.rs | 0 {src => rust/src}/document/reference.rs | 0 {src => rust/src}/document/structure.rs | 0 {src => rust/src}/document/toc.rs | 0 {src => rust/src}/document/tree.rs | 0 {src => rust/src}/error.rs | 0 {src => rust/src}/index/config.rs | 0 .../src}/index/incremental/detector.rs | 0 {src => rust/src}/index/incremental/mod.rs | 0 .../src}/index/incremental/updater.rs | 0 {src => rust/src}/index/mod.rs | 0 {src => rust/src}/index/pipeline/context.rs | 0 {src => rust/src}/index/pipeline/executor.rs | 0 {src => rust/src}/index/pipeline/metrics.rs | 0 {src => rust/src}/index/pipeline/mod.rs | 0 .../src}/index/pipeline/orchestrator.rs | 0 {src => rust/src}/index/pipeline/policy.rs | 0 {src => rust/src}/index/stages/build.rs | 0 {src => rust/src}/index/stages/enhance.rs | 0 {src => rust/src}/index/stages/enrich.rs | 0 {src => rust/src}/index/stages/mod.rs | 0 {src => rust/src}/index/stages/optimize.rs | 0 {src => rust/src}/index/stages/parse.rs | 0 {src => rust/src}/index/stages/persist.rs | 0 {src => rust/src}/index/summary/full.rs | 0 {src => rust/src}/index/summary/lazy.rs | 0 {src => rust/src}/index/summary/mod.rs | 0 {src => rust/src}/index/summary/selective.rs | 0 {src => rust/src}/index/summary/strategy.rs | 0 {src => rust/src}/lib.rs | 0 {src => rust/src}/llm/client.rs | 0 {src => rust/src}/llm/config.rs | 0 {src => rust/src}/llm/error.rs | 0 {src => rust/src}/llm/executor.rs | 0 {src => rust/src}/llm/fallback.rs | 0 {src => rust/src}/llm/mod.rs | 0 {src => rust/src}/llm/pool.rs | 0 {src => rust/src}/llm/retry.rs | 0 {src => rust/src}/memo/mod.rs | 0 {src => rust/src}/memo/store.rs | 0 {src => rust/src}/memo/types.rs | 0 {src => rust/src}/metrics/hub.rs | 0 {src => rust/src}/metrics/llm.rs | 0 {src => rust/src}/metrics/mod.rs | 
0 {src => rust/src}/metrics/pilot.rs | 0 {src => rust/src}/metrics/retrieval.rs | 0 {src => rust/src}/parser/docx/mod.rs | 0 {src => rust/src}/parser/docx/parser.rs | 0 {src => rust/src}/parser/docx/styles.rs | 0 {src => rust/src}/parser/docx/types.rs | 0 {src => rust/src}/parser/html/config.rs | 0 {src => rust/src}/parser/html/mod.rs | 0 {src => rust/src}/parser/html/parser.rs | 0 {src => rust/src}/parser/markdown/config.rs | 0 .../src}/parser/markdown/frontmatter.rs | 0 {src => rust/src}/parser/markdown/mod.rs | 0 {src => rust/src}/parser/markdown/parser.rs | 0 {src => rust/src}/parser/mod.rs | 0 {src => rust/src}/parser/pdf/mod.rs | 0 {src => rust/src}/parser/pdf/parser.rs | 0 {src => rust/src}/parser/pdf/types.rs | 0 {src => rust/src}/parser/registry.rs | 0 {src => rust/src}/parser/toc/assigner.rs | 0 {src => rust/src}/parser/toc/detector.rs | 0 {src => rust/src}/parser/toc/mod.rs | 0 {src => rust/src}/parser/toc/parser.rs | 0 {src => rust/src}/parser/toc/processor.rs | 0 {src => rust/src}/parser/toc/repairer.rs | 0 {src => rust/src}/parser/toc/types.rs | 0 {src => rust/src}/parser/toc/verifier.rs | 0 {src => rust/src}/parser/traits.rs | 0 {src => rust/src}/parser/types.rs | 0 {src => rust/src}/retrieval/cache/mod.rs | 0 .../src}/retrieval/cache/path_cache.rs | 0 .../src}/retrieval/complexity/detector.rs | 0 {src => rust/src}/retrieval/complexity/mod.rs | 0 .../src}/retrieval/content/aggregator.rs | 0 {src => rust/src}/retrieval/content/budget.rs | 0 .../src}/retrieval/content/builder.rs | 0 {src => rust/src}/retrieval/content/config.rs | 0 {src => rust/src}/retrieval/content/mod.rs | 0 {src => rust/src}/retrieval/content/scorer.rs | 0 {src => rust/src}/retrieval/context.rs | 0 {src => rust/src}/retrieval/decompose.rs | 0 {src => rust/src}/retrieval/mod.rs | 0 {src => rust/src}/retrieval/pilot/budget.rs | 0 {src => rust/src}/retrieval/pilot/builder.rs | 0 {src => rust/src}/retrieval/pilot/config.rs | 0 {src => rust/src}/retrieval/pilot/decision.rs | 0 {src => 
rust/src}/retrieval/pilot/fallback.rs | 0 {src => rust/src}/retrieval/pilot/feedback.rs | 0 .../src}/retrieval/pilot/llm_pilot.rs | 0 {src => rust/src}/retrieval/pilot/metrics.rs | 0 {src => rust/src}/retrieval/pilot/mod.rs | 0 {src => rust/src}/retrieval/pilot/noop.rs | 0 {src => rust/src}/retrieval/pilot/parser.rs | 0 .../src}/retrieval/pilot/prompts/builder.rs | 0 .../src}/retrieval/pilot/prompts/mod.rs | 0 .../pilot/prompts/system_backtrack.txt | 0 .../pilot/prompts/system_evaluate.txt | 0 .../retrieval/pilot/prompts/system_fork.txt | 0 .../retrieval/pilot/prompts/system_start.txt | 0 .../src}/retrieval/pilot/prompts/templates.rs | 0 .../pilot/prompts/user_backtrack.txt | 0 .../retrieval/pilot/prompts/user_evaluate.txt | 0 .../retrieval/pilot/prompts/user_fork.txt | 0 .../retrieval/pilot/prompts/user_start.txt | 0 {src => rust/src}/retrieval/pilot/trait.rs | 0 .../src}/retrieval/pipeline/context.rs | 0 {src => rust/src}/retrieval/pipeline/mod.rs | 0 .../src}/retrieval/pipeline/orchestrator.rs | 0 .../src}/retrieval/pipeline/outcome.rs | 0 {src => rust/src}/retrieval/pipeline/stage.rs | 0 .../src}/retrieval/pipeline_retriever.rs | 0 {src => rust/src}/retrieval/reference.rs | 0 {src => rust/src}/retrieval/retriever.rs | 0 {src => rust/src}/retrieval/search/beam.rs | 0 {src => rust/src}/retrieval/search/bm25.rs | 0 {src => rust/src}/retrieval/search/greedy.rs | 0 {src => rust/src}/retrieval/search/mcts.rs | 0 {src => rust/src}/retrieval/search/mod.rs | 0 {src => rust/src}/retrieval/search/scorer.rs | 0 {src => rust/src}/retrieval/search/trait.rs | 0 {src => rust/src}/retrieval/stages/analyze.rs | 0 .../src}/retrieval/stages/evaluate.rs | 0 {src => rust/src}/retrieval/stages/mod.rs | 0 {src => rust/src}/retrieval/stages/plan.rs | 0 {src => rust/src}/retrieval/stages/search.rs | 0 .../src}/retrieval/strategy/cross_document.rs | 0 .../src}/retrieval/strategy/hybrid.rs | 0 .../src}/retrieval/strategy/keyword.rs | 0 {src => rust/src}/retrieval/strategy/llm.rs | 0 {src 
=> rust/src}/retrieval/strategy/mod.rs | 0 .../src}/retrieval/strategy/page_range.rs | 0 .../src}/retrieval/strategy/semantic.rs | 0 {src => rust/src}/retrieval/strategy/trait.rs | 0 .../src}/retrieval/sufficiency/llm_judge.rs | 0 .../src}/retrieval/sufficiency/mod.rs | 0 .../src}/retrieval/sufficiency/threshold.rs | 0 {src => rust/src}/retrieval/types.rs | 0 {src => rust/src}/storage/backend/file.rs | 0 {src => rust/src}/storage/backend/memory.rs | 0 {src => rust/src}/storage/backend/mod.rs | 0 .../src}/storage/backend/trait_def.rs | 0 {src => rust/src}/storage/cache.rs | 0 {src => rust/src}/storage/codec.rs | 0 {src => rust/src}/storage/lock.rs | 0 {src => rust/src}/storage/migration.rs | 0 {src => rust/src}/storage/mod.rs | 0 {src => rust/src}/storage/persistence.rs | 0 {src => rust/src}/storage/workspace.rs | 0 {src => rust/src}/throttle/config.rs | 0 {src => rust/src}/throttle/controller.rs | 0 {src => rust/src}/throttle/mod.rs | 0 {src => rust/src}/throttle/rate_limiter.rs | 0 {src => rust/src}/utils/fingerprint.rs | 0 {src => rust/src}/utils/format.rs | 0 {src => rust/src}/utils/mod.rs | 0 {src => rust/src}/utils/timing.rs | 0 {src => rust/src}/utils/token.rs | 0 198 files changed, 1536 insertions(+), 127 deletions(-) create mode 100644 examples/python_basic.py create mode 100644 pyproject.toml create mode 100644 python/Cargo.toml create mode 100644 python/README.md create mode 100644 python/src/lib.rs create mode 100644 python/tests/test_basic.py create mode 100644 python/vectorless/__init__.py create mode 100644 rust/Cargo.toml rename {src => rust/src}/client/builder.rs (100%) rename {src => rust/src}/client/context.rs (100%) rename {src => rust/src}/client/engine.rs (100%) rename {src => rust/src}/client/events.rs (100%) rename {src => rust/src}/client/index_context.rs (100%) rename {src => rust/src}/client/indexer.rs (100%) rename {src => rust/src}/client/mod.rs (100%) rename {src => rust/src}/client/retriever.rs (100%) rename {src => 
rust/src}/client/session.rs (100%) rename {src => rust/src}/client/types.rs (100%) rename {src => rust/src}/client/workspace.rs (100%) rename {src => rust/src}/config/docs.rs (100%) rename {src => rust/src}/config/loader.rs (100%) rename {src => rust/src}/config/merge.rs (100%) rename {src => rust/src}/config/mod.rs (100%) rename {src => rust/src}/config/types/concurrency.rs (100%) rename {src => rust/src}/config/types/content.rs (100%) rename {src => rust/src}/config/types/fallback.rs (100%) rename {src => rust/src}/config/types/indexer.rs (100%) rename {src => rust/src}/config/types/llm.rs (100%) rename {src => rust/src}/config/types/llm_pool.rs (100%) rename {src => rust/src}/config/types/metrics.rs (100%) rename {src => rust/src}/config/types/mod.rs (100%) rename {src => rust/src}/config/types/retrieval.rs (100%) rename {src => rust/src}/config/types/storage.rs (100%) rename {src => rust/src}/config/validator.rs (100%) rename {src => rust/src}/document/mod.rs (100%) rename {src => rust/src}/document/node.rs (100%) rename {src => rust/src}/document/reference.rs (100%) rename {src => rust/src}/document/structure.rs (100%) rename {src => rust/src}/document/toc.rs (100%) rename {src => rust/src}/document/tree.rs (100%) rename {src => rust/src}/error.rs (100%) rename {src => rust/src}/index/config.rs (100%) rename {src => rust/src}/index/incremental/detector.rs (100%) rename {src => rust/src}/index/incremental/mod.rs (100%) rename {src => rust/src}/index/incremental/updater.rs (100%) rename {src => rust/src}/index/mod.rs (100%) rename {src => rust/src}/index/pipeline/context.rs (100%) rename {src => rust/src}/index/pipeline/executor.rs (100%) rename {src => rust/src}/index/pipeline/metrics.rs (100%) rename {src => rust/src}/index/pipeline/mod.rs (100%) rename {src => rust/src}/index/pipeline/orchestrator.rs (100%) rename {src => rust/src}/index/pipeline/policy.rs (100%) rename {src => rust/src}/index/stages/build.rs (100%) rename {src => 
rust/src}/index/stages/enhance.rs (100%) rename {src => rust/src}/index/stages/enrich.rs (100%) rename {src => rust/src}/index/stages/mod.rs (100%) rename {src => rust/src}/index/stages/optimize.rs (100%) rename {src => rust/src}/index/stages/parse.rs (100%) rename {src => rust/src}/index/stages/persist.rs (100%) rename {src => rust/src}/index/summary/full.rs (100%) rename {src => rust/src}/index/summary/lazy.rs (100%) rename {src => rust/src}/index/summary/mod.rs (100%) rename {src => rust/src}/index/summary/selective.rs (100%) rename {src => rust/src}/index/summary/strategy.rs (100%) rename {src => rust/src}/lib.rs (100%) rename {src => rust/src}/llm/client.rs (100%) rename {src => rust/src}/llm/config.rs (100%) rename {src => rust/src}/llm/error.rs (100%) rename {src => rust/src}/llm/executor.rs (100%) rename {src => rust/src}/llm/fallback.rs (100%) rename {src => rust/src}/llm/mod.rs (100%) rename {src => rust/src}/llm/pool.rs (100%) rename {src => rust/src}/llm/retry.rs (100%) rename {src => rust/src}/memo/mod.rs (100%) rename {src => rust/src}/memo/store.rs (100%) rename {src => rust/src}/memo/types.rs (100%) rename {src => rust/src}/metrics/hub.rs (100%) rename {src => rust/src}/metrics/llm.rs (100%) rename {src => rust/src}/metrics/mod.rs (100%) rename {src => rust/src}/metrics/pilot.rs (100%) rename {src => rust/src}/metrics/retrieval.rs (100%) rename {src => rust/src}/parser/docx/mod.rs (100%) rename {src => rust/src}/parser/docx/parser.rs (100%) rename {src => rust/src}/parser/docx/styles.rs (100%) rename {src => rust/src}/parser/docx/types.rs (100%) rename {src => rust/src}/parser/html/config.rs (100%) rename {src => rust/src}/parser/html/mod.rs (100%) rename {src => rust/src}/parser/html/parser.rs (100%) rename {src => rust/src}/parser/markdown/config.rs (100%) rename {src => rust/src}/parser/markdown/frontmatter.rs (100%) rename {src => rust/src}/parser/markdown/mod.rs (100%) rename {src => rust/src}/parser/markdown/parser.rs (100%) rename {src => 
rust/src}/parser/mod.rs (100%) rename {src => rust/src}/parser/pdf/mod.rs (100%) rename {src => rust/src}/parser/pdf/parser.rs (100%) rename {src => rust/src}/parser/pdf/types.rs (100%) rename {src => rust/src}/parser/registry.rs (100%) rename {src => rust/src}/parser/toc/assigner.rs (100%) rename {src => rust/src}/parser/toc/detector.rs (100%) rename {src => rust/src}/parser/toc/mod.rs (100%) rename {src => rust/src}/parser/toc/parser.rs (100%) rename {src => rust/src}/parser/toc/processor.rs (100%) rename {src => rust/src}/parser/toc/repairer.rs (100%) rename {src => rust/src}/parser/toc/types.rs (100%) rename {src => rust/src}/parser/toc/verifier.rs (100%) rename {src => rust/src}/parser/traits.rs (100%) rename {src => rust/src}/parser/types.rs (100%) rename {src => rust/src}/retrieval/cache/mod.rs (100%) rename {src => rust/src}/retrieval/cache/path_cache.rs (100%) rename {src => rust/src}/retrieval/complexity/detector.rs (100%) rename {src => rust/src}/retrieval/complexity/mod.rs (100%) rename {src => rust/src}/retrieval/content/aggregator.rs (100%) rename {src => rust/src}/retrieval/content/budget.rs (100%) rename {src => rust/src}/retrieval/content/builder.rs (100%) rename {src => rust/src}/retrieval/content/config.rs (100%) rename {src => rust/src}/retrieval/content/mod.rs (100%) rename {src => rust/src}/retrieval/content/scorer.rs (100%) rename {src => rust/src}/retrieval/context.rs (100%) rename {src => rust/src}/retrieval/decompose.rs (100%) rename {src => rust/src}/retrieval/mod.rs (100%) rename {src => rust/src}/retrieval/pilot/budget.rs (100%) rename {src => rust/src}/retrieval/pilot/builder.rs (100%) rename {src => rust/src}/retrieval/pilot/config.rs (100%) rename {src => rust/src}/retrieval/pilot/decision.rs (100%) rename {src => rust/src}/retrieval/pilot/fallback.rs (100%) rename {src => rust/src}/retrieval/pilot/feedback.rs (100%) rename {src => rust/src}/retrieval/pilot/llm_pilot.rs (100%) rename {src => rust/src}/retrieval/pilot/metrics.rs 
(100%) rename {src => rust/src}/retrieval/pilot/mod.rs (100%) rename {src => rust/src}/retrieval/pilot/noop.rs (100%) rename {src => rust/src}/retrieval/pilot/parser.rs (100%) rename {src => rust/src}/retrieval/pilot/prompts/builder.rs (100%) rename {src => rust/src}/retrieval/pilot/prompts/mod.rs (100%) rename {src => rust/src}/retrieval/pilot/prompts/system_backtrack.txt (100%) rename {src => rust/src}/retrieval/pilot/prompts/system_evaluate.txt (100%) rename {src => rust/src}/retrieval/pilot/prompts/system_fork.txt (100%) rename {src => rust/src}/retrieval/pilot/prompts/system_start.txt (100%) rename {src => rust/src}/retrieval/pilot/prompts/templates.rs (100%) rename {src => rust/src}/retrieval/pilot/prompts/user_backtrack.txt (100%) rename {src => rust/src}/retrieval/pilot/prompts/user_evaluate.txt (100%) rename {src => rust/src}/retrieval/pilot/prompts/user_fork.txt (100%) rename {src => rust/src}/retrieval/pilot/prompts/user_start.txt (100%) rename {src => rust/src}/retrieval/pilot/trait.rs (100%) rename {src => rust/src}/retrieval/pipeline/context.rs (100%) rename {src => rust/src}/retrieval/pipeline/mod.rs (100%) rename {src => rust/src}/retrieval/pipeline/orchestrator.rs (100%) rename {src => rust/src}/retrieval/pipeline/outcome.rs (100%) rename {src => rust/src}/retrieval/pipeline/stage.rs (100%) rename {src => rust/src}/retrieval/pipeline_retriever.rs (100%) rename {src => rust/src}/retrieval/reference.rs (100%) rename {src => rust/src}/retrieval/retriever.rs (100%) rename {src => rust/src}/retrieval/search/beam.rs (100%) rename {src => rust/src}/retrieval/search/bm25.rs (100%) rename {src => rust/src}/retrieval/search/greedy.rs (100%) rename {src => rust/src}/retrieval/search/mcts.rs (100%) rename {src => rust/src}/retrieval/search/mod.rs (100%) rename {src => rust/src}/retrieval/search/scorer.rs (100%) rename {src => rust/src}/retrieval/search/trait.rs (100%) rename {src => rust/src}/retrieval/stages/analyze.rs (100%) rename {src => 
rust/src}/retrieval/stages/evaluate.rs (100%) rename {src => rust/src}/retrieval/stages/mod.rs (100%) rename {src => rust/src}/retrieval/stages/plan.rs (100%) rename {src => rust/src}/retrieval/stages/search.rs (100%) rename {src => rust/src}/retrieval/strategy/cross_document.rs (100%) rename {src => rust/src}/retrieval/strategy/hybrid.rs (100%) rename {src => rust/src}/retrieval/strategy/keyword.rs (100%) rename {src => rust/src}/retrieval/strategy/llm.rs (100%) rename {src => rust/src}/retrieval/strategy/mod.rs (100%) rename {src => rust/src}/retrieval/strategy/page_range.rs (100%) rename {src => rust/src}/retrieval/strategy/semantic.rs (100%) rename {src => rust/src}/retrieval/strategy/trait.rs (100%) rename {src => rust/src}/retrieval/sufficiency/llm_judge.rs (100%) rename {src => rust/src}/retrieval/sufficiency/mod.rs (100%) rename {src => rust/src}/retrieval/sufficiency/threshold.rs (100%) rename {src => rust/src}/retrieval/types.rs (100%) rename {src => rust/src}/storage/backend/file.rs (100%) rename {src => rust/src}/storage/backend/memory.rs (100%) rename {src => rust/src}/storage/backend/mod.rs (100%) rename {src => rust/src}/storage/backend/trait_def.rs (100%) rename {src => rust/src}/storage/cache.rs (100%) rename {src => rust/src}/storage/codec.rs (100%) rename {src => rust/src}/storage/lock.rs (100%) rename {src => rust/src}/storage/migration.rs (100%) rename {src => rust/src}/storage/mod.rs (100%) rename {src => rust/src}/storage/persistence.rs (100%) rename {src => rust/src}/storage/workspace.rs (100%) rename {src => rust/src}/throttle/config.rs (100%) rename {src => rust/src}/throttle/controller.rs (100%) rename {src => rust/src}/throttle/mod.rs (100%) rename {src => rust/src}/throttle/rate_limiter.rs (100%) rename {src => rust/src}/utils/fingerprint.rs (100%) rename {src => rust/src}/utils/format.rs (100%) rename {src => rust/src}/utils/mod.rs (100%) rename {src => rust/src}/utils/timing.rs (100%) rename {src => rust/src}/utils/token.rs (100%) 
diff --git a/.gitignore b/.gitignore index 329a9790..d0ac8b38 100644 --- a/.gitignore +++ b/.gitignore @@ -53,4 +53,32 @@ config.toml .vectorless.toml # Test fixtures -test_workspace/ \ No newline at end of file +test_workspace/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.venv/ +venv/ +ENV/ \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 0759c0ec..2e20d2a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,125 +1,3 @@ -[package] -name = "vectorless" -version = "0.1.19" -edition = "2024" -authors = ["zTgx "] -description = "Hierarchical, reasoning-native document intelligence engine" -license = "Apache-2.0" -repository = "https://github.com/vectorlessflow/vectorless" -homepage = "https://vectorless.dev" -documentation = "https://docs.rs/vectorless" -keywords = ["rag", "document", "retrieval", "indexing", "llm"] -categories = ["text-processing", "data-structures", "algorithms"] -readme = "README.md" -exclude = ["samples/", "docs/", ".*"] - -[dependencies] -# Async runtime -tokio = { version = "1", features = ["full"] } -async-trait = "0.1" -futures = "0.3" - -# Serialization -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -toml = "0.8" - -# Error handling -thiserror = "2" -anyhow = { version = "1", optional = true } - -# OpenAI-compatible API client -async-openai = { version = "0.34", features = ["chat-completion"] } - -# UUID -uuid = { version = "1.10", features = ["v4", "serde"] } - -# Time -chrono = { version = "0.4", default-features = false, features = ["serde", "clock"] } - -# Logging -tracing = "0.1" - -# Rate limiting -governor = "0.6" -nonzero_ext = "0.3" - -# Token counting -tiktoken-rs = "0.9" - -# Text processing -regex = "1.10" - -# Markdown parsing -pulldown-cmark = { version = "0.12", default-features = false, 
features = ["simd"] } - -# Tree data structure -indextree = { version = "4.8.0", features = ["deser"] } - -# LRU cache -lru = "0.12" - -# Checksum -sha2 = "0.10" - -# BLAKE2b hashing for fingerprints -blake2 = "0.10" -base64 = "0.22" - -# Synchronization primitives (for memo store) -parking_lot = "0.12" - -# Compression -flate2 = "1.0" - -# File locking (Unix) -[target.'cfg(unix)'.dependencies] -libc = "0.2" - -# PDF processing -pdf-extract = "0.10.0" -lopdf = "0.34" - -# DOCX processing -zip = "2.2" -roxmltree = "0.20" - -# Random number generation (for sampling) -rand = "0.8" - -# BM25 scoring -bm25 = { version = "2.3.2", features = ["parallelism"] } - -# HTML parsing -scraper = "0.22" - -[dev-dependencies] -tempfile = "3.10" -tokio-test = "0.4" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } - -[profile.release] -opt-level = 3 -lto = "thin" -codegen-units = 1 -strip = true -panic = "abort" - -[profile.dev] -opt-level = 0 -debug = true - -[profile.bench] -inherits = "release" -debug = true - -[profile.release.package."*"] -opt-level = 3 - -[lints.rust] -missing_docs = "warn" -unsafe_code = "warn" - -[lints.clippy] -all = "warn" -pedantic = "warn" +[workspace] +members = ["rust", "python"] +resolver = "2" diff --git a/README.md b/README.md index 4db8a9ea..f9539eeb 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ ![Vectorless](docs/design/logo-horizontal.svg) [![Crates.io](https://img.shields.io/crates/v/vectorless.svg)](https://crates.io/crates/vectorless) +[![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/) [![Downloads](https://img.shields.io/crates/d/vectorless.svg)](https://crates.io/crates/vectorless) [![Documentation](https://docs.rs/vectorless/badge.svg)](https://docs.rs/vectorless) [![License](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](LICENSE) @@ -90,7 +91,30 @@ Source: Chapter 4 > Section 4.2 > Reset Procedure ## Quick Start -### Installation +### Python + +```bash +pip 
install vectorless +``` + +```python +from vectorless import Engine, IndexContext + +# Create engine (uses OPENAI_API_KEY env var) +engine = Engine(workspace="./data") + +# Index a document +ctx = IndexContext.from_file("./report.pdf") +doc_id = engine.index(ctx) + +# Query +result = engine.query(doc_id, "What is the total revenue?") +print(f"Answer: {result.content}") +``` + +See [python/README.md](python/README.md) for full Python documentation. + +### Rust ```toml [dependencies] diff --git a/examples/python_basic.py b/examples/python_basic.py new file mode 100644 index 00000000..e9801e79 --- /dev/null +++ b/examples/python_basic.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python3 +# Copyright (c) 2026 vectorless developers +# SPDX-License-Identifier: Apache-2.0 + +""" +Basic example demonstrating the vectorless Python library. + +This example shows: +1. Creating an Engine with workspace +2. Indexing documents from different sources +3. Querying indexed documents +4. Managing documents (list, exists, remove) + +Prerequisites: + pip install vectorless + export OPENAI_API_KEY="sk-..." + +Usage: + python python_basic.py +""" + +import os +import tempfile +from pathlib import Path + +from vectorless import Engine, IndexContext, VectorlessError + + +def main(): + # Create a temporary workspace for this example + with tempfile.TemporaryDirectory() as workspace: + print(f"Workspace: {workspace}") + print() + + # ============================================================ + # 1. Create Engine + # ============================================================ + print("=" * 60) + print("1. 
Creating Engine") + print("=" * 60) + + # Option A: Use OPENAI_API_KEY environment variable + engine = Engine(workspace=workspace) + + # Option B: Explicit API key + # engine = Engine( + # workspace=workspace, + # api_key="sk-...", + # model="gpt-4o-mini", # optional + # ) + + print(f"Engine created successfully!") + print(f"Initial document count: {engine.len()}") + print() + + # ============================================================ + # 2. Index Documents + # ============================================================ + print("=" * 60) + print("2. Indexing Documents") + print("=" * 60) + + # 2a. Index from text content (Markdown) + markdown_content = """ +# Technical Manual + +## Chapter 1: Introduction + +This document describes the architecture of our system. + +## Chapter 2: Installation + +### System Requirements + +- Python 3.9+ +- Rust 1.75+ + +### Steps + +1. Install dependencies +2. Configure environment +3. Run the application + +## Chapter 3: API Reference + +### Engine + +The main entry point for vectorless. + +```python +engine = Engine(workspace="./data") +``` + +### IndexContext + +Context for indexing documents from various sources. +""" + ctx_md = IndexContext.from_text( + markdown_content, + name="technical_manual", + format="markdown" + ) + doc_id_md = engine.index(ctx_md) + print(f"Indexed markdown document: {doc_id_md}") + + # 2b. Index from text content (HTML) + html_content = """ + +Product Guide + +

<h1>Product Guide</h1>

+

<h2>Getting Started</h2>

+

<p>Welcome to our product. This guide will help you get started.</p>

+

<h2>Features</h2>

+ + + +""" + ctx_html = IndexContext.from_text( + html_content, + name="product_guide", + format="html" + ) + doc_id_html = engine.index(ctx_html) + print(f"Indexed HTML document: {doc_id_html}") + + # 2c. Index from text content (plain text) + text_content = """ +Meeting Notes - Q4 Planning + +Date: 2024-01-15 + +Attendees: Alice, Bob, Charlie + +Agenda: +1. Review Q3 performance +2. Set Q4 goals +3. Resource allocation + +Key Decisions: +- Increase marketing budget by 20% +- Launch new product in March +- Hire 5 additional engineers +""" + ctx_text = IndexContext.from_text( + text_content, + name="meeting_notes", + format="text" + ) + doc_id_text = engine.index(ctx_text) + print(f"Indexed text document: {doc_id_text}") + + # 2d. Index from file (if you have actual files) + # ctx_file = IndexContext.from_file("./report.pdf") + # doc_id_file = engine.index(ctx_file) + # print(f"Indexed file: {doc_id_file}") + + print(f"\nTotal documents indexed: {engine.len()}") + print() + + # ============================================================ + # 3. List Documents + # ============================================================ + print("=" * 60) + print("3. Listing Documents") + print("=" * 60) + + docs = engine.list_docs() + for doc in docs: + print(f" - {doc.name} (id: {doc.id}, format: {doc.format})") + if doc.line_count: + print(f" Lines: {doc.line_count}") + print() + + # ============================================================ + # 4. Query Documents + # ============================================================ + print("=" * 60) + print("4. 
Querying Documents") + print("=" * 60) + + # Query the technical manual + questions = [ + "What are the system requirements?", + "How do I create an Engine?", + "What are the installation steps?", + ] + + for question in questions: + result = engine.query(doc_id_md, question) + print(f"Q: {question}") + print(f"A: {result.content[:200]}...") + print(f" Score: {result.score:.2f}") + print() + + # Query the meeting notes + result = engine.query(doc_id_text, "What was decided about the marketing budget?") + print(f"Q: What was decided about the marketing budget?") + print(f"A: {result.content}") + print(f" Score: {result.score:.2f}") + print() + + # ============================================================ + # 5. Check Document Existence + # ============================================================ + print("=" * 60) + print("5. Checking Document Existence") + print("=" * 60) + + print(f"Document {doc_id_md[:8]}... exists: {engine.exists(doc_id_md)}") + print(f"Document 'nonexistent' exists: {engine.exists('nonexistent')}") + print() + + # ============================================================ + # 6. Error Handling + # ============================================================ + print("=" * 60) + print("6. Error Handling") + print("=" * 60) + + try: + engine.query("nonexistent_doc_id", "question") + except VectorlessError as e: + print(f"Caught error: {e.message}") + print(f"Error kind: {e.kind}") + print() + + # ============================================================ + # 7. Remove Documents + # ============================================================ + print("=" * 60) + print("7. 
Removing Documents") + print("=" * 60) + + # Remove the HTML document + removed = engine.remove(doc_id_html) + print(f"Removed {doc_id_html}: {removed}") + print(f"Documents remaining: {engine.len()}") + + # Try to remove again (should return False) + removed_again = engine.remove(doc_id_html) + print(f"Remove again: {removed_again}") + print() + + # ============================================================ + # 8. Clear All Documents + # ============================================================ + print("=" * 60) + print("8. Clearing All Documents") + print("=" * 60) + + cleared_count = engine.clear() + print(f"Cleared {cleared_count} documents") + print(f"Final document count: {engine.len()}") + print() + + print("=" * 60) + print("Example completed successfully!") + print("=" * 60) + + +if __name__ == "__main__": + # Check for API key + if not os.environ.get("OPENAI_API_KEY"): + print("Warning: OPENAI_API_KEY environment variable not set.") + print("Some operations may fail without an API key.") + print() + + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..2c75d9ca --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,65 @@ +[build-system] +requires = ["maturin>=1.5,<2.0"] +build-backend = "maturin" + +[project] +name = "vectorless" +version = "0.1.0" +description = "Hierarchical document intelligence without vectors" +readme = "README.md" +requires-python = ">=3.9" +license = { text = "Apache-2.0" } +authors = [ + { name = "vectorless developers", email = "beautifularea@gmail.com" } +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Rust", + 
"Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Text Processing :: Linguistic", +] +keywords = ["rag", "document", "retrieval", "llm", "document-intelligence"] + +dependencies = [] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0", + "pytest-asyncio>=0.21", + "mypy>=1.0", +] + +[project.urls] +Homepage = "https://vectorless.dev" +Repository = "https://github.com/vectorlessflow/vectorless" +Documentation = "https://docs.rs/vectorless" + +[tool.maturin] +python-source = "python" +module-name = "vectorless._vectorless" +manifest-path = "python/Cargo.toml" +features = ["pyo3/extension-module"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["python/tests"] + +[tool.mypy] +python_version = "3.9" +warn_return_any = true +warn_unused_configs = true + +[tool.ruff] +line-length = 100 +target-version = "py39" + +[tool.ruff.lint] +select = ["E", "F", "W", "I", "N", "UP", "B"] diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 00000000..e59806e2 --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "vectorless-py" +version = "0.1.0" +edition = "2024" +authors = ["zTgx "] +description = "Python bindings for vectorless" +license = "Apache-2.0" + +[lib] +name = "vectorless" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.22", features = ["extension-module"] } +tokio = { version = "1", features = ["rt-multi-thread"] } +vectorless = { path = "../rust" } diff --git a/python/README.md b/python/README.md new file mode 100644 index 00000000..9f966f93 --- /dev/null +++ b/python/README.md @@ -0,0 +1,221 @@ +# Vectorless Python Bindings + +Python bindings for [vectorless](https://github.com/vectorlessflow/vectorless) - a hierarchical document intelligence engine. 
+ +## Installation + +```bash +pip install vectorless +``` + +## Quick Start + +```python +from vectorless import Engine, IndexContext + +# Create engine (uses OPENAI_API_KEY env var by default) +engine = Engine(workspace="./data") + +# Or with explicit API key +engine = Engine(workspace="./data", api_key="sk-...") + +# Index a document +ctx = IndexContext.from_file("./report.pdf") +doc_id = engine.index(ctx) +print(f"Indexed: {doc_id}") + +# Query the document +result = engine.query(doc_id, "What is the total revenue?") +print(f"Answer: {result.content}") +print(f"Score: {result.score:.2f}") + +# List all documents +for doc in engine.list_docs(): + print(f" - {doc.name} ({doc.id})") + +# Cleanup +engine.remove(doc_id) +``` + +## API Reference + +### Engine + +The main entry point for vectorless. + +```python +class Engine: + def __init__( + self, + workspace: str, + api_key: str | None = None, + model: str | None = None, + endpoint: str | None = None, + ): ... + + def index(self, ctx: IndexContext) -> str: ... + def query(self, doc_id: str, question: str) -> QueryResult: ... + def list_docs(self) -> list[DocumentInfo]: ... + def remove(self, doc_id: str) -> bool: ... + def clear(self) -> int: ... + def exists(self, doc_id: str) -> bool: ... + def len(self) -> int: ... +``` + +### IndexContext + +Context for indexing documents. + +```python +class IndexContext: + @staticmethod + def from_file(path: str, name: str | None = None) -> IndexContext: ... + + @staticmethod + def from_text( + content: str, + name: str | None = None, + format: str = "markdown", + ) -> IndexContext: ... + + @staticmethod + def from_bytes( + data: bytes, + name: str, + format: str, + ) -> IndexContext: ... +``` + +**Supported formats:** +- `"markdown"` / `"md"` - Markdown content +- `"pdf"` - PDF documents +- `"docx"` / `"doc"` - Word documents +- `"html"` / `"htm"` - HTML content +- `"text"` / `"txt"` - Plain text + +### QueryResult + +Result of a document query. 
+ +```python +class QueryResult: + @property + def doc_id(self) -> str: ... + + @property + def content(self) -> str: ... + + @property + def score(self) -> float: ... + + @property + def node_ids(self) -> list[str]: ... +``` + +### DocumentInfo + +Information about an indexed document. + +```python +class DocumentInfo: + @property + def id(self) -> str: ... + @property + def name(self) -> str: ... + @property + def format(self) -> str: ... + @property + def description(self) -> str | None: ... + @property + def page_count(self) -> int | None: ... + @property + def line_count(self) -> int | None: ... +``` + +### VectorlessError + +Exception raised for vectorless errors. + +```python +class VectorlessError(Exception): + @property + def message(self) -> str: ... + + @property + def kind(self) -> str: ... # "config", "parse", "not_found", "workspace", "llm", or "unknown" +``` + +## Examples + +### Index from different sources + +```python +from vectorless import Engine, IndexContext + +engine = Engine(workspace="./data") + +# From file (format auto-detected) +doc_id = engine.index(IndexContext.from_file("./report.pdf")) + +# From markdown text +doc_id = engine.index(IndexContext.from_text( + "# Report\n\nThis is the content...", + name="report", + format="markdown" +)) + +# From HTML +doc_id = engine.index(IndexContext.from_text( + "

Title

", + name="page", + format="html" +)) + +# From bytes (e.g., downloaded file) +with open("document.pdf", "rb") as f: + doc_id = engine.index(IndexContext.from_bytes( + f.read(), + name="downloaded", + format="pdf" + )) +``` + +### Error handling + +```python +from vectorless import Engine, VectorlessError + +engine = Engine(workspace="./data") + +try: + result = engine.query("nonexistent", "question") +except VectorlessError as e: + print(f"Error: {e.message} (kind={e.kind})") +``` + +## Development + +### Building from source + +```bash +# Install maturin +pip install maturin + +# Build and install +cd python +maturin develop + +# Run tests +pytest +``` + +### Publishing to PyPI + +```bash +maturin build --release +maturin publish +``` + +## License + +Apache-2.0 diff --git a/python/src/lib.rs b/python/src/lib.rs new file mode 100644 index 00000000..72c2f061 --- /dev/null +++ b/python/src/lib.rs @@ -0,0 +1,520 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Python bindings for vectorless. +//! +//! This module provides Python bindings using PyO3. + +use pyo3::prelude::*; +use pyo3::exceptions::PyException; +use std::sync::Arc; +use tokio::runtime::Runtime; + +// Use ::vectorless to avoid conflict with the #[pymodule] named vectorless +use ::vectorless::client::{Engine, EngineBuilder, IndexContext, QueryResult, DocumentInfo}; +use ::vectorless::parser::DocumentFormat; +use ::vectorless::error::Error as RustError; + +// ============================================================ +// Error Types +// ============================================================ + +/// Python exception for vectorless errors. 
+#[pyclass(extends = PyException, subclass)] +pub struct VectorlessError { + message: String, + kind: String, +} + +#[pymethods] +impl VectorlessError { + #[getter] + fn message(&self) -> &str { + &self.message + } + + #[getter] + fn kind(&self) -> &str { + &self.kind + } + + fn __str__(&self) -> &str { + &self.message + } + + fn __repr__(&self) -> String { + format!("VectorlessError('{}', kind='{}')", self.message, self.kind) + } +} + +impl VectorlessError { + fn new(message: String, kind: &str) -> Self { + Self { + message, + kind: kind.to_string(), + } + } +} + +impl std::convert::From for PyErr { + fn from(err: VectorlessError) -> PyErr { + PyErr::new::((err.message, err.kind)) + } +} + +/// Convert vectorless errors to Python exceptions. +fn to_py_err(e: RustError) -> PyErr { + let message = e.to_string(); + let kind = match &e { + RustError::DocumentNotFound(_) => "not_found", + RustError::Parse(_) => "parse", + RustError::Config(_) => "config", + RustError::Workspace(_) => "workspace", + RustError::Llm(_) => "llm", + _ => "unknown", + }; + VectorlessError::new(message, kind).into() +} + +// ============================================================ +// IndexContext +// ============================================================ + +/// Context for indexing a document. +/// +/// Create using the static methods: +/// +/// ```python +/// from vectorless import IndexContext +/// +/// # From file +/// ctx = IndexContext.from_file("./document.pdf") +/// +/// # From text +/// ctx = IndexContext.from_text("# Title\\nContent...", name="doc") +/// +/// # From bytes +/// ctx = IndexContext.from_bytes(data, name="doc", format="pdf") +/// ``` +#[pyclass(name = "IndexContext")] +pub struct PyIndexContext { + inner: IndexContext, +} + +#[pymethods] +impl PyIndexContext { + /// Create an IndexContext from a file path. + /// + /// The format is detected from the file extension. + /// + /// Args: + /// path: Path to the file. + /// name: Optional document name.
+ /// + /// Returns: + /// IndexContext for the file. + #[staticmethod] + #[pyo3(signature = (path, name=None))] + fn from_file(path: String, name: Option) -> Self { + let mut ctx = IndexContext::from_path(&path); + if let Some(n) = name { + ctx = ctx.with_name(&n); + } + Self { inner: ctx } + } + + /// Create an IndexContext from text content. + /// + /// Args: + /// content: The text content. + /// name: Optional document name. + /// format: Content format ("markdown", "html", "text"). Default: "markdown". + /// + /// Returns: + /// IndexContext for the content. + #[staticmethod] + #[pyo3(signature = (content, name=None, format="markdown"))] + fn from_text(content: String, name: Option, format: &str) -> PyResult { + let doc_format = parse_format(format)?; + let mut ctx = IndexContext::from_content(&content, doc_format); + if let Some(n) = name { + ctx = ctx.with_name(&n); + } + Ok(Self { inner: ctx }) + } + + /// Create an IndexContext from binary data. + /// + /// Args: + /// data: The binary data. + /// name: Document name (required). + /// format: Content format ("pdf", "docx"). + /// + /// Returns: + /// IndexContext for the bytes. + #[staticmethod] + #[pyo3(signature = (data, name, format))] + fn from_bytes(data: Vec, name: String, format: &str) -> PyResult { + let doc_format = parse_format(format)?; + let ctx = IndexContext::from_bytes(data, doc_format).with_name(&name); + Ok(Self { inner: ctx }) + } +} + +/// Parse format string to DocumentFormat. 
+fn parse_format(format: &str) -> PyResult { + match format.to_lowercase().as_str() { + "markdown" | "md" => Ok(DocumentFormat::Markdown), + "pdf" => Ok(DocumentFormat::Pdf), + "docx" | "doc" => Ok(DocumentFormat::Docx), + "html" | "htm" => Ok(DocumentFormat::Html), + "text" | "txt" => Ok(DocumentFormat::Text), + _ => Err(PyErr::from(VectorlessError::new( + format!("Unknown format: {}", format), + "config", + ))), + } +} + +// ============================================================ +// QueryResult +// ============================================================ + +/// Result of a document query. +#[pyclass(name = "QueryResult")] +pub struct PyQueryResult { + inner: QueryResult, +} + +#[pymethods] +impl PyQueryResult { + /// The document ID. + #[getter] + fn doc_id(&self) -> &str { + &self.inner.doc_id + } + + /// The retrieved content. + #[getter] + fn content(&self) -> &str { + &self.inner.content + } + + /// Relevance score (0.0 to 1.0). + #[getter] + fn score(&self) -> f32 { + self.inner.score + } + + /// Node IDs that matched. + #[getter] + fn node_ids(&self) -> Vec { + self.inner.node_ids.clone() + } + + fn __repr__(&self) -> String { + format!( + "QueryResult(doc_id='{}', score={:.2}, content_len={})", + self.inner.doc_id, + self.inner.score, + self.inner.content.len() + ) + } +} + +// ============================================================ +// DocumentInfo +// ============================================================ + +/// Information about an indexed document. +#[pyclass(name = "DocumentInfo")] +pub struct PyDocumentInfo { + inner: DocumentInfo, +} + +#[pymethods] +impl PyDocumentInfo { + /// Document ID. + #[getter] + fn id(&self) -> &str { + &self.inner.id + } + + /// Document name. + #[getter] + fn name(&self) -> &str { + &self.inner.name + } + + /// Document format. + #[getter] + fn format(&self) -> &str { + &self.inner.format + } + + /// Document description (if available).
+ #[getter] + fn description(&self) -> Option<&str> { + self.inner.description.as_deref() + } + + /// Page count (for PDFs). + #[getter] + fn page_count(&self) -> Option { + self.inner.page_count + } + + /// Line count (for text files). + #[getter] + fn line_count(&self) -> Option { + self.inner.line_count + } + + fn __repr__(&self) -> String { + format!( + "DocumentInfo(id='{}', name='{}', format='{}')", + self.inner.id, self.inner.name, self.inner.format + ) + } +} + +// ============================================================ +// Engine +// ============================================================ + +/// The main vectorless engine. +/// +/// Create an engine with a workspace directory: +/// +/// ```python +/// from vectorless import Engine +/// +/// engine = Engine(workspace="./data") +/// ``` +/// +/// Or with an explicit API key: +/// +/// ```python +/// engine = Engine(workspace="./data", api_key="sk-...") +/// ``` +#[pyclass(name = "Engine")] +pub struct PyEngine { + inner: Arc, + rt: Runtime, +} + +#[pymethods] +impl PyEngine { + /// Create a new Engine. + /// + /// Args: + /// workspace: Path to the workspace directory. + /// api_key: Optional API key. If not provided, uses OPENAI_API_KEY env var. + /// model: Optional model name. Default: "gpt-4o-mini". + /// endpoint: Optional API endpoint. + /// + /// Raises: + /// VectorlessError: If engine creation fails.
+ #[new] + #[pyo3(signature = (workspace, api_key=None, model=None, endpoint=None))] + fn new( + workspace: String, + api_key: Option, + model: Option, + endpoint: Option, + ) -> PyResult { + let rt = Runtime::new().map_err(|e| { + PyErr::from(VectorlessError::new( + format!("Failed to create tokio runtime: {}", e), + "config", + )) + })?; + + // Resolve API key: explicit > env var + let resolved_api_key = api_key.or_else(|| std::env::var("OPENAI_API_KEY").ok()); + + let engine = rt.block_on(async { + let mut builder = EngineBuilder::new().with_workspace(&workspace); + + if let Some(key) = resolved_api_key { + builder = builder.with_openai(key); + } + + if let Some(m) = model { + builder = builder.with_model(&m, None); + } + + if let Some(e) = endpoint { + builder = builder.with_endpoint(&e); + } + + builder.build().await + }); + + let engine = engine.map_err(|e| { + PyErr::from(VectorlessError::new( + format!("Failed to create engine: {}", e), + "config", + )) + })?; + + Ok(Self { + inner: Arc::new(engine), + rt, + }) + } + + /// Index a document. + /// + /// Args: + /// ctx: IndexContext created from from_file, from_text, or from_bytes. + /// + /// Returns: + /// Document ID string. + /// + /// Raises: + /// VectorlessError: If indexing fails. + fn index(&self, ctx: &PyIndexContext) -> PyResult { + let engine = Arc::clone(&self.inner); + let index_ctx = ctx.inner.clone(); + + self.rt.block_on(async move { + engine.index(index_ctx).await.map_err(to_py_err) + }) + } + + /// Query a document. + /// + /// Args: + /// doc_id: Document ID returned from index(). + /// question: The question to ask. + /// + /// Returns: + /// QueryResult with the answer. + /// + /// Raises: + /// VectorlessError: If query fails. 
+ fn query(&self, doc_id: String, question: String) -> PyResult { + let engine = Arc::clone(&self.inner); + + let result = self.rt.block_on(async move { + engine.query(&doc_id, &question).await.map_err(to_py_err) + })?; + + Ok(PyQueryResult { inner: result }) + } + + /// List all indexed documents. + /// + /// Returns: + /// List of DocumentInfo objects. + /// + /// Raises: + /// VectorlessError: If listing fails. + fn list_docs(&self) -> PyResult> { + let engine = Arc::clone(&self.inner); + + let docs = self.rt.block_on(async move { + engine.list_documents().await.map_err(to_py_err) + })?; + + Ok(docs + .into_iter() + .map(|d| PyDocumentInfo { inner: d }) + .collect()) + } + + /// Remove a document. + /// + /// Args: + /// doc_id: Document ID to remove. + /// + /// Returns: + /// True if document was removed, False if not found. + /// + /// Raises: + /// VectorlessError: If removal fails. + fn remove(&self, doc_id: String) -> PyResult { + let engine = Arc::clone(&self.inner); + + self.rt.block_on(async move { + engine.remove(&doc_id).await.map_err(to_py_err) + }) + } + + /// Clear all documents. + /// + /// Returns: + /// Number of documents removed. + /// + /// Raises: + /// VectorlessError: If clearing fails. + fn clear(&self) -> PyResult { + let engine = Arc::clone(&self.inner); + + self.rt.block_on(async move { engine.clear().await.map_err(to_py_err) }) + } + + /// Check if a document exists. + /// + /// Args: + /// doc_id: Document ID to check. + /// + /// Returns: + /// True if document exists. + fn exists(&self, doc_id: String) -> PyResult { + let engine = Arc::clone(&self.inner); + + self.rt.block_on(async move { engine.exists(&doc_id).await.map_err(to_py_err) }) + } + + /// Get the number of indexed documents. 
+ fn len(&self) -> PyResult { + let engine = Arc::clone(&self.inner); + + self.rt.block_on(async move { engine.len().await.map_err(to_py_err) }) + } + + fn __repr__(&self) -> String { + "Engine(workspace=...)".to_string() + } +} + +// ============================================================ +// Module Definition +// ============================================================ + +/// Vectorless - Hierarchical document intelligence without vectors. +/// +/// A document intelligence engine that uses tree-based understanding +/// instead of vector databases. +/// +/// Quick Start: +/// +/// ```python +/// from vectorless import Engine, IndexContext +/// +/// # Create engine +/// engine = Engine(workspace="./data") +/// +/// # Index a document +/// ctx = IndexContext.from_file("./report.pdf") +/// doc_id = engine.index(ctx) +/// +/// # Query +/// result = engine.query(doc_id, "What is the revenue?") +/// print(result.content) +/// ``` +#[pymodule] +fn _vectorless(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // Add version + m.add("__version__", env!("CARGO_PKG_VERSION"))?; + + Ok(()) +} diff --git a/python/tests/test_basic.py b/python/tests/test_basic.py new file mode 100644 index 00000000..c541abc8 --- /dev/null +++ b/python/tests/test_basic.py @@ -0,0 +1,221 @@ +# Copyright (c) 2026 vectorless developers +# SPDX-License-Identifier: Apache-2.0 + +"""Basic tests for vectorless Python bindings.""" + +import pytest + + +def test_import(): + """Test that we can import the module.""" + import vectorless + + assert hasattr(vectorless, "Engine") + assert hasattr(vectorless, "IndexContext") + assert hasattr(vectorless, "QueryResult") + assert hasattr(vectorless, "DocumentInfo") + assert hasattr(vectorless, "VectorlessError") + + +def test_version(): + """Test that version is available.""" + import vectorless + + assert vectorless.__version__ is not None + 
assert isinstance(vectorless.__version__, str) + + +def test_index_context_from_file(): + """Test creating IndexContext from file.""" + from vectorless import IndexContext + + ctx = IndexContext.from_file("./test.md") + assert ctx is not None + + +def test_index_context_from_file_with_name(): + """Test creating IndexContext from file with custom name.""" + from vectorless import IndexContext + + ctx = IndexContext.from_file("./test.md", name="custom_name") + assert ctx is not None + + +def test_index_context_from_text(): + """Test creating IndexContext from text.""" + from vectorless import IndexContext + + ctx = IndexContext.from_text("# Test\n\nContent here.") + assert ctx is not None + + +def test_index_context_from_text_with_name(): + """Test creating IndexContext from text with custom name.""" + from vectorless import IndexContext + + ctx = IndexContext.from_text( + "# Test\n\nContent here.", + name="test_doc", + format="markdown", + ) + assert ctx is not None + + +def test_index_context_from_text_html(): + """Test creating IndexContext from HTML text.""" + from vectorless import IndexContext + + ctx = IndexContext.from_text( + "

Title

Content

", + name="page", + format="html", + ) + assert ctx is not None + + +def test_index_context_from_bytes(): + """Test creating IndexContext from bytes.""" + from vectorless import IndexContext + + data = b"%PDF-1.4\n%fake pdf" + ctx = IndexContext.from_bytes(data, name="test.pdf", format="pdf") + assert ctx is not None + + +def test_index_context_invalid_format(): + """Test that invalid format raises error.""" + from vectorless import IndexContext, VectorlessError + + with pytest.raises(VectorlessError) as exc_info: + IndexContext.from_text("content", format="invalid_format") + + assert "Unknown format" in str(exc_info.value.message) + assert exc_info.value.kind == "config" + + +@pytest.mark.asyncio +async def test_engine_create(): + """Test creating an engine.""" + import tempfile + from vectorless import Engine + + with tempfile.TemporaryDirectory() as tmpdir: + engine = Engine(workspace=tmpdir) + assert engine is not None + + +@pytest.mark.asyncio +async def test_engine_len(): + """Test engine document count.""" + import tempfile + from vectorless import Engine + + with tempfile.TemporaryDirectory() as tmpdir: + engine = Engine(workspace=tmpdir) + count = engine.len() + assert isinstance(count, int) + assert count >= 0 + + +@pytest.mark.asyncio +async def test_engine_list_docs(): + """Test listing documents.""" + import tempfile + from vectorless import Engine + + with tempfile.TemporaryDirectory() as tmpdir: + engine = Engine(workspace=tmpdir) + docs = engine.list_docs() + assert isinstance(docs, list) + + +@pytest.mark.asyncio +async def test_engine_clear(): + """Test clearing all documents.""" + import tempfile + from vectorless import Engine + + with tempfile.TemporaryDirectory() as tmpdir: + engine = Engine(workspace=tmpdir) + removed = engine.clear() + assert isinstance(removed, int) + + +@pytest.mark.asyncio +async def test_engine_exists(): + """Test checking if document exists.""" + import tempfile + from vectorless import Engine + + with 
tempfile.TemporaryDirectory() as tmpdir: + engine = Engine(workspace=tmpdir) + exists = engine.exists("nonexistent") + assert exists is False + + +@pytest.mark.asyncio +async def test_index_and_query_text(): + """Test indexing and querying a text document.""" + import tempfile + from vectorless import Engine, IndexContext + + with tempfile.TemporaryDirectory() as tmpdir: + engine = Engine(workspace=tmpdir) + + # Index a simple document + ctx = IndexContext.from_text( + "# Test Document\n\nThis is a test document about apples.", + name="test", + ) + doc_id = engine.index(ctx) + + assert doc_id is not None + assert isinstance(doc_id, str) + + # Query the document + result = engine.query(doc_id, "What is this document about?") + + assert result.doc_id == doc_id + assert result.content is not None + assert result.score >= 0.0 + + +@pytest.mark.asyncio +async def test_remove_document(): + """Test removing a document.""" + import tempfile + from vectorless import Engine, IndexContext + + with tempfile.TemporaryDirectory() as tmpdir: + engine = Engine(workspace=tmpdir) + + # Index a document + ctx = IndexContext.from_text("# Test\n\nContent", name="test") + doc_id = engine.index(ctx) + + # Remove it + removed = engine.remove(doc_id) + assert removed is True + + # Check it's gone + exists = engine.exists(doc_id) + assert exists is False + + +@pytest.mark.asyncio +async def test_query_nonexistent(): + """Test querying a nonexistent document.""" + import tempfile + from vectorless import Engine, VectorlessError + + with tempfile.TemporaryDirectory() as tmpdir: + engine = Engine(workspace=tmpdir) + + with pytest.raises(VectorlessError) as exc_info: + engine.query("nonexistent", "question") + + assert exc_info.value.kind == "not_found" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/python/vectorless/__init__.py b/python/vectorless/__init__.py new file mode 100644 index 00000000..8b995746 --- /dev/null +++ b/python/vectorless/__init__.py @@ -0,0 
+1,38 @@ +""" +Vectorless - Hierarchical document intelligence without vectors. + +A document intelligence engine that uses tree-based understanding +instead of vector databases for accurate, explainable retrieval. + +Quick Start: + from vectorless import Engine, IndexContext + + # Create engine + engine = Engine(workspace="./data") + + # Index a document + ctx = IndexContext.from_file("./report.pdf") + doc_id = engine.index(ctx) + + # Query + result = engine.query(doc_id, "What is the revenue?") + print(result.content) +""" + +from vectorless.vectorless import ( + Engine, + IndexContext, + QueryResult, + DocumentInfo, + VectorlessError, + __version__, +) + +__all__ = [ + "Engine", + "IndexContext", + "QueryResult", + "DocumentInfo", + "VectorlessError", + "__version__", +] diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 00000000..9d62e781 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,125 @@ +[package] +name = "vectorless" +version = "0.1.19" +edition = "2024" +authors = ["zTgx "] +description = "Hierarchical, reasoning-native document intelligence engine" +license = "Apache-2.0" +repository = "https://github.com/vectorlessflow/vectorless" +homepage = "https://vectorless.dev" +documentation = "https://docs.rs/vectorless" +keywords = ["rag", "document", "retrieval", "indexing", "llm"] +categories = ["text-processing", "data-structures", "algorithms"] +readme = "../README.md" +exclude = ["samples/", "docs/", ".*"] + +[dependencies] +# Async runtime +tokio = { version = "1", features = ["full"] } +async-trait = "0.1" +futures = "0.3" + +# Serialization +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +toml = "0.8" + +# Error handling +thiserror = "2" +anyhow = { version = "1", optional = true } + +# OpenAI-compatible API client +async-openai = { version = "0.34", features = ["chat-completion"] } + +# UUID +uuid = { version = "1.10", features = ["v4", "serde"] } + +# Time +chrono = { version = "0.4", default-features = 
false, features = ["serde", "clock"] } + +# Logging +tracing = "0.1" + +# Rate limiting +governor = "0.6" +nonzero_ext = "0.3" + +# Token counting +tiktoken-rs = "0.9" + +# Text processing +regex = "1.10" + +# Markdown parsing +pulldown-cmark = { version = "0.12", default-features = false, features = ["simd"] } + +# Tree data structure +indextree = { version = "4.8.0", features = ["deser"] } + +# LRU cache +lru = "0.12" + +# Checksum +sha2 = "0.10" + +# BLAKE2b hashing for fingerprints +blake2 = "0.10" +base64 = "0.22" + +# Synchronization primitives (for memo store) +parking_lot = "0.12" + +# Compression +flate2 = "1.0" + +# PDF processing +pdf-extract = "0.10.0" +lopdf = "0.34" + +# DOCX processing +zip = "2.2" +roxmltree = "0.20" + +# Random number generation (for sampling) +rand = "0.8" + +# BM25 scoring +bm25 = { version = "2.3.2", features = ["parallelism"] } + +# HTML parsing +scraper = "0.22" + +# File locking (Unix) +[target.'cfg(unix)'.dependencies] +libc = "0.2" + +[dev-dependencies] +tempfile = "3.10" +tokio-test = "0.4" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[profile.release] +opt-level = 3 +lto = "thin" +codegen-units = 1 +strip = true +panic = "abort" + +[profile.dev] +opt-level = 0 +debug = true + +[profile.bench] +inherits = "release" +debug = true + +[profile.release.package."*"] +opt-level = 3 + +[lints.rust] +missing_docs = "warn" +unsafe_code = "warn" + +[lints.clippy] +all = "warn" +pedantic = "warn" diff --git a/src/client/builder.rs b/rust/src/client/builder.rs similarity index 100% rename from src/client/builder.rs rename to rust/src/client/builder.rs diff --git a/src/client/context.rs b/rust/src/client/context.rs similarity index 100% rename from src/client/context.rs rename to rust/src/client/context.rs diff --git a/src/client/engine.rs b/rust/src/client/engine.rs similarity index 100% rename from src/client/engine.rs rename to rust/src/client/engine.rs diff --git a/src/client/events.rs 
b/rust/src/client/events.rs similarity index 100% rename from src/client/events.rs rename to rust/src/client/events.rs diff --git a/src/client/index_context.rs b/rust/src/client/index_context.rs similarity index 100% rename from src/client/index_context.rs rename to rust/src/client/index_context.rs diff --git a/src/client/indexer.rs b/rust/src/client/indexer.rs similarity index 100% rename from src/client/indexer.rs rename to rust/src/client/indexer.rs diff --git a/src/client/mod.rs b/rust/src/client/mod.rs similarity index 100% rename from src/client/mod.rs rename to rust/src/client/mod.rs diff --git a/src/client/retriever.rs b/rust/src/client/retriever.rs similarity index 100% rename from src/client/retriever.rs rename to rust/src/client/retriever.rs diff --git a/src/client/session.rs b/rust/src/client/session.rs similarity index 100% rename from src/client/session.rs rename to rust/src/client/session.rs diff --git a/src/client/types.rs b/rust/src/client/types.rs similarity index 100% rename from src/client/types.rs rename to rust/src/client/types.rs diff --git a/src/client/workspace.rs b/rust/src/client/workspace.rs similarity index 100% rename from src/client/workspace.rs rename to rust/src/client/workspace.rs diff --git a/src/config/docs.rs b/rust/src/config/docs.rs similarity index 100% rename from src/config/docs.rs rename to rust/src/config/docs.rs diff --git a/src/config/loader.rs b/rust/src/config/loader.rs similarity index 100% rename from src/config/loader.rs rename to rust/src/config/loader.rs diff --git a/src/config/merge.rs b/rust/src/config/merge.rs similarity index 100% rename from src/config/merge.rs rename to rust/src/config/merge.rs diff --git a/src/config/mod.rs b/rust/src/config/mod.rs similarity index 100% rename from src/config/mod.rs rename to rust/src/config/mod.rs diff --git a/src/config/types/concurrency.rs b/rust/src/config/types/concurrency.rs similarity index 100% rename from src/config/types/concurrency.rs rename to 
rust/src/config/types/concurrency.rs diff --git a/src/config/types/content.rs b/rust/src/config/types/content.rs similarity index 100% rename from src/config/types/content.rs rename to rust/src/config/types/content.rs diff --git a/src/config/types/fallback.rs b/rust/src/config/types/fallback.rs similarity index 100% rename from src/config/types/fallback.rs rename to rust/src/config/types/fallback.rs diff --git a/src/config/types/indexer.rs b/rust/src/config/types/indexer.rs similarity index 100% rename from src/config/types/indexer.rs rename to rust/src/config/types/indexer.rs diff --git a/src/config/types/llm.rs b/rust/src/config/types/llm.rs similarity index 100% rename from src/config/types/llm.rs rename to rust/src/config/types/llm.rs diff --git a/src/config/types/llm_pool.rs b/rust/src/config/types/llm_pool.rs similarity index 100% rename from src/config/types/llm_pool.rs rename to rust/src/config/types/llm_pool.rs diff --git a/src/config/types/metrics.rs b/rust/src/config/types/metrics.rs similarity index 100% rename from src/config/types/metrics.rs rename to rust/src/config/types/metrics.rs diff --git a/src/config/types/mod.rs b/rust/src/config/types/mod.rs similarity index 100% rename from src/config/types/mod.rs rename to rust/src/config/types/mod.rs diff --git a/src/config/types/retrieval.rs b/rust/src/config/types/retrieval.rs similarity index 100% rename from src/config/types/retrieval.rs rename to rust/src/config/types/retrieval.rs diff --git a/src/config/types/storage.rs b/rust/src/config/types/storage.rs similarity index 100% rename from src/config/types/storage.rs rename to rust/src/config/types/storage.rs diff --git a/src/config/validator.rs b/rust/src/config/validator.rs similarity index 100% rename from src/config/validator.rs rename to rust/src/config/validator.rs diff --git a/src/document/mod.rs b/rust/src/document/mod.rs similarity index 100% rename from src/document/mod.rs rename to rust/src/document/mod.rs diff --git a/src/document/node.rs 
b/rust/src/document/node.rs similarity index 100% rename from src/document/node.rs rename to rust/src/document/node.rs diff --git a/src/document/reference.rs b/rust/src/document/reference.rs similarity index 100% rename from src/document/reference.rs rename to rust/src/document/reference.rs diff --git a/src/document/structure.rs b/rust/src/document/structure.rs similarity index 100% rename from src/document/structure.rs rename to rust/src/document/structure.rs diff --git a/src/document/toc.rs b/rust/src/document/toc.rs similarity index 100% rename from src/document/toc.rs rename to rust/src/document/toc.rs diff --git a/src/document/tree.rs b/rust/src/document/tree.rs similarity index 100% rename from src/document/tree.rs rename to rust/src/document/tree.rs diff --git a/src/error.rs b/rust/src/error.rs similarity index 100% rename from src/error.rs rename to rust/src/error.rs diff --git a/src/index/config.rs b/rust/src/index/config.rs similarity index 100% rename from src/index/config.rs rename to rust/src/index/config.rs diff --git a/src/index/incremental/detector.rs b/rust/src/index/incremental/detector.rs similarity index 100% rename from src/index/incremental/detector.rs rename to rust/src/index/incremental/detector.rs diff --git a/src/index/incremental/mod.rs b/rust/src/index/incremental/mod.rs similarity index 100% rename from src/index/incremental/mod.rs rename to rust/src/index/incremental/mod.rs diff --git a/src/index/incremental/updater.rs b/rust/src/index/incremental/updater.rs similarity index 100% rename from src/index/incremental/updater.rs rename to rust/src/index/incremental/updater.rs diff --git a/src/index/mod.rs b/rust/src/index/mod.rs similarity index 100% rename from src/index/mod.rs rename to rust/src/index/mod.rs diff --git a/src/index/pipeline/context.rs b/rust/src/index/pipeline/context.rs similarity index 100% rename from src/index/pipeline/context.rs rename to rust/src/index/pipeline/context.rs diff --git a/src/index/pipeline/executor.rs 
b/rust/src/index/pipeline/executor.rs similarity index 100% rename from src/index/pipeline/executor.rs rename to rust/src/index/pipeline/executor.rs diff --git a/src/index/pipeline/metrics.rs b/rust/src/index/pipeline/metrics.rs similarity index 100% rename from src/index/pipeline/metrics.rs rename to rust/src/index/pipeline/metrics.rs diff --git a/src/index/pipeline/mod.rs b/rust/src/index/pipeline/mod.rs similarity index 100% rename from src/index/pipeline/mod.rs rename to rust/src/index/pipeline/mod.rs diff --git a/src/index/pipeline/orchestrator.rs b/rust/src/index/pipeline/orchestrator.rs similarity index 100% rename from src/index/pipeline/orchestrator.rs rename to rust/src/index/pipeline/orchestrator.rs diff --git a/src/index/pipeline/policy.rs b/rust/src/index/pipeline/policy.rs similarity index 100% rename from src/index/pipeline/policy.rs rename to rust/src/index/pipeline/policy.rs diff --git a/src/index/stages/build.rs b/rust/src/index/stages/build.rs similarity index 100% rename from src/index/stages/build.rs rename to rust/src/index/stages/build.rs diff --git a/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs similarity index 100% rename from src/index/stages/enhance.rs rename to rust/src/index/stages/enhance.rs diff --git a/src/index/stages/enrich.rs b/rust/src/index/stages/enrich.rs similarity index 100% rename from src/index/stages/enrich.rs rename to rust/src/index/stages/enrich.rs diff --git a/src/index/stages/mod.rs b/rust/src/index/stages/mod.rs similarity index 100% rename from src/index/stages/mod.rs rename to rust/src/index/stages/mod.rs diff --git a/src/index/stages/optimize.rs b/rust/src/index/stages/optimize.rs similarity index 100% rename from src/index/stages/optimize.rs rename to rust/src/index/stages/optimize.rs diff --git a/src/index/stages/parse.rs b/rust/src/index/stages/parse.rs similarity index 100% rename from src/index/stages/parse.rs rename to rust/src/index/stages/parse.rs diff --git a/src/index/stages/persist.rs 
b/rust/src/index/stages/persist.rs similarity index 100% rename from src/index/stages/persist.rs rename to rust/src/index/stages/persist.rs diff --git a/src/index/summary/full.rs b/rust/src/index/summary/full.rs similarity index 100% rename from src/index/summary/full.rs rename to rust/src/index/summary/full.rs diff --git a/src/index/summary/lazy.rs b/rust/src/index/summary/lazy.rs similarity index 100% rename from src/index/summary/lazy.rs rename to rust/src/index/summary/lazy.rs diff --git a/src/index/summary/mod.rs b/rust/src/index/summary/mod.rs similarity index 100% rename from src/index/summary/mod.rs rename to rust/src/index/summary/mod.rs diff --git a/src/index/summary/selective.rs b/rust/src/index/summary/selective.rs similarity index 100% rename from src/index/summary/selective.rs rename to rust/src/index/summary/selective.rs diff --git a/src/index/summary/strategy.rs b/rust/src/index/summary/strategy.rs similarity index 100% rename from src/index/summary/strategy.rs rename to rust/src/index/summary/strategy.rs diff --git a/src/lib.rs b/rust/src/lib.rs similarity index 100% rename from src/lib.rs rename to rust/src/lib.rs diff --git a/src/llm/client.rs b/rust/src/llm/client.rs similarity index 100% rename from src/llm/client.rs rename to rust/src/llm/client.rs diff --git a/src/llm/config.rs b/rust/src/llm/config.rs similarity index 100% rename from src/llm/config.rs rename to rust/src/llm/config.rs diff --git a/src/llm/error.rs b/rust/src/llm/error.rs similarity index 100% rename from src/llm/error.rs rename to rust/src/llm/error.rs diff --git a/src/llm/executor.rs b/rust/src/llm/executor.rs similarity index 100% rename from src/llm/executor.rs rename to rust/src/llm/executor.rs diff --git a/src/llm/fallback.rs b/rust/src/llm/fallback.rs similarity index 100% rename from src/llm/fallback.rs rename to rust/src/llm/fallback.rs diff --git a/src/llm/mod.rs b/rust/src/llm/mod.rs similarity index 100% rename from src/llm/mod.rs rename to rust/src/llm/mod.rs 
diff --git a/src/llm/pool.rs b/rust/src/llm/pool.rs similarity index 100% rename from src/llm/pool.rs rename to rust/src/llm/pool.rs diff --git a/src/llm/retry.rs b/rust/src/llm/retry.rs similarity index 100% rename from src/llm/retry.rs rename to rust/src/llm/retry.rs diff --git a/src/memo/mod.rs b/rust/src/memo/mod.rs similarity index 100% rename from src/memo/mod.rs rename to rust/src/memo/mod.rs diff --git a/src/memo/store.rs b/rust/src/memo/store.rs similarity index 100% rename from src/memo/store.rs rename to rust/src/memo/store.rs diff --git a/src/memo/types.rs b/rust/src/memo/types.rs similarity index 100% rename from src/memo/types.rs rename to rust/src/memo/types.rs diff --git a/src/metrics/hub.rs b/rust/src/metrics/hub.rs similarity index 100% rename from src/metrics/hub.rs rename to rust/src/metrics/hub.rs diff --git a/src/metrics/llm.rs b/rust/src/metrics/llm.rs similarity index 100% rename from src/metrics/llm.rs rename to rust/src/metrics/llm.rs diff --git a/src/metrics/mod.rs b/rust/src/metrics/mod.rs similarity index 100% rename from src/metrics/mod.rs rename to rust/src/metrics/mod.rs diff --git a/src/metrics/pilot.rs b/rust/src/metrics/pilot.rs similarity index 100% rename from src/metrics/pilot.rs rename to rust/src/metrics/pilot.rs diff --git a/src/metrics/retrieval.rs b/rust/src/metrics/retrieval.rs similarity index 100% rename from src/metrics/retrieval.rs rename to rust/src/metrics/retrieval.rs diff --git a/src/parser/docx/mod.rs b/rust/src/parser/docx/mod.rs similarity index 100% rename from src/parser/docx/mod.rs rename to rust/src/parser/docx/mod.rs diff --git a/src/parser/docx/parser.rs b/rust/src/parser/docx/parser.rs similarity index 100% rename from src/parser/docx/parser.rs rename to rust/src/parser/docx/parser.rs diff --git a/src/parser/docx/styles.rs b/rust/src/parser/docx/styles.rs similarity index 100% rename from src/parser/docx/styles.rs rename to rust/src/parser/docx/styles.rs diff --git a/src/parser/docx/types.rs 
b/rust/src/parser/docx/types.rs similarity index 100% rename from src/parser/docx/types.rs rename to rust/src/parser/docx/types.rs diff --git a/src/parser/html/config.rs b/rust/src/parser/html/config.rs similarity index 100% rename from src/parser/html/config.rs rename to rust/src/parser/html/config.rs diff --git a/src/parser/html/mod.rs b/rust/src/parser/html/mod.rs similarity index 100% rename from src/parser/html/mod.rs rename to rust/src/parser/html/mod.rs diff --git a/src/parser/html/parser.rs b/rust/src/parser/html/parser.rs similarity index 100% rename from src/parser/html/parser.rs rename to rust/src/parser/html/parser.rs diff --git a/src/parser/markdown/config.rs b/rust/src/parser/markdown/config.rs similarity index 100% rename from src/parser/markdown/config.rs rename to rust/src/parser/markdown/config.rs diff --git a/src/parser/markdown/frontmatter.rs b/rust/src/parser/markdown/frontmatter.rs similarity index 100% rename from src/parser/markdown/frontmatter.rs rename to rust/src/parser/markdown/frontmatter.rs diff --git a/src/parser/markdown/mod.rs b/rust/src/parser/markdown/mod.rs similarity index 100% rename from src/parser/markdown/mod.rs rename to rust/src/parser/markdown/mod.rs diff --git a/src/parser/markdown/parser.rs b/rust/src/parser/markdown/parser.rs similarity index 100% rename from src/parser/markdown/parser.rs rename to rust/src/parser/markdown/parser.rs diff --git a/src/parser/mod.rs b/rust/src/parser/mod.rs similarity index 100% rename from src/parser/mod.rs rename to rust/src/parser/mod.rs diff --git a/src/parser/pdf/mod.rs b/rust/src/parser/pdf/mod.rs similarity index 100% rename from src/parser/pdf/mod.rs rename to rust/src/parser/pdf/mod.rs diff --git a/src/parser/pdf/parser.rs b/rust/src/parser/pdf/parser.rs similarity index 100% rename from src/parser/pdf/parser.rs rename to rust/src/parser/pdf/parser.rs diff --git a/src/parser/pdf/types.rs b/rust/src/parser/pdf/types.rs similarity index 100% rename from src/parser/pdf/types.rs 
rename to rust/src/parser/pdf/types.rs diff --git a/src/parser/registry.rs b/rust/src/parser/registry.rs similarity index 100% rename from src/parser/registry.rs rename to rust/src/parser/registry.rs diff --git a/src/parser/toc/assigner.rs b/rust/src/parser/toc/assigner.rs similarity index 100% rename from src/parser/toc/assigner.rs rename to rust/src/parser/toc/assigner.rs diff --git a/src/parser/toc/detector.rs b/rust/src/parser/toc/detector.rs similarity index 100% rename from src/parser/toc/detector.rs rename to rust/src/parser/toc/detector.rs diff --git a/src/parser/toc/mod.rs b/rust/src/parser/toc/mod.rs similarity index 100% rename from src/parser/toc/mod.rs rename to rust/src/parser/toc/mod.rs diff --git a/src/parser/toc/parser.rs b/rust/src/parser/toc/parser.rs similarity index 100% rename from src/parser/toc/parser.rs rename to rust/src/parser/toc/parser.rs diff --git a/src/parser/toc/processor.rs b/rust/src/parser/toc/processor.rs similarity index 100% rename from src/parser/toc/processor.rs rename to rust/src/parser/toc/processor.rs diff --git a/src/parser/toc/repairer.rs b/rust/src/parser/toc/repairer.rs similarity index 100% rename from src/parser/toc/repairer.rs rename to rust/src/parser/toc/repairer.rs diff --git a/src/parser/toc/types.rs b/rust/src/parser/toc/types.rs similarity index 100% rename from src/parser/toc/types.rs rename to rust/src/parser/toc/types.rs diff --git a/src/parser/toc/verifier.rs b/rust/src/parser/toc/verifier.rs similarity index 100% rename from src/parser/toc/verifier.rs rename to rust/src/parser/toc/verifier.rs diff --git a/src/parser/traits.rs b/rust/src/parser/traits.rs similarity index 100% rename from src/parser/traits.rs rename to rust/src/parser/traits.rs diff --git a/src/parser/types.rs b/rust/src/parser/types.rs similarity index 100% rename from src/parser/types.rs rename to rust/src/parser/types.rs diff --git a/src/retrieval/cache/mod.rs b/rust/src/retrieval/cache/mod.rs similarity index 100% rename from 
src/retrieval/cache/mod.rs rename to rust/src/retrieval/cache/mod.rs diff --git a/src/retrieval/cache/path_cache.rs b/rust/src/retrieval/cache/path_cache.rs similarity index 100% rename from src/retrieval/cache/path_cache.rs rename to rust/src/retrieval/cache/path_cache.rs diff --git a/src/retrieval/complexity/detector.rs b/rust/src/retrieval/complexity/detector.rs similarity index 100% rename from src/retrieval/complexity/detector.rs rename to rust/src/retrieval/complexity/detector.rs diff --git a/src/retrieval/complexity/mod.rs b/rust/src/retrieval/complexity/mod.rs similarity index 100% rename from src/retrieval/complexity/mod.rs rename to rust/src/retrieval/complexity/mod.rs diff --git a/src/retrieval/content/aggregator.rs b/rust/src/retrieval/content/aggregator.rs similarity index 100% rename from src/retrieval/content/aggregator.rs rename to rust/src/retrieval/content/aggregator.rs diff --git a/src/retrieval/content/budget.rs b/rust/src/retrieval/content/budget.rs similarity index 100% rename from src/retrieval/content/budget.rs rename to rust/src/retrieval/content/budget.rs diff --git a/src/retrieval/content/builder.rs b/rust/src/retrieval/content/builder.rs similarity index 100% rename from src/retrieval/content/builder.rs rename to rust/src/retrieval/content/builder.rs diff --git a/src/retrieval/content/config.rs b/rust/src/retrieval/content/config.rs similarity index 100% rename from src/retrieval/content/config.rs rename to rust/src/retrieval/content/config.rs diff --git a/src/retrieval/content/mod.rs b/rust/src/retrieval/content/mod.rs similarity index 100% rename from src/retrieval/content/mod.rs rename to rust/src/retrieval/content/mod.rs diff --git a/src/retrieval/content/scorer.rs b/rust/src/retrieval/content/scorer.rs similarity index 100% rename from src/retrieval/content/scorer.rs rename to rust/src/retrieval/content/scorer.rs diff --git a/src/retrieval/context.rs b/rust/src/retrieval/context.rs similarity index 100% rename from 
src/retrieval/context.rs rename to rust/src/retrieval/context.rs diff --git a/src/retrieval/decompose.rs b/rust/src/retrieval/decompose.rs similarity index 100% rename from src/retrieval/decompose.rs rename to rust/src/retrieval/decompose.rs diff --git a/src/retrieval/mod.rs b/rust/src/retrieval/mod.rs similarity index 100% rename from src/retrieval/mod.rs rename to rust/src/retrieval/mod.rs diff --git a/src/retrieval/pilot/budget.rs b/rust/src/retrieval/pilot/budget.rs similarity index 100% rename from src/retrieval/pilot/budget.rs rename to rust/src/retrieval/pilot/budget.rs diff --git a/src/retrieval/pilot/builder.rs b/rust/src/retrieval/pilot/builder.rs similarity index 100% rename from src/retrieval/pilot/builder.rs rename to rust/src/retrieval/pilot/builder.rs diff --git a/src/retrieval/pilot/config.rs b/rust/src/retrieval/pilot/config.rs similarity index 100% rename from src/retrieval/pilot/config.rs rename to rust/src/retrieval/pilot/config.rs diff --git a/src/retrieval/pilot/decision.rs b/rust/src/retrieval/pilot/decision.rs similarity index 100% rename from src/retrieval/pilot/decision.rs rename to rust/src/retrieval/pilot/decision.rs diff --git a/src/retrieval/pilot/fallback.rs b/rust/src/retrieval/pilot/fallback.rs similarity index 100% rename from src/retrieval/pilot/fallback.rs rename to rust/src/retrieval/pilot/fallback.rs diff --git a/src/retrieval/pilot/feedback.rs b/rust/src/retrieval/pilot/feedback.rs similarity index 100% rename from src/retrieval/pilot/feedback.rs rename to rust/src/retrieval/pilot/feedback.rs diff --git a/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs similarity index 100% rename from src/retrieval/pilot/llm_pilot.rs rename to rust/src/retrieval/pilot/llm_pilot.rs diff --git a/src/retrieval/pilot/metrics.rs b/rust/src/retrieval/pilot/metrics.rs similarity index 100% rename from src/retrieval/pilot/metrics.rs rename to rust/src/retrieval/pilot/metrics.rs diff --git a/src/retrieval/pilot/mod.rs 
b/rust/src/retrieval/pilot/mod.rs similarity index 100% rename from src/retrieval/pilot/mod.rs rename to rust/src/retrieval/pilot/mod.rs diff --git a/src/retrieval/pilot/noop.rs b/rust/src/retrieval/pilot/noop.rs similarity index 100% rename from src/retrieval/pilot/noop.rs rename to rust/src/retrieval/pilot/noop.rs diff --git a/src/retrieval/pilot/parser.rs b/rust/src/retrieval/pilot/parser.rs similarity index 100% rename from src/retrieval/pilot/parser.rs rename to rust/src/retrieval/pilot/parser.rs diff --git a/src/retrieval/pilot/prompts/builder.rs b/rust/src/retrieval/pilot/prompts/builder.rs similarity index 100% rename from src/retrieval/pilot/prompts/builder.rs rename to rust/src/retrieval/pilot/prompts/builder.rs diff --git a/src/retrieval/pilot/prompts/mod.rs b/rust/src/retrieval/pilot/prompts/mod.rs similarity index 100% rename from src/retrieval/pilot/prompts/mod.rs rename to rust/src/retrieval/pilot/prompts/mod.rs diff --git a/src/retrieval/pilot/prompts/system_backtrack.txt b/rust/src/retrieval/pilot/prompts/system_backtrack.txt similarity index 100% rename from src/retrieval/pilot/prompts/system_backtrack.txt rename to rust/src/retrieval/pilot/prompts/system_backtrack.txt diff --git a/src/retrieval/pilot/prompts/system_evaluate.txt b/rust/src/retrieval/pilot/prompts/system_evaluate.txt similarity index 100% rename from src/retrieval/pilot/prompts/system_evaluate.txt rename to rust/src/retrieval/pilot/prompts/system_evaluate.txt diff --git a/src/retrieval/pilot/prompts/system_fork.txt b/rust/src/retrieval/pilot/prompts/system_fork.txt similarity index 100% rename from src/retrieval/pilot/prompts/system_fork.txt rename to rust/src/retrieval/pilot/prompts/system_fork.txt diff --git a/src/retrieval/pilot/prompts/system_start.txt b/rust/src/retrieval/pilot/prompts/system_start.txt similarity index 100% rename from src/retrieval/pilot/prompts/system_start.txt rename to rust/src/retrieval/pilot/prompts/system_start.txt diff --git 
a/src/retrieval/pilot/prompts/templates.rs b/rust/src/retrieval/pilot/prompts/templates.rs similarity index 100% rename from src/retrieval/pilot/prompts/templates.rs rename to rust/src/retrieval/pilot/prompts/templates.rs diff --git a/src/retrieval/pilot/prompts/user_backtrack.txt b/rust/src/retrieval/pilot/prompts/user_backtrack.txt similarity index 100% rename from src/retrieval/pilot/prompts/user_backtrack.txt rename to rust/src/retrieval/pilot/prompts/user_backtrack.txt diff --git a/src/retrieval/pilot/prompts/user_evaluate.txt b/rust/src/retrieval/pilot/prompts/user_evaluate.txt similarity index 100% rename from src/retrieval/pilot/prompts/user_evaluate.txt rename to rust/src/retrieval/pilot/prompts/user_evaluate.txt diff --git a/src/retrieval/pilot/prompts/user_fork.txt b/rust/src/retrieval/pilot/prompts/user_fork.txt similarity index 100% rename from src/retrieval/pilot/prompts/user_fork.txt rename to rust/src/retrieval/pilot/prompts/user_fork.txt diff --git a/src/retrieval/pilot/prompts/user_start.txt b/rust/src/retrieval/pilot/prompts/user_start.txt similarity index 100% rename from src/retrieval/pilot/prompts/user_start.txt rename to rust/src/retrieval/pilot/prompts/user_start.txt diff --git a/src/retrieval/pilot/trait.rs b/rust/src/retrieval/pilot/trait.rs similarity index 100% rename from src/retrieval/pilot/trait.rs rename to rust/src/retrieval/pilot/trait.rs diff --git a/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs similarity index 100% rename from src/retrieval/pipeline/context.rs rename to rust/src/retrieval/pipeline/context.rs diff --git a/src/retrieval/pipeline/mod.rs b/rust/src/retrieval/pipeline/mod.rs similarity index 100% rename from src/retrieval/pipeline/mod.rs rename to rust/src/retrieval/pipeline/mod.rs diff --git a/src/retrieval/pipeline/orchestrator.rs b/rust/src/retrieval/pipeline/orchestrator.rs similarity index 100% rename from src/retrieval/pipeline/orchestrator.rs rename to 
rust/src/retrieval/pipeline/orchestrator.rs diff --git a/src/retrieval/pipeline/outcome.rs b/rust/src/retrieval/pipeline/outcome.rs similarity index 100% rename from src/retrieval/pipeline/outcome.rs rename to rust/src/retrieval/pipeline/outcome.rs diff --git a/src/retrieval/pipeline/stage.rs b/rust/src/retrieval/pipeline/stage.rs similarity index 100% rename from src/retrieval/pipeline/stage.rs rename to rust/src/retrieval/pipeline/stage.rs diff --git a/src/retrieval/pipeline_retriever.rs b/rust/src/retrieval/pipeline_retriever.rs similarity index 100% rename from src/retrieval/pipeline_retriever.rs rename to rust/src/retrieval/pipeline_retriever.rs diff --git a/src/retrieval/reference.rs b/rust/src/retrieval/reference.rs similarity index 100% rename from src/retrieval/reference.rs rename to rust/src/retrieval/reference.rs diff --git a/src/retrieval/retriever.rs b/rust/src/retrieval/retriever.rs similarity index 100% rename from src/retrieval/retriever.rs rename to rust/src/retrieval/retriever.rs diff --git a/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs similarity index 100% rename from src/retrieval/search/beam.rs rename to rust/src/retrieval/search/beam.rs diff --git a/src/retrieval/search/bm25.rs b/rust/src/retrieval/search/bm25.rs similarity index 100% rename from src/retrieval/search/bm25.rs rename to rust/src/retrieval/search/bm25.rs diff --git a/src/retrieval/search/greedy.rs b/rust/src/retrieval/search/greedy.rs similarity index 100% rename from src/retrieval/search/greedy.rs rename to rust/src/retrieval/search/greedy.rs diff --git a/src/retrieval/search/mcts.rs b/rust/src/retrieval/search/mcts.rs similarity index 100% rename from src/retrieval/search/mcts.rs rename to rust/src/retrieval/search/mcts.rs diff --git a/src/retrieval/search/mod.rs b/rust/src/retrieval/search/mod.rs similarity index 100% rename from src/retrieval/search/mod.rs rename to rust/src/retrieval/search/mod.rs diff --git a/src/retrieval/search/scorer.rs 
b/rust/src/retrieval/search/scorer.rs similarity index 100% rename from src/retrieval/search/scorer.rs rename to rust/src/retrieval/search/scorer.rs diff --git a/src/retrieval/search/trait.rs b/rust/src/retrieval/search/trait.rs similarity index 100% rename from src/retrieval/search/trait.rs rename to rust/src/retrieval/search/trait.rs diff --git a/src/retrieval/stages/analyze.rs b/rust/src/retrieval/stages/analyze.rs similarity index 100% rename from src/retrieval/stages/analyze.rs rename to rust/src/retrieval/stages/analyze.rs diff --git a/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs similarity index 100% rename from src/retrieval/stages/evaluate.rs rename to rust/src/retrieval/stages/evaluate.rs diff --git a/src/retrieval/stages/mod.rs b/rust/src/retrieval/stages/mod.rs similarity index 100% rename from src/retrieval/stages/mod.rs rename to rust/src/retrieval/stages/mod.rs diff --git a/src/retrieval/stages/plan.rs b/rust/src/retrieval/stages/plan.rs similarity index 100% rename from src/retrieval/stages/plan.rs rename to rust/src/retrieval/stages/plan.rs diff --git a/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs similarity index 100% rename from src/retrieval/stages/search.rs rename to rust/src/retrieval/stages/search.rs diff --git a/src/retrieval/strategy/cross_document.rs b/rust/src/retrieval/strategy/cross_document.rs similarity index 100% rename from src/retrieval/strategy/cross_document.rs rename to rust/src/retrieval/strategy/cross_document.rs diff --git a/src/retrieval/strategy/hybrid.rs b/rust/src/retrieval/strategy/hybrid.rs similarity index 100% rename from src/retrieval/strategy/hybrid.rs rename to rust/src/retrieval/strategy/hybrid.rs diff --git a/src/retrieval/strategy/keyword.rs b/rust/src/retrieval/strategy/keyword.rs similarity index 100% rename from src/retrieval/strategy/keyword.rs rename to rust/src/retrieval/strategy/keyword.rs diff --git a/src/retrieval/strategy/llm.rs 
b/rust/src/retrieval/strategy/llm.rs similarity index 100% rename from src/retrieval/strategy/llm.rs rename to rust/src/retrieval/strategy/llm.rs diff --git a/src/retrieval/strategy/mod.rs b/rust/src/retrieval/strategy/mod.rs similarity index 100% rename from src/retrieval/strategy/mod.rs rename to rust/src/retrieval/strategy/mod.rs diff --git a/src/retrieval/strategy/page_range.rs b/rust/src/retrieval/strategy/page_range.rs similarity index 100% rename from src/retrieval/strategy/page_range.rs rename to rust/src/retrieval/strategy/page_range.rs diff --git a/src/retrieval/strategy/semantic.rs b/rust/src/retrieval/strategy/semantic.rs similarity index 100% rename from src/retrieval/strategy/semantic.rs rename to rust/src/retrieval/strategy/semantic.rs diff --git a/src/retrieval/strategy/trait.rs b/rust/src/retrieval/strategy/trait.rs similarity index 100% rename from src/retrieval/strategy/trait.rs rename to rust/src/retrieval/strategy/trait.rs diff --git a/src/retrieval/sufficiency/llm_judge.rs b/rust/src/retrieval/sufficiency/llm_judge.rs similarity index 100% rename from src/retrieval/sufficiency/llm_judge.rs rename to rust/src/retrieval/sufficiency/llm_judge.rs diff --git a/src/retrieval/sufficiency/mod.rs b/rust/src/retrieval/sufficiency/mod.rs similarity index 100% rename from src/retrieval/sufficiency/mod.rs rename to rust/src/retrieval/sufficiency/mod.rs diff --git a/src/retrieval/sufficiency/threshold.rs b/rust/src/retrieval/sufficiency/threshold.rs similarity index 100% rename from src/retrieval/sufficiency/threshold.rs rename to rust/src/retrieval/sufficiency/threshold.rs diff --git a/src/retrieval/types.rs b/rust/src/retrieval/types.rs similarity index 100% rename from src/retrieval/types.rs rename to rust/src/retrieval/types.rs diff --git a/src/storage/backend/file.rs b/rust/src/storage/backend/file.rs similarity index 100% rename from src/storage/backend/file.rs rename to rust/src/storage/backend/file.rs diff --git a/src/storage/backend/memory.rs 
b/rust/src/storage/backend/memory.rs similarity index 100% rename from src/storage/backend/memory.rs rename to rust/src/storage/backend/memory.rs diff --git a/src/storage/backend/mod.rs b/rust/src/storage/backend/mod.rs similarity index 100% rename from src/storage/backend/mod.rs rename to rust/src/storage/backend/mod.rs diff --git a/src/storage/backend/trait_def.rs b/rust/src/storage/backend/trait_def.rs similarity index 100% rename from src/storage/backend/trait_def.rs rename to rust/src/storage/backend/trait_def.rs diff --git a/src/storage/cache.rs b/rust/src/storage/cache.rs similarity index 100% rename from src/storage/cache.rs rename to rust/src/storage/cache.rs diff --git a/src/storage/codec.rs b/rust/src/storage/codec.rs similarity index 100% rename from src/storage/codec.rs rename to rust/src/storage/codec.rs diff --git a/src/storage/lock.rs b/rust/src/storage/lock.rs similarity index 100% rename from src/storage/lock.rs rename to rust/src/storage/lock.rs diff --git a/src/storage/migration.rs b/rust/src/storage/migration.rs similarity index 100% rename from src/storage/migration.rs rename to rust/src/storage/migration.rs diff --git a/src/storage/mod.rs b/rust/src/storage/mod.rs similarity index 100% rename from src/storage/mod.rs rename to rust/src/storage/mod.rs diff --git a/src/storage/persistence.rs b/rust/src/storage/persistence.rs similarity index 100% rename from src/storage/persistence.rs rename to rust/src/storage/persistence.rs diff --git a/src/storage/workspace.rs b/rust/src/storage/workspace.rs similarity index 100% rename from src/storage/workspace.rs rename to rust/src/storage/workspace.rs diff --git a/src/throttle/config.rs b/rust/src/throttle/config.rs similarity index 100% rename from src/throttle/config.rs rename to rust/src/throttle/config.rs diff --git a/src/throttle/controller.rs b/rust/src/throttle/controller.rs similarity index 100% rename from src/throttle/controller.rs rename to rust/src/throttle/controller.rs diff --git 
a/src/throttle/mod.rs b/rust/src/throttle/mod.rs similarity index 100% rename from src/throttle/mod.rs rename to rust/src/throttle/mod.rs diff --git a/src/throttle/rate_limiter.rs b/rust/src/throttle/rate_limiter.rs similarity index 100% rename from src/throttle/rate_limiter.rs rename to rust/src/throttle/rate_limiter.rs diff --git a/src/utils/fingerprint.rs b/rust/src/utils/fingerprint.rs similarity index 100% rename from src/utils/fingerprint.rs rename to rust/src/utils/fingerprint.rs diff --git a/src/utils/format.rs b/rust/src/utils/format.rs similarity index 100% rename from src/utils/format.rs rename to rust/src/utils/format.rs diff --git a/src/utils/mod.rs b/rust/src/utils/mod.rs similarity index 100% rename from src/utils/mod.rs rename to rust/src/utils/mod.rs diff --git a/src/utils/timing.rs b/rust/src/utils/timing.rs similarity index 100% rename from src/utils/timing.rs rename to rust/src/utils/timing.rs diff --git a/src/utils/token.rs b/rust/src/utils/token.rs similarity index 100% rename from src/utils/token.rs rename to rust/src/utils/token.rs From 641db4262d961662546a5f3235b88d277d4322a4 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 7 Apr 2026 18:06:55 +0800 Subject: [PATCH 2/6] docs(README): update library description and improve quick start section - Remove Rust language specification from main description since it's now available through Python bindings - Add detailed information about core engine being written in Rust with Python bindings - Convert Python and Rust quick start sections to collapsible details/summary format for better readability - Update code examples formatting and structure --- README.md | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index f9539eeb..75c04eca 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ ## What is Vectorless? 
-**Vectorless** is a Rust library for querying structured documents using natural language — without vector databases or embedding models. +**Vectorless** is a library for querying structured documents using natural language — without vector databases or embedding models. Core engine written in Rust, with Python bindings. Instead of chunking documents into vectors, Vectorless preserves the document's tree structure and uses a **hybrid algorithm + LLM approach** to navigate it — like how a human reads a table of contents: @@ -91,7 +91,8 @@ Source: Chapter 4 > Section 4.2 > Reset Procedure ## Quick Start -### Python +<details>
+<summary>Python</summary> ```bash pip install vectorless @@ -112,46 +113,43 @@ result = engine.query(doc_id, "What is the total revenue?") print(f"Answer: {result.content}") ``` -See [python/README.md](python/README.md) for full Python documentation. +</details>
 -### Rust +<details>
+<summary>Rust</summary> ```toml [dependencies] vectorless = "0.1" ``` -### Configuration - ```bash cp vectorless.example.toml ./vectorless.toml ``` -### Usage - ```rust use vectorless::Engine; #[tokio::main] async fn main() -> vectorless::Result<()> { - // Create client let client = Engine::builder() .with_workspace("./workspace") .build()?; - // Index a document (PDF, Markdown, DOCX, HTML) let doc_id = client.index("./document.pdf").await?; - // Query with natural language - let result = client.query(&doc_id, "What are the system requirements?").await?; + let result = client.query(&doc_id, + "What are the system requirements?").await?; println!("Answer: {}", result.content); - println!("Source: {}", result.path); // e.g., "Chapter 2 > Section 2.1" + println!("Source: {}", result.path); Ok(()) } ``` +</details>
+ ## Features | Feature | Description | From 8d94f34d5f007c03dee73ee83c07a3e5aff00114 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 7 Apr 2026 18:09:09 +0800 Subject: [PATCH 3/6] feat: add ruff format configuration to pyproject.toml - configure quote style to double quotes - set indent style to spaces - enable magic trailing comma formatting - set line ending to LF --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 2c75d9ca..3a77c43c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,3 +63,9 @@ target-version = "py39" [tool.ruff.lint] select = ["E", "F", "W", "I", "N", "UP", "B"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "lf" From 9ae99d54eb391a6124e080dcd0262f222d6823b3 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 7 Apr 2026 18:29:55 +0800 Subject: [PATCH 4/6] chore(workspace): migrate to workspace-managed dependencies and configuration - Move common package metadata to workspace level in Cargo.toml - Replace individual dependency versions with workspace references - Add comprehensive workspace dependencies section for shared crates - Configure workspace lints for rust and clippy - Set up release and development profiles at workspace level - Update python and rust Cargo.toml files to use workspace inheritance - Consolidate all dependency management to central workspace configuration --- Cargo.toml | 123 ++++++++++++++++++++++++++++++++++++++++++++++ README.md | 14 +++--- python/Cargo.toml | 9 ++-- rust/Cargo.toml | 117 ++++++++++++++++++------------------------- 4 files changed, 182 insertions(+), 81 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2e20d2a8..9da12c6c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,126 @@ [workspace] members = ["rust", "python"] resolver = "2" + +[workspace.package] +version = "0.1.19" +edition = "2024" +authors = ["zTgx "] +license = 
"Apache-2.0" +repository = "https://github.com/vectorlessflow/vectorless" +homepage = "https://vectorless.dev" +documentation = "https://docs.rs/vectorless" + +[workspace.dependencies] +# Async runtime +tokio = { version = "1", features = ["full"] } +async-trait = "0.1" +futures = "0.3" + +# Serialization +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +toml = "0.8" + +# Error handling +thiserror = "2" +anyhow = "1" + +# OpenAI-compatible API client +async-openai = { version = "0.34", features = ["chat-completion"] } + +# UUID +uuid = { version = "1.10", features = ["v4", "serde"] } + +# Time +chrono = { version = "0.4", default-features = false, features = ["serde", "clock"] } + +# Logging +tracing = "0.1" + +# Rate limiting +governor = "0.6" +nonzero_ext = "0.3" + +# Token counting +tiktoken-rs = "0.9" + +# Text processing +regex = "1.10" + +# Markdown parsing +pulldown-cmark = { version = "0.12", default-features = false, features = ["simd"] } + +# Tree data structure +indextree = { version = "4.8.0", features = ["deser"] } + +# LRU cache +lru = "0.12" + +# Checksum +sha2 = "0.10" + +# BLAKE2b hashing +blake2 = "0.10" +base64 = "0.22" + +# Synchronization primitives +parking_lot = "0.12" + +# Compression +flate2 = "1.0" + +# File locking (Unix) +libc = "0.2" + +# PDF processing +pdf-extract = "0.10.0" +lopdf = "0.34" + +# DOCX processing +zip = "2.2" +roxmltree = "0.20" + +# Random number generation +rand = "0.8" + +# BM25 scoring +bm25 = { version = "2.3.2", features = ["parallelism"] } + +# HTML parsing +scraper = "0.22" + +# Python bindings +pyo3 = { version = "0.22", features = ["extension-module"] } + +# Dev dependencies +tempfile = "3.10" +tokio-test = "0.4" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[workspace.lints.rust] +missing_docs = "warn" +unsafe_code = "warn" + +[workspace.lints.clippy] +all = "warn" +pedantic = "warn" + +# Profile settings (must be at root level, not under workspace) 
+[profile.release] +opt-level = 3 +lto = "thin" +codegen-units = 1 +strip = true +panic = "abort" + +[profile.dev] +opt-level = 0 +debug = true + +[profile.bench] +inherits = "release" +debug = true + +[profile.release.package."*"] +opt-level = 3 diff --git a/README.md b/README.md index 75c04eca..f05d336e 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@
-![Vectorless](docs/design/logo-horizontal.svg) +Vectorless -[![Crates.io](https://img.shields.io/crates/v/vectorless.svg)](https://crates.io/crates/vectorless) [![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/) -[![Downloads](https://img.shields.io/crates/d/vectorless.svg)](https://crates.io/crates/vectorless) -[![Documentation](https://docs.rs/vectorless/badge.svg)](https://docs.rs/vectorless) +[![Python](https://img.shields.io/pypi/pyversions/vectorless.svg)](https://pypi.org/project/vectorless/) +[![Crates.io](https://img.shields.io/crates/v/vectorless.svg)](https://crates.io/crates/vectorless) +[![Docs](https://docs.rs/vectorless/badge.svg)](https://docs.rs/vectorless) [![License](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](LICENSE) [![Rust](https://img.shields.io/badge/rust-1.85%2B-orange.svg)](https://www.rust-lang.org/) @@ -23,7 +23,7 @@ Instead of chunking documents into vectors, Vectorless preserves the document's ## How It Works -![How it works](docs/design/how-it-works.svg) +How it works ### 1. 
Index: Build a Navigable Tree @@ -49,7 +49,7 @@ When you ask "How do I reset the device?": ## Traditional RAG vs Vectorless -![Traditional RAG vs Vectorless](docs/design/comparison.svg) +Traditional RAG vs Vectorless | Aspect | Traditional RAG | Vectorless | |--------|----------------|------------| @@ -164,7 +164,7 @@ async fn main() -> vectorless::Result<()> { ## Architecture -![Architecture](docs/design/architecture.svg) +Architecture ### Core Components diff --git a/python/Cargo.toml b/python/Cargo.toml index e59806e2..e9483e7a 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,16 +1,17 @@ [package] name = "vectorless-py" version = "0.1.0" -edition = "2024" -authors = ["zTgx "] +edition.workspace = true +authors.workspace = true description = "Python bindings for vectorless" -license = "Apache-2.0" +license.workspace = true +repository.workspace = true [lib] name = "vectorless" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.22", features = ["extension-module"] } +pyo3 = { workspace = true } tokio = { version = "1", features = ["rt-multi-thread"] } vectorless = { path = "../rust" } diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 9d62e781..78f65624 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,12 +1,12 @@ [package] name = "vectorless" -version = "0.1.19" -edition = "2024" -authors = ["zTgx "] +version.workspace = true +edition.workspace = true +authors.workspace = true description = "Hierarchical, reasoning-native document intelligence engine" -license = "Apache-2.0" -repository = "https://github.com/vectorlessflow/vectorless" -homepage = "https://vectorless.dev" +license.workspace = true +repository.workspace = true +homepage.workspace = true documentation = "https://docs.rs/vectorless" keywords = ["rag", "document", "retrieval", "indexing", "llm"] categories = ["text-processing", "data-structures", "algorithms"] @@ -15,111 +15,88 @@ exclude = ["samples/", "docs/", ".*"] [dependencies] # Async runtime -tokio = { version = 
"1", features = ["full"] } -async-trait = "0.1" -futures = "0.3" +tokio = { workspace = true } +async-trait = { workspace = true } +futures = { workspace = true } # Serialization -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -toml = "0.8" +serde = { workspace = true } +serde_json = { workspace = true } +toml = { workspace = true } # Error handling -thiserror = "2" -anyhow = { version = "1", optional = true } +thiserror = { workspace = true } +anyhow = { workspace = true, optional = true } # OpenAI-compatible API client -async-openai = { version = "0.34", features = ["chat-completion"] } +async-openai = { workspace = true } # UUID -uuid = { version = "1.10", features = ["v4", "serde"] } +uuid = { workspace = true } # Time -chrono = { version = "0.4", default-features = false, features = ["serde", "clock"] } +chrono = { workspace = true } # Logging -tracing = "0.1" +tracing = { workspace = true } # Rate limiting -governor = "0.6" -nonzero_ext = "0.3" +governor = { workspace = true } +nonzero_ext = { workspace = true } # Token counting -tiktoken-rs = "0.9" +tiktoken-rs = { workspace = true } # Text processing -regex = "1.10" +regex = { workspace = true } # Markdown parsing -pulldown-cmark = { version = "0.12", default-features = false, features = ["simd"] } +pulldown-cmark = { workspace = true } # Tree data structure -indextree = { version = "4.8.0", features = ["deser"] } +indextree = { workspace = true } # LRU cache -lru = "0.12" +lru = { workspace = true } # Checksum -sha2 = "0.10" +sha2 = { workspace = true } -# BLAKE2b hashing for fingerprints -blake2 = "0.10" -base64 = "0.22" +# BLAKE2b hashing +blake2 = { workspace = true } +base64 = { workspace = true } -# Synchronization primitives (for memo store) -parking_lot = "0.12" +# Synchronization primitives +parking_lot = { workspace = true } # Compression -flate2 = "1.0" +flate2 = { workspace = true } # File locking (Unix) [target.'cfg(unix)'.dependencies] -libc = "0.2" +libc = { workspace 
= true } # PDF processing -pdf-extract = "0.10.0" -lopdf = "0.34" +pdf-extract = { workspace = true } +lopdf = { workspace = true } # DOCX processing -zip = "2.2" -roxmltree = "0.20" +zip = { workspace = true } +roxmltree = { workspace = true } -# Random number generation (for sampling) -rand = "0.8" +# Random number generation +rand = { workspace = true } # BM25 scoring -bm25 = { version = "2.3.2", features = ["parallelism"] } +bm25 = { workspace = true } # HTML parsing -scraper = "0.22" +scraper = { workspace = true } [dev-dependencies] -tempfile = "3.10" -tokio-test = "0.4" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } - -[profile.release] -opt-level = 3 -lto = "thin" -codegen-units = 1 -strip = true -panic = "abort" - -[profile.dev] -opt-level = 0 -debug = true - -[profile.bench] -inherits = "release" -debug = true - -[profile.release.package."*"] -opt-level = 3 - -[lints.rust] -missing_docs = "warn" -unsafe_code = "warn" - -[lints.clippy] -all = "warn" -pedantic = "warn" +tempfile = { workspace = true } +tokio-test = { workspace = true } +tracing-subscriber = { workspace = true } + +[lints] +workspace = true From 281fd5beed2fca6cd7578ce7f7a8d7e532f10c7e Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 7 Apr 2026 18:35:11 +0800 Subject: [PATCH 5/6] feat: add download badges to README - Add PyPI downloads badge to show monthly download statistics - Add Crates.io downloads badge to display Rust crate downloads --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f05d336e..5669ccbb 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,9 @@ [![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/) [![Python](https://img.shields.io/pypi/pyversions/vectorless.svg)](https://pypi.org/project/vectorless/) +[![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless) 
[![Crates.io](https://img.shields.io/crates/v/vectorless.svg)](https://crates.io/crates/vectorless) +[![Crates.io Downloads](https://img.shields.io/crates/d/vectorless.svg)](https://crates.io/crates/vectorless) [![Docs](https://docs.rs/vectorless/badge.svg)](https://docs.rs/vectorless) [![License](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](LICENSE) [![Rust](https://img.shields.io/badge/rust-1.85%2B-orange.svg)](https://www.rust-lang.org/) From 62c7928903539797e76108b86a251918744e2ff4 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 7 Apr 2026 18:40:04 +0800 Subject: [PATCH 6/6] docs(README): remove width attributes from image tags Remove explicit width attributes from all image tags in README.md to allow responsive sizing and better adapt to different screen sizes and containers. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5669ccbb..603adb21 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
-Vectorless +Vectorless [![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/) [![Python](https://img.shields.io/pypi/pyversions/vectorless.svg)](https://pypi.org/project/vectorless/) @@ -25,7 +25,7 @@ Instead of chunking documents into vectors, Vectorless preserves the document's ## How It Works -How it works +How it works ### 1. Index: Build a Navigable Tree @@ -51,7 +51,7 @@ When you ask "How do I reset the device?": ## Traditional RAG vs Vectorless -Traditional RAG vs Vectorless +Traditional RAG vs Vectorless | Aspect | Traditional RAG | Vectorless | |--------|----------------|------------| @@ -166,7 +166,7 @@ async fn main() -> vectorless::Result<()> { ## Architecture -Architecture +Architecture ### Core Components