diff --git a/CHANGELOG.md b/CHANGELOG.md index a257a08..951e709 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## [Unreleased] + +### Features + +* **embeddings:** Add native Ollama provider for local embeddings ([#70](https://github.com/PatrickSys/codebase-context/issues/70)) + - New `EMBEDDING_PROVIDER=ollama` option + - Supports `OLLAMA_HOST` environment variable (default: http://localhost:11434) + - Configurable via `EMBEDDING_MODEL` (default: nomic-embed-text, 768 dimensions) + - Also adds `OPENAI_BASE_URL` for custom OpenAI-compatible endpoints + - Tested with nomic-embed-text (768 dim), mxbai-embed-large (1024 dim), all-minilm (384 dim) + ## [1.8.2](https://github.com/PatrickSys/codebase-context/compare/v1.8.1...v1.8.2) (2026-03-05) diff --git a/README.md b/README.md index 3c35fb1..4c536ba 100644 --- a/README.md +++ b/README.md @@ -344,8 +344,10 @@ Structured filters available: `framework`, `language`, `componentType`, `layer` | Variable | Default | Description | | ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------- | -| `EMBEDDING_PROVIDER` | `transformers` | `openai` (fast, cloud) or `transformers` (local, private) | +| `EMBEDDING_PROVIDER` | `transformers` | `transformers` (local, private), `openai` (fast, cloud), or `ollama` (local via Ollama) | | `OPENAI_API_KEY` | - | Required only if using `openai` provider | +| `OPENAI_BASE_URL` | `https://api.openai.com/v1` | Custom OpenAI-compatible API endpoint (LiteLLM, Groq, OpenRouter, etc.) | +| `OLLAMA_HOST` | `http://localhost:11434` | Ollama server URL (only used when `EMBEDDING_PROVIDER=ollama`) | | `CODEBASE_ROOT` | - | Project root (CLI arg takes precedence) | | `CODEBASE_CONTEXT_DEBUG` | - | Set to `1` for verbose logging | | `EMBEDDING_MODEL` | `Xenova/bge-small-en-v1.5` | Local embedding model override (e.g. 
`onnx-community/granite-embedding-small-english-r2-ONNX` for Granite) | diff --git a/src/embeddings/index.ts b/src/embeddings/index.ts index aabb3fa..7699857 100644 --- a/src/embeddings/index.ts +++ b/src/embeddings/index.ts @@ -1,5 +1,4 @@ export * from './types.js'; -export * from './transformers.js'; import { EmbeddingProvider, @@ -8,22 +7,52 @@ import { DEFAULT_MODEL, parseEmbeddingProviderName } from './types.js'; -import { TransformersEmbeddingProvider, MODEL_CONFIGS } from './transformers.js'; + +// Model configs for dimension lookups (sync, no heavy dependencies) +// This avoids loading the full transformers module at import time +const TRANSFORMERS_MODEL_CONFIGS: Record<string, { dimensions: number; maxContext: number }> = { + 'Xenova/bge-small-en-v1.5': { dimensions: 384, maxContext: 512 }, + 'Xenova/all-MiniLM-L6-v2': { dimensions: 384, maxContext: 512 }, + 'Xenova/bge-base-en-v1.5': { dimensions: 768, maxContext: 512 }, + 'onnx-community/granite-embedding-small-english-r2-ONNX': { dimensions: 384, maxContext: 8192 } +}; /** * Returns expected embedding dimensions for a given config without initializing any provider. * Used for LanceDB dimension validation before committing to an incremental update. * - * Looks up dimensions from MODEL_CONFIGS (the authoritative source shared with the provider - * implementation) so new models are automatically handled without updating this function. + * Looks up dimensions from TRANSFORMERS_MODEL_CONFIGS for local models and handles + * remote providers (OpenAI, Ollama) with their specific dimension logic. */ export function getConfiguredDimensions(config: Partial<EmbeddingConfig> = {}): number { + // Allow explicit dimension override via env var for custom models + if (process.env.EMBEDDING_DIMENSIONS) { + const parsed = parseInt(process.env.EMBEDDING_DIMENSIONS, 10); + if (!isNaN(parsed) && parsed > 0) { + return parsed; + } + } + const provider = config.provider ?? parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers'; const model = config.model ?? 
process.env.EMBEDDING_MODEL ?? DEFAULT_MODEL; if (provider === 'openai') return model.includes('large') ? 3072 : 1536; // text-embedding-3-large: 3072, all others: 1536 - // Look up from the same MODEL_CONFIGS the provider uses — avoids stale hardcoded guesses - return MODEL_CONFIGS[model]?.dimensions ?? 384; + if (provider === 'ollama') { + // Common Ollama embedding model dimensions + const ollamaDimensions: Record<string, number> = { + 'nomic-embed-text': 768, + 'nomic-embed-text:latest': 768, + embeddinggemma: 768, + 'embeddinggemma:latest': 768, + 'mxbai-embed-large': 1024, + 'mxbai-embed-large:latest': 1024, + 'all-minilm': 384, + 'all-minilm:latest': 384 + }; + return ollamaDimensions[model] || 768; + } + // Look up from the local config for transformers provider + return TRANSFORMERS_MODEL_CONFIGS[model]?.dimensions ?? 384; } let cachedProvider: EmbeddingProvider | null = null; @@ -52,14 +81,22 @@ export async function getEmbeddingProvider( return provider; } - if (mergedConfig.provider === 'custom') { - throw new Error("Custom provider not implemented. 
Use 'openai' or 'transformers'."); - } - if (mergedConfig.provider === 'ollama') { - console.warn('Ollama provider not yet implemented, falling back to Transformers.js'); + const { OllamaEmbeddingProvider } = await import('./ollama.js'); + const endpoint = + mergedConfig.apiEndpoint || process.env.OLLAMA_HOST || 'http://localhost:11434'; + const provider = new OllamaEmbeddingProvider( + mergedConfig.model || 'nomic-embed-text', + endpoint + ); + await provider.initialize(); + cachedProvider = provider; + cachedProviderType = providerKey; + return provider; } + // Default: transformers (lazy loaded) + const { TransformersEmbeddingProvider } = await import('./transformers.js'); const provider = new TransformersEmbeddingProvider(mergedConfig.model); await provider.initialize(); cachedProvider = provider; @@ -67,3 +104,7 @@ return provider; } + +// Note: transformers provider is lazy-loaded in getEmbeddingProvider to avoid +// eager heavy dependency loading. Consumers should import from './transformers' +// directly if they need access to provider implementation or MODEL_CONFIGS. 
diff --git a/src/embeddings/ollama.ts b/src/embeddings/ollama.ts new file mode 100644 index 0000000..41af038 --- /dev/null +++ b/src/embeddings/ollama.ts @@ -0,0 +1,126 @@ +import { EmbeddingProvider } from './types.js'; + +interface OllamaEmbeddingResponse { + embedding: number[]; +} + +// Context window sizes for common Ollama embedding models (in tokens) +const MODEL_CONTEXT_WINDOWS: Record<string, number> = { + 'nomic-embed-text': 2048, + 'nomic-embed-text:latest': 2048, + embeddinggemma: 2048, + 'embeddinggemma:latest': 2048, + 'mxbai-embed-large': 512, + 'mxbai-embed-large:latest': 512, + 'all-minilm': 512, + 'all-minilm:latest': 512 +}; + +// Conservative character limit (approx 2 chars per token for code) +// Code has more tokens per character due to punctuation and symbols +function getMaxChars(modelName: string): number { + const tokens = MODEL_CONTEXT_WINDOWS[modelName] || 2048; + return tokens * 2; // Very conservative: 2 chars per token +} + +/** + * Ollama Embedding Provider + * Supports local embedding models via Ollama API. 
+ * API endpoint: POST /api/embeddings + */ +export class OllamaEmbeddingProvider implements EmbeddingProvider { + readonly name = 'ollama'; + private maxChars: number; + + // Default dimensions for nomic-embed-text (768) + // Override via EMBEDDING_DIMENSIONS env var for custom models + get dimensions(): number { + // Allow explicit dimension override via env var + if (process.env.EMBEDDING_DIMENSIONS) { + const parsed = parseInt(process.env.EMBEDDING_DIMENSIONS, 10); + if (!isNaN(parsed) && parsed > 0) { + return parsed; + } + } + + // Common Ollama embedding model dimensions + const modelDimensions: Record<string, number> = { + 'nomic-embed-text': 768, + 'nomic-embed-text:latest': 768, + embeddinggemma: 768, + 'embeddinggemma:latest': 768, + 'mxbai-embed-large': 1024, + 'mxbai-embed-large:latest': 1024, + 'all-minilm': 384, + 'all-minilm:latest': 384 + }; + return modelDimensions[this.modelName] || 768; + } + + constructor( + readonly modelName: string = 'nomic-embed-text', + private apiEndpoint: string = 'http://localhost:11434' + ) { + this.maxChars = getMaxChars(modelName); + } + + async initialize(): Promise<void> { + // Ollama doesn't require an API key + // We could test connectivity here if needed + } + + isReady(): boolean { + // Ollama is always "ready" - no auth required + return true; + } + + private truncateText(text: string): string { + if (text.length <= this.maxChars) { + return text; + } + return text.slice(0, this.maxChars); + } + + async embed(text: string): Promise<number[]> { + const batch = await this.embedBatch([text]); + return batch[0]; + } + + async embedBatch(texts: string[]): Promise<number[][]> { + if (!texts.length) return []; + + const embeddings: number[][] = []; + + // Ollama embeddings API processes one text at a time + for (const text of texts) { + try { + // Truncate text to fit within model's context window + const truncatedText = this.truncateText(text); + + const response = await fetch(`${this.apiEndpoint}/api/embeddings`, { + method: 'POST', + headers: { + 
'Content-Type': 'application/json' + }, + body: JSON.stringify({ + model: this.modelName, + prompt: truncatedText + }) + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Ollama API Error ${response.status}: ${error}`); + } + + const data = (await response.json()) as OllamaEmbeddingResponse; + embeddings.push(data.embedding); + } catch (error) { + console.error('Ollama Embedding Failed:', error); + throw error; + } + } + + return embeddings; + } +} diff --git a/src/embeddings/types.ts b/src/embeddings/types.ts index 66e5a7a..4476699 100644 --- a/src/embeddings/types.ts +++ b/src/embeddings/types.ts @@ -32,10 +32,23 @@ export function parseEmbeddingProviderName( // better conceptual search at the cost of 5-10x slower indexing and higher RAM usage export const DEFAULT_MODEL = process.env.EMBEDDING_MODEL || 'Xenova/bge-small-en-v1.5'; +function getDefaultApiEndpoint(provider: EmbeddingConfig['provider']): string | undefined { + if (provider === 'ollama') { + return process.env.OLLAMA_HOST || 'http://localhost:11434'; + } + if (provider === 'openai') { + return process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1'; + } + return undefined; +} + export const DEFAULT_EMBEDDING_CONFIG: EmbeddingConfig = { provider: parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers', model: DEFAULT_MODEL, batchSize: 32, maxRetries: 3, - apiKey: process.env.OPENAI_API_KEY + apiKey: process.env.OPENAI_API_KEY, + get apiEndpoint() { + return getDefaultApiEndpoint(this.provider); + } };