-
Notifications
You must be signed in to change notification settings - Fork 8
feat(embeddings): add native Ollama provider for local embeddings #73
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
9eef072
75d66d3
7659c02
8ecc514
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| import { EmbeddingProvider } from './types.js'; | ||
|
|
||
/** Shape of a successful response body from Ollama's POST /api/embeddings. */
interface OllamaEmbeddingResponse {
  // Embedding vector for the submitted prompt; length matches the model's dimensionality.
  embedding: number[];
}
|
|
||
| // Context window sizes for common Ollama embedding models (in tokens) | ||
| const MODEL_CONTEXT_WINDOWS: Record<string, number> = { | ||
| 'nomic-embed-text': 2048, | ||
| 'nomic-embed-text:latest': 2048, | ||
| embeddinggemma: 2048, | ||
| 'embeddinggemma:latest': 2048, | ||
| 'mxbai-embed-large': 512, | ||
| 'mxbai-embed-large:latest': 512, | ||
| 'all-minilm': 512, | ||
| 'all-minilm:latest': 512 | ||
| }; | ||
|
|
||
| // Conservative character limit (approx 2 chars per token for code) | ||
| // Code has more tokens per character due to punctuation and symbols | ||
| function getMaxChars(modelName: string): number { | ||
| const tokens = MODEL_CONTEXT_WINDOWS[modelName] || 2048; | ||
| return tokens * 2; // Very conservative: 2 chars per token | ||
| } | ||
|
|
||
| /** | ||
| * Ollama Embedding Provider | ||
| * Supports local embedding models via Ollama API. | ||
| * API endpoint: POST /api/embeddings | ||
| */ | ||
| export class OllamaEmbeddingProvider implements EmbeddingProvider { | ||
| readonly name = 'ollama'; | ||
| private maxChars: number; | ||
|
|
||
| // Default dimensions for nomic-embed-text (768) | ||
| // Override via EMBEDDING_DIMENSIONS env var for custom models | ||
| get dimensions(): number { | ||
| // Allow explicit dimension override via env var | ||
| if (process.env.EMBEDDING_DIMENSIONS) { | ||
| const parsed = parseInt(process.env.EMBEDDING_DIMENSIONS, 10); | ||
| if (!isNaN(parsed) && parsed > 0) { | ||
| return parsed; | ||
| } | ||
| } | ||
|
|
||
| // Common Ollama embedding model dimensions | ||
| const modelDimensions: Record<string, number> = { | ||
| 'nomic-embed-text': 768, | ||
| 'nomic-embed-text:latest': 768, | ||
| embeddinggemma: 768, | ||
| 'embeddinggemma:latest': 768, | ||
| 'mxbai-embed-large': 1024, | ||
| 'mxbai-embed-large:latest': 1024, | ||
| 'all-minilm': 384, | ||
| 'all-minilm:latest': 384 | ||
| }; | ||
| return modelDimensions[this.modelName] || 768; | ||
|
Comment on lines
+8
to
+57
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The PR description and The same gap exists in const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
'nomic-embed-text': 2048,
'nomic-embed-text:latest': 2048,
'embeddinggemma': 2048, // add
'embeddinggemma:latest': 2048, // add
'mxbai-embed-large': 512,
...
}; |
||
| } | ||
|
|
||
| constructor( | ||
| readonly modelName: string = 'nomic-embed-text', | ||
| private apiEndpoint: string = 'http://localhost:11434' | ||
| ) { | ||
| this.maxChars = getMaxChars(modelName); | ||
| } | ||
|
|
||
| async initialize(): Promise<void> { | ||
| // Ollama doesn't require an API key | ||
| // We could test connectivity here if needed | ||
| } | ||
|
|
||
| isReady(): boolean { | ||
| // Ollama is always "ready" - no auth required | ||
| return true; | ||
| } | ||
|
|
||
| private truncateText(text: string): string { | ||
| if (text.length <= this.maxChars) { | ||
| return text; | ||
| } | ||
| return text.slice(0, this.maxChars); | ||
| } | ||
|
|
||
| async embed(text: string): Promise<number[]> { | ||
| const batch = await this.embedBatch([text]); | ||
| return batch[0]; | ||
| } | ||
|
|
||
| async embedBatch(texts: string[]): Promise<number[][]> { | ||
| if (!texts.length) return []; | ||
|
|
||
| const embeddings: number[][] = []; | ||
|
|
||
| // Ollama embeddings API processes one text at a time | ||
| for (const text of texts) { | ||
| try { | ||
| // Truncate text to fit within model's context window | ||
| const truncatedText = this.truncateText(text); | ||
|
|
||
| const response = await fetch(`${this.apiEndpoint}/api/embeddings`, { | ||
| method: 'POST', | ||
| headers: { | ||
| 'Content-Type': 'application/json' | ||
| }, | ||
| body: JSON.stringify({ | ||
| model: this.modelName, | ||
| prompt: truncatedText | ||
| }) | ||
| }); | ||
|
|
||
| if (!response.ok) { | ||
| const error = await response.text(); | ||
| throw new Error(`Ollama API Error ${response.status}: ${error}`); | ||
| } | ||
|
|
||
| const data = (await response.json()) as OllamaEmbeddingResponse; | ||
| embeddings.push(data.embedding); | ||
| } catch (error) { | ||
| console.error('Ollama Embedding Failed:', error); | ||
| throw error; | ||
| } | ||
| } | ||
|
|
||
| return embeddings; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`OLLAMA_HOST` ignored when provider is passed programmatically. `DEFAULT_EMBEDDING_CONFIG` defines `apiEndpoint` as a getter that calls `getDefaultApiEndpoint(this.provider)`. When `getEmbeddingProvider` spreads this config, the spread operator evaluates the getter at spread time, with
`this` bound to `DEFAULT_EMBEDDING_CONFIG`. So `this.provider` equals `DEFAULT_EMBEDDING_CONFIG.provider`, which is derived from `process.env.EMBEDDING_PROVIDER ?? 'transformers'`. If
`EMBEDDING_PROVIDER` is not set (it defaults to `'transformers'`), the getter returns `undefined` for `apiEndpoint`. This means calling `getEmbeddingProvider({ provider: 'ollama' })` programmatically will always fall back to the hardcoded `'http://localhost:11434'` on line 78, silently ignoring the `OLLAMA_HOST` environment variable. A straightforward fix is to read
`OLLAMA_HOST` directly within the provider branch: