Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changelog

## [Unreleased]

### Features

* **embeddings:** Add native Ollama provider for local embeddings ([#70](https://github.com/PatrickSys/codebase-context/issues/70))
- New `EMBEDDING_PROVIDER=ollama` option
- Supports `OLLAMA_HOST` environment variable (default: http://localhost:11434)
- Configurable via `EMBEDDING_MODEL` (default: nomic-embed-text, 768 dimensions)
- Also adds `OPENAI_BASE_URL` for custom OpenAI-compatible endpoints
- Tested with nomic-embed-text (768 dim), mxbai-embed-large (1024 dim), all-minilm (384 dim)

## [1.8.2](https://github.com/PatrickSys/codebase-context/compare/v1.8.1...v1.8.2) (2026-03-05)


Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,10 @@ Structured filters available: `framework`, `language`, `componentType`, `layer`

| Variable | Default | Description |
| ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------- |
| `EMBEDDING_PROVIDER` | `transformers` | `transformers` (local, private), `openai` (fast, cloud), or `ollama` (local via Ollama) |
| `OPENAI_API_KEY` | - | Required only if using `openai` provider |
| `OPENAI_BASE_URL` | `https://api.openai.com/v1` | Custom OpenAI-compatible API endpoint (LiteLLM, Groq, OpenRouter, etc.) |
| `OLLAMA_HOST` | `http://localhost:11434` | Ollama server URL (only used when `EMBEDDING_PROVIDER=ollama`) |
| `CODEBASE_ROOT` | - | Project root (CLI arg takes precedence) |
| `CODEBASE_CONTEXT_DEBUG` | - | Set to `1` for verbose logging |
| `EMBEDDING_MODEL` | `Xenova/bge-small-en-v1.5` | Local embedding model override (e.g. `onnx-community/granite-embedding-small-english-r2-ONNX` for Granite) |
Expand Down
63 changes: 52 additions & 11 deletions src/embeddings/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
export * from './types.js';
export * from './transformers.js';

import {
EmbeddingProvider,
Expand All @@ -8,22 +7,52 @@ import {
DEFAULT_MODEL,
parseEmbeddingProviderName
} from './types.js';
import { TransformersEmbeddingProvider, MODEL_CONFIGS } from './transformers.js';

// Model configs for dimension lookups (sync, no heavy dependencies)
// This avoids loading the full transformers module at import time
// NOTE(review): these entries duplicate MODEL_CONFIGS in ./transformers.js —
// when adding a model there, mirror it here or dimension validation will
// fall back to 384. TODO: confirm both tables agree.
const TRANSFORMERS_MODEL_CONFIGS: Record<string, { dimensions: number; maxContext: number }> = {
'Xenova/bge-small-en-v1.5': { dimensions: 384, maxContext: 512 },
'Xenova/all-MiniLM-L6-v2': { dimensions: 384, maxContext: 512 },
'Xenova/bge-base-en-v1.5': { dimensions: 768, maxContext: 512 },
'onnx-community/granite-embedding-small-english-r2-ONNX': { dimensions: 384, maxContext: 8192 }
};

/**
 * Returns expected embedding dimensions for a given config without initializing any provider.
 * Used for LanceDB dimension validation before committing to an incremental update.
 *
 * Resolution order: an explicit EMBEDDING_DIMENSIONS env var always wins (for
 * custom models); otherwise OpenAI and Ollama use provider-specific lookups,
 * and the transformers provider reads TRANSFORMERS_MODEL_CONFIGS.
 *
 * @param config Partial embedding config; provider/model fall back to env vars.
 * @returns The expected embedding vector length.
 */
export function getConfiguredDimensions(config: Partial<EmbeddingConfig> = {}): number {
  // Allow explicit dimension override via env var for custom models
  if (process.env.EMBEDDING_DIMENSIONS) {
    const parsed = parseInt(process.env.EMBEDDING_DIMENSIONS, 10);
    if (!isNaN(parsed) && parsed > 0) {
      return parsed;
    }
  }

  const provider =
    config.provider ?? parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
  const model = config.model ?? process.env.EMBEDDING_MODEL ?? DEFAULT_MODEL;

  // OpenAI: text-embedding-3-large is 3072-dimensional, all others 1536
  if (provider === 'openai') return model.includes('large') ? 3072 : 1536;

  if (provider === 'ollama') {
    // Common Ollama embedding model dimensions.
    // NOTE(review): duplicates the table in ./ollama.ts — keep the two in sync.
    const ollamaDimensions: Record<string, number> = {
      'nomic-embed-text': 768,
      'nomic-embed-text:latest': 768,
      embeddinggemma: 768,
      'embeddinggemma:latest': 768,
      'mxbai-embed-large': 1024,
      'mxbai-embed-large:latest': 1024,
      'all-minilm': 384,
      'all-minilm:latest': 384
    };
    // Unknown models assume nomic-embed-text's 768 dimensions
    return ollamaDimensions[model] || 768;
  }

  // transformers (local): look up from the lightweight local table
  return TRANSFORMERS_MODEL_CONFIGS[model]?.dimensions ?? 384;
}

let cachedProvider: EmbeddingProvider | null = null;
Expand Down Expand Up @@ -52,18 +81,30 @@ export async function getEmbeddingProvider(
return provider;
}

if (mergedConfig.provider === 'custom') {
  // Stale message fixed: 'ollama' is now a valid provider too.
  throw new Error("Custom provider not implemented. Use 'openai', 'transformers', or 'ollama'.");
}

if (mergedConfig.provider === 'ollama') {
  const { OllamaEmbeddingProvider } = await import('./ollama.js');
  // Read OLLAMA_HOST here (not only via DEFAULT_EMBEDDING_CONFIG's getter) so
  // the env var is honored even when the provider is passed programmatically.
  const endpoint =
    mergedConfig.apiEndpoint || process.env.OLLAMA_HOST || 'http://localhost:11434';
  // NOTE(review): mergedConfig.model defaults to DEFAULT_MODEL (a transformers
  // model name), so the 'nomic-embed-text' fallback below is likely dead code
  // unless callers pass an empty model — confirm EMBEDDING_MODEL handling.
  const provider = new OllamaEmbeddingProvider(
    mergedConfig.model || 'nomic-embed-text',
    endpoint
  );
  await provider.initialize();
  cachedProvider = provider;
  cachedProviderType = providerKey;
  return provider;
}
Comment on lines 84 to 96
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OLLAMA_HOST ignored when provider is passed programmatically

DEFAULT_EMBEDDING_CONFIG defines apiEndpoint as a getter that calls getDefaultApiEndpoint(this.provider). When getEmbeddingProvider spreads this config:

const mergedConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };

The spread operator evaluates the getter at spread time, with this bound to DEFAULT_EMBEDDING_CONFIG. So this.provider equals DEFAULT_EMBEDDING_CONFIG.provider, which is derived from process.env.EMBEDDING_PROVIDER ?? 'transformers'.

If EMBEDDING_PROVIDER is not set (defaults to 'transformers'), the getter returns undefined for apiEndpoint. This means calling getEmbeddingProvider({ provider: 'ollama' }) programmatically will always fall back to the hardcoded 'http://localhost:11434' on line 78, silently ignoring the OLLAMA_HOST environment variable.

A straightforward fix is to read OLLAMA_HOST directly within the provider branch:

const endpoint =
  mergedConfig.apiEndpoint ||
  process.env.OLLAMA_HOST ||
  'http://localhost:11434';
const provider = new OllamaEmbeddingProvider(
  mergedConfig.model || 'nomic-embed-text',
  endpoint
);


// Default: transformers (lazy loaded)
const { TransformersEmbeddingProvider } = await import('./transformers.js');
const provider = new TransformersEmbeddingProvider(mergedConfig.model);
await provider.initialize();
cachedProvider = provider;
cachedProviderType = providerKey;

return provider;
}

// Note: transformers provider is lazy-loaded in getEmbeddingProvider to avoid
// eager heavy dependency loading. Consumers should import from './transformers'
// directly if they need access to provider implementation or MODEL_CONFIGS.
126 changes: 126 additions & 0 deletions src/embeddings/ollama.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import { EmbeddingProvider } from './types.js';

// Response shape of Ollama's POST /api/embeddings endpoint:
// a single prompt in, a single embedding vector out.
interface OllamaEmbeddingResponse {
embedding: number[];
}

// Context window sizes for common Ollama embedding models (in tokens)
const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
  'nomic-embed-text': 2048,
  'nomic-embed-text:latest': 2048,
  embeddinggemma: 2048,
  'embeddinggemma:latest': 2048,
  'mxbai-embed-large': 512,
  'mxbai-embed-large:latest': 512,
  'all-minilm': 512,
  'all-minilm:latest': 512
};

// Character budget for a model's prompt. Code tokenizes densely (punctuation
// and symbols), so we budget a very conservative 2 characters per token.
function getMaxChars(modelName: string): number {
  const contextTokens = MODEL_CONTEXT_WINDOWS[modelName];
  if (contextTokens === undefined) {
    // Unknown model: assume a 2048-token context window
    return 2048 * 2;
  }
  return contextTokens * 2;
}

/**
* Ollama Embedding Provider
* Supports local embedding models via Ollama API.
* API endpoint: POST /api/embeddings
*/
export class OllamaEmbeddingProvider implements EmbeddingProvider {
readonly name = 'ollama';
private maxChars: number;

// Default dimensions for nomic-embed-text (768)
// Override via EMBEDDING_DIMENSIONS env var for custom models
get dimensions(): number {
// Allow explicit dimension override via env var
if (process.env.EMBEDDING_DIMENSIONS) {
const parsed = parseInt(process.env.EMBEDDING_DIMENSIONS, 10);
if (!isNaN(parsed) && parsed > 0) {
return parsed;
}
}

// Common Ollama embedding model dimensions
const modelDimensions: Record<string, number> = {
'nomic-embed-text': 768,
'nomic-embed-text:latest': 768,
embeddinggemma: 768,
'embeddinggemma:latest': 768,
'mxbai-embed-large': 1024,
'mxbai-embed-large:latest': 1024,
'all-minilm': 384,
'all-minilm:latest': 384
};
return modelDimensions[this.modelName] || 768;
Comment on lines +8 to +57
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

embeddinggemma missing from model lookup tables

The PR description and OLLAMA_TEST_RESULTS.md both highlight embeddinggemma as a first-class supported and tested model. However, it is absent from both MODEL_CONTEXT_WINDOWS and the modelDimensions map in the dimensions getter. Unknown models silently fall back to 768 dimensions and 2048 token context. If embeddinggemma's actual values differ from these defaults in a future Ollama version, users will get silent LanceDB schema mismatches during re-indexing.

The same gap exists in getConfiguredDimensions in index.ts (line 34–42). Consider adding an explicit entry:

const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
  'nomic-embed-text': 2048,
  'nomic-embed-text:latest': 2048,
  'embeddinggemma': 2048,       // add
  'embeddinggemma:latest': 2048, // add
  'mxbai-embed-large': 512,
  ...
};

}

constructor(
readonly modelName: string = 'nomic-embed-text',
private apiEndpoint: string = 'http://localhost:11434'
) {
this.maxChars = getMaxChars(modelName);
}

async initialize(): Promise<void> {
// Ollama doesn't require an API key
// We could test connectivity here if needed
}

isReady(): boolean {
// Ollama is always "ready" - no auth required
return true;
}

private truncateText(text: string): string {
if (text.length <= this.maxChars) {
return text;
}
return text.slice(0, this.maxChars);
}

async embed(text: string): Promise<number[]> {
const batch = await this.embedBatch([text]);
return batch[0];
}

async embedBatch(texts: string[]): Promise<number[][]> {
if (!texts.length) return [];

const embeddings: number[][] = [];

// Ollama embeddings API processes one text at a time
for (const text of texts) {
try {
// Truncate text to fit within model's context window
const truncatedText = this.truncateText(text);

const response = await fetch(`${this.apiEndpoint}/api/embeddings`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: this.modelName,
prompt: truncatedText
})
});

if (!response.ok) {
const error = await response.text();
throw new Error(`Ollama API Error ${response.status}: ${error}`);
}

const data = (await response.json()) as OllamaEmbeddingResponse;
embeddings.push(data.embedding);
} catch (error) {
console.error('Ollama Embedding Failed:', error);
throw error;
}
}

return embeddings;
}
}
15 changes: 14 additions & 1 deletion src/embeddings/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,23 @@ export function parseEmbeddingProviderName(
// better conceptual search at the cost of 5-10x slower indexing and higher RAM usage
export const DEFAULT_MODEL = process.env.EMBEDDING_MODEL || 'Xenova/bge-small-en-v1.5';

// Default HTTP endpoint for remote-capable providers; local providers
// (transformers) need none. `||` (not `??`) is deliberate so an empty env
// var still falls back to the default.
function getDefaultApiEndpoint(provider: EmbeddingConfig['provider']): string | undefined {
  switch (provider) {
    case 'ollama':
      // OLLAMA_HOST mirrors the Ollama CLI's own configuration variable
      return process.env.OLLAMA_HOST || 'http://localhost:11434';
    case 'openai':
      // Supports any OpenAI-compatible endpoint (LiteLLM, Groq, OpenRouter, …)
      return process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1';
    default:
      return undefined;
  }
}

export const DEFAULT_EMBEDDING_CONFIG: EmbeddingConfig = {
provider: parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers',
model: DEFAULT_MODEL,
batchSize: 32,
maxRetries: 3,
apiKey: process.env.OPENAI_API_KEY
apiKey: process.env.OPENAI_API_KEY,
get apiEndpoint() {
return getDefaultApiEndpoint(this.provider);
}
};