From 6e1369a5569a574438a558154c05dd872fb3ac20 Mon Sep 17 00:00:00 2001
From: Ravi Tharuma <RaviTharuma@users.noreply.github.com>
Date: Sun, 29 Mar 2026 01:11:12 +0100
Subject: [PATCH 1/3] feat: audio ingestion via Whisper API (memorix-ek1)

---
 src/multimodal/audio-loader.ts        | 145 +++++++++++++++++++++
 src/multimodal/index.ts               |  12 ++
 src/server.ts                         |  45 +++++++
 tests/multimodal/audio-loader.test.ts | 173 ++++++++++++++++++++++++++
 4 files changed, 375 insertions(+)
 create mode 100644 src/multimodal/audio-loader.ts
 create mode 100644 src/multimodal/index.ts
 create mode 100644 tests/multimodal/audio-loader.test.ts

diff --git a/src/multimodal/audio-loader.ts b/src/multimodal/audio-loader.ts
new file mode 100644
index 0000000..0c5f1ba
--- /dev/null
+++ b/src/multimodal/audio-loader.ts
@@ -0,0 +1,145 @@
+/**
+ * Audio Loader — Whisper API Integration
+ *
+ * Transcribes audio files via OpenAI Whisper or Groq Whisper API,
+ * then stores the transcript as a Memorix observation.
+ *
+ * Supports: mp3, wav, m4a, webm, mp4, ogg, flac
+ * Providers: OpenAI (whisper-1), Groq (whisper-large-v3)
+ */
+
+import { getLLMApiKey } from '../config.js';
+
+// ── Types ────────────────────────────────────────────────────────────
+
+export interface AudioInput {
+  /** Base64-encoded audio data */
+  base64: string;
+  /** Audio MIME type (default: audio/mp3) */
+  mimeType?: string;
+  /** Original filename */
+  filename?: string;
+  /** ISO language code for transcription hint */
+  language?: string;
+  /** Whisper provider: openai or groq */
+  provider?: 'openai' | 'groq';
+}
+
+export interface TranscriptionResult {
+  /** Transcribed text */
+  text: string;
+  /** Audio duration in seconds */
+  duration?: number;
+  /** Detected language */
+  language?: string;
+  /** Provider used */
+  provider: string;
+}
+
+// ── Provider Config ──────────────────────────────────────────────────
+
+const PROVIDERS = {
+  openai: {
+    baseUrl: 'https://api.openai.com/v1',
+    model: 'whisper-1',
+  },
+  groq: {
+    baseUrl: 'https://api.groq.com/openai/v1',
+    model: 'whisper-large-v3',
+  },
+} as const;
+
+// ── Core Functions ───────────────────────────────────────────────────
+
+/**
+ * Transcribe audio via Whisper API.
+ *
+ * @throws Error if no API key configured or API returns error.
+ */
+export async function transcribeAudio(input: AudioInput): Promise<TranscriptionResult> {
+  const apiKey = getLLMApiKey();
+  if (!apiKey) {
+    throw new Error(
+      'No API key configured for audio transcription. ' +
+      'Set MEMORIX_LLM_API_KEY, MEMORIX_API_KEY, or OPENAI_API_KEY.',
+    );
+  }
+
+  const providerName = input.provider
+    ?? (process.env.MEMORIX_AUDIO_PROVIDER as 'openai' | 'groq' | undefined)
+    ?? 'openai';
+  const config = PROVIDERS[providerName] ?? PROVIDERS.openai;
+
+  // Build multipart form
+  const audioBuffer = Buffer.from(input.base64, 'base64');
+  const blob = new Blob([audioBuffer], { type: input.mimeType ?? 'audio/mp3' });
+  const form = new FormData();
+  form.append('file', blob, input.filename ?? 'audio.mp3');
+  form.append('model', config.model);
+  form.append('response_format', 'json');
+  if (input.language) {
+    form.append('language', input.language);
+  }
+
+  const response = await fetch(`${config.baseUrl}/audio/transcriptions`, {
+    method: 'POST',
+    headers: { 'Authorization': `Bearer ${apiKey}` },
+    body: form,
+    signal: AbortSignal.timeout(120_000), // 2 min timeout for large files
+  });
+
+  if (!response.ok) {
+    const errorText = await response.text().catch(() => 'unknown error');
+    throw new Error(`Whisper API error (${response.status}): ${errorText}`);
+  }
+
+  const data = await response.json() as {
+    text: string;
+    duration?: number;
+    language?: string;
+  };
+
+  return {
+    text: data.text,
+    duration: data.duration,
+    language: data.language,
+    provider: providerName,
+  };
+}
+
+/**
+ * Transcribe audio and store as a Memorix observation.
+ */
+export async function ingestAudio(
+  input: AudioInput,
+  storeFn: (obs: {
+    entityName: string;
+    type: string;
+    title: string;
+    narrative: string;
+    concepts: string[];
+    projectId: string;
+  }) => Promise<{ observation: { id: number }; upserted: boolean }>,
+  projectId: string,
+): Promise<{ observationId: number; text: string; duration?: number }> {
+  const result = await transcribeAudio(input);
+
+  const entityName = input.filename
+    ? input.filename.replace(/\.[^.]+$/, '')
+    : `audio-${Date.now()}`;
+
+  const { observation } = await storeFn({
+    entityName,
+    type: 'discovery',
+    title: `Audio transcript: ${entityName}`,
+    narrative: result.text,
+    concepts: ['audio', 'transcript', ...(result.language ? [result.language] : [])],
+    projectId,
+  });
+
+  return {
+    observationId: observation.id,
+    text: result.text,
+    duration: result.duration,
+  };
+}
diff --git a/src/multimodal/index.ts b/src/multimodal/index.ts
new file mode 100644
index 0000000..823b7fc
--- /dev/null
+++ b/src/multimodal/index.ts
@@ -0,0 +1,12 @@
+/**
+ * Multimodal Ingestion — Unified Entry Point
+ *
+ * Re-exports all multimodal loaders for convenient access.
+ */
+
+export {
+  transcribeAudio,
+  ingestAudio,
+  type AudioInput,
+  type TranscriptionResult,
+} from './audio-loader.js';
diff --git a/src/server.ts b/src/server.ts
index 69162fd..8b78565 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -3055,6 +3055,51 @@ export async function createMemorixServer(
     },
   );
 
+  // ── Multimodal Ingestion Tools ─────────────────────────────────────
+
+  server.registerTool(
+    'memorix_ingest_audio',
+    {
+      title: 'Ingest Audio',
+      description:
+        'Transcribe audio via Whisper API (OpenAI or Groq) and store the transcript as a memory observation. ' +
+        'Supports mp3, wav, m4a, webm, ogg, flac formats.',
+      inputSchema: {
+        base64: z.string().describe('Base64-encoded audio data'),
+        mimeType: z.string().optional().describe('Audio MIME type (e.g. audio/mp3)'),
+        filename: z.string().optional().describe('Original filename'),
+        language: z.string().optional().describe('ISO language code for transcription hint'),
+        provider: z.enum(['openai', 'groq']).optional().describe('Whisper provider (default: openai)'),
+      },
+    },
+    async (args) => {
+      try {
+        const { ingestAudio } = await import('./multimodal/index.js');
+        markInternalWrite();
+        const result = await ingestAudio(
+          args,
+          (obs) => storeObservation(obs),
+          project.id,
+        );
+        return {
+          content: [{
+            type: 'text' as const,
+            text: `🎤 Audio transcribed (${result.duration ? result.duration.toFixed(1) + 's' : 'unknown duration'})\n` +
+              `Observation #${result.observationId}\n` +
+              `Preview: ${result.text.slice(0, 300)}${result.text.length > 300 ? '…' : ''}`,
+          }],
+        };
+      } catch (err: unknown) {
+        return {
+          content: [{
+            type: 'text' as const,
+            text: `❌ Audio ingestion failed: ${err instanceof Error ? err.message : String(err)}`,
+          }],
+          isError: true,
+        };
+      }
+    },
+  );
   // Deferred initialization — runs AFTER transport connect so MCP handshake isn't blocked.
   // Sync advisory scan and file watcher are non-essential for tool functionality.
   const deferredInit = async () => {
diff --git a/tests/multimodal/audio-loader.test.ts b/tests/multimodal/audio-loader.test.ts
new file mode 100644
index 0000000..ce98f68
--- /dev/null
+++ b/tests/multimodal/audio-loader.test.ts
@@ -0,0 +1,173 @@
+import { describe, it, expect, afterEach, beforeEach } from 'bun:test';
+import { transcribeAudio, ingestAudio } from '../../src/multimodal/audio-loader.js';
+import { resetConfigCache } from '../../src/config.js';
+
+describe('audio-loader', () => {
+  const originalFetch = globalThis.fetch;
+
+  beforeEach(() => {
+    resetConfigCache();
+  });
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+    // Clean up env vars
+    delete process.env.OPENAI_API_KEY;
+    delete process.env.MEMORIX_LLM_API_KEY;
+    delete process.env.MEMORIX_API_KEY;
+    delete process.env.MEMORIX_AUDIO_PROVIDER;
+    resetConfigCache();
+  });
+
+  it('calls OpenAI Whisper endpoint by default', async () => {
+    let calledUrl = '';
+    let calledHeaders: Record<string, string> = {};
+    globalThis.fetch = (async (url: any, opts: any) => {
+      calledUrl = String(url);
+      calledHeaders = opts?.headers ?? {};
+      return new Response(JSON.stringify({ text: 'hello world', duration: 5.2 }), { status: 200 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'test-key-123';
+    const result = await transcribeAudio({
+      base64: Buffer.from('fake audio data').toString('base64'),
+    });
+
+    expect(calledUrl).toContain('api.openai.com');
+    expect(calledUrl).toContain('/audio/transcriptions');
+    expect(calledHeaders['Authorization']).toBe('Bearer test-key-123');
+    expect(result.text).toBe('hello world');
+    expect(result.duration).toBe(5.2);
+    expect(result.provider).toBe('openai');
+  });
+
+  it('calls Groq endpoint when provider=groq', async () => {
+    let calledUrl = '';
+    globalThis.fetch = (async (url: any) => {
+      calledUrl = String(url);
+      return new Response(JSON.stringify({ text: 'groq result' }), { status: 200 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'test-key';
+    const result = await transcribeAudio({
+      base64: Buffer.from('fake').toString('base64'),
+      provider: 'groq',
+    });
+
+    expect(calledUrl).toContain('api.groq.com');
+    expect(result.provider).toBe('groq');
+  });
+
+  it('uses MEMORIX_AUDIO_PROVIDER env var', async () => {
+    let calledUrl = '';
+    globalThis.fetch = (async (url: any) => {
+      calledUrl = String(url);
+      return new Response(JSON.stringify({ text: 'env result' }), { status: 200 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'test-key';
+    process.env.MEMORIX_AUDIO_PROVIDER = 'groq';
+    const result = await transcribeAudio({
+      base64: Buffer.from('fake').toString('base64'),
+    });
+
+    expect(calledUrl).toContain('api.groq.com');
+    expect(result.provider).toBe('groq');
+  });
+
+  it('throws when no API key configured', async () => {
+    // Ensure no API keys are set
+    delete process.env.OPENAI_API_KEY;
+    delete process.env.MEMORIX_LLM_API_KEY;
+    delete process.env.MEMORIX_API_KEY;
+    delete process.env.ANTHROPIC_API_KEY;
+    delete process.env.OPENROUTER_API_KEY;
+
+    await expect(
+      transcribeAudio({ base64: 'dGVzdA==' }),
+    ).rejects.toThrow('No API key configured');
+  });
+
+  it('throws on API error response', async () => {
+    globalThis.fetch = (async () => {
+      return new Response('Rate limit exceeded', { status: 429 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'test-key';
+    await expect(
+      transcribeAudio({ base64: Buffer.from('audio').toString('base64') }),
+    ).rejects.toThrow('Whisper API error (429)');
+  });
+
+  it('passes language parameter', async () => {
+    let formData: FormData | null = null;
+    globalThis.fetch = (async (_url: any, opts: any) => {
+      formData = opts?.body;
+      return new Response(JSON.stringify({ text: 'bonjour', language: 'fr' }), { status: 200 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'test-key';
+    const result = await transcribeAudio({
+      base64: Buffer.from('french audio').toString('base64'),
+      language: 'fr',
+    });
+
+    expect(result.text).toBe('bonjour');
+    expect(result.language).toBe('fr');
+    // FormData should have language field
+    expect(formData).toBeTruthy();
+  });
+
+  it('ingestAudio stores observation with correct fields', async () => {
+    globalThis.fetch = (async () => {
+      return new Response(JSON.stringify({ text: 'transcribed content', duration: 30 }), { status: 200 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'test-key';
+
+    let storedObs: Record<string, unknown> | null = null;
+    const storeFn = async (obs: Record<string, unknown>) => {
+      storedObs = obs;
+      return { observation: { id: 42 }, upserted: false };
+    };
+
+    const result = await ingestAudio(
+      { base64: Buffer.from('audio data').toString('base64'), filename: 'meeting-notes.mp3' },
+      storeFn as any,
+      'project-123',
+    );
+
+    expect(result.observationId).toBe(42);
+    expect(result.text).toBe('transcribed content');
+    expect(result.duration).toBe(30);
+    expect(storedObs).toBeTruthy();
+    expect(storedObs!.entityName).toBe('meeting-notes');
+    expect(storedObs!.type).toBe('discovery');
+    expect(storedObs!.projectId).toBe('project-123');
+    expect((storedObs!.concepts as string[])).toContain('audio');
+    expect((storedObs!.concepts as string[])).toContain('transcript');
+  });
+
+  it('ingestAudio uses timestamp for unnamed files', async () => {
+    globalThis.fetch = (async () => {
+      return new Response(JSON.stringify({ text: 'text' }), { status: 200 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'test-key';
+
+    let storedObs: Record<string, unknown> | null = null;
+    const storeFn = async (obs: Record<string, unknown>) => {
+      storedObs = obs;
+      return { observation: { id: 1 }, upserted: false };
+    };
+
+    await ingestAudio(
+      { base64: Buffer.from('data').toString('base64') },
+      storeFn as any,
+      'proj',
+    );
+
+    expect(storedObs).toBeTruthy();
+    expect((storedObs!.entityName as string)).toMatch(/^audio-\d+$/);
+  });
+});

From c57784c5bc5922dae310964a4e24ebfc532b8a63 Mon Sep 17 00:00:00 2001
From: Ravi Tharuma <RaviTharuma@users.noreply.github.com>
Date: Mon, 30 Mar 2026 09:22:16 +0200
Subject: [PATCH 2/3] =?UTF-8?q?fix:=20address=20PR=20review=20=E2=80=94=20?=
 =?UTF-8?q?move=20markInternalWrite=20to=20write=20callback,=20use=20vites?=
 =?UTF-8?q?t=20imports?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/server.ts                         | 3 +--
 tests/multimodal/audio-loader.test.ts | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/server.ts b/src/server.ts
index 8b78565..1beec63 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -3075,10 +3075,9 @@ export async function createMemorixServer(
     async (args) => {
       try {
         const { ingestAudio } = await import('./multimodal/index.js');
-        markInternalWrite();
         const result = await ingestAudio(
           args,
-          (obs) => storeObservation(obs),
+          (obs) => { markInternalWrite(); return storeObservation(obs); },
           project.id,
         );
         return {
diff --git a/tests/multimodal/audio-loader.test.ts b/tests/multimodal/audio-loader.test.ts
index ce98f68..bcd4151 100644
--- a/tests/multimodal/audio-loader.test.ts
+++ b/tests/multimodal/audio-loader.test.ts
@@ -1,4 +1,4 @@
-import { describe, it, expect, afterEach, beforeEach } from 'bun:test';
+import { describe, it, expect, afterEach, beforeEach } from 'vitest';
 import { transcribeAudio, ingestAudio } from '../../src/multimodal/audio-loader.js';
 import { resetConfigCache } from '../../src/config.js';
 

From 55a196d132d6130077d33b66f9294b1b22b60bc7 Mon Sep 17 00:00:00 2001
From: Ravi Tharuma <RaviTharuma@users.noreply.github.com>
Date: Wed, 8 Apr 2026 19:21:28 +0200
Subject: [PATCH 3/3] =?UTF-8?q?fix(audio):=20address=20PR=20review=20block?=
 =?UTF-8?q?ers=20=E2=80=94=20ObservationType,=20dedicated=20Whisper=20conf?=
 =?UTF-8?q?ig?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three changes:

1. storeFn parameter type: type: string -> type: ObservationType so the
   callback signature is compatible with storeObservation() without a cast.

2. Dedicated Whisper key resolution: transcribeAudio() now checks
   MEMORIX_WHISPER_API_KEY first, then falls back to OPENAI_API_KEY.
   Does NOT fall through to MEMORIX_LLM_API_KEY — that key may point
   to Anthropic or another non-Whisper provider, which would send the
   wrong credential to the /audio/transcriptions endpoint.

3. MEMORIX_WHISPER_BASE_URL: optional override for self-hosted or
   alternative Whisper-compatible endpoints (vLLM, LocalAI, etc.).

Tests: added coverage for MEMORIX_WHISPER_API_KEY priority and
MEMORIX_WHISPER_BASE_URL custom endpoint; cleanup ensures both vars
are cleared in afterEach.
---
 src/multimodal/audio-loader.ts        | 59 +++++++++++++++++++++++----
 tests/multimodal/audio-loader.test.ts | 34 +++++++++++++++
 2 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/src/multimodal/audio-loader.ts b/src/multimodal/audio-loader.ts
index 0c5f1ba..e8d1490 100644
--- a/src/multimodal/audio-loader.ts
+++ b/src/multimodal/audio-loader.ts
@@ -6,9 +6,19 @@
  *
  * Supports: mp3, wav, m4a, webm, mp4, ogg, flac
  * Providers: OpenAI (whisper-1), Groq (whisper-large-v3)
+ *
+ * Configuration:
+ *   MEMORIX_WHISPER_API_KEY  — API key for Whisper transcription (preferred)
+ *   MEMORIX_WHISPER_BASE_URL — Custom Whisper-compatible endpoint (optional)
+ *   MEMORIX_AUDIO_PROVIDER   — Provider preset: 'openai' | 'groq' (default: openai)
+ *
+ * Falls back to OPENAI_API_KEY when MEMORIX_WHISPER_API_KEY is not set,
+ * since OpenAI Whisper is the default provider. Does NOT fall back to
+ * generic MEMORIX_LLM_API_KEY to avoid sending the wrong credential to
+ * a non-Whisper provider (e.g. Anthropic).
  */
 
-import { getLLMApiKey } from '../config.js';
+import type { ObservationType } from '../types.js';
 
 // ── Types ────────────────────────────────────────────────────────────
 
@@ -49,6 +59,38 @@ const PROVIDERS = {
   },
 } as const;
 
+// ── Key Resolution ───────────────────────────────────────────────────
+
+/**
+ * Resolve the API key for Whisper transcription.
+ *
+ * Resolution order:
+ *   1. MEMORIX_WHISPER_API_KEY  (dedicated Whisper key — always correct)
+ *   2. OPENAI_API_KEY           (OpenAI Whisper default)
+ *
+ * Intentionally does NOT fall back to MEMORIX_LLM_API_KEY or MEMORIX_API_KEY:
+ * those may point to Anthropic or another non-Whisper provider, which would
+ * send the wrong credential to the /audio/transcriptions endpoint.
+ */
+function getWhisperApiKey(): string | undefined {
+  return (
+    process.env.MEMORIX_WHISPER_API_KEY ||
+    process.env.OPENAI_API_KEY
+  );
+}
+
+/**
+ * Resolve the base URL for the Whisper endpoint.
+ *
+ * Resolution order:
+ *   1. MEMORIX_WHISPER_BASE_URL (custom / self-hosted endpoint)
+ *   2. Provider preset default (api.openai.com or api.groq.com)
+ */
+function getWhisperBaseUrl(providerDefault: string): string {
+  const custom = process.env.MEMORIX_WHISPER_BASE_URL;
+  return custom ? custom.replace(/\/+$/, '') : providerDefault;
+}
+
 // ── Core Functions ───────────────────────────────────────────────────
 
 /**
@@ -57,31 +99,32 @@ const PROVIDERS = {
  * @throws Error if no API key configured or API returns error.
  */
 export async function transcribeAudio(input: AudioInput): Promise<TranscriptionResult> {
-  const apiKey = getLLMApiKey();
+  const apiKey = getWhisperApiKey();
   if (!apiKey) {
     throw new Error(
       'No API key configured for audio transcription. ' +
-      'Set MEMORIX_LLM_API_KEY, MEMORIX_API_KEY, or OPENAI_API_KEY.',
+      'Set MEMORIX_WHISPER_API_KEY or OPENAI_API_KEY.',
     );
   }
 
   const providerName = input.provider
     ?? (process.env.MEMORIX_AUDIO_PROVIDER as 'openai' | 'groq' | undefined)
     ?? 'openai';
-  const config = PROVIDERS[providerName] ?? PROVIDERS.openai;
+  const preset = PROVIDERS[providerName] ?? PROVIDERS.openai;
+  const baseUrl = getWhisperBaseUrl(preset.baseUrl);
 
   // Build multipart form
   const audioBuffer = Buffer.from(input.base64, 'base64');
   const blob = new Blob([audioBuffer], { type: input.mimeType ?? 'audio/mp3' });
   const form = new FormData();
   form.append('file', blob, input.filename ?? 'audio.mp3');
-  form.append('model', config.model);
+  form.append('model', preset.model);
   form.append('response_format', 'json');
   if (input.language) {
     form.append('language', input.language);
   }
 
-  const response = await fetch(`${config.baseUrl}/audio/transcriptions`, {
+  const response = await fetch(`${baseUrl}/audio/transcriptions`, {
     method: 'POST',
     headers: { 'Authorization': `Bearer ${apiKey}` },
     body: form,
@@ -114,7 +157,7 @@ export async function ingestAudio(
   input: AudioInput,
   storeFn: (obs: {
     entityName: string;
-    type: string;
+    type: ObservationType;
     title: string;
     narrative: string;
     concepts: string[];
@@ -130,7 +173,7 @@ export async function ingestAudio(
 
   const { observation } = await storeFn({
     entityName,
-    type: 'discovery',
+    type: 'discovery' as ObservationType,
     title: `Audio transcript: ${entityName}`,
     narrative: result.text,
     concepts: ['audio', 'transcript', ...(result.language ? [result.language] : [])],
diff --git a/tests/multimodal/audio-loader.test.ts b/tests/multimodal/audio-loader.test.ts
index bcd4151..023e2d6 100644
--- a/tests/multimodal/audio-loader.test.ts
+++ b/tests/multimodal/audio-loader.test.ts
@@ -16,6 +16,8 @@ describe('audio-loader', () => {
     delete process.env.MEMORIX_LLM_API_KEY;
     delete process.env.MEMORIX_API_KEY;
     delete process.env.MEMORIX_AUDIO_PROVIDER;
+    delete process.env.MEMORIX_WHISPER_API_KEY;
+    delete process.env.MEMORIX_WHISPER_BASE_URL;
     resetConfigCache();
   });
 
@@ -75,11 +77,43 @@ describe('audio-loader', () => {
     expect(result.provider).toBe('groq');
   });
 
+  it('uses MEMORIX_WHISPER_API_KEY over OPENAI_API_KEY', async () => {
+    let calledHeaders: Record<string, string> = {};
+    globalThis.fetch = (async (_url: any, opts: any) => {
+      calledHeaders = opts?.headers ?? {};
+      return new Response(JSON.stringify({ text: 'result' }), { status: 200 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'wrong-key';
+    process.env.MEMORIX_WHISPER_API_KEY = 'whisper-specific-key';
+    const result = await transcribeAudio({ base64: Buffer.from('data').toString('base64') });
+
+    expect(calledHeaders['Authorization']).toBe('Bearer whisper-specific-key');
+    expect(result.text).toBe('result');
+  });
+
+  it('uses MEMORIX_WHISPER_BASE_URL for custom endpoint', async () => {
+    let calledUrl = '';
+    globalThis.fetch = (async (url: any) => {
+      calledUrl = String(url);
+      return new Response(JSON.stringify({ text: 'custom result' }), { status: 200 });
+    }) as typeof fetch;
+
+    process.env.OPENAI_API_KEY = 'test-key';
+    process.env.MEMORIX_WHISPER_BASE_URL = 'https://my-whisper.example.com/v1';
+    const result = await transcribeAudio({ base64: Buffer.from('data').toString('base64') });
+
+    expect(calledUrl).toContain('my-whisper.example.com');
+    expect(calledUrl).toContain('/audio/transcriptions');
+    expect(result.text).toBe('custom result');
+  });
+
   it('throws when no API key configured', async () => {
     // Ensure no API keys are set
     delete process.env.OPENAI_API_KEY;
     delete process.env.MEMORIX_LLM_API_KEY;
     delete process.env.MEMORIX_API_KEY;
+    delete process.env.MEMORIX_WHISPER_API_KEY;
     delete process.env.ANTHROPIC_API_KEY;
     delete process.env.OPENROUTER_API_KEY;