MystenLabs · Ashwin-3cS · May 14, 2026 · May 14, 2026
diff --git a/docs/reference/environment-variables.md b/docs/reference/environment-variables.md
@@ -30,6 +30,11 @@ These are not all enforced at boot, but most real deployments need them.
 | `PORT` | `8000` | Relayer port |
 | `SIDECAR_URL` | `http://localhost:9000` | Sidecar HTTP endpoint |
 | `OPENAI_API_BASE` | `https://api.openai.com/v1` | OpenAI-compatible base URL |
+| `EMBEDDING_API_KEY` | falls back to `OPENAI_API_KEY` | Separate API key for the embedding provider. Use this when embeddings and LLM completions live behind different providers (e.g. Jina embeddings + OpenAI chat) |
+| `EMBEDDING_API_BASE` | falls back to `OPENAI_API_BASE` | Separate base URL for the embedding provider |
+| `EMBEDDING_MODEL` | `openai/text-embedding-3-small` | Embedding model identifier sent on `/embeddings` requests |
+| `EMBEDDING_DIMENSIONS` | model default | Optional `dimensions` override forwarded to the embedding provider. Required for variable-dimension models such as `jina-embeddings-v3`. Must match the schema dimension of `vector_entries.embedding` |
+| `LLM_MODEL` | `openai/gpt-4o-mini` | LLM model identifier used by `/api/analyze` fact extraction, `/api/ask`, and the summarize-before-embed step in `/api/remember` |
 | `SUI_NETWORK` | `mainnet` | Picks the fallback RPC URL and network-driven service defaults |
 | `SUI_RPC_URL` | network default | Override the Sui fullnode URL |
 | `WALRUS_PUBLISHER_URL` | Walrus mainnet publisher | Override upload endpoint |
@@ -54,7 +59,9 @@ These are not all enforced at boot, but most real deployments need them.
 - If both `SERVER_SUI_PRIVATE_KEYS` and `SERVER_SUI_PRIVATE_KEY` are set, the key pool takes priority for uploads. Upload jobs use the pool in round-robin order.
 - Keep `ENOKI_FALLBACK_TO_DIRECT_SIGN=false` in production if the server wallet should not pay gas when sponsorship is missing, expired, or rejected.
 - `OPENAI_API_KEY` and `OPENAI_API_BASE` control the embedding and fact-extraction provider used by `remember`, `recall`, `analyze`, `ask`, and restore re-indexing.
-- Without `OPENAI_API_KEY`, the server can fall back to mock embeddings. That is useful for local testing, not for normal production behavior.
+- `EMBEDDING_API_KEY` / `EMBEDDING_API_BASE` / `EMBEDDING_MODEL` / `EMBEDDING_DIMENSIONS` let you run the embedding side against a different provider (Jina, Cohere, etc.) without touching code. An unset `EMBEDDING_API_KEY` or `EMBEDDING_API_BASE` falls back to the corresponding `OPENAI_*` value; an unset `EMBEDDING_MODEL` falls back to `openai/text-embedding-3-small`; an unset `EMBEDDING_DIMENSIONS` leaves the model's default dimension in effect. `LLM_MODEL` selects the chat/completion model independently.
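+- When you switch embedding provider or model, the cached recall query embeddings invalidate automatically — the cache key includes the effective base URL and model name. The key does not include `EMBEDDING_DIMENSIONS`, so changing only the dimension does not invalidate existing cache entries.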
+- When neither `EMBEDDING_API_KEY` nor `OPENAI_API_KEY` is set, the server can fall back to mock embeddings. That is useful for local testing, not for normal production behavior.
 - `SUI_NETWORK` drives the default RPC URL, Walrus endpoints, Walrus package ID, and upload relay selection.
 - `SEAL_SERVER_CONFIGS` is a JSON array of `{ objectId, weight, aggregatorUrl?, apiKeyName?, apiKey? }`. Committee key server configs require `aggregatorUrl`.
 - `SEAL_KEY_SERVERS` is the legacy comma-separated independent key server list. It is only used when `SEAL_SERVER_CONFIGS` is unset.

diff --git a/docs/relayer/self-hosting.md b/docs/relayer/self-hosting.md
@@ -85,6 +85,16 @@ curl http://localhost:8000/health
 - `OPENAI_API_KEY` — enables real embeddings (falls back to mock embeddings without it)
 - `OPENAI_API_BASE` — point to an OpenAI-compatible provider like OpenRouter
 
+### Embedding & LLM Provider (Optional)
+
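+The embedding and LLM endpoints are pluggable. `EMBEDDING_API_KEY` and `EMBEDDING_API_BASE` fall back to the corresponding `OPENAI_*` value when unset; the model settings fall back to the defaults listed below, and `EMBEDDING_DIMENSIONS` is omitted from the request when unset.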
+
+- `EMBEDDING_API_KEY` — separate key for the embedding provider (e.g. Jina, Cohere)
+- `EMBEDDING_API_BASE` — separate base URL for the embedding provider
+- `EMBEDDING_MODEL` — embedding model identifier (default `openai/text-embedding-3-small`)
+- `EMBEDDING_DIMENSIONS` — optional dimensions override, required for variable-dimension models like `jina-embeddings-v3`. Must match the schema dimension of `vector_entries.embedding`; the server logs a WARN at boot on mismatch.
+- `LLM_MODEL` — chat/completion model used by `/api/analyze`, `/api/ask`, and the summarize-before-embed step (default `openai/gpt-4o-mini`)
+
 ### Rate Limits & Storage (Optional)
 
 By default, the relayer enforces rate limits and storage quotas via Redis to prevent abuse. You can customize these limits:

diff --git a/services/server/.env.example b/services/server/.env.example
@@ -4,10 +4,21 @@ PORT=8000
 # Database (PostgreSQL + pgvector)
 DATABASE_URL=postgresql://memwal:memwal_secret@localhost:5432/memwal
 
-# OpenAI-compatible API (for embedding)
+# OpenAI-compatible API (LLM + embedding fallback)
 OPENAI_API_KEY=sk-...
 OPENAI_API_BASE=https://api.openai.com/v1
 
+# Embedding provider — override to use Jina, Cohere, or any OpenAI-compatible provider.
+# Falls back to OPENAI_API_KEY / OPENAI_API_BASE when unset.
+# EMBEDDING_API_KEY=jina_...
+# EMBEDDING_API_BASE=https://api.jina.ai/v1
+# EMBEDDING_MODEL=jina-embeddings-v3
+# EMBEDDING_DIMENSIONS=1024
+
+# LLM model for fact extraction (/api/analyze), chat (/api/ask), and the summarize step in /api/remember.
+# Accepts any OpenRouter or OpenAI-compatible model identifier.
+# LLM_MODEL=openai/gpt-4o-mini
+
 # Sui Network (for onchain verification)
 # Controls all network-dependent defaults (RPC, Walrus, SEAL)
 SUI_NETWORK=mainnet

diff --git a/services/server/src/main.rs b/services/server/src/main.rs
@@ -64,6 +64,18 @@ async fn main() {
             .as_deref()
             .unwrap_or("(from client header)")
     );
+    tracing::info!(
+        "  embedding model: {} (base: {})",
+        config.embedding_model,
+        config
+            .embedding_api_base
+            .as_deref()
+            .unwrap_or(&config.openai_api_base)
+    );
+    if let Some(dims) = config.embedding_dimensions {
+        tracing::info!("  embedding dimensions: {}", dims);
+    }
+    tracing::info!("  llm model: {}", config.llm_model);
     tracing::info!(
         "  rate limit: burst={}/min, sustained={}/hr, per-key={}/min, quota={}MB/user",
         config.rate_limit.max_requests_per_minute,
@@ -139,6 +151,30 @@ async fn main() {
             .expect("Failed to connect to PostgreSQL"),
     );
 
+    // Warn if the schema embedding dimension doesn't match EMBEDDING_DIMENSIONS.
+    // Mixing dimensions in the same table breaks cosine similarity queries.
+    // To change dimensions: TRUNCATE vector_entries, then ALTER COLUMN
+    // embedding TYPE vector(<n>).
+    if let Some(configured_dims) = config.embedding_dimensions {
+        let row: Option<(i32,)> = sqlx::query_as(
+            "SELECT atttypmod FROM pg_attribute \
+             WHERE attrelid = 'vector_entries'::regclass AND attname = 'embedding'",
+        )
+        .fetch_optional(db.pool())
+        .await
+        .unwrap_or(None);
+        if let Some((schema_dims,)) = row {
+            if schema_dims > 0 && schema_dims as u32 != configured_dims {
+                tracing::warn!(
+                    "DIMENSION MISMATCH: schema has vector({}) but EMBEDDING_DIMENSIONS={}. \
+                     Recall will fail. Truncate vector_entries and run: \
+                     ALTER TABLE vector_entries ALTER COLUMN embedding TYPE vector({});",
+                    schema_dims, configured_dims, configured_dims
+                );
+            }
+        }
+    }
+
     // Setup Apalis job queue — auto-creates `apalis_jobs` table if not present
     // Uses the same DATABASE_URL as the main DB; no extra infrastructure needed.
     let apalis_pool = sqlx::PgPool::connect(&config.database_url)

diff --git a/services/server/src/routes/admin.rs b/services/server/src/routes/admin.rs
@@ -267,7 +267,7 @@ pub async fn ask(
         .header("Authorization", format!("Bearer {}", api_key))
         .header("Content-Type", "application/json")
         .json(&ChatCompletionRequest {
-            model: "openai/gpt-4o-mini".to_string(),
+            model: state.config.llm_model.clone(),
             messages: vec![
                 ChatMessage {
                     role: "system".to_string(),

diff --git a/services/server/src/routes/recall.rs b/services/server/src/routes/recall.rs
@@ -22,11 +22,16 @@ use super::truncate_str;
 // ============================================================
 
 fn recall_embedding_cache_key(config: &Config, query: &str) -> String {
-    use crate::services::embedder::EMBEDDING_MODEL;
     let mut hasher = sha2::Sha256::new();
-    hasher.update(config.openai_api_base.as_bytes());
+    // Use the effective embedding base + model so cache entries do not collide
+    // across providers when EMBEDDING_API_BASE / EMBEDDING_MODEL are overridden.
+    let base = config
+        .embedding_api_base
+        .as_deref()
+        .unwrap_or(&config.openai_api_base);
+    hasher.update(base.as_bytes());
     hasher.update(b"\0");
-    hasher.update(EMBEDDING_MODEL.as_bytes());
+    hasher.update(config.embedding_model.as_bytes());
     hasher.update(b"\0");
     hasher.update(query.as_bytes());
     format!("memwal:embedding:v1:{:x}", hasher.finalize())

diff --git a/services/server/src/routes/remember.rs b/services/server/src/routes/remember.rs
@@ -389,7 +389,7 @@ async fn summarize_with_prompt(
         .header("Authorization", format!("Bearer {}", api_key))
         .header("Content-Type", "application/json")
         .json(&ChatCompletionRequest {
-            model: "openai/gpt-4o-mini".to_string(),
+            model: config.llm_model.clone(),
             messages: vec![
                 ChatMessage {
                     role: "system".to_string(),

diff --git a/services/server/src/services/embedder.rs b/services/server/src/services/embedder.rs
@@ -14,15 +14,9 @@ use std::sync::Arc;
 
 use crate::types::{AppError, Config};
 
-/// Embedding model used for both ingestion and recall-query embeddings.
-/// Kept here (was a `routes.rs` const) — the recall query-embedding cache
-/// key in `routes.rs` references it via `crate::services::embedder::EMBEDDING_MODEL`
-/// so the cache key changes if the model changes.
-pub const EMBEDDING_MODEL: &str = "openai/text-embedding-3-small";
-
-/// Embedding vector dimensionality (text-embedding-3-small). Also the
-/// width of the deterministic mock vector.
-const EMBEDDING_DIMS: usize = 1536;
+/// Default embedding vector dimensionality. Used as the width of the mock
+/// fallback vector when `Config::embedding_dimensions` is unset.
+const DEFAULT_EMBEDDING_DIMS: usize = 1536;
 
 #[async_trait]
 pub trait Embedder: Send + Sync {
@@ -53,19 +47,32 @@ impl OpenAiEmbedder {
 impl Embedder for OpenAiEmbedder {
     #[tracing::instrument(name = "embedder.embed", skip_all, fields(text_len = text.len()))]
     async fn embed(&self, text: &str) -> Result<Vec<f32>, AppError> {
-        match &self.config.openai_api_key {
+        // Fallback chain: EMBEDDING_API_KEY → OPENAI_API_KEY, EMBEDDING_API_BASE → OPENAI_API_BASE.
+        let api_key = self
+            .config
+            .embedding_api_key
+            .as_ref()
+            .or(self.config.openai_api_key.as_ref());
+        let api_base = self
+            .config
+            .embedding_api_base
+            .as_deref()
+            .unwrap_or(&self.config.openai_api_base);
+
+        match api_key {
             Some(api_key) => {
-                // Real embedding via OpenRouter/OpenAI-compatible API
-                let url = format!("{}/embeddings", self.config.openai_api_base);
+                // Real embedding via configured provider (OpenAI, OpenRouter, Jina, …).
+                let url = format!("{}/embeddings", api_base);
 
                 let resp = self
                     .http_client
                     .post(&url)
                     .header("Authorization", format!("Bearer {}", api_key))
                     .header("Content-Type", "application/json")
                     .json(&EmbeddingApiRequest {
-                        model: EMBEDDING_MODEL.to_string(),
+                        model: self.config.embedding_model.clone(),
                         input: text.to_string(),
+                        dimensions: self.config.embedding_dimensions,
                     })
                     .send()
                     .await
@@ -95,14 +102,19 @@ impl Embedder for OpenAiEmbedder {
                 Ok(vector)
             }
             None => {
-                // Mock embedding (deterministic hash-based) — for keyless dev
-                tracing::warn!("  → Using MOCK embedding (no OPENAI_API_KEY set)");
+                // Mock embedding (deterministic hash-based) — for keyless dev.
+                tracing::warn!("  → Using MOCK embedding (no OPENAI_API_KEY or EMBEDDING_API_KEY set)");
                 use sha2::Digest;
                 let hash = sha2::Sha256::digest(text.as_bytes());
+                let dims = self
+                    .config
+                    .embedding_dimensions
+                    .map(|d| d as usize)
+                    .unwrap_or(DEFAULT_EMBEDDING_DIMS);
                 let mock_vector: Vec<f32> = hash
                     .iter()
                     .cycle()
-                    .take(EMBEDDING_DIMS)
+                    .take(dims)
                     .enumerate()
                     .map(|(i, &b)| {
                         let val = (b as f32 / 255.0) * 2.0 - 1.0;
@@ -123,6 +135,8 @@ impl Embedder for OpenAiEmbedder {
 struct EmbeddingApiRequest {
     model: String,
     input: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    dimensions: Option<u32>,
 }
 
 #[derive(serde::Deserialize)]

diff --git a/services/server/src/services/extractor.rs b/services/server/src/services/extractor.rs
@@ -96,7 +96,7 @@ impl Extractor for LlmExtractor {
             .header("Authorization", format!("Bearer {}", api_key))
             .header("Content-Type", "application/json")
             .json(&ChatCompletionRequest {
-                model: "openai/gpt-4o-mini".to_string(),
+                model: self.config.llm_model.clone(),
                 messages: vec![
                     ChatMessage {
                         role: "system".to_string(),

diff --git a/services/server/src/types.rs b/services/server/src/types.rs
@@ -150,6 +150,19 @@ pub struct Config {
     pub memwal_account_id: Option<String>,
     pub openai_api_key: Option<String>,
     pub openai_api_base: String,
+    /// Optional separate API key for embeddings (falls back to OPENAI_API_KEY).
+    pub embedding_api_key: Option<String>,
+    /// Optional separate base URL for embeddings (falls back to OPENAI_API_BASE).
+    pub embedding_api_base: Option<String>,
+    /// Embedding model identifier sent to the OpenAI-compatible /embeddings endpoint
+    /// (e.g. "openai/text-embedding-3-small", "jina-embeddings-v3").
+    pub embedding_model: String,
+    /// Optional `dimensions` override forwarded on the /embeddings request.
+    /// Set this when the model supports variable-dimension output (e.g. Jina v3).
+    pub embedding_dimensions: Option<u32>,
+    /// LLM model identifier used for fact extraction (/api/analyze), retrieval-augmented
+    /// chat (/api/ask), and the summarize-before-embed step in /api/remember.
+    pub llm_model: String,
     pub walrus_publisher_url: String,
     pub walrus_aggregator_url: String,
     /// Primary key (used for SEAL decrypt / recall). Unchanged.
@@ -199,6 +212,15 @@ impl Config {
             openai_api_key: std::env::var("OPENAI_API_KEY").ok(),
             openai_api_base: std::env::var("OPENAI_API_BASE")
                 .unwrap_or_else(|_| "https://api.openai.com/v1".to_string()),
+            embedding_api_key: std::env::var("EMBEDDING_API_KEY").ok(),
+            embedding_api_base: std::env::var("EMBEDDING_API_BASE").ok(),
+            embedding_model: std::env::var("EMBEDDING_MODEL")
+                .unwrap_or_else(|_| "openai/text-embedding-3-small".to_string()),
+            embedding_dimensions: std::env::var("EMBEDDING_DIMENSIONS")
+                .ok()
+                .and_then(|s| s.parse().ok()),
+            llm_model: std::env::var("LLM_MODEL")
+                .unwrap_or_else(|_| "openai/gpt-4o-mini".to_string()),
             walrus_publisher_url: std::env::var("WALRUS_PUBLISHER_URL")
                 .unwrap_or_else(|_| "https://publisher.walrus-mainnet.walrus.space".to_string()),
             walrus_aggregator_url: std::env::var("WALRUS_AGGREGATOR_URL")