From 34ef7288c0f8138de94653fccf071281a983ad77 Mon Sep 17 00:00:00 2001 From: Ashwin-3cS Date: Thu, 14 May 2026 22:42:34 +0530 Subject: [PATCH 1/2] feat(server): configurable embedding provider and LLM model Five new env vars (EMBEDDING_API_KEY/BASE/MODEL/DIMENSIONS, LLM_MODEL) so self-hosters can swap providers without code changes. Falls back to OPENAI_* when unset. Recall cache key now keyed on effective base+model. Boot WARN when schema vector dim doesn't match EMBEDDING_DIMENSIONS. --- services/server/.env.example | 13 ++++++- services/server/src/main.rs | 36 ++++++++++++++++++ services/server/src/routes/admin.rs | 2 +- services/server/src/routes/recall.rs | 11 ++++-- services/server/src/routes/remember.rs | 2 +- services/server/src/services/embedder.rs | 46 +++++++++++++++-------- services/server/src/services/extractor.rs | 2 +- services/server/src/types.rs | 22 +++++++++++ 8 files changed, 111 insertions(+), 23 deletions(-) diff --git a/services/server/.env.example b/services/server/.env.example index 3b561d1c..28617b93 100644 --- a/services/server/.env.example +++ b/services/server/.env.example @@ -4,10 +4,21 @@ PORT=8000 # Database (PostgreSQL + pgvector) DATABASE_URL=postgresql://memwal:memwal_secret@localhost:5432/memwal -# OpenAI-compatible API (for embedding) +# OpenAI-compatible API (LLM + embedding fallback) OPENAI_API_KEY=sk-... OPENAI_API_BASE=https://api.openai.com/v1 +# Embedding provider — override to use Jina, Cohere, or any OpenAI-compatible provider. +# Falls back to OPENAI_API_KEY / OPENAI_API_BASE when unset. +# EMBEDDING_API_KEY=jina_... +# EMBEDDING_API_BASE=https://api.jina.ai/v1 +# EMBEDDING_MODEL=jina-embeddings-v3 +# EMBEDDING_DIMENSIONS=1024 + +# LLM model for fact extraction (/api/analyze) and retrieval-augmented chat (/api/ask). +# Accepts any OpenRouter or OpenAI-compatible model identifier. 
+# LLM_MODEL=openai/gpt-4o-mini + # Sui Network (for onchain verification) # Controls all network-dependent defaults (RPC, Walrus, SEAL) SUI_NETWORK=mainnet diff --git a/services/server/src/main.rs b/services/server/src/main.rs index cc728968..5ddcd3cf 100644 --- a/services/server/src/main.rs +++ b/services/server/src/main.rs @@ -64,6 +64,18 @@ async fn main() { .as_deref() .unwrap_or("(from client header)") ); + tracing::info!( + " embedding model: {} (base: {})", + config.embedding_model, + config + .embedding_api_base + .as_deref() + .unwrap_or(&config.openai_api_base) + ); + if let Some(dims) = config.embedding_dimensions { + tracing::info!(" embedding dimensions: {}", dims); + } + tracing::info!(" llm model: {}", config.llm_model); tracing::info!( " rate limit: burst={}/min, sustained={}/hr, per-key={}/min, quota={}MB/user", config.rate_limit.max_requests_per_minute, @@ -139,6 +151,30 @@ async fn main() { .expect("Failed to connect to PostgreSQL"), ); + // Warn if the schema embedding dimension doesn't match EMBEDDING_DIMENSIONS. + // Mixing dimensions in the same table breaks cosine similarity queries. + // To change dimensions: TRUNCATE vector_entries, then ALTER COLUMN + // embedding TYPE vector(). + if let Some(configured_dims) = config.embedding_dimensions { + let row: Option<(i32,)> = sqlx::query_as( + "SELECT atttypmod FROM pg_attribute \ + WHERE attrelid = 'vector_entries'::regclass AND attname = 'embedding'", + ) + .fetch_optional(db.pool()) + .await + .unwrap_or(None); + if let Some((schema_dims,)) = row { + if schema_dims > 0 && schema_dims as u32 != configured_dims { + tracing::warn!( + "DIMENSION MISMATCH: schema has vector({}) but EMBEDDING_DIMENSIONS={}. \ + Recall will fail. 
Truncate vector_entries and run: \ + ALTER TABLE vector_entries ALTER COLUMN embedding TYPE vector({});", + schema_dims, configured_dims, configured_dims + ); + } + } + } + // Setup Apalis job queue — auto-creates `apalis_jobs` table if not present // Uses the same DATABASE_URL as the main DB; no extra infrastructure needed. let apalis_pool = sqlx::PgPool::connect(&config.database_url) diff --git a/services/server/src/routes/admin.rs b/services/server/src/routes/admin.rs index ec080437..e35329a7 100644 --- a/services/server/src/routes/admin.rs +++ b/services/server/src/routes/admin.rs @@ -267,7 +267,7 @@ pub async fn ask( .header("Authorization", format!("Bearer {}", api_key)) .header("Content-Type", "application/json") .json(&ChatCompletionRequest { - model: "openai/gpt-4o-mini".to_string(), + model: state.config.llm_model.clone(), messages: vec![ ChatMessage { role: "system".to_string(), diff --git a/services/server/src/routes/recall.rs b/services/server/src/routes/recall.rs index 7112bc41..11c57fba 100644 --- a/services/server/src/routes/recall.rs +++ b/services/server/src/routes/recall.rs @@ -22,11 +22,16 @@ use super::truncate_str; // ============================================================ fn recall_embedding_cache_key(config: &Config, query: &str) -> String { - use crate::services::embedder::EMBEDDING_MODEL; let mut hasher = sha2::Sha256::new(); - hasher.update(config.openai_api_base.as_bytes()); + // Use the effective embedding base + model so cache entries do not collide + // across providers when EMBEDDING_API_BASE / EMBEDDING_MODEL are overridden. 
+ let base = config + .embedding_api_base + .as_deref() + .unwrap_or(&config.openai_api_base); + hasher.update(base.as_bytes()); hasher.update(b"\0"); - hasher.update(EMBEDDING_MODEL.as_bytes()); + hasher.update(config.embedding_model.as_bytes()); hasher.update(b"\0"); hasher.update(query.as_bytes()); format!("memwal:embedding:v1:{:x}", hasher.finalize()) diff --git a/services/server/src/routes/remember.rs b/services/server/src/routes/remember.rs index d2a7b0a9..2cffcc39 100644 --- a/services/server/src/routes/remember.rs +++ b/services/server/src/routes/remember.rs @@ -389,7 +389,7 @@ async fn summarize_with_prompt( .header("Authorization", format!("Bearer {}", api_key)) .header("Content-Type", "application/json") .json(&ChatCompletionRequest { - model: "openai/gpt-4o-mini".to_string(), + model: config.llm_model.clone(), messages: vec![ ChatMessage { role: "system".to_string(), diff --git a/services/server/src/services/embedder.rs b/services/server/src/services/embedder.rs index 096fdf99..46fbfc83 100644 --- a/services/server/src/services/embedder.rs +++ b/services/server/src/services/embedder.rs @@ -14,15 +14,9 @@ use std::sync::Arc; use crate::types::{AppError, Config}; -/// Embedding model used for both ingestion and recall-query embeddings. -/// Kept here (was a `routes.rs` const) — the recall query-embedding cache -/// key in `routes.rs` references it via `crate::services::embedder::EMBEDDING_MODEL` -/// so the cache key changes if the model changes. -pub const EMBEDDING_MODEL: &str = "openai/text-embedding-3-small"; - -/// Embedding vector dimensionality (text-embedding-3-small). Also the -/// width of the deterministic mock vector. -const EMBEDDING_DIMS: usize = 1536; +/// Default embedding vector dimensionality. Used by the mock fallback and as +/// the cache-key fallback when `Config::embedding_dimensions` is unset. 
+const DEFAULT_EMBEDDING_DIMS: usize = 1536; #[async_trait] pub trait Embedder: Send + Sync { @@ -53,10 +47,22 @@ impl OpenAiEmbedder { impl Embedder for OpenAiEmbedder { #[tracing::instrument(name = "embedder.embed", skip_all, fields(text_len = text.len()))] async fn embed(&self, text: &str) -> Result, AppError> { - match &self.config.openai_api_key { + // Fallback chain: EMBEDDING_API_KEY → OPENAI_API_KEY, EMBEDDING_API_BASE → OPENAI_API_BASE. + let api_key = self + .config + .embedding_api_key + .as_ref() + .or(self.config.openai_api_key.as_ref()); + let api_base = self + .config + .embedding_api_base + .as_deref() + .unwrap_or(&self.config.openai_api_base); + + match api_key { Some(api_key) => { - // Real embedding via OpenRouter/OpenAI-compatible API - let url = format!("{}/embeddings", self.config.openai_api_base); + // Real embedding via configured provider (OpenAI, OpenRouter, Jina, …). + let url = format!("{}/embeddings", api_base); let resp = self .http_client @@ -64,8 +70,9 @@ impl Embedder for OpenAiEmbedder { .header("Authorization", format!("Bearer {}", api_key)) .header("Content-Type", "application/json") .json(&EmbeddingApiRequest { - model: EMBEDDING_MODEL.to_string(), + model: self.config.embedding_model.clone(), input: text.to_string(), + dimensions: self.config.embedding_dimensions, }) .send() .await @@ -95,14 +102,19 @@ impl Embedder for OpenAiEmbedder { Ok(vector) } None => { - // Mock embedding (deterministic hash-based) — for keyless dev - tracing::warn!(" → Using MOCK embedding (no OPENAI_API_KEY set)"); + // Mock embedding (deterministic hash-based) — for keyless dev. 
+ tracing::warn!(" → Using MOCK embedding (no OPENAI_API_KEY or EMBEDDING_API_KEY set)"); use sha2::Digest; let hash = sha2::Sha256::digest(text.as_bytes()); + let dims = self + .config + .embedding_dimensions + .map(|d| d as usize) + .unwrap_or(DEFAULT_EMBEDDING_DIMS); let mock_vector: Vec = hash .iter() .cycle() - .take(EMBEDDING_DIMS) + .take(dims) .enumerate() .map(|(i, &b)| { let val = (b as f32 / 255.0) * 2.0 - 1.0; @@ -123,6 +135,8 @@ impl Embedder for OpenAiEmbedder { struct EmbeddingApiRequest { model: String, input: String, + #[serde(skip_serializing_if = "Option::is_none")] + dimensions: Option, } #[derive(serde::Deserialize)] diff --git a/services/server/src/services/extractor.rs b/services/server/src/services/extractor.rs index ed6fe687..7fd98b1e 100644 --- a/services/server/src/services/extractor.rs +++ b/services/server/src/services/extractor.rs @@ -96,7 +96,7 @@ impl Extractor for LlmExtractor { .header("Authorization", format!("Bearer {}", api_key)) .header("Content-Type", "application/json") .json(&ChatCompletionRequest { - model: "openai/gpt-4o-mini".to_string(), + model: self.config.llm_model.clone(), messages: vec![ ChatMessage { role: "system".to_string(), diff --git a/services/server/src/types.rs b/services/server/src/types.rs index b74742b1..e98d4a80 100644 --- a/services/server/src/types.rs +++ b/services/server/src/types.rs @@ -150,6 +150,19 @@ pub struct Config { pub memwal_account_id: Option, pub openai_api_key: Option, pub openai_api_base: String, + /// Optional separate API key for embeddings (falls back to OPENAI_API_KEY). + pub embedding_api_key: Option, + /// Optional separate base URL for embeddings (falls back to OPENAI_API_BASE). + pub embedding_api_base: Option, + /// Embedding model identifier sent to the OpenAI-compatible /embeddings endpoint + /// (e.g. "openai/text-embedding-3-small", "jina-embeddings-v3"). + pub embedding_model: String, + /// Optional `dimensions` override forwarded on the /embeddings request. 
+ /// Set this when the model supports variable-dimension output (e.g. Jina v3). + pub embedding_dimensions: Option, + /// LLM model identifier used for fact extraction (/api/analyze) and + /// retrieval-augmented chat (/api/ask). + pub llm_model: String, pub walrus_publisher_url: String, pub walrus_aggregator_url: String, /// Primary key (used for SEAL decrypt / recall). Unchanged. @@ -199,6 +212,15 @@ impl Config { openai_api_key: std::env::var("OPENAI_API_KEY").ok(), openai_api_base: std::env::var("OPENAI_API_BASE") .unwrap_or_else(|_| "https://api.openai.com/v1".to_string()), + embedding_api_key: std::env::var("EMBEDDING_API_KEY").ok(), + embedding_api_base: std::env::var("EMBEDDING_API_BASE").ok(), + embedding_model: std::env::var("EMBEDDING_MODEL") + .unwrap_or_else(|_| "openai/text-embedding-3-small".to_string()), + embedding_dimensions: std::env::var("EMBEDDING_DIMENSIONS") + .ok() + .and_then(|s| s.parse().ok()), + llm_model: std::env::var("LLM_MODEL") + .unwrap_or_else(|_| "openai/gpt-4o-mini".to_string()), walrus_publisher_url: std::env::var("WALRUS_PUBLISHER_URL") .unwrap_or_else(|_| "https://publisher.walrus-mainnet.walrus.space".to_string()), walrus_aggregator_url: std::env::var("WALRUS_AGGREGATOR_URL") From 05d54a015fa0ded65bed21176e7180bfe22ed7c2 Mon Sep 17 00:00:00 2001 From: Ashwin-3cS Date: Thu, 14 May 2026 22:52:29 +0530 Subject: [PATCH 2/2] docs(relayer): document configurable embedding provider and LLM model Adds EMBEDDING_API_KEY / BASE / MODEL / DIMENSIONS and LLM_MODEL to environment-variables.md (Optional table + notes) and self-hosting.md (new Embedding & LLM Provider subsection). Notes the EMBEDDING_DIMENSIONS / schema dim match requirement and the recall cache key invalidation behaviour. 
--- docs/reference/environment-variables.md | 9 ++++++++- docs/relayer/self-hosting.md | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/reference/environment-variables.md b/docs/reference/environment-variables.md index 21973e69..3603654e 100644 --- a/docs/reference/environment-variables.md +++ b/docs/reference/environment-variables.md @@ -30,6 +30,11 @@ These are not all enforced at boot, but most real deployments need them. | `PORT` | `8000` | Relayer port | | `SIDECAR_URL` | `http://localhost:9000` | Sidecar HTTP endpoint | | `OPENAI_API_BASE` | `https://api.openai.com/v1` | OpenAI-compatible base URL | +| `EMBEDDING_API_KEY` | falls back to `OPENAI_API_KEY` | Separate API key for the embedding provider. Use this when embeddings and LLM completions live behind different providers (e.g. Jina embeddings + OpenAI chat) | +| `EMBEDDING_API_BASE` | falls back to `OPENAI_API_BASE` | Separate base URL for the embedding provider | +| `EMBEDDING_MODEL` | `openai/text-embedding-3-small` | Embedding model identifier sent on `/embeddings` requests | +| `EMBEDDING_DIMENSIONS` | model default | Optional `dimensions` override forwarded to the embedding provider. Required for variable-dimension models such as `jina-embeddings-v3`. Must match the schema dimension of `vector_entries.embedding` | +| `LLM_MODEL` | `openai/gpt-4o-mini` | LLM model identifier used by `/api/analyze` fact extraction, `/api/ask`, and the summarize-before-embed step in `/api/remember` | | `SUI_NETWORK` | `mainnet` | Picks the fallback RPC URL and network-driven service defaults | | `SUI_RPC_URL` | network default | Override the Sui fullnode URL | | `WALRUS_PUBLISHER_URL` | Walrus mainnet publisher | Override upload endpoint | @@ -54,7 +59,9 @@ These are not all enforced at boot, but most real deployments need them. - If both `SERVER_SUI_PRIVATE_KEYS` and `SERVER_SUI_PRIVATE_KEY` are set, the key pool takes priority for uploads. 
Upload jobs use the pool in round-robin order. - Keep `ENOKI_FALLBACK_TO_DIRECT_SIGN=false` in production if the server wallet should not pay gas when sponsorship is missing, expired, or rejected. - `OPENAI_API_KEY` and `OPENAI_API_BASE` control the embedding and fact-extraction provider used by `remember`, `recall`, `analyze`, `ask`, and restore re-indexing. -- Without `OPENAI_API_KEY`, the server can fall back to mock embeddings. That is useful for local testing, not for normal production behavior. +- `EMBEDDING_API_KEY` / `EMBEDDING_API_BASE` / `EMBEDDING_MODEL` / `EMBEDDING_DIMENSIONS` let you run the embedding side against a different provider (Jina, Cohere, etc.) without touching code. An unset `EMBEDDING_API_KEY` / `EMBEDDING_API_BASE` falls back to the corresponding `OPENAI_*` value; an unset `EMBEDDING_MODEL` defaults to `openai/text-embedding-3-small`; an unset `EMBEDDING_DIMENSIONS` sends no `dimensions` override, so the provider's model default applies. `LLM_MODEL` selects the chat/completion model independently. +- When you switch embedding provider or model, the cached recall query embeddings invalidate automatically — the cache key includes the effective base URL and model name. `EMBEDDING_DIMENSIONS` is not part of the cache key, so after changing only the dimension, clear the cached `memwal:embedding:v1:*` entries manually. +- Without `OPENAI_API_KEY` (and `EMBEDDING_API_KEY`), the server can fall back to mock embeddings. That is useful for local testing, not for normal production behavior. - `SUI_NETWORK` drives the default RPC URL, Walrus endpoints, Walrus package ID, and upload relay selection. - `SEAL_SERVER_CONFIGS` is a JSON array of `{ objectId, weight, aggregatorUrl?, apiKeyName?, apiKey? }`. Committee key server configs require `aggregatorUrl`. - `SEAL_KEY_SERVERS` is the legacy comma-separated independent key server list. It is only used when `SEAL_SERVER_CONFIGS` is unset. 
diff --git a/docs/relayer/self-hosting.md b/docs/relayer/self-hosting.md index ea3bf993..b5ffc332 100644 --- a/docs/relayer/self-hosting.md +++ b/docs/relayer/self-hosting.md @@ -85,6 +85,16 @@ curl http://localhost:8000/health - `OPENAI_API_KEY` — enables real embeddings (falls back to mock embeddings without it) - `OPENAI_API_BASE` — point to an OpenAI-compatible provider like OpenRouter +### Embedding & LLM Provider (Optional) + +The embedding and LLM endpoints are pluggable. An unset embedding key or base URL falls back to the corresponding `OPENAI_*` value; unset model names use the defaults listed below. + +- `EMBEDDING_API_KEY` — separate key for the embedding provider (e.g. Jina, Cohere) +- `EMBEDDING_API_BASE` — separate base URL for the embedding provider +- `EMBEDDING_MODEL` — embedding model identifier (default `openai/text-embedding-3-small`) +- `EMBEDDING_DIMENSIONS` — optional dimensions override, required for variable-dimension models like `jina-embeddings-v3`. Must match the schema dimension of `vector_entries.embedding`; the server logs a WARN at boot on mismatch. +- `LLM_MODEL` — chat/completion model used by `/api/analyze`, `/api/ask`, and the summarize-before-embed step (default `openai/gpt-4o-mini`) + ### Rate Limits & Storage (Optional) By default, the relayer enforces rate limits and storage quotas via Redis to prevent abuse. You can customize these limits: