From ec833d6f0261c493e12f21e982a7f4c671296d2e Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Fri, 20 Mar 2026 21:58:19 +1000 Subject: [PATCH 1/3] Model list endpoint caching --- src/app.rs | 63 +++++- src/cli/server.rs | 15 ++ src/config/features.rs | 50 +++++ src/middleware/layers/admin.rs | 6 + src/middleware/layers/api.rs | 6 + src/routes/api/models.rs | 386 +++++++++++++++++---------------- src/routes/execution.rs | 3 + 7 files changed, 342 insertions(+), 187 deletions(-) diff --git a/src/app.rs b/src/app.rs index dae0bca..6eb0aa8 100644 --- a/src/app.rs +++ b/src/app.rs @@ -376,6 +376,10 @@ pub struct AppState { /// Model catalog registry for enriching API responses with model metadata. /// Loaded from embedded data at startup and optionally synced at runtime. pub model_catalog: catalog::ModelCatalogRegistry, + /// In-memory cache of model lists fetched from static (config-file) providers. + /// Warmed on startup and refreshed periodically to avoid per-request latency. + pub static_models_cache: + Arc>>, } impl AppState { @@ -1059,7 +1063,7 @@ impl AppState { Arc::new(services::ProviderMetricsService::new()) }; - Ok(Self { + let result = Ok(Self { http_client, config: Arc::new(config), db, @@ -1096,7 +1100,19 @@ impl AppState { default_org_id, provider_metrics, model_catalog, - }) + static_models_cache: Arc::new(tokio::sync::RwLock::new( + std::collections::HashMap::new(), + )), + }); + + // Warm the static models cache so /v1/models is fast from the first request + if let Ok(ref state) = result + && state.config.features.static_models_cache.enabled() + { + state.warm_static_models_cache().await; + } + + result } /// Ensure a default user exists for anonymous access when auth is disabled. @@ -1816,6 +1832,49 @@ impl AppState { } } } + + /// Fetch model lists from all static (config-file) providers in parallel and + /// store them in `self.static_models_cache`. Failures for individual providers + /// are logged and skipped so one slow/broken provider cannot block the rest. + pub async fn warm_static_models_cache(&self) { + use futures::future::join_all; + + let futures: Vec<_> = self + .config + .providers + .iter() + .map(|(name, cfg)| { + let name = name.to_owned(); + let http = self.http_client.clone(); + let cbs = self.circuit_breakers.clone(); + async move { + let result = providers::list_models_for_config(cfg, &name, &http, &cbs).await; + (name, result) + } + }) + .collect(); + + let results = join_all(futures).await; + + let mut cache = self.static_models_cache.write().await; + let mut count = 0usize; + for (name, result) in results { + match result { + Ok(response) => { + count += response.data.len(); + cache.insert(name, response); + } + Err(e) => { + tracing::warn!(provider = %name, error = %e, "Failed to fetch models for cache warm"); + } + } + } + tracing::info!( + providers = cache.len(), + models = count, + "Static models cache warmed" + ); + } } #[cfg(feature = "server")] diff --git a/src/cli/server.rs b/src/cli/server.rs index 61bdeca..93888c6 100644 --- a/src/cli/server.rs +++ b/src/cli/server.rs @@ -335,6 +335,21 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b None }; + // Refresh the static models cache periodically in the background + // (initial warming already happened in AppState::new) + if config.features.static_models_cache.enabled() { + let interval = config.features.static_models_cache.refresh_interval(); + let state_ref = state.clone(); + tokio::spawn(async move { + let mut ticker = tokio::time::interval(interval); + ticker.tick().await; // skip the immediate first tick (already warmed) + loop { + ticker.tick().await; + state_ref.warm_static_models_cache().await; + } + }); + } + let task_tracker = state.task_tracker.clone(); let app = build_app(&config, state); diff --git a/src/config/features.rs b/src/config/features.rs index a4d5325..1f4f10e 100644 --- a/src/config/features.rs +++ b/src/config/features.rs @@ -57,6 +57,11 @@ pub struct FeaturesConfig { /// Validates URLs with SSRF protection and enforces size limits. #[serde(default)] pub web_fetch: Option, + + /// Static models cache configuration. + /// Caches model lists from config-file providers to avoid per-request latency. + #[serde(default)] + pub static_models_cache: StaticModelsCacheConfig, } impl FeaturesConfig { @@ -2563,6 +2568,51 @@ fn default_catalog_api_url() -> String { "https://models.dev/api.json".to_string() } +/// Configuration for the static models cache. +/// +/// Model lists from config-file providers are cached in memory and refreshed +/// periodically so that `/v1/models` does not make upstream HTTP calls on every +/// request. +/// +/// ```toml +/// [features.static_models_cache] +/// refresh_interval_secs = 300 +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] +#[serde(deny_unknown_fields)] +pub struct StaticModelsCacheConfig { + /// How often to refresh the cached model lists, in seconds. + /// Set to 0 to disable caching (every request will query providers directly). + /// Default: 300 (5 minutes). + #[serde(default = "default_static_models_refresh_interval_secs")] + pub refresh_interval_secs: u64, +} + +impl Default for StaticModelsCacheConfig { + fn default() -> Self { + Self { + refresh_interval_secs: default_static_models_refresh_interval_secs(), + } + } +} + +impl StaticModelsCacheConfig { + /// Whether caching is enabled (interval > 0). + pub fn enabled(&self) -> bool { + self.refresh_interval_secs > 0 + } + + /// Refresh interval as a `Duration`. + pub fn refresh_interval(&self) -> std::time::Duration { + std::time::Duration::from_secs(self.refresh_interval_secs) + } +} + +fn default_static_models_refresh_interval_secs() -> u64 { + 300 // 5 minutes +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/middleware/layers/admin.rs b/src/middleware/layers/admin.rs index 0470051..b3f7463 100644 --- a/src/middleware/layers/admin.rs +++ b/src/middleware/layers/admin.rs @@ -2373,6 +2373,9 @@ mod tests { crate::services::ProviderMetricsService::with_local_metrics(|| None), ), model_catalog: crate::catalog::ModelCatalogRegistry::new(), + static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new( + std::collections::HashMap::new(), + )), } } @@ -2674,6 +2677,9 @@ mod tests { crate::services::ProviderMetricsService::with_local_metrics(|| None), ), model_catalog: crate::catalog::ModelCatalogRegistry::new(), + static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new( + std::collections::HashMap::new(), + )), } } diff --git a/src/middleware/layers/api.rs b/src/middleware/layers/api.rs index dde8d5b..9fa8431 100644 --- a/src/middleware/layers/api.rs +++ b/src/middleware/layers/api.rs @@ -2289,6 +2289,9 @@ mod tests { crate::services::ProviderMetricsService::with_local_metrics(|| None), ), model_catalog: crate::catalog::ModelCatalogRegistry::new(), + static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new( + std::collections::HashMap::new(), + )), } } @@ -2340,6 +2343,9 @@ mod tests { crate::services::ProviderMetricsService::with_local_metrics(|| None), ), model_catalog: crate::catalog::ModelCatalogRegistry::new(), + static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new( + std::collections::HashMap::new(), + )), } } diff --git a/src/routes/api/models.rs b/src/routes/api/models.rs index 62f0d86..c14fac4 100644 --- a/src/routes/api/models.rs +++ b/src/routes/api/models.rs @@ -31,217 +31,233 @@ pub async fn api_v1_models( State(state): State, auth: Option>, ) -> Result, ApiError> { - use futures::future::join_all; - - // Create futures for fetching models from all providers in parallel - let fetch_futures: Vec<_> = state - .config - .providers - .iter() - .map(|(provider_name, provider_config)| { - let provider_name = provider_name.to_owned(); - let http_client = state.http_client.clone(); - let circuit_breakers = state.circuit_breakers.clone(); - - async move { - let models_result = crate::providers::list_models_for_config( - provider_config, - &provider_name, - &http_client, - &circuit_breakers, - ) - .await; - (provider_name, models_result) + // Read static provider models from the in-memory cache (warmed on startup, + // refreshed periodically). Providers missing from the cache (e.g. if warming + // failed) are fetched live as a fallback. + let cache_enabled = state.config.features.static_models_cache.enabled(); + let mut hits: Vec<(String, crate::providers::ModelsResponse)> = Vec::new(); + let mut misses: Vec<(String, &crate::config::ProviderConfig)> = Vec::new(); + if cache_enabled { + let cached = state.static_models_cache.read().await; + for (name, cfg) in state.config.providers.iter() { + if let Some(resp) = cached.get(name) { + hits.push((name.to_owned(), resp.clone())); + } else { + misses.push((name.to_owned(), cfg)); } - }) - .collect(); + } + } else { + misses.extend( + state + .config + .providers + .iter() + .map(|(name, cfg)| (name.to_owned(), cfg)), + ); + } - // Fetch from all providers in parallel - let results = join_all(fetch_futures).await; + // Live-fetch any providers not in the cache + if !misses.is_empty() { + use futures::future::join_all; + + let futures: Vec<_> = misses + .into_iter() + .map(|(name, cfg)| { + let http = state.http_client.clone(); + let cbs = state.circuit_breakers.clone(); + async move { + let result = + crate::providers::list_models_for_config(cfg, &name, &http, &cbs).await; + (name, result) + } + }) + .collect(); + + for (name, result) in join_all(futures).await { + if let Ok(resp) = result { + hits.push((name, resp)); + } + } + } // Collect successful results and enrich with catalog data let mut all_models = Vec::new(); - for (provider_name, models_result) in results { - if let Ok(models_response) = models_result { - // Get the provider config for catalog lookup - let provider_config = state.config.providers.get(&provider_name); - - // Resolve the catalog provider ID for this provider - let catalog_provider_id = provider_config.and_then(|pc| { - crate::catalog::resolve_catalog_provider_id( - pc.provider_type_name(), - pc.base_url(), - pc.catalog_provider(), - ) - }); - - // Prefix each model ID with the provider name and enrich with catalog + config data - for model in models_response.data { - let prefixed_id = format!("{}/{}", provider_name, model.id); - let mut model_json = model.extra; - if let Some(obj) = model_json.as_object_mut() { - obj.insert("id".to_string(), serde_json::Value::String(prefixed_id)); - - // Look up catalog enrichment and config override - let enrichment = catalog_provider_id - .as_ref() - .and_then(|pid| state.model_catalog.lookup(pid, &model.id)); - let model_config = - provider_config.and_then(|pc| pc.get_model_config(&model.id)); - - // Merge metadata: config wins if present, else catalog, else omit. - // Only enrich if at least one source has data. - if enrichment.is_some() || model_config.is_some() { - // Capabilities: config overrides catalog - if let Some(ref caps) = model_config.and_then(|mc| mc.capabilities.as_ref()) - { - obj.insert( - "capabilities".to_string(), - serde_json::to_value(caps).unwrap_or_default(), - ); - } else if let Some(ref e) = enrichment { - obj.insert( - "capabilities".to_string(), - serde_json::to_value(&e.capabilities).unwrap_or_default(), - ); - } + for (provider_name, models_response) in hits { + // Get the provider config for catalog lookup + let provider_config = state.config.providers.get(&provider_name); + + // Resolve the catalog provider ID for this provider + let catalog_provider_id = provider_config.and_then(|pc| { + crate::catalog::resolve_catalog_provider_id( + pc.provider_type_name(), + pc.base_url(), + pc.catalog_provider(), + ) + }); + + // Prefix each model ID with the provider name and enrich with catalog + config data + for model in models_response.data { + let prefixed_id = format!("{}/{}", provider_name, model.id); + let mut model_json = model.extra; + if let Some(obj) = model_json.as_object_mut() { + obj.insert("id".to_string(), serde_json::Value::String(prefixed_id)); + + // Look up catalog enrichment and config override + let enrichment = catalog_provider_id + .as_ref() + .and_then(|pid| state.model_catalog.lookup(pid, &model.id)); + let model_config = provider_config.and_then(|pc| pc.get_model_config(&model.id)); + + // Merge metadata: config wins if present, else catalog, else omit. + // Only enrich if at least one source has data. + if enrichment.is_some() || model_config.is_some() { + // Capabilities: config overrides catalog + if let Some(ref caps) = model_config.and_then(|mc| mc.capabilities.as_ref()) { + obj.insert( + "capabilities".to_string(), + serde_json::to_value(caps).unwrap_or_default(), + ); + } else if let Some(ref e) = enrichment { + obj.insert( + "capabilities".to_string(), + serde_json::to_value(&e.capabilities).unwrap_or_default(), + ); + } - // Context length: config > provider response > catalog - if let Some(ctx_len) = model_config.and_then(|mc| mc.context_length) { - obj.insert( - "context_length".to_string(), - serde_json::Value::Number(ctx_len.into()), - ); - } else if !obj.contains_key("context_length") - && let Some(ctx_len) = - enrichment.as_ref().and_then(|e| e.limits.context_length) - { - obj.insert( - "context_length".to_string(), - serde_json::Value::Number(ctx_len.into()), - ); - } + // Context length: config > provider response > catalog + if let Some(ctx_len) = model_config.and_then(|mc| mc.context_length) { + obj.insert( + "context_length".to_string(), + serde_json::Value::Number(ctx_len.into()), + ); + } else if !obj.contains_key("context_length") + && let Some(ctx_len) = + enrichment.as_ref().and_then(|e| e.limits.context_length) + { + obj.insert( + "context_length".to_string(), + serde_json::Value::Number(ctx_len.into()), + ); + } - // Max output tokens - if let Some(max_out) = model_config.and_then(|mc| mc.max_output_tokens) { - obj.insert( - "max_output_tokens".to_string(), - serde_json::Value::Number(max_out.into()), - ); - } else if let Some(max_out) = - enrichment.as_ref().and_then(|e| e.limits.max_output_tokens) - { - obj.insert( - "max_output_tokens".to_string(), - serde_json::Value::Number(max_out.into()), - ); - } + // Max output tokens + if let Some(max_out) = model_config.and_then(|mc| mc.max_output_tokens) { + obj.insert( + "max_output_tokens".to_string(), + serde_json::Value::Number(max_out.into()), + ); + } else if let Some(max_out) = + enrichment.as_ref().and_then(|e| e.limits.max_output_tokens) + { + obj.insert( + "max_output_tokens".to_string(), + serde_json::Value::Number(max_out.into()), + ); + } - // Modalities: config overrides catalog - if let Some(ref mods) = model_config.and_then(|mc| mc.modalities.as_ref()) { - obj.insert( - "modalities".to_string(), - serde_json::to_value(mods).unwrap_or_default(), - ); - } else if let Some(ref e) = enrichment { - obj.insert( - "modalities".to_string(), - serde_json::to_value(&e.modalities).unwrap_or_default(), - ); - } + // Modalities: config overrides catalog + if let Some(ref mods) = model_config.and_then(|mc| mc.modalities.as_ref()) { + obj.insert( + "modalities".to_string(), + serde_json::to_value(mods).unwrap_or_default(), + ); + } else if let Some(ref e) = enrichment { + obj.insert( + "modalities".to_string(), + serde_json::to_value(&e.modalities).unwrap_or_default(), + ); + } + + // Tasks: config overrides catalog + let tasks = model_config + .filter(|mc| !mc.tasks.is_empty()) + .map(|mc| &mc.tasks) + .or(enrichment + .as_ref() + .filter(|e| !e.tasks.is_empty()) + .map(|e| &e.tasks)); + if let Some(tasks) = tasks { + obj.insert( + "tasks".to_string(), + serde_json::to_value(tasks).unwrap_or_default(), + ); + } + + // Catalog pricing for display (from catalog only) + if let Some(ref e) = enrichment { + obj.insert( + "catalog_pricing".to_string(), + serde_json::to_value(&e.catalog_pricing).unwrap_or_default(), + ); + } - // Tasks: config overrides catalog - let tasks = model_config - .filter(|mc| !mc.tasks.is_empty()) - .map(|mc| &mc.tasks) - .or(enrichment - .as_ref() - .filter(|e| !e.tasks.is_empty()) - .map(|e| &e.tasks)); - if let Some(tasks) = tasks { + // Family: config overrides catalog + if let Some(family) = model_config + .and_then(|mc| mc.family.as_ref()) + .or(enrichment.as_ref().and_then(|e| e.family.as_ref())) + { + obj.insert( + "family".to_string(), + serde_json::Value::String(family.clone()), + ); + } + + // Open weights: config overrides catalog + if let Some(ow) = model_config.and_then(|mc| mc.open_weights) { + obj.insert("open_weights".to_string(), serde_json::Value::Bool(ow)); + } else if let Some(ref e) = enrichment { + obj.insert( + "open_weights".to_string(), + serde_json::Value::Bool(e.open_weights), + ); + } + + // Image generation metadata (config only) + if let Some(mc) = model_config { + if !mc.image_sizes.is_empty() { obj.insert( - "tasks".to_string(), - serde_json::to_value(tasks).unwrap_or_default(), + "image_sizes".to_string(), + serde_json::to_value(&mc.image_sizes).unwrap_or_default(), ); } - - // Catalog pricing for display (from catalog only) - if let Some(ref e) = enrichment { + if !mc.image_qualities.is_empty() { obj.insert( - "catalog_pricing".to_string(), - serde_json::to_value(&e.catalog_pricing).unwrap_or_default(), + "image_qualities".to_string(), + serde_json::to_value(&mc.image_qualities).unwrap_or_default(), ); } - - // Family: config overrides catalog - if let Some(family) = model_config - .and_then(|mc| mc.family.as_ref()) - .or(enrichment.as_ref().and_then(|e| e.family.as_ref())) - { + if let Some(max) = mc.max_images { obj.insert( - "family".to_string(), - serde_json::Value::String(family.clone()), + "max_images".to_string(), + serde_json::Value::Number(max.into()), ); } - - // Open weights: config overrides catalog - if let Some(ow) = model_config.and_then(|mc| mc.open_weights) { - obj.insert("open_weights".to_string(), serde_json::Value::Bool(ow)); - } else if let Some(ref e) = enrichment { + if !mc.voices.is_empty() { obj.insert( - "open_weights".to_string(), - serde_json::Value::Bool(e.open_weights), + "voices".to_string(), + serde_json::to_value(&mc.voices).unwrap_or_default(), ); } - - // Image generation metadata (config only) - if let Some(mc) = model_config { - if !mc.image_sizes.is_empty() { - obj.insert( - "image_sizes".to_string(), - serde_json::to_value(&mc.image_sizes).unwrap_or_default(), - ); - } - if !mc.image_qualities.is_empty() { - obj.insert( - "image_qualities".to_string(), - serde_json::to_value(&mc.image_qualities).unwrap_or_default(), - ); - } - if let Some(max) = mc.max_images { - obj.insert( - "max_images".to_string(), - serde_json::Value::Number(max.into()), - ); - } - if !mc.voices.is_empty() { - obj.insert( - "voices".to_string(), - serde_json::to_value(&mc.voices).unwrap_or_default(), - ); - } - } } + } - // Sovereignty: merge provider → model override (independent of catalog) - let provider_sov = provider_config.and_then(|pc| pc.sovereignty()); - let model_sov = model_config.and_then(|mc| mc.sovereignty.as_ref()); - if let Some(merged) = - crate::config::SovereigntyMetadata::merge(provider_sov, model_sov) - .filter(|m| !m.is_empty()) - { - obj.insert( - "sovereignty".to_string(), - serde_json::to_value(&merged).unwrap_or_default(), - ); - } - } else { - model_json = serde_json::json!({ "id": prefixed_id }); + // Sovereignty: merge provider → model override (independent of catalog) + let provider_sov = provider_config.and_then(|pc| pc.sovereignty()); + let model_sov = model_config.and_then(|mc| mc.sovereignty.as_ref()); + if let Some(merged) = + crate::config::SovereigntyMetadata::merge(provider_sov, model_sov) + .filter(|m| !m.is_empty()) + { + obj.insert( + "sovereignty".to_string(), + serde_json::to_value(&merged).unwrap_or_default(), + ); } - all_models.push(model_json); + } else { + model_json = serde_json::json!({ "id": prefixed_id }); } + all_models.push(model_json); } - // Skip providers that fail to return models } // Mark all static models with source diff --git a/src/routes/execution.rs b/src/routes/execution.rs index 54b816e..0a82db5 100644 --- a/src/routes/execution.rs +++ b/src/routes/execution.rs @@ -874,6 +874,9 @@ mod tests { crate::services::ProviderMetricsService::with_local_metrics(|| None), ), model_catalog: crate::catalog::ModelCatalogRegistry::new(), + static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new( + std::collections::HashMap::new(), + )), } } From 9fb6527ff6813195f637f9bc4ad75f34c8e4664c Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Fri, 20 Mar 2026 22:20:29 +1000 Subject: [PATCH 2/3] Review fixes --- src/app.rs | 6 +++--- src/cli/server.rs | 3 ++- src/routes/api/models.rs | 5 +++-- src/wasm.rs | 1 + 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/app.rs b/src/app.rs index 6eb0aa8..ba8ebeb 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1857,11 +1857,10 @@ impl AppState { let results = join_all(futures).await; let mut cache = self.static_models_cache.write().await; - let mut count = 0usize; + cache.retain(|name, _| self.config.providers.get(name).is_some()); for (name, result) in results { match result { Ok(response) => { - count += response.data.len(); cache.insert(name, response); } Err(e) => { @@ -1869,9 +1868,10 @@ impl AppState { } } } + let total_models: usize = cache.values().map(|r| r.data.len()).sum(); tracing::info!( providers = cache.len(), - models = count, + models = total_models, "Static models cache warmed" ); } diff --git a/src/cli/server.rs b/src/cli/server.rs index 93888c6..fde9279 100644 --- a/src/cli/server.rs +++ b/src/cli/server.rs @@ -339,8 +339,9 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b // (initial warming already happened in AppState::new) if config.features.static_models_cache.enabled() { let interval = config.features.static_models_cache.refresh_interval(); + let task_tracker = state.task_tracker.clone(); let state_ref = state.clone(); - tokio::spawn(async move { + task_tracker.spawn(async move { let mut ticker = tokio::time::interval(interval); ticker.tick().await; // skip the immediate first tick (already warmed) loop { diff --git a/src/routes/api/models.rs b/src/routes/api/models.rs index c14fac4..4d98d3e 100644 --- a/src/routes/api/models.rs +++ b/src/routes/api/models.rs @@ -74,8 +74,9 @@ pub async fn api_v1_models( .collect(); for (name, result) in join_all(futures).await { - if let Ok(resp) = result { - hits.push((name, resp)); + match result { + Ok(resp) => hits.push((name, resp)), + Err(e) => tracing::warn!(provider = %name, error = %e, "Live-fetch fallback failed for cache-miss provider"), } } } diff --git a/src/wasm.rs b/src/wasm.rs index 883f4d1..7666300 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -156,6 +156,7 @@ impl HadrianGateway { default_org_id, provider_metrics: Arc::new(services::ProviderMetricsService::new()), model_catalog: catalog::ModelCatalogRegistry::new(), + static_models_cache: Arc::new(tokio::sync::RwLock::new(Default::default())), }; let router = build_wasm_router(state, default_user_id, default_org_id); From 7690b834b906fd11fb03f4c00f7f53f0d445f9d1 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Fri, 20 Mar 2026 22:40:12 +1000 Subject: [PATCH 3/3] Review fixes --- src/cli/server.rs | 3 +-- src/routes/api/models.rs | 17 ++++++++++++++++- src/wasm.rs | 7 ++++++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/cli/server.rs b/src/cli/server.rs index fde9279..93888c6 100644 --- a/src/cli/server.rs +++ b/src/cli/server.rs @@ -339,9 +339,8 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b // (initial warming already happened in AppState::new) if config.features.static_models_cache.enabled() { let interval = config.features.static_models_cache.refresh_interval(); - let task_tracker = state.task_tracker.clone(); let state_ref = state.clone(); - task_tracker.spawn(async move { + tokio::spawn(async move { let mut ticker = tokio::time::interval(interval); ticker.tick().await; // skip the immediate first tick (already warmed) loop { diff --git a/src/routes/api/models.rs b/src/routes/api/models.rs index 4d98d3e..d5f2761 100644 --- a/src/routes/api/models.rs +++ b/src/routes/api/models.rs @@ -73,12 +73,27 @@ pub async fn api_v1_models( }) .collect(); + let mut live_fetched = Vec::new(); for (name, result) in join_all(futures).await { match result { - Ok(resp) => hits.push((name, resp)), + Ok(resp) => { + if cache_enabled { + live_fetched.push((name.clone(), resp.clone())); + } + hits.push((name, resp)); + } Err(e) => tracing::warn!(provider = %name, error = %e, "Live-fetch fallback failed for cache-miss provider"), } } + + // Write successful live-fetches back to the cache so subsequent requests + // don't repeat the same upstream calls until the next background refresh. + if !live_fetched.is_empty() { + let mut cache = state.static_models_cache.write().await; + for (name, resp) in live_fetched { + cache.insert(name, resp); + } + } } // Collect successful results and enrich with catalog data diff --git a/src/wasm.rs b/src/wasm.rs index 7666300..a3200ac 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -479,7 +479,12 @@ fn wasm_default_config() -> config::GatewayConfig { }, providers: config::ProvidersConfig::default(), limits: config::LimitsConfig::default(), - features: config::FeaturesConfig::default(), + features: config::FeaturesConfig { + static_models_cache: config::StaticModelsCacheConfig { + refresh_interval_secs: 0, + }, + ..Default::default() + }, observability: config::ObservabilityConfig::default(), ui: config::UiConfig { pages: config::PagesConfig {