From ec833d6f0261c493e12f21e982a7f4c671296d2e Mon Sep 17 00:00:00 2001
From: ScriptSmith <ScriptSmith@users.noreply.github.com>
Date: Fri, 20 Mar 2026 21:58:19 +1000
Subject: [PATCH 1/3] Model list endpoint caching

---
 src/app.rs                     |  63 +++++-
 src/cli/server.rs              |  15 ++
 src/config/features.rs         |  50 +++++
 src/middleware/layers/admin.rs |   6 +
 src/middleware/layers/api.rs   |   6 +
 src/routes/api/models.rs       | 386 +++++++++++++++++----------------
 src/routes/execution.rs        |   3 +
 7 files changed, 342 insertions(+), 187 deletions(-)

diff --git a/src/app.rs b/src/app.rs
index dae0bca..6eb0aa8 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -376,6 +376,10 @@ pub struct AppState {
     /// Model catalog registry for enriching API responses with model metadata.
     /// Loaded from embedded data at startup and optionally synced at runtime.
     pub model_catalog: catalog::ModelCatalogRegistry,
+    /// In-memory cache of model lists fetched from static (config-file) providers.
+    /// Warmed on startup and refreshed periodically to avoid per-request latency.
+    pub static_models_cache:
+        Arc<tokio::sync::RwLock<std::collections::HashMap<String, providers::ModelsResponse>>>,
 }
 
 impl AppState {
@@ -1059,7 +1063,7 @@ impl AppState {
             Arc::new(services::ProviderMetricsService::new())
         };
 
-        Ok(Self {
+        let result = Ok(Self {
             http_client,
             config: Arc::new(config),
             db,
@@ -1096,7 +1100,19 @@ impl AppState {
             default_org_id,
             provider_metrics,
             model_catalog,
-        })
+            static_models_cache: Arc::new(tokio::sync::RwLock::new(
+                std::collections::HashMap::new(),
+            )),
+        });
+
+        // Warm the static models cache so /v1/models is fast from the first request
+        if let Ok(ref state) = result
+            && state.config.features.static_models_cache.enabled()
+        {
+            state.warm_static_models_cache().await;
+        }
+
+        result
     }
 
     /// Ensure a default user exists for anonymous access when auth is disabled.
@@ -1816,6 +1832,49 @@ impl AppState {
             }
         }
     }
+
+    /// Fetch model lists from all static (config-file) providers in parallel and
+    /// store them in `self.static_models_cache`. Per-provider failures are logged and
+    /// skipped; all fetches are awaited together, so the slowest one gates the update.
+    pub async fn warm_static_models_cache(&self) {
+        use futures::future::join_all;
+
+        let futures: Vec<_> = self
+            .config
+            .providers
+            .iter()
+            .map(|(name, cfg)| {
+                let name = name.to_owned();
+                let http = self.http_client.clone();
+                let cbs = self.circuit_breakers.clone();
+                async move {
+                    let result = providers::list_models_for_config(cfg, &name, &http, &cbs).await;
+                    (name, result)
+                }
+            })
+            .collect();
+
+        let results = join_all(futures).await;
+
+        let mut cache = self.static_models_cache.write().await;
+        let mut count = 0usize;
+        for (name, result) in results {
+            match result {
+                Ok(response) => {
+                    count += response.data.len();
+                    cache.insert(name, response);
+                }
+                Err(e) => {
+                    tracing::warn!(provider = %name, error = %e, "Failed to fetch models for cache warm");
+                }
+            }
+        }
+        tracing::info!(
+            providers = cache.len(),
+            models = count,
+            "Static models cache warmed"
+        );
+    }
 }
 
 #[cfg(feature = "server")]
diff --git a/src/cli/server.rs b/src/cli/server.rs
index 61bdeca..93888c6 100644
--- a/src/cli/server.rs
+++ b/src/cli/server.rs
@@ -335,6 +335,21 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
         None
     };
 
+    // Refresh the static models cache periodically in the background
+    // (initial warming already happened in AppState::new)
+    if config.features.static_models_cache.enabled() {
+        let interval = config.features.static_models_cache.refresh_interval();
+        let state_ref = state.clone();
+        tokio::spawn(async move {
+            let mut ticker = tokio::time::interval(interval);
+            ticker.tick().await; // skip the immediate first tick (already warmed)
+            loop {
+                ticker.tick().await;
+                state_ref.warm_static_models_cache().await;
+            }
+        });
+    }
+
     let task_tracker = state.task_tracker.clone();
     let app = build_app(&config, state);
 
diff --git a/src/config/features.rs b/src/config/features.rs
index a4d5325..1f4f10e 100644
--- a/src/config/features.rs
+++ b/src/config/features.rs
@@ -57,6 +57,11 @@ pub struct FeaturesConfig {
     /// Validates URLs with SSRF protection and enforces size limits.
     #[serde(default)]
     pub web_fetch: Option<WebFetchConfig>,
+
+    /// Static models cache configuration.
+    /// Caches model lists from config-file providers to avoid per-request latency.
+    #[serde(default)]
+    pub static_models_cache: StaticModelsCacheConfig,
 }
 
 impl FeaturesConfig {
@@ -2563,6 +2568,51 @@ fn default_catalog_api_url() -> String {
     "https://models.dev/api.json".to_string()
 }
 
+/// Configuration for the static models cache.
+///
+/// Model lists from config-file providers are cached in memory and refreshed
+/// periodically so that `/v1/models` does not make upstream HTTP calls on every
+/// request.
+///
+/// ```toml
+/// [features.static_models_cache]
+/// refresh_interval_secs = 300
+/// ```
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+#[serde(deny_unknown_fields)]
+pub struct StaticModelsCacheConfig {
+    /// How often to refresh the cached model lists, in seconds.
+    /// Set to 0 to disable caching (every request will query providers directly).
+    /// Default: 300 (5 minutes).
+    #[serde(default = "default_static_models_refresh_interval_secs")]
+    pub refresh_interval_secs: u64,
+}
+
+impl Default for StaticModelsCacheConfig {
+    fn default() -> Self {
+        Self {
+            refresh_interval_secs: default_static_models_refresh_interval_secs(),
+        }
+    }
+}
+
+impl StaticModelsCacheConfig {
+    /// Whether caching is enabled (interval > 0).
+    pub fn enabled(&self) -> bool {
+        self.refresh_interval_secs > 0
+    }
+
+    /// Refresh interval as a `Duration`; zero when disabled, so check `enabled()` first.
+    pub fn refresh_interval(&self) -> std::time::Duration {
+        std::time::Duration::from_secs(self.refresh_interval_secs)
+    }
+}
+
+fn default_static_models_refresh_interval_secs() -> u64 {
+    300 // 5 minutes
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/middleware/layers/admin.rs b/src/middleware/layers/admin.rs
index 0470051..b3f7463 100644
--- a/src/middleware/layers/admin.rs
+++ b/src/middleware/layers/admin.rs
@@ -2373,6 +2373,9 @@ mod tests {
                 crate::services::ProviderMetricsService::with_local_metrics(|| None),
             ),
             model_catalog: crate::catalog::ModelCatalogRegistry::new(),
+            static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new(
+                std::collections::HashMap::new(),
+            )),
         }
     }
 
@@ -2674,6 +2677,9 @@ mod tests {
                 crate::services::ProviderMetricsService::with_local_metrics(|| None),
             ),
             model_catalog: crate::catalog::ModelCatalogRegistry::new(),
+            static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new(
+                std::collections::HashMap::new(),
+            )),
         }
     }
 
diff --git a/src/middleware/layers/api.rs b/src/middleware/layers/api.rs
index dde8d5b..9fa8431 100644
--- a/src/middleware/layers/api.rs
+++ b/src/middleware/layers/api.rs
@@ -2289,6 +2289,9 @@ mod tests {
                 crate::services::ProviderMetricsService::with_local_metrics(|| None),
             ),
             model_catalog: crate::catalog::ModelCatalogRegistry::new(),
+            static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new(
+                std::collections::HashMap::new(),
+            )),
         }
     }
 
@@ -2340,6 +2343,9 @@ mod tests {
                 crate::services::ProviderMetricsService::with_local_metrics(|| None),
             ),
             model_catalog: crate::catalog::ModelCatalogRegistry::new(),
+            static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new(
+                std::collections::HashMap::new(),
+            )),
         }
     }
 
diff --git a/src/routes/api/models.rs b/src/routes/api/models.rs
index 62f0d86..c14fac4 100644
--- a/src/routes/api/models.rs
+++ b/src/routes/api/models.rs
@@ -31,217 +31,233 @@ pub async fn api_v1_models(
     State(state): State<AppState>,
     auth: Option<Extension<crate::auth::AuthenticatedRequest>>,
 ) -> Result<Json<CombinedModelsResponse>, ApiError> {
-    use futures::future::join_all;
-
-    // Create futures for fetching models from all providers in parallel
-    let fetch_futures: Vec<_> = state
-        .config
-        .providers
-        .iter()
-        .map(|(provider_name, provider_config)| {
-            let provider_name = provider_name.to_owned();
-            let http_client = state.http_client.clone();
-            let circuit_breakers = state.circuit_breakers.clone();
-
-            async move {
-                let models_result = crate::providers::list_models_for_config(
-                    provider_config,
-                    &provider_name,
-                    &http_client,
-                    &circuit_breakers,
-                )
-                .await;
-                (provider_name, models_result)
+    // Read static provider models from the in-memory cache (warmed on startup,
+    // refreshed periodically). Providers missing from the cache (e.g. if warming
+    // failed) are fetched live as a fallback.
+    let cache_enabled = state.config.features.static_models_cache.enabled();
+    let mut hits: Vec<(String, crate::providers::ModelsResponse)> = Vec::new();
+    let mut misses: Vec<(String, &crate::config::ProviderConfig)> = Vec::new();
+    if cache_enabled {
+        let cached = state.static_models_cache.read().await;
+        for (name, cfg) in state.config.providers.iter() {
+            if let Some(resp) = cached.get(name) {
+                hits.push((name.to_owned(), resp.clone()));
+            } else {
+                misses.push((name.to_owned(), cfg));
             }
-        })
-        .collect();
+        }
+    } else {
+        misses.extend(
+            state
+                .config
+                .providers
+                .iter()
+                .map(|(name, cfg)| (name.to_owned(), cfg)),
+        );
+    }
 
-    // Fetch from all providers in parallel
-    let results = join_all(fetch_futures).await;
+    // Live-fetch any providers not in the cache
+    if !misses.is_empty() {
+        use futures::future::join_all;
+
+        let futures: Vec<_> = misses
+            .into_iter()
+            .map(|(name, cfg)| {
+                let http = state.http_client.clone();
+                let cbs = state.circuit_breakers.clone();
+                async move {
+                    let result =
+                        crate::providers::list_models_for_config(cfg, &name, &http, &cbs).await;
+                    (name, result)
+                }
+            })
+            .collect();
+
+        for (name, result) in join_all(futures).await {
+            if let Ok(resp) = result {
+                hits.push((name, resp));
+            }
+        }
+    }
 
     // Collect successful results and enrich with catalog data
     let mut all_models = Vec::new();
-    for (provider_name, models_result) in results {
-        if let Ok(models_response) = models_result {
-            // Get the provider config for catalog lookup
-            let provider_config = state.config.providers.get(&provider_name);
-
-            // Resolve the catalog provider ID for this provider
-            let catalog_provider_id = provider_config.and_then(|pc| {
-                crate::catalog::resolve_catalog_provider_id(
-                    pc.provider_type_name(),
-                    pc.base_url(),
-                    pc.catalog_provider(),
-                )
-            });
-
-            // Prefix each model ID with the provider name and enrich with catalog + config data
-            for model in models_response.data {
-                let prefixed_id = format!("{}/{}", provider_name, model.id);
-                let mut model_json = model.extra;
-                if let Some(obj) = model_json.as_object_mut() {
-                    obj.insert("id".to_string(), serde_json::Value::String(prefixed_id));
-
-                    // Look up catalog enrichment and config override
-                    let enrichment = catalog_provider_id
-                        .as_ref()
-                        .and_then(|pid| state.model_catalog.lookup(pid, &model.id));
-                    let model_config =
-                        provider_config.and_then(|pc| pc.get_model_config(&model.id));
-
-                    // Merge metadata: config wins if present, else catalog, else omit.
-                    // Only enrich if at least one source has data.
-                    if enrichment.is_some() || model_config.is_some() {
-                        // Capabilities: config overrides catalog
-                        if let Some(ref caps) = model_config.and_then(|mc| mc.capabilities.as_ref())
-                        {
-                            obj.insert(
-                                "capabilities".to_string(),
-                                serde_json::to_value(caps).unwrap_or_default(),
-                            );
-                        } else if let Some(ref e) = enrichment {
-                            obj.insert(
-                                "capabilities".to_string(),
-                                serde_json::to_value(&e.capabilities).unwrap_or_default(),
-                            );
-                        }
+    for (provider_name, models_response) in hits {
+        // Get the provider config for catalog lookup
+        let provider_config = state.config.providers.get(&provider_name);
+
+        // Resolve the catalog provider ID for this provider
+        let catalog_provider_id = provider_config.and_then(|pc| {
+            crate::catalog::resolve_catalog_provider_id(
+                pc.provider_type_name(),
+                pc.base_url(),
+                pc.catalog_provider(),
+            )
+        });
+
+        // Prefix each model ID with the provider name and enrich with catalog + config data
+        for model in models_response.data {
+            let prefixed_id = format!("{}/{}", provider_name, model.id);
+            let mut model_json = model.extra;
+            if let Some(obj) = model_json.as_object_mut() {
+                obj.insert("id".to_string(), serde_json::Value::String(prefixed_id));
+
+                // Look up catalog enrichment and config override
+                let enrichment = catalog_provider_id
+                    .as_ref()
+                    .and_then(|pid| state.model_catalog.lookup(pid, &model.id));
+                let model_config = provider_config.and_then(|pc| pc.get_model_config(&model.id));
+
+                // Merge metadata: config wins if present, else catalog, else omit.
+                // Only enrich if at least one source has data.
+                if enrichment.is_some() || model_config.is_some() {
+                    // Capabilities: config overrides catalog
+                    if let Some(ref caps) = model_config.and_then(|mc| mc.capabilities.as_ref()) {
+                        obj.insert(
+                            "capabilities".to_string(),
+                            serde_json::to_value(caps).unwrap_or_default(),
+                        );
+                    } else if let Some(ref e) = enrichment {
+                        obj.insert(
+                            "capabilities".to_string(),
+                            serde_json::to_value(&e.capabilities).unwrap_or_default(),
+                        );
+                    }
 
-                        // Context length: config > provider response > catalog
-                        if let Some(ctx_len) = model_config.and_then(|mc| mc.context_length) {
-                            obj.insert(
-                                "context_length".to_string(),
-                                serde_json::Value::Number(ctx_len.into()),
-                            );
-                        } else if !obj.contains_key("context_length")
-                            && let Some(ctx_len) =
-                                enrichment.as_ref().and_then(|e| e.limits.context_length)
-                        {
-                            obj.insert(
-                                "context_length".to_string(),
-                                serde_json::Value::Number(ctx_len.into()),
-                            );
-                        }
+                    // Context length: config > provider response > catalog
+                    if let Some(ctx_len) = model_config.and_then(|mc| mc.context_length) {
+                        obj.insert(
+                            "context_length".to_string(),
+                            serde_json::Value::Number(ctx_len.into()),
+                        );
+                    } else if !obj.contains_key("context_length")
+                        && let Some(ctx_len) =
+                            enrichment.as_ref().and_then(|e| e.limits.context_length)
+                    {
+                        obj.insert(
+                            "context_length".to_string(),
+                            serde_json::Value::Number(ctx_len.into()),
+                        );
+                    }
 
-                        // Max output tokens
-                        if let Some(max_out) = model_config.and_then(|mc| mc.max_output_tokens) {
-                            obj.insert(
-                                "max_output_tokens".to_string(),
-                                serde_json::Value::Number(max_out.into()),
-                            );
-                        } else if let Some(max_out) =
-                            enrichment.as_ref().and_then(|e| e.limits.max_output_tokens)
-                        {
-                            obj.insert(
-                                "max_output_tokens".to_string(),
-                                serde_json::Value::Number(max_out.into()),
-                            );
-                        }
+                    // Max output tokens
+                    if let Some(max_out) = model_config.and_then(|mc| mc.max_output_tokens) {
+                        obj.insert(
+                            "max_output_tokens".to_string(),
+                            serde_json::Value::Number(max_out.into()),
+                        );
+                    } else if let Some(max_out) =
+                        enrichment.as_ref().and_then(|e| e.limits.max_output_tokens)
+                    {
+                        obj.insert(
+                            "max_output_tokens".to_string(),
+                            serde_json::Value::Number(max_out.into()),
+                        );
+                    }
 
-                        // Modalities: config overrides catalog
-                        if let Some(ref mods) = model_config.and_then(|mc| mc.modalities.as_ref()) {
-                            obj.insert(
-                                "modalities".to_string(),
-                                serde_json::to_value(mods).unwrap_or_default(),
-                            );
-                        } else if let Some(ref e) = enrichment {
-                            obj.insert(
-                                "modalities".to_string(),
-                                serde_json::to_value(&e.modalities).unwrap_or_default(),
-                            );
-                        }
+                    // Modalities: config overrides catalog
+                    if let Some(ref mods) = model_config.and_then(|mc| mc.modalities.as_ref()) {
+                        obj.insert(
+                            "modalities".to_string(),
+                            serde_json::to_value(mods).unwrap_or_default(),
+                        );
+                    } else if let Some(ref e) = enrichment {
+                        obj.insert(
+                            "modalities".to_string(),
+                            serde_json::to_value(&e.modalities).unwrap_or_default(),
+                        );
+                    }
+
+                    // Tasks: config overrides catalog
+                    let tasks = model_config
+                        .filter(|mc| !mc.tasks.is_empty())
+                        .map(|mc| &mc.tasks)
+                        .or(enrichment
+                            .as_ref()
+                            .filter(|e| !e.tasks.is_empty())
+                            .map(|e| &e.tasks));
+                    if let Some(tasks) = tasks {
+                        obj.insert(
+                            "tasks".to_string(),
+                            serde_json::to_value(tasks).unwrap_or_default(),
+                        );
+                    }
+
+                    // Catalog pricing for display (from catalog only)
+                    if let Some(ref e) = enrichment {
+                        obj.insert(
+                            "catalog_pricing".to_string(),
+                            serde_json::to_value(&e.catalog_pricing).unwrap_or_default(),
+                        );
+                    }
 
-                        // Tasks: config overrides catalog
-                        let tasks = model_config
-                            .filter(|mc| !mc.tasks.is_empty())
-                            .map(|mc| &mc.tasks)
-                            .or(enrichment
-                                .as_ref()
-                                .filter(|e| !e.tasks.is_empty())
-                                .map(|e| &e.tasks));
-                        if let Some(tasks) = tasks {
+                    // Family: config overrides catalog
+                    if let Some(family) = model_config
+                        .and_then(|mc| mc.family.as_ref())
+                        .or(enrichment.as_ref().and_then(|e| e.family.as_ref()))
+                    {
+                        obj.insert(
+                            "family".to_string(),
+                            serde_json::Value::String(family.clone()),
+                        );
+                    }
+
+                    // Open weights: config overrides catalog
+                    if let Some(ow) = model_config.and_then(|mc| mc.open_weights) {
+                        obj.insert("open_weights".to_string(), serde_json::Value::Bool(ow));
+                    } else if let Some(ref e) = enrichment {
+                        obj.insert(
+                            "open_weights".to_string(),
+                            serde_json::Value::Bool(e.open_weights),
+                        );
+                    }
+
+                    // Image generation metadata (config only)
+                    if let Some(mc) = model_config {
+                        if !mc.image_sizes.is_empty() {
                             obj.insert(
-                                "tasks".to_string(),
-                                serde_json::to_value(tasks).unwrap_or_default(),
+                                "image_sizes".to_string(),
+                                serde_json::to_value(&mc.image_sizes).unwrap_or_default(),
                             );
                         }
-
-                        // Catalog pricing for display (from catalog only)
-                        if let Some(ref e) = enrichment {
+                        if !mc.image_qualities.is_empty() {
                             obj.insert(
-                                "catalog_pricing".to_string(),
-                                serde_json::to_value(&e.catalog_pricing).unwrap_or_default(),
+                                "image_qualities".to_string(),
+                                serde_json::to_value(&mc.image_qualities).unwrap_or_default(),
                             );
                         }
-
-                        // Family: config overrides catalog
-                        if let Some(family) = model_config
-                            .and_then(|mc| mc.family.as_ref())
-                            .or(enrichment.as_ref().and_then(|e| e.family.as_ref()))
-                        {
+                        if let Some(max) = mc.max_images {
                             obj.insert(
-                                "family".to_string(),
-                                serde_json::Value::String(family.clone()),
+                                "max_images".to_string(),
+                                serde_json::Value::Number(max.into()),
                             );
                         }
-
-                        // Open weights: config overrides catalog
-                        if let Some(ow) = model_config.and_then(|mc| mc.open_weights) {
-                            obj.insert("open_weights".to_string(), serde_json::Value::Bool(ow));
-                        } else if let Some(ref e) = enrichment {
+                        if !mc.voices.is_empty() {
                             obj.insert(
-                                "open_weights".to_string(),
-                                serde_json::Value::Bool(e.open_weights),
+                                "voices".to_string(),
+                                serde_json::to_value(&mc.voices).unwrap_or_default(),
                             );
                         }
-
-                        // Image generation metadata (config only)
-                        if let Some(mc) = model_config {
-                            if !mc.image_sizes.is_empty() {
-                                obj.insert(
-                                    "image_sizes".to_string(),
-                                    serde_json::to_value(&mc.image_sizes).unwrap_or_default(),
-                                );
-                            }
-                            if !mc.image_qualities.is_empty() {
-                                obj.insert(
-                                    "image_qualities".to_string(),
-                                    serde_json::to_value(&mc.image_qualities).unwrap_or_default(),
-                                );
-                            }
-                            if let Some(max) = mc.max_images {
-                                obj.insert(
-                                    "max_images".to_string(),
-                                    serde_json::Value::Number(max.into()),
-                                );
-                            }
-                            if !mc.voices.is_empty() {
-                                obj.insert(
-                                    "voices".to_string(),
-                                    serde_json::to_value(&mc.voices).unwrap_or_default(),
-                                );
-                            }
-                        }
                     }
+                }
 
-                    // Sovereignty: merge provider → model override (independent of catalog)
-                    let provider_sov = provider_config.and_then(|pc| pc.sovereignty());
-                    let model_sov = model_config.and_then(|mc| mc.sovereignty.as_ref());
-                    if let Some(merged) =
-                        crate::config::SovereigntyMetadata::merge(provider_sov, model_sov)
-                            .filter(|m| !m.is_empty())
-                    {
-                        obj.insert(
-                            "sovereignty".to_string(),
-                            serde_json::to_value(&merged).unwrap_or_default(),
-                        );
-                    }
-                } else {
-                    model_json = serde_json::json!({ "id": prefixed_id });
+                // Sovereignty: merge provider → model override (independent of catalog)
+                let provider_sov = provider_config.and_then(|pc| pc.sovereignty());
+                let model_sov = model_config.and_then(|mc| mc.sovereignty.as_ref());
+                if let Some(merged) =
+                    crate::config::SovereigntyMetadata::merge(provider_sov, model_sov)
+                        .filter(|m| !m.is_empty())
+                {
+                    obj.insert(
+                        "sovereignty".to_string(),
+                        serde_json::to_value(&merged).unwrap_or_default(),
+                    );
                 }
-                all_models.push(model_json);
+            } else {
+                model_json = serde_json::json!({ "id": prefixed_id });
             }
+            all_models.push(model_json);
         }
-        // Skip providers that fail to return models
     }
 
     // Mark all static models with source
diff --git a/src/routes/execution.rs b/src/routes/execution.rs
index 54b816e..0a82db5 100644
--- a/src/routes/execution.rs
+++ b/src/routes/execution.rs
@@ -874,6 +874,9 @@ mod tests {
                 crate::services::ProviderMetricsService::with_local_metrics(|| None),
             ),
             model_catalog: crate::catalog::ModelCatalogRegistry::new(),
+            static_models_cache: std::sync::Arc::new(tokio::sync::RwLock::new(
+                std::collections::HashMap::new(),
+            )),
         }
     }
 

From 9fb6527ff6813195f637f9bc4ad75f34c8e4664c Mon Sep 17 00:00:00 2001
From: ScriptSmith <ScriptSmith@users.noreply.github.com>
Date: Fri, 20 Mar 2026 22:20:29 +1000
Subject: [PATCH 2/3] Models cache: prune stale entries, track refresh task, log fetch errors

---
 src/app.rs               | 6 +++---
 src/cli/server.rs        | 3 ++-
 src/routes/api/models.rs | 5 +++--
 src/wasm.rs              | 1 +
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/app.rs b/src/app.rs
index 6eb0aa8..ba8ebeb 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -1857,11 +1857,10 @@ impl AppState {
         let results = join_all(futures).await;
 
         let mut cache = self.static_models_cache.write().await;
-        let mut count = 0usize;
+        cache.retain(|name, _| self.config.providers.get(name).is_some());
         for (name, result) in results {
             match result {
                 Ok(response) => {
-                    count += response.data.len();
                     cache.insert(name, response);
                 }
                 Err(e) => {
@@ -1869,9 +1868,10 @@ impl AppState {
                 }
             }
         }
+        let total_models: usize = cache.values().map(|r| r.data.len()).sum();
         tracing::info!(
             providers = cache.len(),
-            models = count,
+            models = total_models,
             "Static models cache warmed"
         );
     }
diff --git a/src/cli/server.rs b/src/cli/server.rs
index 93888c6..fde9279 100644
--- a/src/cli/server.rs
+++ b/src/cli/server.rs
@@ -339,8 +339,9 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
     // (initial warming already happened in AppState::new)
     if config.features.static_models_cache.enabled() {
         let interval = config.features.static_models_cache.refresh_interval();
+        let task_tracker = state.task_tracker.clone();
         let state_ref = state.clone();
-        tokio::spawn(async move {
+        task_tracker.spawn(async move {
             let mut ticker = tokio::time::interval(interval);
             ticker.tick().await; // skip the immediate first tick (already warmed)
             loop {
diff --git a/src/routes/api/models.rs b/src/routes/api/models.rs
index c14fac4..4d98d3e 100644
--- a/src/routes/api/models.rs
+++ b/src/routes/api/models.rs
@@ -74,8 +74,9 @@ pub async fn api_v1_models(
             .collect();
 
         for (name, result) in join_all(futures).await {
-            if let Ok(resp) = result {
-                hits.push((name, resp));
+            match result {
+                Ok(resp) => hits.push((name, resp)),
+                Err(e) => tracing::warn!(provider = %name, error = %e, "Live-fetch fallback failed for cache-miss provider"),
             }
         }
     }
diff --git a/src/wasm.rs b/src/wasm.rs
index 883f4d1..7666300 100644
--- a/src/wasm.rs
+++ b/src/wasm.rs
@@ -156,6 +156,7 @@ impl HadrianGateway {
             default_org_id,
             provider_metrics: Arc::new(services::ProviderMetricsService::new()),
             model_catalog: catalog::ModelCatalogRegistry::new(),
+            static_models_cache: Arc::new(tokio::sync::RwLock::new(Default::default())),
         };
 
         let router = build_wasm_router(state, default_user_id, default_org_id);

From 7690b834b906fd11fb03f4c00f7f53f0d445f9d1 Mon Sep 17 00:00:00 2001
From: ScriptSmith <ScriptSmith@users.noreply.github.com>
Date: Fri, 20 Mar 2026 22:40:12 +1000
Subject: [PATCH 3/3] Review fixes: write live-fetched models back to cache

---
 src/cli/server.rs        |  3 +--
 src/routes/api/models.rs | 17 ++++++++++++++++-
 src/wasm.rs              |  7 ++++++-
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/cli/server.rs b/src/cli/server.rs
index fde9279..93888c6 100644
--- a/src/cli/server.rs
+++ b/src/cli/server.rs
@@ -339,9 +339,8 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
     // (initial warming already happened in AppState::new)
     if config.features.static_models_cache.enabled() {
         let interval = config.features.static_models_cache.refresh_interval();
-        let task_tracker = state.task_tracker.clone();
         let state_ref = state.clone();
-        task_tracker.spawn(async move {
+        tokio::spawn(async move {
             let mut ticker = tokio::time::interval(interval);
             ticker.tick().await; // skip the immediate first tick (already warmed)
             loop {
diff --git a/src/routes/api/models.rs b/src/routes/api/models.rs
index 4d98d3e..d5f2761 100644
--- a/src/routes/api/models.rs
+++ b/src/routes/api/models.rs
@@ -73,12 +73,27 @@ pub async fn api_v1_models(
             })
             .collect();
 
+        let mut live_fetched = Vec::new();
         for (name, result) in join_all(futures).await {
             match result {
-                Ok(resp) => hits.push((name, resp)),
+                Ok(resp) => {
+                    if cache_enabled {
+                        live_fetched.push((name.clone(), resp.clone()));
+                    }
+                    hits.push((name, resp));
+                }
                 Err(e) => tracing::warn!(provider = %name, error = %e, "Live-fetch fallback failed for cache-miss provider"),
             }
         }
+
+        // Write successful live-fetches back to the cache so subsequent requests
+        // don't repeat the same upstream calls until the next background refresh.
+        if !live_fetched.is_empty() {
+            let mut cache = state.static_models_cache.write().await;
+            for (name, resp) in live_fetched {
+                cache.insert(name, resp);
+            }
+        }
     }
 
     // Collect successful results and enrich with catalog data
diff --git a/src/wasm.rs b/src/wasm.rs
index 7666300..a3200ac 100644
--- a/src/wasm.rs
+++ b/src/wasm.rs
@@ -479,7 +479,12 @@ fn wasm_default_config() -> config::GatewayConfig {
         },
         providers: config::ProvidersConfig::default(),
         limits: config::LimitsConfig::default(),
-        features: config::FeaturesConfig::default(),
+        features: config::FeaturesConfig {
+            static_models_cache: config::StaticModelsCacheConfig {
+                refresh_interval_secs: 0,
+            },
+            ..Default::default()
+        },
         observability: config::ObservabilityConfig::default(),
         ui: config::UiConfig {
             pages: config::PagesConfig {