From 55215ee6fc9456d203b2911d3451f79fec72fb35 Mon Sep 17 00:00:00 2001
From: Siva Kumar Yarramsetti <nsiva390@gmail.com>
Date: Tue, 16 Jun 2026 22:41:59 +0530
Subject: [PATCH] feat(R4): make LLM rate limit configurable via
 LLM_MAX_CALLS_PER_MIN

- Add LLM_MAX_CALLS_PER_MIN field to Settings (default 60, ge=1)
- Replace hardcoded >= 3 check in _wait_for_rate_limit with configurable value
- Read setting from settings singleton in ReactInvestigationLoop.__init__
- Document recommended values per provider in README
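- Improve PUT /config error handling (400 vs 500 separation)
- Also included in this diff: publish redis port 6379 to the host
  (docker-compose.yml), add httpx>=0.28.1 dependency (pyproject.toml,
  uv.lock), bump web/package-lock.json version 0.1.0 -> 1.0.0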

Closes #13
---
 README.md                        | 23 ++++++++++++++++
 docker-compose.yml               |  2 ++
 pyproject.toml                   |  1 +
 repi/api/config.py               | 47 ++++++++++++++++++++------------
 repi/core/config.py              | 25 +++++++++++++++--
 repi/investigation/react_loop.py | 17 ++++++++++--
 uv.lock                          |  2 ++
 web/package-lock.json            |  4 +--
 8 files changed, 97 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 57d7f5b..9ad016a 100644
--- a/README.md
+++ b/README.md
@@ -163,7 +163,30 @@ All keys live in `.repi/config.json` (see `config.example.json` for the full sch
 | `UI_PORT` | `3000` | Port the web UI binds to (read by `repi ui`) |
 | `WATCHER_CONFIG_REFRESH_SECS` | `30` | How often the worker polls for config changes |
 | `OLLAMA_BASE_URL` | `http://localhost:11434` | Ollama endpoint |
+| `LLM_MAX_CALLS_PER_MIN` | `60` | Max LLM calls per rolling 60-second window in the ReAct loop. Lower for free-tier providers; raise for paid/high-tier accounts. |
 
+## Rate Limiting
+
+`LLM_MAX_CALLS_PER_MIN` (in `.repi/config.json`, default `60`) caps how many LLM calls the ReAct investigation loop makes per rolling 60-second window. If the cap is reached the loop sleeps until a slot frees up.
+
+| Provider tier | Recommended value |
+|---|---|
+| Mistral free tier | `3` |
+| Other free-tier providers | `3-15` |
+| Paid / standard tier | `60` |
+| High-tier / enterprise | `100-1000` |
+| Local / self-hosted (Ollama) | `1000` (effectively unlimited) |
+
+Update via the **Config** page in the UI, or:
+
+```bash
+curl -X PUT http://localhost:8000/config \
+  -H "Content-Type: application/json" \
+  -d '{"LLM_MAX_CALLS_PER_MIN": 120}'
+```
+
+Or edit `.repi/config.json` directly and restart the API.
+
 ## Development
 
 ```bash
diff --git a/docker-compose.yml b/docker-compose.yml
index e5040b1..d5e895b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -19,6 +19,8 @@ services:
   redis:
     image: redis:7-alpine
     container_name: repi-redis
+    ports:
+      - "6379:6379"
     # No host port published — only the app container talks to redis via
     # compose-internal DNS. Avoids clashing with a redis already running on
     # the host (common on dev boxes).
diff --git a/pyproject.toml b/pyproject.toml
index f031fb7..17643b1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "click==8.1.7",
     "typer>=0.12.0,<0.13",
     "watchfiles>=0.21.0,<0.22",
+    "httpx>=0.28.1",
 ]
 
 [project.scripts]
diff --git a/repi/api/config.py b/repi/api/config.py
index f85e769..ede32e0 100644
--- a/repi/api/config.py
+++ b/repi/api/config.py
@@ -1,19 +1,21 @@
 import json
 import logging
 from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel
-from repi.core.config import settings, CONFIG_PATH, CONFIG_DIR
+from pydantic import ValidationError
+from repi.core.config import settings, Settings, CONFIG_PATH, CONFIG_DIR
 from repi.core.container import get_container
 
 logger = logging.getLogger("repi.api.config")
 
 router = APIRouter()
 
+
 @router.get("/config")
 async def get_config():
     """Return the current configuration."""
     return settings.model_dump()
 
+
 @router.put("/config")
 async def update_config(new_config: dict):
     """Merge `new_config` on top of the existing config.json and reload.
@@ -22,32 +24,43 @@ async def update_config(new_config: dict):
     must not clobber unsent fields with their class defaults, which would
     break a running container instantly.
     """
-    try:
-        from repi.core.config import Settings
+    existing: dict = {}
+    if CONFIG_PATH.exists():
+        try:
+            existing = json.loads(CONFIG_PATH.read_text())
+        except json.JSONDecodeError:
+            existing = {}
 
-        existing: dict = {}
-        if CONFIG_PATH.exists():
-            try:
-                existing = json.loads(CONFIG_PATH.read_text())
-            except json.JSONDecodeError:
-                existing = {}
+    merged = {**existing, **new_config}
 
-        merged = {**existing, **new_config}
+    # Validation errors (bad field/value, e.g. LLM_MAX_CALLS_PER_MIN < 1) → 400
+    try:
         validated = Settings(**merged)
+    except ValidationError as e:
+        logger.warning(f"Config validation failed: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
 
-        # Fail fast on an unknown EMBEDDING_BACKEND so we don't persist a
-        # value that would 500 on first /ingest or /investigate.
+    # Invalid EMBEDDING_BACKEND is also a client error → 400
+    try:
         from repi.embeddings import create_embedder
         create_embedder(validated.EMBEDDING_BACKEND)
+    except Exception as e:
+        logger.warning(f"Invalid EMBEDDING_BACKEND '{validated.EMBEDDING_BACKEND}': {e}")
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid EMBEDDING_BACKEND '{validated.EMBEDDING_BACKEND}': {e}",
+        )
 
+    # File write / reload failures are server-side → 500
+    try:
         CONFIG_DIR.mkdir(parents=True, exist_ok=True)
         with open(CONFIG_PATH, "w") as f:
             json.dump(validated.model_dump(), f, indent=2)
 
         settings.reload()
         get_container().refresh_llm()
-
-        return {"status": "success", "message": "Configuration updated and reloaded"}
     except Exception as e:
-        logger.error(f"Failed to update config: {e}")
-        raise HTTPException(status_code=400, detail=str(e))
+        logger.error(f"Failed to persist/reload config: {e}")
+        raise HTTPException(status_code=500, detail="Failed to persist or reload configuration")
+
+    return {"status": "success", "message": "Configuration updated and reloaded"}
diff --git a/repi/core/config.py b/repi/core/config.py
index ef4fada..7a94ac5 100644
--- a/repi/core/config.py
+++ b/repi/core/config.py
@@ -1,11 +1,16 @@
 from __future__ import annotations
 import os
 import json
+from typing import Any, List, Optional
 from pathlib import Path
 from typing import List, Optional
-from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict
-from pydantic import Field
 
+from pydantic import Field
+from pydantic_settings import (
+    BaseSettings,
+    SettingsConfigDict,
+    PydanticBaseSettingsSource,
+)
 def _resolve_config_path() -> Path:
     """Locate .repi/config.json: cwd first (docker runs from /app), then parent
     directories (running from a subdir of a checkout), then alongside the
@@ -58,6 +63,20 @@ class Settings(BaseSettings):
 
     WATCHER_CONFIG_REFRESH_SECS: int = 30
 
+    # R4: configurable LLM calls-per-minute cap for the ReAct investigation loop.
+    # Default 60 is safe for paid/high-tier providers. Set to 3 for Mistral free
+    # tier (or any other provider with a low RPM quota) to avoid 429 errors.
+    # Must be >= 1; validated by ge=1 so PUT /config rejects 0 or negative values.
+    LLM_MAX_CALLS_PER_MIN: int = Field(
+        default=60,
+        ge=1,
+        description=(
+            "Maximum LLM calls per rolling 60-second window in the ReAct "
+            "investigation loop. Set low (e.g. 3) for free-tier providers to "
+            "avoid 429s; set high (60+) for paid/high-tier accounts."
+        ),
+    )
+
     # "fastembed" (ONNX Runtime, ~50 MB) or "torch" via sentence-transformers
     # (~790 MB). Vectors are byte-identical; the choice is image size / RSS.
     EMBEDDING_BACKEND: str = "fastembed"
@@ -129,4 +148,4 @@ def get_settings() -> Settings:
             print(f"Error loading config.json: {e}")
     return Settings()
 
-settings = get_settings()
+settings = get_settings()
\ No newline at end of file
diff --git a/repi/investigation/react_loop.py b/repi/investigation/react_loop.py
index a74ab94..d429178 100644
--- a/repi/investigation/react_loop.py
+++ b/repi/investigation/react_loop.py
@@ -107,6 +107,7 @@ def __init__(
         enable_reflection: bool = True,
         reflection_interval: int = 3,
         max_reflections: int = 2,
+        llm_max_calls_per_min: Optional[int] = None,  # Added for dynamic rate limiting
     ) -> None:
         self.llm = llm
         self.tools = tools
@@ -120,6 +121,17 @@ def __init__(
         self.enable_reflection = enable_reflection
         self.reflection_interval = reflection_interval
         self.max_reflections = max_reflections
+        # No explicit cap from the caller: use the configured LLM_MAX_CALLS_PER_MIN.
+        if llm_max_calls_per_min is None:
+            from repi.core.config import settings  # local import: no new module-level dependency
+            llm_max_calls_per_min = settings.LLM_MAX_CALLS_PER_MIN
+        if llm_max_calls_per_min < 1:
+            logger.warning(
+                "llm_max_calls_per_min=%s is invalid (<1); falling back to 60",
+                llm_max_calls_per_min,
+            )
+            llm_max_calls_per_min = 60
+        self.llm_max_calls_per_min = llm_max_calls_per_min
         self._llm_call_timestamps: list[float] = []
 
     @staticmethod
@@ -145,7 +157,8 @@ def _ledger_summary(ledger: dict[str, dict]) -> str:
     async def _wait_for_rate_limit(self):
         now = time.time()
         self._llm_call_timestamps = [t for t in self._llm_call_timestamps if now - t < 60]
-        while len(self._llm_call_timestamps) >= 3:
+        # Dynamically checking against the configured limit instead of hardcoded 3
+        while len(self._llm_call_timestamps) >= self.llm_max_calls_per_min:
             wait_time = 60 - (now - self._llm_call_timestamps[0]) + 1
             logger.warning(f"Rate limit: Waiting {wait_time:.1f}s...")
             await asyncio.sleep(wait_time)
@@ -902,4 +915,4 @@ def _build_system_prompt(self) -> str:
 
 def asdict(obj):
     from dataclasses import asdict as _asdict
-    return _asdict(obj)
+    return _asdict(obj)
\ No newline at end of file
diff --git a/uv.lock b/uv.lock
index 42f63e0..7ff1451 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1357,6 +1357,7 @@ dependencies = [
     { name = "click" },
     { name = "fastapi" },
     { name = "fastembed" },
+    { name = "httpx" },
     { name = "numpy" },
     { name = "pgvector" },
     { name = "pydantic" },
@@ -1388,6 +1389,7 @@ requires-dist = [
     { name = "click", specifier = "==8.1.7" },
     { name = "fastapi", specifier = ">=0.111.0,<0.112.0" },
     { name = "fastembed", specifier = ">=0.4,<0.8" },
+    { name = "httpx", specifier = ">=0.28.1" },
     { name = "numpy", specifier = ">=1.26.4,<2" },
     { name = "pgvector", specifier = ">=0.4.2,<0.5" },
     { name = "pydantic", specifier = ">=2.7.4,<3" },
diff --git a/web/package-lock.json b/web/package-lock.json
index c8424c4..e74ae43 100644
--- a/web/package-lock.json
+++ b/web/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "web",
-  "version": "0.1.0",
+  "version": "1.0.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "web",
-      "version": "0.1.0",
+      "version": "1.0.0",
       "dependencies": {
         "@base-ui/react": "^1.4.1",
         "class-variance-authority": "^0.7.1",