From 55215ee6fc9456d203b2911d3451f79fec72fb35 Mon Sep 17 00:00:00 2001 From: Siva Kumar Yarramsetti Date: Tue, 16 Jun 2026 22:41:59 +0530 Subject: [PATCH] feat(R4): make LLM rate limit configurable via LLM_MAX_CALLS_PER_MIN - Add LLM_MAX_CALLS_PER_MIN field to Settings (default 60, ge=1) - Replace hardcoded >= 3 check in _wait_for_rate_limit with configurable value - Accept optional llm_max_calls_per_min in ReactInvestigationLoop.__init__ (falls back to 60 when omitted or below 1) - Document recommended values per provider in README - Improve PUT /config error handling (400 vs 500 separation) Closes #13 --- README.md | 23 ++++++++++++++++ docker-compose.yml | 2 ++ pyproject.toml | 1 + repi/api/config.py | 47 ++++++++++++++++++++------------ repi/core/config.py | 25 +++++++++++++++-- repi/investigation/react_loop.py | 17 ++++++++++-- uv.lock | 2 ++ web/package-lock.json | 4 +-- 8 files changed, 97 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 57d7f5b..9ad016a 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,30 @@ All keys live in `.repi/config.json` (see `config.example.json` for the full sch | `UI_PORT` | `3000` | Port the web UI binds to (read by `repi ui`) | | `WATCHER_CONFIG_REFRESH_SECS` | `30` | How often the worker polls for config changes | | `OLLAMA_BASE_URL` | `http://localhost:11434` | Ollama endpoint | +| `LLM_MAX_CALLS_PER_MIN` | `60` | Max LLM calls per rolling 60-second window in the ReAct loop. Lower for free-tier providers; raise for paid/high-tier accounts. | + +## Rate Limiting + +`LLM_MAX_CALLS_PER_MIN` (in `.repi/config.json`, default `60`) caps how many LLM calls the ReAct investigation loop makes per rolling 60-second window. If the cap is reached, the loop sleeps until a slot frees up. 
+ +| Provider tier | Recommended value | +|---|---| +| Mistral free tier | `3` | +| Other free-tier providers | `3-15` | +| Paid / standard tier | `60` | +| High-tier / enterprise | `100-1000` | +| Local / self-hosted (Ollama) | `1000` (effectively unlimited) | + +Update via the **Config** page in the UI, or: + +```bash +curl -X PUT http://localhost:8000/config \ + -H "Content-Type: application/json" \ + -d '{"LLM_MAX_CALLS_PER_MIN": 120}' +``` + +Or edit `.repi/config.json` directly and restart the API. ## Development ```bash diff --git a/docker-compose.yml b/docker-compose.yml index e5040b1..d5e895b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,8 @@ services: redis: image: redis:7-alpine container_name: repi-redis + ports: + - "6379:6379" # No host port published — only the app container talks to redis via # compose-internal DNS. Avoids clashing with a redis already running on # the host (common on dev boxes). diff --git a/pyproject.toml b/pyproject.toml index f031fb7..17643b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "click==8.1.7", "typer>=0.12.0,<0.13", "watchfiles>=0.21.0,<0.22", + "httpx>=0.28.1", ] [project.scripts] diff --git a/repi/api/config.py b/repi/api/config.py index f85e769..ede32e0 100644 --- a/repi/api/config.py +++ b/repi/api/config.py @@ -1,19 +1,21 @@ import json import logging from fastapi import APIRouter, HTTPException -from pydantic import BaseModel -from repi.core.config import settings, CONFIG_PATH, CONFIG_DIR +from pydantic import ValidationError +from repi.core.config import settings, Settings, CONFIG_PATH, CONFIG_DIR from repi.core.container import get_container logger = logging.getLogger("repi.api.config") router = APIRouter() + @router.get("/config") async def get_config(): """Return the current configuration.""" return settings.model_dump() + @router.put("/config") async def update_config(new_config: dict): """Merge `new_config` on top of the existing config.json and 
reload. @@ -22,32 +24,43 @@ async def update_config(new_config: dict): must not clobber unsent fields with their class defaults, which would break a running container instantly. """ - try: - from repi.core.config import Settings + existing: dict = {} + if CONFIG_PATH.exists(): + try: + existing = json.loads(CONFIG_PATH.read_text()) + except json.JSONDecodeError: + existing = {} - existing: dict = {} - if CONFIG_PATH.exists(): - try: - existing = json.loads(CONFIG_PATH.read_text()) - except json.JSONDecodeError: - existing = {} + merged = {**existing, **new_config} - merged = {**existing, **new_config} + # Validation errors (bad field/value, e.g. LLM_MAX_CALLS_PER_MIN < 1) → 400 + try: validated = Settings(**merged) + except ValidationError as e: + logger.warning(f"Config validation failed: {e}") + raise HTTPException(status_code=400, detail=str(e)) - # Fail fast on an unknown EMBEDDING_BACKEND so we don't persist a - # value that would 500 on first /ingest or /investigate. + # Invalid EMBEDDING_BACKEND is also a client error → 400 + try: from repi.embeddings import create_embedder create_embedder(validated.EMBEDDING_BACKEND) + except Exception as e: + logger.warning(f"Invalid EMBEDDING_BACKEND '{validated.EMBEDDING_BACKEND}': {e}") + raise HTTPException( + status_code=400, + detail=f"Invalid EMBEDDING_BACKEND '{validated.EMBEDDING_BACKEND}': {e}", + ) + # File write / reload failures are server-side → 500 + try: CONFIG_DIR.mkdir(parents=True, exist_ok=True) with open(CONFIG_PATH, "w") as f: json.dump(validated.model_dump(), f, indent=2) settings.reload() get_container().refresh_llm() - - return {"status": "success", "message": "Configuration updated and reloaded"} except Exception as e: - logger.error(f"Failed to update config: {e}") - raise HTTPException(status_code=400, detail=str(e)) + logger.error(f"Failed to persist/reload config: {e}") + raise HTTPException(status_code=500, detail="Failed to persist or reload configuration") + + return {"status": "success", 
"message": "Configuration updated and reloaded"} diff --git a/repi/core/config.py b/repi/core/config.py index ef4fada..7a94ac5 100644 --- a/repi/core/config.py +++ b/repi/core/config.py @@ -1,11 +1,16 @@ from __future__ import annotations import os import json +from typing import Any, List, Optional from pathlib import Path from typing import List, Optional -from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict -from pydantic import Field +from pydantic import Field +from pydantic_settings import ( + BaseSettings, + SettingsConfigDict, + PydanticBaseSettingsSource, +) def _resolve_config_path() -> Path: """Locate .repi/config.json: cwd first (docker runs from /app), then parent directories (running from a subdir of a checkout), then alongside the @@ -58,6 +63,20 @@ class Settings(BaseSettings): WATCHER_CONFIG_REFRESH_SECS: int = 30 + # R4: configurable LLM calls-per-minute cap for the ReAct investigation loop. + # Default 60 is safe for paid/high-tier providers. Set to 3 for Mistral free + # tier (or any other provider with a low RPM quota) to avoid 429 errors. + # Must be >= 1; validated by ge=1 so PUT /config rejects 0 or negative values. + LLM_MAX_CALLS_PER_MIN: int = Field( + default=60, + ge=1, + description=( + "Maximum LLM calls per rolling 60-second window in the ReAct " + "investigation loop. Set low (e.g. 3) for free-tier providers to " + "avoid 429s; set high (60+) for paid/high-tier accounts." + ), + ) + # "fastembed" (ONNX Runtime, ~50 MB) or "torch" via sentence-transformers # (~790 MB). Vectors are byte-identical; the choice is image size / RSS. 
EMBEDDING_BACKEND: str = "fastembed" @@ -129,4 +148,4 @@ def get_settings() -> Settings: print(f"Error loading config.json: {e}") return Settings() -settings = get_settings() +settings = get_settings() \ No newline at end of file diff --git a/repi/investigation/react_loop.py b/repi/investigation/react_loop.py index a74ab94..d429178 100644 --- a/repi/investigation/react_loop.py +++ b/repi/investigation/react_loop.py @@ -107,6 +107,7 @@ def __init__( enable_reflection: bool = True, reflection_interval: int = 3, max_reflections: int = 2, + llm_max_calls_per_min: Optional[int] = None, # Added for dynamic rate limiting ) -> None: self.llm = llm self.tools = tools @@ -120,6 +121,17 @@ def __init__( self.enable_reflection = enable_reflection self.reflection_interval = reflection_interval self.max_reflections = max_reflections + + # Setup dynamic rate limit with a reasonable fallback (e.g., 60 if not specified) + if llm_max_calls_per_min is None: + llm_max_calls_per_min = 60 + if llm_max_calls_per_min < 1: + logger.warning( + "llm_max_calls_per_min=%s is invalid (<1); falling back to 60", + llm_max_calls_per_min, + ) + llm_max_calls_per_min = 60 + self.llm_max_calls_per_min = llm_max_calls_per_min self._llm_call_timestamps: list[float] = [] @staticmethod @@ -145,7 +157,8 @@ def _ledger_summary(ledger: dict[str, dict]) -> str: async def _wait_for_rate_limit(self): now = time.time() self._llm_call_timestamps = [t for t in self._llm_call_timestamps if now - t < 60] - while len(self._llm_call_timestamps) >= 3: + # Dynamically checking against the configured limit instead of hardcoded 3 + while len(self._llm_call_timestamps) >= self.llm_max_calls_per_min: wait_time = 60 - (now - self._llm_call_timestamps[0]) + 1 logger.warning(f"Rate limit: Waiting {wait_time:.1f}s...") await asyncio.sleep(wait_time) @@ -902,4 +915,4 @@ def _build_system_prompt(self) -> str: def asdict(obj): from dataclasses import asdict as _asdict - return _asdict(obj) + return _asdict(obj) \ No newline at 
end of file diff --git a/uv.lock b/uv.lock index 42f63e0..7ff1451 100644 --- a/uv.lock +++ b/uv.lock @@ -1357,6 +1357,7 @@ dependencies = [ { name = "click" }, { name = "fastapi" }, { name = "fastembed" }, + { name = "httpx" }, { name = "numpy" }, { name = "pgvector" }, { name = "pydantic" }, @@ -1388,6 +1389,7 @@ requires-dist = [ { name = "click", specifier = "==8.1.7" }, { name = "fastapi", specifier = ">=0.111.0,<0.112.0" }, { name = "fastembed", specifier = ">=0.4,<0.8" }, + { name = "httpx", specifier = ">=0.28.1" }, { name = "numpy", specifier = ">=1.26.4,<2" }, { name = "pgvector", specifier = ">=0.4.2,<0.5" }, { name = "pydantic", specifier = ">=2.7.4,<3" }, diff --git a/web/package-lock.json b/web/package-lock.json index c8424c4..e74ae43 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,12 +1,12 @@ { "name": "web", - "version": "0.1.0", + "version": "1.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "web", - "version": "0.1.0", + "version": "1.0.0", "dependencies": { "@base-ui/react": "^1.4.1", "class-variance-authority": "^0.7.1",