Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,30 @@ All keys live in `.repi/config.json` (see `config.example.json` for the full sch
| `UI_PORT` | `3000` | Port the web UI binds to (read by `repi ui`) |
| `WATCHER_CONFIG_REFRESH_SECS` | `30` | How often the worker polls for config changes |
| `OLLAMA_BASE_URL` | `http://localhost:11434` | Ollama endpoint |
| `LLM_MAX_CALLS_PER_MIN` | `60` | Max LLM calls per rolling 60-second window in the ReAct loop. Lower for free-tier providers; raise for paid/high-tier accounts. |


## Rate Limiting

`LLM_MAX_CALLS_PER_MIN` (in `.repi/config.json`, default `60`) caps how many LLM calls the ReAct investigation loop makes per rolling 60-second window. If the cap is reached the loop sleeps until a slot frees up.

| Provider tier | Recommended value |
|---|---|
| Mistral free tier | `3` |
| Other free-tier providers | `3-15` |
| Paid / standard tier | `60` |
| High-tier / enterprise | `100-1000` |
| Local / self-hosted (Ollama) | `1000` (effectively unlimited) |

Update via the **Config** page in the UI, or:

```bash
curl -X PUT http://localhost:8000/config \
-H "Content-Type: application/json" \
-d '{"LLM_MAX_CALLS_PER_MIN": 120}'
```

Or edit `.repi/config.json` directly and restart the API.

## Development

```bash
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ services:
redis:
image: redis:7-alpine
container_name: repi-redis
ports:
- "6379:6379"
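# Host port 6379 is published above, so redis is reachable from the host as
# well as from the app container (which uses compose-internal DNS). This can
# clash with a redis already running on the host (common on dev boxes);
# remove the `ports` mapping if host access is not needed.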
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"click==8.1.7",
"typer>=0.12.0,<0.13",
"watchfiles>=0.21.0,<0.22",
"httpx>=0.28.1",
]

[project.scripts]
Expand Down
47 changes: 30 additions & 17 deletions repi/api/config.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
import json
import logging
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from repi.core.config import settings, CONFIG_PATH, CONFIG_DIR
from pydantic import ValidationError
from repi.core.config import settings, Settings, CONFIG_PATH, CONFIG_DIR
from repi.core.container import get_container

logger = logging.getLogger("repi.api.config")

router = APIRouter()


@router.get("/config")
async def get_config():
    """Expose the active settings as a plain dictionary.

    Returns:
        The result of ``settings.model_dump()``.
    """
    current = settings.model_dump()
    return current


@router.put("/config")
async def update_config(new_config: dict):
"""Merge `new_config` on top of the existing config.json and reload.
Expand All @@ -22,32 +24,43 @@ async def update_config(new_config: dict):
must not clobber unsent fields with their class defaults, which would
break a running container instantly.
"""
try:
from repi.core.config import Settings
existing: dict = {}
if CONFIG_PATH.exists():
try:
existing = json.loads(CONFIG_PATH.read_text())
except json.JSONDecodeError:
existing = {}

existing: dict = {}
if CONFIG_PATH.exists():
try:
existing = json.loads(CONFIG_PATH.read_text())
except json.JSONDecodeError:
existing = {}
merged = {**existing, **new_config}

merged = {**existing, **new_config}
# Validation errors (bad field/value, e.g. LLM_MAX_CALLS_PER_MIN < 1) → 400
try:
validated = Settings(**merged)
except ValidationError as e:
logger.warning(f"Config validation failed: {e}")
raise HTTPException(status_code=400, detail=str(e))

# Fail fast on an unknown EMBEDDING_BACKEND so we don't persist a
# value that would 500 on first /ingest or /investigate.
# Invalid EMBEDDING_BACKEND is also a client error → 400
try:
from repi.embeddings import create_embedder
create_embedder(validated.EMBEDDING_BACKEND)
except Exception as e:
logger.warning(f"Invalid EMBEDDING_BACKEND '{validated.EMBEDDING_BACKEND}': {e}")
raise HTTPException(
status_code=400,
detail=f"Invalid EMBEDDING_BACKEND '{validated.EMBEDDING_BACKEND}': {e}",
)

# File write / reload failures are server-side → 500
try:
CONFIG_DIR.mkdir(parents=True, exist_ok=True)
with open(CONFIG_PATH, "w") as f:
json.dump(validated.model_dump(), f, indent=2)

settings.reload()
get_container().refresh_llm()

return {"status": "success", "message": "Configuration updated and reloaded"}
except Exception as e:
logger.error(f"Failed to update config: {e}")
raise HTTPException(status_code=400, detail=str(e))
logger.error(f"Failed to persist/reload config: {e}")
raise HTTPException(status_code=500, detail="Failed to persist or reload configuration")

return {"status": "success", "message": "Configuration updated and reloaded"}
25 changes: 22 additions & 3 deletions repi/core/config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from __future__ import annotations
import os
import json
from typing import Any, List, Optional
from pathlib import Path
from typing import List, Optional
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict
from pydantic import Field

from pydantic import Field
from pydantic_settings import (
BaseSettings,
SettingsConfigDict,
PydanticBaseSettingsSource,
)
def _resolve_config_path() -> Path:
"""Locate .repi/config.json: cwd first (docker runs from /app), then parent
directories (running from a subdir of a checkout), then alongside the
Expand Down Expand Up @@ -58,6 +63,20 @@ class Settings(BaseSettings):

WATCHER_CONFIG_REFRESH_SECS: int = 30

# R4: configurable LLM calls-per-minute cap for the ReAct investigation loop.
# Default 60 is safe for paid/high-tier providers. Set to 3 for Mistral free
# tier (or any other provider with a low RPM quota) to avoid 429 errors.
# Must be >= 1; validated by ge=1 so PUT /config rejects 0 or negative values.
LLM_MAX_CALLS_PER_MIN: int = Field(
default=60,
ge=1,
description=(
"Maximum LLM calls per rolling 60-second window in the ReAct "
"investigation loop. Set low (e.g. 3) for free-tier providers to "
"avoid 429s; set high (60+) for paid/high-tier accounts."
),
)

# "fastembed" (ONNX Runtime, ~50 MB) or "torch" via sentence-transformers
# (~790 MB). Vectors are byte-identical; the choice is image size / RSS.
EMBEDDING_BACKEND: str = "fastembed"
Expand Down Expand Up @@ -129,4 +148,4 @@ def get_settings() -> Settings:
print(f"Error loading config.json: {e}")
return Settings()

settings = get_settings()
settings = get_settings()
17 changes: 15 additions & 2 deletions repi/investigation/react_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def __init__(
enable_reflection: bool = True,
reflection_interval: int = 3,
max_reflections: int = 2,
llm_max_calls_per_min: Optional[int] = None, # Added for dynamic rate limiting
) -> None:
self.llm = llm
self.tools = tools
Expand All @@ -120,6 +121,17 @@ def __init__(
self.enable_reflection = enable_reflection
self.reflection_interval = reflection_interval
self.max_reflections = max_reflections

# Setup dynamic rate limit with a reasonable fallback (e.g., 60 if not specified)
if llm_max_calls_per_min is None:
llm_max_calls_per_min = 60
if llm_max_calls_per_min < 1:
logger.warning(
"llm_max_calls_per_min=%s is invalid (<1); falling back to 60",
llm_max_calls_per_min,
)
llm_max_calls_per_min = 60
self.llm_max_calls_per_min = llm_max_calls_per_min
self._llm_call_timestamps: list[float] = []

@staticmethod
Expand All @@ -145,7 +157,8 @@ def _ledger_summary(ledger: dict[str, dict]) -> str:
async def _wait_for_rate_limit(self):
now = time.time()
self._llm_call_timestamps = [t for t in self._llm_call_timestamps if now - t < 60]
while len(self._llm_call_timestamps) >= 3:
# Dynamically checking against the configured limit instead of hardcoded 3
while len(self._llm_call_timestamps) >= self.llm_max_calls_per_min:
wait_time = 60 - (now - self._llm_call_timestamps[0]) + 1
logger.warning(f"Rate limit: Waiting {wait_time:.1f}s...")
await asyncio.sleep(wait_time)
Expand Down Expand Up @@ -902,4 +915,4 @@ def _build_system_prompt(self) -> str:

def asdict(obj):
    """Convert the dataclass instance ``obj`` into a dict.

    Thin wrapper over ``dataclasses.asdict``. The import is function-local
    and aliased so it does not collide with this function's own name.

    Args:
        obj: A dataclass instance.

    Returns:
        A dict mapping field names to values, as produced by
        ``dataclasses.asdict``.

    Raises:
        TypeError: If ``obj`` is not a dataclass instance (raised by
            ``dataclasses.asdict``).
    """
    from dataclasses import asdict as _asdict

    return _asdict(obj)
2 changes: 2 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions web/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading