diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 9133bba1..59952c50 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -22,3 +22,7 @@ **Vulnerability:** Server-Side Request Forgery (SSRF) / Local File Inclusion **Learning:** Functions that fetch URLs provided via user inputs (e.g., `wait_for_url` fetching `--backend-ready-url` in CI scripts) can inadvertently read local files if they do not validate the scheme. Python's `urllib.request.urlopen` supports `file://` schemes, allowing attackers to access arbitrary file contents from the host machine or sandbox if they can control the URL parameter. **Prevention:** Always validate URL inputs to restrict allowed schemes. Check that URLs explicitly start with `http://` or `https://` before fetching them with standard libraries like `urllib`. +## 2026-06-30 - Prevent SSRF via Unvalidated URL Schemes in API Clients +**Vulnerability:** Server-Side Request Forgery (SSRF) / Local File Inclusion +**Learning:** API clients that load configuration from environment variables (e.g., `NOEMA_LLM_API_URL`) can be exploited if the environment is compromised or influenced by external input. `urllib.request.urlopen` supports `file://` schemes, which can allow arbitrary file reads if the URL scheme is not explicitly restricted. +**Prevention:** Always validate URL schemes for API clients, even when the URL is sourced from environment variables. Ensure URLs explicitly start with `http://` or `https://` before making requests so that `file://` and other non-HTTP schemes are rejected, preventing local file inclusion. Scheme validation alone does not stop SSRF against internal HTTP(S) endpoints; restrict destination hosts as well where that is a concern. 
diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py index 1e4661b7..45dad09b 100644 --- a/scripts/ci/noema_review_gate.py +++ b/scripts/ci/noema_review_gate.py @@ -267,6 +267,8 @@ def call_llm(repo: str, number: int, pr: dict[str, Any], diff: str, truncated: b if not api_url or not api_key: print("Noema LLM review unavailable: NOEMA_LLM_API_URL or NOEMA_LLM_API_KEY is not configured.") return None + if not api_url.lower().startswith(("http://", "https://")): + raise ValueError("Invalid NOEMA_LLM_API_URL scheme. URL must start with http:// or https://") prompt = { "role": "user", @@ -304,7 +306,7 @@ def call_llm(repo: str, number: int, pr: dict[str, Any], diff: str, truncated: b }, method="POST", ) - with urllib.request.urlopen(request, timeout=120) as response: + with urllib.request.urlopen(request, timeout=120) as response: # nosec B310 raw = response.read().decode("utf-8") data = json.loads(raw) content = (((data.get("choices") or [{}])[0].get("message") or {}).get("content") or "").strip() diff --git a/tests/test_noema_review_gate.py b/tests/test_noema_review_gate.py index 0b333ab3..545a2b45 100644 --- a/tests/test_noema_review_gate.py +++ b/tests/test_noema_review_gate.py @@ -198,6 +198,11 @@ def test_call_llm_handles_configuration_and_verdicts(monkeypatch): monkeypatch.delenv("NOEMA_LLM_API_KEY", raising=False) assert noema.call_llm("owner/repo", 1, pr, "diff", False) is None + monkeypatch.setenv("NOEMA_LLM_API_URL", "file:///etc/passwd") + monkeypatch.setenv("NOEMA_LLM_API_KEY", "secret") + with pytest.raises(ValueError, match="Invalid NOEMA_LLM_API_URL scheme. URL must start with http:// or https://"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + monkeypatch.setenv("NOEMA_LLM_API_URL", "https://llm.example.test/chat") monkeypatch.setenv("NOEMA_LLM_API_KEY", "secret") monkeypatch.setenv("NOEMA_LLM_MODEL", "review-model")