Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions FORK.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ line in the PR that syncs it back.
| 5ac94c35 | slackbot: channel-thread replies require @-mention (undoes joined-thread auto-reply from #2); codex wrapper falls back to fresh thread on dead rollout | [#12](https://github.com/Tavus-Engineering/centaur/pull/12) |
| a9d47a12 | api: finalize codex turn.failed as failed_permanent + post failure notice to Slack; raise iron-proxy upstream header timeout to 300s (codex remote compaction); fold signoz/aws header allowlist into base.yaml | [#13](https://github.com/Tavus-Engineering/centaur/pull/13) |
| dcfd647c | slackbot: route in-thread Watch Agent mentions through DM and post results back only after approval | [#14](https://github.com/Tavus-Engineering/centaur/pull/14) |
| 964493b4 | sandbox: launch Codex with external-sandbox bypass so shell commands work in Kubernetes sandboxes | [#15](https://github.com/Tavus-Engineering/centaur/pull/15) |
23 changes: 22 additions & 1 deletion services/api/tests/test_codex_app_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,13 @@ def fake_popen(args: list[str], *other_args, **kwargs) -> FakeProcess:

wrapper.main()

assert popen_args == ["codex", "app-server", "--listen", "stdio://"]
assert popen_args == [
"codex",
"--dangerously-bypass-approvals-and-sandbox",
"app-server",
"--listen",
"stdio://",
]
assert requests[0] == (
"initialize",
{
Expand All @@ -437,6 +443,21 @@ def fake_popen(args: list[str], *other_args, **kwargs) -> FakeProcess:
assert {"type": "turn.completed"} in emitted


def test_codex_app_server_command_can_keep_inner_sandbox(monkeypatch) -> None:
    """Setting the bypass variable to "0" keeps the bypass flag out of the argv."""
    codex_wrapper = _load_wrapper()

    # Opt out of the default bypass for the duration of this test only.
    monkeypatch.setenv("CENTAUR_CODEX_BYPASS_INNER_SANDBOX", "0")

    expected_argv = [
        "codex",
        "app-server",
        "-c",
        "model_reasoning_effort=high",
        "--listen",
        "stdio://",
    ]
    actual_argv = codex_wrapper._codex_app_server_command("high")
    assert actual_argv == expected_argv


def test_reasoning_effort_bumps_on_code_work(monkeypatch) -> None:
wrapper = _load_wrapper()
monkeypatch.delenv("CENTAUR_CODEX_DYNAMIC_EFFORT", raising=False)
Expand Down
27 changes: 24 additions & 3 deletions services/sandbox/codex-app-wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,29 @@ def _reasoning_effort_for_text(text: str) -> str | None:
if text and _CODE_WORK_RE.search(text):
return (os.environ.get("CENTAUR_CODEX_EFFORT_HIGH") or "medium").strip()
return None


def _bypass_codex_inner_sandbox() -> bool:
    """Report whether Codex should be launched without its own inner sandbox.

    The intent is to let Kubernetes be the sandbox boundary instead of
    Codex's bwrap layer.

    Reads ``CENTAUR_CODEX_BYPASS_INNER_SANDBOX``, treated as ``"1"`` when
    unset. The value is whitespace-trimmed and lowercased; only ``0``,
    ``false``, ``no`` and ``off`` switch the bypass off. Every other value,
    including an empty string, leaves it on.
    """
    raw_setting = os.environ.get("CENTAUR_CODEX_BYPASS_INNER_SANDBOX", "1")
    normalized = raw_setting.strip().lower()
    if normalized in ("0", "false", "no", "off"):
        return False
    return True


def _codex_app_server_command(reasoning_effort: str | None = None) -> list[str]:
    """Build the argv used to launch ``codex app-server`` on stdio.

    Args:
        reasoning_effort: When truthy, added after ``app-server`` as
            ``-c model_reasoning_effort=<value>``.

    Returns:
        The argument list: ``codex``, then the bypass flag if
        ``_bypass_codex_inner_sandbox()`` is true, then ``app-server``, the
        optional effort override, and finally ``--listen stdio://``.
    """
    # The bypass flag is placed before the subcommand, the effort override after it.
    top_level_flags = (
        ["--dangerously-bypass-approvals-and-sandbox"]
        if _bypass_codex_inner_sandbox()
        else []
    )
    effort_override = (
        ["-c", f"model_reasoning_effort={reasoning_effort}"]
        if reasoning_effort
        else []
    )
    return [
        "codex",
        *top_level_flags,
        "app-server",
        *effort_override,
        "--listen",
        "stdio://",
    ]


# Module-level OTEL proxy server handle; None here, presumably assigned once a proxy is started — TODO confirm.
OTEL_PROXY: ThreadingHTTPServer | None = None
# NOTE(review): presumably the upstream endpoint the proxy forwards to — confirm where it is assigned.
OTEL_PROXY_TARGET_ENDPOINT: str | None = None
# NOTE(review): looks like a prefix applied to proxied span names — confirm at the use site.
OTEL_PROXY_SPAN_PREFIX = "codex."
Expand Down Expand Up @@ -157,17 +180,15 @@ def start_app_server(reasoning_effort: str | None = None) -> None:
APP = None
APP_INITIALIZED = False

cmd = ["codex", "app-server"]
cmd = _codex_app_server_command(reasoning_effort)
if reasoning_effort:
cmd += ["-c", f"model_reasoning_effort={reasoning_effort}"]
emit(
{
"type": "system",
"subtype": "codex_reasoning_effort",
"effort": reasoning_effort,
}
)
cmd += ["--listen", "stdio://"]
APP = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
Expand Down
Loading