diff --git a/AGENTS.md b/AGENTS.md index a58e7742a0..71461cff85 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -136,7 +136,9 @@ When reviewing code, provide constructive feedback: - Remote workspace git operations should call `/api/git/changes` and `/api/git/diff` via the `path` query parameter with slash-normalized strings; building those URLs with `pathlib.Path` leaks host-platform separators and breaks Windows paths. The grep tool now prefers `rg`, then system `grep`, then Python; both the real grep executor and the SDK's terminal-command compatibility fallback should keep that order. For grep parity, the Python fallback should hide dotfiles by default but still let explicit `include` globs surface files like `.env`, matching ripgrep. For glob parity, any symlink-preservation regression test should force the Python fallback path, because ripgrep availability changes whether the fallback implementation runs at all. - Keep path helpers split by purpose: `is_absolute_path_source()` is for cross-platform source/wire syntax detection, while local filesystem writes/validation (for example, the file editor) should use host-native absolute-path semantics so POSIX does not silently accept Windows drive paths as creatable files. - Tool availability filtering belongs in `openhands-sdk/openhands/sdk/tool/registry.py` via `list_usable_tools()`, which preserves registration order and defaults tools to usable unless they expose an `is_usable()` callable. Environment-specific checks like Chromium detection should live on the concrete tool class (`BrowserToolSet.is_usable()`), while agent-server surfaces such as `/server_info` should consume the registry helper rather than re-implement per-tool filtering. -- Pydantic secret field helpers live in `openhands-sdk/openhands/sdk/utils/pydantic_secrets.py`. `serialize_secret()` handles serialization (cipher / `expose_secrets` / default Pydantic masking); `validate_secret()` handles deserialization (cipher decryption, redacted/empty → `None`); `is_redacted_secret()` checks for the sentinel; `REDACTED_SECRET_VALUE` is the canonical sentinel string. For `dict[str, str]` fields whose values are all secrets, wrap each value in `SecretStr` and call `serialize_secret` per value (see `LookupSecret._serialize_secrets` and `ACPAgent._serialize_acp_env`). Do not hand-roll redaction logic in field serializers. +- Pydantic secret field helpers live in `openhands-sdk/openhands/sdk/utils/pydantic_secrets.py`. `serialize_secret()` handles serialization (cipher / `expose_secrets` / default Pydantic masking); `validate_secret()` handles deserialization (cipher decryption, redacted/empty → `None`); `is_redacted_secret()` checks for the sentinel; `REDACTED_SECRET_VALUE` is the canonical sentinel string. For `dict[str, str]` fields whose values are all secrets, wrap each value in `SecretStr` and call `serialize_secret` per value (see `LookupSecret._serialize_secrets` and `ACPAgent._serialize_acp_env`). Do not hand-roll redaction logic in field serializers. For `str | None` resume-token style fields, pair a `field_serializer` with a `field_validator` so the redacted sentinel and any cipher-encrypted form round-trip cleanly (see `ACPAgent.acp_resume_session_id` for the canonical pattern). + +- `ACPAgent.acp_resume_session_id` is the explicit-resume override for ACP conversations: when set, `_start_acp_server` uses it as the prior session id for `session/load` instead of (and in precedence over) `state.agent_state["acp_session_id"]`. Designed for cloud sandboxes where the per-conversation `base_state.json` is wiped on recycle but the id has been mirrored into durable storage elsewhere. Falls back to `new_session` if the ACP server can't load the id. Treated as a bearer secret on the wire — default `model_dump` redacts; trusted backend round-trips opt in via `context={"expose_secrets": "plaintext"}` and frontend round-trips via `context={"expose_secrets": "encrypted", "cipher": cipher}`. Session ids in log lines must go through `_fingerprint_session_id` (returns `...`), matching the same bearer-token classification — `model_dump` masking alone leaks via Datadog/CloudWatch retention. - `LookupSecret` normalizes hostless URLs against `OH_INTERNAL_SERVER_URL` (set by `openhands-agent-server.__main__` from the bound host/port, rewriting wildcard binds to loopback) and otherwise falls back to `http://127.0.0.1:8000`, so relative secret URLs can safely target the current agent-server instance. diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 744c44fb34..abdef0eac8 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -42,7 +42,14 @@ UsageUpdate, ) from acp.transports import default_environment -from pydantic import Field, PrivateAttr, SecretStr, field_serializer +from pydantic import ( + Field, + PrivateAttr, + SecretStr, + ValidationInfo, + field_serializer, + field_validator, +) from openhands.sdk.agent.base import AgentBase from openhands.sdk.conversation.state import ConversationExecutionStatus @@ -65,7 +72,7 @@ from openhands.sdk.tool import Tool # noqa: TC002 from openhands.sdk.tool.builtins.finish import FinishAction, FinishObservation from openhands.sdk.utils import maybe_truncate -from openhands.sdk.utils.pydantic_secrets import serialize_secret +from openhands.sdk.utils.pydantic_secrets import serialize_secret, validate_secret logger = get_logger(__name__) @@ -108,19 +115,67 @@ # used by the terminal tool and the default max_message_chars in LLM config. MAX_ACP_CONTENT_CHARS: int = 30_000 -# Env vars that must be removed from the subprocess environment when a -# particular "dominant" env var is present. +# Env vars that must be removed from the subprocess environment when an +# active auth mechanism would conflict with them. # -# Rationale: some auth mechanisms are mutually exclusive and their env vars -# conflict. For example, CLAUDE_CONFIG_DIR activates Claude Code's OAuth -# credential-file flow. If ANTHROPIC_API_KEY or ANTHROPIC_BASE_URL are -# also present they redirect requests to a different endpoint (e.g. a proxy) -# that doesn't support OAuth bearer tokens, breaking authentication silently. -# When CLAUDE_CONFIG_DIR is detected we strip the conflicting vars so the -# subprocess can reach api.anthropic.com with its own OAuth token. -_ENV_CONFLICT_MAP: dict[str, frozenset[str]] = { - "CLAUDE_CONFIG_DIR": frozenset({"ANTHROPIC_API_KEY", "ANTHROPIC_BASE_URL"}), -} +# Rationale: some Claude Code auth mechanisms are mutually exclusive. When +# Claude Code's OAuth credential-file flow is active (an OAuth credentials +# file is present inside ``CLAUDE_CONFIG_DIR``), the presence of +# ANTHROPIC_API_KEY / ANTHROPIC_BASE_URL would redirect requests to a +# different endpoint (e.g. a proxy) that doesn't support OAuth bearer +# tokens, breaking authentication silently. We strip those env vars only +# when OAuth credentials are actually present — CLAUDE_CONFIG_DIR alone +# does not imply OAuth (it can be set purely to relocate Claude Code's +# session/history storage onto a persistent volume). +_CLAUDE_OAUTH_CREDENTIALS_FILES: tuple[str, ...] = (".credentials.json",) +_CLAUDE_AUTH_CONFLICTING_ENV: frozenset[str] = frozenset( + {"ANTHROPIC_API_KEY", "ANTHROPIC_BASE_URL"} +) + + +def _claude_oauth_credentials_present(config_dir: str | None) -> bool: + """Return ``True`` when an OAuth credentials file exists in ``config_dir``. + + The presence of such a file is the only reliable signal that Claude + Code's OAuth flow is the active auth mechanism for this run. When + ``CLAUDE_CONFIG_DIR`` is set purely to relocate session storage onto + a persistent volume, no credentials file is present and API-key auth + (``ANTHROPIC_API_KEY``) remains valid. + """ + if not config_dir: + return False + base = Path(config_dir) + return any((base / name).is_file() for name in _CLAUDE_OAUTH_CREDENTIALS_FILES) + + +# Number of trailing characters of an ACP session id retained in log lines +# for correlation. ACP session ids are server-issued UUIDs whose possession +# alone is enough to call ``session/load`` — they're effectively bearer +# tokens. ``model_dump`` / ``model_dump_json`` already redact the +# ``acp_resume_session_id`` field; log aggregators (Datadog, CloudWatch, …) +# are another serialization boundary that retains lines for weeks, so the +# same redaction applies there. Eight trailing characters give enough +# entropy to correlate across log lines for one conversation but not +# enough to brute-force the full id. +_SESSION_ID_LOG_SUFFIX_LEN: int = 8 + + +def _fingerprint_session_id(session_id: str | None) -> str: + """Render an ACP session id as a short, non-reversible fingerprint. + + ACP session ids are effectively bearer tokens (anyone holding one can + call ``session/load`` against the ACP server), so the full value must + not appear in logs — see :data:`_SESSION_ID_LOG_SUFFIX_LEN`. + + Returns ``""`` for ``None``, ``""`` for ids shorter than + the suffix length (e.g. test fixtures), and ``"..."`` otherwise. + """ + if session_id is None: + return "" + if len(session_id) <= _SESSION_ID_LOG_SUFFIX_LEN: + return "" + return f"...{session_id[-_SESSION_ID_LOG_SUFFIX_LEN:]}" + # Limit for asyncio.StreamReader buffers used by the ACP subprocess pipes. # The default (64 KiB) is too small for session_update notifications that @@ -720,6 +775,64 @@ def _serialize_acp_env(self, value: dict[str, str], info): "If None, the server picks its default." ), ) + acp_resume_session_id: str | None = Field( + default=None, + description=( + "Optional explicit ACP session id to resume. When set, takes " + "precedence over the id persisted in ``state.agent_state`` and " + "is used to call ``session/load`` on the ACP server. Designed " + "for environments where the per-conversation filesystem (and " + "therefore ``base_state.json``) does not survive across restarts " + "(e.g. cloud sandbox recycles), but the id has been mirrored " + "into durable storage elsewhere. Falls back to a fresh session " + "if the server cannot load the id. Treated as a secret on the " + "wire — possession of the id is enough to resume the underlying " + "ACP session, so default serialization redacts it; pass " + "``expose_secrets='plaintext'`` (trusted backend) or " + "``expose_secrets='encrypted'`` plus a cipher (frontend round-" + "trip) when the value must cross a serialization boundary." + ), + ) + + @field_serializer("acp_resume_session_id", when_used="always") + def _serialize_acp_resume_session_id(self, value: str | None, info): + """Mask ``acp_resume_session_id`` via :func:`serialize_secret`. + + Default ``model_dump`` / ``model_dump_json`` redacts the id so it + cannot leak into logs, trace exports, or PR review attachments. + Trusted backend callers opt into plaintext via Pydantic's context + (``expose_secrets='plaintext'``); frontend round-trips use the + ``"encrypted"`` mode with a cipher in the context. + """ + if value is None: + return None + return serialize_secret(SecretStr(value), info) + + @field_validator("acp_resume_session_id", mode="before") + @classmethod + def _validate_acp_resume_session_id( + cls, value: Any, info: ValidationInfo + ) -> str | None: + """Reverse :meth:`_serialize_acp_resume_session_id` on load. + + Without this validator, ``model_validate_json`` of a previously + serialized agent would put garbage in ``acp_resume_session_id``: + + - Default-redacted dumps would reload as the literal ``"**********"`` + sentinel — calling ``session/load`` with that fails server-side and + we fall back to a fresh session every time, defeating the whole + point of the durable mirror. + - Encrypted dumps (frontend round-trip) would reload as the raw + Fernet ciphertext — same failure mode, plus the ciphertext sits + in ``state.agent_state`` on disk. + + :func:`validate_secret` returns ``None`` for empty/redacted values + and decrypts when a cipher is present in the validation context. + We unwrap the returned ``SecretStr`` so the field's runtime type + stays ``str | None`` (the rest of the SDK reads it directly). + """ + secret = validate_secret(value, info) + return secret.get_secret_value() if secret is not None else None def model_post_init(self, __context: object) -> None: super().model_post_init(__context) @@ -738,6 +851,12 @@ def model_post_init(self, __context: object) -> None: _executor: Any = PrivateAttr(default=None) _conn: Any = PrivateAttr(default=None) # ClientSideConnection _session_id: str | None = PrivateAttr(default=None) + # Whether the active session was recovered via ``session/load`` (True) + # rather than freshly created via ``new_session`` (False). Set by + # ``_start_acp_server`` and read by ``init_state`` to decide whether + # the agent_context suffix is already in the ACP server's history or + # still needs to be injected on the first prompt of this turn. + _session_loaded: bool = PrivateAttr(default=False) _process: Any = PrivateAttr(default=None) # asyncio subprocess _client: Any = PrivateAttr(default=None) # _OpenHandsACPBridge _filtered_reader: Any = PrivateAttr(default=None) # StreamReader @@ -909,10 +1028,6 @@ def init_state( # Render the suffix once, pulling secrets from the conversation's # secret_registry to match the regular Agent's get_dynamic_context(). self._installed_suffix = self._render_suffix(state) - # A prior session id in agent_state means we are resuming; the suffix - # is already in the subprocess's persisted history from the original - # session, so no re-injection is needed. - resumed = state.agent_state.get("acp_session_id") is not None try: self._start_acp_server(state) @@ -921,6 +1036,16 @@ def init_state( self._cleanup() raise + # ``_start_acp_server`` records ``_session_loaded`` based on the + # actual outcome of the ACP handshake: True iff ``session/load`` + # succeeded against the prior id, False if we fell back to + # ``new_session`` (stale id, server forgot, etc.). Driving the + # suffix-install decision off this signal — rather than a hopeful + # "we had an id to try" — guarantees the agent_context suffix is + # injected on the first prompt of any fresh session even when an + # explicit ``acp_resume_session_id`` was provided. + resumed = self._session_loaded + self._initialized = True # Persist agent info + the ACP session id + its cwd in agent_state. @@ -1003,26 +1128,58 @@ def _start_acp_server(self, state: ConversationState) -> None: env.pop("CLAUDECODE", None) # Strip env vars that conflict with an active auth mechanism. - # E.g. CLAUDE_CONFIG_DIR (OAuth credential file) conflicts with - # ANTHROPIC_API_KEY / ANTHROPIC_BASE_URL (API-key + proxy auth). - for dominant, conflicts in _ENV_CONFLICT_MAP.items(): - if dominant in env: - for conflict in conflicts: - env.pop(conflict, None) + # Claude Code's OAuth flow conflicts with ANTHROPIC_API_KEY / + # ANTHROPIC_BASE_URL only when OAuth credentials are actually + # present in CLAUDE_CONFIG_DIR. Setting CLAUDE_CONFIG_DIR purely + # for session-storage relocation must not disable API-key auth. + if _claude_oauth_credentials_present(env.get("CLAUDE_CONFIG_DIR")): + for conflict in _CLAUDE_AUTH_CONFLICTING_ENV: + env.pop(conflict, None) command = self.acp_command[0] args = list(self.acp_command[1:]) + list(self.acp_args) working_dir = str(state.workspace.working_dir) - # Prior ACP session id — survives agent-server restarts via + # Prior ACP session id — typically survives agent-server restarts via # ConversationState.agent_state (serialized into base_state.json). # Its presence is the signal to resume; its absence means fresh start. # ACP servers key persistence by ``cwd``; if the workspace moved we # drop the id so we don't accidentally resume (or silently load) a # session the server associates with a different directory. - prior_session_id: str | None = state.agent_state.get("acp_session_id") - prior_session_cwd: str | None = state.agent_state.get("acp_session_cwd") + # + # ``acp_resume_session_id`` (set on the agent config) takes precedence + # over the FS-persisted id. This lets cloud deployments mirror the + # id into a durable store and pass it back on the first launch of a + # fresh sandbox, even when ``base_state.json`` was wiped along with + # the previous sandbox filesystem. + # + # Note the asymmetry on the cwd guard: for an FS-persisted id we + # have the cwd it was created under and can refuse to load when it + # differs, because resuming the wrong session silently would be + # catastrophic. For an explicit ``acp_resume_session_id`` we do + # *not* have that recorded cwd — the contract is "the caller knows + # what they're doing", typically because the app-server only mirrors + # ids for conversations whose sandbox always lands in the same + # ``working_dir``. We therefore assume cwd-compatibility and let + # the ACP server's own ``session/load`` validation be the last line + # of defence: if the server-side cwd doesn't match it returns an + # ``ACPRequestError``, which is already caught and falls back to + # ``new_session`` — the same recovery path as a forgotten id. + fs_session_id: str | None = state.agent_state.get("acp_session_id") + fs_session_cwd: str | None = state.agent_state.get("acp_session_cwd") + if self.acp_resume_session_id and self.acp_resume_session_id != fs_session_id: + logger.info( + "Using explicit acp_resume_session_id (%s); " + "filesystem agent_state had id=%s", + _fingerprint_session_id(self.acp_resume_session_id), + _fingerprint_session_id(fs_session_id), + ) + prior_session_id: str | None = self.acp_resume_session_id + prior_session_cwd: str | None = working_dir + else: + prior_session_id = fs_session_id + prior_session_cwd = fs_session_cwd if prior_session_id is not None and prior_session_cwd not in ( None, working_dir, @@ -1030,13 +1187,13 @@ def _start_acp_server(self, state: ConversationState) -> None: logger.warning( "ACP session %s was created with cwd=%s; current cwd=%s differs, " "starting a fresh session instead of resuming", - prior_session_id, + _fingerprint_session_id(prior_session_id), prior_session_cwd, working_dir, ) prior_session_id = None - async def _init() -> tuple[Any, Any, Any, str, str, str]: + async def _init() -> tuple[Any, Any, Any, str, str, str, bool]: # Spawn the subprocess directly so we can install a # filtering reader that skips non-JSON-RPC lines some # ACP servers (e.g. claude-code-acp v0.1.x) write to @@ -1119,6 +1276,7 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: # subprocess crash) propagate — there is no working connection to # fall back on, and the outer init_state handler cleans up. session_id: str | None = None + session_loaded = False if prior_session_id is not None: try: await conn.load_session( @@ -1127,15 +1285,16 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: mcp_servers=[], ) session_id = prior_session_id + session_loaded = True logger.info( - "Resumed ACP session: %s (cwd=%s)", - session_id, + "Resumed ACP session %s (cwd=%s)", + _fingerprint_session_id(session_id), working_dir, ) except ACPRequestError as e: logger.warning( "ACP load_session(%s) failed (%s); starting a fresh session", - prior_session_id, + _fingerprint_session_id(prior_session_id), e, ) @@ -1165,7 +1324,15 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: logger.info("Setting ACP session mode: %s", mode_id) await conn.set_session_mode(mode_id=mode_id, session_id=session_id) - return conn, process, filtered_reader, session_id, agent_name, agent_version + return ( + conn, + process, + filtered_reader, + session_id, + agent_name, + agent_version, + session_loaded, + ) result = self._executor.run_async(_init) ( @@ -1175,6 +1342,7 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: self._session_id, self._agent_name, self._agent_version, + self._session_loaded, ) = result self._working_dir = working_dir @@ -1311,7 +1479,7 @@ async def _prompt() -> PromptResponse: logger.warning( "UsageUpdate not received within %.1fs for session %s", _USAGE_UPDATE_TIMEOUT, - self._session_id, + _fingerprint_session_id(self._session_id), ) return response @@ -1547,7 +1715,7 @@ async def _fork_and_prompt() -> str: logger.warning( "UsageUpdate not received within %.1fs for fork session %s", _USAGE_UPDATE_TIMEOUT, - fork_session_id, + _fingerprint_session_id(fork_session_id), ) fork_elapsed = time.monotonic() - fork_t0 diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 4d19773e85..89fc7d9776 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -571,11 +571,50 @@ def test_init_state_sets_installed_for_resumed_session(self, tmp_path): state = _make_state(tmp_path) state.agent_state = {"acp_session_id": "prior-session-id"} - with patch("openhands.sdk.agent.acp_agent.ACPAgent._start_acp_server"): + # Fake handshake that actually loaded the prior session: ``init_state`` + # bases the suffix-install state on ``_session_loaded`` (which + # ``_start_acp_server`` sets only when ``session/load`` succeeds), not + # on the bare presence of an id in ``agent_state``. + def _fake_start(self, _state): + self._session_loaded = True + + with patch( + "openhands.sdk.agent.acp_agent.ACPAgent._start_acp_server", _fake_start + ): agent.init_state(state, on_event=lambda _: None) assert agent._suffix_install_state == "installed" + def test_init_state_sets_pending_when_explicit_resume_id_fell_back(self, tmp_path): + """Stale ``acp_resume_session_id`` falls back to ``new_session``. + + The suffix is NOT yet in the fresh ACP server's history, so + ``init_state`` must mark the install state as ``pending_first_prompt`` + so the next ``step()`` injects ``agent_context`` into the first user + message. Regression test for the bug where the resume *signal* + (``acp_resume_session_id`` set) was conflated with resume *success*. + """ + agent = _make_agent( + agent_context=AgentContext(system_message_suffix="Team rules."), + acp_resume_session_id="stale-id-server-forgot", + ) + state = _make_state(tmp_path) + + # Simulate the exact scenario the reviewer flagged: explicit resume id + # supplied, but ``_start_acp_server`` had to fall back to new_session, + # so it left ``_session_loaded = False`` (the field's default). + def _fake_start(self, _state): + self._session_loaded = False + + with patch( + "openhands.sdk.agent.acp_agent.ACPAgent._start_acp_server", _fake_start + ): + agent.init_state(state, on_event=lambda _: None) + + assert agent._suffix_install_state == "pending_first_prompt" + assert agent._installed_suffix is not None + assert "Team rules." in agent._installed_suffix + def test_init_state_includes_registry_secrets_in_suffix(self, tmp_path): from pydantic import SecretStr @@ -3349,6 +3388,76 @@ def test_load_session_failure_falls_back_to_new_session(self, tmp_path): conn.new_session.assert_awaited_once() assert agent._session_id == "replacement-sess" + def test_session_ids_redacted_in_resume_log_lines(self, tmp_path, caplog): + """Resume / fallback / cwd-mismatch log lines must not emit plaintext ids. + + ACP session ids are bearer tokens; logger aggregators retain lines + for weeks, so they're a serialization boundary in their own right. + Verifies the four ``_start_acp_server`` log lines that mention a + session id only emit a short suffix fingerprint. + """ + sensitive_explicit = "explicit-do-not-log-abc12345-XXXXLAST" + sensitive_fs = "fs-do-not-log-XYZWLAST" + + # -- successful resume via explicit id ------------------------------ + agent = _make_agent(acp_resume_session_id=sensitive_explicit) + state = _make_state(tmp_path) + state.agent_state = {**state.agent_state, "acp_session_id": sensitive_fs} + conn = self._make_conn() + with caplog.at_level("INFO"): + self._patched_start_acp_server(agent, state, conn=conn) + messages = "\n".join(rec.message for rec in caplog.records) + assert sensitive_explicit not in messages + assert sensitive_fs not in messages + assert "...XXXLAST"[-8:] in messages or "...XXXXLAST"[-8:] in messages + assert "...XYZWLAST"[-8:] in messages + + # -- cwd mismatch warning ------------------------------------------- + caplog.clear() + agent2 = _make_agent(acp_resume_session_id=sensitive_explicit) + state2 = _make_state(tmp_path) + state2.agent_state = { + **state2.agent_state, + "acp_session_id": sensitive_fs, + "acp_session_cwd": "/some/other/place", + } + conn2 = self._make_conn(new_session_id="fresh") + with caplog.at_level("WARNING"): + self._patched_start_acp_server(agent2, state2, conn=conn2) + cwd_warnings = "\n".join( + rec.message for rec in caplog.records if "differs" in rec.message + ) + assert sensitive_explicit not in cwd_warnings + assert sensitive_fs not in cwd_warnings + + # -- load_session failure ------------------------------------------- + caplog.clear() + agent3 = _make_agent(acp_resume_session_id=sensitive_explicit) + state3 = _make_state(tmp_path) + conn3 = self._make_conn( + new_session_id="replacement", + load_exc=ACPRequestError(-32602, "unknown session"), + ) + with caplog.at_level("WARNING"): + self._patched_start_acp_server(agent3, state3, conn=conn3) + fail_warnings = "\n".join( + rec.message for rec in caplog.records if "load_session" in rec.message + ) + assert sensitive_explicit not in fail_warnings + + def test_fingerprint_session_id_helper(self): + """``_fingerprint_session_id`` returns last-8 suffix, never the full id.""" + from openhands.sdk.agent.acp_agent import _fingerprint_session_id + + assert _fingerprint_session_id(None) == "" + assert _fingerprint_session_id("short") == "" + assert _fingerprint_session_id("exactly8") == "" + long_sid = "a" * 24 + "12345678" + out = _fingerprint_session_id(long_sid) + assert long_sid not in out + assert out.endswith("12345678") + assert out.startswith("...") + def test_session_id_not_on_serialized_agent(self): """Session id must not leak onto the agent model — it lives in ConversationState.agent_state, not on the frozen ACPAgent. @@ -3358,6 +3467,110 @@ def test_session_id_not_on_serialized_agent(self): assert "acp_session_id" not in data assert not hasattr(agent, "acp_session_id") + def test_acp_resume_session_id_redacted_by_default(self): + """Default serialization must mask ``acp_resume_session_id``. + + Possession of the id is enough to resume the underlying ACP + session, so it's a transient resume token — never include it + in cleartext in logs, trace exports, PR review attachments, or + anything reachable via plain ``model_dump`` / ``model_dump_json``. + """ + sensitive = "super-secret-resume-id-do-not-leak" + agent = _make_agent(acp_resume_session_id=sensitive) + + data_json = agent.model_dump_json() + assert sensitive not in data_json, ( + f"plaintext id leaked into model_dump_json: {data_json}" + ) + + data = json.loads(data_json) + # The field is present (callers may want to know it exists for + # debugging) but the value must be the redacted sentinel. + assert data.get("acp_resume_session_id") == REDACTED_SECRET_VALUE + + # ``model_dump`` (Python mode) should also redact: while it + # returns a SecretStr instance rather than a plain string, the + # raw secret value must never be readable from its repr/str. + py_dump = agent.model_dump() + py_value = py_dump.get("acp_resume_session_id") + assert sensitive not in repr(py_value) + assert sensitive not in str(py_value) + + def test_acp_resume_session_id_plaintext_with_expose_secrets(self): + """Trusted backend can opt into plaintext via Pydantic context. + + Required so the OpenHands app server can pass the durable id to + the agent server inside ``StartConversationRequest`` (using the + existing ``context={'expose_secrets': True}`` pattern). + """ + sensitive = "super-secret-resume-id-do-not-leak" + agent = _make_agent(acp_resume_session_id=sensitive) + + exposed = agent.model_dump_json(context={"expose_secrets": "plaintext"}) + assert json.loads(exposed)["acp_resume_session_id"] == sensitive + + def test_acp_resume_session_id_none_serializes_as_none(self): + """Absence is not a secret — ``None`` must round-trip as ``null``.""" + agent = _make_agent() + data = json.loads(agent.model_dump_json()) + assert data.get("acp_resume_session_id") is None + + def test_acp_resume_session_id_redacted_sentinel_loads_as_none(self): + """Default-redacted dump must reload as ``None``, not ``'**********'``. + + Without a matching ``field_validator``, ``model_validate_json`` of + a default ``model_dump_json`` would leave ``acp_resume_session_id`` + set to the literal sentinel string — calling ``session/load`` with + that fails server-side and we'd fall back to ``new_session`` every + time, defeating the durable-mirror design. + """ + sensitive = "super-secret-resume-id-do-not-leak" + agent = _make_agent(acp_resume_session_id=sensitive) + + dumped = agent.model_dump_json() # default → redacted + reloaded = ACPAgent.model_validate_json(dumped) + + assert reloaded.acp_resume_session_id is None + + def test_acp_resume_session_id_encrypted_roundtrip(self): + """Encrypted dump + cipher in context decrypts back to the real id. + + Mirrors the frontend round-trip the OpenHands web UI uses for + ``acp_env``: client receives an encrypted blob, ships it back via + ``secrets_encrypted=True``, server decrypts on validate. + """ + from openhands.sdk.utils.cipher import Cipher + + sensitive = "super-secret-resume-id-do-not-leak" + agent = _make_agent(acp_resume_session_id=sensitive) + cipher = Cipher(secret_key="test-cipher-secret-key-for-roundtrip-only") + + encrypted_json = agent.model_dump_json( + context={"expose_secrets": "encrypted", "cipher": cipher} + ) + # Ciphertext, not plaintext, is on the wire. + assert sensitive not in encrypted_json + + reloaded = ACPAgent.model_validate_json( + encrypted_json, context={"cipher": cipher} + ) + assert reloaded.acp_resume_session_id == sensitive + + def test_acp_resume_session_id_plaintext_roundtrip(self): + """Plaintext dump (trusted backend) reloads verbatim without a cipher. + + Trusted backend path: OpenHands app-server → agent-server uses + ``context={'expose_secrets': True}`` and no cipher. The agent- + server must accept the plaintext id on load. + """ + sensitive = "super-secret-resume-id-do-not-leak" + agent = _make_agent(acp_resume_session_id=sensitive) + + exposed = agent.model_dump_json(context={"expose_secrets": "plaintext"}) + reloaded = ACPAgent.model_validate_json(exposed) + + assert reloaded.acp_resume_session_id == sensitive + def test_init_state_writes_cwd_alongside_session_id(self, tmp_path): """init_state records the cwd the session was created under so a later resume can reject cwd mismatches (ACP keys persistence by cwd). @@ -3478,6 +3691,92 @@ def test_resume_path_still_applies_session_mode_and_model(self, tmp_path): session_id="stored-sess", ) + def test_acp_resume_session_id_drives_load_session_when_no_fs_state(self, tmp_path): + """``acp_resume_session_id`` resumes even when ``agent_state`` is empty. + + Cloud sandboxes lose ``base_state.json`` on recycle, so the FS-persisted + ``acp_session_id`` is gone. The app-server mirrors the id into durable + storage and passes it back via ``acp_resume_session_id`` — that should + still drive ``load_session``. + """ + agent = _make_agent(acp_resume_session_id="externally-stored-sess") + state = _make_state(tmp_path) + assert "acp_session_id" not in state.agent_state + conn = self._make_conn() + + self._patched_start_acp_server(agent, state, conn=conn) + + conn.load_session.assert_awaited_once() + _, kwargs = conn.load_session.call_args + assert kwargs["session_id"] == "externally-stored-sess" + assert kwargs["cwd"] == str(tmp_path) + conn.new_session.assert_not_awaited() + assert agent._session_id == "externally-stored-sess" + + def test_acp_resume_session_id_overrides_fs_session_id(self, tmp_path): + """The explicit field wins over the FS-persisted id when they differ. + + Lets the app-server's durable record take precedence over whatever a + previous sandbox happened to write into ``base_state.json``. + """ + agent = _make_agent(acp_resume_session_id="durable-sess") + state = _make_state(tmp_path) + state.agent_state = { + **state.agent_state, + "acp_session_id": "fs-sess", + "acp_session_cwd": str(tmp_path), + } + conn = self._make_conn() + + self._patched_start_acp_server(agent, state, conn=conn) + + conn.load_session.assert_awaited_once() + _, kwargs = conn.load_session.call_args + assert kwargs["session_id"] == "durable-sess" + conn.new_session.assert_not_awaited() + assert agent._session_id == "durable-sess" + + def test_acp_resume_session_id_failure_falls_back_to_new_session(self, tmp_path): + """If the server can't load the explicit id, fall back to new_session. + + The ACP server may have lost its own session storage (no PVC, different + host …); failing closed by aborting the conversation is worse than + starting fresh. Matches the existing ``load_session`` failure path. + """ + agent = _make_agent(acp_resume_session_id="missing-sess") + state = _make_state(tmp_path) + conn = self._make_conn( + new_session_id="replacement-sess", + load_exc=ACPRequestError(-32602, "unknown session"), + ) + + self._patched_start_acp_server(agent, state, conn=conn) + + conn.load_session.assert_awaited_once() + conn.new_session.assert_awaited_once() + assert agent._session_id == "replacement-sess" + + def test_acp_resume_session_id_matches_fs_id_uses_fs_cwd(self, tmp_path): + """When the explicit id equals the FS id, the FS cwd is used (no change). + + Avoids spurious "cwd inferred from current workspace" branches when + the agent_state was just hydrated from the same id. + """ + agent = _make_agent(acp_resume_session_id="same-sess") + state = _make_state(tmp_path) + state.agent_state = { + **state.agent_state, + "acp_session_id": "same-sess", + "acp_session_cwd": str(tmp_path), + } + conn = self._make_conn() + + self._patched_start_acp_server(agent, state, conn=conn) + + conn.load_session.assert_awaited_once() + _, kwargs = conn.load_session.call_args + assert kwargs["session_id"] == "same-sess" + def test_roundtrip_via_conversation_state_persistence(self, tmp_path): """End-to-end round-trip through ConversationState persistence: @@ -3687,15 +3986,17 @@ def test_empty_string_secret_not_injected(self, tmp_path): class TestACPEnvConflictSuppression: - """CLAUDE_CONFIG_DIR OAuth auth must not coexist with API-key env vars. + """Claude Code OAuth credentials must not coexist with API-key env vars. - When CLAUDE_CONFIG_DIR is present in the subprocess environment the agent - uses a credential file for OAuth. If ANTHROPIC_API_KEY or + When Claude Code's OAuth credential file is present in CLAUDE_CONFIG_DIR + the agent authenticates with that OAuth token. If ANTHROPIC_API_KEY or ANTHROPIC_BASE_URL are also present they redirect requests to a proxy that does not support OAuth bearer tokens, breaking auth silently. - _start_acp_server must strip the conflicting vars regardless of where they - came from: acp_env, os.environ, or agent_context.secrets. + The stripping must only happen when OAuth is actually active (credential + file present). CLAUDE_CONFIG_DIR can also be set purely to relocate + session storage onto a persistent volume; in that case API-key auth + remains valid and must not be stripped. """ @staticmethod @@ -3771,25 +4072,42 @@ async def _fake_filter(_src, _dst): return captured + @staticmethod + def _make_oauth_dir(tmp_path) -> str: + """Create a CLAUDE_CONFIG_DIR with a ``.credentials.json`` OAuth file.""" + oauth_dir = tmp_path / "claude-creds" + oauth_dir.mkdir() + (oauth_dir / ".credentials.json").write_text('{"token":"oauth-x"}') + return str(oauth_dir) + + @staticmethod + def _make_empty_dir(tmp_path) -> str: + """Create a CLAUDE_CONFIG_DIR without any OAuth credential file.""" + persist_dir = tmp_path / "claude-persist" + persist_dir.mkdir() + return str(persist_dir) + def test_claude_config_dir_suppresses_api_key_from_acp_env(self, tmp_path): - """ANTHROPIC_API_KEY from acp_env is stripped when CLAUDE_CONFIG_DIR present.""" + """API key is stripped when an OAuth credentials file is present.""" + oauth_dir = self._make_oauth_dir(tmp_path) agent = _make_agent( acp_env={ - "CLAUDE_CONFIG_DIR": "/tmp/claude-creds", + "CLAUDE_CONFIG_DIR": oauth_dir, "ANTHROPIC_API_KEY": "sk-conflict", "ANTHROPIC_BASE_URL": "https://proxy.example.com", } ) env = self._run_start_capturing_env(agent, tmp_path) - assert env["CLAUDE_CONFIG_DIR"] == "/tmp/claude-creds" + assert env["CLAUDE_CONFIG_DIR"] == oauth_dir assert "ANTHROPIC_API_KEY" not in env assert "ANTHROPIC_BASE_URL" not in env def test_claude_config_dir_suppresses_api_key_from_os_environ(self, tmp_path): - """ANTHROPIC_API_KEY leaking in from os.environ is stripped too.""" + """API key leaking in from os.environ is stripped too when OAuth active.""" + oauth_dir = self._make_oauth_dir(tmp_path) agent = _make_agent( - acp_env={"CLAUDE_CONFIG_DIR": "/tmp/claude-creds"}, + acp_env={"CLAUDE_CONFIG_DIR": oauth_dir}, ) env = self._run_start_capturing_env( agent, @@ -3805,13 +4123,14 @@ def test_claude_config_dir_suppresses_api_key_from_os_environ(self, tmp_path): assert "ANTHROPIC_BASE_URL" not in env def test_claude_config_dir_suppresses_api_key_from_secrets(self, tmp_path): - """ANTHROPIC_API_KEY injected via agent_context.secrets is stripped too.""" + """API key injected via agent_context.secrets is stripped too.""" from pydantic import SecretStr from openhands.sdk.secret import StaticSecret + oauth_dir = self._make_oauth_dir(tmp_path) agent = _make_agent( - acp_env={"CLAUDE_CONFIG_DIR": "/tmp/claude-creds"}, + acp_env={"CLAUDE_CONFIG_DIR": oauth_dir}, agent_context=AgentContext( secrets={ "ANTHROPIC_API_KEY": StaticSecret( @@ -3829,6 +4148,27 @@ def test_claude_config_dir_suppresses_api_key_from_secrets(self, tmp_path): assert "ANTHROPIC_API_KEY" not in env assert "ANTHROPIC_BASE_URL" not in env + def test_persistence_only_claude_config_dir_keeps_api_key(self, tmp_path): + """CLAUDE_CONFIG_DIR without OAuth credentials must keep ANTHROPIC_API_KEY. + + Persistence-only use of CLAUDE_CONFIG_DIR (relocating session storage + onto a durable volume) must not strip the API key — API-key auth and + the persistent storage location are independent concerns. + """ + persist_dir = self._make_empty_dir(tmp_path) + agent = _make_agent( + acp_env={ + "CLAUDE_CONFIG_DIR": persist_dir, + "ANTHROPIC_API_KEY": "sk-valid", + "ANTHROPIC_BASE_URL": "https://proxy.example.com", + } + ) + env = self._run_start_capturing_env(agent, tmp_path) + + assert env["CLAUDE_CONFIG_DIR"] == persist_dir + assert env.get("ANTHROPIC_API_KEY") == "sk-valid" + assert env.get("ANTHROPIC_BASE_URL") == "https://proxy.example.com" + def test_no_suppression_without_claude_config_dir(self, tmp_path): """Without CLAUDE_CONFIG_DIR, ANTHROPIC_API_KEY passes through unchanged.""" agent = _make_agent(