From 5d0d63131bc41095b02cba12204a746ecc2bf67c Mon Sep 17 00:00:00 2001 From: Debug Agent Date: Wed, 27 May 2026 17:42:45 +0200 Subject: [PATCH 1/2] fix(acp): keep current_model_id honest on resume and unapplied overrides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses two correctness gaps in the model-state surfacing from #3347 that were flagged in review but went in unaddressed (review landed ~3 min before merge). Both make `ConversationInfo.current_model_id` (the picker/chip) lie about the model the ACP server is actually running. 1. Stale id on resume (asymmetric persistence gating). `init_state` cleared the persisted `acp_available_models` when the server reported a `models` block with no usable models, but gated `acp_current_model_id` only on the value being non-None — so a true resume reporting `currentModelId: ""` / `availableModels: []` cleared the list while keeping the old id, leaving a chip pointing at a model absent from the (now-empty) picker. The clear branch now mirrors the list gating (`_available_models is not None`). 2. Unapplied override surfaced as current. `current_model_id = self.acp_model or reported_model_id` trusted the override even on paths where it never reached the server: a fresh unknown/custom provider (no `_meta`, no `set_session_model`), or a resume whose `set_session_model` the server rejected (swallowed `ACPRequestError`). `_maybe_set_session_model` and `_reapply_session_model_on_resume` now return whether the override was actually applied, and the resolution prefers `acp_model` only then — otherwise it surfaces the server-reported id (or None). Tests: extend the helper unit tests with the new bool return, strengthen the explicit-empty-models resume test to assert the id clears too, and add integration coverage for unknown-provider / rejected-resume / applied-override resolution. All four bug-path tests fail on the pre-fix code. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 70 +++++-- tests/sdk/agent/test_acp_agent.py | 194 +++++++++++++++++- 2 files changed, 241 insertions(+), 23 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 1574609f4e..12f30b1977 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -260,7 +260,7 @@ async def _maybe_set_session_model( agent_name: str, session_id: str, acp_model: str | None, -) -> None: +) -> bool: """Apply the *initial* session model right after session creation. This is the session-creation path only, gated on @@ -275,12 +275,20 @@ async def _maybe_set_session_model( :meth:`ACPAgent.set_acp_model` instead, which always uses ``set_session_model`` and is gated on the separate ``supports_runtime_model_switch`` capability flag. + + Returns ``True`` only when this issued a ``set_session_model`` call — i.e. + the override was actually pushed to the server via *this* path. ``False`` + when there is nothing to apply (no ``acp_model``) or the provider selects + its model another way (``_meta``) or not at all (unknown/custom server), so + the caller can tell whether the live session is really running ``acp_model``. """ if not acp_model: - return + return False provider = detect_acp_provider_by_agent_name(agent_name) if provider is not None and provider.supports_set_session_model: await conn.set_session_model(model_id=acp_model, session_id=session_id) + return True + return False async def _reapply_session_model_on_resume( @@ -288,7 +296,7 @@ async def _reapply_session_model_on_resume( agent_name: str, session_id: str, acp_model: str | None, -) -> None: +) -> bool: """Reapply the persisted model to a *resumed* session. ``load_session()`` carries no model ``_meta``, so a session resumed after a @@ -303,14 +311,22 @@ async def _reapply_session_model_on_resume( ``set_session_model`` for later switches. A server that rejects the call is tolerated (logged) — like the ``load_session`` fallback above — so resume can't break; the session keeps the server default until the next switch. + + Returns ``True`` only when ``set_session_model`` was issued and accepted, so + the caller knows the resumed live session is actually running ``acp_model``. + ``False`` when there is nothing to reapply, the provider doesn't support the + switch, or the server rejected the call (swallowed) — in those cases the + session keeps the server default and the override must not be surfaced as + the current model. """ if not acp_model: - return + return False provider = detect_acp_provider_by_agent_name(agent_name) if provider is not None and not provider.supports_runtime_model_switch: - return + return False try: await conn.set_session_model(model_id=acp_model, session_id=session_id) + return True except ACPRequestError as e: logger.warning( "Could not reapply model %r on resumed session %s (%s); the live " @@ -319,6 +335,7 @@ async def _reapply_session_model_on_resume( session_id, e, ) + return False def _extract_token_usage( @@ -1170,9 +1187,18 @@ def init_state( # ``current_model_id`` is known whenever the caller forced ``acp_model`` # (e.g. a prior runtime switch) or the server reported one, even on a # resume whose ``load_session`` omitted the UNSTABLE ``models`` block. + # The clear branch mirrors the ``acp_available_models`` gating below so + # the two fields can't disagree: drop the persisted id when either the + # session was replaced (``not truly_resumed``) OR the server *did* + # report a ``models`` block this launch but with no usable current id + # (``_available_models is not None`` while ``_current_model_id is None``, + # e.g. ``currentModelId: ""`` / ``availableModels: []``). Without the + # second clause a stale id would survive a resume even though the server + # explicitly reported no current model, leaving a chip that points at a + # model absent from the (now-cleared) picker list. if self._current_model_id is not None: new_agent_state["acp_current_model_id"] = self._current_model_id - elif not truly_resumed: + elif not truly_resumed or self._available_models is not None: new_agent_state.pop("acp_current_model_id", None) # The list is gated *independently* on whether the server actually # reported a ``models`` block this launch (``None`` = absent), NOT on @@ -1446,6 +1472,12 @@ async def _init() -> tuple[ e, ) + # Track whether ``acp_model`` was actually pushed to the server so + # ``current_model_id`` below can stay honest: a caller override that + # never reached the server (unknown provider on a fresh session, or + # a resume whose ``set_session_model`` the server rejected) must not + # be surfaced as the live model. + override_applied = False if session_id is None: # Fresh session. Build _meta content for session options (e.g. # model selection). Extra kwargs to new_session() become the @@ -1458,30 +1490,42 @@ async def _init() -> tuple[ # Initial-selection protocol call for providers that use it # (codex-acp, gemini-cli); no-op for claude, which selected its # model via the _meta above. - await _maybe_set_session_model( + applied_via_call = await _maybe_set_session_model( conn, agent_name, session_id, self.acp_model, ) + # The override actually reached the server iff it rode in via the + # session ``_meta`` (claude — non-empty ``session_meta``) or the + # protocol call was issued (codex/gemini). An unknown/custom + # provider gets neither, so ``acp_model`` is set on the agent but + # the server is running its own default. + override_applied = bool(session_meta) or applied_via_call else: # Resumed session. load_session() does not carry model _meta, so # reapply the persisted (possibly runtime-switched) acp_model via # the runtime-switch capability — otherwise the resumed live # session would run on the server default while serialized state # claims the switched model. - await _reapply_session_model_on_resume( + override_applied = await _reapply_session_model_on_resume( conn, agent_name, session_id, self.acp_model, ) - # Resolve the model the agent will actually use. If the caller - # forced one via ``acp_model``, trust that; otherwise fall back to - # whatever the server reported in ``models.currentModelId``. Older - # agents that don't surface the field leave it ``None``. - current_model_id = self.acp_model or reported_model_id + # Resolve the model the agent will actually use. Prefer the caller's + # ``acp_model`` only when it was actually applied to the server above; + # otherwise the server is running its own model, so surface what it + # reported in ``models.currentModelId`` (``None`` for older agents + # that don't surface the field). Trusting ``acp_model`` on paths + # where it never reached the server would mislabel the chip/picker. + current_model_id = ( + self.acp_model + if (self.acp_model and override_applied) + else reported_model_id + ) # Resolve the permission mode. Known providers each have their # own mode ID (bypassPermissions, full-access, yolo …). diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index fc3027cbef..1ba96d6430 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -3232,11 +3232,15 @@ class TestMaybeSetSessionModel: @pytest.mark.asyncio async def test_codex_agent_uses_protocol_model_override(self): conn = AsyncMock() - await _maybe_set_session_model(conn, "codex-acp", "session-1", "gpt-5.4") + applied = await _maybe_set_session_model( + conn, "codex-acp", "session-1", "gpt-5.4" + ) conn.set_session_model.assert_awaited_once_with( model_id="gpt-5.4", session_id="session-1", ) + # The override was actually pushed to the server via the protocol call. + assert applied is True @pytest.mark.asyncio async def test_meta_key_provider_skips_protocol_override_at_init(self): @@ -3244,19 +3248,34 @@ async def test_meta_key_provider_skips_protocol_override_at_init(self): # one-shot init set_session_model call is skipped (even though the # provider now supports the protocol call for runtime switches). conn = AsyncMock() - await _maybe_set_session_model( + applied = await _maybe_set_session_model( conn, "claude-agent-acp", "session-1", "claude-opus-4-6", ) conn.set_session_model.assert_not_called() + # Not applied *via this call* — claude rode the model in via _meta on + # new_session, which the caller accounts for separately. + assert applied is False @pytest.mark.asyncio async def test_missing_model_skips_protocol_override(self): conn = AsyncMock() - await _maybe_set_session_model(conn, "codex-acp", "session-1", None) + applied = await _maybe_set_session_model(conn, "codex-acp", "session-1", None) conn.set_session_model.assert_not_called() + assert applied is False + + @pytest.mark.asyncio + async def test_unknown_provider_does_not_apply_override_at_init(self): + # An unknown/custom server gets neither _meta nor the protocol call on a + # fresh session, so the override never reaches the server. + conn = AsyncMock() + applied = await _maybe_set_session_model( + conn, "some-custom-acp", "session-1", "whatever" + ) + conn.set_session_model.assert_not_called() + assert applied is False class TestReapplySessionModelOnResume: @@ -3270,28 +3289,33 @@ async def test_claude_reapplies_persisted_model_on_resume(self): # be reapplied via the runtime-switch gate — _maybe_set_session_model # would skip it. conn = AsyncMock() - await _reapply_session_model_on_resume( + applied = await _reapply_session_model_on_resume( conn, "claude-agent-acp", "sess-1", "claude-haiku-4-5-20251001" ) conn.set_session_model.assert_awaited_once_with( model_id="claude-haiku-4-5-20251001", session_id="sess-1" ) + assert applied is True @pytest.mark.asyncio async def test_codex_reapplies_persisted_model_on_resume(self): conn = AsyncMock() - await _reapply_session_model_on_resume( + applied = await _reapply_session_model_on_resume( conn, "codex-acp", "sess-1", "gpt-5.4/low" ) conn.set_session_model.assert_awaited_once_with( model_id="gpt-5.4/low", session_id="sess-1" ) + assert applied is True @pytest.mark.asyncio async def test_missing_model_skips_reapply(self): conn = AsyncMock() - await _reapply_session_model_on_resume(conn, "claude-agent-acp", "sess-1", None) + applied = await _reapply_session_model_on_resume( + conn, "claude-agent-acp", "sess-1", None + ) conn.set_session_model.assert_not_called() + assert applied is False @pytest.mark.asyncio async def test_unknown_provider_attempts_reapply(self): @@ -3300,12 +3324,13 @@ async def test_unknown_provider_attempts_reapply(self): # resume must mirror that and attempt the reapply too (otherwise the # resumed session would silently revert to the server default). conn = AsyncMock() - await _reapply_session_model_on_resume( + applied = await _reapply_session_model_on_resume( conn, "some-custom-acp", "sess-1", "whatever" ) conn.set_session_model.assert_awaited_once_with( model_id="whatever", session_id="sess-1" ) + assert applied is True @pytest.mark.asyncio async def test_known_unsupported_provider_skips_reapply(self): @@ -3328,8 +3353,11 @@ async def test_known_unsupported_provider_skips_reapply(self): "openhands.sdk.agent.acp_agent.detect_acp_provider_by_agent_name", return_value=unsupported, ): - await _reapply_session_model_on_resume(conn, "legacy-acp", "sess-1", "x") + applied = await _reapply_session_model_on_resume( + conn, "legacy-acp", "sess-1", "x" + ) conn.set_session_model.assert_not_called() + assert applied is False @pytest.mark.asyncio async def test_client_rejection_is_swallowed_on_resume(self): @@ -3340,10 +3368,13 @@ async def test_client_rejection_is_swallowed_on_resume(self): conn.set_session_model.side_effect = ACPRequestError( code=-32601, message="method not found" ) - await _reapply_session_model_on_resume( + applied = await _reapply_session_model_on_resume( conn, "some-custom-acp", "sess-1", "whatever" ) conn.set_session_model.assert_awaited_once() + # Rejected => the live session kept the server default, so the override + # must NOT be reported as applied. + assert applied is False @pytest.mark.asyncio async def test_any_request_error_is_swallowed_on_resume(self): @@ -3354,10 +3385,11 @@ async def test_any_request_error_is_swallowed_on_resume(self): conn.set_session_model.side_effect = ACPRequestError( code=-32603, message="internal error" ) - await _reapply_session_model_on_resume( + applied = await _reapply_session_model_on_resume( conn, "codex-acp", "sess-1", "gpt-5.4/low" ) conn.set_session_model.assert_awaited_once() + assert applied is False class TestSetACPModel: @@ -4236,6 +4268,58 @@ def test_resume_with_explicit_empty_models_clears_stale_list(self, tmp_path): # The stale list is cleared (overwritten with []), not preserved. assert state.agent_state["acp_available_models"] == [] + # ...and the stale current id is cleared in lock-step: the server + # reported a ``models`` block with no usable current id, so leaving the + # old id would render a chip that points at a model absent from the + # (now-empty) picker list. + assert "acp_current_model_id" not in state.agent_state + + def test_resume_with_reported_models_but_no_current_clears_stale_id(self, tmp_path): + """Resume where the server reports a non-empty ``availableModels`` list + but no usable ``currentModelId`` must CLEAR the stale persisted current + id while adopting the freshly reported list. + + This is the asymmetric-gating case: ``_available_models`` is reported + (so the list is overwritten) while ``_current_model_id`` is ``None``. + The current id must follow the list's "reported" signal, not silently + keep a stale value the server no longer claims. + """ + from openhands.sdk.utils.async_executor import AsyncExecutor + + agent = _make_agent() + state = _make_state(tmp_path) + state.agent_state = { + **state.agent_state, + "acp_session_id": "resumable-sess", + "acp_session_cwd": str(tmp_path), + "acp_current_model_id": "model-a", + "acp_available_models": [ + {"model_id": "model-a", "name": "Model A", "description": None}, + ], + } + # load_session carries a models block listing models, but with no + # current selection (e.g. the server cleared its current model). + conn = self._make_conn() + load_response = MagicMock() + load_response.models = MagicMock() + load_response.models.current_model_id = "" + model_x = MagicMock() + model_x.model_id = "model-x" + model_x.name = "Model X" + model_x.description = None + load_response.models.available_models = [model_x] + conn.load_session = AsyncMock(return_value=load_response) + + agent._executor = AsyncExecutor() + with self._transport_patches(conn): + agent.init_state(state, on_event=lambda _: None) + + # The stale current id is dropped (server reported none)... + assert "acp_current_model_id" not in state.agent_state + # ...while the freshly reported picker list replaces the stale one. + assert [m["model_id"] for m in state.agent_state["acp_available_models"]] == [ + "model-x" + ] def test_fresh_replacement_clears_stale_model_when_new_session_omits_models( self, tmp_path @@ -4375,6 +4459,96 @@ def test_resume_path_still_applies_session_mode_and_model(self, tmp_path): session_id="stored-sess", ) + @staticmethod + def _models_block(current_model_id: str, model_ids: list[str]): + """Build a response ``.models`` block mock for the resolution tests.""" + models = MagicMock() + models.current_model_id = current_model_id + entries = [] + for mid in model_ids: + m = MagicMock() + m.model_id = mid + m.name = mid + m.description = None + entries.append(m) + models.available_models = entries + return models + + def test_unknown_provider_surfaces_server_model_not_unapplied_override( + self, tmp_path + ): + """Fresh session on an unknown/custom provider with ``acp_model`` set: + the override is never pushed to the server (no ``_meta``, no protocol + call), so ``current_model_id`` must reflect what the server reported — + not the override the live session isn't actually running. + """ + agent = _make_agent(acp_model="caller-model") + state = _make_state(tmp_path) + new_response = MagicMock() + new_response.session_id = "fresh-sess" + new_response.models = self._models_block("server-model", ["server-model"]) + conn = self._make_conn() + conn.initialize.return_value.agent_info.name = "some-custom-acp" + conn.initialize.return_value.auth_methods = [] + conn.new_session = AsyncMock(return_value=new_response) + + self._patched_start_acp_server(agent, state, conn=conn) + + # Unknown provider => the override never reached the server. + conn.set_session_model.assert_not_awaited() + assert agent.current_model_id == "server-model" + + def test_known_provider_surfaces_applied_override(self, tmp_path): + """Fresh session on a provider that applies the override via the + protocol call (codex): ``current_model_id`` reflects the override, since + it was actually pushed to the server. Guards the precedence the QA + verified — the fix must not regress the happy override path. + """ + agent = _make_agent(acp_model="caller-model") + state = _make_state(tmp_path) + new_response = MagicMock() + new_response.session_id = "fresh-sess" + new_response.models = self._models_block("server-old", ["server-old"]) + conn = self._make_conn() + conn.initialize.return_value.agent_info.name = "codex-acp" + conn.initialize.return_value.auth_methods = [] + conn.new_session = AsyncMock(return_value=new_response) + + self._patched_start_acp_server(agent, state, conn=conn) + + conn.set_session_model.assert_awaited_once_with( + model_id="caller-model", session_id="fresh-sess" + ) + assert agent.current_model_id == "caller-model" + + def test_resume_rejected_override_surfaces_server_model(self, tmp_path): + """Resume where ``set_session_model`` is rejected: the live session keeps + the server default, so ``current_model_id`` must fall back to what the + server reported on ``load_session`` rather than claiming the override. + """ + agent = _make_agent(acp_model="caller-model") + state = _make_state(tmp_path) + state.agent_state = { + **state.agent_state, + "acp_session_id": "stored-sess", + "acp_session_cwd": str(tmp_path), + } + load_response = MagicMock() + load_response.models = self._models_block("server-resumed", ["server-resumed"]) + conn = self._make_conn() + conn.initialize.return_value.agent_info.name = "codex-acp" + conn.initialize.return_value.auth_methods = [] + conn.load_session = AsyncMock(return_value=load_response) + # Server rejects the reapply — swallowed, session keeps its own model. + conn.set_session_model = AsyncMock( + side_effect=ACPRequestError(code=-32601, message="method not found") + ) + + self._patched_start_acp_server(agent, state, conn=conn) + + conn.load_session.assert_awaited_once() + assert agent.current_model_id == "server-resumed" + def test_roundtrip_via_conversation_state_persistence(self, tmp_path): """End-to-end round-trip through ConversationState persistence: From aa8f24541f42a2c5e51b37628509e24159592fbc Mon Sep 17 00:00:00 2001 From: Debug Agent Date: Wed, 27 May 2026 18:00:51 +0200 Subject: [PATCH 2/2] fix(acp): clear current_model_id on rejected resume + absent models block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on this PR: the first commit still let an unapplied override leak through on one path — a true resume where `load_session` omits the (UNSTABLE) `models` block AND the server rejects `set_session_model`. There `_reapply_session_model_on_resume` returns False, `_current_model_id` is None, but `truly_resumed` is true and `_available_models` is None, so the persisted `acp_current_model_id` (which named the rejected override) was preserved — and even after clearing it, `ConversationInfo`'s final `acp_model` fallback would re-assert the override for a live agent. Two coordinated changes: - ACPAgent now carries an explicit `_model_override_applied` signal out of `_init`. `init_state` clears the stale persisted id when an override was attempted but not applied (`acp_model` set yet `_model_override_applied` False), in addition to the existing replaced-session / reported-empty cases. - `_compose_conversation_info` gates the `acp_model` fallback on the agent not being live+initialized. Once `init_state` has fired, `current_model_id` is authoritative (including None); the `acp_model` fallback is only for cold reads. The persisted-id fallback is unchanged (kept honest by `init_state`), so the legitimate preserve-on-resume case still surfaces the last-known model. Tests: regression for rejected `set_session_model` + absent `models` (id cleared, override-not-applied), plus live-agent-no-fallback and cold-read- fallback coverage for `_compose_conversation_info`. Both bug-path tests fail on the pre-fix code. 255 passed; ruff + pyright clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../agent_server/conversation_service.py | 18 +++++-- .../openhands/sdk/agent/acp_agent.py | 48 ++++++++++++++----- .../test_conversation_info_model.py | 34 +++++++++++++ tests/sdk/agent/test_acp_agent.py | 47 ++++++++++++++++++ 4 files changed, 132 insertions(+), 15 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/conversation_service.py b/openhands-agent-server/openhands/agent_server/conversation_service.py index 9500c1d21c..14ddc7d190 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_service.py +++ b/openhands-agent-server/openhands/agent_server/conversation_service.py @@ -259,13 +259,23 @@ def _compose_conversation_info( agent = state.agent # current_model_id: live PrivateAttr (fresh after a runtime switch) → the # persisted hint → the authoritative ``acp_model`` the agent runs on resume. - # The final fallback covers a switched/overridden conversation whose server - # never surfaced the UNSTABLE ``models`` block: ``acp_model`` is a real - # (serialized) field, so it's the source of truth a cold read can trust. + # + # The ``acp_model`` fallback is gated on the agent NOT being a live, + # initialized one. Once ``init_state`` has fired, ``current_model_id`` is the + # authoritative resolved value — including ``None`` when an override couldn't + # be applied (unknown provider, or a resume whose ``set_session_model`` the + # server rejected) — so falling back to ``acp_model`` there would re-assert an + # override the live session isn't actually running. The fallback is only for + # *cold* reads (``init_state`` hasn't fired, PrivateAttrs still empty), where + # the serialized ``acp_model`` is the best last-known hint. The persisted + # ``acp_current_model_id`` hint is kept honest by ``ACPAgent.init_state`` (it + # clears the value whenever the override wasn't applied), so it's safe in + # both cases. + agent_initialized = bool(getattr(agent, "_initialized", False)) current_model_id = ( getattr(agent, "current_model_id", None) or agent_state.get("acp_current_model_id") - or getattr(agent, "acp_model", None) + or (None if agent_initialized else getattr(agent, "acp_model", None)) ) # available_models: the property returns ``[]`` (never ``None``) for *both* a # cold-read agent (PrivateAttr default, init_state hasn't fired) and a live diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 12f30b1977..b50441ea30 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -888,6 +888,15 @@ def model_post_init(self, __context: object) -> None: # in ``init_state`` uses that distinction to preserve vs clear the stored # list on resume. The public ``available_models`` property coerces to ``[]``. _available_models: list[ACPModelInfo] | None = PrivateAttr(default=None) + # Whether the caller's ``acp_model`` was actually pushed to the server in + # the most recent session init (via session ``_meta`` or ``set_session_model``). + # ``False`` when there's no override, the provider can't apply it (unknown + # server on a fresh session), or the server rejected the call on resume — in + # those cases the live session runs its own default, so neither + # ``_current_model_id`` nor the ``ConversationInfo`` fallback may surface + # ``acp_model`` as the active model. Read by ``init_state`` to decide whether + # a stale persisted ``acp_current_model_id`` must be cleared. + _model_override_applied: bool = PrivateAttr(default=False) # Callback to signal that the ACP subprocess is actively working. # Injected by the agent-server to call update_last_execution_time(). _on_activity: Any = PrivateAttr(default=None) # Callable[[], None] | None @@ -1187,18 +1196,33 @@ def init_state( # ``current_model_id`` is known whenever the caller forced ``acp_model`` # (e.g. a prior runtime switch) or the server reported one, even on a # resume whose ``load_session`` omitted the UNSTABLE ``models`` block. - # The clear branch mirrors the ``acp_available_models`` gating below so - # the two fields can't disagree: drop the persisted id when either the - # session was replaced (``not truly_resumed``) OR the server *did* - # report a ``models`` block this launch but with no usable current id - # (``_available_models is not None`` while ``_current_model_id is None``, - # e.g. ``currentModelId: ""`` / ``availableModels: []``). Without the - # second clause a stale id would survive a resume even though the server - # explicitly reported no current model, leaving a chip that points at a - # model absent from the (now-cleared) picker list. + # When it is *not* known (``None``), drop any stale persisted id unless + # we're in the one case where the persisted value is still our best + # guess: a true resume where the server didn't report a ``models`` block + # AND no override was attempted (so the server is presumably still on the + # model it had last launch). Clear it otherwise — + # - ``not truly_resumed``: the session was replaced, so the persisted id + # describes a dead session; + # - ``_available_models is not None``: the server reported a ``models`` + # block but no usable current id (``currentModelId: ""``), so it + # explicitly has none; + # - override attempted but not applied (``acp_model`` set yet + # ``_model_override_applied`` is False): we tried to force a model and + # the server rejected/ignored it, so the persisted id (which named + # that override) no longer reflects reality. + # This mirrors the ``acp_available_models`` gating below so the two fields + # can't disagree, and keeps the chip/picker from advertising a model the + # live session isn't actually running. + override_attempted_not_applied = bool(self.acp_model) and ( + not self._model_override_applied + ) if self._current_model_id is not None: new_agent_state["acp_current_model_id"] = self._current_model_id - elif not truly_resumed or self._available_models is not None: + elif ( + not truly_resumed + or self._available_models is not None + or override_attempted_not_applied + ): new_agent_state.pop("acp_current_model_id", None) # The list is gated *independently* on whether the server actually # reported a ``models`` block this launch (``None`` = absent), NOT on @@ -1347,7 +1371,7 @@ def _start_acp_server(self, state: ConversationState) -> None: prior_session_id = None async def _init() -> tuple[ - str, str, str, str | None, list[ACPModelInfo] | None + str, str, str, str | None, list[ACPModelInfo] | None, bool ]: # Spawn the subprocess directly so we can install a # filtering reader that skips non-JSON-RPC lines some @@ -1545,6 +1569,7 @@ async def _init() -> tuple[ agent_version, current_model_id, available_models, + override_applied, ) # _conn / _process / _filtered_reader are assigned inside _init() (right @@ -1556,6 +1581,7 @@ async def _init() -> tuple[ self._agent_version, self._current_model_id, self._available_models, + self._model_override_applied, ) = self._executor.run_async(_init) self._working_dir = working_dir diff --git a/tests/agent_server/test_conversation_info_model.py b/tests/agent_server/test_conversation_info_model.py index be60b39ff9..eb55c65a10 100644 --- a/tests/agent_server/test_conversation_info_model.py +++ b/tests/agent_server/test_conversation_info_model.py @@ -140,6 +140,40 @@ def test_current_model_id_propagates_init_resolution( assert info.current_model_id == expected +def test_cold_read_falls_back_to_acp_model_override(): + """Cold list read (``init_state`` not fired): with no live/persisted current + id, the serialized ``acp_model`` is the best last-known hint and is surfaced. + """ + agent = ACPAgent(acp_command=["echo", "test"], acp_model="model-x") + # _initialized defaults to False (cold read); _current_model_id defaults None. + state = _make_state(agent) + stored = _make_stored(state) + + info = _compose_conversation_info(stored, state) + + assert info.current_model_id == "model-x" + + +def test_live_agent_does_not_fall_back_to_unapplied_override(): + """Live initialized agent whose override was NOT applied (e.g. a resume whose + ``set_session_model`` the server rejected): ``current_model_id`` is the + authoritative ``None`` and must NOT fall back to ``acp_model`` — that would + re-assert an override the live session isn't running. + """ + agent = ACPAgent(acp_command=["echo", "test"], acp_model="model-x") + # init_state has fired; the override resolved to None (rejected) and was + # recorded as not applied. The persisted hint was cleared by init_state. + agent._initialized = True + agent._current_model_id = None + agent._model_override_applied = False + state = _make_state(agent) + stored = _make_stored(state) + + info = _compose_conversation_info(stored, state) + + assert info.current_model_id is None + + def test_available_models_lifted_from_acp_agent(): """``ConversationInfo.available_models`` mirrors the agent's model list. diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 1ba96d6430..08f45fa28d 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -4321,6 +4321,53 @@ def test_resume_with_reported_models_but_no_current_clears_stale_id(self, tmp_pa "model-x" ] + def test_resume_rejected_override_with_absent_models_clears_stale_id( + self, tmp_path + ): + """Resume where ``set_session_model`` is rejected AND ``load_session`` + omits the ``models`` block must CLEAR the stale persisted current id. + + This is the case the preserve-on-resume rule would otherwise keep: + ``truly_resumed`` is true and ``_available_models`` is ``None`` (server + didn't report a block), so the only signal that the persisted id is now + wrong is that we attempted to force ``acp_model`` and the server rejected + it (``_model_override_applied`` is False). The persisted id named that + rejected override, so it no longer reflects the live session. + """ + from openhands.sdk.utils.async_executor import AsyncExecutor + + # ``model-x`` was the authoritative model last launch (applied + persisted). + agent = _make_agent(acp_model="model-x") + state = _make_state(tmp_path) + state.agent_state = { + **state.agent_state, + "acp_session_id": "resumable-sess", + "acp_session_cwd": str(tmp_path), + "acp_current_model_id": "model-x", + } + # load_session succeeds (id preserved => truly_resumed) but carries no + # models block, and the server now rejects the reapply of ``model-x``. + conn = self._make_conn() + conn.initialize.return_value.agent_info.name = "codex-acp" + conn.initialize.return_value.auth_methods = [] + load_response = MagicMock(spec=[]) # no .models block + conn.load_session = AsyncMock(return_value=load_response) + conn.set_session_model = AsyncMock( + side_effect=ACPRequestError(code=-32601, message="method not found") + ) + + agent._executor = AsyncExecutor() + with self._transport_patches(conn): + agent.init_state(state, on_event=lambda _: None) + + # Resume kept the same session id (so this is a true resume)... + assert state.agent_state["acp_session_id"] == "resumable-sess" + # ...the override was not applied, so neither the live attr nor the + # persisted hint may claim ``model-x``. + assert agent.current_model_id is None + assert agent._model_override_applied is False + assert "acp_current_model_id" not in state.agent_state + def test_fresh_replacement_clears_stale_model_when_new_session_omits_models( self, tmp_path ):