diff --git a/openhands-agent-server/openhands/agent_server/conversation_service.py b/openhands-agent-server/openhands/agent_server/conversation_service.py index 14ddc7d190..1a9cbb8f2d 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_service.py +++ b/openhands-agent-server/openhands/agent_server/conversation_service.py @@ -258,24 +258,27 @@ def _compose_conversation_info( agent_state = getattr(state, "agent_state", {}) or {} agent = state.agent # current_model_id: live PrivateAttr (fresh after a runtime switch) → the - # persisted hint → the authoritative ``acp_model`` the agent runs on resume. + # persisted hint → the serialized ``acp_model`` the agent was asked to run. # - # The ``acp_model`` fallback is gated on the agent NOT being a live, - # initialized one. Once ``init_state`` has fired, ``current_model_id`` is the - # authoritative resolved value — including ``None`` when an override couldn't - # be applied (unknown provider, or a resume whose ``set_session_model`` the - # server rejected) — so falling back to ``acp_model`` there would re-assert an - # override the live session isn't actually running. The fallback is only for - # *cold* reads (``init_state`` hasn't fired, PrivateAttrs still empty), where - # the serialized ``acp_model`` is the best last-known hint. The persisted - # ``acp_current_model_id`` hint is kept honest by ``ACPAgent.init_state`` (it - # clears the value whenever the override wasn't applied), so it's safe in - # both cases. - agent_initialized = bool(getattr(agent, "_initialized", False)) + # The ``acp_model`` fallback is gated on NO ACP session having been started + # yet (no persisted ``acp_session_id``). Once a session exists, a launch of + # ``ACPAgent.init_state`` has authoritatively recorded the resolved model + # into ``acp_current_model_id`` — *present* = the model the live session + # actually ran, *absent* = deliberately cleared because the override was + # rejected/unapplied. Either way the persisted hint is the source of truth, + # so we must NOT fall back to ``acp_model`` (the merely-requested override), + # which would re-advertise a model the server isn't running. Crucially this + # holds across agent-server restarts too: a cold read reconstructs the agent + # with ``acp_model`` set and PrivateAttrs reset, so an ``_initialized``-based + # gate would wrongly resurrect a cleared id — keying on the persisted session + # id instead keeps a cleared id cleared. The fallback applies only *before* + # the first session, where ``acp_model`` is the best (and only) hint for what + # the conversation will run. + has_acp_session = bool(agent_state.get("acp_session_id")) current_model_id = ( getattr(agent, "current_model_id", None) or agent_state.get("acp_current_model_id") - or (None if agent_initialized else getattr(agent, "acp_model", None)) + or (None if has_acp_session else getattr(agent, "acp_model", None)) ) # available_models: the property returns ``[]`` (never ``None``) for *both* a # cold-read agent (PrivateAttr default, init_state hasn't fired) and a live diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index b50441ea30..0bb9c6e926 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1520,18 +1520,23 @@ async def _init() -> tuple[ session_id, self.acp_model, ) + # The override rode in via the session ``_meta`` (claude — + # non-empty ``session_meta``); ``new_session`` has already + # processed it, so ``reported_model_id`` reflects the server's + # *post-override* selection. + applied_via_meta = bool(session_meta) # The override actually reached the server iff it rode in via the - # session ``_meta`` (claude — non-empty ``session_meta``) or the - # protocol call was issued (codex/gemini). An unknown/custom - # provider gets neither, so ``acp_model`` is set on the agent but - # the server is running its own default. - override_applied = bool(session_meta) or applied_via_call + # ``_meta`` above or the protocol call was issued (codex/gemini). + # An unknown/custom provider gets neither, so ``acp_model`` is set + # on the agent but the server is running its own default. + override_applied = applied_via_meta or applied_via_call else: # Resumed session. load_session() does not carry model _meta, so # reapply the persisted (possibly runtime-switched) acp_model via # the runtime-switch capability — otherwise the resumed live # session would run on the server default while serialized state # claims the switched model. + applied_via_meta = False override_applied = await _reapply_session_model_on_resume( conn, agent_name, @@ -1539,17 +1544,30 @@ async def _init() -> tuple[ self.acp_model, ) - # Resolve the model the agent will actually use. Prefer the caller's - # ``acp_model`` only when it was actually applied to the server above; - # otherwise the server is running its own model, so surface what it - # reported in ``models.currentModelId`` (``None`` for older agents - # that don't surface the field). Trusting ``acp_model`` on paths - # where it never reached the server would mislabel the chip/picker. - current_model_id = ( - self.acp_model - if (self.acp_model and override_applied) - else reported_model_id - ) + # Resolve the model the agent will actually use. + # + # ``_meta`` path: ``new_session`` already returned the server's actual + # ``models.currentModelId`` *after* applying the override, so trust + # that reported id — the server may normalize, alias, or silently + # ignore the request (e.g. claude-agent-acp reports ``"default"`` even + # when a specific model was requested). Surfacing ``self.acp_model`` + # here would claim a model the server isn't on and mismatch the picker + # (``available_models`` is keyed by the server's ids). Fall back to the + # requested id only when the server surfaced no model state at all. + # + # ``set_session_model`` path (codex/gemini fresh, or any resume reapply): + # the protocol call happens *after* the new_session/load_session + # response, so ``reported_model_id`` predates the switch — the + # requested ``acp_model`` is what the live session now runs. + # + # Not applied (or no override): the server runs its own model, so + # surface what it reported (``None`` for agents that omit the field). + if applied_via_meta: + current_model_id = reported_model_id or self.acp_model + elif self.acp_model and override_applied: + current_model_id = self.acp_model + else: + current_model_id = reported_model_id # Resolve the permission mode. Known providers each have their # own mode ID (bypassPermissions, full-access, yolo …). diff --git a/tests/agent_server/test_conversation_info_model.py b/tests/agent_server/test_conversation_info_model.py index eb55c65a10..7bc4a5ddd0 100644 --- a/tests/agent_server/test_conversation_info_model.py +++ b/tests/agent_server/test_conversation_info_model.py @@ -140,12 +140,12 @@ def test_current_model_id_propagates_init_resolution( assert info.current_model_id == expected -def test_cold_read_falls_back_to_acp_model_override(): - """Cold list read (``init_state`` not fired): with no live/persisted current - id, the serialized ``acp_model`` is the best last-known hint and is surfaced. +def test_presession_cold_read_falls_back_to_acp_model_override(): + """Pre-session cold read (no ACP session started yet): with no live/persisted + current id, the serialized ``acp_model`` is the best last-known hint. """ agent = ACPAgent(acp_command=["echo", "test"], acp_model="model-x") - # _initialized defaults to False (cold read); _current_model_id defaults None. + # No acp_session_id => never started; _current_model_id defaults None. state = _make_state(agent) stored = _make_stored(state) @@ -155,18 +155,16 @@ def test_cold_read_falls_back_to_acp_model_override(): def test_live_agent_does_not_fall_back_to_unapplied_override(): - """Live initialized agent whose override was NOT applied (e.g. a resume whose - ``set_session_model`` the server rejected): ``current_model_id`` is the - authoritative ``None`` and must NOT fall back to ``acp_model`` — that would - re-assert an override the live session isn't running. + """Live agent whose override was NOT applied (e.g. a resume whose + ``set_session_model`` the server rejected): the persisted current id was + cleared by ``init_state`` and must NOT be resurrected from ``acp_model`` — + that would re-assert an override the live session isn't running. """ agent = ACPAgent(acp_command=["echo", "test"], acp_model="model-x") - # init_state has fired; the override resolved to None (rejected) and was - # recorded as not applied. The persisted hint was cleared by init_state. - agent._initialized = True - agent._current_model_id = None - agent._model_override_applied = False + agent._current_model_id = None # override resolved to None (rejected) state = _make_state(agent) + # A session was started (so init_state ran) and it CLEARED the current id. + state.agent_state = {**state.agent_state, "acp_session_id": "sess-1"} stored = _make_stored(state) info = _compose_conversation_info(stored, state) @@ -174,6 +172,48 @@ def test_live_agent_does_not_fall_back_to_unapplied_override(): assert info.current_model_id is None +def test_cold_read_after_restart_keeps_cleared_override_cleared(): + """Cold read after an agent-server restart of a conversation whose override + was rejected. PrivateAttrs reset (``_current_model_id`` None) and the agent + still carries ``acp_model``, but a prior ``init_state`` cleared + ``acp_current_model_id`` while persisting ``acp_session_id``. The cleared id + must STAY cleared — the ``acp_model`` fallback is suppressed once a session + exists, so it can't resurrect the model the live session wasn't running. + """ + agent = ACPAgent(acp_command=["echo", "test"], acp_model="model-x") + # Cold read: _current_model_id default None (PrivateAttrs reset on restart). + state = _make_state(agent) + state.agent_state = { + **state.agent_state, + "acp_session_id": "prior-sess", # a session was started before restart + # acp_current_model_id deliberately ABSENT (cleared by prior init_state) + } + stored = _make_stored(state) + + info = _compose_conversation_info(stored, state) + + assert info.current_model_id is None + + +def test_cold_read_after_restart_surfaces_persisted_applied_model(): + """Counterpart: when the override WAS applied, the persisted + ``acp_current_model_id`` survives the restart cold read (not dropped by the + session-gating, which only suppresses the ``acp_model`` fallback). + """ + agent = ACPAgent(acp_command=["echo", "test"], acp_model="model-x") + state = _make_state(agent) + state.agent_state = { + **state.agent_state, + "acp_session_id": "prior-sess", + "acp_current_model_id": "model-x", # was applied, persisted + } + stored = _make_stored(state) + + info = _compose_conversation_info(stored, state) + + assert info.current_model_id == "model-x" + + def test_available_models_lifted_from_acp_agent(): """``ConversationInfo.available_models`` mirrors the agent's model list. diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 08f45fa28d..193aefda40 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -4568,6 +4568,51 @@ def test_known_provider_surfaces_applied_override(self, tmp_path): ) assert agent.current_model_id == "caller-model" + def test_meta_path_prefers_server_reported_id_over_requested(self, tmp_path): + """``_meta`` provider (claude): ``new_session`` applies the override and + reports the server's actual ``currentModelId`` in the same response. When + the server normalizes/ignores the request (e.g. reports ``"default"`` + even though a specific model was requested), ``current_model_id`` must + surface the server's reported id — not the requested one — so the chip + matches the picker (``available_models`` is keyed by the server's ids). + + Reproduces real claude-agent-acp@0.30.0 behaviour: requesting + ``claude-opus-4-7`` yields ``currentModelId='default'``. + """ + agent = _make_agent(acp_model="claude-opus-4-7") + state = _make_state(tmp_path) + new_response = MagicMock() + new_response.session_id = "fresh-sess" + new_response.models = self._models_block( + "default", ["default", "sonnet", "haiku", "claude-opus-4-7"] + ) + conn = self._make_conn() # default agent_info.name == "claude-agent-acp" + conn.new_session = AsyncMock(return_value=new_response) + + self._patched_start_acp_server(agent, state, conn=conn) + + # claude selects via _meta, so no protocol set_session_model call fires. + conn.set_session_model.assert_not_awaited() + # The server's reported id wins over the requested override. + assert agent.current_model_id == "default" + + def test_meta_path_falls_back_to_requested_when_server_omits_models(self, tmp_path): + """``_meta`` provider where ``new_session`` omits the ``models`` block: + with no server-reported id to trust, fall back to the requested + ``acp_model`` (it was embedded in the session and accepted). + """ + agent = _make_agent(acp_model="claude-opus-4-7") + state = _make_state(tmp_path) + new_response = MagicMock(spec=["session_id"]) # no .models attribute + new_response.session_id = "fresh-sess" + conn = self._make_conn() # claude-agent-acp + conn.new_session = AsyncMock(return_value=new_response) + + self._patched_start_acp_server(agent, state, conn=conn) + + conn.set_session_model.assert_not_awaited() + assert agent.current_model_id == "claude-opus-4-7" + def test_resume_rejected_override_surfaces_server_model(self, tmp_path): """Resume where ``set_session_model`` is rejected: the live session keeps the server default, so ``current_model_id`` must fall back to what the