From 737335ccc56d35efcafe8f230b37136bd11d15c1 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 15:59:24 +0530 Subject: [PATCH 1/9] fix: drop message items orphaned by handoff function calls consuming their reasoning item When a model turn during a handoff emits [reasoning, function_call, message], providers such as Azure OpenAI treat the reasoning item as consumed by the function_call. The trailing message item then has no paired reasoning and is rejected with HTTP 400: Item 'msg_...' of type 'message' was provided without its required 'reasoning' item Add drop_orphaned_messages_after_consumed_reasoning() and call it from prepare_model_input_items() alongside the existing drop_orphan_function_calls() pass. The new function tracks whether the most-recent reasoning item has been consumed by a function_call and drops any subsequent message item that would be left without a partner. This is the inverse of drop_orphan_function_calls(), which removes function calls without outputs and their preceding reasoning items. 
--- src/agents/run_internal/items.py | 46 ++++++++++++++++++++++++++ tests/test_agent_runner.py | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py index aadba1d361..4ada043dce 100644 --- a/src/agents/run_internal/items.py +++ b/src/agents/run_internal/items.py @@ -37,6 +37,7 @@ "TOOL_CALL_SESSION_TITLE_KEY", "copy_input_items", "drop_orphan_function_calls", + "drop_orphaned_messages_after_consumed_reasoning", "ensure_input_item_format", "prepare_model_input_items", "run_item_to_input_item", @@ -179,6 +180,50 @@ def _drop_reasoning_items_preceding_dropped_calls( return [entry for idx, entry in enumerate(items) if idx not in excluded] +def drop_orphaned_messages_after_consumed_reasoning( + items: list[TResponseInputItem], +) -> list[TResponseInputItem]: + """Drop message items that are orphaned because their preceding reasoning item was consumed + by a function call. + + The Responses API requires every message item to be paired with its own reasoning item. When + an agent hands off via a function call, the reasoning item that immediately preceded the call + is considered consumed by that call. Any message item that follows (e.g. the handoff agent's + closing message) has no paired reasoning and causes a 400 from some providers: + ``Item 'msg_...' of type 'message' was provided without its required 'reasoning' item``. + + This is the inverse of :func:`drop_orphan_function_calls`, which removes function calls + without outputs and their preceding reasoning items. 
+ """ + had_any_reasoning = False + fresh_reasoning = False # True when the most-recent reasoning item is not yet consumed + result: list[TResponseInputItem] = [] + + for item in items: + if not isinstance(item, dict): + result.append(item) + continue + item_type = item.get("type") + + if item_type == "reasoning": + had_any_reasoning = True + fresh_reasoning = True + result.append(item) + elif item_type in ("function_call", "computer_call"): + if fresh_reasoning: + fresh_reasoning = False # reasoning is consumed by this call + result.append(item) + elif item_type == "message": + if had_any_reasoning and not fresh_reasoning: + pass # orphaned — no paired reasoning available; drop to prevent API rejection + else: + result.append(item) + else: + result.append(item) + + return result + + def ensure_input_item_format(item: TResponseInputItem) -> TResponseInputItem: """Ensure a single item is normalized for model input.""" coerced = _coerce_to_dict(item) @@ -214,6 +259,7 @@ def prepare_model_input_items( normalized_generated_items = normalize_input_items_for_api(list(generated_items)) filtered_generated_items = drop_orphan_function_calls(normalized_generated_items) + filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(filtered_generated_items) return normalized_caller_items + filtered_generated_items diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index eb22c70f14..26186d2e70 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -1032,6 +1032,61 @@ def capture_model_input(data): assert "reasoning" not in handoff_input_types +@pytest.mark.asyncio +async def test_handoff_drops_orphaned_message_after_consumed_reasoning() -> None: + """ + When a model turn during a handoff emits [reasoning, function_call, message], the reasoning + item is consumed by the function_call. The trailing message has no paired reasoning and some + providers (e.g. Azure OpenAI) reject it with HTTP 400. 
Verify it is dropped from input[]. + """ + model = FakeModel() + delegate = Agent(name="delegate", model=model) + triage = Agent(name="triage", model=model, handoffs=[delegate]) + + model.add_multiple_turn_outputs( + [ + [ + ResponseReasoningItem( + id="rs_111", + type="reasoning", + summary=[Summary(text="Thinking about handoff.", type="summary_text")], + ), + get_handoff_tool_call(delegate), + get_text_message("I'm transferring you now."), # orphaned — no own reasoning + ], + [get_text_message("done")], + ] + ) + + captured_inputs: list[list[dict[str, Any]]] = [] + + def capture_model_input(data): + if isinstance(data.model_data.input, list): + captured_inputs.append( + [item for item in data.model_data.input if isinstance(item, dict)] + ) + return data.model_data + + result = await Runner.run( + triage, + input="user_message", + run_config=RunConfig(call_model_input_filter=capture_model_input), + ) + + assert result.final_output == "done" + assert len(captured_inputs) >= 2 + + second_input = captured_inputs[1] + orphaned_messages = [ + item for item in second_input + if item.get("type") == "message" and item.get("role") == "assistant" + ] + assert not orphaned_messages, ( + "Message item emitted after a handoff function_call (which consumed the only reasoning " + "item) must be dropped from input[] to prevent provider API rejection." + ) + + @pytest.mark.asyncio async def test_resume_preserves_filtered_model_input_after_handoff(): model = FakeModel() From 4be2bbeb2569eca4119c434f126add9a4d471976 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 16:17:16 +0530 Subject: [PATCH 2/9] fix: scope orphaned-message drop to the immediate trailing message only The previous state machine carried fresh_reasoning=False across all subsequent turns, incorrectly dropping valid assistant messages from later agents that legitimately emit responses without a reasoning item. 
Replace had_any_reasoning + fresh_reasoning with a single consumed_by_call flag that is reset to False as soon as the first orphaned message is dropped. This limits pruning to the one trailing message inside the same handoff turn and leaves all subsequent turns unaffected. Add clarifying comments to the test showing that the delegate agent response (no reasoning) must survive and reach final_output. --- src/agents/run_internal/items.py | 16 +++++++++++----- tests/test_agent_runner.py | 8 +++++++- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py index 4ada043dce..7f866f7d40 100644 --- a/src/agents/run_internal/items.py +++ b/src/agents/run_internal/items.py @@ -192,11 +192,14 @@ def drop_orphaned_messages_after_consumed_reasoning( closing message) has no paired reasoning and causes a 400 from some providers: ``Item 'msg_...' of type 'message' was provided without its required 'reasoning' item``. + The drop is scoped to the first message after the consuming call. Dropping resets the flag so + that later turns whose assistant messages legitimately lack a reasoning item are not affected. + This is the inverse of :func:`drop_orphan_function_calls`, which removes function calls without outputs and their preceding reasoning items. 
""" - had_any_reasoning = False fresh_reasoning = False # True when the most-recent reasoning item is not yet consumed + consumed_by_call = False # True after a function_call consumes the fresh reasoning result: list[TResponseInputItem] = [] for item in items: @@ -206,16 +209,19 @@ def drop_orphaned_messages_after_consumed_reasoning( item_type = item.get("type") if item_type == "reasoning": - had_any_reasoning = True fresh_reasoning = True + consumed_by_call = False result.append(item) elif item_type in ("function_call", "computer_call"): if fresh_reasoning: - fresh_reasoning = False # reasoning is consumed by this call + fresh_reasoning = False + consumed_by_call = True # reasoning is now consumed by this call result.append(item) elif item_type == "message": - if had_any_reasoning and not fresh_reasoning: - pass # orphaned — no paired reasoning available; drop to prevent API rejection + if consumed_by_call: + # Orphaned: reasoning was consumed by the preceding function_call. + # Reset so messages from subsequent turns without their own reasoning are kept. + consumed_by_call = False else: result.append(item) else: diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 26186d2e70..cb476ab333 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -1038,6 +1038,9 @@ async def test_handoff_drops_orphaned_message_after_consumed_reasoning() -> None When a model turn during a handoff emits [reasoning, function_call, message], the reasoning item is consumed by the function_call. The trailing message has no paired reasoning and some providers (e.g. Azure OpenAI) reject it with HTTP 400. Verify it is dropped from input[]. + + Also verifies that the drop is scoped to that one trailing message: the delegate agent's + subsequent response (which has no reasoning of its own) must NOT be dropped. 
""" model = FakeModel() delegate = Agent(name="delegate", model=model) @@ -1054,7 +1057,7 @@ async def test_handoff_drops_orphaned_message_after_consumed_reasoning() -> None get_handoff_tool_call(delegate), get_text_message("I'm transferring you now."), # orphaned — no own reasoning ], - [get_text_message("done")], + [get_text_message("done")], # delegate responds without reasoning — must be kept ] ) @@ -1073,6 +1076,8 @@ def capture_model_input(data): run_config=RunConfig(call_model_input_filter=capture_model_input), ) + # delegate's "done" message must reach final_output — if the drop leaked into later turns + # it would be missing and the runner would stall or return a wrong value. assert result.final_output == "done" assert len(captured_inputs) >= 2 @@ -1080,6 +1085,7 @@ def capture_model_input(data): orphaned_messages = [ item for item in second_input if item.get("type") == "message" and item.get("role") == "assistant" + and "transferring" in str(item.get("content", "")) ] assert not orphaned_messages, ( "Message item emitted after a handoff function_call (which consumed the only reasoning " From b3dcb805cdcbc3806a6dab421da258fc58a83806 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 16:24:08 +0530 Subject: [PATCH 3/9] fix: reset consumed_by_call at function_call_output to prevent cross-turn bleed When the handoff turn emits [reasoning, function_call] with no trailing message, consumed_by_call stayed True and leaked into the next accumulated turn, silently dropping the delegate agent response. The SDK appends HandoffOutputItem (function_call_output) after all model output items, so any orphaned trailing message is dropped before we reach fc_out. Resetting consumed_by_call at function_call_output therefore scopes the drop to the current handoff sequence only and keeps all subsequent turns clean. Add test_handoff_without_trailing_message_keeps_delegate_response to cover this path explicitly. 
--- src/agents/run_internal/items.py | 11 +++++++++-- tests/test_agent_runner.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py index 7f866f7d40..7d65644c8f 100644 --- a/src/agents/run_internal/items.py +++ b/src/agents/run_internal/items.py @@ -217,10 +217,17 @@ def drop_orphaned_messages_after_consumed_reasoning( fresh_reasoning = False consumed_by_call = True # reasoning is now consumed by this call result.append(item) + elif item_type == "function_call_output": + # The SDK appends the HandoffOutputItem after all model output items, so any + # orphaned message will already have been dropped by this point. Reset here so + # that turns with no trailing message do not bleed consumed_by_call into the + # next agent's responses. + consumed_by_call = False + result.append(item) elif item_type == "message": if consumed_by_call: - # Orphaned: reasoning was consumed by the preceding function_call. - # Reset so messages from subsequent turns without their own reasoning are kept. + # Orphaned: reasoning was consumed by the preceding function_call and no + # function_call_output has reset the flag yet. Drop and reset. consumed_by_call = False else: result.append(item) diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index cb476ab333..de7fa47a14 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -1093,6 +1093,38 @@ def capture_model_input(data): ) +@pytest.mark.asyncio +async def test_handoff_without_trailing_message_keeps_delegate_response() -> None: + """ + When the handoff turn emits only [reasoning, function_call] with NO trailing message, + consumed_by_call must not leak into the next turn and silently drop the delegate's reply. 
+ """ + model = FakeModel() + delegate = Agent(name="delegate", model=model) + triage = Agent(name="triage", model=model, handoffs=[delegate]) + + model.add_multiple_turn_outputs( + [ + [ + ResponseReasoningItem( + id="rs_111", + type="reasoning", + summary=[Summary(text="Deciding to hand off.", type="summary_text")], + ), + get_handoff_tool_call(delegate), + # no trailing message — the common case + ], + [get_text_message("delegate reply")], + ] + ) + + result = await Runner.run(triage, input="user_message") + + assert result.final_output == "delegate reply", ( + "Delegate response must not be dropped when the handoff turn has no trailing message." + ) + + @pytest.mark.asyncio async def test_resume_preserves_filtered_model_input_after_handoff(): model = FakeModel() From 5ddb96218e6e149f7dd131c1a41ffde4a6ce06ff Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 16:27:40 +0530 Subject: [PATCH 4/9] fix: reset consumed_by_call on all call output types, not just function_call_output computer_call_output, shell_call_output, and other output types were not resetting consumed_by_call, so a reasoning-backed computer_call with no trailing message would leak the flag into the next turn and silently drop the following assistant message. Extract _CALL_OUTPUT_TYPES = frozenset(_TOOL_CALL_TO_OUTPUT_TYPE.values()) and use it as the reset condition so every call output type is covered. 
--- src/agents/run_internal/items.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py index 7d65644c8f..12a485c4a3 100644 --- a/src/agents/run_internal/items.py +++ b/src/agents/run_internal/items.py @@ -29,6 +29,7 @@ "local_shell_call": "local_shell_call_output", "tool_search_call": "tool_search_output", } +_CALL_OUTPUT_TYPES: frozenset[str] = frozenset(_TOOL_CALL_TO_OUTPUT_TYPE.values()) __all__ = [ "ReasoningItemIdPolicy", @@ -217,11 +218,12 @@ def drop_orphaned_messages_after_consumed_reasoning( fresh_reasoning = False consumed_by_call = True # reasoning is now consumed by this call result.append(item) - elif item_type == "function_call_output": - # The SDK appends the HandoffOutputItem after all model output items, so any - # orphaned message will already have been dropped by this point. Reset here so - # that turns with no trailing message do not bleed consumed_by_call into the - # next agent's responses. + elif item_type in _CALL_OUTPUT_TYPES: + # Any call output (function_call_output, computer_call_output, etc.) marks the + # end of its call sequence. The SDK appends call outputs after all model output + # items, so any orphaned message has already been dropped by this point. Reset + # here so that turns with no trailing message do not bleed consumed_by_call into + # the next agent's responses regardless of the call type. consumed_by_call = False result.append(item) elif item_type == "message": From 702595022fb37e6685e8b718dab10d029875b920 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 16:32:48 +0530 Subject: [PATCH 5/9] fix: apply orphaned-message pruning to session history on replay save_result_to_session() persists raw run items including any orphaned trailing message from a reasoning handoff turn. 
On the next Runner.run() with the same session, prepare_input_with_session() rebuilt history using only drop_orphan_function_calls(), so the orphaned message was re-sent to the provider and triggered the same HTTP 400. Import drop_orphaned_messages_after_consumed_reasoning into session_persistence.py and call it immediately after drop_orphan_function_calls() in prepare_input_with_session(), mirroring the existing pattern for function-call orphan pruning. Add test_session_history_drops_orphaned_message_on_next_run to verify the session replay path explicitly. --- .../run_internal/session_persistence.py | 2 + tests/test_agent_runner.py | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/src/agents/run_internal/session_persistence.py b/src/agents/run_internal/session_persistence.py index f483da13a3..15297bcde9 100644 --- a/src/agents/run_internal/session_persistence.py +++ b/src/agents/run_internal/session_persistence.py @@ -29,6 +29,7 @@ copy_input_items, deduplicate_input_items_preferring_latest, drop_orphan_function_calls, + drop_orphaned_messages_after_consumed_reasoning, ensure_input_item_format, fingerprint_input_item, normalize_input_items_for_api, @@ -180,6 +181,7 @@ async def prepare_input_with_session( prepared_as_inputs, pruning_indexes=prune_history_indexes, ) + filtered = drop_orphaned_messages_after_consumed_reasoning(filtered) normalized = normalize_input_items_for_api(filtered) deduplicated = deduplicate_input_items_preferring_latest(normalized) diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index de7fa47a14..8a15d2edcf 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -1125,6 +1125,71 @@ async def test_handoff_without_trailing_message_keeps_delegate_response() -> Non ) +@pytest.mark.asyncio +async def test_session_history_drops_orphaned_message_on_next_run() -> None: + """ + save_result_to_session() persists raw run items including any orphaned trailing message. 
+ On the next Runner.run(..., session=session) the history is rebuilt via + prepare_input_with_session(), which must apply drop_orphaned_messages_after_consumed_reasoning() + so the re-sent history does not contain the orphaned message that would cause a provider 400. + """ + model = FakeModel() + delegate = Agent(name="delegate", model=model) + triage = Agent(name="triage", model=model, handoffs=[delegate]) + session = SimpleListSession() + + # First run: triage reasons, hands off, and emits an orphaned trailing message. + model.add_multiple_turn_outputs( + [ + [ + ResponseReasoningItem( + id="rs_111", + type="reasoning", + summary=[Summary(text="Thinking about handoff.", type="summary_text")], + ), + get_handoff_tool_call(delegate), + get_text_message("I'm transferring you now."), # orphaned + ], + [get_text_message("done")], + ] + ) + first_result = await Runner.run(triage, input="user_message", session=session) + assert first_result.final_output == "done" + + # Second run: history is loaded from session. Capture what the model receives. + model.set_next_output([get_text_message("second done")]) + captured_inputs: list[list[dict[str, Any]]] = [] + + def capture(data): + if isinstance(data.model_data.input, list): + captured_inputs.append( + [item for item in data.model_data.input if isinstance(item, dict)] + ) + return data.model_data + + second_result = await Runner.run( + delegate, + input="follow-up", + session=session, + run_config=RunConfig(call_model_input_filter=capture), + ) + assert second_result.final_output == "second done" + + # The session-reconstructed history must not contain the orphaned trailing message. 
+ assert captured_inputs, "call_model_input_filter must have fired" + first_captured = captured_inputs[0] + orphaned = [ + item for item in first_captured + if item.get("type") == "message" + and item.get("role") == "assistant" + and "transferring" in str(item.get("content", "")) + ] + assert not orphaned, ( + "Orphaned message saved to session must be filtered out when history is " + "replayed on the next run, to prevent provider 400 errors." + ) + + @pytest.mark.asyncio async def test_resume_preserves_filtered_model_input_after_handoff(): model = FakeModel() From 04a4072db955ee5712d335f40c62d38c2e5761de Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 16:36:39 +0530 Subject: [PATCH 6/9] fix: treat all tool call types as consuming reasoning, not just function_call/computer_call custom_tool_call, shell_call, apply_patch_call, local_shell_call, and tool_search_call were not setting consumed_by_call, so a reasoning item followed by any of those call types and then a message would still be sent as [reasoning, tool_call, message] and trigger the same provider 400 this PR is fixing. Replace the hardcoded (function_call, computer_call) tuple with _TOOL_CALL_TO_OUTPUT_TYPE, which already enumerates every call type that the runtime tracks and matches to outputs. --- src/agents/run_internal/items.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py index 12a485c4a3..748ab230a6 100644 --- a/src/agents/run_internal/items.py +++ b/src/agents/run_internal/items.py @@ -185,12 +185,12 @@ def drop_orphaned_messages_after_consumed_reasoning( items: list[TResponseInputItem], ) -> list[TResponseInputItem]: """Drop message items that are orphaned because their preceding reasoning item was consumed - by a function call. + by a tool call. The Responses API requires every message item to be paired with its own reasoning item. 
When - an agent hands off via a function call, the reasoning item that immediately preceded the call - is considered consumed by that call. Any message item that follows (e.g. the handoff agent's - closing message) has no paired reasoning and causes a 400 from some providers: + any tool call (function_call, computer_call, shell_call, etc.) follows a reasoning item, that + reasoning item is considered consumed by the call. Any message item that follows (e.g. the + handoff agent's closing message) has no paired reasoning and causes a 400 from some providers: ``Item 'msg_...' of type 'message' was provided without its required 'reasoning' item``. The drop is scoped to the first message after the consuming call. Dropping resets the flag so @@ -200,7 +200,7 @@ def drop_orphaned_messages_after_consumed_reasoning( without outputs and their preceding reasoning items. """ fresh_reasoning = False # True when the most-recent reasoning item is not yet consumed - consumed_by_call = False # True after a function_call consumes the fresh reasoning + consumed_by_call = False # True after any tool call consumes the fresh reasoning result: list[TResponseInputItem] = [] for item in items: @@ -213,7 +213,7 @@ def drop_orphaned_messages_after_consumed_reasoning( fresh_reasoning = True consumed_by_call = False result.append(item) - elif item_type in ("function_call", "computer_call"): + elif item_type in _TOOL_CALL_TO_OUTPUT_TYPE: if fresh_reasoning: fresh_reasoning = False consumed_by_call = True # reasoning is now consumed by this call From b97963d7b2dd00519bd8df7af50f88b21bf2c4c5 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 18:10:10 +0530 Subject: [PATCH 7/9] =?UTF-8?q?fix:=20close=20remaining=20gaps=20=E2=80=94?= =?UTF-8?q?=20normalize=5Fresumed=5Finput=20and=20OAI=20conversation=20tra?= =?UTF-8?q?cker?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Map of every path that assembles input[] for the model, cross-checked 
against which ones already call drop_orphan_function_calls (all of them should also call drop_orphaned_messages_after_consumed_reasoning): Path Before After prepare_model_input_items done done prepare_input_with_session done done normalize_resumed_input (RunState resume) miss fixed OpenAIServerConversationTracker.prepare miss fixed Changes: - items.py: normalize_resumed_input chains drop_orphaned_messages_after_consumed_reasoning after drop_orphan_function_calls (same pattern as the other call sites) - oai_conversation.py: import + one-line call after drop_orphan_function_calls in OpenAIServerConversationTracker.prepare_input; id() tracking is safe because the function returns items from the input list without copying Tests added: - test_normalize_resumed_input_drops_orphaned_message_after_consumed_reasoning - test_server_conversation_tracker_drops_orphaned_message_after_consumed_reasoning All 155 tests pass. --- src/agents/run_internal/items.py | 3 +- src/agents/run_internal/oai_conversation.py | 2 + tests/test_agent_runner.py | 76 +++++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py index 748ab230a6..cab227ebd8 100644 --- a/src/agents/run_internal/items.py +++ b/src/agents/run_internal/items.py @@ -284,7 +284,8 @@ def normalize_resumed_input( """Normalize resumed list inputs and drop orphan tool calls.""" if isinstance(raw_input, list): normalized = normalize_input_items_for_api(raw_input) - return drop_orphan_function_calls(normalized) + filtered = drop_orphan_function_calls(normalized) + return drop_orphaned_messages_after_consumed_reasoning(filtered) return raw_input diff --git a/src/agents/run_internal/oai_conversation.py b/src/agents/run_internal/oai_conversation.py index 4a0e088353..600b252215 100644 --- a/src/agents/run_internal/oai_conversation.py +++ b/src/agents/run_internal/oai_conversation.py @@ -21,6 +21,7 @@ from .items import ( ReasoningItemIdPolicy, 
drop_orphan_function_calls, + drop_orphaned_messages_after_consumed_reasoning, fingerprint_input_item, normalize_input_items_for_api, prepare_model_input_items, @@ -502,6 +503,7 @@ def prepare_input( ) } filtered_generated_items = drop_orphan_function_calls(normalized_generated_items) + filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(filtered_generated_items) for item in filtered_generated_items: prepared_source_item = normalized_generated_sources.get(id(item)) if prepared_source_item is not None: diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 8a15d2edcf..9d68d1069f 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -376,6 +376,82 @@ def test_normalize_resumed_input_drops_orphan_tool_search_calls(): assert "paired_search" in call_ids +def test_normalize_resumed_input_drops_orphaned_message_after_consumed_reasoning(): + """normalize_resumed_input must strip messages orphaned by a consumed reasoning item. + + The SDK appends tool outputs (function_call_output) after all model-emitted items, so the + orphaned message appears between the function_call and its output in the flat list. 
+ """ + raw_input: list[TResponseInputItem] = [ + cast(TResponseInputItem, {"type": "reasoning", "id": "rs_1", "summary": []}), + cast( + TResponseInputItem, + {"type": "function_call", "call_id": "fc_1", "name": "transfer_to_x", "arguments": "{}"}, + ), + # message comes before function_call_output — model emits it, SDK appends the output after + cast( + TResponseInputItem, + {"type": "message", "role": "assistant", "content": "I'm handing off now."}, + ), + cast(TResponseInputItem, {"type": "function_call_output", "call_id": "fc_1", "output": "ok"}), + ] + + normalized = normalize_resumed_input(raw_input) + assert isinstance(normalized, list) + message_items = [ + item for item in normalized + if isinstance(item, dict) and item.get("type") == "message" and item.get("role") == "assistant" + ] + assert not message_items, "Orphaned assistant message must be dropped by normalize_resumed_input" + + +@pytest.mark.asyncio +async def test_server_conversation_tracker_drops_orphaned_message_after_consumed_reasoning(): + """The OAI server-conversation path must strip orphaned messages via Runner.run end-to-end.""" + model = FakeModel() + delegate = Agent(name="delegate", model=model) + triage = Agent(name="triage", model=model, handoffs=[delegate]) + + model.add_multiple_turn_outputs( + [ + [ + ResponseReasoningItem( + id="rs_111", + type="reasoning", + summary=[Summary(text="Deciding to hand off.", type="summary_text")], + ), + get_handoff_tool_call(delegate), + get_text_message("Transferring now."), # orphaned — no own reasoning + ], + [get_text_message("done")], + ] + ) + + captured: list[list[dict[str, Any]]] = [] + + def capture(data): + if isinstance(data.model_data.input, list): + captured.append([item for item in data.model_data.input if isinstance(item, dict)]) + return data.model_data + + run_result = await Runner.run( + triage, + input="hello", + run_config=RunConfig(call_model_input_filter=capture), + ) + assert run_result.final_output == "done" + assert 
len(captured) >= 2 + + second_input = captured[1] + orphaned = [ + item for item in second_input + if item.get("type") == "message" + and item.get("role") == "assistant" + and "Transferring" in str(item.get("content", "")) + ] + assert not orphaned, "Orphaned assistant message must be absent from the second model call." + + def test_normalize_resumed_input_preserves_hosted_tool_search_pair_without_call_ids(): raw_input: list[TResponseInputItem] = [ cast( From 20da2dce0a24ffbd19c03d1f28c1ba1b64be0092 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 18:34:01 +0530 Subject: [PATCH 8/9] fix: drop all orphaned messages before call output, not just the first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resetting consumed_by_call after the first dropped message meant a second orphaned message in the same turn — e.g. [reasoning, function_call, msg1, msg2, function_call_output] — would slip through unchecked. Remove the reset from the message branch entirely. The only correct reset point is _CALL_OUTPUT_TYPES (the call output item), which marks the actual turn boundary. Messages that arrive before any call output while consumed_by_call is True are all orphaned and are all dropped; messages that arrive after the call output (delegate agent, next turn) are unaffected because the flag has already been cleared. Add test_normalize_resumed_input_drops_multiple_orphaned_messages_in_same_turn to cover this case explicitly. 
--- src/agents/run_internal/items.py | 9 ++++----- tests/test_agent_runner.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py index cab227ebd8..d461f0b1f1 100644 --- a/src/agents/run_internal/items.py +++ b/src/agents/run_internal/items.py @@ -227,12 +227,11 @@ def drop_orphaned_messages_after_consumed_reasoning( consumed_by_call = False result.append(item) elif item_type == "message": - if consumed_by_call: - # Orphaned: reasoning was consumed by the preceding function_call and no - # function_call_output has reset the flag yet. Drop and reset. - consumed_by_call = False - else: + if not consumed_by_call: result.append(item) + # else: orphaned — reasoning consumed by the preceding call; drop without resetting + # so that any further messages in the same turn are also dropped until a + # call-output item resets consumed_by_call. else: result.append(item) diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 9d68d1069f..5a9cee5a2d 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -405,6 +405,30 @@ def test_normalize_resumed_input_drops_orphaned_message_after_consumed_reasoning assert not message_items, "Orphaned assistant message must be dropped by normalize_resumed_input" +def test_normalize_resumed_input_drops_multiple_orphaned_messages_in_same_turn(): + """All orphaned messages before the call output must be dropped, not just the first one.""" + raw_input: list[TResponseInputItem] = [ + cast(TResponseInputItem, {"type": "reasoning", "id": "rs_1", "summary": []}), + cast( + TResponseInputItem, + {"type": "function_call", "call_id": "fc_1", "name": "transfer_to_x", "arguments": "{}"}, + ), + cast(TResponseInputItem, {"type": "message", "role": "assistant", "content": "msg one"}), + cast(TResponseInputItem, {"type": "message", "role": "assistant", "content": "msg two"}), + cast(TResponseInputItem, {"type": 
"function_call_output", "call_id": "fc_1", "output": "ok"}), + ] + + normalized = normalize_resumed_input(raw_input) + assert isinstance(normalized, list) + assistant_messages = [ + item for item in normalized + if isinstance(item, dict) and item.get("type") == "message" and item.get("role") == "assistant" + ] + assert not assistant_messages, ( + "All orphaned assistant messages before the call output must be dropped, not just the first" + ) + + @pytest.mark.asyncio async def test_server_conversation_tracker_drops_orphaned_message_after_consumed_reasoning(): """The OAI server-conversation path must strip orphaned messages via Runner.run end-to-end.""" From 678ce4cfeccc7a0579358e750b16e3ec79cde79d Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 4 Jun 2026 23:33:03 +0530 Subject: [PATCH 9/9] fix: prune orphaned messages before removing orphaned calls drop_orphan_function_calls() strips [reasoning, function_call] pairs with no output before message-pruning runs, so a trailing message in [reasoning, function_call, message] (no output) lost its consumed-call context and survived. Swap the order at all four call sites so message pruning sees the full reasoning context, then call-pair removal cleans up what remains. Add a regression test covering the no-output case. 
Co-Authored-By: Claude Sonnet 4.6 --- src/agents/run_internal/items.py | 8 +++--- src/agents/run_internal/oai_conversation.py | 4 +-- .../run_internal/session_persistence.py | 4 +-- tests/test_agent_runner.py | 28 +++++++++++++++++++ 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py index d461f0b1f1..0a0cc82f85 100644 --- a/src/agents/run_internal/items.py +++ b/src/agents/run_internal/items.py @@ -272,8 +272,8 @@ def prepare_model_input_items( return normalized_caller_items normalized_generated_items = normalize_input_items_for_api(list(generated_items)) - filtered_generated_items = drop_orphan_function_calls(normalized_generated_items) - filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(filtered_generated_items) + filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(normalized_generated_items) + filtered_generated_items = drop_orphan_function_calls(filtered_generated_items) return normalized_caller_items + filtered_generated_items @@ -283,8 +283,8 @@ def normalize_resumed_input( """Normalize resumed list inputs and drop orphan tool calls.""" if isinstance(raw_input, list): normalized = normalize_input_items_for_api(raw_input) - filtered = drop_orphan_function_calls(normalized) - return drop_orphaned_messages_after_consumed_reasoning(filtered) + filtered = drop_orphaned_messages_after_consumed_reasoning(normalized) + return drop_orphan_function_calls(filtered) return raw_input diff --git a/src/agents/run_internal/oai_conversation.py b/src/agents/run_internal/oai_conversation.py index 600b252215..fc8b1c8e67 100644 --- a/src/agents/run_internal/oai_conversation.py +++ b/src/agents/run_internal/oai_conversation.py @@ -502,8 +502,8 @@ def prepare_input( normalized_generated_items, prepared_generated_items, strict=False ) } - filtered_generated_items = drop_orphan_function_calls(normalized_generated_items) - filtered_generated_items = 
drop_orphaned_messages_after_consumed_reasoning(filtered_generated_items) + filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(normalized_generated_items) + filtered_generated_items = drop_orphan_function_calls(filtered_generated_items) for item in filtered_generated_items: prepared_source_item = normalized_generated_sources.get(id(item)) if prepared_source_item is not None: diff --git a/src/agents/run_internal/session_persistence.py b/src/agents/run_internal/session_persistence.py index 15297bcde9..8cfcf4ebca 100644 --- a/src/agents/run_internal/session_persistence.py +++ b/src/agents/run_internal/session_persistence.py @@ -177,11 +177,11 @@ async def prepare_input_with_session( prune_history_indexes, ) prepared_as_inputs = [ensure_input_item_format(item) for item in prepared_items_raw] + filtered = drop_orphaned_messages_after_consumed_reasoning(prepared_as_inputs) filtered = drop_orphan_function_calls( - prepared_as_inputs, + filtered, pruning_indexes=prune_history_indexes, ) - filtered = drop_orphaned_messages_after_consumed_reasoning(filtered) normalized = normalize_input_items_for_api(filtered) deduplicated = deduplicate_input_items_preferring_latest(normalized) diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 5a9cee5a2d..91396baed4 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -429,6 +429,34 @@ def test_normalize_resumed_input_drops_multiple_orphaned_messages_in_same_turn() ) +def test_normalize_resumed_input_drops_orphaned_message_when_no_call_output(): + """Orphaned message must be dropped even when the function_call has no matching output. + + drop_orphan_function_calls() would remove the [reasoning, function_call] pair before + message-pruning runs if the order were reversed, leaving the orphaned message undetected. 
+ Running message-pruning first ensures the message is dropped while the reasoning context + is still present, then drop_orphan_function_calls() cleans up the call pair. + """ + raw_input: list[TResponseInputItem] = [ + cast(TResponseInputItem, {"type": "reasoning", "id": "rs_1", "summary": []}), + cast( + TResponseInputItem, + {"type": "function_call", "call_id": "fc_1", "name": "transfer_to_x", "arguments": "{}"}, + ), + # no function_call_output — orphaned call AND orphaned message + cast( + TResponseInputItem, + {"type": "message", "role": "assistant", "content": "Transferring now."}, + ), + ] + + normalized = normalize_resumed_input(raw_input) + assert isinstance(normalized, list) + assert normalized == [], ( + "Both the orphaned message and the call-without-output (+ its reasoning) must be dropped" + ) + + @pytest.mark.asyncio async def test_server_conversation_tracker_drops_orphaned_message_after_consumed_reasoning(): """The OAI server-conversation path must strip orphaned messages via Runner.run end-to-end."""