From ecad1698ace8dae73f1e8087054b0d1395d49d5b Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Fri, 29 May 2026 12:28:23 -0700 Subject: [PATCH] Fix some of the thinking configuration * anthropic over gateway didn't think unless a reasoning_summary argument was specified * anthropic both natively and over gateway would disable thinking if reasoning_summary=None The second is marginal and if somebody wanted to argue it was correct, they could, I guess. --- src/ai/models/core/params.py | 4 +- src/ai/providers/ai_gateway/protocol.py | 18 ++++-- src/ai/providers/anthropic/protocol.py | 12 ++-- tests/providers/ai_gateway/test_stream.py | 68 +++++++++++++++++++++++ tests/providers/anthropic/test_adapter.py | 3 +- 5 files changed, 93 insertions(+), 12 deletions(-) diff --git a/src/ai/models/core/params.py b/src/ai/models/core/params.py index ee7d906f..4c498b45 100644 --- a/src/ai/models/core/params.py +++ b/src/ai/models/core/params.py @@ -239,7 +239,9 @@ class OutputParams: reasoning_summary: str | ModelProviderDefault | None = DEFAULT """Provider-specific reasoning summary emission level. - None means "disabled".""" + None means the summary is omitted (the model still reasons; it just does + not emit a summary). To turn reasoning off entirely, set + ``ReasoningParams.effort=None``.""" @dataclass(frozen=True, kw_only=True) diff --git a/src/ai/providers/ai_gateway/protocol.py b/src/ai/providers/ai_gateway/protocol.py index 50dc486e..03acc199 100644 --- a/src/ai/providers/ai_gateway/protocol.py +++ b/src/ai/providers/ai_gateway/protocol.py @@ -557,13 +557,21 @@ def _apply_gateway_reasoning( options["thinking"] = {"type": "disabled"} else: options["effort"] = effort - if _not_default(summary): - if summary is None: - options["thinking"] = {"type": "disabled"} - else: + # The gateway only turns thinking on when a `thinking` + # block is present; `effort` alone is a no-op upstream. thinking = dict(options.get("thinking") or {}) thinking.setdefault("type", "adaptive") - thinking["display"] = summary + options["thinking"] = thinking + if _not_default(summary): + # `reasoning_summary` only controls whether the reasoning summary + # is surfaced; it never turns thinking off (use + # `reasoning.effort=None` for that). `None` maps to + # `display="omitted"` -- think, but don't emit a summary -- and is + # ignored when thinking is already disabled. + thinking = dict(options.get("thinking") or {}) + if thinking.get("type") != "disabled": + thinking.setdefault("type", "adaptive") + thinking["display"] = "omitted" if summary is None else summary options["thinking"] = thinking return body["reasoning"] = { diff --git a/src/ai/providers/anthropic/protocol.py b/src/ai/providers/anthropic/protocol.py index 6d8ed528..fe7f82be 100644 --- a/src/ai/providers/anthropic/protocol.py +++ b/src/ai/providers/anthropic/protocol.py @@ -505,12 +505,14 @@ def _apply_anthropic_params( else: _apply_output_config(api_kwargs, {"effort": reasoning.effort}) if _not_default(summary): - if summary is None: - api_kwargs["thinking"] = {"type": "disabled"} - else: - thinking = dict(api_kwargs.get("thinking") or {}) + # `reasoning_summary` only controls whether the reasoning summary is + # surfaced; it never turns thinking off (use `reasoning.effort=None` + # for that). `None` maps to `display="omitted"` -- think, but don't + # emit a summary -- and is ignored when thinking is already disabled. + thinking = dict(api_kwargs.get("thinking") or {}) + if thinking.get("type") != "disabled": thinking.setdefault("type", "adaptive") - thinking["display"] = summary + thinking["display"] = "omitted" if summary is None else summary api_kwargs["thinking"] = thinking if request_params.tool_calling is not None: diff --git a/tests/providers/ai_gateway/test_stream.py b/tests/providers/ai_gateway/test_stream.py index de8ac02f..1566dc0d 100644 --- a/tests/providers/ai_gateway/test_stream.py +++ b/tests/providers/ai_gateway/test_stream.py @@ -407,6 +407,74 @@ def handler(req: httpx.Request) -> httpx.Response: assert captured_body["seed"] == 123 assert captured_body["futureGatewayField"] is True + async def test_gateway_anthropic_effort_enables_thinking(self) -> None: + captured_body: dict[str, Any] = {} + + def handler(req: httpx.Request) -> httpx.Response: + captured_body.update(json.loads(req.content)) + return httpx.Response( + 200, + text=sse( + {"type": "finish", "finishReason": "stop", "usage": {}} + ), + ) + + model = mock_model( + httpx.MockTransport(handler), + model_id="anthropic/claude-opus-4-8", + ) + request_params = ai.InferenceRequestParams( + reasoning=ai.ReasoningParams(effort="high"), + ) + async with models.stream( + model, + [user_msg("Hi")], + params=request_params, + ) as stream: + async for _ in stream: + pass + + # `effort` alone is a no-op upstream; the gateway only turns + # thinking on when a `thinking` block is also present. + assert captured_body["providerOptions"]["anthropic"] == { + "effort": "high", + "thinking": {"type": "adaptive"}, + } + + async def test_gateway_anthropic_summary_none_keeps_thinking(self) -> None: + captured_body: dict[str, Any] = {} + + def handler(req: httpx.Request) -> httpx.Response: + captured_body.update(json.loads(req.content)) + return httpx.Response( + 200, + text=sse( + {"type": "finish", "finishReason": "stop", "usage": {}} + ), + ) + + model = mock_model( + httpx.MockTransport(handler), + model_id="anthropic/claude-opus-4-8", + ) + request_params = ai.InferenceRequestParams( + reasoning=ai.ReasoningParams(effort="high"), + output=ai.OutputParams(reasoning_summary=None), + ) + async with models.stream( + model, + [user_msg("Hi")], + params=request_params, + ) as stream: + async for _ in stream: + pass + + # reasoning_summary=None omits the summary but keeps thinking on. + assert captured_body["providerOptions"]["anthropic"] == { + "effort": "high", + "thinking": {"type": "adaptive", "display": "omitted"}, + } + async def test_gateway_omits_random_seed(self) -> None: captured_body: dict[str, Any] = {} diff --git a/tests/providers/anthropic/test_adapter.py b/tests/providers/anthropic/test_adapter.py index 4fd50862..15c4b59d 100644 --- a/tests/providers/anthropic/test_adapter.py +++ b/tests/providers/anthropic/test_adapter.py @@ -84,7 +84,8 @@ async def test_params_translate_to_sdk_kwargs( ) assert captured["max_tokens"] == 123 - assert captured["thinking"] == {"type": "disabled"} + # reasoning_summary=None omits the summary but keeps thinking on. + assert captured["thinking"] == {"type": "adaptive", "display": "omitted"} assert captured["output_config"] == { "effort": "high", }