Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/ai/models/core/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,9 @@ class OutputParams:
reasoning_summary: str | ModelProviderDefault | None = DEFAULT
"""Provider-specific reasoning summary emission level.

None means "disabled"."""
None means the summary is omitted (the model still reasons; it just does
not emit a summary). To turn reasoning off entirely, set
``ReasoningParams.effort=None``."""


@dataclass(frozen=True, kw_only=True)
Expand Down
18 changes: 13 additions & 5 deletions src/ai/providers/ai_gateway/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,13 +557,21 @@ def _apply_gateway_reasoning(
options["thinking"] = {"type": "disabled"}
else:
options["effort"] = effort
if _not_default(summary):
if summary is None:
options["thinking"] = {"type": "disabled"}
else:
# The gateway only turns thinking on when a `thinking`
# block is present; `effort` alone is a no-op upstream.
thinking = dict(options.get("thinking") or {})
thinking.setdefault("type", "adaptive")
thinking["display"] = summary
options["thinking"] = thinking
if _not_default(summary):
# `reasoning_summary` only controls whether the reasoning summary
# is surfaced; it never turns thinking off (use
# `reasoning.effort=None` for that). `None` maps to
# `display="omitted"` -- think, but don't emit a summary -- and is
# ignored when thinking is already disabled.
thinking = dict(options.get("thinking") or {})
if thinking.get("type") != "disabled":
thinking.setdefault("type", "adaptive")
thinking["display"] = "omitted" if summary is None else summary
options["thinking"] = thinking
return
body["reasoning"] = {
Expand Down
12 changes: 7 additions & 5 deletions src/ai/providers/anthropic/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,12 +505,14 @@ def _apply_anthropic_params(
else:
_apply_output_config(api_kwargs, {"effort": reasoning.effort})
if _not_default(summary):
if summary is None:
api_kwargs["thinking"] = {"type": "disabled"}
else:
thinking = dict(api_kwargs.get("thinking") or {})
# `reasoning_summary` only controls whether the reasoning summary is
# surfaced; it never turns thinking off (use `reasoning.effort=None`
# for that). `None` maps to `display="omitted"` -- think, but don't
# emit a summary -- and is ignored when thinking is already disabled.
thinking = dict(api_kwargs.get("thinking") or {})
if thinking.get("type") != "disabled":
thinking.setdefault("type", "adaptive")
thinking["display"] = summary
thinking["display"] = "omitted" if summary is None else summary
api_kwargs["thinking"] = thinking

if request_params.tool_calling is not None:
Expand Down
68 changes: 68 additions & 0 deletions tests/providers/ai_gateway/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,74 @@ def handler(req: httpx.Request) -> httpx.Response:
assert captured_body["seed"] == 123
assert captured_body["futureGatewayField"] is True

async def test_gateway_anthropic_effort_enables_thinking(self) -> None:
    """Check that reasoning effort alone also sends an adaptive thinking block.

    Streams one request through a mock transport and inspects the
    ``providerOptions.anthropic`` section of the captured request body.
    """
    captured_body: dict[str, Any] = {}

    def handler(req: httpx.Request) -> httpx.Response:
        # Record the outgoing JSON body, then answer with a lone finish event.
        captured_body.update(json.loads(req.content))
        finish_event = {"type": "finish", "finishReason": "stop", "usage": {}}
        return httpx.Response(200, text=sse(finish_event))

    transport = httpx.MockTransport(handler)
    model = mock_model(transport, model_id="anthropic/claude-opus-4-8")
    reasoning = ai.ReasoningParams(effort="high")
    request_params = ai.InferenceRequestParams(reasoning=reasoning)

    # Drain the stream so the request is actually issued.
    async with models.stream(
        model, [user_msg("Hi")], params=request_params
    ) as stream:
        async for _ in stream:
            pass

    # Upstream treats `effort` on its own as a no-op: the gateway only
    # switches thinking on when a `thinking` block accompanies it.
    anthropic_options = captured_body["providerOptions"]["anthropic"]
    expected_options = {
        "effort": "high",
        "thinking": {"type": "adaptive"},
    }
    assert anthropic_options == expected_options

async def test_gateway_anthropic_summary_none_keeps_thinking(self) -> None:
    """Check that ``reasoning_summary=None`` hides the summary but keeps thinking.

    Streams one request through a mock transport with high reasoning effort
    and an explicit ``None`` summary, then inspects the
    ``providerOptions.anthropic`` section of the captured request body.
    """
    captured_body: dict[str, Any] = {}

    def handler(req: httpx.Request) -> httpx.Response:
        # Record the outgoing JSON body, then answer with a lone finish event.
        captured_body.update(json.loads(req.content))
        finish_event = {"type": "finish", "finishReason": "stop", "usage": {}}
        return httpx.Response(200, text=sse(finish_event))

    transport = httpx.MockTransport(handler)
    model = mock_model(transport, model_id="anthropic/claude-opus-4-8")
    reasoning = ai.ReasoningParams(effort="high")
    output = ai.OutputParams(reasoning_summary=None)
    request_params = ai.InferenceRequestParams(reasoning=reasoning, output=output)

    # Drain the stream so the request is actually issued.
    async with models.stream(
        model, [user_msg("Hi")], params=request_params
    ) as stream:
        async for _ in stream:
            pass

    # A `None` summary is expected to map to display="omitted" while the
    # thinking block itself stays adaptive (i.e. thinking remains on).
    anthropic_options = captured_body["providerOptions"]["anthropic"]
    expected_options = {
        "effort": "high",
        "thinking": {"type": "adaptive", "display": "omitted"},
    }
    assert anthropic_options == expected_options

async def test_gateway_omits_random_seed(self) -> None:
captured_body: dict[str, Any] = {}

Expand Down
3 changes: 2 additions & 1 deletion tests/providers/anthropic/test_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ async def test_params_translate_to_sdk_kwargs(
)

assert captured["max_tokens"] == 123
assert captured["thinking"] == {"type": "disabled"}
# reasoning_summary=None omits the summary but keeps thinking on.
assert captured["thinking"] == {"type": "adaptive", "display": "omitted"}
assert captured["output_config"] == {
"effort": "high",
}
Expand Down
Loading