vercel-labs · msullivan · May 29, 2026 · May 29, 2026
diff --git a/src/ai/models/core/params.py b/src/ai/models/core/params.py
@@ -239,7 +239,9 @@ class OutputParams:
     reasoning_summary: str | ModelProviderDefault | None = DEFAULT
     """Provider-specific reasoning summary emission level.
 
-    None means "disabled"."""
+    None means the summary is omitted (the model still reasons; it just does
+    not emit a summary). To turn reasoning off entirely, set
+    ``ReasoningParams.effort=None``."""
 
 
 @dataclass(frozen=True, kw_only=True)

diff --git a/src/ai/providers/ai_gateway/protocol.py b/src/ai/providers/ai_gateway/protocol.py
@@ -557,13 +557,21 @@ def _apply_gateway_reasoning(
                 options["thinking"] = {"type": "disabled"}
             else:
                 options["effort"] = effort
-        if _not_default(summary):
-            if summary is None:
-                options["thinking"] = {"type": "disabled"}
-            else:
+                # The gateway only turns thinking on when a `thinking`
+                # block is present; `effort` alone is a no-op upstream.
                 thinking = dict(options.get("thinking") or {})
                 thinking.setdefault("type", "adaptive")
-                thinking["display"] = summary
+                options["thinking"] = thinking
+        if _not_default(summary):
+            # `reasoning_summary` only controls whether the reasoning summary
+            # is surfaced; it never turns thinking off (use
+            # `reasoning.effort=None` for that). `None` maps to
+            # `display="omitted"` -- think, but don't emit a summary -- and is
+            # ignored when thinking is already disabled.
+            thinking = dict(options.get("thinking") or {})
+            if thinking.get("type") != "disabled":
+                thinking.setdefault("type", "adaptive")
+                thinking["display"] = "omitted" if summary is None else summary
                 options["thinking"] = thinking
         return
     body["reasoning"] = {

diff --git a/src/ai/providers/anthropic/protocol.py b/src/ai/providers/anthropic/protocol.py
@@ -505,12 +505,14 @@ def _apply_anthropic_params(
         else:
             _apply_output_config(api_kwargs, {"effort": reasoning.effort})
     if _not_default(summary):
-        if summary is None:
-            api_kwargs["thinking"] = {"type": "disabled"}
-        else:
-            thinking = dict(api_kwargs.get("thinking") or {})
+        # `reasoning_summary` only controls whether the reasoning summary is
+        # surfaced; it never turns thinking off (use `reasoning.effort=None`
+        # for that). `None` maps to `display="omitted"` -- think, but don't
+        # emit a summary -- and is ignored when thinking is already disabled.
+        thinking = dict(api_kwargs.get("thinking") or {})
+        if thinking.get("type") != "disabled":
             thinking.setdefault("type", "adaptive")
-            thinking["display"] = summary
+            thinking["display"] = "omitted" if summary is None else summary
             api_kwargs["thinking"] = thinking
 
     if request_params.tool_calling is not None:

diff --git a/tests/providers/ai_gateway/test_stream.py b/tests/providers/ai_gateway/test_stream.py
@@ -407,6 +407,74 @@ def handler(req: httpx.Request) -> httpx.Response:
         assert captured_body["seed"] == 123
         assert captured_body["futureGatewayField"] is True
 
+    async def test_gateway_anthropic_effort_enables_thinking(self) -> None:
+        captured_body: dict[str, Any] = {}
+
+        def handler(req: httpx.Request) -> httpx.Response:
+            captured_body.update(json.loads(req.content))
+            return httpx.Response(
+                200,
+                text=sse(
+                    {"type": "finish", "finishReason": "stop", "usage": {}}
+                ),
+            )
+
+        model = mock_model(
+            httpx.MockTransport(handler),
+            model_id="anthropic/claude-opus-4-8",
+        )
+        request_params = ai.InferenceRequestParams(
+            reasoning=ai.ReasoningParams(effort="high"),
+        )
+        async with models.stream(
+            model,
+            [user_msg("Hi")],
+            params=request_params,
+        ) as stream:
+            async for _ in stream:
+                pass
+
+        # `effort` alone is a no-op upstream, so an effort-only request must
+        # also carry an adaptive `thinking` block in the Anthropic options.
+        assert captured_body["providerOptions"]["anthropic"] == {
+            "effort": "high",
+            "thinking": {"type": "adaptive"},
+        }
+
+    async def test_gateway_anthropic_summary_none_keeps_thinking(self) -> None:
+        captured_body: dict[str, Any] = {}
+
+        def handler(req: httpx.Request) -> httpx.Response:
+            captured_body.update(json.loads(req.content))
+            return httpx.Response(
+                200,
+                text=sse(
+                    {"type": "finish", "finishReason": "stop", "usage": {}}
+                ),
+            )
+
+        model = mock_model(
+            httpx.MockTransport(handler),
+            model_id="anthropic/claude-opus-4-8",
+        )
+        request_params = ai.InferenceRequestParams(
+            reasoning=ai.ReasoningParams(effort="high"),
+            output=ai.OutputParams(reasoning_summary=None),
+        )
+        async with models.stream(
+            model,
+            [user_msg("Hi")],
+            params=request_params,
+        ) as stream:
+            async for _ in stream:
+                pass
+
+        # reasoning_summary=None -> display="omitted"; thinking stays adaptive.
+        assert captured_body["providerOptions"]["anthropic"] == {
+            "effort": "high",
+            "thinking": {"type": "adaptive", "display": "omitted"},
+        }
+
     async def test_gateway_omits_random_seed(self) -> None:
         captured_body: dict[str, Any] = {}
 

diff --git a/tests/providers/anthropic/test_adapter.py b/tests/providers/anthropic/test_adapter.py
@@ -84,7 +84,8 @@ async def test_params_translate_to_sdk_kwargs(
     )
 
     assert captured["max_tokens"] == 123
-    assert captured["thinking"] == {"type": "disabled"}
+    # reasoning_summary=None omits the summary but keeps thinking on.
+    assert captured["thinking"] == {"type": "adaptive", "display": "omitted"}
     assert captured["output_config"] == {
         "effort": "high",
     }