diff --git a/libs/core/langchain_core/callbacks/base.py b/libs/core/langchain_core/callbacks/base.py
index 383b256cb26b3..2341931904078 100644
--- a/libs/core/langchain_core/callbacks/base.py
+++ b/libs/core/langchain_core/callbacks/base.py
@@ -134,7 +134,10 @@ def on_stream_event(
         tags: list[str] | None = None,
         **kwargs: Any,
     ) -> Any:
-        """Run on each protocol event produced by `stream_v2` / `astream_v2`.
+        """Run on each protocol event from `stream_events(version="v3")`.
+
+        Also fires for the async equivalent
+        (`astream_events(version="v3")`).
 
         Fires once per `MessagesData` event — `message-start`, per-block
         `content-block-start` / `content-block-delta` /
@@ -699,7 +702,7 @@ async def on_stream_event(
         tags: list[str] | None = None,
         **kwargs: Any,
     ) -> None:
-        """Run on each protocol event produced by `astream_v2`.
+        """Run on each protocol event produced by `astream_events(version="v3")`.
 
         See :meth:`LLMManagerMixin.on_stream_event` for the full contract.
         Fires once per `MessagesData` event at event granularity, uniformly
diff --git a/libs/core/langchain_core/callbacks/manager.py b/libs/core/langchain_core/callbacks/manager.py
index 0efab4ad04caa..c1ba9b76c0a29 100644
--- a/libs/core/langchain_core/callbacks/manager.py
+++ b/libs/core/langchain_core/callbacks/manager.py
@@ -749,7 +749,7 @@ def on_llm_error(
         )
 
     def on_stream_event(self, event: MessagesData, **kwargs: Any) -> None:
-        """Run on each protocol event from `stream_v2`.
+        """Run on each protocol event from `stream_events(version="v3")`.
 
         Args:
             event: The protocol event.
@@ -871,7 +871,7 @@ async def on_llm_error(
         )
 
     async def on_stream_event(self, event: MessagesData, **kwargs: Any) -> None:
-        """Run on each protocol event from `astream_v2`.
+        """Run on each protocol event from `astream_events(version="v3")`.
 
         Args:
             event: The protocol event.
diff --git a/libs/core/langchain_core/language_models/_compat_bridge.py b/libs/core/langchain_core/language_models/_compat_bridge.py
index bc6928d898128..5c2cdf0ba0d96 100644
--- a/libs/core/langchain_core/language_models/_compat_bridge.py
+++ b/libs/core/langchain_core/language_models/_compat_bridge.py
@@ -61,6 +61,15 @@
 if TYPE_CHECKING:
     from collections.abc import AsyncIterator, Iterator
 
+    from langchain_protocol.protocol import (
+        BlockDelta,
+        BlockDeltaFields,
+        ContentBlockDelta,
+        DataDelta,
+        ReasoningDelta,
+        TextDelta,
+    )
+
     from langchain_core.outputs import ChatGenerationChunk
 
 
@@ -101,6 +110,38 @@ def _to_finalized_block(block: CompatBlock) -> FinalizedContentBlock:
     return cast("FinalizedContentBlock", block)
 
 
+def _to_block_delta_fields(block: CompatBlock) -> BlockDeltaFields:
+    """Narrow a `CompatBlock` working dict to `BlockDeltaFields` (type-only cast)."""
+    return cast("BlockDeltaFields", block)
+
+
+def _to_content_delta(block: CompatBlock) -> ContentBlockDelta:
+    """Convert a content-block slice/snapshot to an explicit protocol delta.
+
+    `text` / `reasoning` blocks and blocks carrying `data` map to their typed
+    deltas; anything else is wrapped as a `block-delta` snapshot of its fields.
+    """
+    btype = block.get("type")
+    if btype == "text":
+        return cast("TextDelta", {"type": "text-delta", "text": block.get("text", "")})
+    if btype == "reasoning":
+        return cast(
+            "ReasoningDelta",
+            {"type": "reasoning-delta", "reasoning": block.get("reasoning", "")},
+        )
+    if "data" in block:
+        delta = cast("DataDelta", {"type": "data-delta", "data": block.get("data", "")})
+        if block.get("encoding") == "base64":
+            delta["encoding"] = "base64"
+        return delta
+    # Copy the fields: callers may pass the live accumulator dict, which is
+    # mutated in place by later chunks and must not leak into emitted events.
+    return cast(
+        "BlockDelta",
+        {"type": "block-delta", "fields": _to_block_delta_fields(dict(block))},
+    )
+
+
 # ---------------------------------------------------------------------------
 # Block iteration
 # ---------------------------------------------------------------------------
@@ -236,6 +277,8 @@ def _should_emit_delta(block: CompatBlock) -> bool:
         return bool(
             block.get("args") or block.get("id") or block.get("name"),
         )
+    if "data" in block:
+        return bool(block.get("data"))
     return False
 
 
@@ -281,6 +324,15 @@ def _accumulate(state: CompatBlock | None, delta: CompatBlock) -> CompatBlock:
             state["id"] = delta["id"]
         if delta.get("name") is not None:
             state["name"] = delta["name"]
+    elif btype == dtype and "data" in delta:
+        state["data"] = (state.get("data", "") or "") + (delta.get("data") or "")
+        for key, value in delta.items():
+            if key in ("type", "data") or value is None:
+                continue
+            if key == "extras" and isinstance(value, dict):
+                state["extras"] = {**(state.get("extras") or {}), **value}
+            else:
+                state[key] = value
     else:
         # Self-contained or already-finalized types: replace wholesale.
         state.clear()
@@ -429,11 +481,11 @@ def _to_protocol_usage(usage: dict[str, Any] | None) -> UsageInfo | None:
     """Convert accumulated usage to the protocol's `UsageInfo` shape."""
     if usage is None:
         return None
-    result: UsageInfo = {}
+    result: dict[str, Any] = {}
     for key in ("input_tokens", "output_tokens", "total_tokens", "cached_tokens"):
         if key in usage:
             result[key] = usage[key]
-    return result or None
+    return cast("UsageInfo", result) if result else None
 
 
 # ---------------------------------------------------------------------------
@@ -445,10 +497,10 @@ def _build_message_start(
     msg: BaseMessage,
     message_id: str | None,
 ) -> MessageStartData:
-    start_data = MessageStartData(event="message-start", role="ai")
+    start_data = MessageStartData(event="message-start", role="ai", id="")
     resolved_id = message_id if message_id is not None else getattr(msg, "id", None)
     if resolved_id:
-        start_data["message_id"] = resolved_id
+        start_data["id"] = resolved_id
     start_metadata = _extract_start_metadata(msg.response_metadata or {})
     if start_metadata:
         start_data["metadata"] = start_metadata
@@ -464,13 +516,13 @@ def _build_message_finish(
     # `MessageFinishData`; the provider's raw `finish_reason` /
     # `stop_reason` now rides inside `metadata` alongside other
     # response metadata. Pass it through unchanged.
-    finish_data = MessageFinishData(event="message-finish")
+    finish_data: dict[str, Any] = {"event": "message-finish"}
     usage_info = _to_protocol_usage(usage)
     if usage_info is not None:
         finish_data["usage"] = usage_info
     if response_metadata:
         finish_data["metadata"] = dict(response_metadata)
-    return finish_data
+    return cast("MessageFinishData", finish_data)
 
 
 def _finalize_and_build_finish(
@@ -481,7 +533,7 @@ def _finalize_and_build_finish(
     return ContentBlockFinishData(
         event="content-block-finish",
         index=wire_idx,
-        content_block=_finalize_block(block),
+        content=_finalize_block(block),
     )
 
 
@@ -497,12 +549,12 @@ def chunks_to_events(
 ) -> Iterator[MessagesData]:
     """Convert a stream of `ChatGenerationChunk` to protocol events.
 
-    Blocks stream one at a time: when a chunk carries a different block
-    identifier than the currently-open one, the open block is finished
-    before the new block starts, matching the protocol's no-interleave
-    rule.  Source-side identifiers (from the block's `index` field, which
-    may be int or string) are translated to sequential `uint` wire
-    indices.
+    Blocks are tracked independently by source-side identifier. Providers
+    such as Anthropic can interleave parallel tool-call chunks by index, so
+    each first-seen block gets a `content-block-start`, deltas keep their
+    stable wire index, and all open blocks are finalized at message end.
+    Source-side identifiers (from the block's `index` field, which may be
+    int or string) are translated to sequential `uint` wire indices.
 
     Args:
         chunks: Iterator of `ChatGenerationChunk` from `_stream()`.
@@ -512,9 +564,7 @@ def chunks_to_events(
         `MessagesData` lifecycle events.
     """
     started = False
-    open_key: Any = None
-    open_block: CompatBlock | None = None
-    open_wire_idx: int = 0
+    blocks: dict[Any, tuple[int, CompatBlock]] = {}
     next_wire_idx = 0
     usage: dict[str, Any] | None = None
     response_metadata: dict[str, Any] = {}
@@ -545,25 +595,29 @@ def chunks_to_events(
             yield _build_message_start(msg, message_id)
 
         for key, block in _iter_protocol_blocks(msg):
-            if key != open_key:
-                if open_block is not None:
-                    yield _finalize_and_build_finish(open_wire_idx, open_block)
-                open_key = key
-                open_wire_idx = next_wire_idx
+            if key not in blocks:
+                wire_idx = next_wire_idx
                 next_wire_idx += 1
-                open_block = dict(block)
+                blocks[key] = (wire_idx, dict(block))
                 yield ContentBlockStartData(
                     event="content-block-start",
-                    index=open_wire_idx,
-                    content_block=_start_skeleton(block),
+                    index=wire_idx,
+                    content=_start_skeleton(block),
                 )
             else:
-                open_block = _accumulate(open_block, block)
+                wire_idx, existing = blocks[key]
+                blocks[key] = (wire_idx, _accumulate(existing, block))
             if _should_emit_delta(block):
+                wire_idx, current = blocks[key]
+                is_block_delta = block.get("type") in (
+                    "tool_call_chunk",
+                    "server_tool_call_chunk",
+                )
+                delta_source = current if is_block_delta else block
                 yield ContentBlockDeltaData(
                     event="content-block-delta",
-                    index=open_wire_idx,
-                    content_block=_to_protocol_block(block),
+                    index=wire_idx,
+                    delta=_to_content_delta(delta_source or block),
                 )
 
         if msg.usage_metadata:
@@ -572,8 +626,8 @@ def chunks_to_events(
     if not started:
         return
 
-    if open_block is not None:
-        yield _finalize_and_build_finish(open_wire_idx, open_block)
+    for wire_idx, block in blocks.values():
+        yield _finalize_and_build_finish(wire_idx, block)
 
     yield _build_message_finish(
         usage=usage,
@@ -588,9 +642,7 @@ async def achunks_to_events(
 ) -> AsyncIterator[MessagesData]:
     """Async variant of `chunks_to_events`."""
     started = False
-    open_key: Any = None
-    open_block: CompatBlock | None = None
-    open_wire_idx: int = 0
+    blocks: dict[Any, tuple[int, CompatBlock]] = {}
     next_wire_idx = 0
     usage: dict[str, Any] | None = None
     response_metadata: dict[str, Any] = {}
@@ -615,25 +667,29 @@ async def achunks_to_events(
             yield _build_message_start(msg, message_id)
 
         for key, block in _iter_protocol_blocks(msg):
-            if key != open_key:
-                if open_block is not None:
-                    yield _finalize_and_build_finish(open_wire_idx, open_block)
-                open_key = key
-                open_wire_idx = next_wire_idx
+            if key not in blocks:
+                wire_idx = next_wire_idx
                 next_wire_idx += 1
-                open_block = dict(block)
+                blocks[key] = (wire_idx, dict(block))
                 yield ContentBlockStartData(
                     event="content-block-start",
-                    index=open_wire_idx,
-                    content_block=_start_skeleton(block),
+                    index=wire_idx,
+                    content=_start_skeleton(block),
                 )
             else:
-                open_block = _accumulate(open_block, block)
+                wire_idx, existing = blocks[key]
+                blocks[key] = (wire_idx, _accumulate(existing, block))
             if _should_emit_delta(block):
+                wire_idx, current = blocks[key]
+                is_block_delta = block.get("type") in (
+                    "tool_call_chunk",
+                    "server_tool_call_chunk",
+                )
+                delta_source = current if is_block_delta else block
                 yield ContentBlockDeltaData(
                     event="content-block-delta",
-                    index=open_wire_idx,
-                    content_block=_to_protocol_block(block),
+                    index=wire_idx,
+                    delta=_to_content_delta(delta_source or block),
                 )
 
         if msg.usage_metadata:
@@ -642,8 +698,8 @@ async def achunks_to_events(
     if not started:
         return
 
-    if open_block is not None:
-        yield _finalize_and_build_finish(open_wire_idx, open_block)
+    for wire_idx, block in blocks.values():
+        yield _finalize_and_build_finish(wire_idx, block)
 
     yield _build_message_finish(
         usage=usage,
@@ -682,18 +738,18 @@ def message_to_events(
         yield ContentBlockStartData(
             event="content-block-start",
             index=wire_idx,
-            content_block=_start_skeleton(block),
+            content=_start_skeleton(block),
         )
         if _should_emit_delta(block):
             yield ContentBlockDeltaData(
                 event="content-block-delta",
                 index=wire_idx,
-                content_block=_to_protocol_block(block),
+                delta=_to_content_delta(block),
             )
         yield ContentBlockFinishData(
             event="content-block-finish",
             index=wire_idx,
-            content_block=_finalize_block(block),
+            content=_finalize_block(block),
         )
 
     yield _build_message_finish(
diff --git a/libs/core/langchain_core/language_models/chat_model_stream.py b/libs/core/langchain_core/language_models/chat_model_stream.py
index 27b200b58bdc2..6b25a66fd2a4f 100644
--- a/libs/core/langchain_core/language_models/chat_model_stream.py
+++ b/libs/core/langchain_core/language_models/chat_model_stream.py
@@ -1,8 +1,8 @@
 """Per-message streaming objects for content-block protocol events.
 
 `ChatModelStream` is the synchronous variant returned by
-`BaseChatModel.stream_v2()`.  `AsyncChatModelStream` is the
-asynchronous variant returned by `BaseChatModel.astream_v2()`.
+`BaseChatModel.stream_events(version="v3")`.  `AsyncChatModelStream` is the
+asynchronous variant returned by `BaseChatModel.astream_events(version="v3")`.
 
 Both expose typed projection properties (`.text`, `.reasoning`,
 `.tool_calls`, `.usage`, `.output`) that accumulate protocol
@@ -24,7 +24,7 @@
 from langchain_core.messages import AIMessage
 
 if TYPE_CHECKING:
-    from collections.abc import Awaitable, Callable, Generator, Iterator
+    from collections.abc import Awaitable, Callable, Generator, Iterator, Mapping
 
     from langchain_protocol.protocol import (
         ContentBlockDeltaData,
@@ -65,6 +65,54 @@ def _merge_chunk_into_store(
     store[idx] = existing
 
 
+def _merge_block_delta_into_store(
+    store: dict[int, dict[str, Any]],
+    idx: int,
+    fields: dict[str, Any],
+) -> None:
+    """Overlay the non-`None` fields of a block-delta snapshot onto `store[idx]`.
+
+    Creates the entry when `idx` has not been seen; existing keys are replaced.
+    """
+    target = store.setdefault(idx, {})
+    target.update({key: value for key, value in fields.items() if value is not None})
+
+
+def _event_content_block(data: Mapping[str, Any]) -> dict[str, Any] | None:
+    """Return the `content` block, falling back to legacy `content_block` if falsy."""
+    block = data.get("content") or data.get("content_block")
+    return block if isinstance(block, dict) else None
+
+
+def _legacy_block_to_delta(block: Mapping[str, Any]) -> dict[str, Any]:
+    """Translate a pre-delta `content_block` payload into an explicit delta.
+
+    Blocks that are neither text, reasoning, nor `data`-bearing are wrapped whole.
+    """
+    kind = block.get("type")
+    if kind == "text":
+        return {"type": "text-delta", "text": block.get("text", "")}
+    if kind == "reasoning":
+        return {"type": "reasoning-delta", "reasoning": block.get("reasoning", "")}
+    if "data" not in block:
+        return {"type": "legacy-block-delta", "fields": block}
+    payload = {"type": "data-delta", "data": block.get("data", "")}
+    if block.get("encoding") == "base64":
+        payload["encoding"] = "base64"
+    return payload
+
+
+def _event_delta(data: Mapping[str, Any]) -> dict[str, Any] | None:
+    """Pick the event's `delta`, or derive one from a legacy `content_block`."""
+    explicit = data.get("delta")
+    if isinstance(explicit, dict):
+        # Current wire shape: the delta is already explicit.
+        return explicit
+    # Older emitters put the raw block under `content_block`; convert it.
+    legacy = data.get("content_block")
+    return _legacy_block_to_delta(legacy) if isinstance(legacy, dict) else None
+
+
 def _sweep_chunk_store(
     store: dict[int, dict[str, Any]],
     *,
@@ -498,9 +546,9 @@ def message_id(self) -> str | None:
     def set_message_id(self, message_id: str) -> None:
         """Assign the stable message identifier once the run starts.
 
-        Called by the stream driver (`stream_v2` / `astream_v2`) after
-        `on_chat_model_start` produces a run id. Not intended for
-        end-user code.
+        Called by the stream driver (`stream_events(version="v3")` /
+        `astream_events(version="v3")`) after `on_chat_model_start` produces a run
+        id. Not intended for end-user code.
         """
         self._message_id = message_id
 
@@ -520,19 +568,21 @@ def output_message(self) -> AIMessage | None:
 
         Unlike `ChatModelStream.output` (which blocks until the stream
         finishes), this never pumps, blocks, or raises. Intended for the
-        stream driver (`stream_v2` / `astream_v2`) to check whether the
-        stream produced a message before firing `on_llm_end` callbacks.
+        stream driver (`stream_events(version="v3")` and its async
+        equivalent) to check whether the stream produced a message before
+        firing `on_llm_end` callbacks.
         """
         return self._output_message
 
     # -- Event ingestion (public) ------------------------------------------
 
-    def dispatch(self, event: MessagesData) -> None:
+    def dispatch(self, event: Mapping[str, Any]) -> None:
         """Route a protocol event to the appropriate internal handler.
 
         Public entry point for feeding events into the stream. Called by
-        the stream driver (`stream_v2` / `astream_v2`'s pump) and by
-        any observer or test that needs to inject protocol events.
+        the stream driver (the `stream_events(version="v3")` pump and its
+        async equivalent) and by any observer or test that needs to
+        inject protocol events.
         """
         self._record_event(event)
         event_type = event.get("event")
@@ -550,25 +600,27 @@ def dispatch(self, event: MessagesData) -> None:
 
     # -- Internal push API (called by dispatch) ----------------------------
 
-    def _record_event(self, event: MessagesData) -> None:
+    def _record_event(self, event: Mapping[str, Any]) -> None:
         """Append a raw event to the replay buffer."""
-        self._events.append(event)
+        self._events.append(cast("MessagesData", event))
 
     def _push_message_start(self, data: MessageStartData) -> None:
         """Process a `message-start` event."""
         self._start_metadata = data.get("metadata")
+        message_id = data.get("id")
+        if message_id:
+            self._message_id = message_id
 
     def _push_content_block_delta(self, data: ContentBlockDeltaData) -> None:
         """Process a `content-block-delta` event."""
-        block = data.get("content_block")
-        if block is None:
+        delta = _event_delta(data)
+        if delta is None:
             return
-        btype = block.get("type", "")
         event_idx = data.get("index")
+        dtype = delta.get("type", "")
 
-        if btype == "text":
-            text_block = cast("TextContentBlock", block)
-            delta_text = text_block.get("text", "")
+        if dtype == "text-delta":
+            delta_text = delta.get("text", "")
             if delta_text:
                 self._text_acc += delta_text
                 if event_idx is not None:
@@ -576,9 +628,8 @@ def _push_content_block_delta(self, data: ContentBlockDeltaData) -> None:
                         self._text_per_block.get(event_idx, "") + delta_text
                     )
                 self._text_proj.push(delta_text)
-        elif btype == "reasoning":
-            reasoning_block = cast("ReasoningContentBlock", block)
-            delta_r = reasoning_block.get("reasoning", "")
+        elif dtype == "reasoning-delta":
+            delta_r = delta.get("reasoning", "")
             if delta_r:
                 self._reasoning_acc += delta_r
                 if event_idx is not None:
@@ -586,35 +637,94 @@ def _push_content_block_delta(self, data: ContentBlockDeltaData) -> None:
                         self._reasoning_per_block.get(event_idx, "") + delta_r
                     )
                 self._reasoning_proj.push(delta_r)
-        elif btype == "tool_call_chunk":
+        elif dtype == "block-delta":
+            fields = delta.get("fields")
+            if not isinstance(fields, dict):
+                return
+            btype = fields.get("type", "")
+            if btype == "tool_call_chunk":
+                tcc = cast("ToolCallChunk", fields)
+                idx = data.get("index")
+                if idx is None:
+                    idx = tcc.get("index", len(self._tool_call_chunks))
+                _merge_block_delta_into_store(self._tool_call_chunks, idx, dict(tcc))
+                chunk_block: ToolCallChunk = {
+                    "type": "tool_call_chunk",
+                    "id": tcc.get("id"),
+                    "name": tcc.get("name"),
+                    "args": tcc.get("args"),
+                }
+                if "index" in tcc:
+                    chunk_block["index"] = tcc["index"]
+                self._tool_calls_proj.push(chunk_block)
+            elif btype == "server_tool_call_chunk":
+                stcc = cast("ServerToolCallChunk", fields)
+                idx = data.get("index")
+                if idx is None:
+                    idx = len(self._server_tool_call_chunks)
+                _merge_block_delta_into_store(
+                    self._server_tool_call_chunks,
+                    idx,
+                    dict(stcc),
+                )
+        elif dtype == "legacy-block-delta":
+            fields = delta.get("fields")
+            if not isinstance(fields, dict):
+                return
+            btype = fields.get("type", "")
+            if btype == "tool_call_chunk":
+                tcc = cast("ToolCallChunk", fields)
+                idx = data.get("index")
+                if idx is None:
+                    idx = tcc.get("index", len(self._tool_call_chunks))
+                _merge_chunk_into_store(self._tool_call_chunks, idx, dict(tcc))
+                legacy_chunk_block: ToolCallChunk = {
+                    "type": "tool_call_chunk",
+                    "id": tcc.get("id"),
+                    "name": tcc.get("name"),
+                    "args": tcc.get("args"),
+                }
+                if "index" in tcc:
+                    legacy_chunk_block["index"] = tcc["index"]
+                self._tool_calls_proj.push(legacy_chunk_block)
+            elif btype == "server_tool_call_chunk":
+                stcc = cast("ServerToolCallChunk", fields)
+                idx = data.get("index")
+                if idx is None:
+                    idx = len(self._server_tool_call_chunks)
+                _merge_chunk_into_store(
+                    self._server_tool_call_chunks,
+                    idx,
+                    dict(stcc),
+                )
+        elif dtype == "data-delta":
+            # Binary/modal payload deltas are reflected in the final
+            # content-block finish event; there is no dedicated projection.
+            return
+        else:
+            # Transitional legacy path for old `content_block` deltas that
+            # should not be reachable after `_event_delta` conversion, kept
+            # here for custom in-tree test fixtures or third-party emitters.
+            block = data.get("content_block")
+            if not isinstance(block, dict):
+                return
+            btype = block.get("type", "")
+            if btype != "tool_call_chunk":
+                return
             tcc = cast("ToolCallChunk", block)
-            # The protocol puts the block index on the event
-            # (`ContentBlockDeltaData`), not inside `content_block`.
-            # Fall back to `content_block.index` for providers that echo
-            # it there.
             idx = data.get("index")
             if idx is None:
                 idx = tcc.get("index", len(self._tool_call_chunks))
             _merge_chunk_into_store(self._tool_call_chunks, idx, dict(tcc))
-            chunk_block: ToolCallChunk = {
+            fallback_chunk_block: ToolCallChunk = {
                 "type": "tool_call_chunk",
                 "id": tcc.get("id"),
                 "name": tcc.get("name"),
                 "args": tcc.get("args"),
             }
             if "index" in tcc:
-                chunk_block["index"] = tcc["index"]
-            self._tool_calls_proj.push(chunk_block)
-        elif btype == "server_tool_call_chunk":
-            stcc = cast("ServerToolCallChunk", block)
-            idx = data.get("index")
-            if idx is None:
-                idx = len(self._server_tool_call_chunks)
-            _merge_chunk_into_store(
-                self._server_tool_call_chunks,
-                idx,
-                dict(stcc),
-            )
+                fallback_chunk_block["index"] = tcc["index"]
+            self._tool_calls_proj.push(fallback_chunk_block)
 
     def _resolve_block_text(self, idx: int | None, full_text: str) -> str:
         """Return authoritative text for a single text block at `idx`.
@@ -681,7 +791,7 @@ def _resolve_block_reasoning(self, idx: int | None, full_r: str) -> str:
 
     def _push_content_block_finish(self, data: ContentBlockFinishData) -> None:
         """Process a `content-block-finish` event."""
-        block = data.get("content_block")
+        block = _event_content_block(data)
         if block is None:
             return
         btype = block.get("type", "")
@@ -764,7 +874,7 @@ def _push_content_block_finish(self, data: ContentBlockFinishData) -> None:
         ):
             if btype == "server_tool_call" and idx is not None:
                 self._server_tool_call_chunks.pop(idx, None)
-            finalized = block
+            finalized = cast("FinalizedContentBlock", block)
 
         if finalized is not None and idx is not None:
             # Backfill the wire index onto the finalized block when the
@@ -785,7 +895,7 @@ def _finish(self, data: MessageFinishData) -> None:
         """Process a `message-finish` event."""
         self._done = True
         self._usage_value = data.get("usage")
-        self._finish_metadata = data.get("metadata")
+        self._finish_metadata = cast("dict[str, Any] | None", data.get("metadata"))
 
         # Finalize any unswept chunks — both client- and server-side.
         _sweep_chunk_store(
@@ -829,8 +939,8 @@ def _finish(self, data: MessageFinishData) -> None:
     def fail(self, error: BaseException) -> None:
         """Mark the stream as errored and propagate to all projections.
 
-        Public API — called by the stream driver (`stream_v2` /
-        `astream_v2`) when the underlying producer raises, by
+        Public API — called by the stream driver (`stream_events(version="v3")` /
+        `astream_events(version="v3")`) when the underlying producer raises, by
         `dispatch` when an `error` protocol event arrives, and by
         cancellation paths.
         """
@@ -871,8 +981,9 @@ def _assemble_message(self) -> AIMessage:
                 response_metadata["model_name"] = self._start_metadata["model"]
         if self._finish_metadata:
             response_metadata.update(self._finish_metadata)
-        # Pin `output_version` last: `stream_v2` always assembles content as v1
-        # protocol blocks, regardless of the provider's configured output format.
+        # Pin `output_version` last: `stream_events(version="v3")` always
+        # assembles content as v1 protocol blocks, regardless of the
+        # provider's configured output format.
         # A provider-supplied `output_version` in finish metadata (e.g.
         # `"responses/v1"` from `ChatOpenAI(use_responses_api=True, ...)`) would
         # otherwise cause `AIMessage.content_blocks` to re-run the wrong
@@ -918,7 +1029,7 @@ def _assemble_message(self) -> AIMessage:
 class ChatModelStream(_ChatModelStreamBase):
     """Synchronous per-message streaming object for a single LLM response.
 
-    Returned by `BaseChatModel.stream_v2()`.  Content-block protocol
+    Returned by `BaseChatModel.stream_events(version="v3")`.  Content-block protocol
     events are fed into this object and accumulated into typed projections.
 
     Projections (always return the same cached object):
@@ -973,7 +1084,7 @@ def bind_pump(self, pump_one: Callable[[], bool]) -> None:
         """Bind a pump for standalone streaming.
 
         Delegates to `set_request_more`.  Used by
-        `BaseChatModel.stream_v2()`.
+        `BaseChatModel.stream_events(version="v3")`.
         """
         self.set_request_more(pump_one)
 
@@ -1074,7 +1185,7 @@ def _drain(self) -> None:
 class AsyncChatModelStream(_ChatModelStreamBase):
     """Asynchronous per-message streaming object for a single LLM response.
 
-    Returned by `BaseChatModel.astream_v2()`.  Content-block events
+    Returned by `BaseChatModel.astream_events(version="v3")`.  Content-block events
     are fed into this object by a background producer task.
 
     Projections:
@@ -1120,7 +1231,7 @@ def __init__(  # noqa: D107
         # Teardown callback invoked by `aclose()` only when the producer
         # task was cancelled before its body ran (so the normal
         # `_produce` CancelledError handler — which fires
-        # `on_llm_error` — never executed). Set by `astream_v2`.
+        # `on_llm_error` — never executed). Set by `astream_events(version="v3")`.
         self._on_aclose_fail: Callable[[BaseException], Awaitable[None]] | None = None
 
     # -- Pump/pull wiring (async) ------------------------------------------
@@ -1261,7 +1372,7 @@ def _link(_: asyncio.Task[None]) -> None:
                 task.remove_done_callback(_link)
 
         # If the task was cancelled before `_produce` ran (e.g.
-        # `astream_v2()` immediately followed by `aclose()`), the stream
+        # `astream_events(version="v3")` immediately followed by `aclose()`), the stream
         # never reached `_produce`'s CancelledError handler — its
         # projections are still pending and no end-of-lifecycle callback
         # has fired. Resolve both here so callers of `await stream.output`
@@ -1290,10 +1401,10 @@ async def __aexit__(
 
     # -- Internal API (extend base to drive async projections) -------------
 
-    def _record_event(self, event: MessagesData) -> None:
+    def _record_event(self, event: Mapping[str, Any]) -> None:
         """Record event and push to async event replay projection."""
         super()._record_event(event)
-        self._events_proj.push(event)
+        self._events_proj.push(cast("MessagesData", event))
 
     def _finish(self, data: MessageFinishData) -> None:
         """Finish base projections and async-only projections."""
diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py
index 2f5df7a3e46b4..ea4194e69cda4 100644
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@@ -10,7 +10,7 @@
 from collections.abc import AsyncIterator, Callable, Iterator, Sequence
 from functools import cached_property
 from operator import itemgetter
-from typing import TYPE_CHECKING, Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast, overload
 
 from langchain_protocol.protocol import MessageFinishData
 from pydantic import BaseModel, ConfigDict, Field, model_validator
@@ -79,7 +79,7 @@
 from langchain_core.outputs.chat_generation import merge_chat_generation_chunks
 from langchain_core.prompt_values import ChatPromptValue, PromptValue, StringPromptValue
 from langchain_core.rate_limiters import BaseRateLimiter
-from langchain_core.runnables import RunnableMap, RunnablePassthrough
+from langchain_core.runnables import RunnableBinding, RunnableMap, RunnablePassthrough
 from langchain_core.runnables.config import ensure_config, run_in_executor
 from langchain_core.tracers._streaming import (
     _StreamingCallbackHandler,
@@ -95,11 +95,13 @@
 if TYPE_CHECKING:
     import builtins
     import uuid
+    from collections.abc import Awaitable
 
     from langchain_protocol.protocol import MessagesData
 
     from langchain_core.output_parsers.base import OutputParserLike
     from langchain_core.runnables import Runnable, RunnableConfig
+    from langchain_core.runnables.schema import StreamEvent
     from langchain_core.tools import BaseTool
 
 
@@ -510,7 +512,7 @@ def _streaming_disabled(self, **kwargs: Any) -> bool:
         """Return whether streaming is hard-disabled for this call.
 
         Shared opt-outs honored by both `_should_stream` and
-        `_should_stream_v2` — these override any affirmative trigger
+        `_should_use_protocol_streaming` — these override any affirmative trigger
         (attached handler, `stream=True`, etc.):
 
         - `self.disable_streaming is True`
@@ -568,7 +570,7 @@ def _should_stream(
         handlers = run_manager.handlers if run_manager else []
         return any(isinstance(h, _StreamingCallbackHandler) for h in handlers)
 
-    def _should_stream_v2(
+    def _should_use_protocol_streaming(
         self,
         *,
         async_api: bool,
@@ -637,8 +639,8 @@ def _iter_v2_events(
     ) -> Iterator[MessagesData]:
         """Drive the v2 event generator with per-event dispatch.
 
-        Shared between `stream_v2`'s pump and the invoke-time v2 branch
-        in `_generate_with_cache`. Picks the native
+        Shared between the `stream_events(version="v3")` pump and the
+        invoke-time v2 branch in `_generate_with_cache`. Picks the native
         `_stream_chat_model_events` hook when the subclass provides one,
         else bridges `_stream` chunks via `chunks_to_events`. Each event
         is dispatched into `stream` and fired as `on_stream_event` on
@@ -969,10 +971,10 @@ async def astream(
             LLMResult(generations=[[generation]]),
         )
 
-    # --- stream_v2 / astream_v2 ---
+    # --- stream_events v3 ---
 
     @beta()
-    def stream_v2(
+    def _chat_model_stream_v3(
         self,
         input: LanguageModelInput,
         config: RunnableConfig | None = None,
@@ -980,43 +982,19 @@ def stream_v2(
         stop: list[str] | None = None,
         **kwargs: Any,
     ) -> ChatModelStream:
-        """Stream content-block lifecycle events for a single model call.
+        """Internal v3 sync streaming implementation.
 
-        Returns a `ChatModelStream` with typed projections
-        (`.text`, `.reasoning`, `.tool_calls`, `.output`).
-
-        !!! warning
-
-            This API is experimental and may change.
-
-        !!! note "Always produces v1-shaped content"
-
-            `ChatModelStream.output.content` is always a list of v1
-            content blocks (text / reasoning / tool_call / image / …),
-            regardless of the model's `output_version` attribute. The
-            setting only affects the legacy `stream()` / `astream()` /
-            `invoke()` paths. If you're mixing `stream_v2` with those
-            paths in the same pipeline and need a consistent output
-            shape across them, set `output_version="v1"` on the model.
-
-        Args:
-            input: The model input.
-            config: Optional runnable config.
-            stop: Optional list of stop words.
-            **kwargs: Additional keyword arguments passed to the model.
-
-        Returns:
-            A `ChatModelStream` with typed projections.
+        Public entry point: `stream_events(version='v3')`.
         """
         config = ensure_config(config)
         messages = self._convert_input(input).to_messages()
         input_messages = _normalize_messages(messages)
 
         # Strip tracing-only kwargs before forwarding to `_stream` — matches
-        # `stream()` / `astream()`. Provider clients reject unknown kwargs, so
-        # `.with_structured_output().stream_v2(...)` and any other binding that
-        # carries `ls_structured_output_format` / `structured_output_format`
-        # would raise without this pop.
+        # `stream()` / `astream()`. Provider clients reject unknown kwargs,
+        # so `.with_structured_output().stream_events(version="v3", ...)`
+        # and any other binding that carries `ls_structured_output_format`
+        # / `structured_output_format` would raise without this pop.
         ls_structured_output_format = kwargs.pop(
             "ls_structured_output_format", None
         ) or kwargs.pop("structured_output_format", None)
@@ -1133,7 +1111,7 @@ def pump_one() -> bool:
         return stream
 
     @beta()
-    async def astream_v2(
+    async def _achat_model_stream_v3(
         self,
         input: LanguageModelInput,
         config: RunnableConfig | None = None,
@@ -1141,36 +1119,16 @@ async def astream_v2(
         stop: list[str] | None = None,
         **kwargs: Any,
     ) -> AsyncChatModelStream:
-        """Async variant of `stream_v2`.
-
-        Returns an `AsyncChatModelStream` whose projections are
-        async-iterable and awaitable.
-
-        !!! warning
-
-            This API is experimental and may change.
+        """Internal v3 async streaming implementation.
 
-        !!! note "Always produces v1-shaped content"
-
-            The assembled message's content is always a list of v1
-            content blocks, regardless of the model's `output_version`
-            attribute — see `stream_v2` for the full rationale.
-
-        Args:
-            input: The model input.
-            config: Optional runnable config.
-            stop: Optional list of stop words.
-            **kwargs: Additional keyword arguments passed to the model.
-
-        Returns:
-            An `AsyncChatModelStream` with typed projections.
+        Public entry point: `astream_events(version='v3')`.
         """
         config = ensure_config(config)
         messages = self._convert_input(input).to_messages()
         input_messages = _normalize_messages(messages)
 
-        # Strip tracing-only kwargs before forwarding — see `stream_v2` for the
-        # full rationale.
+        # Strip tracing-only kwargs before forwarding — see the sync v3
+        # implementation for the full rationale.
         ls_structured_output_format = kwargs.pop(
             "ls_structured_output_format", None
         ) or kwargs.pop("structured_output_format", None)
@@ -1299,6 +1257,118 @@ async def _on_aclose_fail(exc: BaseException) -> None:
         stream._on_aclose_fail = _on_aclose_fail  # noqa: SLF001
         return stream
 
+    @overload  # type: ignore[override]
+    def stream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2"] = "v2",
+        **kwargs: Any,
+    ) -> Iterator[StreamEvent]: ...
+
+    @overload
+    def stream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v3"],
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> ChatModelStream: ...
+
+    def stream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2", "v3"] = "v2",
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> Iterator[StreamEvent] | ChatModelStream:
+        """Stream events from this chat model.
+
+        For `version="v1"` / `"v2"`, defers to the parent `stream_events`
+        (the sync `Runnable` base raises `NotImplementedError`; use
+        `astream_events`). For `version="v3"`, returns a `ChatModelStream`
+        with typed projections (`.text`, `.reasoning`, `.tool_calls`, `.output`).
+
+        !!! warning "Beta"
+
+            `version="v3"` is in beta. The protocol shape, return type,
+            and surface area may change in future releases. Calling it
+            emits a `LangChainBetaWarning` at runtime.
+
+        !!! note "v3 always produces v1-shaped content"
+
+            `ChatModelStream.output.content` is always a list of v1
+            content blocks (text / reasoning / tool_call / image / …),
+            regardless of the model's `output_version` attribute. The
+            setting only affects the legacy `stream()` / `astream()` /
+            `invoke()` paths. If you're mixing
+            `stream_events(version="v3")` with those paths in the same
+            pipeline and need a consistent output shape across them,
+            set `output_version="v1"` on the model.
+
+        Args:
+            input: The model input.
+            config: Optional runnable config.
+            version: Streaming-event schema version. `"v3"` selects the
+                content-block-centric streaming protocol.
+            stop: Optional stop sequences. Passed to the model for
+                `version="v3"`; forwarded to the parent implementation otherwise.
+            **kwargs: Additional keyword arguments. For `version="v3"`,
+                forwarded to the model.
+
+        Returns:
+            For `version="v3"`, a `ChatModelStream` with typed
+            projections. Otherwise an `Iterator[StreamEvent]`.
+        """
+        if version == "v3":
+            return self._chat_model_stream_v3(input, config, stop=stop, **kwargs)
+        return super().stream_events(
+            input, config, version=version, stop=stop, **kwargs
+        )
+
+    @overload
+    def astream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2"] = "v2",
+        **kwargs: Any,
+    ) -> AsyncIterator[StreamEvent]: ...
+
+    @overload
+    def astream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v3"],
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> Awaitable[AsyncChatModelStream]: ...
+
+    def astream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2", "v3"] = "v2",
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[StreamEvent] | Awaitable[AsyncChatModelStream]:
+        """Async variant of `stream_events`. See `stream_events` for full docs."""
+        if version == "v3":
+            return self._achat_model_stream_v3(input, config, stop=stop, **kwargs)
+        # v1/v2: forward to Runnable.astream_events (async generator).
+        return super().astream_events(
+            input, config, version=version, stop=stop, **kwargs
+        )
+
     # --- Custom methods ---
 
     def _combine_llm_outputs(self, _llm_outputs: list[dict | None], /) -> dict:
@@ -1352,12 +1422,13 @@ def _replay_v2_events_for_cache_hit(
     ) -> None:
         """Replay cached messages as v2 events when a v2 handler is attached.
 
-        A warm cache must produce the same `on_stream_event` stream as a cold
-        call so LangGraph-style consumers do not observe behavior that depends
-        on cache state. Gated by `_should_stream_v2` so a `disable_streaming`
-        config that suppresses v2 on cold calls also suppresses it here.
+        A warm cache must produce the same `on_stream_event` stream as a
+        cold call so LangGraph-style consumers do not observe behavior
+        that depends on cache state. Gated by
+        `_should_use_protocol_streaming` so a `disable_streaming` config
+        that suppresses v2 on cold calls also suppresses it here.
         """
-        if run_manager is None or not self._should_stream_v2(
+        if run_manager is None or not self._should_use_protocol_streaming(
             async_api=False, run_manager=run_manager, **kwargs
         ):
             return
@@ -1377,7 +1448,7 @@ async def _areplay_v2_events_for_cache_hit(
         **kwargs: Any,
     ) -> None:
         """Async counterpart to `_replay_v2_events_for_cache_hit`."""
-        if run_manager is None or not self._should_stream_v2(
+        if run_manager is None or not self._should_use_protocol_streaming(
             async_api=True, run_manager=run_manager, **kwargs
         ):
             return
@@ -1814,7 +1885,7 @@ def _generate_with_cache(
         # (native or `_stream` compat bridge) through the shared helper so
         # `on_stream_event` fires per event, then returns a normal `ChatResult`
         # so caching / `on_llm_end` stay on the existing generate path.
-        if self._should_stream_v2(
+        if self._should_use_protocol_streaming(
             async_api=False,
             run_manager=run_manager,
             **kwargs,
@@ -1971,7 +2042,7 @@ async def _agenerate_with_cache(
             await self.rate_limiter.aacquire(blocking=True)
 
         # v2 streaming: see sync counterpart in `_generate_with_cache`.
-        if self._should_stream_v2(
+        if self._should_use_protocol_streaming(
             async_api=True,
             run_manager=run_manager,
             **kwargs,
@@ -2215,6 +2286,18 @@ def dict(self, **kwargs: Any) -> dict:
         starter_dict["_type"] = self._llm_type
         return starter_dict
 
+    @override
+    def bind(self, **kwargs: Any) -> _ChatModelBinding:
+        """Bind kwargs to this chat model, returning a typed `_ChatModelBinding`.
+
+        Overrides `Runnable.bind` so the result preserves chat-model-specific
+        `stream_events` / `astream_events` overloads. Without this override,
+        `model.bind(...).stream_events(version="v3")` would type as
+        `Iterator[Any]` and `await model.bind(...).astream_events(version="v3")`
+        as `Any`, forcing callers to `cast`.
+        """
+        return _ChatModelBinding(bound=self, kwargs=kwargs, config={})
+
     def bind_tools(
         self,
         tools: Sequence[builtins.dict[str, Any] | type | Callable | BaseTool],
@@ -2418,6 +2501,95 @@ class AnswerWithJustification(BaseModel):
         return llm | output_parser
 
 
+class _ChatModelBinding(RunnableBinding[LanguageModelInput, AIMessage]):  # type: ignore[no-redef]
+    """`RunnableBinding` that preserves chat-model-typed v3 overloads.
+
+    Returned by `BaseChatModel.bind` so that callers of the bound runnable's
+    `stream_events(version="v3")` / `astream_events(version="v3")` get the
+    typed `ChatModelStream` / `AsyncChatModelStream` back without needing
+    `cast`. At runtime this is a plain `RunnableBinding`; the subclass
+    exists purely to give the type checker a more specific surface.
+
+    The chat-model narrowing is preserved across further `bind` /
+    `with_config` calls because `RunnableBinding.bind` constructs its
+    result via `self.__class__(...)`.
+    """
+
+    @classmethod
+    @override
+    def lc_id(cls) -> list[str]:
+        """Serialize as `RunnableBinding`.
+
+        At runtime this class is behaviorally identical to `RunnableBinding`;
+        keeping the serialized id stable means existing snapshots and the
+        load mapping continue to work without registering a new entry.
+        """
+        return [*cls.get_lc_namespace(), "RunnableBinding"]
+
+    @overload  # type: ignore[override]
+    def stream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2"] = "v2",
+        **kwargs: Any,
+    ) -> Iterator[StreamEvent]: ...
+
+    @overload
+    def stream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v3"],
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> ChatModelStream: ...
+
+    def stream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2", "v3"] = "v2",
+        **kwargs: Any,
+    ) -> Iterator[StreamEvent] | ChatModelStream:
+        return super().stream_events(input, config, version=version, **kwargs)
+
+    @overload
+    def astream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2"] = "v2",
+        **kwargs: Any,
+    ) -> AsyncIterator[StreamEvent]: ...
+
+    @overload
+    def astream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v3"],
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> Awaitable[AsyncChatModelStream]: ...
+
+    def astream_events(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[StreamEvent] | Awaitable[AsyncChatModelStream]:
+        return cast(
+            "AsyncIterator[StreamEvent] | Awaitable[AsyncChatModelStream]",
+            super().astream_events(input, config, **kwargs),
+        )
+
+
 class SimpleChatModel(BaseChatModel):
     """Simplified implementation for a chat model to inherit from.
 
diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py
index c63b5c6fce07e..18aa976beef43 100644
--- a/libs/core/langchain_core/runnables/base.py
+++ b/libs/core/langchain_core/runnables/base.py
@@ -103,10 +103,6 @@
         AsyncCallbackManagerForChainRun,
         CallbackManagerForChainRun,
     )
-    from langchain_core.language_models.chat_model_stream import (
-        AsyncChatModelStream,
-        ChatModelStream,
-    )
     from langchain_core.prompts.base import BasePromptTemplate
     from langchain_core.runnables.fallbacks import (
         RunnableWithFallbacks as RunnableWithFallbacksT,
@@ -1173,46 +1169,6 @@ async def astream(
         """
         yield await self.ainvoke(input, config, **kwargs)
 
-    def stream_v2(
-        self,
-        input: Input,
-        config: RunnableConfig | None = None,
-        **kwargs: Any | None,
-    ) -> ChatModelStream:
-        """Stream content-block lifecycle events (v2 protocol).
-
-        Implemented by `BaseChatModel` (and forwarded by `RunnableBinding`).
-        Generic `Runnable`s don't participate in the v2 event protocol —
-        use `.stream()` instead.
-
-        Raises:
-            NotImplementedError: Always, on the base `Runnable` class.
-        """
-        msg = (
-            f"{type(self).__name__} does not implement `stream_v2`. "
-            "`stream_v2` is only implemented by chat models; use `.stream()` "
-            "for generic Runnables."
-        )
-        raise NotImplementedError(msg)
-
-    async def astream_v2(
-        self,
-        input: Input,
-        config: RunnableConfig | None = None,
-        **kwargs: Any | None,
-    ) -> AsyncChatModelStream:
-        """Async variant of `stream_v2`. See that method.
-
-        Raises:
-            NotImplementedError: Always, on the base `Runnable` class.
-        """
-        msg = (
-            f"{type(self).__name__} does not implement `astream_v2`. "
-            "`astream_v2` is only implemented by chat models; use `.astream()` "
-            "for generic Runnables."
-        )
-        raise NotImplementedError(msg)
-
     @overload
     def astream_log(
         self,
@@ -1314,7 +1270,8 @@ async def astream_log(
         ):
             yield item
 
-    async def astream_events(
+    @overload
+    def astream_events(
         self,
         input: Any,
         config: RunnableConfig | None = None,
@@ -1327,7 +1284,32 @@ async def astream_events(
         exclude_types: Sequence[str] | None = None,
         exclude_tags: Sequence[str] | None = None,
         **kwargs: Any,
-    ) -> AsyncIterator[StreamEvent]:
+    ) -> AsyncIterator[StreamEvent]: ...
+
+    @overload
+    def astream_events(
+        self,
+        input: Any,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v3"],
+        **kwargs: Any,
+    ) -> Awaitable[Any]: ...
+
+    def astream_events(
+        self,
+        input: Any,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2", "v3"] = "v2",
+        include_names: Sequence[str] | None = None,
+        include_types: Sequence[str] | None = None,
+        include_tags: Sequence[str] | None = None,
+        exclude_names: Sequence[str] | None = None,
+        exclude_types: Sequence[str] | None = None,
+        exclude_tags: Sequence[str] | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[StreamEvent] | Awaitable[Any]:
         """Generate a stream of events.
 
         Use to create an iterator over `StreamEvent` that provide real-time information
@@ -1497,15 +1479,23 @@ async def slow_thing(some_input: str, config: RunnableConfig) -> str:
         Args:
             input: The input to the `Runnable`.
             config: The config to use for the `Runnable`.
-            version: The version of the schema to use, either `'v2'` or `'v1'`.
-
-                Users should use `'v2'`.
-
-                `'v1'` is for backwards compatibility and will be deprecated
-                in `0.4.0`.
-
-                No default will be assigned until the API is stabilized.
-                custom events will only be surfaced in `'v2'`.
+            version: The version of the schema to use. One of `'v1'`, `'v2'`,
+                or `'v3'`.
+
+                Most callers should use `'v2'` (the default), which yields
+                `StreamEvent` dicts and supports custom events.
+
+                `'v3'` selects the typed, content-block-centric streaming
+                protocol and is only supported on `Runnable` subclasses that
+                implement it (currently `BaseChatModel` and
+                `langgraph.CompiledGraph`); on a generic `Runnable` awaiting the
+                result raises `NotImplementedError`. The `'v3'` API is in beta and may
+                change. See the subclass override (e.g.
+                `BaseChatModel.astream_events`) for the v3 return shape.
+
+                `'v1'` is retained for backwards compatibility and will be
+                deprecated in `0.4.0`. Custom events are only surfaced in
+                `'v2'` / `'v3'`.
             include_names: Only include events from `Runnable` objects with matching names.
             include_types: Only include events from `Runnable` objects with matching types.
             include_tags: Only include events from `Runnable` objects with matching tags.
@@ -1514,16 +1504,59 @@ async def slow_thing(some_input: str, config: RunnableConfig) -> str:
             exclude_tags: Exclude events from `Runnable` objects with matching tags.
             **kwargs: Additional keyword arguments to pass to the `Runnable`.
 
-                These will be passed to `astream_log` as this implementation
-                of `astream_events` is built on top of `astream_log`.
-
         Yields:
             An async stream of `StreamEvent`.
 
         Raises:
-            NotImplementedError: If the version is not `'v1'` or `'v2'`.
+            NotImplementedError: If the version is not `'v1'`, `'v2'`, or `'v3'`, or
+                if `'v3'` is requested on a `Runnable` that does not implement the v3
+                streaming protocol.
 
         """  # noqa: E501
+        if version == "v3":
+            return self._astream_events_v3_unsupported()
+        return self._astream_events_v1_v2(
+            input,
+            config=config,
+            version=version,
+            include_names=include_names,
+            include_types=include_types,
+            include_tags=include_tags,
+            exclude_names=exclude_names,
+            exclude_types=exclude_types,
+            exclude_tags=exclude_tags,
+            **kwargs,
+        )
+
+    async def _astream_events_v3_unsupported(self) -> Any:
+        """Coroutine that raises when v3 isn't implemented on this Runnable.
+
+        Lets the public `astream_events(version="v3")` return an awaitable
+        whose error surfaces on `await`, matching the v3 contract on
+        subclasses that do implement the protocol.
+        """
+        msg = (
+            "astream_events(version='v3') is only supported on Runnable "
+            "subclasses that implement the v3 streaming protocol "
+            "(BaseChatModel, CompiledGraph). "
+            f"Got: {type(self).__name__}"
+        )
+        raise NotImplementedError(msg)
+
+    async def _astream_events_v1_v2(
+        self,
+        input: Any,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2"] = "v2",
+        include_names: Sequence[str] | None = None,
+        include_types: Sequence[str] | None = None,
+        include_tags: Sequence[str] | None = None,
+        exclude_names: Sequence[str] | None = None,
+        exclude_types: Sequence[str] | None = None,
+        exclude_tags: Sequence[str] | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[StreamEvent]:
         if version == "v2":
             event_stream = _astream_events_implementation_v2(
                 self,
@@ -1553,13 +1586,100 @@ async def slow_thing(some_input: str, config: RunnableConfig) -> str:
                 **kwargs,
             )
         else:
-            msg = 'Only versions "v1" and "v2" of the schema is currently supported.'
+            msg = f"Unsupported version: {version!r}. Expected 'v1', 'v2', or 'v3'."
             raise NotImplementedError(msg)
 
         async with aclosing(event_stream):
             async for event in event_stream:
                 yield event
 
+    @overload
+    def stream_events(
+        self,
+        input: Any,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2"] = "v2",
+        include_names: Sequence[str] | None = None,
+        include_types: Sequence[str] | None = None,
+        include_tags: Sequence[str] | None = None,
+        exclude_names: Sequence[str] | None = None,
+        exclude_types: Sequence[str] | None = None,
+        exclude_tags: Sequence[str] | None = None,
+        **kwargs: Any,
+    ) -> Iterator[StreamEvent]: ...
+
+    @overload
+    def stream_events(
+        self,
+        input: Any,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v3"],
+        **kwargs: Any,
+    ) -> Iterator[Any]: ...
+
+    def stream_events(
+        self,
+        input: Any,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2", "v3"] = "v2",
+        include_names: Sequence[str] | None = None,
+        include_types: Sequence[str] | None = None,
+        include_tags: Sequence[str] | None = None,
+        exclude_names: Sequence[str] | None = None,
+        exclude_types: Sequence[str] | None = None,
+        exclude_tags: Sequence[str] | None = None,
+        **kwargs: Any,
+    ) -> Iterator[StreamEvent] | Iterator[Any]:
+        """Generate a stream of events synchronously.
+
+        Synchronous counterpart to `astream_events`. Subclasses that implement
+        the v3 streaming protocol (`BaseChatModel`, `CompiledGraph`) override
+        this method for `version='v3'`. This base implementation always raises
+        `NotImplementedError`, whichever version is requested.
+
+        Args:
+            input: The input to the `Runnable`.
+            config: The config to use for the `Runnable`.
+            version: The version of the schema to use. `'v3'` requires a subclass
+                that implements the v3 streaming protocol. `'v1'` and `'v2'` are not
+                supported on the sync path.
+            include_names: Only include events from `Runnable` objects with matching
+                names.
+            include_types: Only include events from `Runnable` objects with matching
+                types.
+            include_tags: Only include events from `Runnable` objects with matching
+                tags.
+            exclude_names: Exclude events from `Runnable` objects with matching names.
+            exclude_types: Exclude events from `Runnable` objects with matching types.
+            exclude_tags: Exclude events from `Runnable` objects with matching tags.
+            **kwargs: Additional keyword arguments to pass to the `Runnable`.
+
+        Raises:
+            NotImplementedError: Always. Subclasses override this method for supported
+                versions.
+
+        """
+        # Base impl always raises; consume args so they don't trip ARG002.
+        del input, config, include_names, include_types, include_tags
+        del exclude_names, exclude_types, exclude_tags, kwargs
+        if version == "v3":
+            msg = (
+                "stream_events(version='v3') is only supported on Runnable subclasses "
+                "that implement the v3 streaming protocol "
+                "(BaseChatModel, CompiledGraph). "
+                f"Got: {type(self).__name__}"
+            )
+            raise NotImplementedError(msg)
+        msg = (
+            f"stream_events(version={version!r}) is not supported. "
+            "Use astream_events() for v1/v2, or stream_events(version='v3') "
+            "on a supported subclass."
+        )
+        raise NotImplementedError(msg)
+
     def transform(
         self,
         input: Iterator[Input],
@@ -5439,19 +5559,27 @@ async def ainvoke(
         return await self._acall_with_config(self._ainvoke, input, config, **kwargs)
 
     @override
-    async def astream_events(
+    def astream_events(  # type: ignore[override]
         self,
         input: Input,
         config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2", "v3"] = "v2",
         **kwargs: Any | None,
-    ) -> AsyncIterator[StreamEvent]:
-        def _error_stream_event(message: str) -> StreamEvent:
-            raise NotImplementedError(message)
+    ) -> AsyncIterator[StreamEvent] | Awaitable[Any]:
+        del input, config, kwargs
+        if version == "v3":
+            return self._astream_events_unsupported_v3()
+        return self._astream_events_unsupported_v1_v2()
+
+    async def _astream_events_unsupported_v3(self) -> Any:
+        msg = "RunnableEach does not support astream_events yet."
+        raise NotImplementedError(msg)
 
-        for _ in range(1):
-            yield _error_stream_event(
-                "RunnableEach does not support astream_events yet."
-            )
+    async def _astream_events_unsupported_v1_v2(self) -> AsyncIterator[StreamEvent]:
+        msg = "RunnableEach does not support astream_events yet."
+        raise NotImplementedError(msg)
+        yield  # makes this an async generator (never reached)
 
 
 class RunnableEach(RunnableEachBase[Input, Output]):
@@ -5933,54 +6061,144 @@ async def astream(
         ):
             yield item
 
-    @override
-    def stream_v2(
+    @overload
+    def stream_events(
         self,
         input: Input,
         config: RunnableConfig | None = None,
-        **kwargs: Any | None,
-    ) -> ChatModelStream:
-        """Forward `stream_v2` to the bound runnable with bound kwargs merged.
+        *,
+        version: Literal["v1", "v2"] = "v2",
+        **kwargs: Any,
+    ) -> Iterator[StreamEvent]: ...
+
+    @overload
+    def stream_events(
+        self,
+        input: Input,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v3"],
+        **kwargs: Any,
+    ) -> Any: ...
+
+    def stream_events(
+        self,
+        input: Input,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2", "v3"] = "v2",
+        **kwargs: Any,
+    ) -> Iterator[StreamEvent] | Any:
+        """Forward `stream_events` to the bound runnable with bound kwargs merged.
 
-        Chat-model-specific: the bound runnable must implement `stream_v2`
-        (see `BaseChatModel`). Without this override, `__getattr__` would
-        forward the call but drop `self.kwargs` — losing tools bound via
-        `bind_tools`, `stop` sequences, etc.
+        For `version="v3"`, the bound runnable's typed stream object (e.g.
+        `ChatModelStream`) is returned. For `version="v1"` / `"v2"`, dispatches
+        to the base `Runnable.stream_events`.
+
+        Without this override, `__getattr__` would drop `self.kwargs` — losing
+        tools bound via `bind_tools`, `stop` sequences, etc.
         """
-        return self.bound.stream_v2(
+        # Route on a bound `version` (e.g. `bind(version="v3")`) even when the
+        # call site omits it; note the bound value wins over the call-site
+        # one. Strip it before forwarding so it isn't passed twice.
+        merged_kwargs = {**self.kwargs, **kwargs}
+        version = merged_kwargs.get("version", version)
+        merged_without_version = {
+            k: v for k, v in merged_kwargs.items() if k != "version"
+        }
+        if version == "v3":
+            return self.bound.stream_events(
+                input,
+                self._merge_configs(config),
+                version="v3",
+                **merged_without_version,
+            )
+        return super().stream_events(
             input,
             self._merge_configs(config),
-            **{**self.kwargs, **kwargs},
+            version=version,
+            **merged_without_version,
         )
 
-    @override
-    async def astream_v2(
+    async def _astream_events_v3(
         self,
         input: Input,
         config: RunnableConfig | None = None,
-        **kwargs: Any | None,
-    ) -> AsyncChatModelStream:
-        """Forward `astream_v2` to the bound runnable with bound kwargs merged.
+        **kwargs: Any,
+    ) -> Any:
+        """Return the v3 async stream object from the bound runnable.
+
+        Returns an awaitable (an `async def` coroutine, not an async
+        generator) so callers can `await` it to obtain the typed stream
+        (e.g. `AsyncChatModelStream`) directly — Python does not allow
+        `return <value>` inside an async generator.
 
-        Async variant of `stream_v2`. See that method for the full rationale.
+        The caller is responsible for merging `self.kwargs` and stripping
+        `version`; this method passes `version="v3"` explicitly and would
+        raise on a duplicate keyword.
         """
-        return await self.bound.astream_v2(
+        return await self.bound.astream_events(
             input,
             self._merge_configs(config),
-            **{**self.kwargs, **kwargs},
+            version="v3",
+            **kwargs,
         )
 
+    @overload
+    def astream_events(
+        self,
+        input: Input,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v1", "v2"] = "v2",
+        **kwargs: Any,
+    ) -> AsyncIterator[StreamEvent]: ...
+
+    @overload
+    def astream_events(
+        self,
+        input: Input,
+        config: RunnableConfig | None = None,
+        *,
+        version: Literal["v3"],
+        **kwargs: Any,
+    ) -> Awaitable[Any]: ...
+
     @override
-    async def astream_events(
+    def astream_events(
         self,
         input: Input,
         config: RunnableConfig | None = None,
         **kwargs: Any | None,
-    ) -> AsyncIterator[StreamEvent]:
-        async for item in self.bound.astream_events(
-            input, self._merge_configs(config), **{**self.kwargs, **kwargs}
-        ):
-            yield item
+    ) -> AsyncIterator[StreamEvent] | Awaitable[Any]:
+        """Forward `astream_events` to the bound runnable with bound kwargs merged.
+
+        For `version="v3"`, returns an awaitable that resolves to the
+        bound runnable's typed stream object (e.g. `AsyncChatModelStream`).
+        For `version="v1"` / `"v2"`, returns an async iterator over
+        `StreamEvent` items.
+
+        Without this override, `__getattr__` would drop `self.kwargs` — losing
+        tools bound via `bind_tools`, `stop` sequences, etc.
+        """
+        # Probe `version` from the merged view so `bind(version="v3")` routes
+        # correctly even when the caller doesn't repeat `version` at the call
+        # site.
+        merged_kwargs = {**self.kwargs, **kwargs}
+        version = merged_kwargs.get("version", "v2")
+        if version == "v3":
+            merged_without_version = {
+                k: v for k, v in merged_kwargs.items() if k != "version"
+            }
+            return self._astream_events_v3(input, config, **merged_without_version)
+        # v1/v2: hand back the bound runnable's async iterator as-is rather
+        # than re-yielding its items through an extra wrapper generator.
+        return cast(
+            "AsyncIterator[StreamEvent]",
+            self.bound.astream_events(
+                input, self._merge_configs(config), **merged_kwargs
+            ),
+        )
 
     @override
     def transform(
diff --git a/libs/core/langchain_core/tracers/_streaming.py b/libs/core/langchain_core/tracers/_streaming.py
index 2c2b54c0e491f..7f5071bdc0786 100644
--- a/libs/core/langchain_core/tracers/_streaming.py
+++ b/libs/core/langchain_core/tracers/_streaming.py
@@ -33,8 +33,9 @@ class _V2StreamingCallbackHandler:
     """Marker base class for handlers that consume `on_stream_event` (v2).
 
     A handler inheriting from this class signals that it wants content-
-    block lifecycle events from `stream_v2` / `astream_v2` rather than
-    the v1 `on_llm_new_token` chunks. `BaseChatModel.invoke` uses
+    block lifecycle events from `stream_events(version="v3")` (and its
+    async equivalent) rather than the v1 `on_llm_new_token` chunks.
+    `BaseChatModel.invoke` uses
     `isinstance(handler, _V2StreamingCallbackHandler)` to decide whether
     to route an invoke through the v2 event generator.
 
diff --git a/libs/core/langchain_core/version.py b/libs/core/langchain_core/version.py
index 765766132f90c..ac08df448164a 100644
--- a/libs/core/langchain_core/version.py
+++ b/libs/core/langchain_core/version.py
@@ -1,3 +1,3 @@
 """langchain-core version information and utilities."""
 
-VERSION = "1.3.2"
+VERSION = "1.4.0"
diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml
index 18f79bed22d05..9b17a184d536d 100644
--- a/libs/core/pyproject.toml
+++ b/libs/core/pyproject.toml
@@ -21,7 +21,7 @@ classifiers = [
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 
-version = "1.3.2"
+version = "1.4.0"
 requires-python = ">=3.10.0,<4.0.0"
 dependencies = [
     "langsmith>=0.3.45,<1.0.0",
@@ -32,7 +32,7 @@ dependencies = [
     "packaging>=23.2.0",
     "pydantic>=2.7.4,<3.0.0",
     "uuid-utils>=0.12.0,<1.0",
-    "langchain-protocol>=0.0.10",
+    "langchain-protocol>=0.0.14",
 ]
 
 [project.urls]
diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py
index b1668123a6566..9594ad23d9a7d 100644
--- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py
+++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py
@@ -473,7 +473,7 @@ class _FakeV2Handler(BaseCallbackHandler, _V2StreamingCallbackHandler):
 async def test_streaming_attribute_overrides_v2_callback() -> None:
     """`self.streaming=False` must opt out of the v2 event path too.
 
-    `_should_stream_v2` shares the `_streaming_disabled` opt-outs with
+    `_should_use_protocol_streaming` shares the `_streaming_disabled` opt-outs with
     `_should_stream`, so an instance-level `streaming=False` takes
     precedence over an attached `_V2StreamingCallbackHandler`.
     """
@@ -1561,16 +1561,17 @@ def test_invocation_params_passed_to_tracer_metadata() -> None:
     assert run.metadata == run.extra["metadata"]
 
 
-def test_stream_v2_invocation_params_passed_to_tracer_metadata() -> None:
-    """`stream_v2()` must preserve filtered invocation params for tracing."""
+def test_stream_events_v3_invocation_params_passed_to_tracer_metadata() -> None:
+    """`stream_events(version="v3")` preserves filtered invocation params."""
     llm = FakeStreamingChatModelWithInvocationParams()
     collector = LangChainTracerRunCollector()
 
     with collector.tracing_callback() as tracer:
-        _ = llm.stream_v2(
+        _ = llm.stream_events(
             [HumanMessage(content="Hello")],
             config={"callbacks": [tracer]},
             stop=["done"],
+            version="v3",
         ).output
 
     assert len(collector.runs) == 1
@@ -1581,16 +1582,17 @@ def test_stream_v2_invocation_params_passed_to_tracer_metadata() -> None:
     assert metadata["temperature"] == 0.7
 
 
-async def test_astream_v2_invocation_params_passed_to_tracer_metadata() -> None:
-    """`astream_v2()` must preserve filtered invocation params for tracing."""
+async def test_astream_events_v3_invocation_params_passed_to_tracer_metadata() -> None:
+    """`astream_events(version="v3")` preserves filtered invocation params."""
     llm = FakeStreamingChatModelWithInvocationParams()
     collector = LangChainTracerRunCollector()
 
     with collector.tracing_callback() as tracer:
-        stream = await llm.astream_v2(
+        stream = await llm.astream_events(
             [HumanMessage(content="Hello")],
             config={"callbacks": [tracer]},
             stop=["done"],
+            version="v3",
         )
         _ = await stream
 
diff --git a/libs/core/tests/unit_tests/language_models/test_chat_model_stream.py b/libs/core/tests/unit_tests/language_models/test_chat_model_stream.py
index a27a9e9d1033a..1230e2b23c608 100644
--- a/libs/core/tests/unit_tests/language_models/test_chat_model_stream.py
+++ b/libs/core/tests/unit_tests/language_models/test_chat_model_stream.py
@@ -16,7 +16,7 @@
 )
 
 if TYPE_CHECKING:
-    from langchain_protocol.protocol import ContentBlockFinishData, MessagesData
+    from langchain_protocol.protocol import ContentBlockFinishData
 
 # ---------------------------------------------------------------------------
 # Projection unit tests
@@ -244,7 +244,7 @@ async def test_concurrent_text_and_output_share_pump(self) -> None:
         """Concurrent `stream.text` + `await stream.output` both drive the pump."""
         stream = AsyncChatModelStream(message_id="m1")
 
-        events: list[MessagesData] = [
+        events: list[dict[str, Any]] = [
             {
                 "event": "message-start",
                 "role": "ai",
@@ -313,7 +313,7 @@ def test_tool_calls_projection_cached(self) -> None:
 
     def test_text_deltas_via_pump(self) -> None:
         stream = ChatModelStream()
-        events: list[MessagesData] = [
+        events: list[dict[str, Any]] = [
             {"event": "message-start", "role": "ai"},
             {
                 "event": "content-block-delta",
@@ -363,7 +363,7 @@ def test_tool_call_chunk_streaming(self) -> None:
             }
         )
         stream.dispatch(
-            {  # type: ignore[arg-type,misc]
+            {
                 "event": "content-block-delta",
                 "index": 0,
                 "content_block": {
diff --git a/libs/core/tests/unit_tests/language_models/test_chat_model_streamer.py b/libs/core/tests/unit_tests/language_models/test_chat_model_streamer.py
index 4390825beefd7..1d7a3063ad436 100644
--- a/libs/core/tests/unit_tests/language_models/test_chat_model_streamer.py
+++ b/libs/core/tests/unit_tests/language_models/test_chat_model_streamer.py
@@ -1,9 +1,9 @@
-"""Tests for BaseChatModel.stream_v2() / astream_v2()."""
+"""Tests for `BaseChatModel.stream_events(version="v3")` and its async equivalent."""
 
 from __future__ import annotations
 
 import asyncio
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 import pytest
 from pydantic import Field
@@ -19,7 +19,7 @@
 from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
 
 if TYPE_CHECKING:
-    from collections.abc import AsyncIterator, Iterator
+    from collections.abc import AsyncIterator, Awaitable, Iterator
 
     from langchain_protocol.protocol import MessagesData
 
@@ -32,11 +32,11 @@
 
 
 class TestStreamV2Sync:
-    """Test BaseChatModel.stream_v2() with FakeListChatModel."""
+    """Test `BaseChatModel.stream_events(version="v3")` with `FakeListChatModel`."""
 
     def test_stream_text(self) -> None:
         model = FakeListChatModel(responses=["Hello world!"])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
 
         assert isinstance(stream, ChatModelStream)
         deltas = list(stream.text)
@@ -45,7 +45,7 @@ def test_stream_text(self) -> None:
 
     def test_stream_output(self) -> None:
         model = FakeListChatModel(responses=["Hello!"])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
 
         msg = stream.output
         assert isinstance(msg.content, list)
@@ -54,7 +54,7 @@ def test_stream_output(self) -> None:
 
     def test_stream_usage_none_for_fake(self) -> None:
         model = FakeListChatModel(responses=["Hi"])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
         # Drain
         for _ in stream.text:
             pass
@@ -62,7 +62,7 @@ def test_stream_usage_none_for_fake(self) -> None:
 
     def test_stream_raw_events(self) -> None:
         model = FakeListChatModel(responses=["ab"])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
 
         events = list(stream)
         event_types = [e.get("event") for e in events]
@@ -72,12 +72,12 @@ def test_stream_raw_events(self) -> None:
 
 
 class TestAstreamV2:
-    """Test BaseChatModel.astream_v2() with FakeListChatModel."""
+    """Test `BaseChatModel.astream_events(version="v3")` with `FakeListChatModel`."""
 
     @pytest.mark.asyncio
     async def test_astream_text_await(self) -> None:
         model = FakeListChatModel(responses=["Hello!"])
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         assert isinstance(stream, AsyncChatModelStream)
         full = await stream.text
@@ -86,7 +86,7 @@ async def test_astream_text_await(self) -> None:
     @pytest.mark.asyncio
     async def test_astream_text_deltas(self) -> None:
         model = FakeListChatModel(responses=["Hi"])
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         deltas = [d async for d in stream.text]
         assert "".join(deltas) == "Hi"
@@ -94,7 +94,7 @@ async def test_astream_text_deltas(self) -> None:
     @pytest.mark.asyncio
     async def test_astream_await_output(self) -> None:
         model = FakeListChatModel(responses=["Hey"])
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         msg = await stream
         assert msg.content == [{"type": "text", "text": "Hey", "index": 0}]
@@ -193,13 +193,13 @@ async def _astream(
 
 
 class TestCallbacks:
-    """Verify stream_v2 fires on_llm_end / on_llm_error callbacks."""
+    """Verify v3 streaming fires `on_llm_end` / `on_llm_error` callbacks."""
 
-    def test_stream_v2_defers_on_chat_model_start_until_consumed(self) -> None:
+    def test_stream_events_v3_defers_on_chat_model_start_until_consumed(self) -> None:
         handler = _RecordingHandler()
         model = FakeListChatModel(responses=["done"], callbacks=[handler])
 
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
 
         assert handler.events == []
 
@@ -210,7 +210,7 @@ def test_stream_v2_defers_on_chat_model_start_until_consumed(self) -> None:
     def test_on_llm_end_fires_after_drain(self) -> None:
         handler = _RecordingHandler()
         model = FakeListChatModel(responses=["done"], callbacks=[handler])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
         for _ in stream.text:
             pass
         _ = stream.output
@@ -225,18 +225,20 @@ def test_on_llm_end_fires_after_drain(self) -> None:
     async def test_on_llm_end_fires_async(self) -> None:
         handler = _AsyncRecordingHandler()
         model = FakeListChatModel(responses=["done"], callbacks=[handler])
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
         _ = await stream
 
         assert "on_chat_model_start" in handler.events
         assert "on_llm_end" in handler.events
 
     @pytest.mark.asyncio
-    async def test_astream_v2_defers_on_chat_model_start_until_consumed(self) -> None:
+    async def test_astream_events_v3_defers_on_chat_model_start_until_consumed(
+        self,
+    ) -> None:
         handler = _AsyncRecordingHandler()
         model = FakeListChatModel(responses=["done"], callbacks=[handler])
 
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         assert handler.events == []
 
@@ -251,7 +253,7 @@ def test_on_llm_end_receives_assembled_message(self) -> None:
         """
         handler = _RecordingHandler()
         model = FakeListChatModel(responses=["hello"], callbacks=[handler])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
         _ = stream.output
 
         response = handler.last_llm_end_response
@@ -265,7 +267,7 @@ def test_on_llm_end_receives_assembled_message(self) -> None:
     async def test_on_llm_end_receives_assembled_message_async(self) -> None:
         handler = _AsyncRecordingHandler()
         model = FakeListChatModel(responses=["hello"], callbacks=[handler])
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
         _ = await stream
 
         response = handler.last_llm_end_response
@@ -277,7 +279,9 @@ async def test_on_llm_end_receives_assembled_message_async(self) -> None:
 
     def test_empty_stream_reports_error_without_finish_only_lifecycle(self) -> None:
         handler = _RecordingHandler()
-        stream = _EmptyStreamModel(callbacks=[handler]).stream_v2("test")
+        stream = _EmptyStreamModel(callbacks=[handler]).stream_events(
+            "test", version="v3"
+        )
 
         with pytest.raises(ValueError, match="No generation chunks were returned"):
             list(stream)
@@ -289,7 +293,9 @@ def test_empty_stream_reports_error_without_finish_only_lifecycle(self) -> None:
     @pytest.mark.asyncio
     async def test_empty_astream_reports_error(self) -> None:
         handler = _AsyncRecordingHandler()
-        stream = await _EmptyStreamModel(callbacks=[handler]).astream_v2("test")
+        stream = await _EmptyStreamModel(callbacks=[handler]).astream_events(
+            "test", version="v3"
+        )
 
         with pytest.raises(ValueError, match="No generation chunks were returned"):
             await stream
@@ -303,12 +309,12 @@ async def test_empty_astream_reports_error(self) -> None:
 
 
 class TestOnStreamEvent:
-    """`on_stream_event` must fire once per protocol event from stream_v2."""
+    """`on_stream_event` fires once per protocol event from v3 streaming."""
 
     def test_on_stream_event_fires_for_every_event_sync(self) -> None:
         handler = _RecordingHandler()
         model = FakeListChatModel(responses=["Hi"], callbacks=[handler])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
         _ = stream.output
 
         # Every event the stream sees should also reach the observer.
@@ -322,7 +328,7 @@ def test_on_stream_event_fires_for_every_event_sync(self) -> None:
     async def test_on_stream_event_fires_for_every_event_async(self) -> None:
         handler = _AsyncRecordingHandler()
         model = FakeListChatModel(responses=["Hi"], callbacks=[handler])
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
         _ = await stream
 
         event_types = [e["event"] for e in handler.stream_events]
@@ -334,7 +340,7 @@ def test_on_stream_event_ordering_relative_to_lifecycle(self) -> None:
         """Stream events must all fire between on_chat_model_start and on_llm_end."""
         handler = _RecordingHandler()
         model = FakeListChatModel(responses=["Hi"], callbacks=[handler])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
         _ = stream.output
 
         # on_stream_event doesn't show up in `events` (different list), but
@@ -346,10 +352,10 @@ def test_on_stream_event_ordering_relative_to_lifecycle(self) -> None:
 
 
 class TestCancellation:
-    """Cancellation of `astream_v2` must propagate, not be swallowed."""
+    """Cancellation of `astream_events(version="v3")` must propagate."""
 
     @pytest.mark.asyncio
-    async def test_astream_v2_cancellation_propagates(self) -> None:
+    async def test_astream_events_v3_cancellation_propagates(self) -> None:
         """Cancelling the producer task must raise CancelledError.
 
         Regression test: the producer's `except BaseException` previously
@@ -357,7 +363,7 @@ async def test_astream_v2_cancellation_propagates(self) -> None:
         `on_llm_error` + `stream._fail` pair that never propagated.
         """
         model = FakeListChatModel(responses=["abcdefghij"], sleep=0.05)
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
         aiter_ = stream.text.__aiter__()
         await aiter_.__anext__()
         task = stream._producer_task
@@ -401,7 +407,7 @@ async def _astream(
 
 
 class TestRunnableBindingForwarding:
-    """`RunnableBinding.stream_v2` must merge bound kwargs into the call.
+    """`RunnableBinding.stream_events(version="v3")` merges bound kwargs.
 
     Without the explicit override on `RunnableBinding`, `__getattr__`
     forwards the call but drops `self.kwargs` — so tools bound via
@@ -409,12 +415,12 @@ class TestRunnableBindingForwarding:
     ignored.
     """
 
-    def test_bound_kwargs_reach_stream_v2(self) -> None:
+    def test_bound_kwargs_reach_stream_events_v3(self) -> None:
         model = _KwargRecordingModel(responses=["hi"])
         model.received_kwargs = []
         bound = model.bind(my_marker="sentinel-42")
 
-        stream = bound.stream_v2("test")
+        stream = bound.stream_events("test", version="v3")
         for _ in stream.text:
             pass
 
@@ -426,20 +432,53 @@ def test_call_kwargs_override_bound_kwargs(self) -> None:
         model.received_kwargs = []
         bound = model.bind(my_marker="from-bind")
 
-        stream = bound.stream_v2("test", my_marker="from-call")
+        stream = bound.stream_events("test", my_marker="from-call", version="v3")
         for _ in stream.text:
             pass
 
         assert model.received_kwargs[0].get("my_marker") == "from-call"
 
     @pytest.mark.asyncio
-    async def test_bound_kwargs_reach_astream_v2(self) -> None:
+    async def test_bound_kwargs_reach_astream_events_v3(self) -> None:
         model = _KwargRecordingModel(responses=["hi"])
         model.received_kwargs = []
         bound = model.bind(my_marker="sentinel-async")
 
-        stream = await bound.astream_v2("test")
+        stream = await bound.astream_events("test", version="v3")
         _ = await stream
 
         assert len(model.received_kwargs) == 1
         assert model.received_kwargs[0].get("my_marker") == "sentinel-async"
+
+    def test_bound_version_routes_to_v3_without_call_site_repeat(self) -> None:
+        # `bind(version="v3").stream_events(input)` must route to the v3
+        # branch (using the bound `version`) and must not forward `version`
+        # to the underlying model as an extra kwarg.
+        model = _KwargRecordingModel(responses=["hi"])
+        model.received_kwargs = []
+        bound = model.bind(version="v3")
+
+        # `version` is in `self.kwargs`, not at the call site, so the
+        # static return type is the v1/v2 iterator overload — narrow it.
+        stream = cast("ChatModelStream", bound.stream_events("test"))
+        chunks = list(stream.text)
+
+        assert "".join(chunks) == "hi"
+        assert len(model.received_kwargs) == 1
+        assert "version" not in model.received_kwargs[0]
+
+    @pytest.mark.asyncio
+    async def test_bound_version_routes_to_v3_async_without_call_site_repeat(
+        self,
+    ) -> None:
+        model = _KwargRecordingModel(responses=["hi"])
+        model.received_kwargs = []
+        bound = model.bind(version="v3")
+
+        stream = await cast(
+            "Awaitable[AsyncChatModelStream]", bound.astream_events("test")
+        )
+        _ = await stream
+
+        assert len(model.received_kwargs) == 1
+        assert "version" not in model.received_kwargs[0]
diff --git a/libs/core/tests/unit_tests/language_models/test_stream_v2.py b/libs/core/tests/unit_tests/language_models/test_chat_model_v3_stream.py
similarity index 89%
rename from libs/core/tests/unit_tests/language_models/test_stream_v2.py
rename to libs/core/tests/unit_tests/language_models/test_chat_model_v3_stream.py
index a46629a5fedb4..ec8f89c8013a0 100644
--- a/libs/core/tests/unit_tests/language_models/test_stream_v2.py
+++ b/libs/core/tests/unit_tests/language_models/test_chat_model_v3_stream.py
@@ -1,4 +1,4 @@
-"""Tests for stream_v2 / astream_v2 and ChatModelStream."""
+"""Tests for `stream_events(version="v3")` (sync + async) and `ChatModelStream`."""
 
 from __future__ import annotations
 
@@ -133,7 +133,7 @@ def test_push_text_delta(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text="Hello"),
+                delta={"type": "text-delta", "text": "Hello"},
             )
         )
         assert stream._text_acc == "Hello"
@@ -144,9 +144,7 @@ def test_push_reasoning_delta(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=ReasoningContentBlock(
-                    type="reasoning", reasoning="think"
-                ),
+                delta={"type": "reasoning-delta", "reasoning": "think"},
             )
         )
         assert stream._reasoning_acc == "think"
@@ -157,7 +155,7 @@ def test_push_content_block_finish_tool_call(self) -> None:
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=0,
-                content_block=ToolCall(
+                content=ToolCall(
                     type="tool_call",
                     id="tc1",
                     name="search",
@@ -188,12 +186,12 @@ def test_pump_driven_text(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text="Hi"),
+                delta={"type": "text-delta", "text": "Hi"},
             ),
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text=" there"),
+                delta={"type": "text-delta", "text": " there"},
             ),
         ]
         finish = MessageFinishData(event="message-finish")
@@ -228,14 +226,14 @@ async def test_text_await(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text="Hello"),
+                delta={"type": "text-delta", "text": "Hello"},
             )
         )
         stream._push_content_block_delta(
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text=" world"),
+                delta={"type": "text-delta", "text": " world"},
             )
         )
         stream._finish(MessageFinishData(event="message-finish"))
@@ -253,7 +251,7 @@ async def produce() -> None:
                 ContentBlockDeltaData(
                     event="content-block-delta",
                     index=0,
-                    content_block=TextContentBlock(type="text", text="a"),
+                    delta={"type": "text-delta", "text": "a"},
                 )
             )
             await asyncio.sleep(0)
@@ -261,7 +259,7 @@ async def produce() -> None:
                 ContentBlockDeltaData(
                     event="content-block-delta",
                     index=0,
-                    content_block=TextContentBlock(type="text", text="b"),
+                    delta={"type": "text-delta", "text": "b"},
                 )
             )
             await asyncio.sleep(0)
@@ -279,7 +277,7 @@ async def test_tool_calls_await(self) -> None:
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=0,
-                content_block=ToolCall(
+                content=ToolCall(
                     type="tool_call",
                     id="tc1",
                     name="search",
@@ -303,20 +301,20 @@ async def test_error_propagation(self) -> None:
 
 
 class TestStreamV2:
-    """Test BaseChatModel.stream_v2() with FakeListChatModel."""
+    """Test `BaseChatModel.stream_events(version="v3")` with `FakeListChatModel`."""
 
-    def test_stream_v2_text(self) -> None:
+    def test_stream_events_v3_text(self) -> None:
         model = FakeListChatModel(responses=["Hello world!"])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
 
         assert isinstance(stream, ChatModelStream)
         deltas = list(stream.text)
         assert "".join(deltas) == "Hello world!"
         assert stream.done
 
-    def test_stream_v2_usage(self) -> None:
+    def test_stream_events_v3_usage(self) -> None:
         model = FakeListChatModel(responses=["Hi"])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
 
         # Drain stream
         for _ in stream.text:
@@ -325,10 +323,12 @@ def test_stream_v2_usage(self) -> None:
         assert stream.output.usage_metadata is None
         assert stream.done
 
-    def test_stream_v2_malformed_tool_args_produce_invalid_tool_call(self) -> None:
+    def test_stream_events_v3_malformed_tool_args_produce_invalid_tool_call(
+        self,
+    ) -> None:
         """End-to-end: malformed tool-call JSON becomes invalid_tool_calls."""
         model = _MalformedToolCallModel()
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
         msg = stream.output
 
         assert msg.tool_calls == []
@@ -338,10 +338,12 @@ def test_stream_v2_malformed_tool_args_produce_invalid_tool_call(self) -> None:
         assert itc["args"] == '{"q": '
         assert itc["id"] == "call_1"
 
-    def test_stream_v2_translates_anthropic_server_tool_use_to_protocol(self) -> None:
+    def test_stream_events_v3_translates_anthropic_server_tool_use_to_protocol(
+        self,
+    ) -> None:
         """Phase E end-to-end: server_tool_use becomes server_tool_call in output."""
         model = _AnthropicStyleServerToolModel()
-        stream = model.stream_v2("weather?")
+        stream = model.stream_events("weather?", version="v3")
         msg = stream.output
 
         assert isinstance(msg.content, list)
@@ -355,21 +357,21 @@ def test_stream_v2_translates_anthropic_server_tool_use_to_protocol(self) -> Non
 
 
 class TestAstreamV2:
-    """Test BaseChatModel.astream_v2() with FakeListChatModel."""
+    """Test `BaseChatModel.astream_events(version="v3")` with `FakeListChatModel`."""
 
     @pytest.mark.asyncio
-    async def test_astream_v2_text(self) -> None:
+    async def test_astream_events_v3_text(self) -> None:
         model = FakeListChatModel(responses=["Hello!"])
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         assert isinstance(stream, AsyncChatModelStream)
         full = await stream.text
         assert full == "Hello!"
 
     @pytest.mark.asyncio
-    async def test_astream_v2_deltas(self) -> None:
+    async def test_astream_events_v3_deltas(self) -> None:
         model = FakeListChatModel(responses=["Hi"])
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         deltas = [d async for d in stream.text]
         assert "".join(deltas) == "Hi"
@@ -391,14 +393,14 @@ def test_two_text_blocks_keep_their_own_text(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text="A"),
+                delta={"type": "text-delta", "text": "A"},
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=0,
-                content_block=TextContentBlock(type="text", text="A"),
+                content=TextContentBlock(type="text", text="A"),
             )
         )
         # Block 1: "B"
@@ -406,14 +408,14 @@ def test_two_text_blocks_keep_their_own_text(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=1,
-                content_block=TextContentBlock(type="text", text="B"),
+                delta={"type": "text-delta", "text": "B"},
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=1,
-                content_block=TextContentBlock(type="text", text="B"),
+                content=TextContentBlock(type="text", text="B"),
             )
         )
         stream.dispatch(MessageFinishData(event="message-finish"))
@@ -437,14 +439,14 @@ def test_two_reasoning_blocks_keep_their_own_text(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=ReasoningContentBlock(type="reasoning", reasoning="one"),
+                delta={"type": "reasoning-delta", "reasoning": "one"},
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=0,
-                content_block=ReasoningContentBlock(type="reasoning", reasoning="one"),
+                content=ReasoningContentBlock(type="reasoning", reasoning="one"),
             )
         )
         # Block 1: "two"
@@ -452,14 +454,14 @@ def test_two_reasoning_blocks_keep_their_own_text(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=1,
-                content_block=ReasoningContentBlock(type="reasoning", reasoning="two"),
+                delta={"type": "reasoning-delta", "reasoning": "two"},
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=1,
-                content_block=ReasoningContentBlock(type="reasoning", reasoning="two"),
+                content=ReasoningContentBlock(type="reasoning", reasoning="two"),
             )
         )
         stream.dispatch(MessageFinishData(event="message-finish"))
@@ -484,14 +486,14 @@ def test_finish_text_reconciles_with_partial_deltas(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text="hel"),
+                delta={"type": "text-delta", "text": "hel"},
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=0,
-                content_block=TextContentBlock(type="text", text="hello"),
+                content=TextContentBlock(type="text", text="hello"),
             )
         )
         stream.dispatch(MessageFinishData(event="message-finish"))
@@ -522,7 +524,7 @@ def test_out_of_order_finish_still_produces_correct_final_text(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text="aaa"),
+                delta={"type": "text-delta", "text": "aaa"},
             )
         )
         # Block 1 streams deltas before block 0 finishes.
@@ -530,7 +532,7 @@ def test_out_of_order_finish_still_produces_correct_final_text(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=1,
-                content_block=TextContentBlock(type="text", text="bb"),
+                delta={"type": "text-delta", "text": "bb"},
             )
         )
         # Block 0 finishes with authoritative text different from deltas.
@@ -538,14 +540,14 @@ def test_out_of_order_finish_still_produces_correct_final_text(self) -> None:
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=0,
-                content_block=TextContentBlock(type="text", text="XXX"),
+                content=TextContentBlock(type="text", text="XXX"),
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=1,
-                content_block=TextContentBlock(type="text", text="bb"),
+                content=TextContentBlock(type="text", text="bb"),
             )
         )
         stream.dispatch(MessageFinishData(event="message-finish"))
@@ -568,16 +570,14 @@ def test_finish_reasoning_reconciles_with_partial_deltas(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=ReasoningContentBlock(type="reasoning", reasoning="thi"),
+                delta={"type": "reasoning-delta", "reasoning": "thi"},
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=0,
-                content_block=ReasoningContentBlock(
-                    type="reasoning", reasoning="thinking"
-                ),
+                content=ReasoningContentBlock(type="reasoning", reasoning="thinking"),
             )
         )
         stream.dispatch(MessageFinishData(event="message-finish"))
@@ -598,14 +598,14 @@ def test_interleaved_text_blocks_around_tool_call(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=0,
-                content_block=TextContentBlock(type="text", text="before"),
+                delta={"type": "text-delta", "text": "before"},
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=0,
-                content_block=TextContentBlock(type="text", text="before"),
+                content=TextContentBlock(type="text", text="before"),
             )
         )
         # Block 1: tool_call
@@ -613,7 +613,7 @@ def test_interleaved_text_blocks_around_tool_call(self) -> None:
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=1,
-                content_block=ToolCall(
+                content=ToolCall(
                     type="tool_call",
                     id="tc1",
                     name="search",
@@ -626,14 +626,14 @@ def test_interleaved_text_blocks_around_tool_call(self) -> None:
             ContentBlockDeltaData(
                 event="content-block-delta",
                 index=2,
-                content_block=TextContentBlock(type="text", text="after"),
+                delta={"type": "text-delta", "text": "after"},
             )
         )
         stream.dispatch(
             ContentBlockFinishData(
                 event="content-block-finish",
                 index=2,
-                content_block=TextContentBlock(type="text", text="after"),
+                content=TextContentBlock(type="text", text="after"),
             )
         )
         stream.dispatch(MessageFinishData(event="message-finish"))
@@ -694,33 +694,34 @@ class TestStructuredOutputKwargStripping:
 
     `stream()` / `astream()` pop `ls_structured_output_format` and
     `structured_output_format` before forwarding kwargs to `_stream` —
-    provider clients reject unknown kwargs. `stream_v2` / `astream_v2`
-    must do the same, or `.with_structured_output().stream_v2()` breaks.
+    provider clients reject unknown kwargs. `stream_events(version="v3")` /
+    `astream_events(version="v3")` must do the same, or
+    `.with_structured_output().stream_events(version="v3")` breaks.
     """
 
-    def test_stream_v2_strips_ls_structured_output_format(self) -> None:
+    def test_stream_events_v3_strips_ls_structured_output_format(self) -> None:
         model = _RecordingStreamModel()
         bound = model.bind(ls_structured_output_format={"schema": {"type": "object"}})
-        stream = bound.stream_v2("test")
-        _ = stream.output  # drain
+        stream = bound.stream_events("test", version="v3")
+        _ = stream.output
         recorded = _RecordingStreamModel.last_stream_kwargs
         assert "ls_structured_output_format" not in recorded
         assert "structured_output_format" not in recorded
 
-    def test_stream_v2_strips_structured_output_format(self) -> None:
+    def test_stream_events_v3_strips_structured_output_format(self) -> None:
         model = _RecordingStreamModel()
         bound = model.bind(structured_output_format={"schema": {"type": "object"}})
-        stream = bound.stream_v2("test")
+        stream = bound.stream_events("test", version="v3")
         _ = stream.output
         recorded = _RecordingStreamModel.last_stream_kwargs
         assert "ls_structured_output_format" not in recorded
         assert "structured_output_format" not in recorded
 
     @pytest.mark.asyncio
-    async def test_astream_v2_strips_ls_structured_output_format(self) -> None:
+    async def test_astream_events_v3_strips_ls_structured_output_format(self) -> None:
         model = _RecordingStreamModel()
         bound = model.bind(ls_structured_output_format={"schema": {"type": "object"}})
-        stream = await bound.astream_v2("test")
+        stream = await bound.astream_events("test", version="v3")
         _ = await stream
         assert (
             "ls_structured_output_format"
@@ -731,10 +732,10 @@ async def test_astream_v2_strips_ls_structured_output_format(self) -> None:
         )
 
     @pytest.mark.asyncio
-    async def test_astream_v2_strips_structured_output_format(self) -> None:
+    async def test_astream_events_v3_strips_structured_output_format(self) -> None:
         model = _RecordingStreamModel()
         bound = model.bind(structured_output_format={"schema": {"type": "object"}})
-        stream = await bound.astream_v2("test")
+        stream = await bound.astream_events("test", version="v3")
         _ = await stream
         assert (
             "ls_structured_output_format"
@@ -849,7 +850,7 @@ class TestAsyncStreamAclose:
     async def test_aclose_cancels_producer_task(self) -> None:
         gate = asyncio.Event()
         model = _GatedStreamModel(gate=gate)
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         # Pull the first delta so the producer enters the gated section.
         aiter_ = stream.text.__aiter__()
@@ -867,7 +868,7 @@ async def test_aclose_cancels_producer_task(self) -> None:
     async def test_aclose_is_idempotent(self) -> None:
         gate = asyncio.Event()
         model = _GatedStreamModel(gate=gate)
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
         aiter_ = stream.text.__aiter__()
         await aiter_.__anext__()
 
@@ -878,7 +879,7 @@ async def test_aclose_is_idempotent(self) -> None:
     async def test_async_context_manager_closes_stream(self) -> None:
         gate = asyncio.Event()
         model = _GatedStreamModel(gate=gate)
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         async with stream as s:
             assert s is stream
@@ -901,7 +902,7 @@ async def test_aclose_propagates_caller_cancellation(self) -> None:
         """
         teardown_gate = asyncio.Event()
         model = _SlowTeardownModel(teardown_gate=teardown_gate)
-        stream = await model.astream_v2("test")
+        stream = await model.astream_events("test", version="v3")
 
         # Prime the producer so it enters `_astream`'s forever-blocking
         # await.
@@ -951,7 +952,7 @@ async def closer() -> None:
     async def test_aclose_before_producer_starts_resolves_projections(self) -> None:
         """Early-cancel path: `_produce` never runs.
 
-        If a consumer calls `astream_v2()` and immediately `aclose()`
+        If a consumer calls `astream_events(version="v3")` and immediately `aclose()`
         (or `async with` exits before the loop schedules `_produce`),
         `task.cancel()` marks the task cancelled without ever invoking
         its body — so neither `stream.fail` nor `on_llm_error` fires.
@@ -967,10 +968,12 @@ async def on_llm_error(self, error: BaseException, **_: Any) -> None:
         handler = RecordingHandler()
         gate = asyncio.Event()
         model = _GatedStreamModel(gate=gate)
-        stream = await model.astream_v2("test", config={"callbacks": [handler]})
-        # No yield to the event loop between `astream_v2` returning and
-        # `aclose()` — the producer task has been created but its body
-        # has not executed.
+        stream = await model.astream_events(
+            "test", config={"callbacks": [handler]}, version="v3"
+        )
+        # No yield to the event loop between `astream_events(version="v3")`
+        # returning and `aclose()` — the producer task has been created
+        # but its body has not executed.
         await stream.aclose()
 
         # `await stream.output` must resolve (with CancelledError)
@@ -1007,7 +1010,9 @@ async def on_llm_error(self, error: BaseException, **_: Any) -> None:
         handler = RecordingHandler()
         gate = asyncio.Event()
         model = _GatedStreamModel(gate=gate)
-        stream = await model.astream_v2("test", config={"callbacks": [handler]})
+        stream = await model.astream_events(
+            "test", config={"callbacks": [handler]}, version="v3"
+        )
 
         aiter_ = stream.text.__aiter__()
         await aiter_.__anext__()
@@ -1047,7 +1052,9 @@ async def on_llm_end(self, response: Any, **_: Any) -> None:
 
         handler = SlowEndHandler()
         model = FakeListChatModel(responses=["ok"])
-        stream = await model.astream_v2("test", config={"callbacks": [handler]})
+        stream = await model.astream_events(
+            "test", config={"callbacks": [handler]}, version="v3"
+        )
 
         # Wait until the stream has assembled the message and the
         # slow on_llm_end handler has started running.
@@ -1167,9 +1174,9 @@ async def test_acache_hit_replays_events_to_v2_handler(self) -> None:
 class _ProviderMetadataStreamModel(BaseChatModel):
     """Fake model that advertises `output_version="responses/v1"` in metadata.
 
-    Verifies `stream_v2` pins the assembled message's `output_version` to
-    `"v1"` — the shape it actually produces — regardless of what the
-    provider's chunk metadata claims.
+    Verifies that `stream_events(version="v3")` pins the assembled
+    message's `output_version` to `"v1"` — the shape it actually
+    produces — regardless of what the provider's chunk metadata claims.
     """
 
     @property
@@ -1203,11 +1210,11 @@ def _stream(
 
 
 class TestOutputVersionPinning:
-    """`stream_v2().output` always serializes as v1 content blocks."""
+    """`stream_events(version="v3").output` always serializes as v1 content blocks."""
 
     def test_output_version_pinned_to_v1(self) -> None:
         model = _ProviderMetadataStreamModel()
-        stream = model.stream_v2("hi")
+        stream = model.stream_events("hi", version="v3")
         msg = stream.output
         # Assembled message must claim `"v1"` even though the provider
         # chunk metadata advertised `"responses/v1"`.
@@ -1329,7 +1336,7 @@ class TestBedrockConverseToolCallArgs:
 
     def test_bedrock_tool_call_assembles_without_error(self) -> None:
         model = _BedrockConverseToolCallModel()
-        stream = model.stream_v2("What's the weather in Boston?")
+        stream = model.stream_events("What's the weather in Boston?", version="v3")
         # Drive the stream to completion — the raise would have surfaced here.
         events = list(stream)
 
diff --git a/libs/core/tests/unit_tests/language_models/test_compat_bridge.py b/libs/core/tests/unit_tests/language_models/test_compat_bridge.py
index ce9f2f75a5a5d..27f1fc87ec50e 100644
--- a/libs/core/tests/unit_tests/language_models/test_compat_bridge.py
+++ b/libs/core/tests/unit_tests/language_models/test_compat_bridge.py
@@ -1,5 +1,6 @@
 """Tests for the compat bridge (chunk-to-event conversion)."""
 
+from collections.abc import AsyncIterator
 from typing import TYPE_CHECKING, Any, cast
 
 import pytest
@@ -9,6 +10,7 @@
     CompatBlock,
     _finalize_block,
     _to_protocol_usage,
+    achunks_to_events,
     amessage_to_events,
     chunks_to_events,
     message_to_events,
@@ -22,13 +24,17 @@
         InvalidToolCall,
         MessageFinishData,
         MessageStartData,
-        ReasoningContentBlock,
         ServerToolCall,
         TextContentBlock,
         ToolCall,
     )
 
 
+def _event_metadata(event: Any) -> dict[str, Any]:
+    """Return event metadata for protocol versions that type it as extensible."""
+    return cast("dict[str, Any]", cast("dict[str, Any]", event).get("metadata") or {})
+
+
 # ---------------------------------------------------------------------------
 # Pure helpers
 # ---------------------------------------------------------------------------
@@ -135,7 +141,7 @@ def test_chunks_to_events_text_only() -> None:
     # No provider finish_reason in fixtures — metadata carries no
     # `finish_reason` key (the bridge passes response_metadata through
     # unchanged).
-    assert "finish_reason" not in (finish.get("metadata") or {})
+    assert "finish_reason" not in _event_metadata(finish)
 
 
 def test_chunks_to_events_empty_iterator() -> None:
@@ -143,14 +149,14 @@ def test_chunks_to_events_empty_iterator() -> None:
     assert list(chunks_to_events(iter([]))) == []
 
 
-def test_chunks_to_events_block_transitions_close_previous_block() -> None:
+def test_chunks_to_events_block_transitions_keep_stable_indices() -> None:
     """String-keyed blocks that transition mid-stream each get their own lifecycle.
 
     Regression test for OpenAI `responses/v1` style streams where
     `content_blocks` uses string identifiers (e.g. `"lc_rs_305f30"`) to
     distinguish blocks. Each distinct block must get its own
     `content-block-start` / `content-block-finish` pair, with sequential
-    `uint` wire indices, and blocks must not interleave.
+    `uint` wire indices, and each block's deltas must reuse that same wire index.
     """
     chunks = [
         ChatGenerationChunk(
@@ -199,12 +205,12 @@ def test_chunks_to_events_block_transitions_close_previous_block() -> None:
 
     starts: list[Any] = [e for e in events if e["event"] == "content-block-start"]
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    assert [s["content_block"]["type"] for s in starts] == [
+    assert [s["content"]["type"] for s in starts] == [
         "reasoning",
         "reasoning",
         "text",
     ]
-    assert [f["content_block"]["type"] for f in finishes] == [
+    assert [f["content"]["type"] for f in finishes] == [
         "reasoning",
         "reasoning",
         "text",
@@ -213,8 +219,8 @@ def test_chunks_to_events_block_transitions_close_previous_block() -> None:
     assert [s["index"] for s in starts] == [0, 1, 2]
     assert [f["index"] for f in finishes] == [0, 1, 2]
 
-    # Finish events must be interleaved with starts (no-interleave rule):
-    # block 0 finishes before block 1 starts, etc.
+    # Blocks are finalized at message end so providers can interleave
+    # deltas for parallel content blocks without closing them early.
     events_any: list[Any] = events
     lifecycle = [
         (e["event"], e["index"])
@@ -223,17 +229,17 @@ def test_chunks_to_events_block_transitions_close_previous_block() -> None:
     ]
     assert lifecycle == [
         ("content-block-start", 0),
-        ("content-block-finish", 0),
         ("content-block-start", 1),
-        ("content-block-finish", 1),
         ("content-block-start", 2),
+        ("content-block-finish", 0),
+        ("content-block-finish", 1),
         ("content-block-finish", 2),
     ]
 
     # Each finish carries the accumulated content for its block.
-    assert finishes[0]["content_block"]["reasoning"] == "hmm then"
-    assert finishes[1]["content_block"]["reasoning"] == "different"
-    assert finishes[2]["content_block"]["text"] == "answer: 42"
+    assert finishes[0]["content"]["reasoning"] == "hmm then"
+    assert finishes[1]["content"]["reasoning"] == "different"
+    assert finishes[2]["content"]["text"] == "answer: 42"
 
 
 def test_chunks_to_events_tool_call_multichunk() -> None:
@@ -284,7 +290,7 @@ def test_chunks_to_events_tool_call_multichunk() -> None:
         e for e in events if e["event"] == "content-block-finish"
     ]
     assert len(finish_events) == 1
-    finalized = cast("ToolCall", finish_events[0]["content_block"])
+    finalized = cast("ToolCall", finish_events[0]["content"])
     assert finalized["type"] == "tool_call"
     assert finalized["args"] == {"q": "test"}
 
@@ -292,10 +298,126 @@ def test_chunks_to_events_tool_call_multichunk() -> None:
     # not synthesize one. It deliberately does not infer `"tool_use"`
     # from the presence of a valid tool_call either; terminal reasons
     # are provider-specific (see `_build_message_finish`).
-    assert "finish_reason" not in (
-        cast("MessageFinishData", events[-1]).get("metadata") or {}
+    assert "finish_reason" not in _event_metadata(events[-1])
+
+
+def test_chunks_to_events_interleaved_parallel_tool_calls() -> None:
+    """Parallel tool-call chunks can interleave without losing block lifecycles."""
+    events = list(
+        chunks_to_events(
+            iter(_interleaved_parallel_tool_call_chunks()), message_id="msg-1"
+        )
     )
 
+    _assert_interleaved_parallel_tool_call_events(events)
+
+
+@pytest.mark.asyncio
+async def test_achunks_to_events_interleaved_parallel_tool_calls() -> None:
+    """Async bridge preserves lifecycles for interleaved parallel tool calls."""
+    events = [
+        event
+        async for event in achunks_to_events(
+            _aiter_chunks(_interleaved_parallel_tool_call_chunks()),
+            message_id="msg-1",
+        )
+    ]
+
+    _assert_interleaved_parallel_tool_call_events(events)
+
+
+def _interleaved_parallel_tool_call_chunks() -> list[ChatGenerationChunk]:
+    return [
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content="",
+                id="msg-1",
+                tool_call_chunks=[
+                    {
+                        "index": 0,
+                        "id": "tc1",
+                        "name": "task",
+                        "args": '{"subagent_type": "haiku"',
+                        "type": "tool_call_chunk",
+                    }
+                ],
+            )
+        ),
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content="",
+                id="msg-1",
+                tool_call_chunks=[
+                    {
+                        "index": 1,
+                        "id": "tc2",
+                        "name": "task",
+                        "args": '{"subagent_type": "limerick"',
+                        "type": "tool_call_chunk",
+                    }
+                ],
+            )
+        ),
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content="",
+                id="msg-1",
+                tool_call_chunks=[
+                    {
+                        "index": 0,
+                        "id": None,
+                        "name": None,
+                        "args": ', "description": "Write a haiku"}',
+                        "type": "tool_call_chunk",
+                    }
+                ],
+            )
+        ),
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content="",
+                id="msg-1",
+                tool_call_chunks=[
+                    {
+                        "index": 1,
+                        "id": None,
+                        "name": None,
+                        "args": ', "description": "Write a limerick"}',
+                        "type": "tool_call_chunk",
+                    }
+                ],
+            )
+        ),
+    ]
+
+
+async def _aiter_chunks(
+    chunks: list[ChatGenerationChunk],
+) -> AsyncIterator[ChatGenerationChunk]:
+    for chunk in chunks:
+        yield chunk
+
+
+def _assert_interleaved_parallel_tool_call_events(events: list[Any]) -> None:
+    assert_valid_event_stream(events)
+
+    starts: list[Any] = [e for e in events if e["event"] == "content-block-start"]
+    finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
+    assert [s["index"] for s in starts] == [0, 1]
+    assert [f["index"] for f in finishes] == [0, 1]
+
+    finalized = [cast("ToolCall", event["content"]) for event in finishes]
+    assert finalized[0]["id"] == "tc1"
+    assert finalized[0]["args"] == {
+        "subagent_type": "haiku",
+        "description": "Write a haiku",
+    }
+    assert finalized[1]["id"] == "tc2"
+    assert finalized[1]["args"] == {
+        "subagent_type": "limerick",
+        "description": "Write a limerick",
+    }
+
 
 def test_chunks_to_events_invalid_tool_call_keeps_stop_reason() -> None:
     """Malformed tool-args become invalid_tool_call; finish_reason stays `stop`."""
@@ -323,10 +445,8 @@ def test_chunks_to_events_invalid_tool_call_keeps_stop_reason() -> None:
         e for e in events if e["event"] == "content-block-finish"
     ]
     assert len(finish_events) == 1
-    assert finish_events[0]["content_block"]["type"] == "invalid_tool_call"
-    assert "finish_reason" not in (
-        cast("MessageFinishData", events[-1]).get("metadata") or {}
-    )
+    assert finish_events[0]["content"]["type"] == "invalid_tool_call"
+    assert "finish_reason" not in _event_metadata(events[-1])
 
 
 def test_chunks_to_events_anthropic_server_tool_use_routes_through_translator() -> None:
@@ -350,7 +470,7 @@ def test_chunks_to_events_anthropic_server_tool_use_routes_through_translator()
 
     events = list(chunks_to_events(iter(chunks)))
     finish_blocks: list[Any] = [
-        e["content_block"] for e in events if e["event"] == "content-block-finish"
+        e["content"] for e in events if e["event"] == "content-block-finish"
     ]
     block_types = [b.get("type") for b in finish_blocks]
     assert "server_tool_call" in block_types
@@ -372,7 +492,7 @@ def test_chunks_to_events_unregistered_provider_falls_back() -> None:
     finish_events: list[Any] = [
         e for e in events if e["event"] == "content-block-finish"
     ]
-    assert [e["content_block"]["type"] for e in finish_events] == ["text"]
+    assert [e["content"]["type"] for e in finish_events] == ["text"]
 
 
 def test_chunks_to_events_no_provider_text_plus_tool_call() -> None:
@@ -402,7 +522,7 @@ def test_chunks_to_events_no_provider_text_plus_tool_call() -> None:
 
     events = list(chunks_to_events(iter(chunks)))
     finish_blocks: list[Any] = [
-        e["content_block"] for e in events if e["event"] == "content-block-finish"
+        e["content"] for e in events if e["event"] == "content-block-finish"
     ]
     types = [b.get("type") for b in finish_blocks]
     assert "text" in types
@@ -423,7 +543,7 @@ def test_chunks_to_events_reasoning_in_additional_kwargs() -> None:
 
     events = list(chunks_to_events(iter(chunks)))
     finish_blocks: list[Any] = [
-        e["content_block"] for e in events if e["event"] == "content-block-finish"
+        e["content"] for e in events if e["event"] == "content-block-finish"
     ]
     types = [b.get("type") for b in finish_blocks]
     assert "reasoning" in types
@@ -448,14 +568,13 @@ def test_message_to_events_text_only() -> None:
         "message-finish",
     ]
     start = cast("MessageStartData", events[0])
-    assert start["message_id"] == "msg-1"
+    assert start["id"] == "msg-1"
 
     delta_event = cast("ContentBlockDeltaData", events[2])
-    delta = cast("TextContentBlock", delta_event["content_block"])
-    assert delta["text"] == "Hello world"
+    assert delta_event["delta"] == {"type": "text-delta", "text": "Hello world"}
 
     final = cast("MessageFinishData", events[-1])
-    assert "finish_reason" not in (final.get("metadata") or {})
+    assert "finish_reason" not in _event_metadata(final)
 
 
 def test_message_to_events_empty_content_yields_start_finish_only() -> None:
@@ -477,15 +596,16 @@ def test_message_to_events_reasoning_text_order() -> None:
 
     starts: list[Any] = [e for e in events if e["event"] == "content-block-start"]
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    assert [s["content_block"]["type"] for s in starts] == ["reasoning", "text"]
-    assert [f["content_block"]["type"] for f in finishes] == ["reasoning", "text"]
+    assert [s["content"]["type"] for s in starts] == ["reasoning", "text"]
+    assert [f["content"]["type"] for f in finishes] == ["reasoning", "text"]
 
     deltas: list[Any] = [e for e in events if e["event"] == "content-block-delta"]
     assert len(deltas) == 2
-    assert cast("ReasoningContentBlock", deltas[0]["content_block"])["reasoning"] == (
-        "think hard"
-    )
-    assert cast("TextContentBlock", deltas[1]["content_block"])["text"] == "the answer"
+    assert deltas[0]["delta"] == {
+        "type": "reasoning-delta",
+        "reasoning": "think hard",
+    }
+    assert deltas[1]["delta"] == {"type": "text-delta", "text": "the answer"}
 
 
 def test_message_to_events_tool_call_skips_delta() -> None:
@@ -505,7 +625,7 @@ def test_message_to_events_tool_call_skips_delta() -> None:
 
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
     assert len(finishes) == 1
-    tc = cast("ToolCall", finishes[0]["content_block"])
+    tc = cast("ToolCall", finishes[0]["content"])
     assert tc["type"] == "tool_call"
     assert tc["args"] == {"q": "hi"}
 
@@ -513,7 +633,7 @@ def test_message_to_events_tool_call_skips_delta() -> None:
     # bridge does not synthesize one and does not second-guess based on
     # the presence of a tool_call.
     final = cast("MessageFinishData", events[-1])
-    assert "finish_reason" not in (final.get("metadata") or {})
+    assert "finish_reason" not in _event_metadata(final)
 
 
 def test_message_to_events_invalid_tool_calls_surfaced_from_field() -> None:
@@ -536,7 +656,7 @@ def test_message_to_events_invalid_tool_calls_surfaced_from_field() -> None:
     )
     events = list(message_to_events(msg))
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    types = [f["content_block"]["type"] for f in finishes]
+    types = [f["content"]["type"] for f in finishes]
     assert "invalid_tool_call" in types
 
 
@@ -558,7 +678,7 @@ def test_message_to_events_preserves_finish_reason_and_metadata() -> None:
     # Passthrough: response_metadata lands on `metadata` unchanged,
     # including the raw provider `finish_reason`.
     final = cast("MessageFinishData", events[-1])
-    assert final["metadata"] == {
+    assert _event_metadata(final) == {
         "finish_reason": "length",
         "model_name": "test-model",
         "stop_sequence": "</end>",
@@ -585,7 +705,7 @@ def test_message_to_events_message_id_override() -> None:
     msg = AIMessage(content="x", id="msg-orig")
     events = list(message_to_events(msg, message_id="msg-override"))
     start = cast("MessageStartData", events[0])
-    assert start["message_id"] == "msg-override"
+    assert start["id"] == "msg-override"
 
 
 def test_message_to_events_self_contained_start_strips_heavy_fields() -> None:
@@ -622,32 +742,32 @@ def test_message_to_events_self_contained_start_strips_heavy_fields() -> None:
     events = list(message_to_events(msg))
 
     starts: list[Any] = [e for e in events if e["event"] == "content-block-start"]
-    assert [s["content_block"]["type"] for s in starts] == [
+    assert [s["content"]["type"] for s in starts] == [
         "image",
         "audio",
         "non_standard",
     ]
 
-    image_start = starts[0]["content_block"]
+    image_start = starts[0]["content"]
     assert image_start["id"] == "img-1"
     assert image_start["mime_type"] == "image/png"
     assert "data" not in image_start
 
-    audio_start = starts[1]["content_block"]
+    audio_start = starts[1]["content"]
     assert audio_start["id"] == "aud-1"
     assert audio_start["mime_type"] == "audio/mp3"
     assert "data" not in audio_start
     assert "transcript" not in audio_start
 
-    ns_start = starts[2]["content_block"]
+    ns_start = starts[2]["content"]
     assert ns_start["type"] == "non_standard"
     assert ns_start["value"] == {}
 
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    assert finishes[0]["content_block"]["data"] == "A" * 1024
-    assert finishes[1]["content_block"]["data"] == "B" * 1024
-    assert finishes[1]["content_block"]["transcript"] == "hello"
-    assert finishes[2]["content_block"]["value"] == {"big": "C" * 1024}
+    assert finishes[0]["content"]["data"] == "A" * 1024
+    assert finishes[1]["content"]["data"] == "B" * 1024
+    assert finishes[1]["content"]["transcript"] == "hello"
+    assert finishes[2]["content"]["value"] == {"big": "C" * 1024}
 
 
 def test_message_to_events_finalized_tool_call_start_strips_args() -> None:
@@ -668,14 +788,14 @@ def test_message_to_events_finalized_tool_call_start_strips_args() -> None:
 
     starts: list[Any] = [e for e in events if e["event"] == "content-block-start"]
     assert len(starts) == 1
-    tc_start = starts[0]["content_block"]
+    tc_start = starts[0]["content"]
     assert tc_start["type"] == "tool_call"
     assert tc_start["id"] == "tc1"
     assert tc_start["name"] == "search"
     assert tc_start["args"] == {}
 
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    tc_finish = cast("ToolCall", finishes[0]["content_block"])
+    tc_finish = cast("ToolCall", finishes[0]["content"])
     assert tc_finish["args"] == {"q": "big payload " * 100}
 
 
@@ -756,10 +876,10 @@ def test_lifecycle_validator_openai_chat_completions_style() -> None:
     assert_valid_event_stream(events)
 
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    types = [f["content_block"]["type"] for f in finishes]
+    types = [f["content"]["type"] for f in finishes]
     assert types == ["text", "tool_call"]
-    assert finishes[0]["content_block"]["text"] == "Hello there"
-    assert finishes[1]["content_block"]["args"] == {"q": "pie"}
+    assert finishes[0]["content"]["text"] == "Hello there"
+    assert finishes[1]["content"]["args"] == {"q": "pie"}
 
 
 def test_lifecycle_validator_openai_responses_style() -> None:
@@ -783,7 +903,7 @@ def test_lifecycle_validator_openai_responses_style() -> None:
     starts: list[Any] = [e for e in events if e["event"] == "content-block-start"]
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
     # Four distinct blocks: reasoning, text, reasoning, text
-    assert [s["content_block"]["type"] for s in starts] == [
+    assert [s["content"]["type"] for s in starts] == [
         "reasoning",
         "text",
         "reasoning",
@@ -791,10 +911,10 @@ def test_lifecycle_validator_openai_responses_style() -> None:
     ]
     assert [s["index"] for s in starts] == [0, 1, 2, 3]
     assert [f["index"] for f in finishes] == [0, 1, 2, 3]
-    assert finishes[0]["content_block"]["reasoning"] == "hmm first"
-    assert finishes[1]["content_block"]["text"] == "Answer: 42"
-    assert finishes[2]["content_block"]["reasoning"] == "actually"
-    assert finishes[3]["content_block"]["text"] == "42!"
+    assert finishes[0]["content"]["reasoning"] == "hmm first"
+    assert finishes[1]["content"]["text"] == "Answer: 42"
+    assert finishes[2]["content"]["reasoning"] == "actually"
+    assert finishes[3]["content"]["text"] == "42!"
 
 
 def test_lifecycle_validator_anthropic_style_text_and_thinking() -> None:
@@ -815,9 +935,9 @@ def test_lifecycle_validator_anthropic_style_text_and_thinking() -> None:
     assert_valid_event_stream(events)
 
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    assert [f["content_block"]["type"] for f in finishes] == ["reasoning", "text"]
-    assert finishes[0]["content_block"]["reasoning"] == "Let me think more"
-    assert finishes[1]["content_block"]["text"] == "The answer is 42."
+    assert [f["content"]["type"] for f in finishes] == ["reasoning", "text"]
+    assert finishes[0]["content"]["reasoning"] == "Let me think more"
+    assert finishes[1]["content"]["text"] == "The answer is 42."
 
 
 def test_lifecycle_validator_anthropic_reasoning_preserves_signature() -> None:
@@ -853,7 +973,7 @@ def test_lifecycle_validator_anthropic_reasoning_preserves_signature() -> None:
     assert_valid_event_stream(events)
 
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    reasoning_finish = finishes[0]["content_block"]
+    reasoning_finish = finishes[0]["content"]
     assert reasoning_finish["type"] == "reasoning"
     assert reasoning_finish["reasoning"] == "Let me think more"
     assert reasoning_finish.get("extras", {}).get("signature") == "sig-abc123"
@@ -899,9 +1019,9 @@ def test_lifecycle_validator_anthropic_style_tool_use_after_text() -> None:
     assert_valid_event_stream(events)
 
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    assert [f["content_block"]["type"] for f in finishes] == ["text", "tool_call"]
-    assert finishes[1]["content_block"]["args"] == {"query": "42"}
-    assert finishes[1]["content_block"]["id"] == "toolu_1"
+    assert [f["content"]["type"] for f in finishes] == ["text", "tool_call"]
+    assert finishes[1]["content"]["args"] == {"query": "42"}
+    assert finishes[1]["content"]["id"] == "toolu_1"
 
 
 def test_lifecycle_validator_inline_image_block() -> None:
@@ -925,11 +1045,17 @@ def test_lifecycle_validator_inline_image_block() -> None:
     starts: list[Any] = [e for e in events if e["event"] == "content-block-start"]
     deltas: list[Any] = [e for e in events if e["event"] == "content-block-delta"]
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
-    assert [s["content_block"]["type"] for s in starts] == ["image"]
-    # Self-contained block: no delta, and start has heavy fields stripped.
-    assert deltas == []
-    assert "data" not in starts[0]["content_block"]
-    assert finishes[0]["content_block"]["data"] == "AAAA"
+    assert [s["content"]["type"] for s in starts] == ["image"]
+    # Data payload rides as an explicit data-delta; start has heavy fields stripped.
+    assert deltas == [
+        {
+            "event": "content-block-delta",
+            "index": 0,
+            "delta": {"type": "data-delta", "data": "AAAA"},
+        }
+    ]
+    assert "data" not in starts[0]["content"]
+    assert finishes[0]["content"]["data"] == "AAAA"
 
 
 def test_lifecycle_validator_invalid_tool_call_args() -> None:
@@ -956,7 +1082,7 @@ def test_lifecycle_validator_invalid_tool_call_args() -> None:
 
     finishes: list[Any] = [e for e in events if e["event"] == "content-block-finish"]
     assert len(finishes) == 1
-    assert finishes[0]["content_block"]["type"] == "invalid_tool_call"
+    assert finishes[0]["content"]["type"] == "invalid_tool_call"
 
 
 def test_lifecycle_validator_empty_stream() -> None:
diff --git a/libs/core/tests/unit_tests/language_models/test_v1_parity.py b/libs/core/tests/unit_tests/language_models/test_v1_parity.py
index d7f3f3a602c88..36b0141e234b5 100644
--- a/libs/core/tests/unit_tests/language_models/test_v1_parity.py
+++ b/libs/core/tests/unit_tests/language_models/test_v1_parity.py
@@ -1,7 +1,7 @@
-"""V1 parity tests: stream_v2() output must match model.stream() output.
+"""V1 parity tests: `stream_events(version="v3")` must match `model.stream()` output.
 
-These are the acceptance criteria for streaming v2 — if any test fails,
-v2 has a regression vs v1.
+These are the acceptance criteria for the v3 streaming API — if any test fails,
+v3 has a regression vs v1.
 """
 
 from __future__ import annotations
@@ -128,8 +128,8 @@ def _collect_v1_message(model: BaseChatModel, input_text: str) -> AIMessage:
 
 
 def _collect_v2_message(model: BaseChatModel, input_text: str) -> AIMessage:
-    """Run model.stream_v2() and get .output."""
-    stream = model.stream_v2(input_text)
+    """Run `model.stream_events(version="v3")` and get `.output`."""
+    stream = model.stream_events(input_text, version="v3")
     return stream.output
 
 
@@ -156,13 +156,13 @@ def test_message_id_present(self) -> None:
     def test_empty_response(self) -> None:
         """A truly empty stream is an error, matching `stream()` parity.
 
-        `stream_v2` distinguishes "producer emitted events but no terminal
-        `message-finish`" (which is synthesized, for native-event providers
+        `stream_events(version="v3")` distinguishes "producer emitted events but no
+        terminal `message-finish`" (which is synthesized, for native-event providers
         that omit it) from "producer emitted nothing at all" (which fails
         with `ValueError`, same as `stream()`).
         """
         model = FakeListChatModel(responses=[""])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
         with pytest.raises(ValueError, match="No generation chunks"):
             _ = stream.output
 
@@ -179,7 +179,7 @@ def test_multi_character_response(self) -> None:
 
     def test_text_deltas_reconstruct_content(self) -> None:
         model = FakeListChatModel(responses=["Hello!"])
-        stream = model.stream_v2("test")
+        stream = model.stream_events("test", version="v3")
 
         deltas = list(stream.text)
         content = stream.output.content
@@ -233,7 +233,7 @@ def test_tool_calls_match(self) -> None:
 
     def test_tool_calls_via_projection(self) -> None:
         model = self._make_model()
-        stream = model.stream_v2("weather?")
+        stream = model.stream_events("weather?", version="v3")
         finalized = stream.tool_calls.get()
         assert len(finalized) == 1
         assert finalized[0]["name"] == "get_weather"
@@ -276,7 +276,7 @@ def test_usage_metadata_present(self) -> None:
         assert v1.usage_metadata["total_tokens"] == v2.usage_metadata["total_tokens"]
 
     def test_usage_projection_matches(self) -> None:
-        stream = self._make_model().stream_v2("hello")
+        stream = self._make_model().stream_events("hello", version="v3")
         # Drain so usage is available
         for _ in stream.text:
             pass
@@ -352,7 +352,7 @@ def test_reasoning_text_order(self) -> None:
         assert types_in_order == ["reasoning", "text"]
 
     def test_reasoning_projection(self) -> None:
-        stream = self._make_model().stream_v2("think")
+        stream = self._make_model().stream_events("think", version="v3")
         full_reasoning = str(stream.reasoning)
         assert full_reasoning == "Let me think. Done."
 
@@ -366,7 +366,7 @@ def test_error_propagates_sync(self) -> None:
         ]
         model = _ScriptedChunkModel(scripted_chunks=chunks, raise_after=True)
 
-        stream = model.stream_v2("boom")
+        stream = model.stream_events("boom", version="v3")
         # Drain first; error may surface here or at .output access.
         try:
             list(stream.text)
@@ -382,7 +382,7 @@ async def test_error_propagates_async(self) -> None:
         ]
         model = _ScriptedChunkModel(scripted_chunks=chunks, raise_after=True)
 
-        stream = await model.astream_v2("boom")
+        stream = await model.astream_events("boom", version="v3")
         try:
             async for _ in stream.text:
                 pass
diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v3.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v3.py
new file mode 100644
index 0000000000000..bc79b27078f1d
--- /dev/null
+++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v3.py
@@ -0,0 +1,23 @@
+"""Tests for the v3 dispatch path on Runnable.astream_events / stream_events."""
+
+from __future__ import annotations
+
+import pytest
+
+from langchain_core.runnables import RunnableLambda
+
+
+def _double(x: int) -> int:
+    return x * 2
+
+
+def test_v3_on_plain_runnable_raises_not_implemented_sync() -> None:
+    runnable = RunnableLambda(_double)
+    with pytest.raises(NotImplementedError, match="v3"):
+        runnable.stream_events(2, version="v3")
+
+
+async def test_v3_on_plain_runnable_raises_not_implemented_async() -> None:
+    runnable = RunnableLambda(_double)
+    with pytest.raises(NotImplementedError, match="v3"):
+        await runnable.astream_events(2, version="v3")
diff --git a/libs/core/uv.lock b/libs/core/uv.lock
index c0349649520f0..0bd549b47c863 100644
--- a/libs/core/uv.lock
+++ b/libs/core/uv.lock
@@ -995,7 +995,7 @@ wheels = [
 
 [[package]]
 name = "langchain-core"
-version = "1.3.2"
+version = "1.4.0"
 source = { editable = "." }
 dependencies = [
     { name = "jsonpatch" },
@@ -1046,7 +1046,7 @@ typing = [
 [package.metadata]
 requires-dist = [
     { name = "jsonpatch", specifier = ">=1.33.0,<2.0.0" },
-    { name = "langchain-protocol", specifier = ">=0.0.10" },
+    { name = "langchain-protocol", specifier = ">=0.0.14" },
     { name = "langsmith", specifier = ">=0.3.45,<1.0.0" },
     { name = "packaging", specifier = ">=23.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
@@ -1091,14 +1091,14 @@ typing = [
 
 [[package]]
 name = "langchain-protocol"
-version = "0.0.10"
+version = "0.0.14"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/bf/c3/0d3911d3274f097040e92133f18a425980cd4085e72b6cd65add1f25327c/langchain_protocol-0.0.10.tar.gz", hash = "sha256:5bc530e0b350d3a15a3ab6889abb8132692a2c8a15eed536bce46624751acaaf", size = 6528, upload-time = "2026-04-23T17:31:34.212Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/bf/efb5e2ed832e4d6d45590e25a9e5191986b291b543bc6a807b48bee070b0/langchain_protocol-0.0.14.tar.gz", hash = "sha256:bc1e8553122e6ede310280462d5813023a172ff2785ccbbdec54d43f3a15e5f2", size = 5862, upload-time = "2026-04-29T16:40:18.657Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/11/6c89bc86b5494cfe29ee23420c398406cc147a09b5cf756e323070e358d7/langchain_protocol-0.0.10-py3-none-any.whl", hash = "sha256:040bb2ae966a06ffcd0051a1d1ca7e4926f12e951e83b07440cb80e0e8e12268", size = 6677, upload-time = "2026-04-23T17:31:33.367Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/e9/06c47ecb2aff08f83dfa30058da3bf86be64862c19569043ed5331bbeecd/langchain_protocol-0.0.14-py3-none-any.whl", hash = "sha256:ffc35089779bd8ca217015180cef5e660fc3b074efdaa0f2e95df73583f1a047", size = 6984, upload-time = "2026-04-29T16:40:17.841Z" },
 ]
 
 [[package]]
diff --git a/libs/langchain/langchain_classic/chat_models/base.py b/libs/langchain/langchain_classic/chat_models/base.py
index 779e5446fb6c3..36f554aa8789f 100644
--- a/libs/langchain/langchain_classic/chat_models/base.py
+++ b/libs/langchain/langchain_classic/chat_models/base.py
@@ -1006,7 +1006,7 @@ async def astream_log(
             yield x
 
     @override
-    async def astream_events(
+    async def astream_events(  # type: ignore[override]
         self,
         input: Any,
         config: RunnableConfig | None = None,
diff --git a/libs/langchain/uv.lock b/libs/langchain/uv.lock
index 1f8726777b709..94dba4a6c2879 100644
--- a/libs/langchain/uv.lock
+++ b/libs/langchain/uv.lock
@@ -2618,7 +2618,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "jsonpatch", specifier = ">=1.33.0,<2.0.0" },
-    { name = "langchain-protocol", specifier = ">=0.0.10" },
+    { name = "langchain-protocol", specifier = ">=0.0.14" },
     { name = "langsmith", specifier = ">=0.3.45,<1.0.0" },
     { name = "packaging", specifier = ">=23.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
@@ -2849,14 +2849,14 @@ wheels = [
 
 [[package]]
 name = "langchain-protocol"
-version = "0.0.10"
+version = "0.0.14"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/bf/c3/0d3911d3274f097040e92133f18a425980cd4085e72b6cd65add1f25327c/langchain_protocol-0.0.10.tar.gz", hash = "sha256:5bc530e0b350d3a15a3ab6889abb8132692a2c8a15eed536bce46624751acaaf", size = 6528, upload-time = "2026-04-23T17:31:34.212Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/bf/efb5e2ed832e4d6d45590e25a9e5191986b291b543bc6a807b48bee070b0/langchain_protocol-0.0.14.tar.gz", hash = "sha256:bc1e8553122e6ede310280462d5813023a172ff2785ccbbdec54d43f3a15e5f2", size = 5862, upload-time = "2026-04-29T16:40:18.657Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/11/6c89bc86b5494cfe29ee23420c398406cc147a09b5cf756e323070e358d7/langchain_protocol-0.0.10-py3-none-any.whl", hash = "sha256:040bb2ae966a06ffcd0051a1d1ca7e4926f12e951e83b07440cb80e0e8e12268", size = 6677, upload-time = "2026-04-23T17:31:33.367Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/e9/06c47ecb2aff08f83dfa30058da3bf86be64862c19569043ed5331bbeecd/langchain_protocol-0.0.14-py3-none-any.whl", hash = "sha256:ffc35089779bd8ca217015180cef5e660fc3b074efdaa0f2e95df73583f1a047", size = 6984, upload-time = "2026-04-29T16:40:17.841Z" },
 ]
 
 [[package]]
diff --git a/libs/langchain_v1/langchain/__init__.py b/libs/langchain_v1/langchain/__init__.py
index 498b37b801ab8..f3608ac63c1e0 100644
--- a/libs/langchain_v1/langchain/__init__.py
+++ b/libs/langchain_v1/langchain/__init__.py
@@ -1,3 +1,3 @@
 """Main entrypoint into LangChain."""
 
-__version__ = "1.2.17"
+__version__ = "1.3.0a2"
diff --git a/libs/langchain_v1/langchain/agents/factory.py b/libs/langchain_v1/langchain/agents/factory.py
index b700a6572b488..b15e9caf1be91 100644
--- a/libs/langchain_v1/langchain/agents/factory.py
+++ b/libs/langchain_v1/langchain/agents/factory.py
@@ -22,6 +22,7 @@
 from langgraph._internal._runnable import RunnableCallable
 from langgraph.constants import END, START
 from langgraph.graph.state import StateGraph
+from langgraph.prebuilt import ToolCallTransformer
 from langgraph.prebuilt.tool_node import ToolNode
 from langgraph.types import Command, Send
 from langsmith import traceable
@@ -406,8 +407,13 @@ def _get_schema_type_hints(schema: type) -> dict[str, Any]:
     return get_type_hints(schema, include_extras=True)
 
 
-def _resolve_schemas(schemas: set[type]) -> tuple[type, type, type]:
-    """Resolve state, input, and output schemas for the given schemas."""
+def _resolve_schemas(schemas: list[type]) -> tuple[type, type, type]:
+    """Resolve state, input, and output schemas for the given schemas.
+
+    Schemas are merged in list order; later entries override earlier ones when the
+    same field is declared by multiple schemas.  A type listed more than once keeps
+    the position of its first occurrence, since `schema_hints` is keyed by schema.
+    """
     schema_hints = {schema: _get_schema_type_hints(schema) for schema in schemas}
     return (
         _resolve_schema(schema_hints, "StateSchema", None),
@@ -704,6 +710,7 @@ def create_agent(
     debug: bool = False,
     name: str | None = None,
     cache: BaseCache[Any] | None = None,
+    transformers: Sequence[Callable[[tuple[str, ...]], Any]] | None = None,
 ) -> CompiledStateGraph[
     AgentState[ResponseT], ContextT, _InputAgentState, _OutputAgentState[ResponseT]
 ]:
@@ -799,6 +806,11 @@ def create_agent(
             another graph as a subgraph node - particularly useful for building
             multi-agent systems.
         cache: An optional `BaseCache` instance to enable caching of graph execution.
+        transformers: Optional sequence of scope-aware `StreamTransformer`
+            factories to register on the compiled graph in addition to
+            the agent defaults. Each factory is invoked per-scope
+            (`factory(scope)`) so subgraph mini-muxes get fresh
+            instances. Appended after the built-in `ToolCallTransformer`.
 
     Returns:
         A compiled `StateGraph` that can be used for chat interactions.
@@ -1023,10 +1035,13 @@ def check_weather(location: str) -> str:
         ]
         awrap_model_call_handler = _chain_async_model_call_handlers(async_handlers)
 
-    state_schemas: set[type] = {m.state_schema for m in middleware}
-    # Use provided state_schema if available, otherwise use base AgentState
     base_state = state_schema if state_schema is not None else AgentState
-    state_schemas.add(base_state)
+    # Build an ordered list: middleware schemas first (in registration order), with
+    # base_state last so it wins any field conflict.  This lets the caller's explicit
+    # state_schema override middleware annotations — e.g. a DeltaChannel-annotated schema
+    # wins over BinaryOperatorAggregate from AgentState without a post-compilation patch.
+    # NOTE(review): a base_state type also used by a middleware keeps that earlier slot.
+    state_schemas: list[type] = [*(m.state_schema for m in middleware), base_state]
 
     resolved_state_schema, input_schema, output_schema = _resolve_schemas(state_schemas)
 
@@ -1665,6 +1680,7 @@ async def amodel_node(state: AgentState[Any], runtime: Runtime[ContextT]) -> lis
         debug=debug,
         name=name,
         cache=cache,
+        transformers=[ToolCallTransformer, *(transformers or ())],
     ).with_config(config)
 
 
diff --git a/libs/langchain_v1/langchain/chat_models/base.py b/libs/langchain_v1/langchain/chat_models/base.py
index 9757f04d052d7..067ad0313e6f7 100644
--- a/libs/langchain_v1/langchain/chat_models/base.py
+++ b/libs/langchain_v1/langchain/chat_models/base.py
@@ -964,7 +964,7 @@ async def astream_log(
             yield x
 
     @override
-    async def astream_events(
+    async def astream_events(  # type: ignore[override]
         self,
         input: Any,
         config: RunnableConfig | None = None,
diff --git a/libs/langchain_v1/pyproject.toml b/libs/langchain_v1/pyproject.toml
index 1cfc457c483f5..26d0ea632ecc5 100644
--- a/libs/langchain_v1/pyproject.toml
+++ b/libs/langchain_v1/pyproject.toml
@@ -21,11 +21,11 @@ classifiers = [
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 
-version = "1.2.17"
+version = "1.3.0a2"
 requires-python = ">=3.10.0,<4.0.0"
 dependencies = [
-    "langchain-core>=1.3.2,<2.0.0",
-    "langgraph>=1.1.10,<1.2.0",
+    "langchain-core>=1.4.0a2,<2.0.0",
+    "langgraph>=1.2.0a5,<1.3.0",
     "pydantic>=2.7.4,<3.0.0",
 ]
 
diff --git a/libs/langchain_v1/tests/unit_tests/agents/test_agent_streaming.py b/libs/langchain_v1/tests/unit_tests/agents/test_agent_streaming.py
new file mode 100644
index 0000000000000..8b5275137ec21
--- /dev/null
+++ b/libs/langchain_v1/tests/unit_tests/agents/test_agent_streaming.py
@@ -0,0 +1,285 @@
+"""Unit tests for create_agent graphs streaming via `stream_events(version="v3")`."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+import pytest
+from langchain_core.messages import HumanMessage
+from langchain_core.runnables import RunnableConfig  # noqa: TC002
+from langchain_core.tools import tool
+from langgraph.prebuilt import ToolCallTransformer
+from langgraph.stream import StreamChannel, StreamTransformer
+
+from langchain.agents import create_agent
+from langchain.tools import ToolRuntime  # noqa: TC001
+from tests.unit_tests.agents.model import FakeToolCallingModel
+
+if TYPE_CHECKING:
+    from langgraph.prebuilt._tool_call_stream import ToolCallStream
+    from langgraph.stream._types import ProtocolEvent
+
+
+@tool
+def echo(text: str) -> str:
+    """Return the input unchanged."""
+    return text
+
+
+@tool
+def streamer(text: str, runtime: ToolRuntime) -> str:
+    """Stream two chunks, then return the full text."""
+    runtime.emit_output_delta("one")
+    runtime.emit_output_delta("two")
+    return text
+
+
+@tool
+async def astreamer(text: str, runtime: ToolRuntime) -> str:
+    """Async: stream two chunks, then return the full text."""
+    for piece in (text, text + "!"):
+        runtime.emit_output_delta(piece)
+    return text
+
+
+@tool
+def boom() -> str:
+    """Raise unconditionally."""
+    msg = "nope"
+    raise ValueError(msg)
+
+
+def _single_tool_call_script(name: str, **args: Any) -> list[list[dict[str, Any]]]:
+    """Build a two-turn script: one call to `name` on turn 0, no calls on turn 1."""
+    # Every scripted call uses the fixed id "tc1".
+    first_turn: list[dict[str, Any]] = [{"name": name, "args": args, "id": "tc1"}]
+    final_turn: list[dict[str, Any]] = []
+    return [first_turn, final_turn]
+
+
+class TestAgentStreamV3Sync:
+    def test_stream_returns_agent_run_stream(self) -> None:
+        """Smoke test: a v3 stream is returned and its `tool_calls` can be drained."""
+        model = FakeToolCallingModel(tool_calls=_single_tool_call_script("echo", text="x"))
+        agent = create_agent(model, [echo])
+        stream = agent.stream_events({"messages": [HumanMessage("hi")]}, version="v3")
+
+        # Consume every tool call so the run closes cleanly.
+        list(stream.tool_calls)
+
+    def test_tool_calls_populated_without_opt_in(self) -> None:
+        """`ToolCallTransformer` is registered by default on the agent streamer."""
+        model = FakeToolCallingModel(tool_calls=_single_tool_call_script("echo", text="x"))
+        agent = create_agent(model, [echo])
+        stream = agent.stream_events({"messages": [HumanMessage("hi")]}, version="v3")
+
+        calls: list[ToolCallStream] = list(stream.tool_calls)
+        # Exactly one call is expected: the scripted `echo` call with id "tc1".
+        assert len(calls) == 1
+        only_call = calls[0]
+        assert only_call.tool_name == "echo"
+        assert only_call.tool_call_id == "tc1"
+        assert only_call.completed is True
+        assert only_call.error is None
+
+    def test_tool_output_deltas_flow_through(self) -> None:
+        """Deltas emitted by the `streamer` tool surface on `output_deltas`."""
+        model = FakeToolCallingModel(tool_calls=_single_tool_call_script("streamer", text="x"))
+        agent = create_agent(model, [streamer])
+        stream = agent.stream_events({"messages": [HumanMessage("hi")]}, version="v3")
+
+        seen: list[ToolCallStream] = []
+        for call in stream.tool_calls:
+            seen.append(call)
+            assert list(call.output_deltas) == ["one", "two"]
+        assert len(seen) == 1
+
+    def test_no_tools_run_is_still_usable(self) -> None:
+        """With nothing scripted, `.tool_calls` is empty and the run still has output."""
+        agent = create_agent(FakeToolCallingModel(), [])
+        stream = agent.stream_events({"messages": [HumanMessage("hi")]}, version="v3")
+
+        drained = list(stream.tool_calls)
+        assert drained == []
+        assert stream.output is not None
+
+    def test_messages_projection_present(self) -> None:
+        """`MessagesTransformer` is inherited from `GraphStreamer.builtin_factories`."""
+        model = FakeToolCallingModel(tool_calls=_single_tool_call_script("echo", text="x"))
+        agent = create_agent(model, [echo])
+
+        stream = agent.stream_events({"messages": [HumanMessage("hi")]}, version="v3")
+        # Only the presence of the built-in `messages` projection is checked
+        # here: `BaseRunStream.__init__` binds it as an instance attribute
+        # whenever `MessagesTransformer` is registered. Its content is
+        # covered by langgraph's own tests.
+        assert "messages" in stream._mux.extensions  # type: ignore[attr-defined]
+        assert hasattr(stream, "messages")
+        # Consume tool calls and their deltas so the run closes cleanly.
+        for call in stream.tool_calls:
+            list(call.output_deltas)
+
+    def test_caller_transformers_appended_not_replaced(self) -> None:
+        """User-supplied transformers add to, rather than replace, the agent defaults."""
+
+        class _Marker(StreamTransformer):
+            required_stream_modes = ()
+
+            def __init__(self, scope: tuple[str, ...] = ()) -> None:
+                super().__init__(scope)
+                self._log: StreamChannel[int] = StreamChannel()
+
+            def init(self) -> dict[str, Any]:
+                return {"marker": self._log}
+
+            def process(self, event: ProtocolEvent) -> bool:
+                del event
+                return True
+
+        model = FakeToolCallingModel(tool_calls=_single_tool_call_script("echo", text="x"))
+        agent = create_agent(model, [echo])
+
+        run = agent.stream_events(
+            {"messages": [HumanMessage("hi")]},
+            version="v3",
+            transformers=[_Marker],
+        )
+        # Both the agent default and the user transformer are registered.
+        assert "tool_calls" in run._mux.extensions  # type: ignore[attr-defined]
+        assert "marker" in run._mux.extensions  # type: ignore[attr-defined]
+
+        # `ToolCallTransformer` must come BEFORE the user's transformer in
+        # the registration order so it processes `tools` events first. The
+        # docstring on `create_agent` promises caller transformers are
+        # appended after the built-in.
+        transformers = run._mux._transformers  # type: ignore[attr-defined]
+        tool_call_idx = next(
+            i for i, t in enumerate(transformers) if isinstance(t, ToolCallTransformer)
+        )
+        marker_idx = next(i for i, t in enumerate(transformers) if isinstance(t, _Marker))
+        assert tool_call_idx < marker_idx, (
+            "ToolCallTransformer must be registered before user-supplied transformers"
+        )
+
+        list(run.tool_calls)
+
+    def test_tool_error_sets_error_field(self) -> None:
+        """Tool errors are surfaced on the `ToolCallStream.error` field.
+
+        `create_agent`'s default tool-error handler re-raises, so the
+        overall run fails — the assertion here is that the error is
+        attached to the scoped `ToolCallStream` *before* the run raises.
+        """
+        model = FakeToolCallingModel(tool_calls=_single_tool_call_script("boom"))
+        agent = create_agent(model, [boom])
+
+        run = agent.stream_events({"messages": [HumanMessage("hi")]}, version="v3")
+
+        collected: list[ToolCallStream] = []
+
+        def _drive() -> None:
+            for tc in run.tool_calls:
+                collected.append(tc)
+                list(tc.output_deltas)
+
+        with pytest.raises(ValueError, match="nope"):
+            _drive()
+        assert len(collected) == 1
+        assert collected[0].error is not None
+        assert "nope" in collected[0].error
+        assert collected[0].completed is True
+
+    def test_inner_agent_tool_calls_via_subgraph(self) -> None:
+        """An inner `create_agent` invoked from a tool exposes its own tool calls.
+
+        Wiring a sub-agent through a `@tool` is the canonical pattern
+        for hierarchical agents. The outer caller iterates `run.subgraphs`
+        and finds a `tool_calls` projection on each subgraph handle
+        — populated by a fresh per-scope `ToolCallTransformer` instance.
+        """
+
+        @tool
+        def inner_echo(text: str) -> str:
+            """Echo via inner agent."""
+            return text
+
+        inner_model = FakeToolCallingModel(
+            tool_calls=_single_tool_call_script("inner_echo", text="leaf"),
+        )
+        inner_agent = create_agent(inner_model, [inner_echo])
+
+        @tool
+        def run_inner(query: str, config: RunnableConfig) -> str:
+            """Delegate `query` to the inner agent and return its reply."""
+            result = inner_agent.invoke(
+                {"messages": [HumanMessage(query)]},
+                config,
+            )
+            return str(result["messages"][-1].content)
+
+        outer_model = FakeToolCallingModel(
+            tool_calls=_single_tool_call_script("run_inner", query="leaf"),
+        )
+        outer_agent = create_agent(outer_model, [run_inner])
+
+        run = outer_agent.stream_events(
+            {"messages": [HumanMessage("go")]},
+            version="v3",
+        )
+
+        outer_tool_calls: list[ToolCallStream] = []
+        inner_tool_calls: list[ToolCallStream] = []
+
+        # `tool_calls` and `subgraphs` are single-subscriber channels, so
+        # interleave them through the run's round-robin cursor. Drilling
+        # into `sub.tool_calls` inside the loop body subscribes the inner
+        # mini-mux before the next pump cycle, satisfying the lazy-subscribe
+        # contract on subgraph handles.
+        for name, item in run.interleave("tool_calls", "subgraphs"):
+            if name == "tool_calls":
+                outer_tool_calls.append(item)
+            else:  # "subgraphs"
+                for tc in item.tool_calls:
+                    inner_tool_calls.append(tc)
+                    list(tc.output_deltas)
+
+        # Outer's `tool_calls` projection owns only its own scope: it has
+        # the call to `run_inner` and nothing from the inner subgraph.
+        assert len(outer_tool_calls) == 1
+        assert outer_tool_calls[0].tool_name == "run_inner"
+        assert outer_tool_calls[0].completed is True
+        assert outer_tool_calls[0].error is None
+
+        # The inner agent's `inner_echo` tool call is reachable via the
+        # subgraph's per-scope `tool_calls` projection.
+        assert len(inner_tool_calls) == 1
+        assert inner_tool_calls[0].tool_name == "inner_echo"
+        assert inner_tool_calls[0].completed is True
+        assert inner_tool_calls[0].error is None
+
+
+class TestAgentStreamV3Async:
+    """Async `astream_events(version="v3")` checks for `create_agent` graphs."""
+
+    @pytest.mark.anyio
+    async def test_astream_returns_async_agent_run_stream(self) -> None:
+        """Smoke test: the awaited v3 stream can be drained to completion."""
+        model = FakeToolCallingModel(tool_calls=_single_tool_call_script("echo", text="x"))
+        agent = create_agent(model, [echo])
+        stream = await agent.astream_events({"messages": [HumanMessage("hi")]}, version="v3")
+        async for call in stream.tool_calls:
+            _ = [delta async for delta in call.output_deltas]
+
+    @pytest.mark.anyio
+    async def test_async_tool_deltas_flow(self) -> None:
+        """Deltas emitted by the async `astreamer` tool arrive in order."""
+        model = FakeToolCallingModel(tool_calls=_single_tool_call_script("astreamer", text="hi"))
+        agent = create_agent(model, [astreamer])
+        stream = await agent.astream_events({"messages": [HumanMessage("hi")]}, version="v3")
+
+        seen: list[ToolCallStream] = []
+        async for call in stream.tool_calls:
+            seen.append(call)
+            assert [delta async for delta in call.output_deltas] == ["hi", "hi!"]
+        assert len(seen) == 1
+        assert seen[0].completed is True
diff --git a/libs/langchain_v1/tests/unit_tests/agents/test_state_schema.py b/libs/langchain_v1/tests/unit_tests/agents/test_state_schema.py
index cbd66d4a0a32d..26d8dbe36b881 100644
--- a/libs/langchain_v1/tests/unit_tests/agents/test_state_schema.py
+++ b/libs/langchain_v1/tests/unit_tests/agents/test_state_schema.py
@@ -6,7 +6,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Annotated, Any
+from typing import TYPE_CHECKING, Annotated, Any, get_args, get_type_hints
 
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import tool
@@ -256,6 +256,27 @@ def capture_state_tool(x: int, runtime: ToolRuntime) -> str:
     assert len(result["messages"]) == 4  # Human, AI (tool call), Tool result, AI (final)
 
 
+def test_last_schema_wins_for_conflicting_field() -> None:
+    """When several schemas declare one field, the last schema's annotation is kept.
+
+    So `state_schema` (passed last) beats middleware annotations, and a later
+    middleware beats an earlier one.
+    """
+
+    class FirstState(AgentState[Any]):
+        shared_field: Annotated[str, "first"]
+
+    class MiddleState(AgentState[Any]):
+        shared_field: Annotated[str, "middle"]
+
+    class LastState(AgentState[Any]):
+        shared_field: Annotated[str, "last"]
+
+    merged_schema = factory._resolve_schemas([FirstState, MiddleState, LastState])[0]
+    field_hint = get_type_hints(merged_schema, include_extras=True)["shared_field"]
+    assert get_args(field_hint)[1] == "last"
+
+
 def test_get_schema_type_hints_cache_hits_for_reused_schema() -> None:
     """Test repeated schema resolution reuses cached type hints for the same schema."""
 
@@ -266,9 +287,9 @@ class CachedState(AgentState[Any]):
 
     factory._get_schema_type_hints.cache_clear()
 
-    factory._resolve_schemas({CachedState})
+    factory._resolve_schemas([CachedState])
     first_info = factory._get_schema_type_hints.cache_info()
-    factory._resolve_schemas({CachedState})
+    factory._resolve_schemas([CachedState])
     second_info = factory._get_schema_type_hints.cache_info()
 
     assert first_info.misses == 1
@@ -293,9 +314,9 @@ class LocalState(AgentState[Any]):
     schema_a = make_state_schema("LocalStateA")
     schema_b = make_state_schema("LocalStateB")
 
-    factory._resolve_schemas({schema_a, schema_b})
+    factory._resolve_schemas([schema_a, schema_b])
     first_info = factory._get_schema_type_hints.cache_info()
-    factory._resolve_schemas({schema_a, schema_b})
+    factory._resolve_schemas([schema_a, schema_b])
     second_info = factory._get_schema_type_hints.cache_info()
 
     assert first_info.misses == 2
diff --git a/libs/langchain_v1/uv.lock b/libs/langchain_v1/uv.lock
index d3a6eae7b978e..ba7b6017422d8 100644
--- a/libs/langchain_v1/uv.lock
+++ b/libs/langchain_v1/uv.lock
@@ -1912,7 +1912,7 @@ wheels = [
 
 [[package]]
 name = "langchain"
-version = "1.2.17"
+version = "1.3.0a2"
 source = { editable = "." }
 dependencies = [
     { name = "langchain-core" },
@@ -2025,7 +2025,7 @@ requires-dist = [
     { name = "langchain-perplexity", marker = "extra == 'perplexity'" },
     { name = "langchain-together", marker = "extra == 'together'" },
     { name = "langchain-xai", marker = "extra == 'xai'" },
-    { name = "langgraph", specifier = ">=1.1.10,<1.2.0" },
+    { name = "langgraph", specifier = ">=1.2.0a5,<1.3.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
 ]
 provides-extras = ["community", "anthropic", "openai", "azure-ai", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "baseten", "deepseek", "xai", "perplexity"]
@@ -2208,7 +2208,7 @@ wheels = [
 
 [[package]]
 name = "langchain-core"
-version = "1.3.2"
+version = "1.4.0a2"
 source = { editable = "../core" }
 dependencies = [
     { name = "jsonpatch" },
@@ -2225,7 +2225,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "jsonpatch", specifier = ">=1.33.0,<2.0.0" },
-    { name = "langchain-protocol", specifier = ">=0.0.10" },
+    { name = "langchain-protocol", specifier = ">=0.0.14" },
     { name = "langsmith", specifier = ">=0.3.45,<1.0.0" },
     { name = "packaging", specifier = ">=23.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
@@ -2456,14 +2456,14 @@ wheels = [
 
 [[package]]
 name = "langchain-protocol"
-version = "0.0.10"
+version = "0.0.14"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/bf/c3/0d3911d3274f097040e92133f18a425980cd4085e72b6cd65add1f25327c/langchain_protocol-0.0.10.tar.gz", hash = "sha256:5bc530e0b350d3a15a3ab6889abb8132692a2c8a15eed536bce46624751acaaf", size = 6528, upload-time = "2026-04-23T17:31:34.212Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/bf/efb5e2ed832e4d6d45590e25a9e5191986b291b543bc6a807b48bee070b0/langchain_protocol-0.0.14.tar.gz", hash = "sha256:bc1e8553122e6ede310280462d5813023a172ff2785ccbbdec54d43f3a15e5f2", size = 5862, upload-time = "2026-04-29T16:40:18.657Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/11/6c89bc86b5494cfe29ee23420c398406cc147a09b5cf756e323070e358d7/langchain_protocol-0.0.10-py3-none-any.whl", hash = "sha256:040bb2ae966a06ffcd0051a1d1ca7e4926f12e951e83b07440cb80e0e8e12268", size = 6677, upload-time = "2026-04-23T17:31:33.367Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/e9/06c47ecb2aff08f83dfa30058da3bf86be64862c19569043ed5331bbeecd/langchain_protocol-0.0.14-py3-none-any.whl", hash = "sha256:ffc35089779bd8ca217015180cef5e660fc3b074efdaa0f2e95df73583f1a047", size = 6984, upload-time = "2026-04-29T16:40:17.841Z" },
 ]
 
 [[package]]
@@ -2603,7 +2603,7 @@ wheels = [
 
 [[package]]
 name = "langgraph"
-version = "1.1.10"
+version = "1.2.0a7"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
@@ -2613,35 +2613,35 @@ dependencies = [
     { name = "pydantic" },
     { name = "xxhash" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/9a/b3/7dec224369c7938eb3227ff69542a0d0f517862a0d27945b8c395f2a781f/langgraph-1.1.10.tar.gz", hash = "sha256:3115beb58203283c98d8752a90c034f3432177d2979a1fe205f76e5f1b744500", size = 560685, upload-time = "2026-04-27T17:19:10.426Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d9/ce/df390d7174df1a089cee99aadc004f9559a99cf672d01578c66782a7bcb4/langgraph-1.2.0a7.tar.gz", hash = "sha256:3dfb5a97aa991b77c9f8654d93273318e91788bafc3ba26f203a042447071c45", size = 678797, upload-time = "2026-05-04T19:35:24.443Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/80/07/057dc1aa7991115fca53f1fa6573a7cc0dd296c05360c672cc67fdb6245b/langgraph-1.1.10-py3-none-any.whl", hash = "sha256:8a4f163f72f4401648d0c11b48ee906947d938ba8cf1f474540fe591534f0d17", size = 173750, upload-time = "2026-04-27T17:19:09.073Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/c3/d73266b5801a752346cb3e235b506042a3a45d3bcb99d6f73dc649c48087/langgraph-1.2.0a7-py3-none-any.whl", hash = "sha256:b3ede4d3d27efd10dcc955b62b3ba43c0b0a2a963b150b41fc2e12e411350c92", size = 229121, upload-time = "2026-05-04T19:35:22.347Z" },
 ]
 
 [[package]]
 name = "langgraph-checkpoint"
-version = "4.0.0"
+version = "4.1.0a4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
     { name = "ormsgpack" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/98/76/55a18c59dedf39688d72c4b06af73a5e3ea0d1a01bc867b88fbf0659f203/langgraph_checkpoint-4.0.0.tar.gz", hash = "sha256:814d1bd050fac029476558d8e68d87bce9009a0262d04a2c14b918255954a624", size = 137320, upload-time = "2026-01-12T20:30:26.38Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/b4/dc/53f2eb893a268e61f225b20be0e021152fb955b076a1366857ba1c3f46d3/langgraph_checkpoint-4.1.0a4.tar.gz", hash = "sha256:be3d0c702fa6e31f3820c47dbfa272c98c17aec2cdf050b8c488f3522d3ec8fa", size = 179885, upload-time = "2026-05-04T19:32:18.611Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4a/de/ddd53b7032e623f3c7bcdab2b44e8bf635e468f62e10e5ff1946f62c9356/langgraph_checkpoint-4.0.0-py3-none-any.whl", hash = "sha256:3fa9b2635a7c5ac28b338f631abf6a030c3b508b7b9ce17c22611513b589c784", size = 46329, upload-time = "2026-01-12T20:30:25.2Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/c7/78dbe4bc02bb30c8a2520269bf146ecd2b17903586894b74eb95caef674c/langgraph_checkpoint-4.1.0a4-py3-none-any.whl", hash = "sha256:70e405bef16a51fcc831f78bda1e85cccc812ca5754685b44af03f84935917f1", size = 54664, upload-time = "2026-05-04T19:32:17.389Z" },
 ]
 
 [[package]]
 name = "langgraph-prebuilt"
-version = "1.0.12"
+version = "1.1.0a2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
     { name = "langgraph-checkpoint" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ed/8b/5fff4c63bbfef1475d577e13f5970f91955a4069d8dc4adbaeef92f36732/langgraph_prebuilt-1.0.12.tar.gz", hash = "sha256:edcb11ff29996def816243f267fb2c85c0a2e4fb618c275f3d238aee8dd6a5ec", size = 172831, upload-time = "2026-04-27T17:14:27.152Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/25/4a/591a5c39f318c5b3059504133aa58ca39b47c37e61cc71ac8bd42f546772/langgraph_prebuilt-1.1.0a2.tar.gz", hash = "sha256:0ed039cd83afee5a626d0e631fcae758c9a4d5d76ff45775c85c0f310827564d", size = 178837, upload-time = "2026-05-01T18:03:06.226Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/53/75/1e6e6fd478a1b1e643de03505570103dcb89c57c429c0fd3084d521e522e/langgraph_prebuilt-1.0.12-py3-none-any.whl", hash = "sha256:ab83822d2724d434d3536dc127b86c7d16fe3fb8dc02a89a683bc77b2e55f6e9", size = 37195, upload-time = "2026-04-27T17:14:25.788Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/e0/99653a3776d018c9382eae9167da257e7d0a831cb76829652a1d8ed83a8f/langgraph_prebuilt-1.1.0a2-py3-none-any.whl", hash = "sha256:6e59b8a37d897d926c44c5bc4c5f0348930fa95a9ecebfde6c5e825078fa9441", size = 41062, upload-time = "2026-05-01T18:03:05.254Z" },
 ]
 
 [[package]]
diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
index 113ae45c7914c..3c63ddd5d153b 100644
--- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
@@ -6,7 +6,7 @@
 import json
 import os
 from base64 import b64encode
-from typing import Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 import anthropic
 import httpx
@@ -28,6 +28,14 @@
 from langchain_core.outputs import ChatGeneration, LLMResult
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.tools import tool
+
+if TYPE_CHECKING:
+    from collections.abc import Awaitable
+
+    from langchain_core.language_models.chat_model_stream import (
+        AsyncChatModelStream,
+        ChatModelStream,
+    )
 from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
@@ -927,7 +935,10 @@ def get_weather(location: str) -> str:
     llm_with_tools = llm.bind_tools([get_weather])
     input_message = HumanMessage("What is the weather in San Francisco, CA?")
     if use_v2_stream:
-        tool_call_message = llm_with_tools.stream_v2([input_message]).output
+        tool_call_message = cast(
+            "ChatModelStream",
+            llm_with_tools.stream_events([input_message], version="v3"),
+        ).output
     else:
         tool_call_message = llm_with_tools.invoke([input_message])
     assert isinstance(tool_call_message, AIMessage)
@@ -938,8 +949,12 @@ def get_weather(location: str) -> str:
     tool_message = get_weather.invoke(tool_call)
     assert isinstance(tool_message, ToolMessage)
     if use_v2_stream:
-        response = llm_with_tools.stream_v2(
-            [input_message, tool_call_message, tool_message]
+        response = cast(
+            "ChatModelStream",
+            llm_with_tools.stream_events(
+                [input_message, tool_call_message, tool_message],
+                version="v3",
+            ),
         ).output
     else:
         response = llm_with_tools.invoke(
@@ -954,8 +969,8 @@ def get_weather(location: str) -> str:
 
 @pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")
 @pytest.mark.vcr
-async def test_agent_loop_streaming_astream_v2_v1() -> None:
-    """Async multi-turn through `astream_v2`.
+async def test_agent_loop_streaming_astream_events_v3_v1() -> None:
+    """Async multi-turn through `astream_events(version="v3")`.
 
     Mirrors `test_agent_loop_streaming` for `output_version="v1"` but
     exercises `AsyncChatModelStream` end-to-end.
@@ -973,7 +988,12 @@ def get_weather(location: str) -> str:
     )
     llm_with_tools = llm.bind_tools([get_weather])
     input_message = HumanMessage("What is the weather in San Francisco, CA?")
-    tool_call_message = await (await llm_with_tools.astream_v2([input_message]))
+    tool_call_message = await (
+        await cast(
+            "Awaitable[AsyncChatModelStream]",
+            llm_with_tools.astream_events([input_message], version="v3"),
+        )
+    )
     assert isinstance(tool_call_message, AIMessage)
     tool_calls = tool_call_message.tool_calls
     assert len(tool_calls) == 1
@@ -981,8 +1001,12 @@ def get_weather(location: str) -> str:
     tool_message = get_weather.invoke(tool_call)
     assert isinstance(tool_message, ToolMessage)
     response = await (
-        await llm_with_tools.astream_v2(
-            [input_message, tool_call_message, tool_message]
+        await cast(
+            "Awaitable[AsyncChatModelStream]",
+            llm_with_tools.astream_events(
+                [input_message, tool_call_message, tool_message],
+                version="v3",
+            ),
         )
     )
     assert isinstance(response, AIMessage)
@@ -1030,7 +1054,7 @@ def test_citations(output_version: Literal["v0", "v1"], *, use_v2_stream: bool)
     # Test streaming
     full: BaseMessage
     if use_v2_stream:
-        full = llm.stream_v2(messages).output
+        full = llm.stream_events(messages, version="v3").output
     else:
         aggregated: BaseMessageChunk | None = None
         for chunk in llm.stream(messages):
@@ -1124,7 +1148,7 @@ def test_thinking_v1(*, use_v2_stream: bool) -> None:
     # Test streaming
     full: BaseMessage
     if use_v2_stream:
-        full = llm.stream_v2([input_message]).output
+        full = llm.stream_events([input_message], version="v3").output
     else:
         aggregated: BaseMessageChunk | None = None
         for chunk in llm.stream([input_message]):
@@ -2618,10 +2642,10 @@ def _stable_blocks(blocks: Any) -> list[dict[str, Any]]:
 @pytest.mark.default_cassette("test_streaming_tool_call_v1_v2_parity.yaml.gz")
 @pytest.mark.vcr
 def test_streaming_tool_call_v1_v2_parity() -> None:
-    """`AIMessage` parity between `stream()` reduction and `stream_v2().output`.
+    """`AIMessage` parity between `stream()` and `stream_events(version="v3")` output.
 
     Runs the same forced-tool-call prompt through both the legacy chunk
-    stream (reduced with `AIMessageChunk.__add__`) and the `stream_v2`
+    stream (reduced with `AIMessageChunk.__add__`) and the `stream_events(version="v3")`
     bridge path on a `v1`-output `ChatAnthropic`, then compares the
     resulting messages on path-independent invariants:
 
@@ -2649,7 +2673,7 @@ def test_streaming_tool_call_v1_v2_parity() -> None:
         v1_full = chunk if v1_full is None else v1_full + chunk
     assert isinstance(v1_full, AIMessageChunk)
 
-    stream = with_tool.stream_v2(prompt)
+    stream = cast("ChatModelStream", with_tool.stream_events(prompt, version="v3"))
     events = list(stream)
     assert_valid_event_stream(events)
     v2_message = stream.output
diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
index 710cc9d24e8a6..61549b645f724 100644
--- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
@@ -3053,15 +3053,15 @@ def test_no_task_budget_no_beta() -> None:
         assert "task-budgets-2026-03-13" not in betas
 
 
-def test_anthropic_stream_v2_lifecycle() -> None:
+def test_anthropic_stream_events_v3_lifecycle() -> None:
     """Validate lifecycle events across a thinking + text + tool_use stream.
 
     Anthropic emits raw `content_block_start` / `content_block_delta` /
     `content_block_stop` events with integer `index` fields, interleaved
     with `message_start` and `message_delta`. This test threads a
     realistic event sequence through `_stream` via a mocked raw client
-    and asserts that `stream_v2` produces a spec-conformant event
-    stream: paired start/finish per block, no interleaving, sequential
+    and asserts that `stream_events(version="v3")` produces a spec-conformant
+    event stream: paired start/finish per block, no interleaving, sequential
     `uint` wire indices.
     """
     from unittest.mock import patch
@@ -3182,21 +3182,21 @@ def mock_create(_payload: Any) -> list:
         return events
 
     with patch.object(llm, "_create", mock_create):
-        stream_events = list(llm.stream_v2("Test query"))
+        stream_events = list(llm.stream_events("Test query", version="v3"))
 
     assert_valid_event_stream(stream_events)
 
     finishes = [e for e in stream_events if e["event"] == "content-block-finish"]
-    types = [f["content_block"]["type"] for f in finishes]
+    types = [f["content"]["type"] for f in finishes]
     assert types == ["reasoning", "text", "tool_call"]
 
     wire_indices = [f["index"] for f in finishes]
     assert wire_indices == [0, 1, 2]
 
     # Content accumulation reaches content-block-finish intact.
-    reasoning_block = cast("dict[str, Any]", finishes[0]["content_block"])
-    text_block = cast("dict[str, Any]", finishes[1]["content_block"])
-    tool_block = cast("dict[str, Any]", finishes[2]["content_block"])
+    reasoning_block = cast("dict[str, Any]", finishes[0]["content"])
+    text_block = cast("dict[str, Any]", finishes[1]["content"])
+    tool_block = cast("dict[str, Any]", finishes[2]["content"])
     assert reasoning_block["reasoning"] == "Let me think."
     assert text_block["text"] == "The answer is 42."
     assert tool_block["args"] == {"q": "weather"}
@@ -3206,6 +3206,6 @@ def mock_create(_payload: Any) -> list:
     # (protocol 0.0.9 moved the finish reason off the top-level event
     # and into `metadata`, where the bridge deposits the provider's raw
     # `stop_reason` alongside other response metadata).
-    message_finish = stream_events[-1]
+    message_finish = cast("dict[str, Any]", stream_events[-1])
     assert message_finish["event"] == "message-finish"
     assert message_finish["metadata"]["stop_reason"] == "tool_use"
diff --git a/libs/partners/anthropic/uv.lock b/libs/partners/anthropic/uv.lock
index 5d3a07bc6ddc0..7be71452a860e 100644
--- a/libs/partners/anthropic/uv.lock
+++ b/libs/partners/anthropic/uv.lock
@@ -677,7 +677,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "jsonpatch", specifier = ">=1.33.0,<2.0.0" },
-    { name = "langchain-protocol", specifier = ">=0.0.10" },
+    { name = "langchain-protocol", specifier = ">=0.0.14" },
     { name = "langsmith", specifier = ">=0.3.45,<1.0.0" },
     { name = "packaging", specifier = ">=23.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
@@ -722,19 +722,19 @@ typing = [
 
 [[package]]
 name = "langchain-protocol"
-version = "0.0.10"
+version = "0.0.14"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/bf/c3/0d3911d3274f097040e92133f18a425980cd4085e72b6cd65add1f25327c/langchain_protocol-0.0.10.tar.gz", hash = "sha256:5bc530e0b350d3a15a3ab6889abb8132692a2c8a15eed536bce46624751acaaf", size = 6528, upload-time = "2026-04-23T17:31:34.212Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/bf/efb5e2ed832e4d6d45590e25a9e5191986b291b543bc6a807b48bee070b0/langchain_protocol-0.0.14.tar.gz", hash = "sha256:bc1e8553122e6ede310280462d5813023a172ff2785ccbbdec54d43f3a15e5f2", size = 5862, upload-time = "2026-04-29T16:40:18.657Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/11/6c89bc86b5494cfe29ee23420c398406cc147a09b5cf756e323070e358d7/langchain_protocol-0.0.10-py3-none-any.whl", hash = "sha256:040bb2ae966a06ffcd0051a1d1ca7e4926f12e951e83b07440cb80e0e8e12268", size = 6677, upload-time = "2026-04-23T17:31:33.367Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/e9/06c47ecb2aff08f83dfa30058da3bf86be64862c19569043ed5331bbeecd/langchain_protocol-0.0.14-py3-none-any.whl", hash = "sha256:ffc35089779bd8ca217015180cef5e660fc3b074efdaa0f2e95df73583f1a047", size = 6984, upload-time = "2026-04-29T16:40:17.841Z" },
 ]
 
 [[package]]
 name = "langchain-tests"
-version = "1.1.6"
+version = "1.1.7"
 source = { editable = "../../standard-tests" }
 dependencies = [
     { name = "httpx" },
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
index d939877346bbc..56123308fdd5d 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
@@ -6,7 +6,7 @@
 from collections.abc import AsyncIterator
 from pathlib import Path
 from textwrap import dedent
-from typing import Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 import httpx
 import pytest
@@ -29,6 +29,9 @@
 from langchain_openai import ChatOpenAI
 from tests.unit_tests.fake.callbacks import FakeCallbackHandler
 
+if TYPE_CHECKING:
+    from langchain_core.language_models.chat_model_stream import ChatModelStream
+
 MAX_TOKEN_COUNT = 100
 
 
@@ -1289,7 +1292,7 @@ class _Person(BaseModel):
 
 @pytest.mark.vcr
 def test_streaming_tool_call_v1_v2_parity() -> None:
-    """`stream()` and `stream_v2()` must agree on their final `AIMessage`.
+    """`stream()` and `stream_events(version="v3")` agree on their final `AIMessage`.
 
     Both paths are invoked against the same HTTP response (the cassette's
     single recorded interaction, replayed for both calls via
@@ -1310,7 +1313,7 @@ def test_streaming_tool_call_v1_v2_parity() -> None:
         v1 = chunk if v1 is None else v1 + chunk
     assert isinstance(v1, AIMessageChunk)
 
-    stream = with_tool.stream_v2(prompt)
+    stream = cast("ChatModelStream", with_tool.stream_events(prompt, version="v3"))
     events = list(stream)
     assert_valid_event_stream(events)
     v2 = stream.output
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
index 33531867c8fe8..22c30cbe31da5 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
@@ -3,7 +3,7 @@
 import base64
 import json
 import os
-from typing import Annotated, Any, Literal, cast
+from typing import TYPE_CHECKING, Annotated, Any, Literal, cast
 
 import openai
 import pytest
@@ -32,6 +32,14 @@
 from langchain_openai import ChatOpenAI, custom_tool
 from langchain_openai.chat_models.base import _convert_to_openai_response_format
 
+if TYPE_CHECKING:
+    from collections.abc import Awaitable
+
+    from langchain_core.language_models.chat_model_stream import (
+        AsyncChatModelStream,
+        ChatModelStream,
+    )
+
 MODEL_NAME = "gpt-4o-mini"
 
 
@@ -113,9 +121,10 @@ def test_web_search(
     # Test streaming
     full: BaseMessage
     if use_v2_stream:
-        full = llm.stream_v2(
+        full = llm.stream_events(
             "What was a positive news story from today?",
             tools=[{"type": "web_search_preview"}],
+            version="v3",
         ).output
     else:
         aggregated: BaseMessageChunk | None = None
@@ -284,7 +293,10 @@ def get_weather(location: str) -> str:
     llm_with_tools = llm.bind_tools([get_weather])
     input_message = HumanMessage("What is the weather in San Francisco, CA?")
     if use_v2_stream:
-        tool_call_message = llm_with_tools.stream_v2([input_message]).output
+        tool_call_message = cast(
+            "ChatModelStream",
+            llm_with_tools.stream_events([input_message], version="v3"),
+        ).output
     else:
         tool_call_message = llm_with_tools.invoke([input_message])
     assert isinstance(tool_call_message, AIMessage)
@@ -294,8 +306,12 @@ def get_weather(location: str) -> str:
     tool_message = get_weather.invoke(tool_call)
     assert isinstance(tool_message, ToolMessage)
     if use_v2_stream:
-        response = llm_with_tools.stream_v2(
-            [input_message, tool_call_message, tool_message]
+        response = cast(
+            "ChatModelStream",
+            llm_with_tools.stream_events(
+                [input_message, tool_call_message, tool_message],
+                version="v3",
+            ),
         ).output
     else:
         response = llm_with_tools.invoke(
@@ -310,8 +326,8 @@ def get_weather(location: str) -> str:
 
 @pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")
 @pytest.mark.vcr
-async def test_agent_loop_streaming_astream_v2_v1() -> None:
-    """Async multi-turn through `astream_v2`.
+async def test_agent_loop_streaming_astream_events_v3_v1() -> None:
+    """Async multi-turn through `astream_events(version="v3")`.
 
     Mirrors `test_agent_loop_streaming` for `output_version="v1"` but
     exercises `AsyncChatModelStream` end-to-end: aggregation in the
@@ -335,7 +351,10 @@ def get_weather(location: str) -> str:
     )
     llm_with_tools = llm.bind_tools([get_weather])
     input_message = HumanMessage("What is the weather in San Francisco, CA?")
-    stream = await llm_with_tools.astream_v2([input_message])
+    stream = await cast(
+        "Awaitable[AsyncChatModelStream]",
+        llm_with_tools.astream_events([input_message], version="v3"),
+    )
     tool_call_message = await stream
     assert isinstance(tool_call_message, AIMessage)
     tool_calls = tool_call_message.tool_calls
@@ -343,8 +362,12 @@ def get_weather(location: str) -> str:
     tool_call = tool_calls[0]
     tool_message = get_weather.invoke(tool_call)
     assert isinstance(tool_message, ToolMessage)
-    stream = await llm_with_tools.astream_v2(
-        [input_message, tool_call_message, tool_message]
+    stream = await cast(
+        "Awaitable[AsyncChatModelStream]",
+        llm_with_tools.astream_events(
+            [input_message, tool_call_message, tool_message],
+            version="v3",
+        ),
     )
     response = await stream
     assert isinstance(response, AIMessage)
@@ -647,7 +670,7 @@ def test_stream_reasoning_summary(
     }
     response_1: BaseMessage
     if use_v2_stream:
-        response_1 = llm.stream_v2([message_1]).output
+        response_1 = llm.stream_events([message_1], version="v3").output
     else:
         aggregated: BaseMessageChunk | None = None
         for chunk in llm.stream([message_1]):
@@ -769,7 +792,10 @@ def test_code_interpreter(
 
     full: BaseMessage
     if use_v2_stream:
-        full = llm_with_tools.stream_v2([input_message]).output
+        full = cast(
+            "ChatModelStream",
+            llm_with_tools.stream_events([input_message], version="v3"),
+        ).output
     else:
         aggregated: BaseMessageChunk | None = None
         for chunk in llm_with_tools.stream([input_message]):
@@ -933,7 +959,10 @@ def test_mcp_builtin_zdr_v1(use_v2_stream: bool) -> None:
     }
     full: BaseMessage
     if use_v2_stream:
-        full = llm_with_tools.stream_v2([input_message]).output
+        full = cast(
+            "ChatModelStream",
+            llm_with_tools.stream_events([input_message], version="v3"),
+        ).output
     else:
         aggregated: BaseMessageChunk | None = None
         for chunk in llm_with_tools.stream([input_message]):
@@ -1365,7 +1394,7 @@ def test_compaction_streaming(
 
     def _run(messages: list) -> AIMessage:
         if use_v2_stream:
-            return llm.stream_v2(messages).output
+            return llm.stream_events(messages, version="v3").output
         result = llm.invoke(messages)
         assert isinstance(result, AIMessage)
         return result
@@ -1746,7 +1775,7 @@ def wrap_tool_call(
 @pytest.mark.default_cassette("test_reasoning_text_v1_v2_parity.yaml.gz")
 @pytest.mark.vcr
 def test_reasoning_text_v1_v2_parity() -> None:
-    """`stream()` and `stream_v2()` must agree on reasoning + text output.
+    """`stream()` and `stream_events(version="v3")` agree on reasoning + text.
 
     Exercises the non-tool-call branch of the parity claim: a reasoning
     model (`o4-mini` via the Responses API) produces one or more
@@ -1767,7 +1796,7 @@ def test_reasoning_text_v1_v2_parity() -> None:
         v1 = chunk if v1 is None else v1 + chunk
     assert isinstance(v1, AIMessageChunk)
 
-    stream = llm.stream_v2([prompt])
+    stream = llm.stream_events([prompt], version="v3")
     events = list(stream)
     assert_valid_event_stream(events)
     v2 = stream.output
diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
index 88d1b97aa6311..08b0850fd5fc1 100644
--- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
@@ -616,8 +616,8 @@ def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager:
         assert "stream_options" not in call_kwargs[-1]
 
 
-def test_openai_stream_v2_lifecycle(mock_openai_completion: list) -> None:
-    """`stream_v2` on chat completions emits a spec-conformant lifecycle."""
+def test_openai_stream_events_v3_lifecycle(mock_openai_completion: list) -> None:
+    """`stream_events(version="v3")` on chat completions emits a valid lifecycle."""
     from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream
 
     llm = ChatOpenAI(model="gpt-4o")
@@ -628,13 +628,13 @@ def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager:
 
     mock_client.create = mock_create
     with patch.object(llm, "client", mock_client):
-        events = list(llm.stream_v2("你的名字叫什么？只回答名字"))
+        events = list(llm.stream_events("你的名字叫什么？只回答名字", version="v3"))
 
     assert_valid_event_stream(events)
     # At minimum, a text block with the accumulated answer.
     finishes = [e for e in events if e["event"] == "content-block-finish"]
     assert len(finishes) >= 1
-    text_finishes = [f for f in finishes if f["content_block"]["type"] == "text"]
+    text_finishes = [f for f in finishes if f["content"]["type"] == "text"]
     assert len(text_finishes) == 1
 
 
diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py
index 1a4edfca6089d..5f218cd9beb25 100644
--- a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py
+++ b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py
@@ -763,8 +763,8 @@ def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager:
         assert dumped == payload["input"][idx]
 
 
-def test_responses_stream_v2_emits_reasoning_lifecycle() -> None:
-    """`stream_v2` must emit `content-block-finish` events for reasoning blocks.
+def test_responses_stream_events_v3_emits_reasoning_lifecycle() -> None:
+    """v3 streaming emits `content-block-finish` events for reasoning blocks.
 
     Regression test: the protocol bridge should surface the full lifecycle
     (`content-block-start` / `content-block-delta` / `content-block-finish`)
@@ -779,21 +779,19 @@ def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager:
     mock_client.responses.create = mock_create
 
     with patch.object(llm, "root_client", mock_client):
-        events = list(llm.stream_v2("test"))
+        events = list(llm.stream_events("test", version="v3"))
 
     assert_valid_event_stream(events)
 
     reasoning_starts = [
         e
         for e in events
-        if e["event"] == "content-block-start"
-        and e["content_block"]["type"] == "reasoning"
+        if e["event"] == "content-block-start" and e["content"]["type"] == "reasoning"
     ]
     reasoning_finishes = [
         e
         for e in events
-        if e["event"] == "content-block-finish"
-        and e["content_block"]["type"] == "reasoning"
+        if e["event"] == "content-block-finish" and e["content"]["type"] == "reasoning"
     ]
 
     # The mock stream carries four reasoning summary parts (two per reasoning
@@ -803,9 +801,7 @@ def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager:
         f"expected 4 reasoning start events, got {len(reasoning_starts)}"
     )
     all_finish_types = [
-        e["content_block"]["type"]
-        for e in events
-        if e["event"] == "content-block-finish"
+        e["content"]["type"] for e in events if e["event"] == "content-block-finish"
     ]
     assert len(reasoning_finishes) == 4, (
         f"expected 4 reasoning finish events, got {len(reasoning_finishes)}: "
@@ -814,8 +810,7 @@ def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager:
 
     # Finish events must carry the accumulated reasoning text.
     reasoning_texts = [
-        cast("dict[str, Any]", f["content_block"])["reasoning"]
-        for f in reasoning_finishes
+        cast("dict[str, Any]", f["content"])["reasoning"] for f in reasoning_finishes
     ]
     assert reasoning_texts == [
         "reasoning block one",
diff --git a/libs/partners/openai/uv.lock b/libs/partners/openai/uv.lock
index 8697ff310112c..1ea687b0d9956 100644
--- a/libs/partners/openai/uv.lock
+++ b/libs/partners/openai/uv.lock
@@ -587,7 +587,7 @@ requires-dist = [
     { name = "langchain-perplexity", marker = "extra == 'perplexity'" },
     { name = "langchain-together", marker = "extra == 'together'" },
     { name = "langchain-xai", marker = "extra == 'xai'" },
-    { name = "langgraph", specifier = ">=1.1.5,<1.2.0" },
+    { name = "langgraph", specifier = ">=1.1.10,<1.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
 ]
 provides-extras = ["community", "anthropic", "openai", "azure-ai", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "baseten", "deepseek", "xai", "perplexity"]
@@ -641,7 +641,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "jsonpatch", specifier = ">=1.33.0,<2.0.0" },
-    { name = "langchain-protocol", specifier = ">=0.0.10" },
+    { name = "langchain-protocol", specifier = ">=0.0.14" },
     { name = "langsmith", specifier = ">=0.3.45,<1.0.0" },
     { name = "packaging", specifier = ">=23.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
@@ -773,19 +773,19 @@ typing = [
 
 [[package]]
 name = "langchain-protocol"
-version = "0.0.10"
+version = "0.0.14"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/bf/c3/0d3911d3274f097040e92133f18a425980cd4085e72b6cd65add1f25327c/langchain_protocol-0.0.10.tar.gz", hash = "sha256:5bc530e0b350d3a15a3ab6889abb8132692a2c8a15eed536bce46624751acaaf", size = 6528, upload-time = "2026-04-23T17:31:34.212Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/bf/efb5e2ed832e4d6d45590e25a9e5191986b291b543bc6a807b48bee070b0/langchain_protocol-0.0.14.tar.gz", hash = "sha256:bc1e8553122e6ede310280462d5813023a172ff2785ccbbdec54d43f3a15e5f2", size = 5862, upload-time = "2026-04-29T16:40:18.657Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/11/6c89bc86b5494cfe29ee23420c398406cc147a09b5cf756e323070e358d7/langchain_protocol-0.0.10-py3-none-any.whl", hash = "sha256:040bb2ae966a06ffcd0051a1d1ca7e4926f12e951e83b07440cb80e0e8e12268", size = 6677, upload-time = "2026-04-23T17:31:33.367Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/e9/06c47ecb2aff08f83dfa30058da3bf86be64862c19569043ed5331bbeecd/langchain_protocol-0.0.14-py3-none-any.whl", hash = "sha256:ffc35089779bd8ca217015180cef5e660fc3b074efdaa0f2e95df73583f1a047", size = 6984, upload-time = "2026-04-29T16:40:17.841Z" },
 ]
 
 [[package]]
 name = "langchain-tests"
-version = "1.1.6"
+version = "1.1.7"
 source = { editable = "../../standard-tests" }
 dependencies = [
     { name = "httpx" },
@@ -830,7 +830,7 @@ typing = [
 
 [[package]]
 name = "langgraph"
-version = "1.1.5"
+version = "1.1.10"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
@@ -840,9 +840,9 @@ dependencies = [
     { name = "pydantic" },
     { name = "xxhash" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a4/8a/47b983e33d3afc8c2c2385d2d8f3731ddfb5cb08e88f307f75105252a94c/langgraph-1.1.5.tar.gz", hash = "sha256:24b85d2d40cd002766d489e76f18027f947e4151366ac7ed97bab030ce50e494", size = 548492, upload-time = "2026-04-03T14:12:33.14Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9a/b3/7dec224369c7938eb3227ff69542a0d0f517862a0d27945b8c395f2a781f/langgraph-1.1.10.tar.gz", hash = "sha256:3115beb58203283c98d8752a90c034f3432177d2979a1fe205f76e5f1b744500", size = 560685, upload-time = "2026-04-27T17:19:10.426Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/bd/6a/542bb56c8270d3df858285be138aec5e292b4e43dadb6b0b6fe051f535c1/langgraph-1.1.5-py3-none-any.whl", hash = "sha256:cb25c20d135167837951906c0feeb26c91c733bd5001a920c4cb1ffb92a1097c", size = 169354, upload-time = "2026-04-03T14:12:31.879Z" },
+    { url = "https://files.pythonhosted.org/packages/80/07/057dc1aa7991115fca53f1fa6573a7cc0dd296c05360c672cc67fdb6245b/langgraph-1.1.10-py3-none-any.whl", hash = "sha256:8a4f163f72f4401648d0c11b48ee906947d938ba8cf1f474540fe591534f0d17", size = 173750, upload-time = "2026-04-27T17:19:09.073Z" },
 ]
 
 [[package]]
@@ -860,15 +860,15 @@ wheels = [
 
 [[package]]
 name = "langgraph-prebuilt"
-version = "1.0.9"
+version = "1.0.13"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
     { name = "langgraph-checkpoint" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/99/4c/06dac899f4945bedb0c3a1583c19484c2cc894114ea30d9a538dd270086e/langgraph_prebuilt-1.0.9.tar.gz", hash = "sha256:93de7512e9caade4b77ead92428f6215c521fdb71b8ffda8cd55f0ad814e64de", size = 165850, upload-time = "2026-04-03T14:06:37.721Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/a4/f8ac75fa7c503103f0cf7680944e28bbaaef74c19a8d163d7346869cc369/langgraph_prebuilt-1.0.13.tar.gz", hash = "sha256:ad219782a80e1718e7e7794de49e0ae307111d45cbcffab9a52725a66a609456", size = 172913, upload-time = "2026-04-30T01:48:15.742Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1d/a2/8368ac187b75e7f9d938ca075d34f116683f5cfc48d924029ee79aea147b/langgraph_prebuilt-1.0.9-py3-none-any.whl", hash = "sha256:776c8e3154a5aef5ad0e5bf3f263f2dcaab3983786cc20014b7f955d99d2d1b2", size = 35958, upload-time = "2026-04-03T14:06:36.58Z" },
+    { url = "https://files.pythonhosted.org/packages/69/ef/5ada0bef4013ef5ae53a0ca1de5736517f1076a54d313f156ca545ec65d5/langgraph_prebuilt-1.0.13-py3-none-any.whl", hash = "sha256:7055e9fad41fbd3593800aed0aea0a6e974b17f33ed51b80d3d3a031212dd7c0", size = 37214, upload-time = "2026-04-30T01:48:14.507Z" },
 ]
 
 [[package]]
diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
index 02d842ca0adbd..e09ba9bfed5b2 100644
--- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
@@ -912,25 +912,26 @@ async def test_astream(self, model: BaseChatModel) -> None:
             f"got {last_chunk.chunk_position!r}"
         )
 
-    def test_stream_v2(self, model: BaseChatModel) -> None:
-        """Test that `model.stream_v2(simple_message)` works.
+    def test_stream_events_v3(self, model: BaseChatModel) -> None:
+        """Test that `model.stream_events("Hello", version="v3")` works.
 
         Exercises the content-block-centric streaming protocol. Passing this
-        test indicates the model participates in `stream_v2` either natively
-        (via `_stream_chat_model_events`) or through the compat bridge that
+        test indicates the model participates in `stream_events(version="v3")` either
+        natively (via `_stream_chat_model_events`) or through the compat bridge that
         converts `_stream` chunks into protocol events.
 
         ??? question "Troubleshooting"
 
             First, debug
             `langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`
-            — `stream_v2` falls back to the same `_stream` path via the compat
-            bridge when the model does not implement
+            — `stream_events(version="v3")` falls back to the same
+            `_stream` path via the compat bridge when the model does not
+            implement
             `_stream_chat_model_events`. If `test_stream` passes but this does
             not, inspect the raised lifecycle violation: it identifies the
             event index and the rule broken.
         """
-        stream = model.stream_v2("Hello")
+        stream = model.stream_events("Hello", version="v3")
         assert isinstance(stream, ChatModelStream)
 
         events = list(stream)
@@ -942,13 +943,13 @@ def test_stream_v2(self, model: BaseChatModel) -> None:
         assert message.content
         assert len(message.content_blocks) == 1
         assert message.content_blocks[0]["type"] == "text"
-        # `stream_v2` always assembles content as v1 protocol blocks.
+        # `stream_events(version="v3")` always assembles content as v1 protocol blocks.
         assert message.response_metadata.get("output_version") == "v1"
 
-    async def test_astream_v2(self, model: BaseChatModel) -> None:
-        """Test that `await model.astream_v2(simple_message)` works.
+    async def test_astream_events_v3(self, model: BaseChatModel) -> None:
+        """Test that `await model.astream_events("Hello", version="v3")` works.
 
-        Async counterpart to `test_stream_v2`. Exercises the
+        Async counterpart to `test_stream_events_v3`. Exercises the
         `AsyncChatModelStream` path end-to-end: the background producer task,
         replay-buffer-backed event iteration, and the awaitable `output`
         projection.
@@ -961,7 +962,7 @@ async def test_astream_v2(self, model: BaseChatModel) -> None:
             lifecycle violation; it identifies the event index and the rule
             broken.
         """
-        stream = await model.astream_v2("Hello")
+        stream = await model.astream_events("Hello", version="v3")
         assert isinstance(stream, AsyncChatModelStream)
 
         events = [event async for event in stream]
diff --git a/libs/standard-tests/langchain_tests/utils/stream_lifecycle.py b/libs/standard-tests/langchain_tests/utils/stream_lifecycle.py
index 29bb02ae9dc64..4e294caea4265 100644
--- a/libs/standard-tests/langchain_tests/utils/stream_lifecycle.py
+++ b/libs/standard-tests/langchain_tests/utils/stream_lifecycle.py
@@ -1,13 +1,14 @@
 """Validator for LangChain content-block protocol event streams.
 
-Checks that an event stream emitted by a chat model (via `stream_v2`,
+Checks that an event stream emitted by a chat model (via `stream_events(version="v3")`,
 or by the compat bridge's `chunks_to_events` / `message_to_events`)
 conforms to the protocol lifecycle rules:
 
 - `message-start` opens and `message-finish` closes the stream.
-- Content blocks do not interleave: each block runs
+- Content blocks may interleave: each block index runs
   `content-block-start` → optional `content-block-delta`s →
-  `content-block-finish` before the next block begins.
+  `content-block-finish`, while other block indices may start or receive
+  deltas before that block finishes.
 - Wire indices on content-block events are sequential `uint` values
   starting at 0.
 - For deltaable block types (`text`, `reasoning`, `tool_call_chunk`,
@@ -37,7 +38,7 @@ def assert_valid_event_stream(events: Iterable[Any]) -> None:
 
     Args:
         events: Iterable of protocol event dicts (as yielded by
-            `stream_v2` or `chunks_to_events`).
+            `stream_events(version="v3")` or `chunks_to_events`).
 
     Raises:
         AssertionError: On the first lifecycle violation found. The
@@ -71,7 +72,7 @@ def assert_valid_event_stream(events: Iterable[Any]) -> None:
             "`message-finish` must be the final event"
         )
 
-    open_idx: int | None = None
+    open_indices: set[int] = set()
     expected_next_idx = 0
     start_events: dict[int, dict[str, Any]] = {}
     finish_events: dict[int, dict[str, Any]] = {}
@@ -83,8 +84,9 @@ def assert_valid_event_stream(events: Iterable[Any]) -> None:
             assert i == 0, f"duplicate `message-start` at event {i}"
             continue
         if ev == "message-finish":
-            assert open_idx is None, (
-                f"`message-finish` while block {open_idx} still open (event {i})"
+            assert not open_indices, (
+                f"`message-finish` while blocks {sorted(open_indices)} "
+                f"still open (event {i})"
             )
             continue
         if ev == "error":
@@ -102,36 +104,41 @@ def assert_valid_event_stream(events: Iterable[Any]) -> None:
             assert idx == expected_next_idx, (
                 f"expected next wire index {expected_next_idx}, got {idx} at event {i}"
             )
-            assert open_idx is None, (
-                f"content-block-start at idx={idx} while block {open_idx} "
-                f"still open (event {i}); blocks must not interleave"
+            assert idx not in start_events, (
+                f"duplicate content-block-start for idx={idx} at event {i}"
             )
-            open_idx = idx
-            start_events[idx] = event["content_block"]
+            open_indices.add(idx)
+            start_events[idx] = event.get("content") or event["content_block"]
             delta_accum[idx] = {}
             expected_next_idx += 1
         elif ev == "content-block-delta":
             idx = event["index"]
-            assert idx == open_idx, (
-                f"content-block-delta at idx={idx} but currently-open block is "
-                f"{open_idx} (event {i})"
+            assert idx in open_indices, (
+                f"content-block-delta at idx={idx} but that block is not open "
+                f"(event {i})"
             )
-            block = event["content_block"]
-            _accumulate_delta(delta_accum[idx], block)
+            delta = event.get("delta")
+            if delta is None and "content_block" in event:
+                delta = _legacy_block_to_delta(event["content_block"])
+            _accumulate_delta(delta_accum[idx], delta)
         elif ev == "content-block-finish":
             idx = event["index"]
-            assert idx == open_idx, (
-                f"content-block-finish at idx={idx} but currently-open block is "
-                f"{open_idx} (event {i})"
+            assert idx in open_indices, (
+                f"content-block-finish at idx={idx} but that block is not open "
+                f"(event {i})"
             )
-            finish_events[idx] = event["content_block"]
-            open_idx = None
+            assert idx not in finish_events, (
+                f"duplicate content-block-finish for idx={idx} at event {i}"
+            )
+            finish_events[idx] = event.get("content") or event["content_block"]
+            open_indices.remove(idx)
         else:
             # Unknown event types are accepted; the CDDL allows extensions.
             continue
 
-    assert open_idx is None, (
-        f"block {open_idx} still open at end of stream — no content-block-finish"
+    assert not open_indices, (
+        f"blocks {sorted(open_indices)} still open at end of stream — "
+        "no content-block-finish"
     )
     missing = set(start_events) - set(finish_events)
     assert not missing, (
@@ -143,21 +150,40 @@ def assert_valid_event_stream(events: Iterable[Any]) -> None:
         _assert_delta_matches_finish(idx, delta_accum[idx], finish_block)
 
 
-def _accumulate_delta(accum: dict[str, Any], block: dict[str, Any]) -> None:
-    """Fold a delta block into the running accumulator for its index."""
+def _legacy_block_to_delta(block: dict[str, Any]) -> dict[str, Any]:
+    """Translate a legacy `content_block`-shaped delta into a typed `delta` dict."""
     btype = block.get("type")
-    if btype not in _DELTAABLE_TYPES:
-        return
     if btype == "text":
-        accum["text"] = accum.get("text", "") + block.get("text", "")
-    elif btype == "reasoning":
-        accum["reasoning"] = accum.get("reasoning", "") + block.get("reasoning", "")
-    else:  # tool_call_chunk / server_tool_call_chunk
-        accum["args"] = accum.get("args", "") + (block.get("args") or "")
-        if block.get("id") is not None:
-            accum["id"] = block["id"]
-        if block.get("name") is not None:
-            accum["name"] = block["name"]
+        return {"type": "text-delta", "text": block.get("text", "")}
+    if btype == "reasoning":
+        return {
+            "type": "reasoning-delta",
+            "reasoning": block.get("reasoning", ""),
+        }
+    if "data" in block:  # decided by the key's presence, not by `type`
+        return {"type": "data-delta", "data": block.get("data", "")}
+    return {"type": "block-delta", "fields": block}  # fallback: pass block through
+
+
+def _accumulate_delta(accum: dict[str, Any], delta: dict[str, Any] | None) -> None:
+    """Fold one protocol delta into the running accumulator for its block index."""
+    if delta is None:
+        return  # nothing to fold: the event carried no usable delta
+    dtype = delta.get("type")
+    if dtype == "text-delta":
+        accum["text"] = accum.get("text", "") + delta.get("text", "")
+    elif dtype == "reasoning-delta":
+        accum["reasoning"] = accum.get("reasoning", "") + delta.get("reasoning", "")
+    elif dtype == "data-delta":
+        accum["data"] = accum.get("data", "") + delta.get("data", "")
+    elif dtype == "block-delta":  # fields overwrite, not concatenate
+        fields = delta.get("fields")
+        if not isinstance(fields, dict):
+            return  # `fields` missing or not a mapping: ignore this delta
+        btype = fields.get("type")
+        if btype not in _DELTAABLE_TYPES:
+            return  # block types outside `_DELTAABLE_TYPES` are not tracked
+        accum.update({k: v for k, v in fields.items() if v is not None})
 
 
 def _assert_delta_matches_finish(
@@ -197,6 +223,8 @@ def _assert_delta_matches_finish(
         except json.JSONDecodeError:
             parsed = None
         assert finish_block.get("args") == parsed
+    elif "data" in accum:
+        assert finish_block.get("data") == accum["data"]
 
 
 __all__ = ["assert_valid_event_stream"]
diff --git a/libs/standard-tests/uv.lock b/libs/standard-tests/uv.lock
index 235cf0181ed19..8473451fa147b 100644
--- a/libs/standard-tests/uv.lock
+++ b/libs/standard-tests/uv.lock
@@ -341,7 +341,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "jsonpatch", specifier = ">=1.33.0,<2.0.0" },
-    { name = "langchain-protocol", specifier = ">=0.0.10" },
+    { name = "langchain-protocol", specifier = ">=0.0.14" },
     { name = "langsmith", specifier = ">=0.3.45,<1.0.0" },
     { name = "packaging", specifier = ">=23.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
@@ -386,14 +386,14 @@ typing = [
 
 [[package]]
 name = "langchain-protocol"
-version = "0.0.10"
+version = "0.0.14"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/bf/c3/0d3911d3274f097040e92133f18a425980cd4085e72b6cd65add1f25327c/langchain_protocol-0.0.10.tar.gz", hash = "sha256:5bc530e0b350d3a15a3ab6889abb8132692a2c8a15eed536bce46624751acaaf", size = 6528, upload-time = "2026-04-23T17:31:34.212Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/bf/efb5e2ed832e4d6d45590e25a9e5191986b291b543bc6a807b48bee070b0/langchain_protocol-0.0.14.tar.gz", hash = "sha256:bc1e8553122e6ede310280462d5813023a172ff2785ccbbdec54d43f3a15e5f2", size = 5862, upload-time = "2026-04-29T16:40:18.657Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/11/6c89bc86b5494cfe29ee23420c398406cc147a09b5cf756e323070e358d7/langchain_protocol-0.0.10-py3-none-any.whl", hash = "sha256:040bb2ae966a06ffcd0051a1d1ca7e4926f12e951e83b07440cb80e0e8e12268", size = 6677, upload-time = "2026-04-23T17:31:33.367Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/e9/06c47ecb2aff08f83dfa30058da3bf86be64862c19569043ed5331bbeecd/langchain_protocol-0.0.14-py3-none-any.whl", hash = "sha256:ffc35089779bd8ca217015180cef5e660fc3b074efdaa0f2e95df73583f1a047", size = 6984, upload-time = "2026-04-29T16:40:17.841Z" },
 ]
 
 [[package]]