From 6f2c3e53ce8943192f535b3efb431c354fbb03d1 Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Mon, 8 Jun 2026 10:14:34 -0700 Subject: [PATCH 1/4] Fix LangGraph content-block serialization in gen_ai.output.messages (#189) LangChain/LangGraph AIMessage.content may be a list of content-block dicts (e.g. [{'type': 'text', 'text': '...', 'phase': 'final_answer', 'id': '...'}]). The previous _langchain_content helper called str(c) on this value, producing a Python-repr blob with single quotes and leaked phase/index/id keys inside what the GenAI semconv requires to be a plain TextPart.content string. Changes: - New _flatten_lc_content_blocks helper concatenates the text of every type=='text' block (joined with newline) into a plain string. - _langchain_content now delegates to that helper for both BaseMessage and dict-shaped messages. - _langchain_tool_calls additionally harvests {'type': 'tool_use', ...} entries embedded in list-shaped content as ToolCallRequest parts so they surface as spec-typed parts instead of being dropped. - Three regression tests covering the exact issue shape, multi-block text join, and embedded tool_use harvest. - CHANGELOG entry under Unreleased. Verified the serialized output against the upstream gen-ai-output-messages.json schema. Fixes #189 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 8 ++ .../opentelemetry/_genai/_langchain/_utils.py | 83 +++++++++++++++++-- tests/langchain/test_utils.py | 78 +++++++++++++++++ 3 files changed, 163 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d96844c..e3f28bbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Release History +## Unreleased + +### Bugs Fixed +- Flatten LangChain/LangGraph multi-part message ``content`` into a plain + ``TextPart.content`` string so ``gen_ai.output.messages`` no longer contains + a Python-``repr`` blob on ``invoke_agent`` wrapper spans + ([#189](https://github.com/microsoft/opentelemetry-distro-python/issues/189)) + # 1.3.2 (2026-05-29) ### Bugs Fixed diff --git a/src/microsoft/opentelemetry/_genai/_langchain/_utils.py b/src/microsoft/opentelemetry/_genai/_langchain/_utils.py index 2425cb67..3413fa88 100644 --- a/src/microsoft/opentelemetry/_genai/_langchain/_utils.py +++ b/src/microsoft/opentelemetry/_genai/_langchain/_utils.py @@ -1134,20 +1134,71 @@ def _langchain_role(message: Any) -> str: return "unknown" +def _flatten_lc_content_blocks(content: Any) -> str | None: + """Normalize a LangChain message ``content`` value to a plain text string. + + LangChain/LangGraph ``AIMessage.content`` may be either a plain string or a + list of content-block dicts (e.g. ``[{"type": "text", "text": "...", + "phase": "final_answer", "id": "..."}]``). The GenAI semantic-conventions + ``TextPart.content`` field must be a plain string, so we concatenate the + ``text`` of every ``type=="text"`` block (joined with ``\n``) and drop the + rest. Non-text blocks (e.g. ``tool_use`` / ``tool_result``) are surfaced + elsewhere as typed parts, so it is correct to omit them from the text. + + Returns ``None`` when there is no meaningful text content. + """ + if content is None: + return None + if isinstance(content, str): + return content or None + if isinstance(content, list): + chunks: list[str] = [] + for block in content: + if isinstance(block, Mapping): + block_type = block.get("type") + # Accept both spec-typed text blocks and untyped {"text": "..."} entries. + if block_type in (None, "text"): + text = block.get("text") + if isinstance(text, str) and text: + chunks.append(text) + elif isinstance(block, str) and block: + chunks.append(block) + if chunks: + return "\n".join(chunks) + return None + return str(content) or None + + def _langchain_content(message: Any) -> str | None: """Extract text content from a LangChain message.""" if isinstance(message, BaseMessage): - c = getattr(message, "content", None) - return str(c) if c else None + return _flatten_lc_content_blocks(getattr(message, "content", None)) if hasattr(message, "get"): - if c := message.get("content"): - return str(c) + if (c := message.get("content")) is not None: + flat = _flatten_lc_content_blocks(c) + if flat is not None: + return flat if kwargs := message.get("kwargs"): - if hasattr(kwargs, "get") and (c := kwargs.get("content")): - return str(c) + if hasattr(kwargs, "get") and (c := kwargs.get("content")) is not None: + return _flatten_lc_content_blocks(c) return None +def _lc_content_blocks(message: Any) -> list[Any] | None: + """Return the raw ``content`` value of a LangChain message if it is a list + of content blocks, otherwise ``None``.""" + raw: Any = None + if isinstance(message, BaseMessage): + raw = getattr(message, "content", None) + elif hasattr(message, "get"): + raw = message.get("content") + if raw is None: + kwargs = message.get("kwargs") or {} + if hasattr(kwargs, "get"): + raw = kwargs.get("content") + return raw if isinstance(raw, list) else None + + def _langchain_tool_calls(message: Any) -> list[ToolCall]: """Extract tool calls from a LangChain message into OTel ToolCall parts.""" calls: list[ToolCall] = [] @@ -1165,6 +1216,26 @@ def _langchain_tool_calls(message: Any) -> list[ToolCall]: additional = kwargs.get("additional_kwargs") or {} raw_calls = additional.get("tool_calls") if not raw_calls or not isinstance(raw_calls, list): + raw_calls = [] + # LangChain/LangGraph may also embed tool calls as ``{"type": "tool_use", + # "id": "...", "name": "...", "input": {...}}`` entries inside a list-shaped + # ``content`` field. Harvest those so they surface as ``ToolCallRequest`` + # parts rather than being silently dropped (or repr-dumped into a TextPart). + content_blocks = _lc_content_blocks(message) + if content_blocks: + for block in content_blocks: + if not isinstance(block, Mapping): + continue + if block.get("type") != "tool_use": + continue + raw_calls.append( + { + "name": block.get("name"), + "id": block.get("id"), + "args": block.get("input"), + } + ) + if not raw_calls: return calls for tc in raw_calls: if not isinstance(tc, Mapping): diff --git a/tests/langchain/test_utils.py b/tests/langchain/test_utils.py index abb93ccb..a98beeed 100644 --- a/tests/langchain/test_utils.py +++ b/tests/langchain/test_utils.py @@ -976,6 +976,84 @@ def test_extracts_from_nested_list(self): self.assertEqual(len(result), 1) self.assertEqual(result[0].parts[0].content, "Nested answer") + def test_flattens_langgraph_content_blocks(self): + """LangGraph ``AIMessage.content`` may be a list of content-block dicts. + + Per the GenAI semconv ``TextPart.content`` must be a plain string; the + instrumentation must concatenate the ``text`` of every ``type=="text"`` + block instead of stringifying the list (which produced a Python-repr + blob with single quotes and extra ``phase``/``index``/``id`` keys). + Regression test for issue #189. + """ + outputs = { + "messages": [ + { + "role": "ai", + "content": [ + { + "type": "text", + "text": "# One-Day Food Walk in Vancouver\n\n## Assumptions", + "phase": "final_answer", + "index": 0, + "id": "msg_045afd", + } + ], + } + ] + } + result = _extract_agent_output_messages(outputs) + self.assertEqual(len(result), 1) + self.assertEqual(len(result[0].parts), 1) + part = result[0].parts[0] + self.assertEqual(part.type, "text") + self.assertEqual(part.content, "# One-Day Food Walk in Vancouver\n\n## Assumptions") + # The Python-repr giveaways from issue #189 must be absent. + self.assertNotIn("'type'", part.content) + self.assertNotIn("phase", part.content) + + def test_joins_multiple_text_content_blocks(self): + outputs = { + "messages": [ + { + "role": "ai", + "content": [ + {"type": "text", "text": "first"}, + {"type": "text", "text": "second"}, + ], + } + ] + } + result = _extract_agent_output_messages(outputs) + self.assertEqual(result[0].parts[0].content, "first\nsecond") + + def test_extracts_embedded_tool_use_block(self): + """``tool_use`` blocks embedded in list-shaped ``content`` should surface + as ``ToolCallRequest`` parts, not be dropped or repr-dumped.""" + outputs = { + "messages": [ + { + "role": "ai", + "content": [ + {"type": "text", "text": "calling search"}, + { + "type": "tool_use", + "id": "tool_1", + "name": "search", + "input": {"q": "vancouver food"}, + }, + ], + } + ] + } + result = _extract_agent_output_messages(outputs) + self.assertEqual(len(result), 1) + parts = result[0].parts + self.assertEqual(parts[0].content, "calling search") + # Tool-call part should have the harvested name/id and JSON-encoded args. + self.assertEqual(parts[1].name, "search") + self.assertEqual(parts[1].id, "tool_1") + self.assertEqual(json.loads(parts[1].arguments), {"q": "vancouver food"}) + # ---- Agent metadata extractors ----------------------------------------------- From a6a9396f980b8ae96436723070a38bf1e95823bf Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Mon, 8 Jun 2026 10:21:22 -0700 Subject: [PATCH 2/4] Address Copilot review: avoid mutating caller's tool_calls; fix CHANGELOG heading level - _langchain_tool_calls now copies raw_calls before appending harvested tool_use blocks, so it never mutates BaseMessage.tool_calls or additional_kwargs['tool_calls']. - New test_extraction_does_not_mutate_input_message guards against regressions. - CHANGELOG Unreleased heading switched from ## to # to match the surrounding release headings. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 2 +- .../opentelemetry/_genai/_langchain/_utils.py | 3 +++ tests/langchain/test_utils.py | 23 +++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3f28bbd..cd09773a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Release History -## Unreleased +# Unreleased ### Bugs Fixed - Flatten LangChain/LangGraph multi-part message ``content`` into a plain diff --git a/src/microsoft/opentelemetry/_genai/_langchain/_utils.py b/src/microsoft/opentelemetry/_genai/_langchain/_utils.py index 3413fa88..d6e233a8 100644 --- a/src/microsoft/opentelemetry/_genai/_langchain/_utils.py +++ b/src/microsoft/opentelemetry/_genai/_langchain/_utils.py @@ -1217,6 +1217,9 @@ def _langchain_tool_calls(message: Any) -> list[ToolCall]: raw_calls = additional.get("tool_calls") if not raw_calls or not isinstance(raw_calls, list): raw_calls = [] + else: + # Copy before mutation so we never alter the caller's message object. + raw_calls = list(raw_calls) # LangChain/LangGraph may also embed tool calls as ``{"type": "tool_use", # "id": "...", "name": "...", "input": {...}}`` entries inside a list-shaped # ``content`` field. Harvest those so they surface as ``ToolCallRequest`` diff --git a/tests/langchain/test_utils.py b/tests/langchain/test_utils.py index a98beeed..855ca932 100644 --- a/tests/langchain/test_utils.py +++ b/tests/langchain/test_utils.py @@ -1054,6 +1054,29 @@ def test_extracts_embedded_tool_use_block(self): self.assertEqual(parts[1].id, "tool_1") self.assertEqual(json.loads(parts[1].arguments), {"q": "vancouver food"}) + def test_extraction_does_not_mutate_input_message(self): + """Harvesting ``tool_use`` blocks must not append to the caller's + ``tool_calls`` list (which may be ``BaseMessage.tool_calls`` shared by + reference).""" + original_tool_calls = [ + {"name": "preexisting", "id": "tc_0", "args": {"x": 1}}, + ] + message = { + "role": "ai", + "content": [ + { + "type": "tool_use", + "id": "tool_1", + "name": "search", + "input": {"q": "food"}, + }, + ], + "tool_calls": original_tool_calls, + } + snapshot = list(original_tool_calls) + _extract_agent_output_messages({"messages": [message]}) + self.assertEqual(original_tool_calls, snapshot) + # ---- Agent metadata extractors ----------------------------------------------- From 107e3603ff86af1fba777037b34257a1b2554c0f Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Mon, 8 Jun 2026 10:25:05 -0700 Subject: [PATCH 3/4] Drop CHANGELOG entry from this PR Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd09773a..9d96844c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,5 @@ # Release History -# Unreleased - -### Bugs Fixed -- Flatten LangChain/LangGraph multi-part message ``content`` into a plain - ``TextPart.content`` string so ``gen_ai.output.messages`` no longer contains - a Python-``repr`` blob on ``invoke_agent`` wrapper spans - ([#189](https://github.com/microsoft/opentelemetry-distro-python/issues/189)) - # 1.3.2 (2026-05-29) ### Bugs Fixed From fc1f53bfa3175a40233bdb7f2b4d6f67c190ce9c Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Mon, 8 Jun 2026 11:11:42 -0700 Subject: [PATCH 4/4] Clarify why tool_use blocks would be dropped without the harvest loop Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../opentelemetry/_genai/_langchain/_utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/microsoft/opentelemetry/_genai/_langchain/_utils.py b/src/microsoft/opentelemetry/_genai/_langchain/_utils.py index d6e233a8..e4e297c8 100644 --- a/src/microsoft/opentelemetry/_genai/_langchain/_utils.py +++ b/src/microsoft/opentelemetry/_genai/_langchain/_utils.py @@ -1220,10 +1220,15 @@ def _langchain_tool_calls(message: Any) -> list[ToolCall]: else: # Copy before mutation so we never alter the caller's message object. raw_calls = list(raw_calls) - # LangChain/LangGraph may also embed tool calls as ``{"type": "tool_use", - # "id": "...", "name": "...", "input": {...}}`` entries inside a list-shaped - # ``content`` field. Harvest those so they surface as ``ToolCallRequest`` - # parts rather than being silently dropped (or repr-dumped into a TextPart). + # Anthropic / LangGraph models often emit tool calls only as + # ``{"type": "tool_use", "id": "...", "name": "...", "input": {...}}`` + # entries inside a list-shaped ``content`` field, with + # ``message.tool_calls`` and ``additional_kwargs["tool_calls"]`` empty. + # Without this loop those calls would never reach ``parts``: the + # ``tool_calls`` lookup above returns nothing, and ``_langchain_content`` + # (via ``_flatten_lc_content_blocks``) only keeps ``type=="text"`` blocks + # and discards the rest. Harvest them here so they surface as spec + # ``ToolCallRequest`` parts. content_blocks = _lc_content_blocks(message) if content_blocks: for block in content_blocks: