From 74cddb7db02e7848b29bde2c72d486c593230353 Mon Sep 17 00:00:00 2001 From: blakestone-x Date: Sun, 31 May 2026 01:19:37 -0500 Subject: [PATCH] Strip unsupported numeric-bound keywords from Anthropic tool schemas Pydantic Field(ge=, le=) constraints emit JSON-schema minimum/maximum (and exclusiveMinimum/exclusiveMaximum) on the tool/function schema that with_structured_output sends. Anthropic's tool-schema validator rejects these with HTTP 400 ("For 'integer' type, property 'minimum' is not supported"), so under SKILLSPECTOR_PROVIDER=anthropic every structured LLM call fails and each analyzer silently falls back to static analysis. Add a provider-aware structured-output builder: for every provider that tolerates the keywords (OpenAI, NVIDIA) behavior is unchanged (with_structured_output keeps the constraints). For the Anthropic provider only, the generated tool schema is stripped of the numeric-bound keywords before it is sent, while responses are still parsed and validated against the original Pydantic schema, so output constraints are preserved even though they are dropped from the request. - providers: add is_anthropic_provider() - structured_output: strip_unsupported_numeric_bounds + build_structured_llm - llm_analyzer_base: route with_structured_output through build_structured_llm - tests: regression repro + sanitizer + provider-routing coverage Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: blakestone-x --- src/skillspector/llm_analyzer_base.py | 5 +- src/skillspector/providers/__init__.py | 12 ++ src/skillspector/structured_output.py | 106 ++++++++++ tests/unit/test_structured_output.py | 262 +++++++++++++++++++++++++ 4 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 src/skillspector/structured_output.py create mode 100644 tests/unit/test_structured_output.py diff --git a/src/skillspector/llm_analyzer_base.py b/src/skillspector/llm_analyzer_base.py index a678e4e..a793dc1 100644 --- a/src/skillspector/llm_analyzer_base.py +++ b/src/skillspector/llm_analyzer_base.py @@ -38,6 +38,7 @@ from skillspector.logging_config import get_logger from skillspector.model_info import get_max_input_tokens from skillspector.models import Finding +from skillspector.structured_output import build_structured_llm logger = get_logger(__name__) @@ -242,7 +243,9 @@ def __init__(self, base_prompt: str, model: str): self._input_budget = get_max_input_tokens(model) self._llm = get_chat_model(model=model) self._structured_llm = ( - self._llm.with_structured_output(self.response_schema) if self.response_schema else None + build_structured_llm(self._llm, self.response_schema) + if self.response_schema + else None ) # -- Batching ----------------------------------------------------------- diff --git a/src/skillspector/providers/__init__.py b/src/skillspector/providers/__init__.py index 78bdd17..13a3b74 100644 --- a/src/skillspector/providers/__init__.py +++ b/src/skillspector/providers/__init__.py @@ -72,6 +72,17 @@ def get_metadata_provider() -> ModelMetadataProvider: return _select_active_provider() +def is_anthropic_provider() -> bool: + """Return ``True`` when the active provider is Anthropic. + + Anthropic is selected only via an explicit ``SKILLSPECTOR_PROVIDER=anthropic`` + (see :func:`_select_active_provider`). Callers use this to apply + Anthropic-specific tool-schema handling without paying the cost of + constructing the provider. + """ + return os.environ.get("SKILLSPECTOR_PROVIDER", "").strip().lower() == "anthropic" + + def resolve_provider_credentials() -> tuple[str, str | None] | None: """Return ``(api_key, base_url)`` from the active provider. @@ -85,5 +96,6 @@ def resolve_provider_credentials() -> tuple[str, str | None] | None: "CredentialsProvider", "ModelMetadataProvider", "get_metadata_provider", + "is_anthropic_provider", "resolve_provider_credentials", ] diff --git a/src/skillspector/structured_output.py b/src/skillspector/structured_output.py new file mode 100644 index 0000000..41a3b4d --- /dev/null +++ b/src/skillspector/structured_output.py @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Structured-output helpers with provider-aware tool-schema sanitization. + +``LLMAnalyzerBase`` drives every LLM analyzer through LangChain's +``with_structured_output``, which turns a Pydantic ``response_schema`` into an +OpenAI tool/function schema and asks the model to fill it in. + +Pydantic ``Field(ge=, le=, gt=, lt=)`` constraints emit the JSON-schema +keywords ``minimum`` / ``maximum`` / ``exclusiveMinimum`` / ``exclusiveMaximum`` +on the generated tool parameters. The OpenAI and NVIDIA endpoints tolerate +these keywords, but Anthropic's tool-schema validator rejects them with an +HTTP 400:: + + For 'integer' type, property 'minimum' is not supported. + For 'number' type, property 'maximum' is not supported. + +When the Anthropic provider is active that 400 makes every structured LLM call +fail, and each analyzer silently falls back to static-only analysis. + +:func:`build_structured_llm` keeps the constraints on every provider that +tolerates them and, **only for the Anthropic provider**, strips the unsupported +numeric-bound keywords from the tool schema before it is sent — while still +validating responses against the original Pydantic model (constraints included). +""" + +from __future__ import annotations + +import copy +from typing import Any, cast + +from langchain_core.language_models import BaseChatModel +from langchain_core.output_parsers.openai_tools import PydanticToolsParser +from langchain_core.runnables import Runnable +from langchain_core.utils.function_calling import convert_to_openai_tool + +from skillspector.providers import is_anthropic_provider + +# JSON-schema numeric-bound keywords that Anthropic's tool-schema validator +# rejects. Stripping them loses input-side validation at the API boundary, but +# the Pydantic model still enforces every constraint when the response is parsed. +UNSUPPORTED_NUMERIC_KEYWORDS: frozenset[str] = frozenset( + {"minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum"} +) + + +def strip_unsupported_numeric_bounds(schema: Any) -> Any: + """Recursively remove unsupported numeric-bound keywords from a JSON schema. + + Returns a deep copy with every occurrence of the keywords in + :data:`UNSUPPORTED_NUMERIC_KEYWORDS` removed, at any nesting depth (object + properties, array ``items``, ``$defs``, ``anyOf`` branches, etc.). The + input is not mutated. + """ + if isinstance(schema, dict): + return { + key: strip_unsupported_numeric_bounds(value) + for key, value in schema.items() + if key not in UNSUPPORTED_NUMERIC_KEYWORDS + } + if isinstance(schema, list): + return [strip_unsupported_numeric_bounds(item) for item in schema] + return schema + + +def sanitized_openai_tool(schema: type) -> dict[str, Any]: + """Convert *schema* to an OpenAI tool dict with numeric bounds stripped.""" + tool = convert_to_openai_tool(schema) + return cast(dict[str, Any], strip_unsupported_numeric_bounds(copy.deepcopy(tool))) + + +def build_structured_llm(llm: BaseChatModel, schema: type) -> Runnable: + """Return a runnable that yields validated *schema* instances. + + For every provider except Anthropic this is exactly + ``llm.with_structured_output(schema)`` — the numeric-bound constraints are + preserved in the tool schema sent to the API. + + For the Anthropic provider the tool schema is sanitized of the numeric-bound + keywords Anthropic rejects (see :func:`strip_unsupported_numeric_bounds`), + the sanitized tool is force-selected via tool-calling, and the response is + still parsed and validated against the original Pydantic *schema* (so the + constraints are enforced on output even though they were dropped from the + request). + """ + if not is_anthropic_provider(): + return llm.with_structured_output(schema) + + tool = sanitized_openai_tool(schema) + tool_name = tool["function"]["name"] + bound = llm.bind_tools([tool], tool_choice=tool_name) + parser = PydanticToolsParser(tools=[schema], first_tool_only=True) + return bound | parser diff --git a/tests/unit/test_structured_output.py b/tests/unit/test_structured_output.py new file mode 100644 index 0000000..735e29b --- /dev/null +++ b/tests/unit/test_structured_output.py @@ -0,0 +1,262 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for provider-aware structured-output tool-schema sanitization. + +Covers the Anthropic 400 regression: Pydantic numeric-bound constraints +(``Field(ge=, le=)``) emit ``minimum`` / ``maximum`` JSON-schema keywords that +Anthropic's tool-schema validator rejects, silently dropping every LLM +analyzer to static-only analysis. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest +from langchain_core.messages import AIMessage +from langchain_core.output_parsers.openai_tools import PydanticToolsParser +from langchain_core.utils.function_calling import convert_to_openai_tool +from pydantic import ValidationError + +from skillspector.llm_analyzer_base import LLMAnalysisResult +from skillspector.nodes.meta_analyzer import MetaAnalyzerResult +from skillspector.structured_output import ( + UNSUPPORTED_NUMERIC_KEYWORDS, + build_structured_llm, + sanitized_openai_tool, + strip_unsupported_numeric_bounds, +) + + +@pytest.fixture(autouse=True) +def _clean_provider_env(monkeypatch: pytest.MonkeyPatch): + monkeypatch.delenv("SKILLSPECTOR_PROVIDER", raising=False) + yield + + +def _collect_keys(obj: object) -> set[str]: + """Recursively collect every dict key in a nested structure.""" + keys: set[str] = set() + if isinstance(obj, dict): + for key, value in obj.items(): + keys.add(key) + keys |= _collect_keys(value) + elif isinstance(obj, list): + for item in obj: + keys |= _collect_keys(item) + return keys + + +# --------------------------------------------------------------------------- +# Regression guard: the unsanitized schema really does emit the bad keywords +# --------------------------------------------------------------------------- + + +class TestRegressionReproduction: + """Without sanitization the generated tool schema carries the keywords + Anthropic rejects. These assertions document the bug the fix targets.""" + + def test_llm_analysis_result_emits_minimum_and_maximum(self) -> None: + tool = convert_to_openai_tool(LLMAnalysisResult) + keys = _collect_keys(tool) + # start_line=Field(ge=1) -> minimum; confidence=Field(ge=0.0, le=1.0) -> minimum/maximum + assert "minimum" in keys + assert "maximum" in keys + + def test_meta_analyzer_result_emits_minimum_and_maximum(self) -> None: + tool = convert_to_openai_tool(MetaAnalyzerResult) + keys = _collect_keys(tool) + assert "minimum" in keys + assert "maximum" in keys + + +# --------------------------------------------------------------------------- +# strip_unsupported_numeric_bounds +# --------------------------------------------------------------------------- + + +class TestStripUnsupportedNumericBounds: + def test_strips_top_level_keywords(self) -> None: + out = strip_unsupported_numeric_bounds( + {"type": "integer", "minimum": 1, "maximum": 10} + ) + assert out == {"type": "integer"} + + def test_strips_exclusive_variants(self) -> None: + out = strip_unsupported_numeric_bounds( + {"type": "number", "exclusiveMinimum": 0, "exclusiveMaximum": 1} + ) + assert out == {"type": "number"} + + def test_strips_nested_in_properties_and_items(self) -> None: + schema = { + "type": "object", + "properties": { + "findings": { + "type": "array", + "items": { + "type": "object", + "properties": { + "start_line": {"type": "integer", "minimum": 1}, + "confidence": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + }, + }, + }, + } + }, + } + out = strip_unsupported_numeric_bounds(schema) + assert _collect_keys(out).isdisjoint(UNSUPPORTED_NUMERIC_KEYWORDS) + # Non-bound keys survive. + item_props = out["properties"]["findings"]["items"]["properties"] + assert item_props["start_line"] == {"type": "integer"} + assert item_props["confidence"] == {"type": "number"} + + def test_preserves_unrelated_keys(self) -> None: + schema = { + "type": "string", + "description": "keep me", + "enum": ["A", "B"], + "default": "A", + } + assert strip_unsupported_numeric_bounds(schema) == schema + + def test_does_not_mutate_input(self) -> None: + schema = {"type": "integer", "minimum": 1} + strip_unsupported_numeric_bounds(schema) + assert schema == {"type": "integer", "minimum": 1} + + def test_handles_anyof_branches(self) -> None: + schema = { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "null"}, + ] + } + out = strip_unsupported_numeric_bounds(schema) + assert out == {"anyOf": [{"type": "integer"}, {"type": "null"}]} + + +# --------------------------------------------------------------------------- +# sanitized_openai_tool — real schemas, fully cleaned +# --------------------------------------------------------------------------- + + +class TestSanitizedOpenAITool: + @pytest.mark.parametrize("schema", [LLMAnalysisResult, MetaAnalyzerResult]) + def test_no_unsupported_keywords_remain(self, schema: type) -> None: + tool = sanitized_openai_tool(schema) + assert _collect_keys(tool).isdisjoint(UNSUPPORTED_NUMERIC_KEYWORDS) + + def test_tool_structure_and_descriptions_preserved(self) -> None: + tool = sanitized_openai_tool(LLMAnalysisResult) + assert tool["type"] == "function" + assert tool["function"]["name"] == "LLMAnalysisResult" + # Field types and the enum survive the strip. + item = tool["function"]["parameters"]["properties"]["findings"]["items"] + props = item["properties"] + assert props["start_line"]["type"] == "integer" + assert props["confidence"]["type"] == "number" + assert props["severity"]["enum"] == ["LOW", "MEDIUM", "HIGH", "CRITICAL"] + + +# --------------------------------------------------------------------------- +# build_structured_llm — provider routing +# --------------------------------------------------------------------------- + + +class TestBuildStructuredLLM: + def test_non_anthropic_uses_with_structured_output(self) -> None: + """Other providers keep the constraints via plain with_structured_output.""" + llm = MagicMock() + sentinel = object() + llm.with_structured_output.return_value = sentinel + result = build_structured_llm(llm, LLMAnalysisResult) + llm.with_structured_output.assert_called_once_with(LLMAnalysisResult) + llm.bind_tools.assert_not_called() + assert result is sentinel + + def test_anthropic_binds_sanitized_tool(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("SKILLSPECTOR_PROVIDER", "anthropic") + llm = MagicMock() + build_structured_llm(llm, LLMAnalysisResult) + + llm.with_structured_output.assert_not_called() + llm.bind_tools.assert_called_once() + (tools,), kwargs = llm.bind_tools.call_args + assert kwargs["tool_choice"] == "LLMAnalysisResult" + assert _collect_keys(tools).isdisjoint(UNSUPPORTED_NUMERIC_KEYWORDS) + + def test_anthropic_parser_validates_against_original_schema( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Sanitizing the request schema must not relax output validation — + the original Pydantic constraints still apply when the response parses.""" + monkeypatch.setenv("SKILLSPECTOR_PROVIDER", "anthropic") + # The parser the Anthropic path attaches is keyed to the original schema. + parser = PydanticToolsParser(tools=[LLMAnalysisResult], first_tool_only=True) + + # A valid tool-call round-trips into a validated Pydantic instance. + valid = AIMessage( + content="", + tool_calls=[ + { + "name": "LLMAnalysisResult", + "args": { + "findings": [ + { + "rule_id": "SEC-1", + "message": "x", + "severity": "HIGH", + "start_line": 5, + "confidence": 0.9, + } + ] + }, + "id": "call_1", + } + ], + ) + parsed = parser.invoke(valid) + assert isinstance(parsed, LLMAnalysisResult) + assert parsed.findings[0].confidence == 0.9 + + # An out-of-bound confidence is still rejected by the Pydantic model. + invalid = AIMessage( + content="", + tool_calls=[ + { + "name": "LLMAnalysisResult", + "args": { + "findings": [ + { + "rule_id": "SEC-1", + "message": "x", + "severity": "HIGH", + "start_line": 5, + "confidence": 1.5, + } + ] + }, + "id": "call_2", + } + ], + ) + with pytest.raises(ValidationError): + parser.invoke(invalid)