diff --git a/src/skillspector/llm_analyzer_base.py b/src/skillspector/llm_analyzer_base.py index a678e4e..a793dc1 100644 --- a/src/skillspector/llm_analyzer_base.py +++ b/src/skillspector/llm_analyzer_base.py @@ -38,6 +38,7 @@ from skillspector.logging_config import get_logger from skillspector.model_info import get_max_input_tokens from skillspector.models import Finding +from skillspector.structured_output import build_structured_llm logger = get_logger(__name__) @@ -242,7 +243,9 @@ def __init__(self, base_prompt: str, model: str): self._input_budget = get_max_input_tokens(model) self._llm = get_chat_model(model=model) self._structured_llm = ( - self._llm.with_structured_output(self.response_schema) if self.response_schema else None + build_structured_llm(self._llm, self.response_schema) + if self.response_schema + else None ) # -- Batching ----------------------------------------------------------- diff --git a/src/skillspector/providers/__init__.py b/src/skillspector/providers/__init__.py index 78bdd17..13a3b74 100644 --- a/src/skillspector/providers/__init__.py +++ b/src/skillspector/providers/__init__.py @@ -72,6 +72,17 @@ def get_metadata_provider() -> ModelMetadataProvider: return _select_active_provider() +def is_anthropic_provider() -> bool: + """Return ``True`` when the active provider is Anthropic. + + Anthropic is selected only via an explicit ``SKILLSPECTOR_PROVIDER=anthropic`` + (see :func:`_select_active_provider`). Callers use this to apply + Anthropic-specific tool-schema handling without paying the cost of + constructing the provider. + """ + return os.environ.get("SKILLSPECTOR_PROVIDER", "").strip().lower() == "anthropic" + + def resolve_provider_credentials() -> tuple[str, str | None] | None: """Return ``(api_key, base_url)`` from the active provider. @@ -85,5 +96,6 @@ def resolve_provider_credentials() -> tuple[str, str | None] | None: "CredentialsProvider", "ModelMetadataProvider", "get_metadata_provider", + "is_anthropic_provider", "resolve_provider_credentials", ] diff --git a/src/skillspector/structured_output.py b/src/skillspector/structured_output.py new file mode 100644 index 0000000..41a3b4d --- /dev/null +++ b/src/skillspector/structured_output.py @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Structured-output helpers with provider-aware tool-schema sanitization. + +``LLMAnalyzerBase`` drives every LLM analyzer through LangChain's +``with_structured_output``, which turns a Pydantic ``response_schema`` into an +OpenAI tool/function schema and asks the model to fill it in. + +Pydantic ``Field(ge=, le=, gt=, lt=)`` constraints emit the JSON-schema +keywords ``minimum`` / ``maximum`` / ``exclusiveMinimum`` / ``exclusiveMaximum`` +on the generated tool parameters. The OpenAI and NVIDIA endpoints tolerate +these keywords, but Anthropic's tool-schema validator rejects them with an +HTTP 400:: + + For 'integer' type, property 'minimum' is not supported. + For 'number' type, property 'maximum' is not supported. + +When the Anthropic provider is active that 400 makes every structured LLM call +fail, and each analyzer silently falls back to static-only analysis. + +:func:`build_structured_llm` keeps the constraints on every provider that +tolerates them and, **only for the Anthropic provider**, strips the unsupported +numeric-bound keywords from the tool schema before it is sent — while still +validating responses against the original Pydantic model (constraints included). +""" + +from __future__ import annotations + +import copy +from typing import Any, cast + +from langchain_core.language_models import BaseChatModel +from langchain_core.output_parsers.openai_tools import PydanticToolsParser +from langchain_core.runnables import Runnable +from langchain_core.utils.function_calling import convert_to_openai_tool + +from skillspector.providers import is_anthropic_provider + +# JSON-schema numeric-bound keywords that Anthropic's tool-schema validator +# rejects. Stripping them loses input-side validation at the API boundary, but +# the Pydantic model still enforces every constraint when the response is parsed. +UNSUPPORTED_NUMERIC_KEYWORDS: frozenset[str] = frozenset( + {"minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum"} +) + + +def strip_unsupported_numeric_bounds(schema: Any) -> Any: + """Recursively remove unsupported numeric-bound keywords from a JSON schema. + + Returns a deep copy with every occurrence of the keywords in + :data:`UNSUPPORTED_NUMERIC_KEYWORDS` removed, at any nesting depth (object + properties, array ``items``, ``$defs``, ``anyOf`` branches, etc.). The + input is not mutated. + """ + if isinstance(schema, dict): + return { + key: strip_unsupported_numeric_bounds(value) + for key, value in schema.items() + if key not in UNSUPPORTED_NUMERIC_KEYWORDS + } + if isinstance(schema, list): + return [strip_unsupported_numeric_bounds(item) for item in schema] + return schema + + +def sanitized_openai_tool(schema: type) -> dict[str, Any]: + """Convert *schema* to an OpenAI tool dict with numeric bounds stripped.""" + tool = convert_to_openai_tool(schema) + return cast(dict[str, Any], strip_unsupported_numeric_bounds(copy.deepcopy(tool))) + + +def build_structured_llm(llm: BaseChatModel, schema: type) -> Runnable: + """Return a runnable that yields validated *schema* instances. + + For every provider except Anthropic this is exactly + ``llm.with_structured_output(schema)`` — the numeric-bound constraints are + preserved in the tool schema sent to the API. + + For the Anthropic provider the tool schema is sanitized of the numeric-bound + keywords Anthropic rejects (see :func:`strip_unsupported_numeric_bounds`), + the sanitized tool is force-selected via tool-calling, and the response is + still parsed and validated against the original Pydantic *schema* (so the + constraints are enforced on output even though they were dropped from the + request). + """ + if not is_anthropic_provider(): + return llm.with_structured_output(schema) + + tool = sanitized_openai_tool(schema) + tool_name = tool["function"]["name"] + bound = llm.bind_tools([tool], tool_choice=tool_name) + parser = PydanticToolsParser(tools=[schema], first_tool_only=True) + return bound | parser diff --git a/tests/unit/test_structured_output.py b/tests/unit/test_structured_output.py new file mode 100644 index 0000000..735e29b --- /dev/null +++ b/tests/unit/test_structured_output.py @@ -0,0 +1,262 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for provider-aware structured-output tool-schema sanitization. + +Covers the Anthropic 400 regression: Pydantic numeric-bound constraints +(``Field(ge=, le=)``) emit ``minimum`` / ``maximum`` JSON-schema keywords that +Anthropic's tool-schema validator rejects, silently dropping every LLM +analyzer to static-only analysis. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest +from langchain_core.messages import AIMessage +from langchain_core.output_parsers.openai_tools import PydanticToolsParser +from langchain_core.utils.function_calling import convert_to_openai_tool +from pydantic import ValidationError + +from skillspector.llm_analyzer_base import LLMAnalysisResult +from skillspector.nodes.meta_analyzer import MetaAnalyzerResult +from skillspector.structured_output import ( + UNSUPPORTED_NUMERIC_KEYWORDS, + build_structured_llm, + sanitized_openai_tool, + strip_unsupported_numeric_bounds, +) + + +@pytest.fixture(autouse=True) +def _clean_provider_env(monkeypatch: pytest.MonkeyPatch): + monkeypatch.delenv("SKILLSPECTOR_PROVIDER", raising=False) + yield + + +def _collect_keys(obj: object) -> set[str]: + """Recursively collect every dict key in a nested structure.""" + keys: set[str] = set() + if isinstance(obj, dict): + for key, value in obj.items(): + keys.add(key) + keys |= _collect_keys(value) + elif isinstance(obj, list): + for item in obj: + keys |= _collect_keys(item) + return keys + + +# --------------------------------------------------------------------------- +# Regression guard: the unsanitized schema really does emit the bad keywords +# --------------------------------------------------------------------------- + + +class TestRegressionReproduction: + """Without sanitization the generated tool schema carries the keywords + Anthropic rejects. These assertions document the bug the fix targets.""" + + def test_llm_analysis_result_emits_minimum_and_maximum(self) -> None: + tool = convert_to_openai_tool(LLMAnalysisResult) + keys = _collect_keys(tool) + # start_line=Field(ge=1) -> minimum; confidence=Field(ge=0.0, le=1.0) -> minimum/maximum + assert "minimum" in keys + assert "maximum" in keys + + def test_meta_analyzer_result_emits_minimum_and_maximum(self) -> None: + tool = convert_to_openai_tool(MetaAnalyzerResult) + keys = _collect_keys(tool) + assert "minimum" in keys + assert "maximum" in keys + + +# --------------------------------------------------------------------------- +# strip_unsupported_numeric_bounds +# --------------------------------------------------------------------------- + + +class TestStripUnsupportedNumericBounds: + def test_strips_top_level_keywords(self) -> None: + out = strip_unsupported_numeric_bounds( + {"type": "integer", "minimum": 1, "maximum": 10} + ) + assert out == {"type": "integer"} + + def test_strips_exclusive_variants(self) -> None: + out = strip_unsupported_numeric_bounds( + {"type": "number", "exclusiveMinimum": 0, "exclusiveMaximum": 1} + ) + assert out == {"type": "number"} + + def test_strips_nested_in_properties_and_items(self) -> None: + schema = { + "type": "object", + "properties": { + "findings": { + "type": "array", + "items": { + "type": "object", + "properties": { + "start_line": {"type": "integer", "minimum": 1}, + "confidence": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + }, + }, + }, + } + }, + } + out = strip_unsupported_numeric_bounds(schema) + assert _collect_keys(out).isdisjoint(UNSUPPORTED_NUMERIC_KEYWORDS) + # Non-bound keys survive. + item_props = out["properties"]["findings"]["items"]["properties"] + assert item_props["start_line"] == {"type": "integer"} + assert item_props["confidence"] == {"type": "number"} + + def test_preserves_unrelated_keys(self) -> None: + schema = { + "type": "string", + "description": "keep me", + "enum": ["A", "B"], + "default": "A", + } + assert strip_unsupported_numeric_bounds(schema) == schema + + def test_does_not_mutate_input(self) -> None: + schema = {"type": "integer", "minimum": 1} + strip_unsupported_numeric_bounds(schema) + assert schema == {"type": "integer", "minimum": 1} + + def test_handles_anyof_branches(self) -> None: + schema = { + "anyOf": [ + {"type": "integer", "minimum": 1}, + {"type": "null"}, + ] + } + out = strip_unsupported_numeric_bounds(schema) + assert out == {"anyOf": [{"type": "integer"}, {"type": "null"}]} + + +# --------------------------------------------------------------------------- +# sanitized_openai_tool — real schemas, fully cleaned +# --------------------------------------------------------------------------- + + +class TestSanitizedOpenAITool: + @pytest.mark.parametrize("schema", [LLMAnalysisResult, MetaAnalyzerResult]) + def test_no_unsupported_keywords_remain(self, schema: type) -> None: + tool = sanitized_openai_tool(schema) + assert _collect_keys(tool).isdisjoint(UNSUPPORTED_NUMERIC_KEYWORDS) + + def test_tool_structure_and_descriptions_preserved(self) -> None: + tool = sanitized_openai_tool(LLMAnalysisResult) + assert tool["type"] == "function" + assert tool["function"]["name"] == "LLMAnalysisResult" + # Field types and the enum survive the strip. + item = tool["function"]["parameters"]["properties"]["findings"]["items"] + props = item["properties"] + assert props["start_line"]["type"] == "integer" + assert props["confidence"]["type"] == "number" + assert props["severity"]["enum"] == ["LOW", "MEDIUM", "HIGH", "CRITICAL"] + + +# --------------------------------------------------------------------------- +# build_structured_llm — provider routing +# --------------------------------------------------------------------------- + + +class TestBuildStructuredLLM: + def test_non_anthropic_uses_with_structured_output(self) -> None: + """Other providers keep the constraints via plain with_structured_output.""" + llm = MagicMock() + sentinel = object() + llm.with_structured_output.return_value = sentinel + result = build_structured_llm(llm, LLMAnalysisResult) + llm.with_structured_output.assert_called_once_with(LLMAnalysisResult) + llm.bind_tools.assert_not_called() + assert result is sentinel + + def test_anthropic_binds_sanitized_tool(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("SKILLSPECTOR_PROVIDER", "anthropic") + llm = MagicMock() + build_structured_llm(llm, LLMAnalysisResult) + + llm.with_structured_output.assert_not_called() + llm.bind_tools.assert_called_once() + (tools,), kwargs = llm.bind_tools.call_args + assert kwargs["tool_choice"] == "LLMAnalysisResult" + assert _collect_keys(tools).isdisjoint(UNSUPPORTED_NUMERIC_KEYWORDS) + + def test_anthropic_parser_validates_against_original_schema( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Sanitizing the request schema must not relax output validation — + the original Pydantic constraints still apply when the response parses.""" + monkeypatch.setenv("SKILLSPECTOR_PROVIDER", "anthropic") + # The parser the Anthropic path attaches is keyed to the original schema. + parser = PydanticToolsParser(tools=[LLMAnalysisResult], first_tool_only=True) + + # A valid tool-call round-trips into a validated Pydantic instance. + valid = AIMessage( + content="", + tool_calls=[ + { + "name": "LLMAnalysisResult", + "args": { + "findings": [ + { + "rule_id": "SEC-1", + "message": "x", + "severity": "HIGH", + "start_line": 5, + "confidence": 0.9, + } + ] + }, + "id": "call_1", + } + ], + ) + parsed = parser.invoke(valid) + assert isinstance(parsed, LLMAnalysisResult) + assert parsed.findings[0].confidence == 0.9 + + # An out-of-bound confidence is still rejected by the Pydantic model. + invalid = AIMessage( + content="", + tool_calls=[ + { + "name": "LLMAnalysisResult", + "args": { + "findings": [ + { + "rule_id": "SEC-1", + "message": "x", + "severity": "HIGH", + "start_line": 5, + "confidence": 1.5, + } + ] + }, + "id": "call_2", + } + ], + ) + with pytest.raises(ValidationError): + parser.invoke(invalid)