Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 41 additions & 4 deletions libs/core/langchain_core/language_models/model_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class ModelProfile(TypedDict, total=False):

image_inputs: bool
"""Whether image inputs are supported."""
# TODO: add more detail about formats?

image_url_inputs: bool
"""Whether [image URL inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
Expand All @@ -59,17 +58,36 @@ class ModelProfile(TypedDict, total=False):
pdf_inputs: bool
"""Whether [PDF inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64

audio_inputs: bool
"""Whether [audio inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64

video_inputs: bool
"""Whether [video inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64

input_mime_types: dict[str, list[str]]
"""MIME types accepted as input, grouped by modality.

Keys mirror the modality names used by [models.dev](https://models.dev) — for
example, `'image'`, `'audio'`, `'pdf'`, `'video'`. Values are lists of
[IANA media types](https://www.iana.org/assignments/media-types/) such as
`'image/png'` or `'audio/mpeg'`.

This field is *informational*: an entry means the provider is known to accept
that MIME type, but an empty or missing list does not necessarily mean a
type is rejected. Consumers that need hard validation should consult the
upstream provider's documentation.

Example:
```python
{
"image": ["image/png", "image/jpeg", "image/gif", "image/webp"],
"pdf": ["application/pdf"],
}
```
"""

image_tool_message: bool
"""Whether images can be included in tool messages."""
Expand Down Expand Up @@ -100,6 +118,25 @@ class ModelProfile(TypedDict, total=False):
"""Whether [video outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""

output_mime_types: dict[str, list[str]]
"""MIME types produced as output, grouped by modality.

Keys mirror the modality names used by [models.dev](https://models.dev) — for
example, `'image'`, `'audio'`, `'video'`. Values are lists of
[IANA media types](https://www.iana.org/assignments/media-types/) such as
`'image/png'` or `'audio/mpeg'`.

This field is *informational*: an entry means the provider is known to
return that MIME type, but an empty or missing list does not necessarily
mean a type is never returned. Consumers that need hard validation should
consult the upstream provider's documentation.

Example:
```python
{"image": ["image/png"], "audio": ["audio/mpeg"]}
```
"""

# --- Tool calling ---
tool_calling: bool
"""Whether the model supports [tool calling](https://docs.langchain.com/oss/python/langchain/models#tool-calling)"""
Expand Down
39 changes: 38 additions & 1 deletion libs/model-profiles/langchain_model_profiles/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,30 @@ def _validate_data_dir(data_dir: Path) -> Path:
return resolved


def _profile_field_names() -> frozenset[str]:
"""Return the set of keys declared on `ModelProfile`, or an empty set."""
try:
from langchain_core.language_models.model_profile import ModelProfile
except ImportError:
return frozenset()

try:
return frozenset(get_type_hints(ModelProfile).keys())
except (TypeError, NameError):
return frozenset()


def _load_augmentations(
data_dir: Path,
) -> tuple[dict[str, Any], dict[str, dict[str, Any]]]:
"""Load augmentations from `profile_augmentations.toml`.

Provider-level overrides are top-level keys under `[overrides]`. Model-level
overrides are keyed by model id under `[overrides."model-id"]`. Because TOML
subtables produce `dict` values, we distinguish the two by checking the key
against the declared `ModelProfile` field names. If `ModelProfile` cannot be
imported, we fall back to the legacy heuristic of "dict value ⇒ model id."

Args:
data_dir: Directory containing `profile_augmentations.toml`.

Expand Down Expand Up @@ -90,8 +109,26 @@ def _load_augmentations(
provider_aug: dict[str, Any] = {}
model_augs: dict[str, dict[str, Any]] = {}

profile_fields = _profile_field_names()

for key, value in overrides.items():
if isinstance(value, dict):
if profile_fields:
# Schema-driven: known profile field names are provider-level; all
# other keys are treated as model identifiers (whose values must be
# dict overrides).
if key in profile_fields:
provider_aug[key] = value
elif isinstance(value, dict):
model_augs[key] = value
else:
msg = (
f"Augmentation key '{key}' is not a declared ModelProfile "
f"field and its value is not a table of overrides."
)
print(f"❌ {msg}", file=sys.stderr)
sys.exit(1)
Comment on lines +115 to +129

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Robustness Medium

The new schema-driven branch rejects any non-dict key not present on ModelProfile, which breaks forward compatibility when profile_augmentations.toml contains a newly added provider-level field; keep unknown scalar keys as provider overrides instead of exiting.

Suggested fix
        if profile_fields:
            # Schema-driven: known profile field names are provider-level; dict
            # values for unknown keys are treated as model identifiers.
            if key in profile_fields:
                provider_aug[key] = value
            elif isinstance(value, dict):
                model_augs[key] = value
            else:
                provider_aug[key] = value
Prompt for AI assistance

Copy the prompt below and paste it into ChatGPT, Claude, or any LLM:

You are an expert python developer with deep knowledge of security, performance, and best practices.

### Context

File: libs/model-profiles/langchain_model_profiles/cli.py
Lines: 115-129
Issue Type: robustness-medium
Severity: medium

Issue Description:
The new schema-driven branch rejects any non-dict key not present on `ModelProfile`, which breaks forward compatibility when `profile_augmentations.toml` contains a newly added provider-level field; keep unknown scalar keys as provider overrides instead of exiting.

Current Code:
        if profile_fields:
            # Schema-driven: known profile field names are provider-level; all
            # other keys are treated as model identifiers (whose values must be
            # dict overrides).
            if key in profile_fields:
                provider_aug[key] = value
            elif isinstance(value, dict):
                model_augs[key] = value
            else:
                msg = (
                    f"Augmentation key '{key}' is not a declared ModelProfile "
                    f"field and its value is not a table of overrides."
                )
                print(f"❌ {msg}", file=sys.stderr)
                sys.exit(1)

---

### Instructions

1. Fix the issue described above
2. Maintain the exact indentation and code style from the original
3. Follow python best practices and language-specific idioms
4. Ensure the fix addresses the root cause, not just the symptoms
5. Add brief inline comments explaining the fix if needed

### Constraints

- Do not change functionality beyond fixing the identified issue
- Preserve existing variable names and function signatures unless they are part of the problem
- Ensure the fix is production-ready

---


Like Dislike Create Issue Jira

# Legacy fallback when ModelProfile is unavailable.
elif isinstance(value, dict):
Comment on lines +112 to +131

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Functional High

If ModelProfile imports successfully but get_type_hints(ModelProfile) returns an empty mapping, the new branch silently falls back to the legacy heuristic and can misclassify provider-level dict-valued fields as model-specific overrides; treat successful schema discovery separately from whether the field set is empty.

Suggested fix
    profile_fields = _profile_field_names()
    schema_available = profile_fields is not None

    for key, value in overrides.items():
        if schema_available:
            # Schema-driven: known profile field names are provider-level; all
            # other keys are treated as model identifiers (whose values must be
            # dict overrides).
            if key in profile_fields:
                provider_aug[key] = value
            elif isinstance(value, dict):
                model_augs[key] = value
            else:
                msg = (
                    f"Augmentation key '{key}' is not a declared ModelProfile "
                    f"field and its value is not a table of overrides."
                )
                print(f"❌ {msg}", file=sys.stderr)
                sys.exit(1)
        elif isinstance(value, dict):
            model_augs[key] = value
        else:
            provider_aug[key] = value
Prompt for AI assistance

Copy the prompt below and paste it into ChatGPT, Claude, or any LLM:

You are an expert python developer with deep knowledge of security, performance, and best practices.

### Context

File: libs/model-profiles/langchain_model_profiles/cli.py
Lines: 112-131
Issue Type: functional-high
Severity: high

Issue Description:
If `ModelProfile` imports successfully but `get_type_hints(ModelProfile)` returns an empty mapping, the new branch silently falls back to the legacy heuristic and can misclassify provider-level dict-valued fields as model-specific overrides; treat successful schema discovery separately from whether the field set is empty.

Current Code:
    profile_fields = _profile_field_names()

    for key, value in overrides.items():
        if profile_fields:
            # Schema-driven: known profile field names are provider-level; all
            # other keys are treated as model identifiers (whose values must be
            # dict overrides).
            if key in profile_fields:
                provider_aug[key] = value
            elif isinstance(value, dict):
                model_augs[key] = value
            else:
                msg = (
                    f"Augmentation key '{key}' is not a declared ModelProfile "
                    f"field and its value is not a table of overrides."
                )
                print(f"❌ {msg}", file=sys.stderr)
                sys.exit(1)
        # Legacy fallback when ModelProfile is unavailable.
        elif isinstance(value, dict):
            model_augs[key] = value
        else:
            provider_aug[key] = value

---

### Instructions

1. Fix the issue described above
2. Maintain the exact indentation and code style from the original
3. Follow python best practices and language-specific idioms
4. Ensure the fix addresses the root cause, not just the symptoms
5. Add brief inline comments explaining the fix if needed

### Constraints

- Do not change functionality beyond fixing the identified issue
- Preserve existing variable names and function signatures unless they are part of the problem
- Ensure the fix is production-ready

---


Like Dislike Create Issue Jira

model_augs[key] = value
else:
provider_aug[key] = value
Comment on lines +130 to 134

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Legacy fallback creates bogus model profiles for dict-valued fields

When _profile_field_names() returns an empty frozenset (because langchain_core can't be imported or get_type_hints raises), the legacy branch treats every dict-valued key as a model ID. A [overrides.input_mime_types] block therefore ends up in model_augs under the key "input_mime_types" rather than in provider_aug. Because no model with that ID exists in models.dev, the extra_models loop then inserts a profile whose ID is literally "input_mime_types" into the generated _profiles.py. The fix is to emit a warning and skip the key in the legacy path, or document that the new dict-valued provider fields require langchain_core to be installed.

Expand Down
130 changes: 130 additions & 0 deletions libs/model-profiles/tests/unit_tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,3 +472,133 @@ def test_survives_get_type_hints_failure(self) -> None:
side_effect=TypeError("broken"),
):
_warn_undeclared_profile_keys(profiles)


def test_refresh_merges_provider_level_mime_types(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """A provider-wide `input_mime_types` table is applied to each model."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    # `[overrides.input_mime_types]` is a TOML subtable, so it parses to a dict
    # value under `[overrides]` alongside the scalar `image_url_inputs`.
    toml_text = """
provider = "anthropic"

[overrides]
image_url_inputs = true

[overrides.input_mime_types]
image = ["image/png", "image/jpeg"]
pdf = ["application/pdf"]
"""
    (data_dir / "profile_augmentations.toml").write_text(toml_text)

    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", data_dir)

    # Load the generated module from disk to inspect the written profiles.
    spec = importlib.util.spec_from_file_location(
        "generated_mime", data_dir / "_profiles.py"
    )
    assert spec
    assert spec.loader
    generated = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(generated)  # type: ignore[union-attr]

    expected_mime_types = {
        "image": ["image/png", "image/jpeg"],
        "pdf": ["application/pdf"],
    }
    for model_id in ("claude-3-opus", "claude-3-sonnet"):
        profile = generated._PROFILES[model_id]  # type: ignore[attr-defined]
        assert profile["input_mime_types"] == expected_mime_types


def test_refresh_model_level_mime_types_override_provider(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """A model's own MIME-type tables take precedence over provider defaults."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    # One provider-level table plus two tables scoped to "claude-3-opus".
    toml_text = """
provider = "anthropic"

[overrides.input_mime_types]
image = ["image/png"]

[overrides."claude-3-opus".input_mime_types]
image = ["image/png", "image/jpeg", "image/webp"]

[overrides."claude-3-opus".output_mime_types]
image = ["image/png"]
"""
    (data_dir / "profile_augmentations.toml").write_text(toml_text)

    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", data_dir)

    # Load the generated module from disk to inspect the written profiles.
    spec = importlib.util.spec_from_file_location(
        "generated_mime_override", data_dir / "_profiles.py"
    )
    assert spec
    assert spec.loader
    generated = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(generated)  # type: ignore[union-attr]

    opus = generated._PROFILES["claude-3-opus"]  # type: ignore[attr-defined]
    sonnet = generated._PROFILES["claude-3-sonnet"]  # type: ignore[attr-defined]

    # The overridden model carries its own tables.
    assert opus["input_mime_types"] == {
        "image": ["image/png", "image/jpeg", "image/webp"]
    }
    assert opus["output_mime_types"] == {"image": ["image/png"]}
    # The other model only sees the provider-level default.
    assert sonnet["input_mime_types"] == {"image": ["image/png"]}
    assert "output_mime_types" not in sonnet


def test_refresh_rejects_unknown_scalar_top_level_key(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """A scalar `[overrides]` key that is not a profile field aborts the run."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    toml_text = """
provider = "anthropic"

[overrides]
not_a_real_field = "oops"
"""
    (data_dir / "profile_augmentations.toml").write_text(toml_text)

    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        patch("builtins.input", return_value="y"),
        pytest.raises(SystemExit) as exc_info,
    ):
        refresh("anthropic", data_dir)

    exit_code = exc_info.value.code
    assert exit_code == 1
Loading