diff --git a/packages/python/servingcard/apply.py b/packages/python/servingcard/apply.py index e5a79c0..1bb3009 100644 --- a/packages/python/servingcard/apply.py +++ b/packages/python/servingcard/apply.py @@ -65,7 +65,10 @@ def generate_vllm_command(card: ServingCard) -> str: def generate_tgi_command(card: ServingCard) -> str: """Generate a TGI launch command from a servingcard.""" if not card.serving or not card.serving.engine_args: - return f"# No engine_args in servingcard -- cannot generate TGI command\ntext-generation-launcher --model-id {card.model}" + return ( + "# No engine_args in servingcard -- cannot generate TGI command\n" + f"text-generation-launcher --model-id {card.model}" + ) args = card.serving.engine_args.copy() model_id = args.pop("model", card.model) diff --git a/packages/python/servingcard/backends.py b/packages/python/servingcard/backends.py index 69c491e..2ffa074 100644 --- a/packages/python/servingcard/backends.py +++ b/packages/python/servingcard/backends.py @@ -5,7 +5,6 @@ import json import shutil import subprocess -import sys from abc import ABC, abstractmethod diff --git a/packages/python/servingcard/cli.py b/packages/python/servingcard/cli.py index c4043a2..72a9f86 100644 --- a/packages/python/servingcard/cli.py +++ b/packages/python/servingcard/cli.py @@ -2,7 +2,6 @@ from __future__ import annotations -import sys from datetime import date from pathlib import Path from typing import Optional @@ -11,7 +10,6 @@ import yaml from servingcard.apply import ( - REGISTRY_BASE_URL, generate_launch_command, resolve_source, ) @@ -201,7 +199,7 @@ def apply( def _fetch_remote_card(url: str) -> ServingCard: """Fetch a servingcard from a URL.""" try: - from urllib.request import urlopen, Request + from urllib.request import Request, urlopen req = Request(url, headers={"User-Agent": "servingcard-cli/0.1"}) with urlopen(req, timeout=15) as resp: diff --git a/packages/python/servingcard/schema.py b/packages/python/servingcard/schema.py index 
c212d76..15c9ab5 100644 --- a/packages/python/servingcard/schema.py +++ b/packages/python/servingcard/schema.py @@ -6,7 +6,7 @@ from typing import Any import yaml -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, field_validator class BenchmarkObservation(BaseModel): @@ -78,11 +78,33 @@ class HardwareDetails(BaseModel): architecture: str | None = None +_QUANT_BITS: dict[str, int] = { + "fp4": 4, + "nvfp4": 4, + "int4": 4, + "fp8": 8, + "int8": 8, + "fp16": 16, + "bf16": 16, + "fp32": 32, +} + + class QuantizationSection(BaseModel): - """Quantization configuration.""" + """Quantization configuration. + + Accepts either a structured form (`{method: fp8, bits: 8}`) or a bare + string shorthand (`fp8`) coerced via `from_shorthand`. Bit width is + inferred from the well-known method name; unknown shorthands default + to 0 bits (no warning is emitted) rather than failing the load. + """ method: str - bits: int + bits: int = 0 + + @classmethod + def from_shorthand(cls, value: str) -> "QuantizationSection": + return cls(method=value, bits=_QUANT_BITS.get(value.lower(), 0)) class SpeculativeDecodingSection(BaseModel): @@ -169,6 +191,13 @@ class ServingCard(BaseModel): hardware_details: HardwareDetails | None = None quantization: QuantizationSection | None = None + + @field_validator("quantization", mode="before") + @classmethod + def _coerce_quantization(cls, v: object) -> object: + if isinstance(v, str): + return QuantizationSection.from_shorthand(v) + return v speculative_decoding: SpeculativeDecodingSection | None = None benchmark: BenchmarkSection | None = None benchmarks: list[BenchmarkObservation] | None = None diff --git a/packages/python/servingcard/validate.py b/packages/python/servingcard/validate.py index b61fd2d..a57c5b3 100644 --- a/packages/python/servingcard/validate.py +++ b/packages/python/servingcard/validate.py @@ -32,7 +32,7 @@ def validate_card(path: Path) -> list[str]: if not path.exists(): return [f"File not 
found: {path}"] - if not path.suffix in (".yaml", ".yml"): + if path.suffix not in (".yaml", ".yml"): errors.append(f"Expected .yaml or .yml extension, got: {path.suffix}") try: diff --git a/packages/python/tests/conftest.py b/packages/python/tests/conftest.py index 14ce213..46346eb 100644 --- a/packages/python/tests/conftest.py +++ b/packages/python/tests/conftest.py @@ -7,7 +7,6 @@ import pytest import yaml - REAL_EAGLE3_CONFIG = ( Path(__file__).resolve().parents[3] / "registry" / "qwen3-coder" / "gb10-fp8-eagle3-spec3.yaml" ) diff --git a/packages/python/tests/test_apply.py b/packages/python/tests/test_apply.py index 6e683e7..fc73396 100644 --- a/packages/python/tests/test_apply.py +++ b/packages/python/tests/test_apply.py @@ -2,8 +2,6 @@ from __future__ import annotations -import pytest - from servingcard.apply import ( REGISTRY_BASE_URL, generate_launch_command, @@ -12,13 +10,9 @@ resolve_source, ) from servingcard.schema import ( - CapacitySection, ServingCard, - ServingSection, - SpeculativeDecodingSection, ) - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/packages/python/tests/test_backends.py b/packages/python/tests/test_backends.py index e0bc070..e70a4ba 100644 --- a/packages/python/tests/test_backends.py +++ b/packages/python/tests/test_backends.py @@ -13,7 +13,6 @@ get_backend, ) - # --------------------------------------------------------------------------- # 1. 
ManualBackend.run returns expected keys (mock input) # --------------------------------------------------------------------------- diff --git a/packages/python/tests/test_schema.py b/packages/python/tests/test_schema.py index 5451b70..c82e6fc 100644 --- a/packages/python/tests/test_schema.py +++ b/packages/python/tests/test_schema.py @@ -10,7 +10,6 @@ from servingcard.schema import ( BenchmarkEntry, - BenchmarkSection, CapacitySection, PawBenchResults, ServingCard, @@ -18,7 +17,6 @@ SpeculativeDecodingSection, ) - # --------------------------------------------------------------------------- # 1. from_yaml loads real config # --------------------------------------------------------------------------- diff --git a/packages/python/tests/test_validate.py b/packages/python/tests/test_validate.py index f47fcf4..30a6411 100644 --- a/packages/python/tests/test_validate.py +++ b/packages/python/tests/test_validate.py @@ -4,12 +4,10 @@ from pathlib import Path -import pytest import yaml from servingcard.validate import validate_card - # --------------------------------------------------------------------------- # 1. Valid config returns empty errors list # ---------------------------------------------------------------------------