Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion packages/python/servingcard/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def generate_vllm_command(card: ServingCard) -> str:
def generate_tgi_command(card: ServingCard) -> str:
"""Generate a TGI launch command from a servingcard."""
if not card.serving or not card.serving.engine_args:
return f"# No engine_args in servingcard -- cannot generate TGI command\ntext-generation-launcher --model-id {card.model}"
return (
"# No engine_args in servingcard -- cannot generate TGI command\n"
f"text-generation-launcher --model-id {card.model}"
)

args = card.serving.engine_args.copy()
model_id = args.pop("model", card.model)
Expand Down
1 change: 0 additions & 1 deletion packages/python/servingcard/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import json
import shutil
import subprocess
import sys
from abc import ABC, abstractmethod


Expand Down
4 changes: 1 addition & 3 deletions packages/python/servingcard/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

import sys
from datetime import date
from pathlib import Path
from typing import Optional
Expand All @@ -11,7 +10,6 @@
import yaml

from servingcard.apply import (
REGISTRY_BASE_URL,
generate_launch_command,
resolve_source,
)
Expand Down Expand Up @@ -201,7 +199,7 @@ def apply(
def _fetch_remote_card(url: str) -> ServingCard:
"""Fetch a servingcard from a URL."""
try:
from urllib.request import urlopen, Request
from urllib.request import Request, urlopen

req = Request(url, headers={"User-Agent": "servingcard-cli/0.1"})
with urlopen(req, timeout=15) as resp:
Expand Down
35 changes: 32 additions & 3 deletions packages/python/servingcard/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any

import yaml
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, field_validator


class BenchmarkObservation(BaseModel):
Expand Down Expand Up @@ -78,11 +78,33 @@ class HardwareDetails(BaseModel):
architecture: str | None = None


_QUANT_BITS: dict[str, int] = {
"fp4": 4,
"nvfp4": 4,
"int4": 4,
"fp8": 8,
"int8": 8,
"fp16": 16,
"bf16": 16,
"fp32": 32,
}


class QuantizationSection(BaseModel):
"""Quantization configuration."""
"""Quantization configuration.

Accepts either a structured form (`{method: fp8, bits: 8}`) or a bare
string shorthand (`fp8`) coerced via `from_shorthand`. Bit width is
inferred from the well-known method name; unknown shorthands default
to 0 bits with a warning rather than failing the load.
"""

method: str
bits: int
bits: int = 0

@classmethod
def from_shorthand(cls, value: str) -> "QuantizationSection":
return cls(method=value, bits=_QUANT_BITS.get(value.lower(), 0))


class SpeculativeDecodingSection(BaseModel):
Expand Down Expand Up @@ -169,6 +191,13 @@ class ServingCard(BaseModel):

hardware_details: HardwareDetails | None = None
quantization: QuantizationSection | None = None

@field_validator("quantization", mode="before")
@classmethod
def _coerce_quantization(cls, v: object) -> object:
if isinstance(v, str):
return QuantizationSection.from_shorthand(v)
return v
speculative_decoding: SpeculativeDecodingSection | None = None
benchmark: BenchmarkSection | None = None
benchmarks: list[BenchmarkObservation] | None = None
Expand Down
2 changes: 1 addition & 1 deletion packages/python/servingcard/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def validate_card(path: Path) -> list[str]:
if not path.exists():
return [f"File not found: {path}"]

if not path.suffix in (".yaml", ".yml"):
if path.suffix not in (".yaml", ".yml"):
errors.append(f"Expected .yaml or .yml extension, got: {path.suffix}")

try:
Expand Down
1 change: 0 additions & 1 deletion packages/python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pytest
import yaml


REAL_EAGLE3_CONFIG = (
Path(__file__).resolve().parents[3] / "registry" / "qwen3-coder" / "gb10-fp8-eagle3-spec3.yaml"
)
Expand Down
6 changes: 0 additions & 6 deletions packages/python/tests/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from __future__ import annotations

import pytest

from servingcard.apply import (
REGISTRY_BASE_URL,
generate_launch_command,
Expand All @@ -12,13 +10,9 @@
resolve_source,
)
from servingcard.schema import (
CapacitySection,
ServingCard,
ServingSection,
SpeculativeDecodingSection,
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
Expand Down
1 change: 0 additions & 1 deletion packages/python/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
get_backend,
)


# ---------------------------------------------------------------------------
# 1. ManualBackend.run returns expected keys (mock input)
# ---------------------------------------------------------------------------
Expand Down
2 changes: 0 additions & 2 deletions packages/python/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@

from servingcard.schema import (
BenchmarkEntry,
BenchmarkSection,
CapacitySection,
PawBenchResults,
ServingCard,
ServingSection,
SpeculativeDecodingSection,
)


# ---------------------------------------------------------------------------
# 1. from_yaml loads real config
# ---------------------------------------------------------------------------
Expand Down
2 changes: 0 additions & 2 deletions packages/python/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@

from pathlib import Path

import pytest
import yaml

from servingcard.validate import validate_card


# ---------------------------------------------------------------------------
# 1. Valid config returns empty errors list
# ---------------------------------------------------------------------------
Expand Down
Loading