Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions scripts/e2e_eval/build_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ def load_optimum_types() -> set[str]:


def load_curated_entries(curated_path: Path) -> list[dict]:
"""Load curated entries (hf_id + task + group + priority) from source JSON."""
"""Load curated entries (hf_id + task + group + priority) from source JSON.

``onnx_file`` is carried through when present so Phase 2 can stamp it onto
the generated row (pre-exported ONNX models; see the eval harness).
"""
with curated_path.open(encoding="utf-8") as f:
entries = json.load(f)
return [
Expand All @@ -137,6 +141,7 @@ def load_curated_entries(curated_path: Path) -> list[dict]:
"task": e.get("task") or "",
"group": e.get("group", "P0"),
"priority": e.get("priority", "P0"),
**({"onnx_file": e["onnx_file"]} if e.get("onnx_file") else {}),
}
for e in entries
if "hf_id" in e
Expand Down Expand Up @@ -234,7 +239,7 @@ def build_registry(
# Soft filter: prioritize Optimum-supported models, then fill remaining slots
if top_n == 0:
safe_print("\n top_n=0 — skipping HF top-N queries")
for task in (tasks if top_n > 0 else []):
for task in tasks if top_n > 0 else []:
safe_print(f"\n Task: {task}")
# Fetch extra candidates to allow Optimum-first selection
candidates = get_models_for_task(task, top_n * 3)
Expand Down Expand Up @@ -380,6 +385,9 @@ def build_registry(
existing["priority"] = priority
existing["group"] = group
safe_print(f" [{priority}] {model_id} / {task} — updated (group={group})")
# Sync curated onnx_file (pre-exported ONNX models) onto the row.
if c.get("onnx_file"):
existing["onnx_file"] = c["onnx_file"]
continue

# New curated entry — fetch metadata if not already loaded
Expand All @@ -399,6 +407,9 @@ def build_registry(
"last_update_time": metadata["last_modified"],
"optimum_supported": is_optimum,
}
# Carry through pre-exported ONNX filename when the curated row sets it.
if c.get("onnx_file"):
entry["onnx_file"] = c["onnx_file"]

seen.add(key)
entry_lookup[key] = entry
Expand Down
128 changes: 111 additions & 17 deletions scripts/e2e_eval/run_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import argparse
import contextlib
import dataclasses
import json
import logging
import os
Expand Down Expand Up @@ -477,6 +478,71 @@ def _watchdog() -> None:
# ---------------------------------------------------------------------------


# Lowest ONNX opset that winml's optimize stage accepts (see
# winml.modelkit.analyze.models.onnx_model.validate_opset_version).
# Pre-exported ONNX that ships below this is upgraded before config/build.
_WINML_MIN_OPSET = 12
# Opset that below-minimum models are converted to; 17 matches winml's own
# default export opset.
_OPSET_UPGRADE_TARGET = 17


def _ensure_min_opset(onnx_path: str, model_dir: Path) -> str:
    """Return a path to an ONNX graph that meets winml's minimum opset.

    Certain pre-exported ONNX models (PaddleOCR's ``inference.onnx`` is one
    example) are published at opset 11. onnxruntime executes them fine, but
    winml's optimize stage refuses anything below :data:`_WINML_MIN_OPSET`.

    Args:
        onnx_path: Local path of the ONNX file to inspect.
        model_dir: Directory that receives the converted file when an upgrade
            is required; created on demand.

    Returns:
        ``onnx_path`` itself when the model is already at or above the minimum
        (nothing is rewritten), otherwise the path of a new
        ``<stem>_op<target>.onnx`` file inside ``model_dir`` converted to
        :data:`_OPSET_UPGRADE_TARGET`.
    """
    # Lazy import: onnx is only needed for models that ship a pre-exported file.
    import onnx

    graph = onnx.load(onnx_path)

    # Only the default ONNX operator domain is relevant; it may be spelled
    # either "" or "ai.onnx". A model declaring neither counts as opset 0.
    default_domain_versions = [
        opset.version for opset in graph.opset_import if opset.domain in ("", "ai.onnx")
    ]
    current = max(default_domain_versions, default=0)

    if current < _WINML_MIN_OPSET:
        safe_print(
            f" [onnx] opset {current} < {_WINML_MIN_OPSET} (winml minimum); "
            f"upgrading to opset {_OPSET_UPGRADE_TARGET} ..."
        )
        converted = onnx.version_converter.convert_version(graph, _OPSET_UPGRADE_TARGET)

        # Write next to the other per-model artifacts so config/build pick up
        # the upgraded graph instead of the downloaded original.
        model_dir.mkdir(parents=True, exist_ok=True)
        source_stem = Path(onnx_path).stem
        out_path = model_dir / f"{source_stem}_op{_OPSET_UPGRADE_TARGET}.onnx"
        onnx.save(converted, str(out_path))
        safe_print(f" [onnx] upgraded ONNX: {out_path}")
        return str(out_path)

    return onnx_path


def _resolve_model_input(entry: ModelEntry, model_dir: Path) -> str:
    """Pick the value handed to winml's ``-m`` argument for ``entry``.

    Entries with ``onnx_file`` set (e.g. the PaddleOCR ``*_onnx`` repos) ship a
    pre-exported ONNX. That file is downloaded from the HF repo and its local
    path is used in place of the HF id, so the harness avoids the HF->ONNX
    export that these architectures don't support. The download is passed
    through :func:`_ensure_min_opset`, which may substitute an upgraded copy
    written into ``model_dir``.

    Args:
        entry: Registry entry describing the model under evaluation.
        model_dir: Per-model working directory, forwarded to
            :func:`_ensure_min_opset` as the destination for an upgraded file.

    Returns:
        A local ``.onnx`` path when ``entry.onnx_file`` is set, otherwise
        ``entry.hf_id`` unchanged.
    """
    if entry.onnx_file:
        # Imported lazily so loading the script stays cheap; mirrors the
        # in-function import pattern used elsewhere in this module.
        from huggingface_hub import hf_hub_download

        safe_print(f" [onnx] downloading {entry.onnx_file} from {entry.hf_id} ...")
        local_path = hf_hub_download(
            repo_id=entry.hf_id,
            filename=entry.onnx_file,
        )
        safe_print(f" [onnx] using pre-exported ONNX: {local_path}")
        return _ensure_min_opset(local_path, model_dir)

    # No pre-exported ONNX declared: winml resolves the model from the HF id.
    return entry.hf_id


def _run_build(
entry: ModelEntry,
device: str,
Expand Down Expand Up @@ -506,12 +572,33 @@ def _run_build(
safe_print(f" [config] Removing stale sub-config from prior run: {_stale.name}")
_stale.unlink(missing_ok=True)

# Resolve the -m argument shared by config + build. For onnx_file models this
# downloads the pre-exported ONNX (upgrading opset if needed) and returns its
# local path; otherwise it is the HF id. Failures surface as a synthetic build
# failure so the run continues to the next model instead of crashing.
try:
model_input = _resolve_model_input(entry, model_dir)
except Exception as exc:
return {
"success": False,
"onnx_paths": {},
"stage": "onnx_prepare",
"proc": {
"stdout": "",
"stderr": f"ONNX prepare failed for {entry.hf_id}/{entry.onnx_file}: {exc}",
"exit_code": -1,
"elapsed": 0,
"timeout": False,
"command": f"hf_hub_download({entry.hf_id}, {entry.onnx_file})",
},
}

# Step 1: winml config
config_args = [
*WINML_CLI,
"config",
"-m",
entry.hf_id,
model_input,
"--device",
device,
"-o",
Expand Down Expand Up @@ -563,11 +650,18 @@ def _run_build(
"-c",
str(sub_cfg),
"-m",
entry.hf_id,
"--use-cache",
model_input,
"--device",
device,
]
# Direct-ONNX configs (export=None) carry no loader.task, so --use-cache
# cannot form its task-prefixed cache key; write the artifact to the
# model dir instead. HF-id builds keep using the shared model cache.
build_out_dir = model_dir / "build"
if entry.onnx_file:
build_args += ["--output-dir", str(build_out_dir)]
else:
build_args += ["--use-cache"]
if ep:
build_args += ["--ep", ep]
# Mirror the --no-quant passed to winml config above so the build
Expand All @@ -587,8 +681,16 @@ def _run_build(
"proc": build_proc,
}

task_hint = _extract_task_from_config(sub_cfg) or entry.task
path = _extract_onnx_path(build_proc, entry.hf_id, task_hint)
if entry.onnx_file:
# --output-dir builds write a deterministic <output-dir>/model.onnx.
# Use it directly rather than parsing stdout markers, which Rich wraps
# for long paths (an unfound path silently drops perf to a build-only
# false PASS).
built = build_out_dir / "model.onnx"
path = str(built) if built.exists() else None
else:
task_hint = _extract_task_from_config(sub_cfg) or entry.task
path = _extract_onnx_path(build_proc, entry.hf_id, task_hint)
if path:
onnx_paths[label] = path

Expand Down Expand Up @@ -809,9 +911,7 @@ def _build_dataset(ds_config: dict, timeout: int) -> None:
return

script_path = Path(build_script)
cache_dir = Path(
ds_config.get("dataset", EVAL_DATASETS_CACHE / script_path.stem)
).expanduser()
cache_dir = Path(ds_config.get("dataset", EVAL_DATASETS_CACHE / script_path.stem)).expanduser()

if (cache_dir / "dataset_info.json").exists():
safe_print(f" dataset: cached ({cache_dir})")
Expand Down Expand Up @@ -1288,16 +1388,10 @@ def main() -> None:
except Exception as e:
safe_print(f" [registry] Optional enrichment skipped: {e}")
if matched_entry is not None:
# Override task if explicitly provided on CLI
# Override task if explicitly provided on CLI. Use dataclasses.replace
# so all other fields (onnx_file, precision, perf_args, ...) survive.
if args.task and args.task != matched_entry.task:
matched_entry = ModelEntry(
hf_id=matched_entry.hf_id,
task=args.task,
model_type=matched_entry.model_type,
group=matched_entry.group,
priority=matched_entry.priority,
dataset_config=matched_entry.dataset_config,
)
matched_entry = dataclasses.replace(matched_entry, task=args.task)
entries = [matched_entry]
else:
entries = [make_adhoc_entry(args.hf_model, args.task)]
Expand Down
8 changes: 5 additions & 3 deletions scripts/e2e_eval/testsets/models_all.json
Original file line number Diff line number Diff line change
Expand Up @@ -916,22 +916,24 @@
"order": 9
},
{
"hf_id": "PaddlePaddle/PP-OCRv5_server_det",
"hf_id": "PaddlePaddle/PP-OCRv5_server_det_onnx",
"task": "image-to-text",
"model_type": "unknown",
"group": "ISV",
"priority": "P1",
"onnx_file": "inference.onnx",
"downloads": 623512,
"last_update_time": "2025-07-22T10:03:07+00:00",
"optimum_supported": false,
"order": 4
},
{
"hf_id": "PaddlePaddle/PP-OCRv5_server_rec",
"hf_id": "PaddlePaddle/PP-OCRv5_server_rec_onnx",
"task": "image-to-text",
"model_type": "unknown",
"group": "ISV",
"priority": "P1",
"onnx_file": "inference.onnx",
"downloads": 196930,
"last_update_time": "2025-07-22T10:04:11+00:00",
"optimum_supported": false,
Expand Down Expand Up @@ -5190,4 +5192,4 @@
"optimum_supported": true,
"order": 5
}
]
]
4 changes: 2 additions & 2 deletions scripts/e2e_eval/testsets/models_curated.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@
{"hf_id": "timm/repghostnet_200.in1k", "group": "ISV", "priority": "P1"},
{"hf_id": "openai/clip-vit-large-patch14-336", "group": "ISV", "priority": "P1"},
{"hf_id": "timm/mobilenetv3_small_100.lamb_in1k", "group": "ISV", "priority": "P1"},
{"hf_id": "PaddlePaddle/PP-OCRv5_server_det", "group": "ISV", "priority": "P1"},
{"hf_id": "PaddlePaddle/PP-OCRv5_server_rec", "group": "ISV", "priority": "P1"}
{"hf_id": "PaddlePaddle/PP-OCRv5_server_det_onnx", "onnx_file": "inference.onnx", "group": "ISV", "priority": "P1"},
{"hf_id": "PaddlePaddle/PP-OCRv5_server_rec_onnx", "onnx_file": "inference.onnx", "group": "ISV", "priority": "P1"}
]
5 changes: 5 additions & 0 deletions scripts/e2e_eval/utils/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ class ModelEntry:
group: str
priority: str
precision: str | None = None
# Pre-exported ONNX filename inside the HF repo (e.g. "inference.onnx").
# When set, the eval harness downloads this file and feeds the local path to
# winml config/build/perf instead of the hf_id, skipping the HF->ONNX export.
onnx_file: str | None = None
dataset_config: dict | None = None
perf_args: list[str] = field(default_factory=list)
eval_args: list[str] = field(default_factory=list)
Expand Down Expand Up @@ -69,6 +73,7 @@ def load_registry(path: Path) -> list[ModelEntry]:
group=item["group"],
priority=priority,
precision=item.get("precision"),
onnx_file=item.get("onnx_file"),
dataset_config=ds_config,
perf_args=perf_args,
eval_args=eval_args,
Expand Down
Loading
Loading