From e2cea2a14779b3bdce748bf1daeee959c123d1d5 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 16 Jun 2026 16:18:44 +0800 Subject: [PATCH 1/3] feat: support eval onnx file --- scripts/e2e_eval/build_registry.py | 8 +- scripts/e2e_eval/run_eval.py | 130 +++++++++++++++--- scripts/e2e_eval/testsets/models_all.json | 8 +- scripts/e2e_eval/testsets/models_curated.json | 4 +- scripts/e2e_eval/utils/registry.py | 5 + tests/unit/eval/test_run_eval_script.py | 128 ++++++++++++++++- 6 files changed, 255 insertions(+), 28 deletions(-) diff --git a/scripts/e2e_eval/build_registry.py b/scripts/e2e_eval/build_registry.py index 547d8208a..5d3e14191 100644 --- a/scripts/e2e_eval/build_registry.py +++ b/scripts/e2e_eval/build_registry.py @@ -234,7 +234,7 @@ def build_registry( # Soft filter: prioritize Optimum-supported models, then fill remaining slots if top_n == 0: safe_print("\n top_n=0 — skipping HF top-N queries") - for task in (tasks if top_n > 0 else []): + for task in tasks if top_n > 0 else []: safe_print(f"\n Task: {task}") # Fetch extra candidates to allow Optimum-first selection candidates = get_models_for_task(task, top_n * 3) @@ -380,6 +380,9 @@ def build_registry( existing["priority"] = priority existing["group"] = group safe_print(f" [{priority}] {model_id} / {task} — updated (group={group})") + # Sync curated onnx_file (pre-exported ONNX models) onto the row. + if c.get("onnx_file"): + existing["onnx_file"] = c["onnx_file"] continue # New curated entry — fetch metadata if not already loaded @@ -399,6 +402,9 @@ def build_registry( "last_update_time": metadata["last_modified"], "optimum_supported": is_optimum, } + # Carry through pre-exported ONNX filename when the curated row sets it. 
+ if c.get("onnx_file"): + entry["onnx_file"] = c["onnx_file"] seen.add(key) entry_lookup[key] = entry diff --git a/scripts/e2e_eval/run_eval.py b/scripts/e2e_eval/run_eval.py index 6eefb924b..dd6dcec25 100644 --- a/scripts/e2e_eval/run_eval.py +++ b/scripts/e2e_eval/run_eval.py @@ -30,6 +30,7 @@ import argparse import contextlib +import dataclasses import json import logging import os @@ -477,6 +478,73 @@ def _watchdog() -> None: # --------------------------------------------------------------------------- +# winml's optimize stage requires opset >= 12 (see +# winml.modelkit.analyze.models.onnx_model.validate_opset_version). Pre-exported +# ONNX below this is upgraded to a modern opset before config/build. 17 matches +# winml's own default export opset. +_WINML_MIN_OPSET = 12 +_OPSET_UPGRADE_TARGET = 17 + + +def _ensure_min_opset(onnx_path: str, model_dir: Path) -> str: + """Upgrade an ONNX file to winml's minimum opset if it ships below it. + + Some pre-exported ONNX models (e.g. PaddleOCR's ``inference.onnx``) ship at + opset 11. onnxruntime can run them, but winml's optimize stage rejects + opset < 12. Convert to :data:`_OPSET_UPGRADE_TARGET` and write the result + into ``model_dir`` so config/build consume the upgraded graph. Models already + at/above the minimum are returned unchanged (no needless rewrite). + """ + import onnx + + model = onnx.load(onnx_path) + current = max( + (imp.version for imp in model.opset_import if imp.domain in ("", "ai.onnx")), + default=0, + ) + if current >= _WINML_MIN_OPSET: + return onnx_path + + from onnx import version_converter + + safe_print( + f" [onnx] opset {current} < {_WINML_MIN_OPSET} (winml minimum); " + f"upgrading to opset {_OPSET_UPGRADE_TARGET} ..." 
+ ) + upgraded = version_converter.convert_version(model, _OPSET_UPGRADE_TARGET) + model_dir.mkdir(parents=True, exist_ok=True) + out_path = model_dir / f"{Path(onnx_path).stem}_op{_OPSET_UPGRADE_TARGET}.onnx" + onnx.save(upgraded, str(out_path)) + safe_print(f" [onnx] upgraded ONNX: {out_path}") + return str(out_path) + + +def _resolve_model_input(entry: ModelEntry, model_dir: Path) -> str: + """Return the value to pass to winml's ``-m`` argument for this model. + + For models that ship a pre-exported ONNX (``entry.onnx_file`` set, e.g. the + PaddleOCR ``*_onnx`` repos), download that file from the HF repo and return + its local path. winml config/build/perf accept a local ``.onnx`` path + directly (``is_onnx_file_path`` routes to the skip-export pipeline), so the + harness hands them the downloaded file instead of the HF id — avoiding the + HF->ONNX export that these architectures don't support. The file is upgraded + to winml's minimum opset first when necessary (see :func:`_ensure_min_opset`). + + Otherwise return ``entry.hf_id`` unchanged. + """ + if not entry.onnx_file: + return entry.hf_id + + # Lazy import: keeps script load cheap and matches the in-function import + # pattern used elsewhere in this module. + from huggingface_hub import hf_hub_download + + safe_print(f" [onnx] downloading {entry.onnx_file} from {entry.hf_id} ...") + local_path = hf_hub_download(repo_id=entry.hf_id, filename=entry.onnx_file) + safe_print(f" [onnx] using pre-exported ONNX: {local_path}") + return _ensure_min_opset(local_path, model_dir) + + def _run_build( entry: ModelEntry, device: str, @@ -506,12 +574,33 @@ def _run_build( safe_print(f" [config] Removing stale sub-config from prior run: {_stale.name}") _stale.unlink(missing_ok=True) + # Resolve the -m argument shared by config + build. For onnx_file models this + # downloads the pre-exported ONNX (upgrading opset if needed) and returns its + # local path; otherwise it is the HF id. 
Failures surface as a synthetic build + # failure so the run continues to the next model instead of crashing. + try: + model_input = _resolve_model_input(entry, model_dir) + except Exception as exc: + return { + "success": False, + "onnx_paths": {}, + "stage": "onnx_prepare", + "proc": { + "stdout": "", + "stderr": f"ONNX prepare failed for {entry.hf_id}/{entry.onnx_file}: {exc}", + "exit_code": -1, + "elapsed": 0, + "timeout": False, + "command": f"hf_hub_download({entry.hf_id}, {entry.onnx_file})", + }, + } + # Step 1: winml config config_args = [ *WINML_CLI, "config", "-m", - entry.hf_id, + model_input, "--device", device, "-o", @@ -563,11 +652,18 @@ def _run_build( "-c", str(sub_cfg), "-m", - entry.hf_id, - "--use-cache", + model_input, "--device", device, ] + # Direct-ONNX configs (export=None) carry no loader.task, so --use-cache + # cannot form its task-prefixed cache key; write the artifact to the + # model dir instead. HF-id builds keep using the shared model cache. + build_out_dir = model_dir / "build" + if entry.onnx_file: + build_args += ["--output-dir", str(build_out_dir)] + else: + build_args += ["--use-cache"] if ep: build_args += ["--ep", ep] # Mirror the --no-quant passed to winml config above so the build @@ -587,8 +683,16 @@ def _run_build( "proc": build_proc, } - task_hint = _extract_task_from_config(sub_cfg) or entry.task - path = _extract_onnx_path(build_proc, entry.hf_id, task_hint) + if entry.onnx_file: + # --output-dir builds write a deterministic <output-dir>/model.onnx. + # Use it directly rather than parsing stdout markers, which Rich wraps + # for long paths (an unfound path silently drops perf to a build-only + # false PASS). 
+ built = build_out_dir / "model.onnx" + path = str(built) if built.exists() else None + else: + task_hint = _extract_task_from_config(sub_cfg) or entry.task + path = _extract_onnx_path(build_proc, entry.hf_id, task_hint) if path: onnx_paths[label] = path @@ -809,9 +913,7 @@ def _build_dataset(ds_config: dict, timeout: int) -> None: return script_path = Path(build_script) - cache_dir = Path( - ds_config.get("dataset", EVAL_DATASETS_CACHE / script_path.stem) - ).expanduser() + cache_dir = Path(ds_config.get("dataset", EVAL_DATASETS_CACHE / script_path.stem)).expanduser() if (cache_dir / "dataset_info.json").exists(): safe_print(f" dataset: cached ({cache_dir})") @@ -1288,16 +1390,10 @@ def main() -> None: except Exception as e: safe_print(f" [registry] Optional enrichment skipped: {e}") if matched_entry is not None: - # Override task if explicitly provided on CLI + # Override task if explicitly provided on CLI. Use dataclasses.replace + # so all other fields (onnx_file, precision, perf_args, ...) survive. 
if args.task and args.task != matched_entry.task: - matched_entry = ModelEntry( - hf_id=matched_entry.hf_id, - task=args.task, - model_type=matched_entry.model_type, - group=matched_entry.group, - priority=matched_entry.priority, - dataset_config=matched_entry.dataset_config, - ) + matched_entry = dataclasses.replace(matched_entry, task=args.task) entries = [matched_entry] else: entries = [make_adhoc_entry(args.hf_model, args.task)] diff --git a/scripts/e2e_eval/testsets/models_all.json b/scripts/e2e_eval/testsets/models_all.json index b0bd56b2c..2e1eebece 100644 --- a/scripts/e2e_eval/testsets/models_all.json +++ b/scripts/e2e_eval/testsets/models_all.json @@ -916,22 +916,24 @@ "order": 9 }, { - "hf_id": "PaddlePaddle/PP-OCRv5_server_det", + "hf_id": "PaddlePaddle/PP-OCRv5_server_det_onnx", "task": "image-to-text", "model_type": "unknown", "group": "ISV", "priority": "P1", + "onnx_file": "inference.onnx", "downloads": 623512, "last_update_time": "2025-07-22T10:03:07+00:00", "optimum_supported": false, "order": 4 }, { - "hf_id": "PaddlePaddle/PP-OCRv5_server_rec", + "hf_id": "PaddlePaddle/PP-OCRv5_server_rec_onnx", "task": "image-to-text", "model_type": "unknown", "group": "ISV", "priority": "P1", + "onnx_file": "inference.onnx", "downloads": 196930, "last_update_time": "2025-07-22T10:04:11+00:00", "optimum_supported": false, @@ -5190,4 +5192,4 @@ "optimum_supported": true, "order": 5 } -] +] diff --git a/scripts/e2e_eval/testsets/models_curated.json b/scripts/e2e_eval/testsets/models_curated.json index 405acc558..0c4143a2a 100644 --- a/scripts/e2e_eval/testsets/models_curated.json +++ b/scripts/e2e_eval/testsets/models_curated.json @@ -30,6 +30,6 @@ {"hf_id": "timm/repghostnet_200.in1k", "group": "ISV", "priority": "P1"}, {"hf_id": "openai/clip-vit-large-patch14-336", "group": "ISV", "priority": "P1"}, {"hf_id": "timm/mobilenetv3_small_100.lamb_in1k", "group": "ISV", "priority": "P1"}, - {"hf_id": "PaddlePaddle/PP-OCRv5_server_det", "group": "ISV", "priority": 
"P1"}, - {"hf_id": "PaddlePaddle/PP-OCRv5_server_rec", "group": "ISV", "priority": "P1"} + {"hf_id": "PaddlePaddle/PP-OCRv5_server_det_onnx", "onnx_file": "inference.onnx", "group": "ISV", "priority": "P1"}, + {"hf_id": "PaddlePaddle/PP-OCRv5_server_rec_onnx","onnx_file": "inference.onnx", "group": "ISV", "priority": "P1"} ] diff --git a/scripts/e2e_eval/utils/registry.py b/scripts/e2e_eval/utils/registry.py index 3513d8306..960c15bc8 100644 --- a/scripts/e2e_eval/utils/registry.py +++ b/scripts/e2e_eval/utils/registry.py @@ -23,6 +23,10 @@ class ModelEntry: group: str priority: str precision: str | None = None + # Pre-exported ONNX filename inside the HF repo (e.g. "inference.onnx"). + # When set, the eval harness downloads this file and feeds the local path to + # winml config/build/perf instead of the hf_id, skipping the HF->ONNX export. + onnx_file: str | None = None dataset_config: dict | None = None perf_args: list[str] = field(default_factory=list) eval_args: list[str] = field(default_factory=list) @@ -69,6 +73,7 @@ def load_registry(path: Path) -> list[ModelEntry]: group=item["group"], priority=priority, precision=item.get("precision"), + onnx_file=item.get("onnx_file"), dataset_config=ds_config, perf_args=perf_args, eval_args=eval_args, diff --git a/tests/unit/eval/test_run_eval_script.py b/tests/unit/eval/test_run_eval_script.py index 85a9331cc..4b11d7391 100644 --- a/tests/unit/eval/test_run_eval_script.py +++ b/tests/unit/eval/test_run_eval_script.py @@ -83,9 +83,7 @@ def test_explicit_precision_takes_precedence_on_npu(self, run_eval): def test_skip_quant_ep_drops_default(self, run_eval): assert run_eval._resolve_precision("npu", None, ep="vitisai") is None - assert ( - run_eval._resolve_precision("npu", None, ep="VitisAIExecutionProvider") is None - ) + assert run_eval._resolve_precision("npu", None, ep="VitisAIExecutionProvider") is None def test_skip_quant_ep_drops_explicit_with_warning(self, run_eval, capsys): result = 
run_eval._resolve_precision("npu", "w8a8", ep="vitisai") @@ -109,6 +107,9 @@ def _make_entry(hf_id="microsoft/resnet-50", task="image-classification"): entry.hf_id = hf_id entry.task = task entry.perf_args = [] + # Mirror the real ModelEntry default: no pre-exported ONNX, so _run_build + # takes the HF-id path (a truthy MagicMock would trigger ONNX download). + entry.onnx_file = None return entry @staticmethod @@ -147,8 +148,9 @@ def fake_subprocess(args, _timeout): return self._make_config_proc(config_path) return self._make_build_proc() - with patch.object(run_eval, "_run_subprocess", side_effect=fake_subprocess), patch.object( - run_eval, "_extract_onnx_path", return_value=str(tmp_path / "model.onnx") + with ( + patch.object(run_eval, "_run_subprocess", side_effect=fake_subprocess), + patch.object(run_eval, "_extract_onnx_path", return_value=str(tmp_path / "model.onnx")), ): run_eval._run_build( entry, @@ -170,3 +172,119 @@ def test_vitisai_injects_no_quant_into_both_config_and_build(self, run_eval, tmp def test_other_ep_omits_no_quant(self, run_eval, tmp_path): calls = self._invoke(run_eval, "dml", tmp_path) assert all("--no-quant" not in args for args in calls) + + +def _write_onnx(path: Path, opset: int) -> None: + """Write a minimal single-node ONNX model at the given opset.""" + import onnx + from onnx import TensorProto, helper + + x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [1]) + y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [1]) + graph = helper.make_graph([helper.make_node("Identity", ["x"], ["y"])], "g", [x], [y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", opset)]) + onnx.save(model, str(path)) + + +class TestEnsureMinOpset: + """``_ensure_min_opset`` upgrades sub-minimum ONNX to winml's opset floor.""" + + def test_below_minimum_is_upgraded(self, run_eval, tmp_path): + import onnx + + src = tmp_path / "low.onnx" + _write_onnx(src, opset=11) # below winml minimum (12) + out = 
run_eval._ensure_min_opset(str(src), tmp_path) + + assert out != str(src) # a new, upgraded file + assert Path(out).exists() + upgraded = max( + i.version for i in onnx.load(out).opset_import if i.domain in ("", "ai.onnx") + ) + assert upgraded == run_eval._OPSET_UPGRADE_TARGET + + def test_at_or_above_minimum_is_unchanged(self, run_eval, tmp_path): + src = tmp_path / "ok.onnx" + _write_onnx(src, opset=run_eval._WINML_MIN_OPSET) # exactly the floor + assert run_eval._ensure_min_opset(str(src), tmp_path) == str(src) + + +class TestOnnxFilePreBuiltModel: + """Models declaring ``onnx_file`` download the pre-exported ONNX and feed the + local path to winml config/build via ``-m``, building with ``--output-dir`` + (direct-ONNX configs have no ``loader.task`` for ``--use-cache``). + """ + + @staticmethod + def _make_entry(onnx_file: str | None = "inference.onnx"): + entry = MagicMock() + entry.hf_id = "PaddlePaddle/PP-OCRv5_server_det_onnx" + entry.task = "image-to-text" + entry.perf_args = [] + entry.onnx_file = onnx_file + return entry + + @staticmethod + def _model_arg(args: list[str]) -> str: + """Return the value after the *model* ``-m`` (WINML_CLI itself carries a + leading ``python -m winml.modelkit.cli``, so take the last ``-m``).""" + idx = len(args) - 1 - args[::-1].index("-m") + return args[idx + 1] + + def test_no_onnx_file_returns_hf_id(self, run_eval, tmp_path): + entry = self._make_entry(onnx_file=None) + assert run_eval._resolve_model_input(entry, tmp_path) == entry.hf_id + + def test_onnx_file_downloads_then_ensures_opset(self, run_eval, tmp_path): + entry = self._make_entry() + fake_dl = str(tmp_path / "inference.onnx") + with ( + patch.object(run_eval, "_ensure_min_opset", return_value="UPGRADED") as ensure, + patch("huggingface_hub.hf_hub_download", return_value=fake_dl) as download, + ): + result = run_eval._resolve_model_input(entry, tmp_path) + + download.assert_called_once_with(repo_id=entry.hf_id, filename="inference.onnx") + 
ensure.assert_called_once_with(fake_dl, tmp_path) + assert result == "UPGRADED" + + def test_run_build_uses_output_dir_and_onnx_path(self, run_eval, tmp_path): + entry = self._make_entry() + onnx_path = str(tmp_path / "inference_op17.onnx") + # winml build writes a deterministic <output-dir>/model.onnx + build_out = tmp_path / "build" + build_out.mkdir() + (build_out / "model.onnx").write_text("x") + config_path = tmp_path / "build_config.json" + config_path.write_text("{}") + + captured: list[list[str]] = [] + + def fake_subprocess(args, _timeout): + captured.append(list(args)) + stdout = f"Generated {config_path}" if "config" in args else "" + return { + "exit_code": 0, + "stdout": stdout, + "stderr": "", + "elapsed": 0.1, + "command": "winml ...", + } + + with ( + patch.object(run_eval, "_resolve_model_input", return_value=onnx_path), + patch.object(run_eval, "_run_subprocess", side_effect=fake_subprocess), + ): + result = run_eval._run_build(entry, "cpu", None, 300, tmp_path, ep=None) + + config_call = next(a for a in captured if "config" in a) + build_call = next(a for a in captured if "build" in a) + # -m points at the local ONNX for both config and build + assert self._model_arg(config_call) == onnx_path + assert self._model_arg(build_call) == onnx_path + # build writes to --output-dir, never --use-cache (no loader.task) + assert "--output-dir" in build_call + assert "--use-cache" not in build_call + # artifact resolved deterministically (not via stdout parsing) + assert result["success"] is True + assert result["onnx_paths"][""] == str(build_out / "model.onnx") From 82f3bd4830dfca9a1d18d1519184b670068362ba Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 16 Jun 2026 16:28:21 +0800 Subject: [PATCH 2/3] fix(e2e-eval): use consistent onnx import to satisfy CodeQL CodeQL flagged 'onnx' being imported with both 'import onnx' and 'from onnx import ...' in the same module. 
Use attribute access (onnx.version_converter / onnx.helper / onnx.TensorProto) throughout instead of mixing import styles. --- scripts/e2e_eval/run_eval.py | 4 +--- tests/unit/eval/test_run_eval_script.py | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/e2e_eval/run_eval.py b/scripts/e2e_eval/run_eval.py index dd6dcec25..b9b81a6a9 100644 --- a/scripts/e2e_eval/run_eval.py +++ b/scripts/e2e_eval/run_eval.py @@ -505,13 +505,11 @@ def _ensure_min_opset(onnx_path: str, model_dir: Path) -> str: if current >= _WINML_MIN_OPSET: return onnx_path - from onnx import version_converter - safe_print( f" [onnx] opset {current} < {_WINML_MIN_OPSET} (winml minimum); " f"upgrading to opset {_OPSET_UPGRADE_TARGET} ..." ) - upgraded = version_converter.convert_version(model, _OPSET_UPGRADE_TARGET) + upgraded = onnx.version_converter.convert_version(model, _OPSET_UPGRADE_TARGET) model_dir.mkdir(parents=True, exist_ok=True) out_path = model_dir / f"{Path(onnx_path).stem}_op{_OPSET_UPGRADE_TARGET}.onnx" onnx.save(upgraded, str(out_path)) diff --git a/tests/unit/eval/test_run_eval_script.py b/tests/unit/eval/test_run_eval_script.py index 4b11d7391..b292f58f1 100644 --- a/tests/unit/eval/test_run_eval_script.py +++ b/tests/unit/eval/test_run_eval_script.py @@ -177,10 +177,10 @@ def test_other_ep_omits_no_quant(self, run_eval, tmp_path): def _write_onnx(path: Path, opset: int) -> None: """Write a minimal single-node ONNX model at the given opset.""" import onnx - from onnx import TensorProto, helper - x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [1]) - y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [1]) + helper = onnx.helper + x = helper.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [1]) + y = helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1]) graph = helper.make_graph([helper.make_node("Identity", ["x"], ["y"])], "g", [x], [y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", opset)]) 
onnx.save(model, str(path)) From 7c1a6dc21955e8c29ae2eabc09c1a9dda622d5b0 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 16 Jun 2026 16:33:29 +0800 Subject: [PATCH 3/3] fix(e2e-eval): carry onnx_file through load_curated_entries The Phase 2 onnx_file passthrough was dead: load_curated_entries dropped every key except hf_id/task/group/priority, so a rebuild that re-added or newly-created a curated onnx_file entry would lose it. Carry onnx_file through when present. --- scripts/e2e_eval/build_registry.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/e2e_eval/build_registry.py b/scripts/e2e_eval/build_registry.py index 5d3e14191..c05cdb271 100644 --- a/scripts/e2e_eval/build_registry.py +++ b/scripts/e2e_eval/build_registry.py @@ -128,7 +128,11 @@ def load_optimum_types() -> set[str]: def load_curated_entries(curated_path: Path) -> list[dict]: - """Load curated entries (hf_id + task + group + priority) from source JSON.""" + """Load curated entries (hf_id + task + group + priority) from source JSON. + + ``onnx_file`` is carried through when present so Phase 2 can stamp it onto + the generated row (pre-exported ONNX models; see the eval harness). + """ with curated_path.open(encoding="utf-8") as f: entries = json.load(f) return [ @@ -137,6 +141,7 @@ def load_curated_entries(curated_path: Path) -> list[dict]: "task": e.get("task") or "", "group": e.get("group", "P0"), "priority": e.get("priority", "P0"), + **({"onnx_file": e["onnx_file"]} if e.get("onnx_file") else {}), } for e in entries if "hf_id" in e