From 46bb3d34603c2c50f1d69dfa988fd289602a6a96 Mon Sep 17 00:00:00 2001 From: hualxie Date: Thu, 18 Jun 2026 17:12:00 +0800 Subject: [PATCH 1/2] docs(perf): add --module usage example --- src/winml/modelkit/commands/perf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py index 3753c3cd..3c6a9868 100644 --- a/src/winml/modelkit/commands/perf.py +++ b/src/winml/modelkit/commands/perf.py @@ -9,6 +9,7 @@ Usage: winml perf -m microsoft/resnet-50 winml perf -m microsoft/resnet-50 --device npu --iterations 100 + winml perf -m microsoft/resnet-50 --module ResNetConvLayer winml perf -m bert-base-uncased --task text-classification """ From ba8d077c7771440bdfbf5380cee3c956666b5111 Mon Sep 17 00:00:00 2001 From: hualxie Date: Thu, 18 Jun 2026 17:16:09 +0800 Subject: [PATCH 2/2] fix(perf): render live HW chart for --monitor in --module mode (#654) In --module mode, --monitor created an HWMonitor and collected metrics into the JSON report but never drove the live utilization chart, so the flag appeared to do nothing (the --monitor help promises a live chart). Route the per-module monitored loop through the shared _run_monitored_loop helper that single-model mode already uses, so each module instance renders the same live chart while still persisting hw_monitor metrics to JSON. --- src/winml/modelkit/commands/perf.py | 15 +++- tests/unit/commands/test_perf_module.py | 102 ++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 2 deletions(-) diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py index 3c6a9868..dd4731b6 100644 --- a/src/winml/modelkit/commands/perf.py +++ b/src/winml/modelkit/commands/perf.py @@ -876,9 +876,20 @@ def _perf_modules( ) if hw_ctx: + # Drive the same live chart single-model mode uses so + # --monitor renders a per-module HW utilization chart + # instead of silently dumping metrics to JSON (issue #654). 
with session.perf(warmup=warmup) as stats, hw_ctx as hw: - for _ in range(total_iters): - session.run(inputs) + _run_monitored_loop( + session, + inputs, + stats, + hw, + total_iterations=total_iters, + warmup=warmup, + model_id=label, + device=resolved_device, + ) hw_metrics = hw.to_dict() else: with session.perf(warmup=warmup) as stats: diff --git a/tests/unit/commands/test_perf_module.py b/tests/unit/commands/test_perf_module.py index a5b49f83..d97d82e6 100644 --- a/tests/unit/commands/test_perf_module.py +++ b/tests/unit/commands/test_perf_module.py @@ -316,6 +316,108 @@ def test_running_model_path_in_module_result(self, tmp_path: Path) -> None: assert instance["running_model_path"] == str(running_model_path) +class TestPerfModuleMonitor: + """--monitor must drive the live HW utilization chart in --module mode. + + Regression guard for #654: previously the module path created an + HWMonitor and dumped metrics to JSON but never rendered the live chart + (via _run_monitored_loop), so --monitor appeared to do nothing. + """ + + def test_monitor_drives_live_chart_per_module(self, tmp_path: Path) -> None: + fake_cfg = MagicMock() + fake_cfg.loader.model_type = "bert" + fake_cfg.loader.module_path = "encoder.layer.0" + + fake_build_result = MagicMock() + fake_build_result.final_onnx_path = tmp_path / "model.onnx" + + fake_stats = MagicMock() + for attr in ("mean_ms", "p50_ms", "p90_ms", "p95_ms", "p99_ms", "min_ms", "max_ms"): + setattr(fake_stats, attr, 1.0) + fake_stats.samples_ms = [1.0, 1.0] + + fake_session = MagicMock() + fake_session.perf.return_value.__enter__.return_value = fake_stats + fake_session.running_model_path = tmp_path / "model_cpu_ctx.onnx" + + fake_loader_cfg = MagicMock() + fake_loader_cfg.task = "fill-mask" + + # HWMonitor instance: context-managed, with a JSON-serializable to_dict(). 
+ fake_hw = MagicMock() + fake_hw.__enter__.return_value = fake_hw + fake_hw.to_dict.return_value = {"monitor": "HWMonitor", "device_kind": None} + fake_hw_cls = MagicMock() + fake_hw_cls.is_available.return_value = True + fake_hw_cls.return_value = fake_hw + + out_path = tmp_path / "out.json" + + with ( + patch( + "winml.modelkit.sysinfo.resolve_device", + return_value=("cpu", ["cpu"]), + ), + patch( + "winml.modelkit.config.generate_hf_build_config", + return_value=[fake_cfg], + ), + patch( + "winml.modelkit.loader.resolve_loader_config", + return_value=(fake_loader_cfg, MagicMock(), MagicMock(), MagicMock()), + ), + patch( + "winml.modelkit.commands.build._instantiate_parent_model", + return_value=MagicMock(), + ), + patch( + "winml.modelkit.build.build_hf_model", + return_value=fake_build_result, + ), + patch( + "winml.modelkit.session.WinMLSession", + return_value=fake_session, + ), + patch( + "winml.modelkit.commands.perf.generate_random_inputs", + return_value={}, + ), + patch( + "winml.modelkit.session.monitor.hw_monitor.HWMonitor", + fake_hw_cls, + ), + patch( + "winml.modelkit.commands.perf._run_monitored_loop", + ) as mock_loop, + ): + runner = CliRunner() + result = runner.invoke( + main, + [ + "perf", + "-m", + "fake/model", + "--module", + "BertLayer", + "--monitor", + "--iterations", + "1", + "--warmup", + "0", + "-o", + str(out_path), + ], + ) + + assert result.exit_code == 0, result.output + # The live-chart loop must be driven once for the single module instance. + mock_loop.assert_called_once() + # And the collected HW metrics still land in the JSON report. + report = json.loads(out_path.read_text(encoding="utf-8")) + assert report["instances"][0]["hw_monitor"]["monitor"] == "HWMonitor" + + class TestPerfModuleQuantCompileToggles: """--no-quantize and --compile/--no-compile clear cfg.quant / cfg.compile independently in the per-module build (mirrors the single-model path)."""