diff --git a/src/winml/modelkit/commands/analyze.py b/src/winml/modelkit/commands/analyze.py index 4e16bb02..a8dc5f28 100644 --- a/src/winml/modelkit/commands/analyze.py +++ b/src/winml/modelkit/commands/analyze.py @@ -725,7 +725,8 @@ def _build_runtime_debug_output_path(model_path: Path, ep_name: str, device_name type=click.Choice([*ALL_EP_NAMES, "all", "auto"], case_sensitive=False), help=( "Target execution provider. Supports canonical names, aliases, and all/auto. " - "all = evaluate all rule-data-backed EPs; auto = infer from local availability" + "all = evaluate all rule-data-backed EPs; " + "auto = infer a single best target from local availability" ), ) @click.option( @@ -736,7 +737,8 @@ def _build_runtime_debug_output_path(model_path: Path, ep_name: str, device_name type=click.Choice([*SUPPORTED_DEVICES, "all", "auto"], case_sensitive=False), help=( "Target device type. Supports CPU/GPU/NPU and all/auto. " - "all = all rule-data-backed devices; auto = infer from local availability" + "all = all rule-data-backed devices; " + "auto = infer a single best target from local availability" ), ) @cli_utils.verbosity_options() @@ -897,13 +899,29 @@ def analyze( logger.error("Searched directories: %s", searched) sys.exit(2) - devices: list[str] - if device == "auto": - from ..sysinfo.device import _get_available_devices + # Resolve the EP/device selection. `all` keeps the full rule-data-backed + # set (fan-out, unchanged). `auto` resolves to a single best target from + # local availability via the shared sysinfo helpers — the same path + # build/run/perf use. A concrete value is used as-is. + from ..sysinfo import resolve_device, resolve_eps + + # Only a pinned (concrete) EP can constrain device auto-resolution. + # ``ep`` is a concrete EP/alias here unless it is the "auto"/"all" + # sentinel; the cast drops those sentinels from the type for resolve_*. + ep_hint: EPNameOrAlias | None = ( + None if ep in ("auto", "all") or ep is None else cast("EPNameOrAlias", ep) + ) - devices = list(_get_available_devices()) - elif device == "all": + devices: list[str] + if device == "all": devices = list(SUPPORTED_DEVICES) + elif device == "auto": + try: + resolved_device, _ = resolve_device(device="auto", ep=ep_hint) + except (ValueError, RuntimeError) as e: + logger.error("Could not auto-select a device: %s", e) + sys.exit(2) + devices = [resolved_device] elif device is not None: devices = [device] else: @@ -911,12 +929,25 @@ def analyze( devices = sorted(d.upper() for d in devices) eps: list[EPName | None] - if ep == "auto": - from ..sysinfo.device import _get_available_eps - - eps = list(_get_available_eps()) - elif ep == "all": + if ep == "all": eps = list(SUPPORTED_EPS) + elif ep == "auto": + # Single highest-priority EP available on the target device. With + # device == "all" there is no single device context, so fall back to + # the best available device purely for EP selection. + if device == "all": + try: + ref_device, _ = resolve_device(device="auto") + except (ValueError, RuntimeError) as e: + logger.error("Could not auto-select an execution provider: %s", e) + sys.exit(2) + else: + ref_device = devices[0] + compatible_eps = resolve_eps(ref_device) + if not compatible_eps: + logger.error("No execution provider is available for device '%s'.", ref_device) + sys.exit(2) + eps = [compatible_eps[0]] else: # ep is a specific EP or alias eps = [normalize_ep_name(ep)] @@ -937,27 +968,11 @@ def analyze( ) execution_pairs = _sort_ep_device_pairs(execution_pairs) + # Local pairs are still needed to gate --run-unknown-op probing + # (_resolve_run_unknown_op). Single-target `auto` selection is already + # local by construction, so no extra intersection/warning is required. local_pairs = set(_get_local_ep_device_pairs()) - if device == "auto" and ep == "auto": - execution_pairs = [pair for pair in execution_pairs if pair in local_pairs] - elif device == "auto": - unsupported_pairs = [pair for pair in execution_pairs if pair not in local_pairs] - if unsupported_pairs: - logger.warning( - "--device auto resolves from local availability, but --ep is pinned;" - " the following pairs are not available on this machine: %s", - ", ".join(_ep_name_device_display_name(e, d) for e, d in unsupported_pairs), - ) - elif ep == "auto": - unsupported_pairs = [pair for pair in execution_pairs if pair not in local_pairs] - if unsupported_pairs: - logger.warning( - "--ep auto resolves from local availability, but --device is pinned;" - " the following pairs are not available on this machine: %s", - ", ".join(_ep_name_device_display_name(e, d) for e, d in unsupported_pairs), - ) - if not execution_pairs: logger.error("No EP/device combination matched the current selection.") sys.exit(2) diff --git a/tests/e2e/test_analyze_e2e.py b/tests/e2e/test_analyze_e2e.py index b5d728ad..9f436b27 100644 --- a/tests/e2e/test_analyze_e2e.py +++ b/tests/e2e/test_analyze_e2e.py @@ -321,50 +321,46 @@ def test_optim_config_writes_valid_json( data = json.loads(cfg_path.read_text(encoding="utf-8")) assert isinstance(data, dict) - def test_default_device_auto_filters_local_devices_by_ep_support( + def test_default_device_auto_resolves_single_best_device_for_pinned_ep( self, onnx_model_path: Path, rules_dir: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - """Omitting ``--device`` uses ``auto`` and filters local devices by - ``EP_SUPPORTED_DEVICES``. For pinned ``qnn`` and local CPU/GPU/NPU, - execution targets are ``(qnn, GPU)`` and ``(qnn, NPU)``. + """Omitting ``--device`` resolves a single best device for the pinned EP. - The test is hardware-agnostic (AMD/Intel included): local availability - is controlled via monkeypatch rather than real machine capabilities. + ``auto`` now picks one target via the shared sysinfo helpers (like + build/run): for ``qnn`` locally available on NPU and GPU, the + highest-priority device (NPU) is chosen — a single ``(qnn, NPU)`` run. - This setup distinguishes auto-device behavior from an NPU-only default: - NPU is supported while GPU is intentionally unsupported, so running both - targets must return partial support (exit code 1). + The test is hardware-agnostic: local availability is controlled via the + ORT device->EP map monkeypatch rather than real machine capabilities. """ monkeypatch.setattr( - "winml.modelkit.sysinfo.device._get_available_devices", - lambda: ["CPU", "GPU", "NPU"], - ) - monkeypatch.setattr( - "winml.modelkit.commands.analyze._get_local_ep_device_pairs", - lambda: [ - ("QNNExecutionProvider", "NPU"), - ("QNNExecutionProvider", "GPU"), - ("QNNExecutionProvider", "CPU"), - ("DmlExecutionProvider", "GPU"), - ("CPUExecutionProvider", "CPU"), - ], + "winml.modelkit.sysinfo.device._get_device_ep_map_from_ort", + lambda: { + "npu": ("QNNExecutionProvider",), + "gpu": ("QNNExecutionProvider", "DmlExecutionProvider"), + "cpu": ("CPUExecutionProvider",), + }, ) _write_supported_rule(rules_dir, "QNNExecutionProvider", "NPU") - _write_supported_rule(rules_dir, "QNNExecutionProvider", "GPU") result = _invoke(["-m", str(onnx_model_path), "--ep", "qnn", "--quiet"]) assert result.exit_code == 0 - def test_analyze_all_eps_when_ep_omitted(self, onnx_model_path: Path, rules_dir: Path) -> None: - """Omitting ``--ep`` analyzes all supported EPs. With only one - synthetic rule the run must still complete cleanly.""" + def test_default_auto_selects_single_ep_when_ep_omitted( + self, onnx_model_path: Path, rules_dir: Path + ) -> None: + """Omitting ``--ep`` resolves a single best EP from local availability. + + With a synthetic rule present the run must complete cleanly; the auto + axis resolves from the real ORT device map (CPU EP is always available + as a fallback), so only documented exit codes are asserted.""" _write_supported_rule(rules_dir, "QNNExecutionProvider", "NPU") result = _invoke(["-m", str(onnx_model_path), "--quiet"]) - # Aggregate result depends on whether every probed EP is fully + # Aggregate result depends on whether the resolved EP is fully # supported; only assert documented exit codes. - assert result.exit_code in {0, 1} + assert result.exit_code in {0, 1, 2} # =========================================================================== diff --git a/tests/unit/analyze/test_static_analyzer_cli.py b/tests/unit/analyze/test_static_analyzer_cli.py index 082fccbf..9649c291 100644 --- a/tests/unit/analyze/test_static_analyzer_cli.py +++ b/tests/unit/analyze/test_static_analyzer_cli.py @@ -64,6 +64,18 @@ def _mock_local_ep_device_pairs(monkeypatch: pytest.MonkeyPatch) -> None: "winml.modelkit.sysinfo.device._get_available_eps", lambda: simulated_eps, ) + # The analyze command now resolves a single `auto` target via the shared + # sysinfo helpers (resolve_device / resolve_eps), which read the ORT device + # -> EP map directly. Mirror the simulated local matrix here so resolution is + # deterministic and hardware-independent. + device_ep_map: dict[str, list[str]] = {} + for _ep, _device in SIMULATED_LOCAL_EP_DEVICE_PAIRS: + device_ep_map.setdefault(_device.lower(), []).append(_ep) + simulated_device_ep_map = {d: tuple(eps) for d, eps in device_ep_map.items()} + monkeypatch.setattr( + "winml.modelkit.sysinfo.device._get_device_ep_map_from_ort", + lambda: simulated_device_ep_map, + ) @pytest.fixture(autouse=True) @@ -1163,55 +1175,45 @@ class TestAnalyzeEPDeviceSelectionMatrix: @pytest.mark.parametrize( ("ep_arg", "device_arg", "expect_exit", "expect_calls", "expect_error"), [ - # Both auto: filter to local_pairs. Output sorted by EP_SUPPORTED_DEVICES. + # Both auto: resolve a single best target via shared sysinfo helpers. + # Best device is NPU (priority npu>gpu>cpu); its best local EP is + # OpenVINO (only npu EP in the simulated matrix). ( None, None, 0, - [ - ("NvTensorRTRTXExecutionProvider", "GPU"), - ("OpenVINOExecutionProvider", "NPU"), - ("OpenVINOExecutionProvider", "CPU"), - ("DmlExecutionProvider", "GPU"), - ("CPUExecutionProvider", "CPU"), - ], + [("OpenVINOExecutionProvider", "NPU")], None, ), - # ep=auto, device=gpu: warn about non-local but run all eps that support GPU. + # ep=auto, device=gpu: single best local EP for GPU. _DEVICE_EP_MAP + # ranks NvTensorRTRTX above Dml, both locally available on GPU. ( None, "gpu", 0, - [ - ("NvTensorRTRTXExecutionProvider", "GPU"), - ("OpenVINOExecutionProvider", "GPU"), - ("DmlExecutionProvider", "GPU"), - ], + [("NvTensorRTRTXExecutionProvider", "GPU")], None, ), - # ep=openvino, device=auto: warn about non-local pairs, run all 3. + # ep=openvino, device=auto: single best local device for OpenVINO. + # OpenVINO is local on NPU and CPU; NPU wins on priority. ( "openvino", None, 0, - [ - ("OpenVINOExecutionProvider", "NPU"), - ("OpenVINOExecutionProvider", "GPU"), - ("OpenVINOExecutionProvider", "CPU"), - ], + [("OpenVINOExecutionProvider", "NPU")], None, ), - # ep=qnn, device=auto: QNN is not local, but we warn (not filter) and run. + # ep=qnn, device=auto: QNN is not local, so resolving a device fails + # the same way build/run fail — exit 2 with a clear message. ( "qnn", None, - 0, - [ - ("QNNExecutionProvider", "NPU"), - ("QNNExecutionProvider", "GPU"), - ], - None, + 2, + [], + "not available on this system", ), + # ep=qnn, device=all: `all` keeps the full fan-out (no local check), + # so both QNN-supported devices run unchanged. ( "qnn", "all", @@ -1337,14 +1339,19 @@ def test_no_rule_data_pair_runs_with_inline_skip_marker( assert call_kwargs["device"] == "GPU" @patch("winml.modelkit.analyze.ONNXStaticAnalyzer") - def test_qnn_auto_warns_about_non_local_pairs( + def test_qnn_device_auto_errors_when_not_local( self, mock_analyzer_class: MagicMock, runner: CliRunner, tmp_path: Path, mock_analyzer_result: Mock, ) -> None: - """qnn + auto device: QNN isn't locally supported but we warn (not error) and run.""" + """qnn + auto device: QNN isn't local, so device resolution fails (exit 2). + + ``auto`` resolves from local availability via the shared sysinfo helpers, + exactly like build/run. To statically analyze a non-local EP the user must + pin the device (``--device npu``) or use ``--device all``. + """ model_file = tmp_path / "test.onnx" model_file.write_bytes(b"dummy") @@ -1353,23 +1360,16 @@ def test_qnn_auto_warns_about_non_local_pairs( mock_analyzer_class.return_value = mock_instance result = runner.invoke(analyze, ["--model", str(model_file), "--ep", "qnn"]) - assert result.exit_code == 0 - assert "not available on this machine" in result.output.lower() - actual_calls = [ - (call.kwargs["ep"], call.kwargs["device"]) - for call in mock_instance.analyze.call_args_list - ] - assert actual_calls == [ - ("QNNExecutionProvider", "NPU"), - ("QNNExecutionProvider", "GPU"), - ] + assert result.exit_code == 2 + assert "not available on this system" in result.output.lower() + assert not mock_instance.analyze.called @patch( "winml.modelkit.analyze.utils.ep_utils.has_rule_data_for_ep", return_value=False, ) @patch("winml.modelkit.analyze.ONNXStaticAnalyzer") - def test_auto_specific_device_run_unknown_op_executes_local_pairs_without_rule_data( + def test_auto_ep_specific_device_run_unknown_op_executes_single_local_pair( self, mock_analyzer_class: MagicMock, _mock_has_rule: Mock, @@ -1377,12 +1377,12 @@ def test_auto_specific_device_run_unknown_op_executes_local_pairs_without_rule_d tmp_path: Path, mock_analyzer_result: Mock, ) -> None: - """ep=auto + specific device should run all locally-eligible (ep, device) pairs. + """ep=auto + specific device resolves a single best local (ep, device) pair. - With ep=auto and device specified, no local filter is applied — pairs the - local machine doesn't support are kept (a warning is emitted) and analysis - runs for each. has_rule_data_for_ep returning False here only affects - per-pair OP CHECK rendering (op-check-skipped), not which pairs run. + With ep=auto the shared resolver picks the highest-priority EP locally + available on the requested device (NvTensorRTRTX on GPU). The pair is + local, so --run-unknown-op stays enabled. has_rule_data_for_ep returning + False only affects per-pair OP CHECK rendering, not which pair runs. """ model_file = tmp_path / "test.onnx" model_file.write_bytes(b"dummy") @@ -1401,11 +1401,7 @@ def test_auto_specific_device_run_unknown_op_executes_local_pairs_without_rule_d (call.kwargs["ep"], call.kwargs["device"]) for call in mock_instance.analyze.call_args_list ] - assert actual_calls == [ - ("NvTensorRTRTXExecutionProvider", "GPU"), - ("OpenVINOExecutionProvider", "GPU"), - ("DmlExecutionProvider", "GPU"), - ] + assert actual_calls == [("NvTensorRTRTXExecutionProvider", "GPU")] class TestQDQNodeDisplayMapping: diff --git a/tests/unit/commands/test_config_value_priority.py b/tests/unit/commands/test_config_value_priority.py index d2d0e388..1a7d8e1d 100644 --- a/tests/unit/commands/test_config_value_priority.py +++ b/tests/unit/commands/test_config_value_priority.py @@ -372,8 +372,13 @@ def fake_analyze(**kw): "winml.modelkit.analyze.utils.ep_utils.has_any_rule_data", return_value=True, ), - # Deterministic Tier-3 default: when ep stays "auto" through the - # merge block, _get_available_eps -> QNN so target_ep is fixed. + # Deterministic Tier-3 default: when ep stays "auto" through the merge + # block, analyze resolves it via resolve_eps(resolved_device)[0]. Pin the + # ORT device->EP map so npu -> QNN, fixing the resolved target EP. + patch( + "winml.modelkit.sysinfo.device._get_device_ep_map_from_ort", + return_value={"npu": ("QNNExecutionProvider",)}, + ), patch( "winml.modelkit.sysinfo.device._get_available_eps", return_value=["QNNExecutionProvider"],