microsoft · fangyangci · Jun 18, 2026 · Jun 18, 2026
@@ -725,7 +725,8 @@ def _build_runtime_debug_output_path(model_path: Path, ep_name: str, device_name
     type=click.Choice([*ALL_EP_NAMES, "all", "auto"], case_sensitive=False),
     help=(
         "Target execution provider. Supports canonical names, aliases, and all/auto. "
-        "all = evaluate all rule-data-backed EPs; auto = infer from local availability"
+        "all = evaluate all rule-data-backed EPs; "
+        "auto = infer a single best target from local availability"
     ),
 )
 @click.option(
@@ -736,7 +737,8 @@ def _build_runtime_debug_output_path(model_path: Path, ep_name: str, device_name
     type=click.Choice([*SUPPORTED_DEVICES, "all", "auto"], case_sensitive=False),
     help=(
         "Target device type. Supports CPU/GPU/NPU and all/auto. "
-        "all = all rule-data-backed devices; auto = infer from local availability"
+        "all = all rule-data-backed devices; "
+        "auto = infer a single best target from local availability"
     ),
 )
 @cli_utils.verbosity_options()
@@ -897,26 +899,55 @@ def analyze(
             logger.error("Searched directories: %s", searched)
             sys.exit(2)
 
-        devices: list[str]
-        if device == "auto":
-            from ..sysinfo.device import _get_available_devices
+        # Resolve the EP/device selection. `all` keeps the full rule-data-backed
+        # set (fan-out, unchanged). `auto` resolves to a single best target from
+        # local availability via the shared sysinfo helpers — the same path
+        # build/run/perf use. A concrete value is used as-is.
+        from ..sysinfo import resolve_device, resolve_eps
+
+        # Only a pinned (concrete) EP can constrain device auto-resolution.
+        # ``ep`` is a concrete EP/alias here unless it is the "auto"/"all"
+        # sentinel; the cast drops those sentinels from the type for resolve_*.
+        ep_hint: EPNameOrAlias | None = (
+            None if ep in ("auto", "all") or ep is None else cast("EPNameOrAlias", ep)
+        )
 
-            devices = list(_get_available_devices())
-        elif device == "all":
+        devices: list[str]
+        if device == "all":
             devices = list(SUPPORTED_DEVICES)
+        elif device == "auto":
+            try:
+                resolved_device, _ = resolve_device(device="auto", ep=ep_hint)
+            except (ValueError, RuntimeError) as e:
+                logger.error("Could not auto-select a device: %s", e)
+                sys.exit(2)
+            devices = [resolved_device]
         elif device is not None:
             devices = [device]
         else:
             devices = []
         devices = sorted(d.upper() for d in devices)
 
         eps: list[EPName | None]
-        if ep == "auto":
-            from ..sysinfo.device import _get_available_eps
-
-            eps = list(_get_available_eps())
-        elif ep == "all":
+        if ep == "all":
             eps = list(SUPPORTED_EPS)
+        elif ep == "auto":
+            # Single highest-priority EP available on the target device. With
+            # device == "all" there is no single device context, so fall back to
+            # the best available device purely for EP selection.
+            if device == "all":
+                try:
+                    ref_device, _ = resolve_device(device="auto")
+                except (ValueError, RuntimeError) as e:
+                    logger.error("Could not auto-select an execution provider: %s", e)
+                    sys.exit(2)
+            else:
+                ref_device = devices[0]
+            compatible_eps = resolve_eps(ref_device)
+            if not compatible_eps:
+                logger.error("No execution provider is available for device '%s'.", ref_device)
+                sys.exit(2)
+            eps = [compatible_eps[0]]
         else:
             # ep is a specific EP or alias
             eps = [normalize_ep_name(ep)]
@@ -937,27 +968,11 @@ def analyze(
             )
         execution_pairs = _sort_ep_device_pairs(execution_pairs)
 
+        # Local pairs are still needed to gate --run-unknown-op probing
+        # (_resolve_run_unknown_op). Single-target `auto` selection is already
+        # local by construction, so no extra intersection/warning is required.
         local_pairs = set(_get_local_ep_device_pairs())
 
-        if device == "auto" and ep == "auto":
-            execution_pairs = [pair for pair in execution_pairs if pair in local_pairs]
-        elif device == "auto":
-            unsupported_pairs = [pair for pair in execution_pairs if pair not in local_pairs]
-            if unsupported_pairs:
-                logger.warning(
-                    "--device auto resolves from local availability, but --ep is pinned;"
-                    " the following pairs are not available on this machine: %s",
-                    ", ".join(_ep_name_device_display_name(e, d) for e, d in unsupported_pairs),
-                )
-        elif ep == "auto":
-            unsupported_pairs = [pair for pair in execution_pairs if pair not in local_pairs]
-            if unsupported_pairs:
-                logger.warning(
-                    "--ep auto resolves from local availability, but --device is pinned;"
-                    " the following pairs are not available on this machine: %s",
-                    ", ".join(_ep_name_device_display_name(e, d) for e, d in unsupported_pairs),
-                )
-
         if not execution_pairs:
             logger.error("No EP/device combination matched the current selection.")
             sys.exit(2)

@@ -321,50 +321,46 @@ def test_optim_config_writes_valid_json(
         data = json.loads(cfg_path.read_text(encoding="utf-8"))
         assert isinstance(data, dict)
 
-    def test_default_device_auto_filters_local_devices_by_ep_support(
+    def test_default_device_auto_resolves_single_best_device_for_pinned_ep(
         self,
         onnx_model_path: Path,
         rules_dir: Path,
         monkeypatch: pytest.MonkeyPatch,
     ) -> None:
-        """Omitting ``--device`` uses ``auto`` and filters local devices by
-        ``EP_SUPPORTED_DEVICES``. For pinned ``qnn`` and local CPU/GPU/NPU,
-        execution targets are ``(qnn, GPU)`` and ``(qnn, NPU)``.
+        """Omitting ``--device`` resolves a single best device for the pinned EP.
 
-        The test is hardware-agnostic (AMD/Intel included): local availability
-        is controlled via monkeypatch rather than real machine capabilities.
+        ``auto`` now picks one target via the shared sysinfo helpers (like
+        build/run): for ``qnn`` locally available on NPU and GPU, the
+        highest-priority device (NPU) is chosen — a single ``(qnn, NPU)`` run.
 
-        This setup distinguishes auto-device behavior from an NPU-only default:
-        NPU is supported while GPU is intentionally unsupported, so running both
-        targets must return partial support (exit code 1).
+        The test is hardware-agnostic: local availability is controlled via the
+        ORT device->EP map monkeypatch rather than real machine capabilities.
         """
         monkeypatch.setattr(
-            "winml.modelkit.sysinfo.device._get_available_devices",
-            lambda: ["CPU", "GPU", "NPU"],
-        )
-        monkeypatch.setattr(
-            "winml.modelkit.commands.analyze._get_local_ep_device_pairs",
-            lambda: [
-                ("QNNExecutionProvider", "NPU"),
-                ("QNNExecutionProvider", "GPU"),
-                ("QNNExecutionProvider", "CPU"),
-                ("DmlExecutionProvider", "GPU"),
-                ("CPUExecutionProvider", "CPU"),
-            ],
+            "winml.modelkit.sysinfo.device._get_device_ep_map_from_ort",
+            lambda: {
+                "npu": ("QNNExecutionProvider",),
+                "gpu": ("QNNExecutionProvider", "DmlExecutionProvider"),
+                "cpu": ("CPUExecutionProvider",),
+            },
         )
         _write_supported_rule(rules_dir, "QNNExecutionProvider", "NPU")
-        _write_supported_rule(rules_dir, "QNNExecutionProvider", "GPU")
         result = _invoke(["-m", str(onnx_model_path), "--ep", "qnn", "--quiet"])
         assert result.exit_code == 0
 
-    def test_analyze_all_eps_when_ep_omitted(self, onnx_model_path: Path, rules_dir: Path) -> None:
-        """Omitting ``--ep`` analyzes all supported EPs. With only one
-        synthetic rule the run must still complete cleanly."""
+    def test_default_auto_selects_single_ep_when_ep_omitted(
+        self, onnx_model_path: Path, rules_dir: Path
+    ) -> None:
+        """Omitting ``--ep`` resolves a single best EP from local availability.
+
+        With a synthetic rule present the run must complete cleanly; the auto
+        axis resolves from the real ORT device map (CPU EP is always available
+        as a fallback), so only documented exit codes are asserted."""
         _write_supported_rule(rules_dir, "QNNExecutionProvider", "NPU")
         result = _invoke(["-m", str(onnx_model_path), "--quiet"])
-        # Aggregate result depends on whether every probed EP is fully
+        # Aggregate result depends on whether the resolved EP is fully
         # supported; only assert documented exit codes.
-        assert result.exit_code in {0, 1}
+        assert result.exit_code in {0, 1, 2}
 
 
 # ===========================================================================

@@ -64,6 +64,18 @@ def _mock_local_ep_device_pairs(monkeypatch: pytest.MonkeyPatch) -> None:
         "winml.modelkit.sysinfo.device._get_available_eps",
         lambda: simulated_eps,
     )
+    # The analyze command now resolves a single `auto` target via the shared
+    # sysinfo helpers (resolve_device / resolve_eps), which read the ORT device
+    # -> EP map directly. Mirror the simulated local matrix here so resolution is
+    # deterministic and hardware-independent.
+    device_ep_map: dict[str, list[str]] = {}
+    for _ep, _device in SIMULATED_LOCAL_EP_DEVICE_PAIRS:
+        device_ep_map.setdefault(_device.lower(), []).append(_ep)
+    simulated_device_ep_map = {d: tuple(eps) for d, eps in device_ep_map.items()}
+    monkeypatch.setattr(
+        "winml.modelkit.sysinfo.device._get_device_ep_map_from_ort",
+        lambda: simulated_device_ep_map,
+    )
 
 
 @pytest.fixture(autouse=True)
@@ -1163,55 +1175,45 @@ class TestAnalyzeEPDeviceSelectionMatrix:
     @pytest.mark.parametrize(
         ("ep_arg", "device_arg", "expect_exit", "expect_calls", "expect_error"),
         [
-            # Both auto: filter to local_pairs. Output sorted by EP_SUPPORTED_DEVICES.
+            # Both auto: resolve a single best target via shared sysinfo helpers.
+            # Best device is NPU (priority npu>gpu>cpu); its best local EP is
+            # OpenVINO (only npu EP in the simulated matrix).
             (
                 None,
                 None,
                 0,
-                [
-                    ("NvTensorRTRTXExecutionProvider", "GPU"),
-                    ("OpenVINOExecutionProvider", "NPU"),
-                    ("OpenVINOExecutionProvider", "CPU"),
-                    ("DmlExecutionProvider", "GPU"),
-                    ("CPUExecutionProvider", "CPU"),
-                ],
+                [("OpenVINOExecutionProvider", "NPU")],
                 None,
             ),
-            # ep=auto, device=gpu: warn about non-local but run all eps that support GPU.
+            # ep=auto, device=gpu: single best local EP for GPU. _DEVICE_EP_MAP
+            # ranks NvTensorRTRTX above Dml, both locally available on GPU.
             (
                 None,
                 "gpu",
                 0,
-                [
-                    ("NvTensorRTRTXExecutionProvider", "GPU"),
-                    ("OpenVINOExecutionProvider", "GPU"),
-                    ("DmlExecutionProvider", "GPU"),
-                ],
+                [("NvTensorRTRTXExecutionProvider", "GPU")],
                 None,
             ),
-            # ep=openvino, device=auto: warn about non-local pairs, run all 3.
+            # ep=openvino, device=auto: single best local device for OpenVINO.
+            # OpenVINO is local on NPU and CPU; NPU wins on priority.
             (
                 "openvino",
                 None,
                 0,
-                [
-                    ("OpenVINOExecutionProvider", "NPU"),
-                    ("OpenVINOExecutionProvider", "GPU"),
-                    ("OpenVINOExecutionProvider", "CPU"),
-                ],
+                [("OpenVINOExecutionProvider", "NPU")],
                 None,
             ),
-            # ep=qnn, device=auto: QNN is not local, but we warn (not filter) and run.
+            # ep=qnn, device=auto: QNN is not local, so resolving a device fails
+            # the same way build/run fail — exit 2 with a clear message.
             (
                 "qnn",
                 None,
-                0,
-                [
-                    ("QNNExecutionProvider", "NPU"),
-                    ("QNNExecutionProvider", "GPU"),
-                ],
-                None,
+                2,
+                [],
+                "not available on this system",
             ),
+            # ep=qnn, device=all: `all` keeps the full fan-out (no local check),
+            # so both QNN-supported devices run unchanged.
             (
                 "qnn",
                 "all",
@@ -1337,14 +1339,19 @@ def test_no_rule_data_pair_runs_with_inline_skip_marker(
         assert call_kwargs["device"] == "GPU"
 
     @patch("winml.modelkit.analyze.ONNXStaticAnalyzer")
-    def test_qnn_auto_warns_about_non_local_pairs(
+    def test_qnn_device_auto_errors_when_not_local(
         self,
         mock_analyzer_class: MagicMock,
         runner: CliRunner,
         tmp_path: Path,
         mock_analyzer_result: Mock,
     ) -> None:
-        """qnn + auto device: QNN isn't locally supported but we warn (not error) and run."""
+        """qnn + auto device: QNN isn't local, so device resolution fails (exit 2).
+
+        ``auto`` resolves from local availability via the shared sysinfo helpers,
+        exactly like build/run. To statically analyze a non-local EP the user must
+        pin the device (``--device npu``) or use ``--device all``.
+        """
         model_file = tmp_path / "test.onnx"
         model_file.write_bytes(b"dummy")
 
@@ -1353,36 +1360,29 @@ def test_qnn_auto_warns_about_non_local_pairs(
         mock_analyzer_class.return_value = mock_instance
 
         result = runner.invoke(analyze, ["--model", str(model_file), "--ep", "qnn"])
-        assert result.exit_code == 0
-        assert "not available on this machine" in result.output.lower()
-        actual_calls = [
-            (call.kwargs["ep"], call.kwargs["device"])
-            for call in mock_instance.analyze.call_args_list
-        ]
-        assert actual_calls == [
-            ("QNNExecutionProvider", "NPU"),
-            ("QNNExecutionProvider", "GPU"),
-        ]
+        assert result.exit_code == 2
+        assert "not available on this system" in result.output.lower()
+        assert not mock_instance.analyze.called
 
     @patch(
         "winml.modelkit.analyze.utils.ep_utils.has_rule_data_for_ep",
         return_value=False,
     )
     @patch("winml.modelkit.analyze.ONNXStaticAnalyzer")
-    def test_auto_specific_device_run_unknown_op_executes_local_pairs_without_rule_data(
+    def test_auto_ep_specific_device_run_unknown_op_executes_single_local_pair(
         self,
         mock_analyzer_class: MagicMock,
         _mock_has_rule: Mock,
         runner: CliRunner,
         tmp_path: Path,
         mock_analyzer_result: Mock,
     ) -> None:
-        """ep=auto + specific device should run all locally-eligible (ep, device) pairs.
+        """ep=auto + specific device resolves a single best local (ep, device) pair.
 
-        With ep=auto and device specified, no local filter is applied — pairs the
-        local machine doesn't support are kept (a warning is emitted) and analysis
-        runs for each. has_rule_data_for_ep returning False here only affects
-        per-pair OP CHECK rendering (op-check-skipped), not which pairs run.
+        With ep=auto the shared resolver picks the highest-priority EP locally
+        available on the requested device (NvTensorRTRTX on GPU). The pair is
+        local, so --run-unknown-op stays enabled. has_rule_data_for_ep returning
+        False only affects per-pair OP CHECK rendering, not which pair runs.
         """
         model_file = tmp_path / "test.onnx"
         model_file.write_bytes(b"dummy")
@@ -1401,11 +1401,7 @@ def test_auto_specific_device_run_unknown_op_executes_local_pairs_without_rule_d
             (call.kwargs["ep"], call.kwargs["device"])
             for call in mock_instance.analyze.call_args_list
         ]
-        assert actual_calls == [
-            ("NvTensorRTRTXExecutionProvider", "GPU"),
-            ("OpenVINOExecutionProvider", "GPU"),
-            ("DmlExecutionProvider", "GPU"),
-        ]
+        assert actual_calls == [("NvTensorRTRTXExecutionProvider", "GPU")]
 
 
 class TestQDQNodeDisplayMapping:

@@ -372,8 +372,13 @@ def fake_analyze(**kw):
             "winml.modelkit.analyze.utils.ep_utils.has_any_rule_data",
             return_value=True,
         ),
-        # Deterministic Tier-3 default: when ep stays "auto" through the
-        # merge block, _get_available_eps -> QNN so target_ep is fixed.
+        # Deterministic Tier-3 default: when ep stays "auto" through the merge
+        # block, analyze resolves it via resolve_eps(resolved_device)[0]. Pin the
+        # ORT device->EP map so npu -> QNN, fixing the resolved target EP.
+        patch(
+            "winml.modelkit.sysinfo.device._get_device_ep_map_from_ort",
+            return_value={"npu": ("QNNExecutionProvider",)},
+        ),
         patch(
             "winml.modelkit.sysinfo.device._get_available_eps",
             return_value=["QNNExecutionProvider"],