Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 46 additions & 31 deletions src/winml/modelkit/commands/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,8 @@ def _build_runtime_debug_output_path(model_path: Path, ep_name: str, device_name
type=click.Choice([*ALL_EP_NAMES, "all", "auto"], case_sensitive=False),
help=(
"Target execution provider. Supports canonical names, aliases, and all/auto. "
"all = evaluate all rule-data-backed EPs; auto = infer from local availability"
"all = evaluate all rule-data-backed EPs; "
"auto = infer a single best target from local availability"
),
)
@click.option(
Expand All @@ -736,7 +737,8 @@ def _build_runtime_debug_output_path(model_path: Path, ep_name: str, device_name
type=click.Choice([*SUPPORTED_DEVICES, "all", "auto"], case_sensitive=False),
help=(
"Target device type. Supports CPU/GPU/NPU and all/auto. "
"all = all rule-data-backed devices; auto = infer from local availability"
"all = all rule-data-backed devices; "
"auto = infer a single best target from local availability"
),
)
@cli_utils.verbosity_options()
Expand Down Expand Up @@ -897,26 +899,55 @@ def analyze(
logger.error("Searched directories: %s", searched)
sys.exit(2)

devices: list[str]
if device == "auto":
from ..sysinfo.device import _get_available_devices
# Resolve the EP/device selection. `all` keeps the full rule-data-backed
# set (fan-out, unchanged). `auto` resolves to a single best target from
# local availability via the shared sysinfo helpers — the same path
# build/run/perf use. A concrete value is used as-is.
from ..sysinfo import resolve_device, resolve_eps

# Only a pinned (concrete) EP can constrain device auto-resolution.
# ``ep`` is a concrete EP/alias here unless it is ``None`` or the
# "auto"/"all" sentinel; in those cases no hint is passed. The cast drops
# the sentinels from the type for resolve_*.
ep_hint: EPNameOrAlias | None = (
None if ep in ("auto", "all") or ep is None else cast("EPNameOrAlias", ep)
)

devices = list(_get_available_devices())
elif device == "all":
devices: list[str]
if device == "all":
devices = list(SUPPORTED_DEVICES)
elif device == "auto":
try:
resolved_device, _ = resolve_device(device="auto", ep=ep_hint)
except (ValueError, RuntimeError) as e:
logger.error("Could not auto-select a device: %s", e)
sys.exit(2)
devices = [resolved_device]
elif device is not None:
devices = [device]
else:
devices = []
devices = sorted(d.upper() for d in devices)

eps: list[EPName | None]
if ep == "auto":
from ..sysinfo.device import _get_available_eps

eps = list(_get_available_eps())
elif ep == "all":
if ep == "all":
eps = list(SUPPORTED_EPS)
elif ep == "auto":
# Single highest-priority EP available on the target device. With
# device == "all" there is no single device context, so fall back to
# the best available device purely for EP selection.
if device == "all":
try:
ref_device, _ = resolve_device(device="auto")
except (ValueError, RuntimeError) as e:
logger.error("Could not auto-select an execution provider: %s", e)
sys.exit(2)
else:
ref_device = devices[0]
compatible_eps = resolve_eps(ref_device)
if not compatible_eps:
logger.error("No execution provider is available for device '%s'.", ref_device)
sys.exit(2)
eps = [compatible_eps[0]]
else:
# ep is a specific EP or alias
eps = [normalize_ep_name(ep)]
Expand All @@ -937,27 +968,11 @@ def analyze(
)
execution_pairs = _sort_ep_device_pairs(execution_pairs)

# Local pairs are still needed to gate --run-unknown-op probing
# (_resolve_run_unknown_op). Single-target `auto` selection is already
# local by construction, so no extra intersection/warning is required.
local_pairs = set(_get_local_ep_device_pairs())

if device == "auto" and ep == "auto":
execution_pairs = [pair for pair in execution_pairs if pair in local_pairs]
elif device == "auto":
unsupported_pairs = [pair for pair in execution_pairs if pair not in local_pairs]
if unsupported_pairs:
logger.warning(
"--device auto resolves from local availability, but --ep is pinned;"
" the following pairs are not available on this machine: %s",
", ".join(_ep_name_device_display_name(e, d) for e, d in unsupported_pairs),
)
elif ep == "auto":
unsupported_pairs = [pair for pair in execution_pairs if pair not in local_pairs]
if unsupported_pairs:
logger.warning(
"--ep auto resolves from local availability, but --device is pinned;"
" the following pairs are not available on this machine: %s",
", ".join(_ep_name_device_display_name(e, d) for e, d in unsupported_pairs),
)

if not execution_pairs:
logger.error("No EP/device combination matched the current selection.")
sys.exit(2)
Expand Down
50 changes: 23 additions & 27 deletions tests/e2e/test_analyze_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,50 +321,46 @@ def test_optim_config_writes_valid_json(
data = json.loads(cfg_path.read_text(encoding="utf-8"))
assert isinstance(data, dict)

def test_default_device_auto_filters_local_devices_by_ep_support(
def test_default_device_auto_resolves_single_best_device_for_pinned_ep(
self,
onnx_model_path: Path,
rules_dir: Path,
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Omitting ``--device`` uses ``auto`` and filters local devices by
``EP_SUPPORTED_DEVICES``. For pinned ``qnn`` and local CPU/GPU/NPU,
execution targets are ``(qnn, GPU)`` and ``(qnn, NPU)``.
"""Omitting ``--device`` resolves a single best device for the pinned EP.

The test is hardware-agnostic (AMD/Intel included): local availability
is controlled via monkeypatch rather than real machine capabilities.
``auto`` now picks one target via the shared sysinfo helpers (like
build/run): for ``qnn`` locally available on NPU and GPU, the
highest-priority device (NPU) is chosen — a single ``(qnn, NPU)`` run.

This setup distinguishes auto-device behavior from an NPU-only default:
NPU is supported while GPU is intentionally unsupported, so running both
targets must return partial support (exit code 1).
The test is hardware-agnostic: local availability is controlled via the
ORT device->EP map monkeypatch rather than real machine capabilities.
"""
monkeypatch.setattr(
"winml.modelkit.sysinfo.device._get_available_devices",
lambda: ["CPU", "GPU", "NPU"],
)
monkeypatch.setattr(
"winml.modelkit.commands.analyze._get_local_ep_device_pairs",
lambda: [
("QNNExecutionProvider", "NPU"),
("QNNExecutionProvider", "GPU"),
("QNNExecutionProvider", "CPU"),
("DmlExecutionProvider", "GPU"),
("CPUExecutionProvider", "CPU"),
],
"winml.modelkit.sysinfo.device._get_device_ep_map_from_ort",
lambda: {
"npu": ("QNNExecutionProvider",),
"gpu": ("QNNExecutionProvider", "DmlExecutionProvider"),
"cpu": ("CPUExecutionProvider",),
},
)
_write_supported_rule(rules_dir, "QNNExecutionProvider", "NPU")
_write_supported_rule(rules_dir, "QNNExecutionProvider", "GPU")
result = _invoke(["-m", str(onnx_model_path), "--ep", "qnn", "--quiet"])
assert result.exit_code == 0

def test_analyze_all_eps_when_ep_omitted(self, onnx_model_path: Path, rules_dir: Path) -> None:
"""Omitting ``--ep`` analyzes all supported EPs. With only one
synthetic rule the run must still complete cleanly."""
def test_default_auto_selects_single_ep_when_ep_omitted(
self, onnx_model_path: Path, rules_dir: Path
) -> None:
"""Omitting ``--ep`` resolves a single best EP from local availability.

With a synthetic rule present the run must exit with a documented code; the
auto axis resolves from the real ORT device map (CPU EP is always available
as a fallback), so the outcome is machine-dependent: 0 or 1 when analysis
runs, 2 when no EP/device target can be resolved."""
_write_supported_rule(rules_dir, "QNNExecutionProvider", "NPU")
result = _invoke(["-m", str(onnx_model_path), "--quiet"])
# Aggregate result depends on whether every probed EP is fully
# Aggregate result depends on whether the resolved EP is fully
# supported; only assert documented exit codes.
assert result.exit_code in {0, 1}
assert result.exit_code in {0, 1, 2}


# ===========================================================================
Expand Down
96 changes: 46 additions & 50 deletions tests/unit/analyze/test_static_analyzer_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,18 @@ def _mock_local_ep_device_pairs(monkeypatch: pytest.MonkeyPatch) -> None:
"winml.modelkit.sysinfo.device._get_available_eps",
lambda: simulated_eps,
)
# The analyze command now resolves a single `auto` target via the shared
# sysinfo helpers (resolve_device / resolve_eps), which read the ORT device
# -> EP map directly. Mirror the simulated local matrix here so resolution is
# deterministic and hardware-independent.
device_ep_map: dict[str, list[str]] = {}
for _ep, _device in SIMULATED_LOCAL_EP_DEVICE_PAIRS:
device_ep_map.setdefault(_device.lower(), []).append(_ep)
simulated_device_ep_map = {d: tuple(eps) for d, eps in device_ep_map.items()}
monkeypatch.setattr(
"winml.modelkit.sysinfo.device._get_device_ep_map_from_ort",
lambda: simulated_device_ep_map,
)


@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -1163,55 +1175,45 @@ class TestAnalyzeEPDeviceSelectionMatrix:
@pytest.mark.parametrize(
("ep_arg", "device_arg", "expect_exit", "expect_calls", "expect_error"),
[
# Both auto: filter to local_pairs. Output sorted by EP_SUPPORTED_DEVICES.
# Both auto: resolve a single best target via shared sysinfo helpers.
# Best device is NPU (priority npu>gpu>cpu); its best local EP is
# OpenVINO (only npu EP in the simulated matrix).
(
None,
None,
0,
[
("NvTensorRTRTXExecutionProvider", "GPU"),
("OpenVINOExecutionProvider", "NPU"),
("OpenVINOExecutionProvider", "CPU"),
("DmlExecutionProvider", "GPU"),
("CPUExecutionProvider", "CPU"),
],
[("OpenVINOExecutionProvider", "NPU")],
None,
),
# ep=auto, device=gpu: warn about non-local but run all eps that support GPU.
# ep=auto, device=gpu: single best local EP for GPU. _DEVICE_EP_MAP
# ranks NvTensorRTRTX above Dml, both locally available on GPU.
(
None,
"gpu",
0,
[
("NvTensorRTRTXExecutionProvider", "GPU"),
("OpenVINOExecutionProvider", "GPU"),
("DmlExecutionProvider", "GPU"),
],
[("NvTensorRTRTXExecutionProvider", "GPU")],
None,
),
# ep=openvino, device=auto: warn about non-local pairs, run all 3.
# ep=openvino, device=auto: single best local device for OpenVINO.
# OpenVINO is local on NPU and CPU; NPU wins on priority.
(
"openvino",
None,
0,
[
("OpenVINOExecutionProvider", "NPU"),
("OpenVINOExecutionProvider", "GPU"),
("OpenVINOExecutionProvider", "CPU"),
],
[("OpenVINOExecutionProvider", "NPU")],
None,
),
# ep=qnn, device=auto: QNN is not local, but we warn (not filter) and run.
# ep=qnn, device=auto: QNN is not local, so resolving a device fails
# the same way build/run fail — exit 2 with a clear message.
(
"qnn",
None,
0,
[
("QNNExecutionProvider", "NPU"),
("QNNExecutionProvider", "GPU"),
],
None,
2,
[],
"not available on this system",
),
# ep=qnn, device=all: `all` keeps the full fan-out (no local check),
# so both QNN-supported devices run unchanged.
(
"qnn",
"all",
Expand Down Expand Up @@ -1337,14 +1339,19 @@ def test_no_rule_data_pair_runs_with_inline_skip_marker(
assert call_kwargs["device"] == "GPU"

@patch("winml.modelkit.analyze.ONNXStaticAnalyzer")
def test_qnn_auto_warns_about_non_local_pairs(
def test_qnn_device_auto_errors_when_not_local(
self,
mock_analyzer_class: MagicMock,
runner: CliRunner,
tmp_path: Path,
mock_analyzer_result: Mock,
) -> None:
"""qnn + auto device: QNN isn't locally supported but we warn (not error) and run."""
"""qnn + auto device: QNN isn't local, so device resolution fails (exit 2).

``auto`` resolves from local availability via the shared sysinfo helpers,
exactly like build/run. To statically analyze a non-local EP the user must
pin the device (``--device npu``) or use ``--device all``.
"""
model_file = tmp_path / "test.onnx"
model_file.write_bytes(b"dummy")

Expand All @@ -1353,36 +1360,29 @@ def test_qnn_auto_warns_about_non_local_pairs(
mock_analyzer_class.return_value = mock_instance

result = runner.invoke(analyze, ["--model", str(model_file), "--ep", "qnn"])
assert result.exit_code == 0
assert "not available on this machine" in result.output.lower()
actual_calls = [
(call.kwargs["ep"], call.kwargs["device"])
for call in mock_instance.analyze.call_args_list
]
assert actual_calls == [
("QNNExecutionProvider", "NPU"),
("QNNExecutionProvider", "GPU"),
]
assert result.exit_code == 2
assert "not available on this system" in result.output.lower()
assert not mock_instance.analyze.called

@patch(
"winml.modelkit.analyze.utils.ep_utils.has_rule_data_for_ep",
return_value=False,
)
@patch("winml.modelkit.analyze.ONNXStaticAnalyzer")
def test_auto_specific_device_run_unknown_op_executes_local_pairs_without_rule_data(
def test_auto_ep_specific_device_run_unknown_op_executes_single_local_pair(
self,
mock_analyzer_class: MagicMock,
_mock_has_rule: Mock,
runner: CliRunner,
tmp_path: Path,
mock_analyzer_result: Mock,
) -> None:
"""ep=auto + specific device should run all locally-eligible (ep, device) pairs.
"""ep=auto + specific device resolves a single best local (ep, device) pair.

With ep=auto and device specified, no local filter is applied — pairs the
local machine doesn't support are kept (a warning is emitted) and analysis
runs for each. has_rule_data_for_ep returning False here only affects
per-pair OP CHECK rendering (op-check-skipped), not which pairs run.
With ep=auto the shared resolver picks the highest-priority EP locally
available on the requested device (NvTensorRTRTX on GPU). The pair is
local, so --run-unknown-op stays enabled. has_rule_data_for_ep returning
False only affects per-pair OP CHECK rendering, not which pair runs.
"""
model_file = tmp_path / "test.onnx"
model_file.write_bytes(b"dummy")
Expand All @@ -1401,11 +1401,7 @@ def test_auto_specific_device_run_unknown_op_executes_local_pairs_without_rule_d
(call.kwargs["ep"], call.kwargs["device"])
for call in mock_instance.analyze.call_args_list
]
assert actual_calls == [
("NvTensorRTRTXExecutionProvider", "GPU"),
("OpenVINOExecutionProvider", "GPU"),
("DmlExecutionProvider", "GPU"),
]
assert actual_calls == [("NvTensorRTRTXExecutionProvider", "GPU")]


class TestQDQNodeDisplayMapping:
Expand Down
9 changes: 7 additions & 2 deletions tests/unit/commands/test_config_value_priority.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,13 @@ def fake_analyze(**kw):
"winml.modelkit.analyze.utils.ep_utils.has_any_rule_data",
return_value=True,
),
# Deterministic Tier-3 default: when ep stays "auto" through the
# merge block, _get_available_eps -> QNN so target_ep is fixed.
# Deterministic Tier-3 default: when ep stays "auto" through the merge
# block, analyze resolves it via resolve_eps(resolved_device)[0]. Pin the
# ORT device->EP map so npu -> QNN, fixing the resolved target EP.
patch(
"winml.modelkit.sysinfo.device._get_device_ep_map_from_ort",
return_value={"npu": ("QNNExecutionProvider",)},
),
patch(
"winml.modelkit.sysinfo.device._get_available_eps",
return_value=["QNNExecutionProvider"],
Expand Down
Loading