Skip to content

Commit 6245c00

Browse files
committed
fix(ci): gate AMX modules behind x86_64 + scope nostd build to -p ndarray + rustfmt
Three additional CI failures surfaced by PR #141 (all pre-existing latent issues, none caused by the original migration): (1) s390x-unknown-linux-gnu cross-tests — x86-only inline asm leaks. `src/hpc/amx_matmul.rs`, `src/hpc/bf16_tile_gemm.rs` (its wrapper), and `src/simd_amx.rs` all use `asm!` with `rcx` / `rax` register names. AMX is an Intel-only ISA (Sapphire Rapids+); the registers don't exist on s390x / aarch64 / wasm32 / etc., and the asm parser rejects them at compile time. Fix: gate all three module declarations behind `#[cfg(target_arch = "x86_64")]`. On x86_64 CI runners (most) they compile normally, and runtime gating via `amx_available()` already prevents execution on CPUs without AMX. On non-x86 targets they're skipped entirely. External consumer audit is clean — only `bf16_tile_gemm` uses `amx_matmul`, and only `amx_matmul` uses `simd_amx`. No cascade gating needed. (2) thumbv6m-none-eabi nostd — criterion dev-dep tree leaks into the workspace cross-build. CI runs `cargo rustc --target=thumbv6m-none-eabi --no-default-features --features portable-atomic-critical-section` from the workspace root. Without `-p ndarray` scoping, cargo evaluates the whole workspace's dep graph (including dev-deps from ndarray-rand / serialization-tests / numeric-tests). The bench migration in PR #140 added `criterion 0.5` as a dev-dep; criterion transitively pulls `serde_core` (which doesn't declare `#![no_std]`) and `getrandom` (which has its own no_std-incompatible paths) into the dep tree. The library `ndarray` itself builds cleanly on thumbv6m with no-default-features (verified: `cargo check -p ndarray --target=thumbv6m-none-eabi --no-default-features --features portable-atomic-critical-section` is clean). The CI command just needs scoping. Fix: add `-p ndarray` to the cargo rustc invocation in the nostd CI job so dev-dep evaluation is limited to the library's own deps.
(3) `cargo fmt --all --check` failures. Bench files migrated to criterion in PR #140 used the workspace's prior-style "brace on next line" formatting (`fn foo(c: &mut Criterion) \n{`). Stable rustfmt 1.94.1 (pinned per CLAUDE.md) wants "brace on same line" (`fn foo(c: &mut Criterion) {`), and it also inlines single-statement closures. Fix: run `cargo fmt --all` (no manual changes needed). 13 bench files + examples/life.rs touched with mechanical formatting changes only; no semantic changes. Verification (local, 2026-05-13): - `cargo clippy --no-deps` (default) → clean; - `cargo check --no-default-features --features portable-atomic-critical-section --lib` → clean; - `cargo rustc -p ndarray --target=thumbv6m-none-eabi --no-default-features --features portable-atomic-critical-section` → clean (CI command as updated); - `cargo fmt --all --check` → clean. These are pre-existing latent bugs (amx asm registers since the module was added, criterion dev-dep regression introduced by PR #140 itself); the fixes land on the same PR series that exposed them.
1 parent 5f3a43f commit 6245c00

16 files changed

Lines changed: 290 additions & 686 deletions

File tree

.github/workflows/ci.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,12 @@ jobs:
107107
- run: rustup target add ${{ matrix.target }}
108108
- name: Tests
109109
run: |
110-
cargo rustc "--target=${{ matrix.target }}" --no-default-features --features portable-atomic-critical-section
110+
# Scope to `-p ndarray` so workspace dev-deps (criterion → serde_core
111+
# → getrandom) don't get evaluated against the no_std target. The
112+
# library itself builds cleanly under no_std + portable-atomic-
113+
# critical-section; only its dev-dependency tree pulls std-requiring
114+
# crates that have no business existing in the nostd build.
115+
cargo rustc -p ndarray "--target=${{ matrix.target }}" --no-default-features --features portable-atomic-critical-section
111116
112117
tests:
113118
runs-on: ubuntu-latest

benches/append.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,20 @@ use ndarray::prelude::*;
44
fn select_axis0(c: &mut Criterion) {
55
let a = Array::<f32, _>::zeros((256, 256));
66
let selectable = vec![0, 1, 2, 0, 1, 3, 0, 4, 16, 32, 128, 147, 149, 220, 221, 255, 221, 0, 1];
7-
c.bench_function("select_axis0", |b| {
8-
b.iter(|| black_box(&a).select(Axis(0), black_box(&selectable)))
9-
});
7+
c.bench_function("select_axis0", |b| b.iter(|| black_box(&a).select(Axis(0), black_box(&selectable))));
108
}
119

1210
fn select_axis1(c: &mut Criterion) {
1311
let a = Array::<f32, _>::zeros((256, 256));
1412
let selectable = vec![0, 1, 2, 0, 1, 3, 0, 4, 16, 32, 128, 147, 149, 220, 221, 255, 221, 0, 1];
15-
c.bench_function("select_axis1", |b| {
16-
b.iter(|| black_box(&a).select(Axis(1), black_box(&selectable)))
17-
});
13+
c.bench_function("select_axis1", |b| b.iter(|| black_box(&a).select(Axis(1), black_box(&selectable))));
1814
}
1915

2016
fn select_1d(c: &mut Criterion) {
2117
let a = Array::<f32, _>::zeros(1024);
2218
let mut selectable = (0..a.len()).step_by(17).collect::<Vec<_>>();
2319
selectable.extend(selectable.clone().iter().rev());
24-
c.bench_function("select_1d", |b| {
25-
b.iter(|| black_box(&a).select(Axis(0), black_box(&selectable)))
26-
});
20+
c.bench_function("select_1d", |b| b.iter(|| black_box(&a).select(Axis(0), black_box(&selectable))));
2721
}
2822

2923
criterion_group!(benches, select_axis0, select_axis1, select_1d);

0 commit comments

Comments
 (0)