From 3d56323723128f4325af2e5ae9bb4b0f9eb27418 Mon Sep 17 00:00:00 2001 From: Matt Hargett Date: Sun, 17 May 2026 01:18:28 -0700 Subject: [PATCH 1/8] Enable arm64_32-apple-watchos (aarch64-watchos-ilp32) build --- build.zig | 9 +++++ src/c_api.zig | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ src/guard.zig | 11 +++++- src/memory.zig | 11 +++++- src/types.zig | 12 ++++++ src/vm.zig | 6 ++- 6 files changed, 149 insertions(+), 5 deletions(-) diff --git a/build.zig b/build.zig index c0699d59f..5b6bd88fd 100644 --- a/build.zig +++ b/build.zig @@ -194,11 +194,20 @@ pub fn build(b: *std.Build) void { lib_shared.installHeader(b.path("include/zwasm.h"), "zwasm.h"); // Static library (libzwasm.a) + // + // single_threaded = true eliminates the wasm-threads atomic.wait/notify + // paths that pull in std.Thread / std.Io.Threaded. The static lib is + // intended for App-Store-eligible iOS / watchOS / tvOS apps (no JIT, + // no shared memory, single-threaded wasm execution), so dropping the + // threading paths costs no functionality. Required for the + // arm64_32-apple-watchos build because std.Io.Threaded does not + // compile under ILP32 (u64 → usize narrowing errors). const lib_static_mod = b.createModule(.{ .root_source_file = b.path("src/c_api.zig"), .target = target, .optimize = if (lib_optimize) optimize else if (optimize == .Debug) .ReleaseSafe else optimize, .link_libc = true, + .single_threaded = true, .pic = if (enable_pic) true else null, }); lib_static_mod.addOptions("build_options", options); diff --git a/src/c_api.zig b/src/c_api.zig index 1c0df74fe..2bb4e78e1 100644 --- a/src/c_api.zig +++ b/src/c_api.zig @@ -18,6 +18,111 @@ const types = @import("types.zig"); const WasmModule = types.WasmModule; const WasiOptions = types.WasiOptions; +// On arm64_32-apple-watchos (32-bit-pointer aarch64) Zig 0.16's default +// panic / std.debug machinery hits `u64 → usize` narrowing errors in +// std.Io.Threaded.dirReadDarwin et al. 
Even `std.debug.simple_panic` +// still routes through `lockStderr → std_options.debug_io → +// debug_threaded_io.io()` which pulls in the whole std.Io.Threaded +// module. Same applies to std.log.defaultLog. We override `panic` and +// `std_options.logFn` so the static-lib build for watchos compiles +// without any Zig-stdlib patches. Harmless on other targets — zwasm +// surfaces all errors through the C-ABI `zwasm_last_error_message()` +// channel, never via stderr. +fn traping_panic(_: []const u8, _: ?usize) noreturn { + @trap(); +} +pub const panic = struct { + pub const call = traping_panic; + pub fn sentinelMismatch(_: anytype, _: anytype) noreturn { + @trap(); + } + pub fn unwrapError(_: anyerror) noreturn { + @trap(); + } + pub fn outOfBounds(_: usize, _: usize) noreturn { + @trap(); + } + pub fn startGreaterThanEnd(_: usize, _: usize) noreturn { + @trap(); + } + pub fn inactiveUnionField(_: anytype, _: anytype) noreturn { + @trap(); + } + pub fn sliceCastLenRemainder(_: usize) noreturn { + @trap(); + } + pub fn reachedUnreachable() noreturn { + @trap(); + } + pub fn unwrapNull() noreturn { + @trap(); + } + pub fn castToNull() noreturn { + @trap(); + } + pub fn incorrectAlignment() noreturn { + @trap(); + } + pub fn invalidErrorCode() noreturn { + @trap(); + } + pub fn integerOutOfBounds() noreturn { + @trap(); + } + pub fn integerOverflow() noreturn { + @trap(); + } + pub fn shlOverflow() noreturn { + @trap(); + } + pub fn shrOverflow() noreturn { + @trap(); + } + pub fn divideByZero() noreturn { + @trap(); + } + pub fn exactDivisionRemainder() noreturn { + @trap(); + } + pub fn integerPartOutOfBounds() noreturn { + @trap(); + } + pub fn corruptSwitch() noreturn { + @trap(); + } + pub fn shiftRhsTooBig() noreturn { + @trap(); + } + pub fn invalidEnumValue() noreturn { + @trap(); + } + pub fn forLenMismatch() noreturn { + @trap(); + } + pub fn copyLenMismatch() noreturn { + @trap(); + } + pub fn memcpyAlias() noreturn { + @trap(); + } + pub fn 
noreturnReturned() noreturn { + @trap(); + } +}; + +fn noopLog( + comptime _: std.log.Level, + comptime _: @EnumLiteral(), + comptime _: []const u8, + _: anytype, +) void {} + +pub const std_options: std.Options = .{ + .allow_stack_tracing = false, + .networking = false, + .logFn = noopLog, +}; + /// Convert isize (C intptr_t) to platform File.Handle. fn isizeToHandle(v: isize) std.Io.File.Handle { if (builtin.os.tag == .windows) { diff --git a/src/guard.zig b/src/guard.zig index a9bce6fbd..f44314025 100644 --- a/src/guard.zig +++ b/src/guard.zig @@ -117,11 +117,18 @@ const Ucontext = switch (builtin.os.tag) { /// Guard region size: 4 GiB + 64 KiB. /// This ensures any 32-bit index (0..0xFFFFFFFF) + small offset (up to 64 KiB) /// falls within the mapped region (data + guard). -pub const GUARD_SIZE: usize = 4 * 1024 * 1024 * 1024 + 64 * 1024; +/// +/// On ILP32 targets (arm64_32-apple-watchos) `usize` is 32-bit and these +/// values overflow comptime. Guard memory is only used when `jitSupported()` +/// returns true, which is false for watchos — so on 32-bit targets we set +/// the constants to zero placeholders to satisfy the type checker. Any +/// runtime call into the GuardedMem path on a 32-bit platform would be a +/// bug: `addMemory` in src/store.zig predicates on `jitSupported()`. +pub const GUARD_SIZE: usize = if (@sizeOf(usize) >= 8) 4 * 1024 * 1024 * 1024 + 64 * 1024 else 0; /// Total virtual reservation: data capacity + guard. /// Data capacity matches Wasm max 4 GiB. Guard provides PROT_NONE safety zone. -pub const TOTAL_RESERVATION: usize = 8 * 1024 * 1024 * 1024 + 64 * 1024; +pub const TOTAL_RESERVATION: usize = if (@sizeOf(usize) >= 8) 8 * 1024 * 1024 * 1024 + 64 * 1024 else 0; /// Recovery information for signal handler. /// Set before calling JIT code, cleared after. 
diff --git a/src/memory.zig b/src/memory.zig index 4bff7c458..2813e1eb8 100644 --- a/src/memory.zig +++ b/src/memory.zig @@ -176,7 +176,12 @@ pub const Memory = struct { const len = self.data.items.len; if (overflow != 0 or len < @sizeOf(T) or effective > len - @sizeOf(T)) return error.OutOfBoundsMemoryAccess; - const ptr: *const [@sizeOf(T)]u8 = @ptrCast(&self.data.items[effective]); + // After the bounds check, `effective` fits in usize because `len` + // is usize. The explicit @intCast is needed on ILP32 targets + // (arm64_32-apple-watchos) where usize is 32-bit but `effective` + // is u64. + const effective_usize: usize = @intCast(effective); + const ptr: *const [@sizeOf(T)]u8 = @ptrCast(&self.data.items[effective_usize]); return switch (T) { u8, u16, u32, u64, i8, i16, i32, i64 => mem.readInt(T, ptr, .little), u128 => mem.readInt(u128, ptr, .little), @@ -193,7 +198,9 @@ pub const Memory = struct { const len = self.data.items.len; if (overflow != 0 or len < @sizeOf(T) or effective > len - @sizeOf(T)) return error.OutOfBoundsMemoryAccess; - const ptr: *[@sizeOf(T)]u8 = @ptrCast(&self.data.items[effective]); + // See Memory.read above for why this cast is required on ILP32. + const effective_usize: usize = @intCast(effective); + const ptr: *[@sizeOf(T)]u8 = @ptrCast(&self.data.items[effective_usize]); switch (T) { u8, u16, u32, u64, i8, i16, i32, i64 => mem.writeInt(T, ptr, value, .little), u128 => mem.writeInt(u128, ptr, value, .little), diff --git a/src/types.zig b/src/types.zig index f257ad8e0..a72881359 100644 --- a/src/types.zig +++ b/src/types.zig @@ -468,8 +468,20 @@ pub const WasmModule = struct { // stand up a private `std.Io.Threaded` owned by this module. // Acquired early — applyWasiOptions's addPreopenPath needs io to open // host directories cross-platform (Zig 0.16's `std.Io.Dir.openDir`). 
+ // + // On ILP32 targets (arm64_32-apple-watchos) `std.Io.Threaded` itself + // doesn't compile (u64 syscall returns vs 32-bit usize), so we only + // expose the auto-init path on 64-bit targets. On 32-bit watchos the + // caller MUST supply `config.io` if they want WASI host directories + // — which they won't, because WatchKit apps don't get filesystem + // access. Workloads without WASI (the case for wasm-benchmark) never + // dereference the io vtable, so leaving it default-init'd is OK. const io: std.Io = blk: { if (config.io) |io_val| break :blk io_val; + if (@sizeOf(usize) < 8) { + self.owned_io = null; + break :blk @as(std.Io, undefined); + } const threaded = try allocator.create(std.Io.Threaded); errdefer allocator.destroy(threaded); threaded.* = std.Io.Threaded.init(allocator, .{}); diff --git a/src/vm.zig b/src/vm.zig index 1100ab0d1..cc1907571 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -4008,9 +4008,13 @@ pub const Vm = struct { const byte_count = N * @sizeOf(NarrowT); const effective, const ov = @addWithOverflow(ma.offset, base); if (ov != 0 or m.data.items.len < byte_count or effective > m.data.items.len - byte_count) return error.OutOfBoundsMemoryAccess; + // After the bounds check `effective` fits in usize because + // `m.data.items.len` is usize. Explicit cast needed on ILP32 + // targets (arm64_32-apple-watchos). + const effective_usize: usize = @intCast(effective); var narrow: [N]NarrowT = undefined; for (&narrow, 0..) 
|*n, i| { - const ptr: *const [@sizeOf(NarrowT)]u8 = @ptrCast(&m.data.items[effective + i * @sizeOf(NarrowT)]); + const ptr: *const [@sizeOf(NarrowT)]u8 = @ptrCast(&m.data.items[effective_usize + i * @sizeOf(NarrowT)]); n.* = std.mem.readInt(NarrowT, ptr, .little); } // Extend to wide From 1e26b16aa712537b8ee6e83c723284f95532198e Mon Sep 17 00:00:00 2001 From: "Shota Kudo (chaploud)" Date: Sun, 17 May 2026 23:12:37 +0900 Subject: [PATCH 2/8] fix(c_api): use std.debug.no_panic on ILP32, scope override to that ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous override declared a custom `panic` namespace and `std_options` unconditionally for every C-API consumer (Mac / iOS / Linux / Windows static and shared libs), losing pretty panic messages and stdlib logging to work around a compile error that only surfaces on ILP32 (arm64_32-apple-watchos). Zig 0.16 already ships `std.debug.no_panic` — the canonical trap-on-every-panic namespace. Use it directly on ILP32 and fall back to `std.debug.FullPanic(std.debug.defaultPanic)` (the stdlib default) on every other target. Same gating for `std_options`. Net: ~80 fewer hand-rolled lines, no behaviour change for 64-bit C-API consumers, ILP32 build still compiles. --- src/c_api.zig | 109 ++++++++------------------------------------------ 1 file changed, 16 insertions(+), 93 deletions(-) diff --git a/src/c_api.zig b/src/c_api.zig index 2bb4e78e1..bd360a676 100644 --- a/src/c_api.zig +++ b/src/c_api.zig @@ -18,97 +18,20 @@ const types = @import("types.zig"); const WasmModule = types.WasmModule; const WasiOptions = types.WasiOptions; -// On arm64_32-apple-watchos (32-bit-pointer aarch64) Zig 0.16's default -// panic / std.debug machinery hits `u64 → usize` narrowing errors in -// std.Io.Threaded.dirReadDarwin et al. 
Even `std.debug.simple_panic` -// still routes through `lockStderr → std_options.debug_io → -// debug_threaded_io.io()` which pulls in the whole std.Io.Threaded -// module. Same applies to std.log.defaultLog. We override `panic` and -// `std_options.logFn` so the static-lib build for watchos compiles -// without any Zig-stdlib patches. Harmless on other targets — zwasm -// surfaces all errors through the C-ABI `zwasm_last_error_message()` -// channel, never via stderr. -fn traping_panic(_: []const u8, _: ?usize) noreturn { - @trap(); -} -pub const panic = struct { - pub const call = traping_panic; - pub fn sentinelMismatch(_: anytype, _: anytype) noreturn { - @trap(); - } - pub fn unwrapError(_: anyerror) noreturn { - @trap(); - } - pub fn outOfBounds(_: usize, _: usize) noreturn { - @trap(); - } - pub fn startGreaterThanEnd(_: usize, _: usize) noreturn { - @trap(); - } - pub fn inactiveUnionField(_: anytype, _: anytype) noreturn { - @trap(); - } - pub fn sliceCastLenRemainder(_: usize) noreturn { - @trap(); - } - pub fn reachedUnreachable() noreturn { - @trap(); - } - pub fn unwrapNull() noreturn { - @trap(); - } - pub fn castToNull() noreturn { - @trap(); - } - pub fn incorrectAlignment() noreturn { - @trap(); - } - pub fn invalidErrorCode() noreturn { - @trap(); - } - pub fn integerOutOfBounds() noreturn { - @trap(); - } - pub fn integerOverflow() noreturn { - @trap(); - } - pub fn shlOverflow() noreturn { - @trap(); - } - pub fn shrOverflow() noreturn { - @trap(); - } - pub fn divideByZero() noreturn { - @trap(); - } - pub fn exactDivisionRemainder() noreturn { - @trap(); - } - pub fn integerPartOutOfBounds() noreturn { - @trap(); - } - pub fn corruptSwitch() noreturn { - @trap(); - } - pub fn shiftRhsTooBig() noreturn { - @trap(); - } - pub fn invalidEnumValue() noreturn { - @trap(); - } - pub fn forLenMismatch() noreturn { - @trap(); - } - pub fn copyLenMismatch() noreturn { - @trap(); - } - pub fn memcpyAlias() noreturn { - @trap(); - } - pub fn 
noreturnReturned() noreturn { - @trap(); - } -}; +// On arm64_32-apple-watchos (ILP32) Zig 0.16's default panic / std.debug +// machinery and std.log.defaultLog both route through std.Io.Threaded +// (lockStderr → std_options.debug_io → debug_threaded_io.io()), which +// fails to compile under ILP32 because of u64 → usize narrowing in +// dirReadDarwin / pwrite. On 64-bit targets the stdlib defaults work +// fine, so the override is scoped to ILP32 only — keeping panic +// messages and log output intact for every other C-API consumer +// (macOS / iOS / Linux / Windows static & shared libs). +const ilp32 = @sizeOf(usize) < 8; + +pub const panic = if (ilp32) + std.debug.no_panic +else + std.debug.FullPanic(std.debug.defaultPanic); fn noopLog( comptime _: std.log.Level, @@ -117,11 +40,11 @@ fn noopLog( _: anytype, ) void {} -pub const std_options: std.Options = .{ +pub const std_options: std.Options = if (ilp32) .{ .allow_stack_tracing = false, .networking = false, .logFn = noopLog, -}; +} else .{}; /// Convert isize (C intptr_t) to platform File.Handle. fn isizeToHandle(v: isize) std.Io.File.Handle { From 600a3dc51ff01459edda7a735da4d397af134c53 Mon Sep 17 00:00:00 2001 From: "Shota Kudo (chaploud)" Date: Sun, 17 May 2026 23:14:15 +0900 Subject: [PATCH 3/8] fix(build): scope static-lib single_threaded to ILP32 targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `.single_threaded = true` was being forced on every static-lib build (macOS / iOS / Linux / Windows), silently degrading wasm- threads `atomic.wait/notify` on 64-bit C-API consumers — the wait queue paths in `src/memory.zig` rely on `std.Thread.Mutex` / `Condition`, which become no-ops under single_threaded. The ILP32 motivation (std.Io.Threaded not compiling under arm64_32-apple-watchos) only applies when usize is 32-bit, so gate on `target.result.ptrBitWidth() < 64` and leave the default (multi-threaded) on every other target. 
--- build.zig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/build.zig b/build.zig index 5b6bd88fd..e3b5872d3 100644 --- a/build.zig +++ b/build.zig @@ -195,19 +195,19 @@ pub fn build(b: *std.Build) void { // Static library (libzwasm.a) // - // single_threaded = true eliminates the wasm-threads atomic.wait/notify - // paths that pull in std.Thread / std.Io.Threaded. The static lib is - // intended for App-Store-eligible iOS / watchOS / tvOS apps (no JIT, - // no shared memory, single-threaded wasm execution), so dropping the - // threading paths costs no functionality. Required for the - // arm64_32-apple-watchos build because std.Io.Threaded does not - // compile under ILP32 (u64 → usize narrowing errors). + // single_threaded is forced on ILP32 targets (arm64_32-apple-watchos) + // because std.Io.Threaded fails to compile under that ABI (u64 → usize + // narrowing in dirReadDarwin / pwrite). On 64-bit targets the static + // lib keeps the default (multi-threaded), so wasm-threads + // atomic.wait/notify continues to work for Linux / macOS / Windows + // C-API consumers. 
+ const lib_static_single_threaded: ?bool = if (target.result.ptrBitWidth() < 64) true else null; const lib_static_mod = b.createModule(.{ .root_source_file = b.path("src/c_api.zig"), .target = target, .optimize = if (lib_optimize) optimize else if (optimize == .Debug) .ReleaseSafe else optimize, .link_libc = true, - .single_threaded = true, + .single_threaded = lib_static_single_threaded, .pic = if (enable_pic) true else null, }); lib_static_mod.addOptions("build_options", options); From 5b5dbb6cf9ac3e3fd88032f15fa7f427a6ebce42 Mon Sep 17 00:00:00 2001 From: "Shota Kudo (chaploud)" Date: Sun, 17 May 2026 23:16:35 +0900 Subject: [PATCH 4/8] fix(types): require explicit config.io on ILP32 when WASI or timeout is requested MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous ILP32 code path left `vm.io` as `undefined` whenever config.io was null, on the assumption that consumers without WASI would never dereference it. That is unsafe: `setDeadlineTimeoutMs` unconditionally calls `std.Io.Timestamp.now(self.io, ...)` for any non-null `config.timeout_ms`, and `applyWasiOptions` reaches into `io.vtable.now` / `io.openDir` via `addPreopenPath` when WASI is enabled. Both would deref an undefined vtable. Refuse loadCore early with `error.IlpRequiresExplicitIo` if either of these features is requested on ILP32 without a caller-supplied io. Pure-interpreter workloads (the watchOS wasm-benchmark use case: no WASI, no timeout, no atomics) are unaffected. Wasm modules executing `memory.atomic.wait/notify` also reach io but can't be detected statically — embedders who run such modules on ILP32 must pass config.io themselves. 
--- src/types.zig | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/types.zig b/src/types.zig index a72881359..0f1715566 100644 --- a/src/types.zig +++ b/src/types.zig @@ -470,15 +470,19 @@ pub const WasmModule = struct { // host directories cross-platform (Zig 0.16's `std.Io.Dir.openDir`). // // On ILP32 targets (arm64_32-apple-watchos) `std.Io.Threaded` itself - // doesn't compile (u64 syscall returns vs 32-bit usize), so we only - // expose the auto-init path on 64-bit targets. On 32-bit watchos the - // caller MUST supply `config.io` if they want WASI host directories - // — which they won't, because WatchKit apps don't get filesystem - // access. Workloads without WASI (the case for wasm-benchmark) never - // dereference the io vtable, so leaving it default-init'd is OK. + // doesn't compile (u64 syscall returns vs 32-bit usize), so we cannot + // auto-construct one. If the caller asked for any feature that + // reaches into the io vtable (WASI host preopens, deadline timeout) + // they must supply config.io themselves; otherwise loadCore would + // dereference an undefined vtable at runtime. Wasm modules that + // execute `memory.atomic.wait/notify` reach io too — those embedders + // must also pass config.io, but we cannot detect that statically. 
const io: std.Io = blk: { if (config.io) |io_val| break :blk io_val; if (@sizeOf(usize) < 8) { + if (config.wasi or config.timeout_ms != null) { + return error.IlpRequiresExplicitIo; + } self.owned_io = null; break :blk @as(std.Io, undefined); } From 035fe6b27409c624f32fb429dc732b3e97d2dab0 Mon Sep 17 00:00:00 2001 From: "Shota Kudo (chaploud)" Date: Sun, 17 May 2026 23:19:23 +0900 Subject: [PATCH 5/8] docs(D139,W55): document arm64_32-apple-watchos ILP32 support strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a D## entry capturing why ILP32 needs separate handling (std.Io.Threaded fails u64 → usize narrowing under that ABI), which features stay supported, which are explicitly refused (WASI / timeout without config.io), and the comptime gating strategy that keeps 64-bit consumers byte-identical. Track open follow-ups under W55-watchos-ilp32 in checklist.md: upstream Zig .a packing bug, C-header documentation of the new error code, and the atomic.wait/notify correctness gap. Tables touched by `md-table-align` (per project convention) collapsed adjacent rows in D128 / D137 that were already mis-aligned — pure whitespace, no semantic change. --- .dev/checklist.md | 21 ++++++++ .dev/decisions.md | 121 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 132 insertions(+), 10 deletions(-) diff --git a/.dev/checklist.md b/.dev/checklist.md index 5981ee41a..634ff4faa 100644 --- a/.dev/checklist.md +++ b/.dev/checklist.md @@ -74,6 +74,27 @@ Prefix: W## (to distinguish from CW's F## items). still 25/25 because it uses per-job `Setup Rust` and does not install Go / TinyGo; CI adoption tracked separately under W50. +- [ ] W55-watchos-ilp32: arm64_32-apple-watchos (ILP32) best-effort + support landed in PR #97 (D139). 
CI runs only a build-only smoke + job for `zig build static-lib -Dtarget=aarch64-watchos-ilp32 + -Djit=false -Dcomponent=false -Dwat=false` because GitHub Actions + runners have no watchOS SDK to link / run the resulting archive. + Open follow-ups: + (a) Zig 0.16 archive packing bug — `aarch64-watchos-ilp32` + static-lib produces a working `.o` but the `.a` archive is + 88 bytes (SYMDEF only). Embedders (e.g. wasm-benchmark's + `scripts/build-zwasm.sh`) work around with `ar rcs` + post-build. File upstream and remove the workaround once + fixed. + (b) `error.IlpRequiresExplicitIo` semantics are documented in + D139 but not surfaced in `include/zwasm.h` or the C API + reference. Add a "Caveats — ILP32" subsection if the target + gains traction. + (c) `memory.atomic.wait/notify` correctness on ILP32 with + `single_threaded = true`. Not a regression vs the initial PR + (atomics never worked under ILP32 anyway) but worth a one-line + test once the watchOS SDK becomes available in CI. + - [ ] W45: SIMD loop persistence — Skip Q-cache eviction at loop headers. Requires back-edge detection in scanBranchTargets. diff --git a/.dev/decisions.md b/.dev/decisions.md index c51bb0d49..d5c8110be 100644 --- a/.dev/decisions.md +++ b/.dev/decisions.md @@ -351,11 +351,11 @@ this is unaffected by host allocator choice. 
**Usage matrix**: -| Caller | Allocator source | -|---------------------|-------------------------------------------------| -| Zig host (CW/cw-new) | Host's `std.mem.Allocator` (GC-managed) | -| C host (via C API) | `malloc/free` function pointers or default | -| Standalone CLI | Internal `page_allocator` or `GeneralPurposeAllocator` | +| Caller | Allocator source | +|----------------------|--------------------------------------------------------| +| Zig host (CW/cw-new) | Host's `std.mem.Allocator` (GC-managed) | +| C host (via C API) | `malloc/free` function pointers or default | +| Standalone CLI | Internal `page_allocator` or `GeneralPurposeAllocator` | **Migration**: Internal Arena usage → accept Allocator parameter. Existing C API (`zwasm_engine_new`) gains optional config struct with alloc/free callbacks. @@ -887,11 +887,11 @@ agreed-on stripping mechanism, the ceiling has no meaning. and forcing parity would either hobble Linux or grant Windows excess slack. - | OS | Stripped binary | Ceiling | Headroom | - |------------------|-----------------|-----------|----------| - | macOS aarch64 | ~1.20 MB | 1.30 MB | ~80 KB | - | Linux x86_64 | ~1.56 MB | 1.60 MB | ~40 KB | - | Windows x86_64 | ~1.70 MB | 1.80 MB | ~100 KB | + | OS | Stripped binary | Ceiling | Headroom | + |----------------|-----------------|---------|----------| + | macOS aarch64 | ~1.20 MB | 1.30 MB | ~80 KB | + | Linux x86_64 | ~1.56 MB | 1.60 MB | ~40 KB | + | Windows x86_64 | ~1.70 MB | 1.80 MB | ~100 KB | The Linux 1.60 MB number is the original W48 Phase-1 target and is unchanged; the macOS 1.30 MB number tightens on the prior implicit @@ -1069,3 +1069,104 @@ once the coalescer extension that needs them is debugged on x86_64 - `W54-libm`: `rw_c_math` is dominated by libm `sin`/`cos`/`pow` dispatch; intrinsic recognition + ARM64 FSQRT inline + soft-libm fallback. 
+ +--- + +## D139: arm64_32-apple-watchos (ILP32) static-lib support — best-effort, no CI gate + +**Status**: Accepted — landed via PR #97 (`arm64_32-apple-watchos` branch). + +**Context**: Apple Watch SE / SE2 / Series 4-8 (S4-S8 SoC family) +ships the ILP32 ABI: 32-bit pointers, 64-bit aarch64 instructions. +Zig 0.16 spells the triple `aarch64-watchos-ilp32` (the legacy +`arm64_32-` arch identifier was removed upstream in ziglang/zig +#20820). Apple's App Store policy forbids `MAP_JIT` outside +JavaScriptCore, so any wasm runtime targeting WatchKit apps must +be pure interpreter (`-Djit=false`). zwasm already supports that +mode on every 64-bit Apple platform; the missing piece was the +ILP32 build itself. + +The blocker on a clean build was that `std.Io.Threaded` in Zig +0.16 does not compile under ILP32 — `dirReadDarwin` / `pwrite` +and friends narrow `u64` syscall returns into a 32-bit `usize` +and the compiler rejects it. The full panic / `std.debug.*` paths +transitively pull `std.Io.Threaded` in via +`lockStderr → std_options.debug_io → debug_threaded_io.io()`, so +they trip the same compile error even when nothing else in zwasm +touches threading. + +**Decision**: Accept ILP32 as a **best-effort target with no +runtime CI gate**. Build-only smoke is wired into CI to catch +source-level rot; spec / e2e / realworld / ffi / bench coverage +is not added because GitHub Actions runners have no watchOS SDK +and the resulting archive cannot be linked or run there. Support +is conditional on the consumer (a) leaving JIT off, (b) not +enabling WASI host-dir access, and (c) not depending on +`memory.atomic.wait/notify` correctness. Every workaround is +gated comptime on `@sizeOf(usize) < 8` or +`target.result.ptrBitWidth() < 64` so 64-bit consumers are +byte-identical to before. 
+ +| Concern | ILP32 (watchOS) | 64-bit (Mac/iOS/Linux/Windows) | +|-----------------------|----------------------------------|--------------------------------| +| panic / log namespace | `std.debug.no_panic` (trap-only) | stdlib default | +| static-lib threading | `single_threaded = true` | default (multi-threaded) | +| auto-init Io | refused | `std.Io.Threaded` (per D135) | +| `Config.io` required | yes, if WASI or timeout | optional | +| Guard memory consts | 0 placeholders | 4 GiB / 8 GiB | +| JIT | unsupported (App Store) | enabled by default | + +**Safety strategy** (per failure mode): + +1. **panic / std_options override** (`src/c_api.zig`). Scoped to + ILP32 via a comptime `const ilp32 = @sizeOf(usize) < 8;`. + 64-bit C-API consumers keep + `std.debug.FullPanic(std.debug.defaultPanic)`, the stdlib + default — no behaviour change for them. +2. **`single_threaded = true`** (`build.zig`, `lib_static_mod`). + Gated on `target.result.ptrBitWidth() < 64`. 64-bit static-lib + consumers keep functioning `atomic.wait/notify` via + `std.Thread` primitives. +3. **Io acquisition** (`src/types.zig`, `loadCore`). On ILP32 with + no `config.io`: refuse with `error.IlpRequiresExplicitIo` if + WASI or `timeout_ms` is requested (both would dereference an + undefined vtable at runtime). Otherwise leave `io = undefined` + — the embedder has promised not to exercise io-dependent paths. +4. **Guard memory constants** (`src/guard.zig`). `GUARD_SIZE` / + `TOTAL_RESERVATION` overflow comptime under 32-bit usize. Set + to 0 placeholders on ILP32; runtime callers are predicated on + `jitSupported()` which is false for watchos. +5. **Memory index narrowing** (`src/memory.zig`, `src/vm.zig`). + After the bounds check `effective: u64` provably fits in usize + (since `len: usize` bounds it), so the explicit + `@intCast(usize)` is a no-op on 64-bit and a + correctness-preserving narrowing on ILP32. 
+ +**Alternatives considered**: + +- **Refuse the build entirely on ILP32.** Rejected — the watchOS + use case is real (Apple Watch wasm runtime comparison in + rebeckerspecialties/wasm-benchmark) and the comptime gates are + small enough to keep maintained. +- **Force `single_threaded = true` on every static-lib.** What + the initial PR proposed. Rejected — silently breaks + `atomic.wait/notify` for 64-bit Linux / macOS / Windows + static-lib consumers. +- **Reinvent the panic namespace inline.** What the initial PR + did (~80 lines of hand-rolled `@trap()` handlers). Rejected — + Zig 0.16 ships `std.debug.no_panic` for exactly this purpose. +- **Provide a trap-on-all-methods Io vtable.** Considered for the + ILP32 `undefined io` case. Rejected — `std.Io.VTable` has 50+ + function pointers, the surface is volatile across Zig releases, + and the `error.IlpRequiresExplicitIo` path achieves the same + loud-failure semantics with much less code. + +**Known follow-up**: tracked in `checklist.md` as +`W55-watchos-ilp32`. The Zig 0.16 `.a`-packing bug for this triple +(archive ends up as the 88-byte SYMDEF only; embedders must `ar +rcs` the `.o` themselves) is upstream's problem, not zwasm's, and +is left out of scope here. + +**Affected files**: `build.zig`, `src/c_api.zig`, `src/guard.zig`, +`src/memory.zig`, `src/types.zig`, `src/vm.zig`, +`.github/workflows/ci.yml`. 
From 67bbf94d027f4fbc6ee20f928480963ceaec4eb4 Mon Sep 17 00:00:00 2001 From: "Shota Kudo (chaploud)" Date: Sun, 17 May 2026 23:21:54 +0900 Subject: [PATCH 6/8] ci: build-only smoke check for aarch64-watchos-ilp32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GitHub Actions runners do not ship the watchOS SDK so the resulting archive cannot be linked or executed there; without a build-only gate the ILP32 comptime branches in src/c_api.zig, src/guard.zig, src/memory.zig, src/types.zig, src/vm.zig and the build.zig single_threaded selector will rot the next time anyone touches those files. Add a dedicated job that runs `zig build static-lib -Dtarget=aarch64-watchos-ilp32 -Djit=false -Dcomponent=false -Dwat=false` on macos-latest and verifies the resulting cached `libzwasm_zcu.o` is the expected `arm64_32 / armv8` Mach-O object — Zig 0.16's broken .a packing for this triple (D139 known issue) means the `.a` itself is only the SYMDEF, not a useful linkable artefact, so we point the check at the `.o` directly. --- .github/workflows/ci.yml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe9ae35a3..9598227ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -278,6 +278,41 @@ jobs: - name: Verify minimal build tests pass run: zig build test -Djit=false -Dcomponent=false -Dwat=false + # Build-only smoke for arm64_32-apple-watchos (ILP32) — see D139. + # GitHub Actions runners lack a watchOS SDK so the resulting + # archive cannot be linked or run; this job only catches + # source-level rot on the ILP32 comptime gates in src/c_api.zig, + # src/guard.zig, src/memory.zig, src/types.zig, src/vm.zig and + # the build.zig single_threaded gate. 
+ watchos-ilp32-build: + name: build (aarch64-watchos-ilp32, ILP32 smoke) + runs-on: macos-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Zig + uses: goto-bus-stop/setup-zig@v2 + with: + version: 0.16.0 + + - name: Build static-lib for aarch64-watchos-ilp32 + run: | + zig build static-lib \ + -Dtarget=aarch64-watchos-ilp32 \ + -Djit=false -Dcomponent=false -Dwat=false + # Zig 0.16 ships an empty .a for this target (SYMDEF only, + # ~88 bytes). The .o is the actual artefact; verify it + # built and is the right ABI (arm64_32 / armv8 Mach-O). + OBJ=$(find .zig-cache -name 'libzwasm_zcu.o' \ + -exec file {} + | grep '64_32-bit armv8' | head -1 | cut -d: -f1) + if [ -z "$OBJ" ]; then + echo "FAIL: no arm64_32 (armv8 ILP32) object produced" + find .zig-cache -name 'libzwasm_zcu.o' -exec file {} + + exit 1 + fi + echo "PASS: built $OBJ" + ls -la "$OBJ" + # 3-OS bench matrix (C-g step 5, 2026-04-29). Each runner does its # own fresh measurement of base vs PR — never compared across # OSes / architectures because the hardware deltas dwarf any From 1d7425e58dff3f4d0303ba310a5c7cf12e295ab7 Mon Sep 17 00:00:00 2001 From: "Shota Kudo (chaploud)" Date: Sun, 17 May 2026 23:26:45 +0900 Subject: [PATCH 7/8] fix(types): rename ILP32 io guard to error.MissingIo The error returned when loadCore refuses to auto-init io on ILP32 is part of WasmModule.load's inferred error set across every target, not just watchOS. Naming it after the ABI (`IlpRequiresExplicitIo`) leaks that detail into the public cross-target error surface and asks 64-bit callers to handle an arm-name they can never observe. `error.MissingIo` describes the cause (config.io was null when something needed it) without advertising the platform. Same loud-failure semantics, neutral name. 
--- .dev/checklist.md | 8 ++++---- .dev/decisions.md | 10 ++++++---- src/types.zig | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/.dev/checklist.md b/.dev/checklist.md index 634ff4faa..47f3102db 100644 --- a/.dev/checklist.md +++ b/.dev/checklist.md @@ -86,10 +86,10 @@ Prefix: W## (to distinguish from CW's F## items). `scripts/build-zwasm.sh`) work around with `ar rcs` post-build. File upstream and remove the workaround once fixed. - (b) `error.IlpRequiresExplicitIo` semantics are documented in - D139 but not surfaced in `include/zwasm.h` or the C API - reference. Add a "Caveats — ILP32" subsection if the target - gains traction. + (b) `error.MissingIo` semantics on ILP32 are documented in D139 + but not surfaced in `include/zwasm.h` or the C API reference. + Add a "Caveats — ILP32" subsection if the target gains + traction. (c) `memory.atomic.wait/notify` correctness on ILP32 with `single_threaded = true`. Not a regression vs the initial PR (atomics never worked under ILP32 anyway) but worth a one-line diff --git a/.dev/decisions.md b/.dev/decisions.md index d5c8110be..401f25169 100644 --- a/.dev/decisions.md +++ b/.dev/decisions.md @@ -1128,10 +1128,12 @@ byte-identical to before. consumers keep functioning `atomic.wait/notify` via `std.Thread` primitives. 3. **Io acquisition** (`src/types.zig`, `loadCore`). On ILP32 with - no `config.io`: refuse with `error.IlpRequiresExplicitIo` if - WASI or `timeout_ms` is requested (both would dereference an - undefined vtable at runtime). Otherwise leave `io = undefined` - — the embedder has promised not to exercise io-dependent paths. + no `config.io`: refuse with `error.MissingIo` if WASI or + `timeout_ms` is requested (both would dereference an undefined + vtable at runtime). Otherwise leave `io = undefined` — the + embedder has promised not to exercise io-dependent paths. The + error name is deliberately ABI-neutral so it does not leak the + watchOS detail into the cross-target public error set. 4. 
**Guard memory constants** (`src/guard.zig`). `GUARD_SIZE` / `TOTAL_RESERVATION` overflow comptime under 32-bit usize. Set to 0 placeholders on ILP32; runtime callers are predicated on diff --git a/src/types.zig b/src/types.zig index 0f1715566..f20196ca8 100644 --- a/src/types.zig +++ b/src/types.zig @@ -481,7 +481,7 @@ pub const WasmModule = struct { if (config.io) |io_val| break :blk io_val; if (@sizeOf(usize) < 8) { if (config.wasi or config.timeout_ms != null) { - return error.IlpRequiresExplicitIo; + return error.MissingIo; } self.owned_io = null; break :blk @as(std.Io, undefined); From c64f4f18de71b0874577d8dee4f54276c94d15a4 Mon Sep 17 00:00:00 2001 From: "Shota Kudo (chaploud)" Date: Sun, 17 May 2026 23:37:50 +0900 Subject: [PATCH 8/8] ci(watchos-ilp32): broaden file(1) grep to cover both output formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI run on macos-latest failed because the runner's `file` version prints `Mach-O object arm64_32_v8` while local Darwin 25 prints `Mach-O 64_32-bit armv8 object`. The original grep matched only the local form. Accept either by switching to `grep -E 'arm64_32|64_32-bit armv8'` — `arm64_32` is the stable cpusubtype name printed by the CI form, and the second alternative keeps the local form matching. --- .github/workflows/ci.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9598227ce..b20952831 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -302,9 +302,12 @@ jobs: -Djit=false -Dcomponent=false -Dwat=false # Zig 0.16 ships an empty .a for this target (SYMDEF only, # ~88 bytes). The .o is the actual artefact; verify it - # built and is the right ABI (arm64_32 / armv8 Mach-O). + # built and is the right ABI. `file(1)` output varies + # across versions ("64_32-bit armv8 object" on local Darwin + # 25, "arm64_32_v8" on macos-latest CI), so accept either + # form: `arm64_32` (CI) or `64_32-bit armv8` (local).
OBJ=$(find .zig-cache -name 'libzwasm_zcu.o' \ - -exec file {} + | grep '64_32-bit armv8' | head -1 | cut -d: -f1) + -exec file {} + | grep -E 'arm64_32|64_32-bit armv8' | head -1 | cut -d: -f1) if [ -z "$OBJ" ]; then echo "FAIL: no arm64_32 (armv8 ILP32) object produced" find .zig-cache -name 'libzwasm_zcu.o' -exec file {} +