From f556d90881beb8e62faa185a17f2b323f99ae193 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 20 May 2026 10:48:26 -0700 Subject: [PATCH 1/7] go --- src/passes/CMakeLists.txt | 1 + src/passes/MarkJSCalled.cpp | 74 +++++++++++++++++++++++++++++++++++++ src/passes/pass.cpp | 3 ++ src/passes/passes.h | 1 + 4 files changed, 79 insertions(+) create mode 100644 src/passes/MarkJSCalled.cpp diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index d6f9100aad0..9a3e1738e93 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -65,6 +65,7 @@ set(passes_SOURCES LocalSubtyping.cpp LogExecution.cpp LoopInvariantCodeMotion.cpp + MarkJSCalled.cpp Memory64Lowering.cpp MemoryPacking.cpp MergeBlocks.cpp diff --git a/src/passes/MarkJSCalled.cpp b/src/passes/MarkJSCalled.cpp new file mode 100644 index 00000000000..c34adb01e44 --- /dev/null +++ b/src/passes/MarkJSCalled.cpp @@ -0,0 +1,74 @@ +/* + * Copyright 2026 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Users should mark JS-called functions using @binaryen.js.called. This pass +// helps by auto-marking them where possible. The main thing this does is to +// find any configureAll calls and mark the functions referred to there. 
+// + +#include "ir/find-all.h" +#include "ir/module-utils.h" +#include "pass.h" +#include "wasm.h" + +namespace wasm { + +struct MarkJSCalled : public Pass { + void run(Module* module) override { + Intrinsics intrinsics(*module); + + // See if there even is a configureAll. + auto hasConfigureAll = false; + for (auto& func : module.functions) { + if (intrinsics.isConfigureAll(func.get())) { + hasConfigureAll = true; + break; + } + } + if (!hasConfigureAll) { + return; + } + + using JSCalledSet = std::unordered_set; + + ModuleUtils::ParallelFunctionAnalysis analysis( + module, [&](Function* func, JSCalledSet& jsCalled) { + if (func->imported()) { + return; + } + + FindAll calls(func->body); + for (auto* call : calls.list) { + if (isConfigureAll(call)) { + for (auto name : getConfigureAllFunctions(call)) { + jsCalled.insert(name); + } + } + } + }); + + for (auto& [_, jsCalled] : analysis.map) { + for (auto name : jsCalled) { + module->getFunction(name)->funcAnnotations.jsCalled = true; + } + } + } +}; + +Pass* createMarkJSCalledPass() { return new MarkJSCalled(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index d29a6fcebf5..d47812e0869 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -277,6 +277,9 @@ void PassRegistry::registerPasses() { registerPass("limit-segments", "attempt to merge segments to fit within web limits", createLimitSegmentsPass); + registerPass("mark-js-called", + "mark js called functions (using configureAll) as doing so", + createMarkJSCalledPass); registerPass("memory64-lowering", "lower loads and stores to a 64-bit memory to instead use a " "32-bit one", diff --git a/src/passes/passes.h b/src/passes/passes.h index 681a259a831..0e53028144e 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -87,6 +87,7 @@ Pass* createInstrumentLocalsPass(); Pass* createInstrumentMemoryPass(); Pass* createLLVMMemoryCopyFillLoweringPass(); Pass* createLoopInvariantCodeMotionPass(); +Pass* 
createMarkJSCalledPass(); Pass* createMemory64LoweringPass(); Pass* createMemoryPackingPass(); Pass* createMergeBlocksPass(); From 6f5507eb221be45fca3858d4574cd12e5971bc55 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 20 May 2026 10:53:25 -0700 Subject: [PATCH 2/7] go --- src/passes/MarkJSCalled.cpp | 11 +-- test/lit/passes/mark-js-called.wast | 102 ++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 test/lit/passes/mark-js-called.wast diff --git a/src/passes/MarkJSCalled.cpp b/src/passes/MarkJSCalled.cpp index c34adb01e44..8776ea890c2 100644 --- a/src/passes/MarkJSCalled.cpp +++ b/src/passes/MarkJSCalled.cpp @@ -20,7 +20,8 @@ // find any configureAll calls and mark the functions referred to there. // -#include "ir/find-all.h" +#include "ir/find_all.h" +#include "ir/intrinsics.h" #include "ir/module-utils.h" #include "pass.h" #include "wasm.h" @@ -33,7 +34,7 @@ struct MarkJSCalled : public Pass { // See if there even is a configureAll. 
auto hasConfigureAll = false; - for (auto& func : module.functions) { + for (auto& func : module->functions) { if (intrinsics.isConfigureAll(func.get())) { hasConfigureAll = true; break; @@ -46,15 +47,15 @@ struct MarkJSCalled : public Pass { using JSCalledSet = std::unordered_set; ModuleUtils::ParallelFunctionAnalysis analysis( - module, [&](Function* func, JSCalledSet& jsCalled) { + *module, [&](Function* func, JSCalledSet& jsCalled) { if (func->imported()) { return; } FindAll calls(func->body); for (auto* call : calls.list) { - if (isConfigureAll(call)) { - for (auto name : getConfigureAllFunctions(call)) { + if (intrinsics.isConfigureAll(call)) { + for (auto name : intrinsics.getConfigureAllFunctions(call)) { jsCalled.insert(name); } } diff --git a/test/lit/passes/mark-js-called.wast b/test/lit/passes/mark-js-called.wast new file mode 100644 index 00000000000..ac04a7a37bc --- /dev/null +++ b/test/lit/passes/mark-js-called.wast @@ -0,0 +1,102 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt --mark-js-called -all -S -o - | filecheck %s + +;; $configure will be marked as @binaryen.js.called. $already is already marked, +;; and nothing changes. $unconfigured* are not in configureAll so they are left +;; alone. 
+ +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $externs (array (mut externref))) + (type $externs (array (mut externref))) + + ;; CHECK: (type $funcs (array (mut funcref))) + (type $funcs (array (mut funcref))) + + ;; CHECK: (type $bytes (array (mut i8))) + (type $bytes (array (mut i8))) + + ;; CHECK: (type $configureAll (func (param (ref null $externs) (ref null $funcs) (ref null $bytes) externref))) + (type $configureAll (func (param (ref null $externs)) (param (ref null $funcs)) (param (ref null $bytes)) (param externref))) + + ;; CHECK: (import "wasm:js-prototypes" "configureAll" (func $configureAll (type $configureAll) (param (ref null $externs) (ref null $funcs) (ref null $bytes) externref))) + (import "wasm:js-prototypes" "configureAll" (func $configureAll (type $configureAll))) + + ;; CHECK: (data $bytes "12345678") + (data $bytes "12345678") + + ;; CHECK: (elem $externs externref (item (ref.null noextern))) + (elem $externs externref + (ref.null extern) + ) + + ;; CHECK: (elem $funcs func $configured $already) + (elem $funcs funcref + (ref.func $configured) + (ref.func $already) + ) + + ;; CHECK: (elem $other func $unconfigured) + (elem $other funcref + (ref.func $unconfigured) + ) + + ;; CHECK: (start $start) + (start $start) + + ;; CHECK: (func $start (type $0) + ;; CHECK-NEXT: (call $configureAll + ;; CHECK-NEXT: (array.new_elem $externs $externs + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (array.new_elem $funcs $funcs + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (array.new_data $bytes $bytes + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null noextern) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $start + (call $configureAll + (array.new_elem $externs $externs + (i32.const 0) (i32.const 1)) + (array.new_elem $funcs $funcs + (i32.const 0) (i32.const 2)) + (array.new_data $bytes 
$bytes + (i32.const 0) (i32.const 8)) + (ref.null extern) + ) + ) + + ;; CHECK: (@binaryen.js.called) + ;; CHECK-NEXT: (func $configured (type $0) + ;; CHECK-NEXT: ) + (func $configured + ) + + ;; CHECK: (@binaryen.js.called) + ;; CHECK-NEXT: (func $already (type $0) + ;; CHECK-NEXT: ) + (@binaryen.js.called) + (func $already + ) + + ;; CHECK: (func $unconfigured (type $0) + ;; CHECK-NEXT: ) + (func $unconfigured + ) + + ;; CHECK: (@binaryen.js.called) + ;; CHECK-NEXT: (func $unconfigured-already (type $0) + ;; CHECK-NEXT: ) + (@binaryen.js.called) + (func $unconfigured-already + ) +) From d644c8bb142bbbe3d559b512cd57cab080fa37e5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 20 May 2026 10:54:01 -0700 Subject: [PATCH 3/7] typo --- test/lit/passes/mark-js-called.wast | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lit/passes/mark-js-called.wast b/test/lit/passes/mark-js-called.wast index ac04a7a37bc..10a2ca57ddd 100644 --- a/test/lit/passes/mark-js-called.wast +++ b/test/lit/passes/mark-js-called.wast @@ -2,7 +2,7 @@ ;; RUN: foreach %s %t wasm-opt --mark-js-called -all -S -o - | filecheck %s -;; $configure will be marked as @binaryen.js.called. $already is already marked, +;; $configured will be marked as @binaryen.js.called. $already is already marked, ;; and nothing changes. $unconfigured* are not in configureAll so they are left ;; alone. 
From 1df1190e358789c479470c284ff985ed1b4c83e3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 20 May 2026 10:58:21 -0700 Subject: [PATCH 4/7] fuzz --- scripts/fuzz_opt.py | 7 +++++++ src/passes/MarkJSCalled.cpp | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 4173058abf8..c653a2d62ae 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -2232,6 +2232,13 @@ def do_handle_pair(self, input, before_wasm, after_wasm, opts): pre_vm = random.choice(vms) pre = self.do_run(pre_vm, js_file, pre_wasm) + # We are about to optimize, and do not trust the given wasm file to + # have marked all js-called methods properly. In particular, it could + # have a configureAll that is not in the start function. + opts = opts + [ + '--mark-js-called', + ] + # Optimize. post_wasm = abspath('post.wasm') cmd = [in_bin('wasm-opt'), pre_wasm, '-o', post_wasm] + opts + FEATURE_OPTS diff --git a/src/passes/MarkJSCalled.cpp b/src/passes/MarkJSCalled.cpp index 8776ea890c2..c05c75062c5 100644 --- a/src/passes/MarkJSCalled.cpp +++ b/src/passes/MarkJSCalled.cpp @@ -19,6 +19,10 @@ // helps by auto-marking them where possible. The main thing this does is to // find any configureAll calls and mark the functions referred to there. // +// We do automatically handle configureAll in the start function (in +// intrinsics.cpp), so this pass is only needed for other uses of configureAll, +// like from an export. 
+// #include "ir/find_all.h" #include "ir/intrinsics.h" From be8ee884eb337ce102eefb01cc3fb25f94e57075 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 20 May 2026 11:23:02 -0700 Subject: [PATCH 5/7] lint --- scripts/fuzz_opt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index c653a2d62ae..72df5a75dbb 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -2235,13 +2235,13 @@ def do_handle_pair(self, input, before_wasm, after_wasm, opts): # We are about to optimize, and do not trust the given wasm file to # have marked all js-called methods properly. In particular, it could # have a configureAll that is not in the start function. - opts = opts + [ + full_opts = opts + [ '--mark-js-called', ] # Optimize. post_wasm = abspath('post.wasm') - cmd = [in_bin('wasm-opt'), pre_wasm, '-o', post_wasm] + opts + FEATURE_OPTS + cmd = [in_bin('wasm-opt'), pre_wasm, '-o', post_wasm] + full_opts + FEATURE_OPTS print(' '.join(cmd)) proc = subprocess.run(cmd, capture_output=True, text=True) if proc.returncode: From b439d8a011dedfb0ac77ef5ce3c106d4a9641cf8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 20 May 2026 11:37:50 -0700 Subject: [PATCH 6/7] update help --- test/lit/help/wasm-metadce.test | 3 +++ test/lit/help/wasm-opt.test | 3 +++ test/lit/help/wasm2js.test | 3 +++ 3 files changed, 9 insertions(+) diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test index b35982035d0..5cd70538ba0 100644 --- a/test/lit/help/wasm-metadce.test +++ b/test/lit/help/wasm-metadce.test @@ -256,6 +256,9 @@ ;; CHECK-NEXT: --log-execution instrument the build with ;; CHECK-NEXT: logging of where execution goes ;; CHECK-NEXT: +;; CHECK-NEXT: --mark-js-called mark js called functions (using +;; CHECK-NEXT: configureAll) as doing so +;; CHECK-NEXT: ;; CHECK-NEXT: --memory-packing packs memory into separate ;; CHECK-NEXT: segments, skipping zeros ;; CHECK-NEXT: diff --git 
a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index 1565b9686b7..50aa71f196b 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -292,6 +292,9 @@ ;; CHECK-NEXT: --log-execution instrument the build with ;; CHECK-NEXT: logging of where execution goes ;; CHECK-NEXT: +;; CHECK-NEXT: --mark-js-called mark js called functions (using +;; CHECK-NEXT: configureAll) as doing so +;; CHECK-NEXT: ;; CHECK-NEXT: --memory-packing packs memory into separate ;; CHECK-NEXT: segments, skipping zeros ;; CHECK-NEXT: diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 32a1f60ed3e..1f89eedb6d0 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -220,6 +220,9 @@ ;; CHECK-NEXT: --log-execution instrument the build with ;; CHECK-NEXT: logging of where execution goes ;; CHECK-NEXT: +;; CHECK-NEXT: --mark-js-called mark js called functions (using +;; CHECK-NEXT: configureAll) as doing so +;; CHECK-NEXT: ;; CHECK-NEXT: --memory-packing packs memory into separate ;; CHECK-NEXT: segments, skipping zeros ;; CHECK-NEXT: From 0cefb75e54b7d1e20d928ecefc678df57ac2ef1e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 20 May 2026 12:25:48 -0700 Subject: [PATCH 7/7] Add TODO about warnings and unifying --- src/ir/intrinsics.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/ir/intrinsics.cpp b/src/ir/intrinsics.cpp index 6dd4c072324..2069b1288e6 100644 --- a/src/ir/intrinsics.cpp +++ b/src/ir/intrinsics.cpp @@ -110,6 +110,15 @@ std::vector Intrinsics::getJSCalledFunctions() { } // ConfigureAlls in a start function make their functions callable. + // + // TODO: Rather than scan the start, which does not handle all cases + // (configureAll can be called from an export), we could remove this and + // expect users to mark all functions as jsCalled. 
The MarkJSCalled pass scans + // for configureAlls and emits that annotation, so users could basically run + // it, if they don't want to manually annotate. Then the code here could + // get unified into that pass. The errors above (like the elem segment not + // having the right size etc.) could then be improved and/or turned into + // warnings. if (module.start) { auto* start = module.getFunction(module.start); if (!start->imported()) {