From 0cb3bf5b4188340b8b20a30b2a0b545d599628e2 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Tue, 18 Nov 2025 20:34:21 +0000 Subject: [PATCH 01/14] Support (module definition ...) and (module instance ...) in WAST spec tests --- check.py | 2 - scripts/test/shared.py | 2 +- scripts/test/support.py | 12 +++-- src/parser/wast-parser.cpp | 107 ++++++++++++++++++++++++++++++------- src/parser/wat-parser.cpp | 4 ++ src/parser/wat-parser.h | 16 +++++- src/tools/wasm-shell.cpp | 55 ++++++++++++------- src/wasm.h | 2 + 8 files changed, 157 insertions(+), 43 deletions(-) diff --git a/check.py b/check.py index 128cdae8b0f..5057933fbd6 100755 --- a/check.py +++ b/check.py @@ -221,8 +221,6 @@ def check_expected(actual, expected): check_expected(actual, expected) - run_spec_test(wast) - # check binary format. here we can verify execution of the final # result, no need for an output verification actual = '' diff --git a/scripts/test/shared.py b/scripts/test/shared.py index b1764d30f36..29b83cdb792 100644 --- a/scripts/test/shared.py +++ b/scripts/test/shared.py @@ -443,7 +443,7 @@ def get_tests(test_dir, extensions=[], recursive=False): 'imports.wast', # Missing validation of missing function on instantiation 'proposals/threads/imports.wast', # Missing memory type validation on instantiation 'linking.wast', # Missing function type validation on instantiation - 'memory.wast', # Requires wast `module definition` support + # 'memory.wast', # Requires wast `module definition` support 'proposals/threads/memory.wast', # Missing memory type validation on instantiation 'memory64-imports.wast', # Missing validation on instantiation 'annotations.wast', # String annotations IDs should be allowed diff --git a/scripts/test/support.py b/scripts/test/support.py index c109c98b654..5be99bb05ea 100644 --- a/scripts/test/support.py +++ b/scripts/test/support.py @@ -90,8 +90,13 @@ def untar(tarfile, outdir): QUOTED = re.compile(r'\(module\s*(\$\S*)?\s+(quote|binary)') +MODULE_DEFINITION_OR_INSTANCE = re.compile(r'(?m)\(module\s+(instance|definition)') def split_wast(wastFile): + ''' + Returns a list of pairs of module definitions and assertions. + Module invalidity tests, as well as (module definition ...) and (module instance ...) are skipped. + ''' # if it's a binary, leave it as is, we can't split it wast = None if not wastFile.endswith('.wasm'): @@ -152,6 +157,8 @@ def to_end(j): ignoring_quoted = True continue if chunk.startswith('(module'): + if MODULE_DEFINITION_OR_INSTANCE.match(chunk): + continue ignoring_quoted = False ret += [(chunk, [])] elif chunk.startswith('(assert_invalid'): @@ -190,14 +197,13 @@ def run_command(cmd, expected_status=0, stderr=None, out, err = proc.communicate() code = proc.returncode if expected_status is not None and code != expected_status: - raise Exception(('run_command failed (%s)' % code, out + str(err or ''))) + raise Exception(f"run_command `{" ".join(cmd)}` failed ({code}) {err or ""}") if expected_err is not None: if err_ignore is not None: err = "\n".join([line for line in err.split('\n') if err_ignore not in line]) err_correct = expected_err in err if err_contains else expected_err == err if not err_correct: - raise Exception(('run_command unexpected stderr', - "expected '%s', actual '%s'" % (expected_err, err))) + raise Exception(f"run_command unexpected stderr. Expected '{expected_err}', actual '{err}'") return out diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index 6aa9d6a200b..8b2c787978e 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -88,16 +88,20 @@ Result action(Lexer& in) { return in.err("expected action"); } -// (module id? binary string*) -// (module id? quote string*) -// (module ...) +// (module id binary string*) +// (module id quote string*) +// (module definition id? ...) Result wastModule(Lexer& in, bool maybeInvalid = false) { Lexer reset = in; if (!in.takeSExprStart("module"sv)) { return in.err("expected module"); } - // TODO: use ID? - [[maybe_unused]] auto id = in.takeID(); + + bool isDefinition = in.takeKeyword("definition"sv); + + // We'll read this again in the 'inline module' case + (void)in.takeID(); + QuotedModuleType type; if (in.takeKeyword("quote"sv)) { type = QuotedModuleType::Text; @@ -118,14 +122,31 @@ Result wastModule(Lexer& in, bool maybeInvalid = false) { } } std::string mod(reset.next().substr(0, in.getPos() - reset.getPos())); - return QuotedModule{QuotedModuleType::Text, mod}; + return QuotedModule{!isDefinition, QuotedModuleType::Text, mod}; } else { - // This is a normal inline module that should be parseable. Reset to the - // start and parse it normally. + // In this case the module is mostly valid WAT, unless it is a module + // definition in which case it will begin with (module definition ...) in = std::move(reset); + + // We already checked this before resetting + if (!in.takeSExprStart("module"sv)) { + return in.err("expected module"); + } + + bool isDefinition = in.takeKeyword("definition"sv); + auto wasm = std::make_shared(); + if (auto id = in.takeID()) { + wasm->name = *id; + } + wasm->features = FeatureSet::All; - CHECK_ERR(parseModule(*wasm, in)); + CHECK_ERR(parseModuleBody(*wasm, in)); + if (!in.takeRParen()) { + return in.err("expected end of module"); + } + + wasm->isDefinition = isDefinition; return wasm; } @@ -139,7 +160,7 @@ Result wastModule(Lexer& in, bool maybeInvalid = false) { return in.err("expected end of module"); } - return QuotedModule{type, ss.str()}; + return QuotedModule{isDefinition, type, ss.str()}; } Result nan(Lexer& in) { @@ -440,17 +461,64 @@ MaybeResult register_(Lexer& in) { return in.err("expected name"); } - // TODO: Do we need to use this optional id? - in.takeID(); + auto instanceName = in.takeID(); if (!in.takeRParen()) { - // TODO: handle optional module id. return in.err("expected end of register command"); } - return Register{*name}; + + return Register{.name = *name, .instanceName = instanceName}; +} + +// (module instance instance_name module_name) +MaybeResult instantiation(Lexer& in) { + if (!in.takeSExprStart("module"sv)) { + std::optional actual = in.peekKeyword(); + return in.err((std::stringstream() << "expected `module` keyword but got " + << actual.value_or("")) + .str()); + } + + if (!in.takeKeyword("instance"sv)) { + // This is not a module instance and probably a module instead. + return {}; + } + + auto instanceId = in.takeID(); + if (!instanceId.has_value()) { + return in.err("expected an instance id in module instantiation"); + } + auto moduleId = in.takeID(); + if (!moduleId.has_value()) { + return in.err("expected a module id in module instantiation"); + } + + if (!in.takeRParen()) { + return in.err("expected end of module instantiation"); + } + + return ModuleInstantiation{.moduleName = *moduleId, + .instanceName = *instanceId}; +} + +using ModuleOrInstantiation = std::variant; + +// (module instance ...) | (module ...) +Result moduleOrInstantiation(Lexer& in) { + auto reset = in; + + if (auto inst = instantiation(in)) { + CHECK_ERR(inst); + return *inst; + } + in = reset; + + auto module = wastModule(in); + CHECK_ERR(module); + return *module; } -// module | register | action | assertion +// instantiate | module | register | action | assertion Result command(Lexer& in) { if (auto cmd = register_(in)) { CHECK_ERR(cmd); @@ -464,9 +532,12 @@ Result command(Lexer& in) { CHECK_ERR(cmd); return *cmd; } - auto mod = wastModule(in); - CHECK_ERR(mod); - return *mod; + auto cmd = moduleOrInstantiation(in); + CHECK_ERR(cmd); + + return std::visit( + [](const auto& modOrInstantiation) -> WASTCommand { return modOrInstantiation; }, + *cmd); } #pragma GCC diagnostic push diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp index 26b2bbad06f..0c738327156 100644 --- a/src/parser/wat-parser.cpp +++ b/src/parser/wat-parser.cpp @@ -137,4 +137,8 @@ Result<> parseModule(Module& wasm, Lexer& lexer) { return doParseModule(wasm, lexer, true); } +Result<> parseModuleBody(Module& wasm, Lexer& lexer) { + return doParseModule(wasm, lexer, true); +} + } // namespace wasm::WATParser diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h index 34e9b69f8f0..cac69a0be42 100644 --- a/src/parser/wat-parser.h +++ b/src/parser/wat-parser.h @@ -34,6 +34,11 @@ Result<> parseModule(Module& wasm, // file. Result<> parseModule(Module& wasm, Lexer& lexer); +// Similar to `parseModule`, parse the fields of a single WAT module (after the +// initial module definition including its name) and stop at the ending right +// paren. +Result<> parseModuleBody(Module& wasm, Lexer& lexer); + Result parseConst(Lexer& lexer); #pragma GCC diagnostic push @@ -88,6 +93,7 @@ struct AssertAction { enum class QuotedModuleType { Text, Binary }; struct QuotedModule { + bool isDefinition = false; QuotedModuleType type; std::string module; }; @@ -104,10 +110,18 @@ struct AssertModule { using Assertion = std::variant; struct Register { + // TODO: rename this to distinguish it from instanceName Name name; + std::optional instanceName = std::nullopt; +}; + +struct ModuleInstantiation { + Name moduleName; + Name instanceName; }; -using WASTCommand = std::variant; +using WASTCommand = + std::variant; struct ScriptEntry { WASTCommand cmd; diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp index 1ee9a5a4c5f..e219e5c5622 100644 --- a/src/tools/wasm-shell.cpp +++ b/src/tools/wasm-shell.cpp @@ -39,10 +39,13 @@ using namespace wasm; using namespace wasm::WATParser; struct Shell { + // Keyed by module name std::map> modules; + + // Keyed by instance name std::map> interfaces; std::map> instances; - // used for imports + // used for imports, keyed by instance name std::map> linkedInstances; Name lastModule; @@ -86,6 +89,10 @@ struct Shell { return Ok{}; } else if (auto* assn = std::get_if(&cmd)) { return doAssertion(*assn); + } else if (auto* instantiateModule = + std::get_if(&cmd)) { + return instantiate(*modules[instantiateModule->moduleName], + instantiateModule->instanceName); } else { WASM_UNREACHABLE("unexpected command"); } @@ -98,6 +105,7 @@ struct Shell { switch (quoted->type) { case QuotedModuleType::Text: { CHECK_ERR(parseModule(*wasm, quoted->module)); + wasm->isDefinition = quoted->isDefinition; break; } case QuotedModuleType::Binary: { @@ -111,6 +119,7 @@ struct Shell { p.dump(ss); return Err{ss.str()}; } + wasm->isDefinition = quoted->isDefinition; break; } } @@ -133,20 +142,26 @@ struct Shell { using InstanceInfo = std::pair, std::shared_ptr>; - Result instantiate(Module& wasm) { + Result<> instantiate(Module& wasm, Name instanceName) { + + std::shared_ptr interface; + std::shared_ptr instance; try { - auto interface = - std::make_shared(linkedInstances); - auto instance = + interface = std::make_shared(linkedInstances); + instance = std::make_shared(wasm, interface.get(), linkedInstances); + // This is not an optimization: we want to execute anything, even relaxed // SIMD instructions. instance->setRelaxedBehavior(ModuleRunner::RelaxedBehavior::Execute); instance->instantiate(); - return {{std::move(interface), std::move(instance)}}; } catch (...) { return Err{"failed to instantiate module"}; } + + interfaces[instanceName] = std::move(interface); + instances[instanceName] = std::move(instance); + return Ok{}; } Result<> addModule(WASTModule& mod) { @@ -156,20 +171,20 @@ struct Shell { auto wasm = *module; CHECK_ERR(validateModule(*wasm)); - auto instanceInfo = instantiate(*wasm); - CHECK_ERR(instanceInfo); - - auto& [interface, instance] = *instanceInfo; lastModule = wasm->name; - modules[lastModule] = std::move(wasm); - interfaces[lastModule] = std::move(interface); - instances[lastModule] = std::move(instance); + modules[lastModule] = wasm; + if (!wasm->isDefinition) { + CHECK_ERR(instantiate(*wasm, wasm->name)); + } return Ok{}; } Result<> addRegistration(Register& reg) { - auto instance = instances[lastModule]; + wasm::Name instanceName = + reg.instanceName.has_value() ? *reg.instanceName : lastModule; + + auto instance = instances[instanceName]; if (!instance) { return Err{"register called without a module"}; } @@ -177,9 +192,11 @@ struct Shell { // We copy pointers as a registered module's name might still be used // in an assertion or invoke command. + modules[reg.name] = modules[lastModule]; - interfaces[reg.name] = interfaces[lastModule]; - instances[reg.name] = instances[lastModule]; + + interfaces[reg.name] = interfaces[instanceName]; + instances[reg.name] = instances[instanceName]; return Ok{}; } @@ -441,7 +458,7 @@ struct Shell { return Err{"expected invalid module"}; } - auto instance = instantiate(**wasm); + auto instance = instantiate(**wasm, (*wasm)->name); if (auto* err = instance.getErr()) { if (assn.type == ModuleAssertionType::Unlinkable || assn.type == ModuleAssertionType::Trap) { @@ -521,7 +538,9 @@ struct Shell { Register registration{"spectest"}; auto registered = addRegistration(registration); if (registered.getErr()) { - WASM_UNREACHABLE("error registering spectest module"); + WASM_UNREACHABLE((std::string("error registering spectest module: ") + + registered.getErr()->msg) + .c_str()); } } }; diff --git a/src/wasm.h b/src/wasm.h index 605c7025395..37281b6ca3d 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2519,6 +2519,8 @@ class Module { Name start; + bool isDefinition = false; + std::vector customSections; // Optional user section IR representation. From e5c9ab4d49cd1d15fb446dc52aac540fc57eebb6 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Thu, 20 Nov 2025 00:18:27 +0000 Subject: [PATCH 02/14] Address PR comments --- scripts/test/shared.py | 1 - scripts/test/support.py | 4 +--- src/parser/wast-parser.cpp | 10 +++++----- src/parser/wat-parser.h | 6 ++++-- src/tools/wasm-shell.cpp | 16 +++++++--------- src/tools/wasm2js.cpp | 10 ++++++++-- src/wasm.h | 2 -- 7 files changed, 25 insertions(+), 24 deletions(-) diff --git a/scripts/test/shared.py b/scripts/test/shared.py index 29b83cdb792..dfdbaaff0d7 100644 --- a/scripts/test/shared.py +++ b/scripts/test/shared.py @@ -443,7 +443,6 @@ def get_tests(test_dir, extensions=[], recursive=False): 'imports.wast', # Missing validation of missing function on instantiation 'proposals/threads/imports.wast', # Missing memory type validation on instantiation 'linking.wast', # Missing function type validation on instantiation - # 'memory.wast', # Requires wast `module definition` support 'proposals/threads/memory.wast', # Missing memory type validation on instantiation 'memory64-imports.wast', # Missing validation on instantiation 'annotations.wast', # String annotations IDs should be allowed diff --git a/scripts/test/support.py b/scripts/test/support.py index 5be99bb05ea..7d4be1f4f63 100644 --- a/scripts/test/support.py +++ b/scripts/test/support.py @@ -151,14 +151,12 @@ def to_end(j): break i = to_end(start + 1) chunk = wast[start:i] - if QUOTED.match(chunk): + if QUOTED.match(chunk) or MODULE_DEFINITION_OR_INSTANCE.match(chunk): # There may be assertions after this quoted module, but we aren't # returning the module, so we need to skip the assertions as well. ignoring_quoted = True continue if chunk.startswith('(module'): - if MODULE_DEFINITION_OR_INSTANCE.match(chunk): - continue ignoring_quoted = False ret += [(chunk, [])] elif chunk.startswith('(assert_invalid'): diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index 8b2c787978e..bbe207f4c6c 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -91,6 +91,7 @@ Result action(Lexer& in) { // (module id binary string*) // (module id quote string*) // (module definition id? ...) +// (module definition id? binary ...) Result wastModule(Lexer& in, bool maybeInvalid = false) { Lexer reset = in; if (!in.takeSExprStart("module"sv)) { @@ -122,7 +123,7 @@ Result wastModule(Lexer& in, bool maybeInvalid = false) { } } std::string mod(reset.next().substr(0, in.getPos() - reset.getPos())); - return QuotedModule{!isDefinition, QuotedModuleType::Text, mod}; + return WASTModule{isDefinition, QuotedModule{QuotedModuleType::Text, mod}}; } else { // In this case the module is mostly valid WAT, unless it is a module // definition in which case it will begin with (module definition ...) @@ -146,8 +147,7 @@ Result wastModule(Lexer& in, bool maybeInvalid = false) { return in.err("expected end of module"); } - wasm->isDefinition = isDefinition; - return wasm; + return WASTModule{isDefinition, wasm}; } // We have a quote or binary module. Collect its contents. @@ -160,7 +160,7 @@ Result wastModule(Lexer& in, bool maybeInvalid = false) { return in.err("expected end of module"); } - return QuotedModule{isDefinition, type, ss.str()}; + return WASTModule{isDefinition, QuotedModule{type, ss.str()}}; } Result nan(Lexer& in) { @@ -558,7 +558,7 @@ Result wast(Lexer& in) { // No, that wasn't the problem. Return the original error. return Err{err->msg}; } - cmds.push_back({WASTModule{std::move(wasm)}, line}); + cmds.push_back({WASTModule{/*isDefinition=*/false, std::move(wasm)}, line}); return cmds; } CHECK_ERR(cmd); diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h index cac69a0be42..29a56518079 100644 --- a/src/parser/wat-parser.h +++ b/src/parser/wat-parser.h @@ -93,12 +93,14 @@ struct AssertAction { enum class QuotedModuleType { Text, Binary }; struct QuotedModule { - bool isDefinition = false; QuotedModuleType type; std::string module; }; -using WASTModule = std::variant>; +struct WASTModule { + bool isDefinition = false; + std::variant> module; +}; enum class ModuleAssertionType { Trap, Malformed, Invalid, Unlinkable }; diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp index e219e5c5622..873a53782f0 100644 --- a/src/tools/wasm-shell.cpp +++ b/src/tools/wasm-shell.cpp @@ -39,13 +39,13 @@ using namespace wasm; using namespace wasm::WATParser; struct Shell { - // Keyed by module name + // Keyed by module name. std::map> modules; - // Keyed by instance name + // Keyed by instance name. std::map> interfaces; std::map> instances; - // used for imports, keyed by instance name + // Used for imports, keyed by instance name. std::map> linkedInstances; Name lastModule; @@ -100,12 +100,11 @@ struct Shell { Result> makeModule(WASTModule& mod) { std::shared_ptr wasm; - if (auto* quoted = std::get_if(&mod)) { + if (auto* quoted = std::get_if(&mod.module)) { wasm = std::make_shared(); switch (quoted->type) { case QuotedModuleType::Text: { CHECK_ERR(parseModule(*wasm, quoted->module)); - wasm->isDefinition = quoted->isDefinition; break; } case QuotedModuleType::Binary: { @@ -119,11 +118,10 @@ struct Shell { p.dump(ss); return Err{ss.str()}; } - wasm->isDefinition = quoted->isDefinition; break; } } - } else if (auto* ptr = std::get_if>(&mod)) { + } else if (auto* ptr = std::get_if>(&mod.module)) { wasm = *ptr; } else { WASM_UNREACHABLE("unexpected module kind"); @@ -173,7 +171,7 @@ struct Shell { lastModule = wasm->name; modules[lastModule] = wasm; - if (!wasm->isDefinition) { + if (!mod.isDefinition) { CHECK_ERR(instantiate(*wasm, wasm->name)); } @@ -530,7 +528,7 @@ struct Shell { // print_* functions are handled separately, no need to define here. - WASTModule mod = std::move(spectest); + WASTModule mod = {/*isDefinition=*/false, spectest}; auto added = addModule(mod); if (added.getErr()) { WASM_UNREACHABLE("error building spectest module"); diff --git a/src/tools/wasm2js.cpp b/src/tools/wasm2js.cpp index e0f61a21766..f0c0d59585e 100644 --- a/src/tools/wasm2js.cpp +++ b/src/tools/wasm2js.cpp @@ -788,7 +788,10 @@ void AssertionEmitter::emit() { Name testFuncName("check" + std::to_string(i)); auto& cmd = script[i].cmd; if (auto* mod = std::get_if(&cmd)) { - if (auto* w = std::get_if>(mod)) { + if (mod->isDefinition) { + Fatal() << "Module definition is not supported on line " << script[i].line; + } + if (auto* w = std::get_if>(&mod->module)) { wasm = *w; // We have already done the parse, but we still do this to apply the // features from the command line. @@ -951,7 +954,10 @@ int main(int argc, const char* argv[]) { Fatal() << "expected module"; } if (auto* mod = std::get_if(&(*script)[0].cmd)) { - if (auto* w = std::get_if>(mod)) { + if (mod->isDefinition) { + Fatal() << "module definition is not supported"; + } + if (auto* w = std::get_if>(&mod->module)) { wasm = *w; // This isn't actually before the parse, but we can't apply the // feature options any earlier. FIXME. diff --git a/src/wasm.h b/src/wasm.h index 37281b6ca3d..605c7025395 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2519,8 +2519,6 @@ class Module { Name start; - bool isDefinition = false; - std::vector customSections; // Optional user section IR representation. From 93e80f1bd553546859d12ce78242acb8d506b438 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Thu, 20 Nov 2025 00:27:56 +0000 Subject: [PATCH 03/14] Fix python lint --- scripts/test/support.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/test/support.py b/scripts/test/support.py index 7d4be1f4f63..3c14bc8e11c 100644 --- a/scripts/test/support.py +++ b/scripts/test/support.py @@ -92,6 +92,7 @@ def untar(tarfile, outdir): MODULE_DEFINITION_OR_INSTANCE = re.compile(r'(?m)\(module\s+(instance|definition)') + def split_wast(wastFile): ''' Returns a list of pairs of module definitions and assertions. @@ -133,7 +134,7 @@ def to_end(j): return j i = 0 - ignoring_quoted = False + ignoring_assertions = False while i >= 0: start = wast.find('(', i) if start >= 0 and wast[start + 1] == ';': @@ -154,14 +155,14 @@ def to_end(j): if QUOTED.match(chunk) or MODULE_DEFINITION_OR_INSTANCE.match(chunk): # There may be assertions after this quoted module, but we aren't # returning the module, so we need to skip the assertions as well. - ignoring_quoted = True + ignoring_assertions = True continue if chunk.startswith('(module'): - ignoring_quoted = False + ignoring_assertions = False ret += [(chunk, [])] elif chunk.startswith('(assert_invalid'): continue - elif chunk.startswith(('(assert', '(invoke', '(register')) and not ignoring_quoted: + elif chunk.startswith(('(assert', '(invoke', '(register')) and not ignoring_assertions: # ret may be empty if there are some asserts before the first # module. in that case these are asserts *without* a module, which # are valid (they may check something that doesn't refer to a module From dcc8fb0d3c41b7ce22f25a7469012694c0561afd Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Thu, 20 Nov 2025 05:08:48 +0000 Subject: [PATCH 04/14] Fix comment and remove designated initializer for C++17 --- src/parser/wast-parser.cpp | 2 +- src/parser/wat-parser.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index bbe207f4c6c..8090b759fc4 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -467,7 +467,7 @@ MaybeResult register_(Lexer& in) { return in.err("expected end of register command"); } - return Register{.name = *name, .instanceName = instanceName}; + return Register{*name, instanceName}; } // (module instance instance_name module_name) diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h index 29a56518079..87e1f034ff6 100644 --- a/src/parser/wat-parser.h +++ b/src/parser/wat-parser.h @@ -112,7 +112,7 @@ struct AssertModule { using Assertion = std::variant; struct Register { - // TODO: rename this to distinguish it from instanceName + // TODO: Rename this to distinguish it from instanceName. Name name; std::optional instanceName = std::nullopt; }; From 6b17b4e8f2c0e03443748f8678a6a5dbbddee7cb Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Thu, 20 Nov 2025 06:02:16 +0000 Subject: [PATCH 05/14] Remove designated initialized for c++17 --- src/parser/wast-parser.cpp | 3 +-- src/tools/wasm-shell.cpp | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index 8090b759fc4..6052e588b49 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -497,8 +497,7 @@ MaybeResult instantiation(Lexer& in) { return in.err("expected end of module instantiation"); } - return ModuleInstantiation{.moduleName = *moduleId, - .instanceName = *instanceId}; + return ModuleInstantiation{*moduleId, *instanceId}; } using ModuleOrInstantiation = std::variant; diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp index 873a53782f0..3c22c24db6c 100644 --- a/src/tools/wasm-shell.cpp +++ b/src/tools/wasm-shell.cpp @@ -190,7 +190,6 @@ struct Shell { // We copy pointers as a registered module's name might still be used // in an assertion or invoke command. - modules[reg.name] = modules[lastModule]; interfaces[reg.name] = interfaces[instanceName]; From c961cda902519bba967aaa52050d4069c9b37ee8 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Thu, 20 Nov 2025 06:13:33 +0000 Subject: [PATCH 06/14] Fix clang-format --- src/parser/wast-parser.cpp | 3 ++- src/tools/wasm2js.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index 6052e588b49..a6f553e9113 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -557,7 +557,8 @@ Result wast(Lexer& in) { // No, that wasn't the problem. Return the original error. return Err{err->msg}; } - cmds.push_back({WASTModule{/*isDefinition=*/false, std::move(wasm)}, line}); + cmds.push_back( + {WASTModule{/*isDefinition=*/false, std::move(wasm)}, line}); return cmds; } CHECK_ERR(cmd); diff --git a/src/tools/wasm2js.cpp b/src/tools/wasm2js.cpp index f0c0d59585e..c6277c6c30f 100644 --- a/src/tools/wasm2js.cpp +++ b/src/tools/wasm2js.cpp @@ -789,7 +789,8 @@ void AssertionEmitter::emit() { auto& cmd = script[i].cmd; if (auto* mod = std::get_if(&cmd)) { if (mod->isDefinition) { - Fatal() << "Module definition is not supported on line " << script[i].line; + Fatal() << "Module definition is not supported on line " + << script[i].line; } if (auto* w = std::get_if>(&mod->module)) { wasm = *w; From 077f1742386d47fa263b87c7c181620b35685730 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Thu, 20 Nov 2025 06:33:48 +0000 Subject: [PATCH 07/14] Fix clang-format --- src/parser/wast-parser.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index a6f553e9113..02d30e5d6c2 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -535,7 +535,9 @@ Result command(Lexer& in) { CHECK_ERR(cmd); return std::visit( - [](const auto& modOrInstantiation) -> WASTCommand { return modOrInstantiation; }, + [](const auto& modOrInstantiation) -> WASTCommand { + return modOrInstantiation; + }, *cmd); } From 4470a3cc0ab68ea5a232ab2c05deee085c5a9aa6 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Tue, 25 Nov 2025 21:05:54 +0000 Subject: [PATCH 08/14] Address PR comments about parsing, style --- src/parser/parse-1-decls.cpp | 4 +- src/parser/parsers.h | 13 +++++-- src/parser/wast-parser.cpp | 4 +- src/parser/wat-parser-internal.h | 4 +- src/parser/wat-parser.cpp | 65 +++++++++++++++++++++----------- src/tools/wasm-shell.cpp | 7 +--- 6 files changed, 62 insertions(+), 35 deletions(-) diff --git a/src/parser/parse-1-decls.cpp b/src/parser/parse-1-decls.cpp index 1c753cc611a..7a4ad34ac23 100644 --- a/src/parser/parse-1-decls.cpp +++ b/src/parser/parse-1-decls.cpp @@ -18,6 +18,8 @@ namespace wasm::WATParser { -Result<> parseDecls(ParseDeclsCtx& decls) { return module(decls); } +Result<> parseModule(ParseDeclsCtx& decls) { return module(decls); } + +Result<> parseModuleBody(ParseDeclsCtx& decls) { return moduleBody(decls); } } // namespace wasm::WATParser diff --git a/src/parser/parsers.h b/src/parser/parsers.h index 3733761bc2c..266ee626a07 100644 --- a/src/parser/parsers.h +++ b/src/parser/parsers.h @@ -3881,6 +3881,15 @@ template MaybeResult<> modulefield(Ctx& ctx) { return ctx.in.err("unrecognized module field"); } +// (m:modulefield)* +template Result<> moduleBody(Ctx& ctx) { + while (auto field = modulefield(ctx)) { + CHECK_ERR(field); + } + + return Ok{}; +} + // module ::= '(' 'module' id? (m:modulefield)* ')' // | (m:modulefield)* eof template Result<> module(Ctx& ctx) { @@ -3892,9 +3901,7 @@ template Result<> module(Ctx& ctx) { } } - while (auto field = modulefield(ctx)) { - CHECK_ERR(field); - } + CHECK_ERR(moduleBody(ctx)); if (outer && !ctx.in.takeRParen()) { return ctx.in.err("expected end of module"); diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index 02d30e5d6c2..a802cc30fe6 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -485,11 +485,11 @@ MaybeResult instantiation(Lexer& in) { } auto instanceId = in.takeID(); - if (!instanceId.has_value()) { + if (!instanceId) { return in.err("expected an instance id in module instantiation"); } auto moduleId = in.takeID(); - if (!moduleId.has_value()) { + if (!moduleId) { return in.err("expected a module id in module instantiation"); } diff --git a/src/parser/wat-parser-internal.h b/src/parser/wat-parser-internal.h index 00c96abd42e..612fd0a4bc7 100644 --- a/src/parser/wat-parser-internal.h +++ b/src/parser/wat-parser-internal.h @@ -22,7 +22,9 @@ namespace wasm::WATParser { -Result<> parseDecls(ParseDeclsCtx& decls); +Result<> parseModule(ParseDeclsCtx& decls); + +Result<> parseModuleBody(ParseDeclsCtx& decls); Result<> parseTypeDefs( ParseDeclsCtx& decls, diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp index 0c738327156..d03967bc08b 100644 --- a/src/parser/wat-parser.cpp +++ b/src/parser/wat-parser.cpp @@ -83,37 +83,58 @@ void propagateDebugLocations(Module& wasm) { runner.run(); } -// Parse module-level declarations. +// After doing the initial pass, parse types, imports, et.c +Result<> parseModuleWithDecls(ParseDeclsCtx& decls) { + auto typeIndices = createIndexMap(decls.in, decls.typeDefs); + CHECK_ERR(typeIndices); + + std::vector types; + std::unordered_map> typeNames; + CHECK_ERR(parseTypeDefs(decls, decls.in, *typeIndices, types, typeNames)); -// Parse type definitions. + // Parse implicit type definitions and map typeuses without explicit types to + // the correct types. + std::unordered_map implicitTypes; + CHECK_ERR( + parseImplicitTypeDefs(decls, decls.in, *typeIndices, types, implicitTypes)); -// Parse implicit type definitions and map typeuses without explicit types to -// the correct types. + CHECK_ERR( + parseModuleTypes(decls, decls.in, *typeIndices, types, implicitTypes)); + + CHECK_ERR(parseDefinitions( + decls, decls.in, *typeIndices, types, implicitTypes, typeNames)); + + propagateDebugLocations(decls.wasm); + + return Ok{}; +} Result<> doParseModule(Module& wasm, Lexer& input, bool allowExtra) { ParseDeclsCtx decls(input, wasm); - CHECK_ERR(parseDecls(decls)); + CHECK_ERR(parseModule(decls)); if (!allowExtra && !decls.in.empty()) { return decls.in.err("Unexpected tokens after module"); } - auto typeIndices = createIndexMap(decls.in, decls.typeDefs); - CHECK_ERR(typeIndices); - - std::vector types; - std::unordered_map> typeNames; - CHECK_ERR(parseTypeDefs(decls, input, *typeIndices, types, typeNames)); + CHECK_ERR(parseModuleWithDecls(decls)); - std::unordered_map implicitTypes; - CHECK_ERR( - parseImplicitTypeDefs(decls, input, *typeIndices, types, implicitTypes)); + // decls / parseModule made a copy of `input`. Advance `input` past the parsed + // module. + input = decls.in; + return Ok{}; +} - CHECK_ERR(parseModuleTypes(decls, input, *typeIndices, types, implicitTypes)); +Result<> doParseModuleBody(Module& wasm, Lexer& input, bool allowExtra) { + ParseDeclsCtx decls(input, wasm); + CHECK_ERR(parseModuleBody(decls)); + if (!allowExtra && !decls.in.empty()) { + return decls.in.err("Unexpected tokens after module"); + } - CHECK_ERR(parseDefinitions( - decls, input, *typeIndices, types, implicitTypes, typeNames)); + CHECK_ERR(parseModuleWithDecls(decls)); - propagateDebugLocations(wasm); + // decls / parseModuleBody made a copy of `input`. Advance `input` past the + // parsed module. input = decls.in; return Ok{}; @@ -125,20 +146,20 @@ Result<> parseModule(Module& wasm, std::string_view in, std::optional filename) { Lexer lexer(in, filename); - return doParseModule(wasm, lexer, false); + return doParseModule(wasm, lexer, /*allowExtra=*/false); } Result<> parseModule(Module& wasm, std::string_view in) { Lexer lexer(in); - return doParseModule(wasm, lexer, false); + return doParseModule(wasm, lexer, /*allowExtra=*/false); } Result<> parseModule(Module& wasm, Lexer& lexer) { - return doParseModule(wasm, lexer, true); + return doParseModule(wasm, lexer, /*allowExtra=*/true); } Result<> parseModuleBody(Module& wasm, Lexer& lexer) { - return doParseModule(wasm, lexer, true); + return doParseModuleBody(wasm, lexer, /*allowExtra=*/true); } } // namespace wasm::WATParser diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp index 3c22c24db6c..f3553b4b570 100644 --- a/src/tools/wasm-shell.cpp +++ b/src/tools/wasm-shell.cpp @@ -137,11 +137,7 @@ struct Shell { return Ok{}; } - using InstanceInfo = std::pair, - std::shared_ptr>; - Result<> instantiate(Module& wasm, Name instanceName) { - std::shared_ptr interface; std::shared_ptr instance; try { @@ -179,8 +175,7 @@ struct Shell { } Result<> addRegistration(Register& reg) { - wasm::Name instanceName = - reg.instanceName.has_value() ? *reg.instanceName : lastModule; + Name instanceName = reg.instanceName ? *reg.instanceName : lastModule; auto instance = instances[instanceName]; if (!instance) { From ff0a7eec48822768a2eb6aa2983f6633314d5a45 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Wed, 26 Nov 2025 19:59:04 +0000 Subject: [PATCH 09/14] PR fixes * Fixes for lastModule / lastInstance * Fix for python f-string in lower python versions * Fix for Module registration when some params aren't specified --- scripts/test/support.py | 2 +- src/parser/wat-parser.h | 7 ++++-- src/tools/wasm-shell.cpp | 46 +++++++++++++++++++++++++++++----------- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/scripts/test/support.py b/scripts/test/support.py index 3c14bc8e11c..c69c53fd42c 100644 --- a/scripts/test/support.py +++ b/scripts/test/support.py @@ -196,7 +196,7 @@ def run_command(cmd, expected_status=0, stderr=None, out, err = proc.communicate() code = proc.returncode if expected_status is not None and code != expected_status: - raise Exception(f"run_command `{" ".join(cmd)}` failed ({code}) {err or ""}") + raise Exception(f"run_command `{' '.join(cmd)}` failed ({code}) {err or ''}") if expected_err is not None: if err_ignore is not None: err = "\n".join([line for line in err.split('\n') if err_ignore not in line]) diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h index 87e1f034ff6..e448c7d35da 100644 --- a/src/parser/wat-parser.h +++ b/src/parser/wat-parser.h @@ -118,8 +118,11 @@ struct Register { }; struct ModuleInstantiation { - Name moduleName; - Name instanceName; + // If not specified, instantiate the most recent module definition. + std::optional moduleName; + + // If not specified, use the moduleName + std::optional instanceName; }; using WASTCommand = diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp index f3553b4b570..6a94cb8db70 100644 --- a/src/tools/wasm-shell.cpp +++ b/src/tools/wasm-shell.cpp @@ -48,7 +48,8 @@ struct Shell { // Used for imports, keyed by instance name. std::map> linkedInstances; - Name lastModule; + Name lastInstance; + std::optional lastModuleDefinition; Options& options; @@ -91,8 +92,7 @@ struct Shell { return doAssertion(*assn); } else if (auto* instantiateModule = std::get_if(&cmd)) { - return instantiate(*modules[instantiateModule->moduleName], - instantiateModule->instanceName); + return doInstantiate(*instantiateModule); } else { WASM_UNREACHABLE("unexpected command"); } @@ -137,6 +137,26 @@ struct Shell { return Ok{}; } + Result<> doInstantiate(ModuleInstantiation& instantiateModule) { + auto moduleDefinitionName = instantiateModule.moduleName + ? instantiateModule.moduleName + : lastModuleDefinition; + if (!moduleDefinitionName) { + return Err{"No module definition found in module instantiation, and no " + "previous module definition was found."}; + } + + auto instanceName = instantiateModule.instanceName + ? instantiateModule.instanceName + : lastModuleDefinition; + if (!instanceName) { + return Err{"No instance name found in module instantiation, and no " + "previous module definition was found."}; + } + + return instantiate(*modules[*moduleDefinitionName], *instanceName); + } + Result<> instantiate(Module& wasm, Name instanceName) { std::shared_ptr interface; std::shared_ptr instance; @@ -153,6 +173,8 @@ struct Shell { return Err{"failed to instantiate module"}; } + lastInstance = instanceName; + interfaces[instanceName] = std::move(interface); instances[instanceName] = std::move(instance); return Ok{}; @@ -165,17 +187,18 @@ struct Shell { auto wasm = *module; CHECK_ERR(validateModule(*wasm)); - lastModule = wasm->name; - modules[lastModule] = wasm; + modules[wasm->name] = wasm; if (!mod.isDefinition) { CHECK_ERR(instantiate(*wasm, wasm->name)); + } else { + lastModuleDefinition = wasm->name; } return Ok{}; } Result<> addRegistration(Register& reg) { - Name instanceName = reg.instanceName ? *reg.instanceName : lastModule; + Name instanceName = reg.instanceName ? *reg.instanceName : lastInstance; auto instance = instances[instanceName]; if (!instance) { @@ -185,8 +208,6 @@ struct Shell { // We copy pointers as a registered module's name might still be used // in an assertion or invoke command. - modules[reg.name] = modules[lastModule]; - interfaces[reg.name] = interfaces[instanceName]; instances[reg.name] = instances[instanceName]; return Ok{}; @@ -221,9 +242,9 @@ struct Shell { } ActionResult doAction(Action& act) { - assert(instances[lastModule].get()); + assert(instances[lastInstance].get()); if (auto* invoke = std::get_if(&act)) { - auto it = instances.find(invoke->base ? *invoke->base : lastModule); + auto it = instances.find(invoke->base ? *invoke->base : lastInstance); if (it == instances.end()) { return TrapResult{}; } @@ -248,7 +269,7 @@ struct Shell { } return flow.values; } else if (auto* get = std::get_if(&act)) { - auto it = instances.find(get->base ? *get->base : lastModule); + auto it = instances.find(get->base ? *get->base : lastInstance); if (it == instances.end()) { return TrapResult{}; } @@ -527,7 +548,8 @@ struct Shell { if (added.getErr()) { WASM_UNREACHABLE("error building spectest module"); } - Register registration{"spectest"}; + Register registration{/*name=*/"spectest"}; + modules["spectest"] = spectest; auto registered = addRegistration(registration); if (registered.getErr()) { WASM_UNREACHABLE((std::string("error registering spectest module: ") + From e1ecc24058cb165b1f7735ebc1c5cd25e35a6207 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Wed, 26 Nov 2025 20:09:04 +0000 Subject: [PATCH 10/14] Fix some comments about disabled tests --- scripts/test/shared.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/test/shared.py b/scripts/test/shared.py index dfdbaaff0d7..c96257e743f 100644 --- a/scripts/test/shared.py +++ b/scripts/test/shared.py @@ -447,8 +447,10 @@ def get_tests(test_dir, extensions=[], recursive=False): 'memory64-imports.wast', # Missing validation on instantiation 'annotations.wast', # String annotations IDs should be allowed 'id.wast', # Empty IDs should be disallowed - 'instance.wast', # Requires wast `module definition` support - 'table64.wast', # Requires wast `module definition` support + # Requires correct handling of tag imports from different instances of the same module, + # ref.null wast constants, and splitting for module instances + 'instance.wast', + 'table64.wast', # Requires validations for table size 'table_grow.wast', # Incorrect table linking semantics in interpreter 'tag.wast', # Non-empty tag results allowed by stack switching 'try_table.wast', # Requires try_table interpretation @@ -472,7 +474,7 @@ def get_tests(test_dir, extensions=[], recursive=False): 'type-rec.wast', # Missing function type validation on instantiation 'type-subtyping.wast', # ShellExternalInterface::callTable does not handle subtyping 'call_indirect.wast', # Bug with 64-bit inline element segment parsing - 'memory64.wast', # Requires wast `module definition` support + 'memory64.wast', # Requires validations for memory size 'imports0.wast', # Missing memory type validation on instantiation 'imports2.wast', # Missing memory type validation on instantiation 'imports3.wast', # Missing memory type validation on instantiation From 23351dbb6ae470e9993caea2d24a01d38554a05c Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Fri, 28 Nov 2025 01:04:27 +0000 Subject: [PATCH 11/14] Make module name and instance name optional in WAST parser --- src/parser/wast-parser.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index a802cc30fe6..8557903bebd 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -470,7 +470,7 @@ MaybeResult register_(Lexer& in) { return Register{*name, instanceName}; } -// (module instance instance_name module_name) +// (module instance instance_name? module_name?) MaybeResult instantiation(Lexer& in) { if (!in.takeSExprStart("module"sv)) { std::optional actual = in.peekKeyword(); @@ -485,19 +485,13 @@ MaybeResult instantiation(Lexer& in) { } auto instanceId = in.takeID(); - if (!instanceId) { - return in.err("expected an instance id in module instantiation"); - } auto moduleId = in.takeID(); - if (!moduleId) { - return in.err("expected a module id in module instantiation"); - } if (!in.takeRParen()) { return in.err("expected end of module instantiation"); } - return ModuleInstantiation{*moduleId, *instanceId}; + return ModuleInstantiation{moduleId, instanceId}; } using ModuleOrInstantiation = std::variant; From 07db5155a19207f513e1ed420f783d06f72da309 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Mon, 1 Dec 2025 17:52:25 +0000 Subject: [PATCH 12/14] Extract out function for parseModuleBody --- src/parser/parse-1-decls.cpp | 4 +- src/parser/parsers.h | 13 +++++-- src/parser/wat-parser-internal.h | 4 +- src/parser/wat-parser.cpp | 67 ++++++++++++++++++++++---------- src/parser/wat-parser.h | 5 +++ 5 files changed, 67 insertions(+), 26 deletions(-) diff --git a/src/parser/parse-1-decls.cpp b/src/parser/parse-1-decls.cpp index 1c753cc611a..7a4ad34ac23 100644 --- a/src/parser/parse-1-decls.cpp +++ b/src/parser/parse-1-decls.cpp @@ -18,6 +18,8 @@ namespace wasm::WATParser { -Result<> parseDecls(ParseDeclsCtx& decls) { return module(decls); } +Result<> parseModule(ParseDeclsCtx& decls) { return module(decls); } + +Result<> parseModuleBody(ParseDeclsCtx& decls) { return moduleBody(decls); } } // namespace wasm::WATParser diff --git a/src/parser/parsers.h b/src/parser/parsers.h index f18b8216c81..360878848c5 100644 --- a/src/parser/parsers.h +++ b/src/parser/parsers.h @@ -3882,6 +3882,15 @@ template MaybeResult<> modulefield(Ctx& ctx) { return ctx.in.err("unrecognized module field"); } +// (m:modulefield)* +template Result<> moduleBody(Ctx& ctx) { + while (auto field = modulefield(ctx)) { + CHECK_ERR(field); + } + + return Ok{}; +} + // module ::= '(' 'module' id? (m:modulefield)* ')' // | (m:modulefield)* eof template Result<> module(Ctx& ctx) { @@ -3893,9 +3902,7 @@ template Result<> module(Ctx& ctx) { } } - while (auto field = modulefield(ctx)) { - CHECK_ERR(field); - } + CHECK_ERR(moduleBody(ctx)); if (outer && !ctx.in.takeRParen()) { return ctx.in.err("expected end of module"); diff --git a/src/parser/wat-parser-internal.h b/src/parser/wat-parser-internal.h index 00c96abd42e..612fd0a4bc7 100644 --- a/src/parser/wat-parser-internal.h +++ b/src/parser/wat-parser-internal.h @@ -22,7 +22,9 @@ namespace wasm::WATParser { -Result<> parseDecls(ParseDeclsCtx& decls); +Result<> parseModule(ParseDeclsCtx& decls); + +Result<> parseModuleBody(ParseDeclsCtx& decls); Result<> parseTypeDefs( ParseDeclsCtx& decls, diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp index 26b2bbad06f..b23bcccc718 100644 --- a/src/parser/wat-parser.cpp +++ b/src/parser/wat-parser.cpp @@ -83,37 +83,58 @@ void propagateDebugLocations(Module& wasm) { runner.run(); } -// Parse module-level declarations. +// After doing the initial pass, parse types, imports, etc. +Result<> parseModuleWithDecls(ParseDeclsCtx& decls) { + auto typeIndices = createIndexMap(decls.in, decls.typeDefs); + CHECK_ERR(typeIndices); + + std::vector types; + std::unordered_map> typeNames; + CHECK_ERR(parseTypeDefs(decls, decls.in, *typeIndices, types, typeNames)); + + // Parse implicit type definitions and map typeuses without explicit types to + // the correct types. + std::unordered_map implicitTypes; + CHECK_ERR( + parseImplicitTypeDefs(decls, decls.in, *typeIndices, types, implicitTypes)); -// Parse type definitions. + CHECK_ERR( + parseModuleTypes(decls, decls.in, *typeIndices, types, implicitTypes)); + + CHECK_ERR(parseDefinitions( + decls, decls.in, *typeIndices, types, implicitTypes, typeNames)); + + propagateDebugLocations(decls.wasm); -// Parse implicit type definitions and map typeuses without explicit types to -// the correct types. + return Ok{}; +} Result<> doParseModule(Module& wasm, Lexer& input, bool allowExtra) { ParseDeclsCtx decls(input, wasm); - CHECK_ERR(parseDecls(decls)); + CHECK_ERR(parseModule(decls)); if (!allowExtra && !decls.in.empty()) { return decls.in.err("Unexpected tokens after module"); } - auto typeIndices = createIndexMap(decls.in, decls.typeDefs); - CHECK_ERR(typeIndices); + CHECK_ERR(parseModuleWithDecls(decls)); - std::vector types; - std::unordered_map> typeNames; - CHECK_ERR(parseTypeDefs(decls, input, *typeIndices, types, typeNames)); - - std::unordered_map implicitTypes; - CHECK_ERR( - parseImplicitTypeDefs(decls, input, *typeIndices, types, implicitTypes)); + // decls / parseModule made a copy of `input`. Advance `input` past the parsed + // module. + input = decls.in; + return Ok{}; +} - CHECK_ERR(parseModuleTypes(decls, input, *typeIndices, types, implicitTypes)); +Result<> doParseModuleBody(Module& wasm, Lexer& input, bool allowExtra) { + ParseDeclsCtx decls(input, wasm); + CHECK_ERR(parseModuleBody(decls)); + if (!allowExtra && !decls.in.empty()) { + return decls.in.err("Unexpected tokens after module"); + } - CHECK_ERR(parseDefinitions( - decls, input, *typeIndices, types, implicitTypes, typeNames)); + CHECK_ERR(parseModuleWithDecls(decls)); - propagateDebugLocations(wasm); + // decls / parseModuleBody made a copy of `input`. Advance `input` past the + // parsed module. input = decls.in; return Ok{}; @@ -125,16 +146,20 @@ Result<> parseModule(Module& wasm, std::string_view in, std::optional filename) { Lexer lexer(in, filename); - return doParseModule(wasm, lexer, false); + return doParseModule(wasm, lexer, /*allowExtra=*/false); } Result<> parseModule(Module& wasm, std::string_view in) { Lexer lexer(in); - return doParseModule(wasm, lexer, false); + return doParseModule(wasm, lexer, /*allowExtra=*/false); } Result<> parseModule(Module& wasm, Lexer& lexer) { - return doParseModule(wasm, lexer, true); + return doParseModule(wasm, lexer, /*allowExtra=*/true); +} + +Result<> parseModuleBody(Module& wasm, Lexer& lexer) { + return doParseModuleBody(wasm, lexer, /*allowExtra=*/true); } } // namespace wasm::WATParser diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h index 34e9b69f8f0..6bbe8601a4c 100644 --- a/src/parser/wat-parser.h +++ b/src/parser/wat-parser.h @@ -34,6 +34,11 @@ Result<> parseModule(Module& wasm, // file. Result<> parseModule(Module& wasm, Lexer& lexer); +// Similar to `parseModule`, parse the fields of a single WAT module (after the +// initial module definition including its name) and stop at the ending right +// paren. +Result<> parseModuleBody(Module& wasm, Lexer& lexer); + Result parseConst(Lexer& lexer); #pragma GCC diagnostic push From 00689279224039e82277cc112bb381f842bfcf40 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Mon, 1 Dec 2025 19:08:08 +0000 Subject: [PATCH 13/14] Cleanup WAST parsing logic --- src/parser/wast-parser.cpp | 48 +++++++++++--------------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp index 8557903bebd..52045bf347e 100644 --- a/src/parser/wast-parser.cpp +++ b/src/parser/wast-parser.cpp @@ -99,9 +99,9 @@ Result wastModule(Lexer& in, bool maybeInvalid = false) { } bool isDefinition = in.takeKeyword("definition"sv); + std::optional id = in.takeID(); - // We'll read this again in the 'inline module' case - (void)in.takeID(); + Lexer moduleBody = in; QuotedModuleType type; if (in.takeKeyword("quote"sv)) { @@ -127,17 +127,10 @@ Result wastModule(Lexer& in, bool maybeInvalid = false) { } else { // In this case the module is mostly valid WAT, unless it is a module // definition in which case it will begin with (module definition ...) - in = std::move(reset); - - // We already checked this before resetting - if (!in.takeSExprStart("module"sv)) { - return in.err("expected module"); - } - - bool isDefinition = in.takeKeyword("definition"sv); + in = std::move(moduleBody); auto wasm = std::make_shared(); - if (auto id = in.takeID()) { + if (id) { wasm->name = *id; } @@ -472,6 +465,7 @@ MaybeResult register_(Lexer& in) { // (module instance instance_name? module_name?) MaybeResult instantiation(Lexer& in) { + Lexer reset = in; if (!in.takeSExprStart("module"sv)) { std::optional actual = in.peekKeyword(); return in.err((std::stringstream() << "expected `module` keyword but got " @@ -481,6 +475,7 @@ MaybeResult instantiation(Lexer& in) { if (!in.takeKeyword("instance"sv)) { // This is not a module instance and probably a module instead. + in = reset; return {}; } @@ -494,23 +489,6 @@ MaybeResult instantiation(Lexer& in) { return ModuleInstantiation{moduleId, instanceId}; } -using ModuleOrInstantiation = std::variant; - -// (module instance ...) | (module ...) -Result moduleOrInstantiation(Lexer& in) { - auto reset = in; - - if (auto inst = instantiation(in)) { - CHECK_ERR(inst); - return *inst; - } - in = reset; - - auto module = wastModule(in); - CHECK_ERR(module); - return *module; -} - // instantiate | module | register | action | assertion Result command(Lexer& in) { if (auto cmd = register_(in)) { @@ -525,14 +503,14 @@ Result command(Lexer& in) { CHECK_ERR(cmd); return *cmd; } - auto cmd = moduleOrInstantiation(in); - CHECK_ERR(cmd); + if (auto cmd = instantiation(in)) { + CHECK_ERR(cmd); + return *cmd; + } - return std::visit( - [](const auto& modOrInstantiation) -> WASTCommand { - return modOrInstantiation; - }, - *cmd); + auto module = wastModule(in); + CHECK_ERR(module); + return *module; } #pragma GCC diagnostic push From 82575dd640a6dccd45f6bc6825197fdf5927ee14 Mon Sep 17 00:00:00 2001 From: stevenfontanella Date: Mon, 1 Dec 2025 21:18:23 +0000 Subject: [PATCH 14/14] Inline doParseModuleBody --- src/parser/wat-parser.cpp | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp index b23bcccc718..7f1f9fe16ce 100644 --- a/src/parser/wat-parser.cpp +++ b/src/parser/wat-parser.cpp @@ -124,22 +124,6 @@ Result<> doParseModule(Module& wasm, Lexer& input, bool allowExtra) { return Ok{}; } -Result<> doParseModuleBody(Module& wasm, Lexer& input, bool allowExtra) { - ParseDeclsCtx decls(input, wasm); - CHECK_ERR(parseModuleBody(decls)); - if (!allowExtra && !decls.in.empty()) { - return decls.in.err("Unexpected tokens after module"); - } - - CHECK_ERR(parseModuleWithDecls(decls)); - - // decls / parseModuleBody made a copy of `input`. Advance `input` past the - // parsed module. - input = decls.in; - - return Ok{}; -} - } // anonymous namespace Result<> parseModule(Module& wasm, @@ -159,7 +143,15 @@ Result<> parseModule(Module& wasm, Lexer& lexer) { } Result<> parseModuleBody(Module& wasm, Lexer& lexer) { - return doParseModuleBody(wasm, lexer, /*allowExtra=*/true); + ParseDeclsCtx decls(lexer, wasm); + CHECK_ERR(parseModuleBody(decls)); + CHECK_ERR(parseModuleWithDecls(decls)); + + // decls / parseModuleBody made a copy of `input`. Advance `input` past the + // parsed module. + lexer = decls.in; + + return Ok{}; } } // namespace wasm::WATParser