From 0ccf015c218ed1565855a34b1fc01fd917181343 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 20 May 2026 11:50:23 -0700 Subject: [PATCH] Fix OOB errors in the lexer Fix an OOB string_view access that was just trying to get a one-past-the-end pointer. Fix two locations where we could have been peeking empty input. Fixes #8732. --- src/parser/lexer.h | 10 +++++++++- test/gtest/wat-lexer.cpp | 9 +++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/parser/lexer.h b/src/parser/lexer.h index 2f5cb7a0291..e998c6db820 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -624,6 +624,9 @@ inline std::optional Lexer::takeHexnum(OverflowBehavior behavior) { } inline Lexer::Sign Lexer::takeSign() { + if (empty()) { + return NoSign; + } auto c = peek(); if (c == '+') { take(1); @@ -812,7 +815,8 @@ inline std::optional Lexer::takeFloat() { // we need to strip any underscores since `std::strtod` does not understand // them. std::stringstream ss; - for (const char *curr = &buffer[startPos], *end = &buffer[pos]; curr != end; + for (const char *curr = buffer.data() + startPos, *end = buffer.data() + pos; + curr != end; ++curr) { if (*curr != '_') { ss << *curr; @@ -853,6 +857,10 @@ inline std::optional Lexer::takeStr() { // Escape sequences ensureBuildingEscaped(); take(1); + if (empty()) { + pos = startPos; + return std::nullopt; + } auto c = peek(); take(1); switch (c) { diff --git a/test/gtest/wat-lexer.cpp b/test/gtest/wat-lexer.cpp index 3a4cd49e246..38ad63dedca 100644 --- a/test/gtest/wat-lexer.cpp +++ b/test/gtest/wat-lexer.cpp @@ -22,6 +22,14 @@ using namespace wasm::WATParser; using namespace std::string_view_literals; +TEST(LexerTest, EmptyInput) { + EXPECT_TRUE(Lexer(""sv).empty()); + EXPECT_EQ(Lexer(""sv).takeI32(), std::nullopt); + EXPECT_EQ(Lexer(""sv).takeF32(), std::nullopt); + EXPECT_EQ(Lexer(""sv).takeString(), std::nullopt); + EXPECT_EQ(Lexer(""sv).takeID(), std::nullopt); +} + TEST(LexerTest, LexWhitespace) { Lexer lexer(" 1\t2\n3\r4 \n\n\t 5 "sv); @@ -915,6 +923,7 @@ TEST(LexerTest, LexString) { "_$_\xC2\xA3_\xE2\x82\xAC_\xF0\x90\x8D\x88_"s); EXPECT_FALSE(Lexer("\"unterminated"sv).takeString()); + EXPECT_FALSE(Lexer("\"foo\\"sv).takeString()); EXPECT_FALSE(Lexer("\"unescaped nul\0\""sv).takeString()); EXPECT_FALSE(Lexer("\"unescaped U+19\x19\""sv).takeString()); EXPECT_FALSE(Lexer("\"unescaped U+7f\x7f\""sv).takeString());