diff --git a/src/parser/lexer.h b/src/parser/lexer.h index e998c6db820..a65bac4eb85 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -69,6 +69,8 @@ struct Lexer { std::vector annotations; std::optional file; + static bool isSpacechar(uint8_t c); + public: std::string_view buffer; @@ -93,11 +95,11 @@ struct Lexer { std::optional peekChar() const; - bool peekLParen() { return !empty() && peek() == '('; } + bool peekLParen() { return peek() == uint8_t('('); } bool takeLParen(); - bool peekRParen() { return !empty() && peek() == ')'; } + bool peekRParen() { return peek() == uint8_t(')'); } bool takeRParen(); @@ -134,7 +136,12 @@ struct Lexer { std::string_view next() const { return buffer.substr(pos); } - uint8_t peek() const { return buffer[pos]; } + std::optional peek() const { + if (empty()) { + return std::nullopt; + } + return uint8_t(buffer[pos]); + } void advance() { annotations.clear(); @@ -247,8 +254,8 @@ inline Lexer::Lexer(std::string_view buffer, std::optional file) } inline std::optional Lexer::peekChar() const { - if (!empty()) { - return peek(); + if (auto c = peek()) { + return char(*c); } return std::nullopt; } @@ -298,16 +305,12 @@ inline std::optional Lexer::takeID() { } inline std::optional Lexer::peekKeyword() { - if (empty()) { + auto start = peek(); + if (!start || *start < 'a' || *start > 'z') { return std::nullopt; } auto startPos = pos; - uint8_t start = peek(); - if ('a' <= start && start <= 'z') { - take(1); - } else { - return std::nullopt; - } + take(1); while (idchar()) { take(1); } @@ -544,23 +547,21 @@ inline bool Lexer::takePrefix(std::string_view sv) { } inline std::optional Lexer::takeDigit() { - if (empty()) { - return std::nullopt; - } - if (auto d = getDigit(peek())) { - take(1); - return d; + if (auto c = peek()) { + if (auto d = getDigit(*c)) { + take(1); + return d; + } } return std::nullopt; } inline std::optional Lexer::takeHexdigit() { - if (empty()) { - return std::nullopt; - } - if (auto h = getHexDigit(peek())) { - take(1); - return h; + if (auto c = peek()) { + if (auto h = getHexDigit(*c)) { + take(1); + return h; + } } return std::nullopt; } @@ -624,17 +625,15 @@ inline std::optional Lexer::takeHexnum(OverflowBehavior behavior) { } inline Lexer::Sign Lexer::takeSign() { - if (empty()) { - return NoSign; - } - auto c = peek(); - if (c == '+') { - take(1); - return Pos; - } - if (c == '-') { - take(1); - return Neg; + if (auto c = peek()) { + if (*c == '+') { + take(1); + return Pos; + } + if (*c == '-') { + take(1); + return Neg; + } } return NoSign; } @@ -862,8 +861,12 @@ inline std::optional Lexer::takeStr() { return std::nullopt; } auto c = peek(); + if (!c) { + pos = startPos; + return std::nullopt; + } take(1); - switch (c) { + switch (*c) { case 't': *escapeBuilder << '\t'; break; @@ -909,7 +912,7 @@ inline std::optional Lexer::takeStr() { default: { // Byte escape: \hh // We already took the first h as c. - auto first = getHexDigit(c); + auto first = getHexDigit(*c); auto second = takeHexdigit(); if (!first || !second) { // TODO: Add error production for unrecognized escape sequence. @@ -921,7 +924,8 @@ inline std::optional Lexer::takeStr() { } } else { // Normal characters - if (uint8_t c = peek(); c >= 0x20 && c != 0x7F) { + uint8_t c = *peek(); + if (c >= 0x20 && c != 0x7F) { if (escapeBuilder) { *escapeBuilder << c; } @@ -941,17 +945,17 @@ inline std::optional Lexer::takeStr() { } inline bool Lexer::idchar() { - if (empty()) { + auto c = peek(); + if (!c) { return false; } - uint8_t c = peek(); // All the allowed characters lie in the range '!' to '~', and within that // range the vast majority of characters are allowed, so it is significantly // faster to check for the disallowed characters instead. - if (c < '!' || c > '~') { + if (*c < '!' || *c > '~') { return false; } - switch (c) { + switch (*c) { case '"': case '(': case ')': @@ -999,11 +1003,8 @@ inline std::optional Lexer::takeIdent() { return std::nullopt; } -inline bool Lexer::spacechar() { - if (empty()) { - return false; - } - switch (peek()) { +inline bool Lexer::isSpacechar(uint8_t c) { + switch (c) { case ' ': case '\n': case '\r': @@ -1014,6 +1015,13 @@ inline bool Lexer::spacechar() { } } +inline bool Lexer::spacechar() { + if (auto c = peek()) { + return isSpacechar(*c); + } + return false; +} + inline bool Lexer::takeSpacechar() { if (spacechar()) { take(1); @@ -1160,8 +1168,11 @@ inline bool Lexer::canFinish() { // actually want to parse more than a couple characters of space, so check // for individual space chars or comment starts instead. using namespace std::string_view_literals; - return empty() || spacechar() || peek() == '(' || peek() == ')' || - startsWith(";;"sv); + auto c = peek(); + if (!c) { + return true; + } + return isSpacechar(*c) || *c == '(' || *c == ')' || startsWith(";;"sv); } } // namespace wasm::WATParser