diff --git a/README.md b/README.md index 744b491..ce54400 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Milo is a fast and embeddable HTTP/1.1 parser written in [Rust][rust]. +Milo can report parser activity through callbacks or through parser-owned event buffers. See the language-specific API docs for event decoding details: [JavaScript](./docs/js.md), [Rust](./docs/rust.md), and [C++](./docs/cpp.md). + ## Support Matrix Milo supports strict HTTP/1.1 message parsing for Rust, C++, and JavaScript via WebAssembly. @@ -17,6 +19,10 @@ Milo intentionally rejects HTTP/0.9, HTTP/1.0, RTSP, obs-fold, bare LF, and bare For security and ambiguity reduction, Milo rejects request bodies on `GET` and `HEAD`. Responses to `HEAD` are application context: callers must use `skip_body` when they know a response has no body because it belongs to a `HEAD` request. +Use `max_body_payload` when an integration needs to cap body payload consumed by a single parse call. `0` means unlimited. Reaching the limit returns normally with unconsumed input left for the next parse call. + +Use `suspend_after_headers` when an integration needs to inspect headers before body parsing starts. Parsing returns after consuming the header terminator and can continue with the next parse call. + For full parser scope, strictness, and protocol behavior, see [Milo Design](./docs/design.md). ## How to use it (JavaScript via WebAssembly) @@ -78,8 +84,6 @@ milo.setActiveCallbacks(parser, milo.CALLBACK_ACTIVE_ON_DATA) buffer.set(message, 0) milo.parse(parser, ptr, message.length) -// Use milo.parseWithError to get the consumed characters plus an error flag in a single call. - // Cleanup used resources. milo.destroy(parser) milo.dealloc(ptr, message.length) diff --git a/benchmarks/wasm/src/index.js b/benchmarks/wasm/src/index.js index f54de94..6b4943a 100644 --- a/benchmarks/wasm/src/index.js +++ b/benchmarks/wasm/src/index.js @@ -5,7 +5,7 @@ const TARGET_BYTES = 8n << 30n const ERROR_NONE = 0 const HTTP_REQUEST = 1 const HTTP_RESPONSE = 2 -const MILO_ACTIVE_CALLBACKS = +const MILO_ACTIVE_EVENTS = 4n | // ON_MESSAGE_START 8n | // ON_MESSAGE_COMPLETE 256n | // ON_URL @@ -129,7 +129,8 @@ function createMilo () { on_status: noop, on_reason: noop, on_method: noop, - on_url: noop + on_url: noop, + on_state_change: noop } }).exports } @@ -166,7 +167,7 @@ function validateMilo (milo, fixture) { milo.set_should_autodetect(parser, false) milo.set_is_request(parser, fixture.isRequest) - milo.set_active_callbacks(parser, MILO_ACTIVE_CALLBACKS) + milo.set_active_events(parser, MILO_ACTIVE_EVENTS) const consumed = milo.parse(parser, ptr, fixture.payload.length) const error = milo.get_error_code(parser) @@ -205,7 +206,7 @@ function benchmarkMilo (milo, fixture) { milo.set_should_autodetect(parser, false) milo.set_is_request(parser, fixture.isRequest) - milo.set_active_callbacks(parser, MILO_ACTIVE_CALLBACKS) + milo.set_active_events(parser, MILO_ACTIVE_EVENTS) const start = process.hrtime.bigint() let consumed = 0 diff --git a/docs/cpp.md b/docs/cpp.md index ad2099d..73779a5 100644 --- a/docs/cpp.md +++ b/docs/cpp.md @@ -46,6 +46,8 @@ The file `milo.h` defines several constants (`*` is used to denote a family pref - `METHOD_*`: An HTTP request method. - `CALLBACK_*`: A parser callback. - `CALLBACK_ACTIVE_*`: A callback activation flag. +- `EVENT_*`: A parser event type. +- `EVENT_ACTIVE_*`: An event activation flag. - `STATE_*`: A parser state. Internal generated lookup tables used by the parser are not exported in `milo.h`. @@ -102,11 +104,14 @@ A struct representing a parser. It has the following fields: - `is_request` (`bool`): The configured or detected message type. Set this when `autodetect` is `false`. - `paused` (`bool`): If the parser is paused. - `manage_unconsumed` (`bool`): If the parser should automatically copy and prepend unconsumed data. +- `suspend_after_headers` (`bool`): If parsing should stop after headers have completed. Disabled by default. - `continue_without_data` (`bool`): If the next execution of the parse loop should execute even if there is no more data. - `is_connect` (`bool`): If the current request used `CONNECT` method. - `skip_body` (`bool`): If the parser should skip the body. +- `debug` (`bool`): If debug tracing is enabled for this parser. It only affects tracing in debug-enabled builds. - `max_start_line_length` (`uintptr_t`): Maximum allowed request/status line length. By default is `8192`. - `max_header_length` (`uintptr_t`): Maximum allowed header length. By default is `8192`. +- `max_body_payload` (`uint64_t`): Maximum body payload bytes consumed in a single `milo_parse()` call. `0` means unlimited and is the default. - `context` (`void*`): The context of this parser. Use is reserved to the developer. - `state` (`uint8_t`): The current parser state. - `position` (`uintptr_t`): The current parser position in the slice in the current execution of `milo_parse`. @@ -114,8 +119,6 @@ A struct representing a parser. It has the following fields: - `error_code` (`uint8_t`): The parser error. By default is `ERROR_NONE`. - `method` (`uint8_t`): The current request method. - `status` (`uint32_t`): The current response status. -- `version_major` (`uint8_t`): The current message HTTP version major version. -- `version_minor` (`uint8_t`): The current message HTTP version minor version. - `content_length` (`uint64_t`): The value of the `Content-Length` header. - `chunk_size` (`uint64_t`): The expected length of the next chunk. - `remaining_content_length` (`uint64_t`): The missing data length of the body according to the `content_length` field. @@ -128,26 +131,139 @@ A struct representing a parser. It has the following fields: - `has_upgrade` (`bool`): If the current message has an `Upgrade` header. - `has_trailers` (`bool`): If the current message has a `Trailer` header. - `active_callbacks` (`uint64_t`): Active callback bitmask. Set to one or more `CALLBACK_ACTIVE_*` values. +- `active_events` (`uint64_t`): Active event bitmask. Set to one or more `EVENT_ACTIVE_*` values. - `callbacks` (`ParserCallbacks`): The callbacks for the current parser. -- `error_description` (`const unsigned char*`): The parser error description. -- `error_description_len` (`uint16_t`): The parser error description length. +- `error_description` (`unsigned char[255]`): The parser error description buffer. It is always NIL-terminated. +- `error_description_len` (`uint8_t`): The parser error description length, excluding the NIL terminator. Error descriptions are clamped to 254 bytes. - `unconsumed` (`const unsigned char*`): The unconsumed data from the previous execution of `parse` when `manage_unconsumed` is `true`. - `unconsumed_len` (`uintptr_t`): The unconsumed data length from the previous execution of `parse` when `manage_unconsumed` is `true`. +- `events` (`unsigned char[65536]`): Parser-owned event buffer. All the fields **MUST** be considered readonly, with the following exceptions: - `autodetect` - `is_request` - `manage_unconsumed` +- `suspend_after_headers` - `continue_without_data` - `is_connect` - `skip_body` +- `debug` - `max_start_line_length` - `max_header_length` +- `max_body_payload` - `context` - `active_callbacks` +- `active_events` - `callbacks` +## Events + +Events are parser-owned records written to `Parser::events` during parsing. They are disabled by default. Enable them by setting `Parser::active_events` or by calling `milo_parser::milo_set_active_events()` with one or more `EVENT_ACTIVE_*` values. + +Callbacks are replayed from the same event buffer. Setting `active_callbacks` also enables event emission for those callbacks, then callbacks are invoked in event order before `milo_parse()` returns. + +The event buffer is terminated by `EVENT_END`. Do not rely on the internal buffer size; always stop reading at `EVENT_END`. Event payload integers are little-endian and may be unaligned, so copy multi-byte values before decoding them. + +If an active event would exceed the internal event buffer, parsing stops before consuming the data that would have produced the event. This is not a parser error and does not pause the parser. Call `milo_parse()` again after draining the event buffer. + +## Body Payload Limit + +`max_body_payload` limits how many body payload bytes a single `milo_parse()` invocation can consume. The default value is `0`, which means unlimited. + +When the limit is reached, `milo_parse()` returns normally with a consumed byte count smaller than `limit` and leaves the remaining input unconsumed. This is not a parser error and does not pause the parser. The next `milo_parse()` invocation continues from the same parser state. + +The limit applies only to body payload bytes. Framing bytes such as chunk headers, chunk CRLFs, and trailers are not counted. + +## Suspend After Headers + +`suspend_after_headers` stops parsing after the final header terminator has been consumed and `on_headers` has been emitted. `milo_parse()` returns normally, the parser is not paused, and the next `milo_parse()` call continues with body decision and body parsing. + +### Range events + +Most events use this payload: + +```text +uint8_t type +uint32_t at +uint32_t len +``` + +`type` is one of the `EVENT_*` constants. `at` and `len` are relative to the last input passed to `milo_parse()`. `len` can be `0`. + +`EVENT_STATE_CHANGE` is debug-only and uses the same payload. For this event, `len` contains the new parser state id as a `uint32_t`. Callback replay passes that value as the callback `size` argument. + +### Metadata events + +`EVENT_HEADERS` uses this payload: + +```text +uint8_t type +uint32_t at +uint16_t status_or_method +uint8_t should_keep_alive +uint8_t should_upgrade +uint8_t has_trailers +uint8_t body_kind +uint64_t content_length +``` + +`status_or_method` is the response status for responses and the request method for requests. + +`body_kind` values are: + +- `0`: `Content-Length` +- `1`: chunked transfer encoding +- `2`: no explicit body length + +### Error events + +`EVENT_ERROR` uses this payload: + +```text +uint8_t type +uint32_t at +uint8_t error_code +``` + +### Reading events + +```cpp +#include "milo.h" + +#include + +static uint32_t read_u32_le(const unsigned char* ptr) { + return static_cast(ptr[0]) | + (static_cast(ptr[1]) << 8) | + (static_cast(ptr[2]) << 16) | + (static_cast(ptr[3]) << 24); +} + +static void drain_events(const milo_parser::Parser* parser) { + uintptr_t cursor = 0; + + for (;;) { + const uint8_t event_type = parser->events[cursor]; + + if (event_type == milo_parser::EVENT_END) { + break; + } + + if (event_type == milo_parser::EVENT_DATA) { + const uint32_t at = read_u32_le(parser->events + cursor + 1); + const uint32_t len = read_u32_le(parser->events + cursor + 5); + // Use at and len. + cursor += 9; + continue; + } + + // Decode other events according to their payload type. + break; + } +} +``` + ## Enumerations ### `milo_parser::Errors` @@ -162,6 +278,10 @@ An enum listing all possible HTTP methods recognized by Milo. An enum listing all possible parser callbacks. +### `milo_parser::Events` + +An enum listing all possible parser events. + ### `milo_parser::States` An enum listing all possible parser states. @@ -172,6 +292,10 @@ An enum listing all possible parser states. Returns `true` if debug informations are available in this build. +### `bool milo_is_debug(Parser *parser)` + +Returns `true` if debug tracing is enabled for this parser. + ### `void milo_noop(Parser *_parser, uintptr_t _at, uintptr_t _len)` A callback that does nothing. @@ -200,6 +324,18 @@ Parses `data` up to `limit` characters. It returns the number of consumed characters. +### `void milo_set_active_events(Parser *parser, uint64_t value)` + +Sets the active event bitmask on the parser. + +### `void milo_set_max_body_payload(Parser *parser, uint64_t value)` + +Sets the maximum body payload bytes consumed by a single `milo_parse()` invocation. Use `0` for unlimited. + +### `void milo_set_suspend_after_headers(Parser *parser, bool value)` + +Sets whether `milo_parse()` should return after headers have completed. + ### `void milo_reset(Parser *parser, bool keep_parsed)` Resets a parser. The second parameters specifies if to also reset the @@ -212,7 +348,9 @@ The following fields are not modified: - `autodetect` - `is_request` - `manage_unconsumed` +- `suspend_after_headers` - `continue_without_data` +- `debug` - `max_start_line_length` - `max_header_length` - `context` @@ -233,6 +371,15 @@ Pauses the parser. The parser will have to be resumed via `milo_parser::milo_res Resumes the parser. +### `void milo_complete(Parser *parser)` + +Completes the current message without consuming more input. + +This emits normal completion events and performs the same completion transition +used by `milo_parse()`. It is valid only while the parser is in `BODY_DECISION`, +`TUNNEL`, `BODY_VIA_CONTENT_LENGTH`, `BODY_WITH_NO_LENGTH`, `CHUNK_HEADER`, or +`TRAILER`. Other states fail with `ERROR_UNEXPECTED_STATE`. + ### `void milo_finish(Parser *parser)` Marks the parser as finished. Any new invocation of `milo_parser::milo_parse` will put the parser in the error state. @@ -247,6 +394,36 @@ Returns the current parser's state as string. **The returned value MUST be freed using `milo_parser::milo_free_string`.** +### `CStringWithLength *milo_method_to_string(uint8_t method)` + +Returns a parser method as string. + +**The returned value MUST be freed using `milo_parser::milo_free_string`.** + +### `CStringWithLength *milo_error_to_string(uint8_t error)` + +Returns a parser error as string. + +**The returned value MUST be freed using `milo_parser::milo_free_string`.** + +### `CStringWithLength *milo_callback_to_string(uint8_t callback)` + +Returns a parser callback as string. + +**The returned value MUST be freed using `milo_parser::milo_free_string`.** + +### `CStringWithLength *milo_state_to_string(uint8_t state)` + +Returns a parser state as string. + +**The returned value MUST be freed using `milo_parser::milo_free_string`.** + +### `CStringWithLength *milo_event_to_string(uint8_t event)` + +Returns a parser event as string. `EVENT_END` returns `END`. + +**The returned value MUST be freed using `milo_parser::milo_free_string`.** + ### `CStringWithLength *milo_error_code_string(Parser *parser)` Returns the current parser's error state as string. diff --git a/docs/design.md b/docs/design.md index e2ded74..218f0b3 100644 --- a/docs/design.md +++ b/docs/design.md @@ -92,7 +92,11 @@ CONNECT response behavior requires application context and is not inferred by Mi `skip_body` lets applications skip response body parsing when they have external context, such as responses to `HEAD`. -`autodetect` and `is_request` control parser direction. Milo also supports managed unconsumed data, start-line and header length limits, and callback activation flags. +`max_body_payload` limits how many body payload bytes a single parse invocation can consume. `0` means unlimited. When the limit is reached, parsing returns normally with unconsumed input left for the caller's next parse invocation. Framing bytes such as chunk headers, chunk CRLFs, and trailers are not counted. + +`suspend_after_headers` stops parsing after the final header terminator has been consumed and headers metadata has been emitted. The parser is not paused; the next parse invocation continues with body decision and body parsing. + +`autodetect` and `is_request` control parser direction. Milo also supports managed unconsumed data, start-line and header length limits, event activation flags, and callback activation flags. ## Non-Goals diff --git a/docs/js.md b/docs/js.md index 1862eb5..3d0b2c5 100644 --- a/docs/js.md +++ b/docs/js.md @@ -30,7 +30,10 @@ The module exports several constants (`*` is used to denote a family prefix): - `METHOD_*`: An HTTP request method. - `CALLBACK_*`: A parser callback. - `CALLBACK_ACTIVE_*`: Callback activation flags. +- `EVENT_*`: A parser event type. +- `EVENT_ACTIVE_*`: Event activation flags. - `STATE_*`: A parser state. +- `PARSER_FIELD_*`: A WebAssembly parser field offset. Internal generated lookup tables used by the parser are not exported from the WebAssembly package. @@ -60,12 +63,170 @@ An enum listing all possible parser callbacks bitmask. Access is supported from string constant or numeric value. +#### `Events` + +An enum listing all possible parser event types. + +Access is supported from string constant or numeric value. + +#### `EventActives` + +An enum listing all possible parser event activation flags. + +Access is supported from string constant or bigint value. + #### `States` An enum listing all possible parser states. Access is supported from string constant or numeric value. +#### `ParserFields` + +An enum listing WebAssembly parser field offsets. + +Access is supported from string constant or numeric value. + +### Parser Fields + +`ParserFields` contains byte offsets for reading parser fields directly from WebAssembly memory. + +Use the offset with the parser pointer returned by `create()`: + +```javascript +const milo = setup() +const parser = milo.create() + +const status = new DataView(milo.memory.buffer).getUint32(parser + milo.ParserFields.STATUS, true) +const paused = new Uint8Array(milo.memory.buffer)[parser + milo.ParserFields.PAUSED] !== 0 + +milo.destroy(parser) +``` + +Read fields with the matching WebAssembly representation: + +- `bool` and `u8`: `Uint8Array` +- `u16`: `DataView#getUint16(..., true)` +- `u32` and `usize`: `DataView#getUint32(..., true)` +- `u64`: `DataView#getBigUint64(..., true)` +- pointers: `DataView#getUint32(..., true)` + +`ParserFields.ERROR_DESCRIPTION` points to an inline `Uint8Array` buffer of 255 bytes inside the parser. It is always NIL-terminated. `ParserFields.ERROR_DESCRIPTION_LEN` is a `u8` length that excludes the terminator; error descriptions are clamped to 254 bytes. + +Prefer the regular getters when available. `ParserFields` is intended for advanced WebAssembly integrations that need direct memory access. + +### Events + +Events are parser-owned records written to the parser event buffer during parsing. They are disabled by default. Enable them with `setActiveEvents(parser, mask)` using one or more `EVENT_ACTIVE_*` constants. + +Callbacks are replayed from the same event buffer. Calling `setActiveCallbacks(parser, mask)` also enables event emission for those callbacks, then callbacks are invoked in event order before `parse()` returns. + +Read the event buffer pointer from `parser + ParserFields.EVENTS`, then drain records from that pointer. The event stream is terminated by `EVENT_END`. Do not rely on the internal buffer size; always stop reading at `EVENT_END`. Event payload integers are little-endian. + +If an active event would exceed the internal event buffer, parsing stops before consuming the data that would have produced the event. This is not a parser error and does not pause the parser. Call `parse()` again after draining the event buffer. + +### Body Payload Limit + +`setMaxBodyPayload(parser, value)` limits how many body payload bytes a single `parse()` invocation can consume. The default value is `0`, which means unlimited. + +When the limit is reached, `parse()` returns normally with a consumed byte count smaller than `limit` and leaves the remaining input unconsumed. This is not a parser error and does not pause the parser. The next `parse()` invocation continues from the same parser state. + +The limit applies only to body payload bytes. Framing bytes such as chunk headers, chunk CRLFs, and trailers are not counted. + +### Suspend After Headers + +`setShouldSuspendAfterHeaders(parser, true)` makes `parse()` return after the final header terminator has been consumed and `on_headers` has been emitted. The parser is not paused; the next `parse()` invocation continues with body decision and body parsing. + +#### Range events + +Most events use this payload: + +```text +u8 type +u32 at +u32 len +``` + +`type` is one of the `EVENT_*` constants. `at` and `len` are relative to the last input passed to `parse()`. `len` can be `0`. + +`EVENT_STATE_CHANGE` is debug-only and uses the same payload. For this event, `len` contains the new parser state id as a `u32`. Callback replay passes that value as the callback `size` argument. + +#### Metadata events + +`EVENT_HEADERS` uses this payload: + +```text +u8 type +u32 at +u16 status_or_method +u8 should_keep_alive +u8 should_upgrade +u8 has_trailers +u8 body_kind +u64 content_length +``` + +`status_or_method` is the response status for responses and the request method for requests. + +`body_kind` values are: + +- `0`: `Content-Length` +- `1`: chunked transfer encoding +- `2`: no explicit body length + +#### Error events + +`EVENT_ERROR` uses this payload: + +```text +u8 type +u32 at +u8 error_code +``` + +#### Reading events + +```javascript +function readEvents (milo, parser) { + const memory = milo.memory.buffer + const eventsPtr = new DataView(memory).getUint32(parser + milo.ParserFields.EVENTS, true) + const events = new Uint8Array(memory, eventsPtr) + const view = new DataView(memory, eventsPtr) + const decoded = [] + let cursor = 0 + + for (;;) { + const type = events[cursor] + + if (type === milo.EVENT_END) { + break + } + + if (type === milo.EVENT_ERROR) { + decoded.push({ type, at: view.getUint32(cursor + 1, true), errorCode: events[cursor + 5] }) + cursor += 6 + } else if (type === milo.EVENT_HEADERS) { + decoded.push({ + type, + at: view.getUint32(cursor + 1, true), + statusOrMethod: view.getUint16(cursor + 5, true), + shouldKeepAlive: events[cursor + 7] !== 0, + shouldUpgrade: events[cursor + 8] !== 0, + hasTrailers: events[cursor + 9] !== 0, + bodyKind: events[cursor + 10], + contentLength: view.getBigUint64(cursor + 11, true) + }) + cursor += 19 + } else { + decoded.push({ type, at: view.getUint32(cursor + 1, true), len: view.getUint32(cursor + 5, true) }) + cursor += 9 + } + } + + return decoded +} +``` + ### Methods #### `setup` @@ -135,23 +296,6 @@ Parses `data` up to `limit` characters. It returns the number of consumed characters. -#### `parseWithError(parser, data, limit)` - -Parses `data` up to `limit` characters. - -It returns an object containing the number of consumed characters and whether the parser errored: - -```javascript -{ - consumed: 0, - errored: false -} -``` - -Internally this wraps the `parse_with_error` WebAssembly export, which returns a signed 32-bit integer: `consumed` on success, and `-(consumed + 1)` on error. The extra `+ 1` allows representing errors that happen after consuming zero bytes. - -Since the raw return value is a signed 32-bit integer, `parseWithError` supports up to `2_147_483_646` consumed bytes per call when an error is reported. - #### `reset(parser)` Resets a parser. The second parameters specifies if to also reset the @@ -165,6 +309,7 @@ The following fields are not modified: - `is_request` - `manage_unconsumed` - `continue_without_data` +- `debug` - `max_start_line_length` - `max_header_length` - `context` @@ -185,6 +330,15 @@ Pauses the parser. The parser will have to be resumed via `resume`. Resumes the parser. +#### `complete(parser)` + +Completes the current message without consuming more input. + +This emits normal completion events and performs the same completion transition +used by `parse`. It is valid only while the parser is in `BODY_DECISION`, +`TUNNEL`, `BODY_VIA_CONTENT_LENGTH`, `BODY_WITH_NO_LENGTH`, `CHUNK_HEADER`, or +`TRAILER`. Other states fail with `ERROR_UNEXPECTED_STATE`. + #### `finish(parser)` Marks the parser as finished. Any new invocation of `parse` will put the parser in the error state. @@ -205,6 +359,12 @@ Returns `true` if the parser autodetects requests and responses. Returns `true` if the configured or detected message type is a request. +#### `isDebug(parser)` + +Returns `true` if debug tracing is enabled for this parser. + +The flag only affects tracing in debug-enabled builds. + #### `isPaused(parser)` Returns `true` if the parser is paused. @@ -225,6 +385,12 @@ Returns the parser maximum allowed header length. Default is `8192`. +#### `getMaxBodyPayload(parser)` + +Returns the maximum body payload bytes consumed by a single `parse()` invocation. + +Default is `0`, which means unlimited. + #### `shouldContinueWithoutData(parser)` Returns `true` if the next execution of the parse loop should execute even if there is no more data. @@ -237,6 +403,10 @@ Returns `true` if the current request used `CONNECT` method. Returns `true` if the parser should skip the body. +#### `shouldSuspendAfterHeaders(parser)` + +Returns `true` if parsing should return after headers have completed. + #### `getState(parser)` Returns the parser state. @@ -261,14 +431,6 @@ Returns the parser current request method. Returns the parser current response status. -#### `getVersionMajor(parser)` - -Returns the parser current message HTTP version major version. - -#### `getVersionMinor(parser)` - -Returns the parser current message HTTP version minor version. - #### `hasConnectionClose(parser)` Returns `true` if the current message has a `Connection: close` token. @@ -337,14 +499,26 @@ Sets the parser maximum allowed header length. Defaults to `8192` in a new parser. +#### `setMaxBodyPayload(parser, value)` + +Sets the maximum body payload bytes consumed by a single `parse()` invocation. Use `0` for unlimited. + #### `setActiveCallbacks(parser, value)` Sets the active callback bitmask on the parser. +#### `setActiveEvents(parser, value)` + +Sets the active event bitmask on the parser. + #### `setShouldManageUnconsumed(parser, value)` Sets if the parser should automatically copy and prepend unconsumed data. +#### `setShouldSuspendAfterHeaders(parser, value)` + +Sets if parsing should return after headers have completed. + #### `setShouldContinueWithoutData(parser, value)` Sets if the next execution of the parse loop should execute even if there is no more data. @@ -357,6 +531,12 @@ Set if the parser should skip the body. Sets if the current request used the `CONNECT` method. +#### `setDebug(parser, value)` + +Sets if debug tracing is enabled for this parser. + +The flag only affects tracing in debug-enabled builds. + ## Simple API A preconfigured module instance exported as `simple`. diff --git a/docs/rust.md b/docs/rust.md index f4e1eed..648d2fa 100644 --- a/docs/rust.md +++ b/docs/rust.md @@ -28,13 +28,15 @@ The crate exports several constants (`*` is used to denote a family prefix): - `METHOD_*`: An HTTP request method. - `CALLBACK_*`: A parser callback. - `CALLBACK_ACTIVE_*`: A callback activation flag. +- `EVENT_*`: A parser event type. +- `EVENT_ACTIVE_*`: An event activation flag. - `STATE_*`: A parser state. Internal generated lookup tables used by the parser are not public API. ## Enums -All the enums below implement `TryFrom` and `Into<&str>` traits and have the `as_str` method. +All the enums below implement `TryFrom` and `Into<&str>` traits and have the `as_str` method. ### `Errors` @@ -48,6 +50,10 @@ An enum listing all possible HTTP methods recognized by Milo. An enum listing all possible parser callbacks. +### `Events` + +An enum listing all possible parser events. + ### `States` An enum listing all possible parser states. @@ -99,11 +105,14 @@ A struct representing a parser. It has the following fields: - `is_request` (`bool`): The configured or detected message type. Set this when `autodetect` is `false`. - `paused` (`bool`): If the parser is paused. - `manage_unconsumed` (`bool`): If the parser should automatically copy and prepend unconsumed data. +- `suspend_after_headers` (`bool`): If parsing should stop after headers have completed. Disabled by default. - `continue_without_data` (`bool`): If the next execution of the parse loop should execute even if there is no more data. - `is_connect` (`bool`): If the current request used `CONNECT` method. - `skip_body` (`bool`): If the parser should skip the body. +- `debug` (`bool`): If debug tracing is enabled for this parser. It only affects tracing in debug-enabled builds. - `max_start_line_length` (`usize`): Maximum allowed request/status line length. By default is `8192`. - `max_header_length` (`usize`): Maximum allowed header length. By default is `8192`. +- `max_body_payload` (`u64`): Maximum body payload bytes consumed in a single `parse()` call. `0` means unlimited and is the default. - `context` (`*mut c_void`): The context of this parser. Use is reserved to the developer. - `state` (`u8`): The current parser state. - `position` (`usize`): The current parser position in the slice in the current execution of `milo_parse`. @@ -111,8 +120,6 @@ A struct representing a parser. It has the following fields: - `error_code` (`u8`): The parser error. By default is `ERROR_NONE`. - `method` (`u8`): The current request method. - `status` (`u32`): The current response status. -- `version_major` (`u8`): The current message HTTP version major version. -- `version_minor` (`u8`): The current message HTTP version minor version. - `content_length` (`u64`): The value of the `Content-Length` header. - `chunk_size` (`u64`): The expected length of the next chunk. - `remaining_content_length` (`u64`): The missing data length of the body according to the `content_length` field. @@ -125,26 +132,131 @@ A struct representing a parser. It has the following fields: - `has_upgrade` (`bool`): If the current message has an `Upgrade` header. - `has_trailers` (`bool`): If the current message has a `Trailer` header. - `active_callbacks` (`u64`): Active callback bitmask. Set to one or more `CALLBACK_ACTIVE_*` flags. +- `active_events` (`u64`): Active event bitmask. Set to one or more `EVENT_ACTIVE_*` flags. - `callbacks` (`ParserCallbacks`): The callbacks for the current parser. -- `error_description` (`*const c_uchar`): The parser error description. -- `error_description_len` (`u16`): The parser error description length. +- `error_description` (`[u8; 255]`): The parser error description buffer. It is always NIL-terminated. +- `error_description_len` (`u8`): The parser error description length, excluding the NIL terminator. Error descriptions are clamped to 254 bytes. - `unconsumed` (`*const c_uchar`): The unconsumed data from the previous execution of `parse` when `manage_unconsumed` is `true`. - `unconsumed_len` (`usize`): The unconsumed data length from the previous execution of `parse` when `manage_unconsumed` is `true`. +- `events` (`*mut c_uchar`): Parser-owned event buffer. All the fields **MUST** be considered readonly, with the following exceptions: - `autodetect` - `is_request` - `manage_unconsumed` +- `suspend_after_headers` - `continue_without_data` - `is_connect` - `skip_body` +- `debug` - `max_start_line_length` - `max_header_length` +- `max_body_payload` - `context` - `active_callbacks` +- `active_events` - `callbacks` +## Events + +Events are parser-owned records written to `Parser::events` during parsing. They are disabled by default. Enable them by setting `Parser::active_events` to one or more `EVENT_ACTIVE_*` flags. + +Callbacks are replayed from the same event buffer. Setting `active_callbacks` also enables event emission for those callbacks, then callbacks are invoked in event order before `parse()` returns. + +The event buffer is terminated by `EVENT_END`. Do not rely on the internal buffer size; always stop reading at `EVENT_END`. Event payload integers are little-endian and may be unaligned, so read multi-byte values with unaligned reads. + +If an active event would exceed the internal event buffer, parsing stops before consuming the data that would have produced the event. This is not a parser error and does not pause the parser. Call `parse()` again after draining the event buffer. + +## Body Payload Limit + +`max_body_payload` limits how many body payload bytes a single `parse()` invocation can consume. The default value is `0`, which means unlimited. + +When the limit is reached, `parse()` returns normally with `consumed < limit` and leaves the remaining input unconsumed. This is not a parser error and does not pause the parser. The next `parse()` invocation continues from the same parser state. + +The limit applies only to body payload bytes. Framing bytes such as chunk headers, chunk CRLFs, and trailers are not counted. + +## Suspend After Headers + +`suspend_after_headers` stops parsing after the final header terminator has been consumed and `on_headers` has been emitted. `parse()` returns normally, the parser is not paused, and the next `parse()` call continues with body decision and body parsing. + +### Range events + +Most events use this payload: + +```text +u8 type +u32 at +u32 len +``` + +`type` is one of the `EVENT_*` constants. `at` and `len` are relative to the last input passed to `parse()`. `len` can be `0`. + +`EVENT_STATE_CHANGE` is debug-only and uses the same payload. For this event, `len` contains the new parser state id as a `u32`. Callback replay passes that value as the callback `size` argument. + +### Metadata events + +`EVENT_HEADERS` uses this payload: + +```text +u8 type +u32 at +u16 status_or_method +u8 should_keep_alive +u8 should_upgrade +u8 has_trailers +u8 body_kind +u64 content_length +``` + +`status_or_method` is the response status for responses and the request method for requests. + +`body_kind` values are: + +- `0`: `Content-Length` +- `1`: chunked transfer encoding +- `2`: no explicit body length + +### Error events + +`EVENT_ERROR` uses this payload: + +```text +u8 type +u32 at +u8 error_code +``` + +### Reading events + +```rust +use core::ptr; + +use milo_parser::{EVENT_DATA, EVENT_END, Parser}; + +fn drain(parser: &Parser) { + let mut cursor = 0usize; + + loop { + let event_type = unsafe { *parser.events.add(cursor) }; + + match event_type { + EVENT_END => break, + EVENT_DATA => { + let at = u32::from_le(unsafe { ptr::read_unaligned(parser.events.add(cursor + 1) as *const u32) }) as usize; + let len = u32::from_le(unsafe { ptr::read_unaligned(parser.events.add(cursor + 5) as *const u32) }) as usize; + // Use at and len. + cursor += 9; + } + _ => { + // Decode other events according to their payload type. + break; + } + } + } +} +``` + #### `Parser::new() -> Parser` Creates a new parser. @@ -167,7 +279,9 @@ The following fields are not modified: - `autodetect` - `is_request` - `manage_unconsumed` +- `suspend_after_headers` - `continue_without_data` +- `debug` - `max_start_line_length` - `max_header_length` - `context` @@ -188,6 +302,15 @@ Pauses the parser. The parser will have to be resumed via `Parser::resume`. Resumes the parser. +#### `Parser::complete(&mut self)` + +Completes the current message without consuming more input. + +This emits normal completion events and performs the same completion transition +used by `parse`. It is valid only while the parser is in `BODY_DECISION`, +`TUNNEL`, `BODY_VIA_CONTENT_LENGTH`, `BODY_WITH_NO_LENGTH`, `CHUNK_HEADER`, or +`TRAILER`. Other states fail with `ERROR_UNEXPECTED_STATE`. + #### `Parser::finish(&mut self)` Marks the parser as finished. Any new data received via `parse` will @@ -285,6 +408,15 @@ Pauses the parser. The parser will have to be resumed via `milo_parser::milo_res Resumes the parser. +### `milo_complete(parser: *mut Parser)` + +Completes the current message without consuming more input. + +This emits normal completion events and performs the same completion transition +used by `milo_parse`. It is valid only while the parser is in `BODY_DECISION`, +`TUNNEL`, `BODY_VIA_CONTENT_LENGTH`, `BODY_WITH_NO_LENGTH`, `CHUNK_HEADER`, or +`TRAILER`. Other states fail with `ERROR_UNEXPECTED_STATE`. + ### `milo_finish(parser: *mut Parser)` Marks the parser as finished. Any new invocation of `milo_parse` will put the parser in the error state. @@ -299,6 +431,36 @@ Returns the current parser's state as string. **The returned value MUST be freed using `milo_free_string`.** +### `milo_method_to_string(method: u8) -> *const c_uchar` + +Returns a parser method as string. + +**The returned value MUST be freed using `milo_free_string`.** + +### `milo_error_to_string(error: u8) -> *const c_uchar` + +Returns a parser error as string. + +**The returned value MUST be freed using `milo_free_string`.** + +### `milo_callback_to_string(callback: u8) -> *const c_uchar` + +Returns a parser callback as string. + +**The returned value MUST be freed using `milo_free_string`.** + +### `milo_state_to_string(state: u8) -> *const c_uchar` + +Returns a parser state as string. + +**The returned value MUST be freed using `milo_free_string`.** + +### `milo_event_to_string(event: u8) -> *const c_uchar` + +Returns a parser event as string. `EVENT_END` returns `END`. + +**The returned value MUST be freed using `milo_free_string`.** + ### `milo_error_code_string(parser: *mut Parser) -> *const c_uchar` Returns the current parser's error state as string. diff --git a/macros/Cargo.toml b/macros/Cargo.toml index 19a4289..2a317ad 100644 --- a/macros/Cargo.toml +++ b/macros/Cargo.toml @@ -13,6 +13,10 @@ [lib] proc-macro = true +[[bin]] + name = "milo-parser-fields" + path = "src/parser_fields.rs" + [dependencies] indexmap = { version = "2.13.0", features = ["serde"] } proc-macro2 = "1.0.106" diff --git a/macros/src/actions.rs b/macros/src/actions.rs index 98d972d..2b11ddd 100644 --- a/macros/src/actions.rs +++ b/macros/src/actions.rs @@ -2,28 +2,125 @@ use proc_macro::TokenStream; use quote::{format_ident, quote}; use syn::{Expr, Ident, parse_macro_input}; -use crate::{ - native, - structs::{CallbackRequest, FailureRequest}, - wasm, -}; +use crate::structs::{EventRequest, FailureRequest}; -/// Invokes one of the user defined callbacks, eventually attaching some view of -/// the data (via pointer and length). -pub fn callback(input: TokenStream) -> TokenStream { - let definition = parse_macro_input!(input as CallbackRequest); - let native = native::callback(&definition); - let wasm = wasm::callback(&definition); +/// Emits an event carrying an input range. +pub fn event_with_range(input: TokenStream) -> TokenStream { + let definition = parse_macro_input!(input as EventRequest); + let callback = &definition.identifier; + let callback_const = format_ident!("CALLBACK_{}", callback.to_string().to_uppercase()); + let event_type = quote! { #callback_const + 1 }; + let bitmask = format_ident!("EVENT_ACTIVE_{}", definition.identifier.to_string().to_uppercase()); + let offset = definition.offset.as_ref().expect("event_with_range requires offset"); + let length = definition.length.as_ref().expect("event_with_range requires length"); + let needed = quote! { 9usize }; + let emit = quote! { + let at = (self.position + #offset) as u32; + let len = (#length) as u32; + unsafe { + *self.events.add(event_cursor) = #event_type; + core::ptr::write_unaligned( + self.events.add(event_cursor + 1) as *mut u32, + at.to_le(), + ); + core::ptr::write_unaligned( + self.events.add(event_cursor + 5) as *mut u32, + len.to_le(), + ); + } + }; - let bitmask = format_ident!("CALLBACK_ACTIVE_{}", definition.identifier.to_string().to_uppercase()); + TokenStream::from(quote! { + if active_events & #bitmask != 0 { + if event_cursor + #needed < EVENTS_BUFFER_SIZE { + #emit + event_cursor += #needed; + } else { + suspend!(); + } + } + }) +} + +/// Emits an error event. +pub fn event_with_error(input: TokenStream) -> TokenStream { + let definition = parse_macro_input!(input as EventRequest); + let callback = &definition.identifier; + let callback_const = format_ident!("CALLBACK_{}", callback.to_string().to_uppercase()); + let event_type = quote! { #callback_const + 1 }; + let bitmask = format_ident!("EVENT_ACTIVE_{}", definition.identifier.to_string().to_uppercase()); + let needed = quote! { 6usize }; + let emit = quote! { + let at = self.position as u32; + unsafe { + *self.events.add(event_cursor) = #event_type; + core::ptr::write_unaligned( + self.events.add(event_cursor + 1) as *mut u32, + at.to_le(), + ); + *self.events.add(event_cursor + 5) = self.error_code; + } + }; TokenStream::from(quote! { - if self.active_callbacks & #bitmask != 0 { - #[cfg(not(target_family = "wasm"))] - #native + if active_events & #bitmask != 0 { + if event_cursor + #needed < EVENTS_BUFFER_SIZE { + #emit + event_cursor += #needed; + } else { + suspend!(); + } + } + }) +} + +/// Emits an event carrying parsed metadata. +pub fn event_with_metadata(input: TokenStream) -> TokenStream { + let definition = parse_macro_input!(input as EventRequest); + let callback = &definition.identifier; + let callback_const = format_ident!("CALLBACK_{}", callback.to_string().to_uppercase()); + let event_type = quote! { #callback_const + 1 }; + let bitmask = format_ident!("EVENT_ACTIVE_{}", definition.identifier.to_string().to_uppercase()); + let offset = definition.offset.unwrap_or_else(|| syn::parse_quote! { 0 }); + let needed = quote! { 19usize }; + let emit = quote! { + let at = (self.position + #offset) as u32; + let status_or_method = if self.is_request { self.method as u16 } else { self.status as u16 }; + let body_kind = if self.has_content_length { 0u8 } else if self.has_chunked_transfer_encoding { 1u8 } else { 2u8 }; + let should_keep_alive = (!self.has_connection_close) as u8; + let should_upgrade = (self.has_upgrade && self.has_connection_upgrade) as u8; + let has_trailers = self.has_trailers as u8; + let content_length = if self.has_content_length { self.content_length } else { 0 }; - #[cfg(target_family = "wasm")] - #wasm + unsafe { + *self.events.add(event_cursor) = #event_type; + core::ptr::write_unaligned( + self.events.add(event_cursor + 1) as *mut u32, + at.to_le(), + ); + core::ptr::write_unaligned( + self.events.add(event_cursor + 5) as *mut u16, + status_or_method.to_le(), + ); + *self.events.add(event_cursor + 7) = should_keep_alive; + *self.events.add(event_cursor + 8) = should_upgrade; + *self.events.add(event_cursor + 9) = has_trailers; + *self.events.add(event_cursor + 10) = body_kind; + core::ptr::write_unaligned( + self.events.add(event_cursor + 11) as *mut u64, + content_length.to_le(), + ); + } + }; + + TokenStream::from(quote! { + if active_events & #bitmask != 0 { + if event_cursor + #needed < EVENTS_BUFFER_SIZE { + #emit + event_cursor += #needed; + } else { + suspend!(); + } } }) } diff --git a/macros/src/generators.rs b/macros/src/generators.rs index 1e13d4c..1b6da07 100644 --- a/macros/src/generators.rs +++ b/macros/src/generators.rs @@ -3,7 +3,7 @@ use quote::{format_ident, quote}; use regex::{Captures, Regex}; use syn::{Arm, ItemConst, parse_str}; -use crate::{native, wasm}; +use crate::{native, parser_fields, wasm}; fn init_constants() -> (Vec, Vec, Vec, Vec) { let methods = serde_yaml::from_str(include_str!("../constants/methods.yml")).unwrap(); @@ -14,16 +14,22 @@ fn init_constants() -> (Vec, Vec, Vec, Vec) { (methods, errors, callbacks, states) } -fn generate_constants_internal(items: &[String], prefix: &str) -> Vec { +fn generate_constants_internal( + items: &[String], + prefix: &str, + start: usize, + strip_prefix: Option<&str>, +) -> Vec { let mut consts: Vec = Vec::new(); let mut bytes: Vec<&[u8]> = vec![]; for (i, x) in items.iter().enumerate() { - let uppercased = x.to_uppercase(); + let value = strip_prefix.and_then(|prefix| x.strip_prefix(prefix)).unwrap_or(x); + let uppercased = value.to_uppercase(); let name = uppercased.replace('-', "_"); bytes.push(x.as_bytes()); - consts.push(parse_str::(&format!("pub const {}_{}: u8 = {};", prefix, name, i)).unwrap()); + consts.push(parse_str::(&format!("pub const {}_{}: u8 = {};", prefix, name, i + start)).unwrap()); } consts @@ -59,11 +65,14 @@ where /// Generates all parser constants. fn generate_constants(methods: &[String], errors: &[String], callbacks: &[String], states: &[String]) -> TokenStream { - let methods_consts = generate_constants_internal(methods, "METHOD"); - let states_consts = generate_constants_internal(states, "STATE"); - let errors_consts = generate_constants_internal(errors, "ERROR"); - let callbacks_consts = generate_constants_internal(callbacks, "CALLBACK"); + let methods_consts = generate_constants_internal(methods, "METHOD", 0, None); + let states_consts = generate_constants_internal(states, "STATE", 0, None); + let errors_consts = generate_constants_internal(errors, "ERROR", 0, None); + let callbacks_consts = generate_constants_internal(callbacks, "CALLBACK", 0, None); let callbacks_bitmask = generate_bitmask(callbacks, "CALLBACK_ACTIVE"); + let event_bitmask = generate_bitmask(callbacks, "EVENT_ACTIVE"); + let event_consts = generate_constants_internal(callbacks, "EVENT", 1, Some("on_")); + let parser_field_offsets = parser_fields::generate_constants(); let token_table = generate_table(|byte| { (0x30..=0x39).contains(&byte) || (0x41..=0x5a).contains(&byte) @@ -123,8 +132,14 @@ fn generate_constants(methods: &[String], errors: &[String], callbacks: &[String #(#methods_consts)* #(#errors_consts)* #(#callbacks_consts)* + pub const EVENT_END: u8 = 0; + #(#event_consts)* #(#callbacks_bitmask)* + #(#event_bitmask)* #(#states_consts)* + #parser_field_offsets + + const EVENTS_BUFFER_SIZE: usize = 64 * 1024; /// cbindgen:ignore static TOKEN_TABLE: [bool; 256] = [#(#token_table),*]; @@ -147,6 +162,9 @@ fn generate_enums(methods: &[String], errors: &[String], callbacks: &[String], s let methods_ref = methods; let errors_ref = errors; let callbacks_ref = callbacks; + let events_ref: Vec = core::iter::once("END".to_string()) + .chain(callbacks_ref.iter().map(|x| x.strip_prefix("on_").unwrap_or(x).to_string())) + .collect(); let states_ref = states; let methods: Vec<_> = methods_ref @@ -168,6 +186,8 @@ fn generate_enums(methods: &[String], errors: &[String], callbacks: &[String], s }) .collect(); + let events: Vec<_> = events_ref.iter().map(|x| format_ident!("{}", x.to_uppercase())).collect(); + let states: Vec<_> = states_ref .iter() .map(|x| format_ident!("{}", x.to_uppercase())) @@ -191,6 +211,12 @@ fn generate_enums(methods: &[String], errors: &[String], callbacks: &[String], s .map(|(x, i)| parse_str::(&format!("{} => Ok(Callbacks::{})", x, i)).unwrap()) .collect(); + let events_from: Vec<_> = events + .iter() + .enumerate() + .map(|(x, i)| parse_str::(&format!("{} => Ok(Events::{})", x, i)).unwrap()) + .collect(); + let states_from: Vec<_> = states .iter() .enumerate() @@ -212,6 +238,11 @@ fn generate_enums(methods: &[String], errors: &[String], callbacks: &[String], s .map(|x| parse_str::(&format!("Callbacks::{} => \"{}\"", x, x)).unwrap()) .collect(); + let events_into: Vec<_> = events_ref + .iter() + .map(|x| parse_str::(&format!("Events::{} => \"{}\"", x.to_uppercase(), x.to_uppercase())).unwrap()) + .collect(); + let states_into: Vec<_> = states .iter() .map(|x| parse_str::(&format!("States::{} => \"{}\"", x, x)).unwrap()) @@ -236,6 +267,12 @@ fn generate_enums(methods: &[String], errors: &[String], callbacks: &[String], s #(#callbacks),* } + #[repr(u8)] + #[derive(Copy, Clone, Debug)] + pub enum Events { + #(#events),* + } + #[repr(u8)] #[derive(Copy, Clone, Debug)] pub enum States { @@ -275,6 +312,17 @@ fn generate_enums(methods: &[String], errors: &[String], callbacks: &[String], s } } + impl TryFrom for Events { + type Error = (); + + fn try_from(value: u8) -> Result { + match value { + #(#events_from),*, + _ => Err(()) + } + } + } + impl TryFrom for States { type Error = (); @@ -310,6 +358,14 @@ fn generate_enums(methods: &[String], errors: &[String], callbacks: &[String], s } } + impl From for &str { + fn from(value: Events) -> Self { + match value { + #(#events_into),* + } + } + } + impl From for &str { fn from(value: States) -> Self { match value { @@ -336,6 +392,12 @@ fn generate_enums(methods: &[String], errors: &[String], callbacks: &[String], s } } + impl Events { + pub fn as_str(self) -> &'static str { + self.into() + } + } + impl States { pub fn as_str(self) -> &'static str { self.into() diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 6fbc587..ea68cd5 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -4,6 +4,7 @@ mod actions; mod generators; mod matchers; mod native; +mod parser_fields; mod structs; mod wasm; @@ -14,7 +15,13 @@ pub fn generate(_: TokenStream) -> TokenStream { generators::generate() } pub fn case_insensitive_string(input: TokenStream) -> TokenStream { matchers::case_insensitive_string(input) } #[proc_macro] -pub fn callback(input: TokenStream) -> TokenStream { actions::callback(input) } +pub fn event_with_range(input: TokenStream) -> TokenStream { actions::event_with_range(input) } + +#[proc_macro] +pub fn event_with_error(input: TokenStream) -> TokenStream { actions::event_with_error(input) } + +#[proc_macro] +pub fn event_with_metadata(input: TokenStream) -> TokenStream { actions::event_with_metadata(input) } #[proc_macro] pub fn advance(input: TokenStream) -> TokenStream { actions::advance(input) } diff --git a/macros/src/native.rs b/macros/src/native.rs index 82aabac..e0de71c 100644 --- a/macros/src/native.rs +++ b/macros/src/native.rs @@ -1,25 +1,53 @@ use proc_macro::TokenStream; use quote::{format_ident, quote}; -use crate::structs::CallbackRequest; - -pub fn callback(definition: &CallbackRequest) -> proc_macro2::TokenStream { - let callback = &definition.identifier; - - if let Some(offset) = &definition.offset - && let Some(length) = &definition.length - { - quote! { - (self.callbacks.#callback)(self, self.position + #offset, #length); - } - } else { - quote! { (self.callbacks.#callback)(self, self.position, 0); } - } -} - /// Generates all parser callbacks. pub fn generate_callbacks(callbacks: &[String]) -> TokenStream { let callbacks: Vec<_> = callbacks.iter().map(|x| format_ident!("{}", x)).collect(); + let replay_arms = callbacks.iter().map(|callback| { + let callback_name = callback.to_string(); + let event_const = format_ident!( + "EVENT_{}", + callback_name + .strip_prefix("on_") + .unwrap_or(&callback_name) + .to_uppercase() + ); + let active_const = format_ident!("CALLBACK_ACTIVE_{}", callback_name.to_uppercase()); + + if callback_name == "on_error" { + quote! { + #event_const => { + let at = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 1) as *const u32) }.to_le() as usize; + if self.active_callbacks & #active_const != 0 { + (self.callbacks.#callback)(self, at, 0); + } + cursor += 6usize; + } + } + } else if callback_name == "on_headers" { + quote! { + #event_const => { + let at = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 1) as *const u32) }.to_le() as usize; + if self.active_callbacks & #active_const != 0 { + (self.callbacks.#callback)(self, at, 0); + } + cursor += 19usize; + } + } + } else { + quote! { + #event_const => { + let at = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 1) as *const u32) }.to_le() as usize; + let len = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 5) as *const u32) }.to_le() as usize; + if self.active_callbacks & #active_const != 0 { + (self.callbacks.#callback)(self, at, len); + } + cursor += 9usize; + } + } + } + }); TokenStream::from(quote! { #[cfg(not(target_family = "wasm"))] @@ -40,5 +68,23 @@ pub fn generate_callbacks(callbacks: &[String]) -> TokenStream { } } } + + #[cfg(not(target_family = "wasm"))] + impl Parser { + #[inline] + fn invoke_callbacks(&mut self) { + let mut cursor = 0usize; + + loop { + let event_type = unsafe { *self.events.add(cursor) }; + + match event_type { + EVENT_END => break, + #(#replay_arms)* + _ => break, + } + } + } + } }) } diff --git a/macros/src/parser_fields.rs b/macros/src/parser_fields.rs new file mode 100644 index 0000000..a9496db --- /dev/null +++ b/macros/src/parser_fields.rs @@ -0,0 +1,122 @@ +use core::mem::offset_of; + +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; + +type WasmPointer = u32; +type WasmUsize = u32; + +// Keep this in sync with parser::Parser when compiled with target_family = +// "wasm". +#[repr(C)] +struct ParserStub { + max_start_line_length: WasmUsize, + max_header_length: WasmUsize, + max_body_payload: u64, + autodetect: bool, + is_request: bool, + suspend_after_headers: bool, + manage_unconsumed: bool, + continue_without_data: bool, + is_connect: bool, + skip_body: bool, + debug: bool, + parsed: u64, + position: WasmUsize, + state: u8, + paused: bool, + error_code: u8, + content_length: u64, + chunk_size: u64, + remaining_content_length: u64, + remaining_chunk_size: u64, + status: u32, + method: u8, + has_content_length: bool, + has_transfer_encoding: bool, + has_chunked_transfer_encoding: bool, + has_connection_close: bool, + has_connection_upgrade: bool, + has_upgrade: bool, + has_trailers: bool, + active_callbacks: u64, + active_events: u64, + ptr: WasmPointer, + error_description: [u8; 255], + unconsumed: WasmPointer, + unconsumed_len: WasmUsize, + error_description_len: u8, + events: WasmPointer, +} + +const FIELDS: &[(&str, usize)] = &[ + ("MAX_START_LINE_LENGTH", offset_of!(ParserStub, max_start_line_length)), + ("MAX_HEADER_LENGTH", offset_of!(ParserStub, max_header_length)), + ("MAX_BODY_PAYLOAD", offset_of!(ParserStub, max_body_payload)), + ("AUTODETECT", offset_of!(ParserStub, autodetect)), + ("IS_REQUEST", offset_of!(ParserStub, is_request)), + ("SUSPEND_AFTER_HEADERS", offset_of!(ParserStub, suspend_after_headers)), + ("MANAGE_UNCONSUMED", offset_of!(ParserStub, manage_unconsumed)), + ("CONTINUE_WITHOUT_DATA", offset_of!(ParserStub, continue_without_data)), + ("IS_CONNECT", offset_of!(ParserStub, is_connect)), + ("SKIP_BODY", offset_of!(ParserStub, skip_body)), + ("DEBUG", offset_of!(ParserStub, debug)), + ("PARSED", offset_of!(ParserStub, parsed)), + ("POSITION", offset_of!(ParserStub, position)), + ("STATE", offset_of!(ParserStub, state)), + ("PAUSED", offset_of!(ParserStub, paused)), + ("ERROR_CODE", offset_of!(ParserStub, error_code)), + ("CONTENT_LENGTH", offset_of!(ParserStub, content_length)), + ("CHUNK_SIZE", offset_of!(ParserStub, chunk_size)), + ( + "REMAINING_CONTENT_LENGTH", + offset_of!(ParserStub, remaining_content_length), + ), + ("REMAINING_CHUNK_SIZE", offset_of!(ParserStub, remaining_chunk_size)), + ("STATUS", offset_of!(ParserStub, status)), + ("METHOD", offset_of!(ParserStub, method)), + ("HAS_CONTENT_LENGTH", offset_of!(ParserStub, has_content_length)), + ("HAS_TRANSFER_ENCODING", offset_of!(ParserStub, has_transfer_encoding)), + ( + "HAS_CHUNKED_TRANSFER_ENCODING", + offset_of!(ParserStub, has_chunked_transfer_encoding), + ), + ("HAS_CONNECTION_CLOSE", offset_of!(ParserStub, has_connection_close)), + ("HAS_CONNECTION_UPGRADE", offset_of!(ParserStub, has_connection_upgrade)), + ("HAS_UPGRADE", offset_of!(ParserStub, has_upgrade)), + ("HAS_TRAILERS", offset_of!(ParserStub, has_trailers)), + ("ACTIVE_CALLBACKS", offset_of!(ParserStub, active_callbacks)), + ("ACTIVE_EVENTS", offset_of!(ParserStub, active_events)), + ("PTR", offset_of!(ParserStub, ptr)), + ("ERROR_DESCRIPTION", offset_of!(ParserStub, error_description)), + ("UNCONSUMED", offset_of!(ParserStub, unconsumed)), + ("UNCONSUMED_LEN", offset_of!(ParserStub, unconsumed_len)), + ("ERROR_DESCRIPTION_LEN", offset_of!(ParserStub, error_description_len)), + ("EVENTS", offset_of!(ParserStub, events)), +]; + +pub fn generate_constants() -> TokenStream { + let constants = FIELDS.iter().map(|(name, offset)| { + let ident = format_ident!("PARSER_FIELD_{}", name); + + quote! { + #[cfg(target_family = "wasm")] + pub const #ident: usize = #offset; + } + }); + + quote! { + #(#constants)* + } +} + +#[allow(dead_code)] +fn main() { + let constants = FIELDS + .iter() + .map(|(name, offset)| format!("\"PARSER_FIELD_{name}\":{offset}")) + .collect::>() + .join(","); + + println!("{{{}}}", constants); +} diff --git a/macros/src/structs.rs b/macros/src/structs.rs index 94ceb49..2252fe8 100644 --- a/macros/src/structs.rs +++ b/macros/src/structs.rs @@ -9,8 +9,8 @@ pub struct FailureRequest { } /// An identifier associated to an expression - An example of this is used in -/// `callback!`. -pub struct CallbackRequest { +/// `event_with_range!`. +pub struct EventRequest { pub identifier: Ident, pub offset: Option, pub length: Option, @@ -32,7 +32,7 @@ impl Parse for FailureRequest { } } -impl Parse for CallbackRequest { +impl Parse for EventRequest { // Parses a identifier and its optional expression fn parse(input: ParseStream) -> Result { let identifier = input.parse()?; @@ -47,14 +47,16 @@ impl Parse for CallbackRequest { // Parse the expression offset = Some(input.parse::()?); - // Discard the comma - input.parse::()?; + if !input.is_empty() { + // Discard the comma + input.parse::()?; - // Parse the expression - length = Some(input.parse::()?); + // Parse the expression + length = Some(input.parse::()?); + } } - Ok(CallbackRequest { + Ok(EventRequest { identifier, offset, length, diff --git a/macros/src/wasm.rs b/macros/src/wasm.rs index bfb6b34..fdedebe 100644 --- a/macros/src/wasm.rs +++ b/macros/src/wasm.rs @@ -1,29 +1,54 @@ use proc_macro::TokenStream; use quote::{format_ident, quote}; -use crate::structs::CallbackRequest; - -pub fn callback(definition: &CallbackRequest) -> proc_macro2::TokenStream { - let callback = &definition.identifier; - - if let Some(offset) = &definition.offset - && let Some(length) = &definition.length - { - quote! { unsafe { #callback(self.ptr, self.position + #offset, #length) }; } - } else { - quote! { unsafe { #callback(self.ptr, self.position, 0) }; } - } -} - /// Generates all parser callbacks. pub fn generate_callbacks(callbacks: &[String]) -> TokenStream { - let callbacks: Vec<_> = callbacks.iter().map(|x| format_ident!("{}", x)).collect(); + let callbacks: Vec<_> = callbacks + .iter() + .filter(|x| x.as_str() != "on_headers") + .map(|x| format_ident!("{}", x)) + .collect(); + let replay_arms = callbacks + .iter() + .filter(|callback| callback.to_string() != "on_error") + .map(|callback| { + let callback_name = callback.to_string(); + let event_const = format_ident!( + "EVENT_{}", + callback_name + .strip_prefix("on_") + .unwrap_or(&callback_name) + .to_uppercase() + ); + let active_const = format_ident!("CALLBACK_ACTIVE_{}", callback_name.to_uppercase()); + + quote! { + #event_const => { + let at = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 1) as *const u32) }.to_le() as usize; + let len = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 5) as *const u32) }.to_le() as usize; + if self.active_callbacks & #active_const != 0 { + unsafe { #callback(self.ptr, at, len); } + } + cursor += 9usize; + } + } + }); TokenStream::from(quote! { #[cfg(target_family = "wasm")] #[link(wasm_import_module = "env")] unsafe extern "C" { #(fn #callbacks(parser: *mut c_void, _at: usize, _len: usize);)* + fn on_headers( + parser: *mut c_void, + at: usize, + method_or_status: u32, + should_keep_alive: bool, + should_upgrade: bool, + has_trailers: bool, + body_kind: u8, + content_length: f64, + ); #[cfg(any(debug_assertions, feature = "debug"))] fn logger(message: u64); @@ -36,5 +61,55 @@ pub fn generate_callbacks(callbacks: &[String]) -> TokenStream { debug(format!("WebAssembly panicked: {:#?}", panic_info)); })); } + + #[cfg(target_family = "wasm")] + impl Parser { + #[inline] + fn invoke_callbacks(&mut self) { + let mut cursor = 0usize; + + loop { + let event_type = unsafe { *self.events.add(cursor) }; + + match event_type { + EVENT_END => break, + EVENT_ERROR => { + let at = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 1) as *const u32) }.to_le() as usize; + if self.active_callbacks & CALLBACK_ACTIVE_ON_ERROR != 0 { + unsafe { on_error(self.ptr, at, 0); } + } + cursor += 6usize; + } + EVENT_HEADERS => { + let at = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 1) as *const u32) }.to_le() as usize; + let method_or_status = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 5) as *const u16) }.to_le() as u32; + let should_keep_alive = unsafe { *self.events.add(cursor + 7) } != 0; + let should_upgrade = unsafe { *self.events.add(cursor + 8) } != 0; + let has_trailers = unsafe { *self.events.add(cursor + 9) } != 0; + let body_kind = unsafe { *self.events.add(cursor + 10) }; + let content_length = unsafe { core::ptr::read_unaligned(self.events.add(cursor + 11) as *const u64) }.to_le() as f64; + + if self.active_callbacks & CALLBACK_ACTIVE_ON_HEADERS != 0 { + unsafe { + on_headers( + self.ptr, + at, + method_or_status, + should_keep_alive, + should_upgrade, + has_trailers, + body_kind, + content_length, + ); + } + } + cursor += 19usize; + } + #(#replay_arms)* + _ => break, + } + } + } + } }) } diff --git a/parser/cbindgen.toml b/parser/cbindgen.toml index 81d1a92..844779a 100644 --- a/parser/cbindgen.toml +++ b/parser/cbindgen.toml @@ -16,4 +16,6 @@ namespace = "milo_parser" "Methods", "States", "Errors", + "Callbacks", + "Events", ] diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 223af2a..8a85146 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -8,45 +8,44 @@ use alloc::{boxed::Box, format}; use core::cell::{Cell, RefCell}; use core::ffi::{c_char, c_uchar, c_void}; use core::fmt::Debug; -use core::ptr; use core::str; +use core::{mem, ptr}; use core::{slice, slice::from_raw_parts}; -use milo_macros::{callback, generate, next}; +use milo_macros::generate; #[repr(C)] -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct Parser { // User writable + #[cfg(not(target_family = "wasm"))] + pub context: *mut c_void, + pub max_start_line_length: usize, + pub max_header_length: usize, + pub max_body_payload: u64, pub autodetect: bool, pub is_request: bool, + pub suspend_after_headers: bool, pub manage_unconsumed: bool, pub continue_without_data: bool, pub is_connect: bool, pub skip_body: bool, - pub max_start_line_length: usize, - pub max_header_length: usize, - #[cfg(not(target_family = "wasm"))] - pub context: *mut c_void, - #[cfg(any(debug_assertions, feature = "debug"))] pub debug: bool, // Generic state - pub state: u8, - pub position: usize, pub parsed: u64, + pub position: usize, + pub state: u8, pub paused: bool, pub error_code: u8, // Current message flags - pub method: u8, - pub status: u32, - pub version_major: u8, - pub version_minor: u8, pub content_length: u64, pub chunk_size: u64, pub remaining_content_length: u64, pub remaining_chunk_size: u64, + pub status: u32, + pub method: u8, pub has_content_length: bool, pub has_transfer_encoding: bool, pub has_chunked_transfer_encoding: bool, @@ -57,6 +56,7 @@ pub struct Parser { // Callback handling pub active_callbacks: u64, + pub active_events: u64, #[cfg(not(target_family = "wasm"))] pub callbacks: ParserCallbacks, @@ -65,10 +65,13 @@ pub struct Parser { pub ptr: *mut c_void, // Complex data types - We need to split them in order to be exportable to C++ - pub error_description: *const c_uchar, - pub error_description_len: u16, + pub error_description: [u8; 255], pub unconsumed: *const c_uchar, pub unconsumed_len: usize, + pub error_description_len: u8, + + // Event buffer. Keep this at the end of the struct for external readers. + pub events: *mut c_uchar, } #[cfg(not(target_family = "wasm"))] @@ -88,35 +91,38 @@ generate!(); impl Parser { /// Creates a new parser pub fn new() -> Parser { + let mut events = Box::new([0u8; 65536]); + let events_ptr = events.as_mut_ptr(); + mem::forget(events); + Parser { // User writable + #[cfg(not(target_family = "wasm"))] + context: ptr::null_mut(), + max_start_line_length: 8192, + max_header_length: 8192, + max_body_payload: 0, autodetect: true, is_request: false, + suspend_after_headers: false, manage_unconsumed: false, continue_without_data: false, is_connect: false, skip_body: false, - max_start_line_length: 8192, - max_header_length: 8192, - #[cfg(any(debug_assertions, feature = "debug"))] debug: false, - #[cfg(not(target_family = "wasm"))] - context: ptr::null_mut(), // Generic state - state: STATE_START, - position: 0, parsed: 0, + position: 0, + state: STATE_START, paused: false, error_code: ERROR_NONE, // Current message flags - method: 0, - status: 0, - version_major: 0, - version_minor: 0, content_length: 0, chunk_size: 0, remaining_content_length: 0, remaining_chunk_size: 0, + status: 0, + method: 0, has_content_length: false, has_transfer_encoding: false, has_chunked_transfer_encoding: false, @@ -126,16 +132,18 @@ impl Parser { has_trailers: false, // Callbacks handling active_callbacks: 0, + active_events: 0, #[cfg(not(target_family = "wasm"))] callbacks: ParserCallbacks::new(), // WASM Specific #[cfg(target_family = "wasm")] ptr: ptr::null_mut(), // Complex data types - error_description: ptr::null(), - error_description_len: 0, + error_description: [0; 255], unconsumed: ptr::null(), unconsumed_len: 0, + error_description_len: 0, + events: events_ptr, } } @@ -147,8 +155,10 @@ impl Parser { /// * context /// * autodetect /// * is_request + /// * suspend_after_headers /// * manage_unconsumed /// * continue_without_data + /// * debug /// * context pub fn reset(&mut self, keep_parsed: bool) { self.state = STATE_START; @@ -160,14 +170,8 @@ impl Parser { self.error_code = ERROR_NONE; - if self.error_description_len > 0 { - unsafe { - let _ = slice::from_raw_parts(self.error_description, self.error_description_len as usize); - } - - self.error_description = ptr::null(); - self.error_description_len = 0; - } + self.error_description[0] = 0; + self.error_description_len = 0; if self.unconsumed_len > 0 { unsafe { @@ -179,7 +183,10 @@ impl Parser { } self.clear(); - callback!(on_reset); + self.skip_body = false; + unsafe { + *self.events = EVENT_END; + } } /// Clears all values about the message in the parser. @@ -187,8 +194,6 @@ impl Parser { self.is_connect = false; self.method = 0; self.status = 0; - self.version_major = 0; - self.version_minor = 0; self.has_content_length = false; self.has_transfer_encoding = false; self.has_chunked_transfer_encoding = false; @@ -200,7 +205,45 @@ impl Parser { self.chunk_size = 0; self.remaining_content_length = 0; self.remaining_chunk_size = 0; - self.skip_body = false; + } + + #[inline(always)] + pub(crate) fn try_emit_event_range( + &mut self, + event_cursor: &mut usize, + event_type: u8, + at: usize, + len: usize, + ) -> bool { + if *event_cursor + 9usize >= EVENTS_BUFFER_SIZE { + return false; + } + + unsafe { + *self.events.add(*event_cursor) = event_type; + core::ptr::write_unaligned(self.events.add(*event_cursor + 1) as *mut u32, (at as u32).to_le()); + core::ptr::write_unaligned(self.events.add(*event_cursor + 5) as *mut u32, (len as u32).to_le()); + } + *event_cursor += 9usize; + true + } + + #[inline(always)] + pub(crate) fn try_emit_event_error(&mut self, event_cursor: &mut usize) -> bool { + if *event_cursor + 6usize >= EVENTS_BUFFER_SIZE { + return false; + } + + unsafe { + *self.events.add(*event_cursor) = EVENT_ERROR; + core::ptr::write_unaligned( + self.events.add(*event_cursor + 1) as *mut u32, + (self.position as u32).to_le(), + ); + *self.events.add(*event_cursor + 5) = self.error_code; + } + *event_cursor += 6usize; + true } /// Pauses the parser. It will have to be resumed via `resume`. @@ -219,7 +262,14 @@ impl Parser { } STATE_BODY_WITH_NO_LENGTH => { // Notify that the message has been completed - callback!(on_message_complete); + let active_events = self.active_events | self.active_callbacks; + let mut event_cursor = 0usize; + if active_events & EVENT_ACTIVE_ON_MESSAGE_COMPLETE != 0 { + self.try_emit_event_range(&mut event_cursor, EVENT_MESSAGE_COMPLETE, self.position, 0); + } + unsafe { + *self.events.add(event_cursor) = EVENT_END; + } // Set the state to be finished self.state = STATE_FINISH; @@ -237,14 +287,22 @@ impl Parser { /// It always returns zero for internal use. #[inline(always)] pub fn fail(&mut self, code: u8, description: &str) { - let description_copy = description.to_string(); - let (ptr, _, len) = description_copy.into_raw_parts(); + let bytes = description.as_bytes(); + let len = bytes.len().min(254); self.state = STATE_ERROR; self.error_code = code; - self.error_description = ptr; - self.error_description_len = len as u16; - callback!(on_error, 0, 0); + self.error_description[..len].copy_from_slice(&bytes[..len]); + self.error_description[len] = 0; + self.error_description_len = len as u8; + let active_events = self.active_events | self.active_callbacks; + let mut event_cursor = 0usize; + if active_events & EVENT_ACTIVE_ON_ERROR != 0 { + self.try_emit_event_error(&mut event_cursor); + } + unsafe { + *self.events.add(event_cursor) = EVENT_END; + } } /// Returns the current parser's state as string. @@ -258,7 +316,7 @@ impl Parser { unsafe { if self.error_description_len > 0 { str::from_utf8_unchecked(from_raw_parts( - self.error_description, + self.error_description.as_ptr(), self.error_description_len as usize, )) } else { @@ -268,6 +326,17 @@ impl Parser { } } +impl Drop for Parser { + fn drop(&mut self) { + if !self.events.is_null() { + unsafe { + let _ = Box::from_raw(self.events as *mut [u8; 65536]); + } + self.events = ptr::null_mut(); + } + } +} + impl Default for Parser { fn default() -> Self { Self::new() } } diff --git a/parser/src/native.rs b/parser/src/native.rs index d76979b..071802e 100644 --- a/parser/src/native.rs +++ b/parser/src/native.rs @@ -5,8 +5,8 @@ use core::str; use core::{slice, slice::from_raw_parts}; use std::ffi::{CString, c_char, c_uchar}; -use crate::Parser; use crate::parse; +use crate::{Callbacks, Errors, Events, Methods, Parser, States}; #[repr(C)] pub struct CStringWithLength { @@ -39,6 +39,10 @@ impl From for &str { #[unsafe(no_mangle)] pub extern "C" fn milo_has_debug() -> bool { cfg!(any(debug_assertions, feature = "debug")) } +/// Returns if debug tracing is enabled for this parser. +#[unsafe(no_mangle)] +pub extern "C" fn milo_is_debug(parser: *const Parser) -> bool { unsafe { (*parser).debug } } + /// A callback that simply returns `0`. /// /// Use this callback as pointer when you want to remove a callback from the @@ -77,6 +81,30 @@ pub extern "C" fn milo_parse(parser: *mut Parser, data: *const c_uchar, limit: u unsafe { (*parser).parse(data, limit) } } +/// Sets the parser event bitmask. +#[unsafe(no_mangle)] +pub extern "C" fn milo_set_active_events(parser: *mut Parser, value: u64) { + unsafe { + (*parser).active_events = value; + } +} + +/// Sets the maximum body payload consumed by a single parse invocation. +#[unsafe(no_mangle)] +pub extern "C" fn milo_set_max_body_payload(parser: *mut Parser, value: u64) { + unsafe { + (*parser).max_body_payload = value; + } +} + +/// Sets whether parsing should stop after headers have completed. +#[unsafe(no_mangle)] +pub extern "C" fn milo_set_suspend_after_headers(parser: *mut Parser, value: bool) { + unsafe { + (*parser).suspend_after_headers = value; + } +} + /// Resets a parser. The second parameters specifies if to also reset the /// parsed counter. /// @@ -85,8 +113,11 @@ pub extern "C" fn milo_parse(parser: *mut Parser, data: *const c_uchar, limit: u /// * context /// * autodetect /// * is_request +/// * suspend_after_headers +/// * max_body_payload /// * manage_unconsumed /// * continue_without_data +/// * debug /// * context #[unsafe(no_mangle)] pub extern "C" fn milo_reset(parser: *mut Parser, keep_parsed: bool) { unsafe { (*parser).reset(keep_parsed) } } @@ -105,6 +136,10 @@ pub extern "C" fn milo_pause(parser: *mut Parser) { unsafe { (*parser).pause() } #[unsafe(no_mangle)] pub extern "C" fn milo_resume(parser: *mut Parser) { unsafe { (*parser).resume() } } +/// Completes the current message without consuming more input. +#[unsafe(no_mangle)] +pub extern "C" fn milo_complete(parser: *mut Parser) { unsafe { (*parser).complete() } } + /// Marks the parser as finished. Any new data received via `milo_parse` will /// put the parser in the error state. #[unsafe(no_mangle)] @@ -124,6 +159,50 @@ pub extern "C" fn milo_state_string(parser: *mut Parser) -> CStringWithLength { unsafe { (*parser).state_str().into() } } +/// Returns a parser method as string. +/// +/// The returned value must be freed using `free_string`. +#[unsafe(no_mangle)] +pub extern "C" fn milo_method_to_string(method: u8) -> CStringWithLength { + Methods::try_from(method) + .map_or("UNKNOWN", |method| method.as_str()) + .into() +} + +/// Returns a parser error as string. +/// +/// The returned value must be freed using `free_string`. +#[unsafe(no_mangle)] +pub extern "C" fn milo_error_to_string(error: u8) -> CStringWithLength { + Errors::try_from(error).map_or("UNKNOWN", |error| error.as_str()).into() +} + +/// Returns a parser callback as string. +/// +/// The returned value must be freed using `free_string`. +#[unsafe(no_mangle)] +pub extern "C" fn milo_callback_to_string(callback: u8) -> CStringWithLength { + Callbacks::try_from(callback) + .map_or("UNKNOWN", |callback| callback.as_str()) + .into() +} + +/// Returns a parser state as string. +/// +/// The returned value must be freed using `free_string`. +#[unsafe(no_mangle)] +pub extern "C" fn milo_state_to_string(state: u8) -> CStringWithLength { + States::try_from(state).map_or("UNKNOWN", |state| state.as_str()).into() +} + +/// Returns a parser event as string. +/// +/// The returned value must be freed using `free_string`. +#[unsafe(no_mangle)] +pub extern "C" fn milo_event_to_string(event: u8) -> CStringWithLength { + Events::try_from(event).map_or("UNKNOWN", |event| event.as_str()).into() +} + /// Returns the current parser's error state as string. /// /// The returned value must be freed using `free_string`. diff --git a/parser/src/parse.rs b/parser/src/parse.rs index f2b64a6..33ef1db 100644 --- a/parser/src/parse.rs +++ b/parser/src/parse.rs @@ -25,8 +25,17 @@ impl Parser { /// /// It returns the number of consumed characters. pub fn parse(&mut self, input: *const c_uchar, limit: usize) -> usize { + let mut event_cursor = 0usize; + let active_events = self.active_events | self.active_callbacks; + // If the self.is paused, this is a no-op if self.paused { + if active_events != 0 { + unsafe { + *self.events = EVENT_END; + } + } + return 0; } @@ -54,6 +63,7 @@ impl Parser { // Limit the data that is currently analyzed data = &data[..limit]; + let mut available = data.len(); #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))] @@ -73,11 +83,44 @@ impl Parser { self.position = 0; let mut advanced: usize; let mut parsing = true; - let has_active_callbacks = self.active_callbacks != 0; - let has_header_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_NAME != 0; - let has_header_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_VALUE != 0; - let has_trailer_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_NAME != 0; - let has_trailer_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_VALUE != 0; + let max_body_payload = self.max_body_payload; + let mut body_payload_read = 0u64; + let has_active_events = active_events != 0; + let ( + has_request_start_events, + has_response_start_events, + has_request_line_events, + has_response_line_events, + has_metadata_event, + has_header_name_event, + has_header_value_event, + has_trailer_name_event, + has_trailer_value_event, + has_complete_events, + has_finish_event, + ) = if has_active_events { + ( + active_events & (EVENT_ACTIVE_ON_REQUEST | EVENT_ACTIVE_ON_MESSAGE_START) != 0, + active_events & (EVENT_ACTIVE_ON_RESPONSE | EVENT_ACTIVE_ON_MESSAGE_START) != 0, + active_events + & (EVENT_ACTIVE_ON_METHOD | EVENT_ACTIVE_ON_URL | EVENT_ACTIVE_ON_PROTOCOL | EVENT_ACTIVE_ON_VERSION) + != 0, + active_events + & (EVENT_ACTIVE_ON_PROTOCOL | EVENT_ACTIVE_ON_VERSION | EVENT_ACTIVE_ON_STATUS | EVENT_ACTIVE_ON_REASON) + != 0, + active_events & EVENT_ACTIVE_ON_HEADERS != 0, + active_events & EVENT_ACTIVE_ON_HEADER_NAME != 0, + active_events & EVENT_ACTIVE_ON_HEADER_VALUE != 0, + active_events & EVENT_ACTIVE_ON_TRAILER_NAME != 0, + active_events & EVENT_ACTIVE_ON_TRAILER_VALUE != 0, + active_events & (EVENT_ACTIVE_ON_MESSAGE_COMPLETE | EVENT_ACTIVE_ON_RESET) != 0, + active_events & EVENT_ACTIVE_ON_FINISH != 0, + ) + } else { + ( + false, false, false, false, false, false, false, false, false, false, false, + ) + }; #[cfg(any(debug_assertions, feature = "debug"))] if self.debug { @@ -115,22 +158,22 @@ impl Parser { // Choose the initial state depending on the configured message type. STATE_START => { if !self.autodetect && self.is_request { - if has_active_callbacks { - callback!(on_request); - callback!(on_message_start); + if has_request_start_events { + event_with_range!(on_request, 0, 0); + event_with_range!(on_message_start, 0, 0); } move_to!(request_line); } else if !self.autodetect { - if has_active_callbacks { - callback!(on_response); - callback!(on_message_start); + if has_response_start_events { + event_with_range!(on_response, 0, 0); + event_with_range!(on_message_start, 0, 0); } move_to!(status_line); } else if data.len() >= 5 && data[4] == b'/' && data.starts_with(b"HTTP") { self.is_request = false; - if has_active_callbacks { - callback!(on_response); - callback!(on_message_start); + if has_response_start_events { + event_with_range!(on_response, 0, 0); + event_with_range!(on_message_start, 0, 0); } move_to!(status_line); } else if data.len() >= 2 && data.starts_with(b"\r\n") { @@ -140,9 +183,9 @@ impl Parser { // For performance reason, we assume it's a request so we don't lookup the // method twice self.is_request = true; - if has_active_callbacks { - callback!(on_request); - callback!(on_message_start); + if has_request_start_events { + event_with_range!(on_request, 0, 0); + event_with_range!(on_message_start, 0, 0); } move_to!(request_line); } @@ -262,25 +305,19 @@ impl Parser { if self.method == METHOD_PRI { fail!(UNSUPPORTED_HTTP_VERSION, "PRI is only valid with HTTP/2.0"); } - - self.version_major = 1; - self.version_minor = 1; } else if &data[protocol_start..cr] == b"HTTP/2.0" { if self.method != METHOD_PRI { fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version"); } - - self.version_major = 2; - self.version_minor = 0; } else { fail!(UNEXPECTED_CHARACTER, "Invalid protocol"); } - if has_active_callbacks { - callback!(on_method, method_start, method_end - method_start); - callback!(on_url, url_start, url_end - url_start); - callback!(on_protocol, protocol_start, protocol_end - protocol_start); - callback!(on_version, version_start, 3); + if has_request_line_events { + event_with_range!(on_method, method_start, method_end - method_start); + event_with_range!(on_url, url_start, url_end - url_start); + event_with_range!(on_protocol, protocol_start, protocol_end - protocol_start); + event_with_range!(on_version, version_start, 3); } advance!(cr + 2); @@ -338,10 +375,7 @@ impl Parser { } match &data[protocol_start..version_end] { - b"HTTP/1.1" => { - self.version_major = 1; - self.version_minor = 1; - } + b"HTTP/1.1" => {} [b'H', b'T', b'T', b'P', b'/', ..] => { fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version"); } @@ -381,12 +415,12 @@ impl Parser { + ((data[status_start + 1] - b'0') as u32) * 10 + (data[status_start + 2] - b'0') as u32; - if has_active_callbacks { - callback!(on_protocol, protocol_start, 4); - callback!(on_version, version_start, 3); - callback!(on_status, status_start, 3); + if has_response_line_events { + event_with_range!(on_protocol, protocol_start, 4); + event_with_range!(on_version, version_start, 3); + event_with_range!(on_status, status_start, 3); if reason_end > reason_start { - callback!(on_reason, reason_start, reason_end - reason_start); + event_with_range!(on_reason, reason_start, reason_end - reason_start); } } @@ -429,10 +463,18 @@ impl Parser { // No more headers or no headers at all, move to the headers state if cr == 0 { - self.continue_without_data = true; advance!(2); + if has_metadata_event { + event_with_metadata!(on_headers, 2); + } move_to!(body_decision); - next!(); + self.continue_without_data = true; + + if self.suspend_after_headers { + suspend!(); + } else { + next!(); + } } // RFC 9112 section.4 @@ -454,7 +496,7 @@ impl Parser { fail!(UNEXPECTED_CHARACTER, "Invalid header field name character"); } - if has_header_value_callback { + if has_header_value_event { strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true); } } else { @@ -727,19 +769,19 @@ impl Parser { fail!(UNEXPECTED_CHARACTER, "Invalid header field name character"); } - if has_header_value_callback { + if has_header_value_event { strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true); } } } } - if has_header_name_callback { - callback!(on_header_name, header_name_start, header_name_end - header_name_start); + if has_header_name_event { + event_with_range!(on_header_name, header_name_start, header_name_end - header_name_start); } - if has_header_value_callback { - callback!( + if has_header_value_event { + event_with_range!( on_header_value, header_value_start, header_value_end - header_value_start @@ -771,10 +813,6 @@ impl Parser { // RFC 9110 section 9.3.6 and 7.8 - Headers have finished, check if the // connection must be upgraded or a body is expected. STATE_BODY_DECISION => { - if has_active_callbacks { - callback!(on_headers); - } - let method = self.method; let status = self.status; @@ -797,10 +835,10 @@ impl Parser { // In case of Connection: Upgrade or a CONNECT method if self.is_connect { // In case of CONNECT method - callback!(on_connect); + event_with_range!(on_connect, 0, 0); move_to!(tunnel); } else if self.has_upgrade && !self.is_request && status == 101 { - callback!(on_upgrade); + event_with_range!(on_upgrade, 0, 0); move_to!(tunnel); } else if self.is_request { if self.has_transfer_encoding && !self.has_chunked_transfer_encoding { @@ -810,18 +848,42 @@ impl Parser { ); } else if self.skip_body { self.continue_without_data = true; - self.complete(0); + if !self.complete_message( + 0, + active_events, + has_complete_events, + has_finish_event, + &mut event_cursor, + ) { + suspend!(); + } } else if self.has_content_length { // RFC 9110 section 6.3 if self.content_length == 0 { self.continue_without_data = true; - self.complete(0); + if !self.complete_message( + 0, + active_events, + has_complete_events, + has_finish_event, + &mut event_cursor, + ) { + suspend!(); + } } else { move_to!(body_via_content_length); } } else if !self.has_chunked_transfer_encoding { self.continue_without_data = true; - self.complete(0); + if !self.complete_message( + 0, + active_events, + has_complete_events, + has_finish_event, + &mut event_cursor, + ) { + suspend!(); + } } else { move_to!(chunk_header); } @@ -830,11 +892,27 @@ impl Parser { // RFC 9110 section 15.4.5 if self.skip_body || (status < 200 && status != 101) || status == 204 || status == 205 || status == 304 { self.continue_without_data = true; - self.complete(0); + if !self.complete_message( + 0, + active_events, + has_complete_events, + has_finish_event, + &mut event_cursor, + ) { + suspend!(); + } } else if self.has_content_length { if self.content_length == 0 { self.continue_without_data = true; - self.complete(0); + if !self.complete_message( + 0, + active_events, + has_complete_events, + has_finish_event, + &mut event_cursor, + ) { + suspend!(); + } } else { move_to!(body_via_content_length); } @@ -849,24 +927,45 @@ impl Parser { // RFC 9112 section 6.2 STATE_BODY_VIA_CONTENT_LENGTH => { let expected = self.remaining_content_length; - let available_64 = available as u64; + let mut to_consume = expected.min(available as u64); + + if max_body_payload != 0 { + if body_payload_read >= max_body_payload { + suspend!(); + } - // Less data than what it is expected - if available_64 < expected { - self.remaining_content_length -= available_64; + to_consume = to_consume.min(max_body_payload - body_payload_read); + } - callback!(on_data, 0, available); - advance!(available); + if to_consume == 0 { + suspend!(); + } + + let to_consume_usize = to_consume as usize; + body_payload_read += to_consume; + + if to_consume < expected { + event_with_range!(on_data, 0, to_consume_usize); + self.remaining_content_length -= to_consume; + advance!(to_consume_usize); } else { - self.remaining_content_length = 0; + event_with_range!(on_data, 0, to_consume_usize); + event_with_range!(on_body, to_consume_usize, 0); - callback!(on_data, 0, expected as usize); - callback!(on_body, expected as usize, 0); + self.remaining_content_length = 0; self.continue_without_data = true; - advance!(expected as usize); - self.complete(expected as usize); + if !self.complete_message( + to_consume_usize, + active_events, + has_complete_events, + has_finish_event, + &mut event_cursor, + ) { + suspend!(); + } + advance!(to_consume_usize); } } @@ -875,8 +974,25 @@ impl Parser { // Note that on_body can't and will not be called here as there is no way to // know when the response finishes. STATE_BODY_WITH_NO_LENGTH => { - callback!(on_data, 0, available); - advance!(available); + let mut to_consume = available as u64; + + if max_body_payload != 0 { + if body_payload_read >= max_body_payload { + suspend!(); + } + + to_consume = to_consume.min(max_body_payload - body_payload_read); + } + + if to_consume == 0 { + suspend!(); + } + + let to_consume_usize = to_consume as usize; + body_payload_read += to_consume; + + event_with_range!(on_data, 0, to_consume_usize); + advance!(to_consume_usize); } // RFC 9112 section 7.1 @@ -928,7 +1044,7 @@ impl Parser { self.chunk_size = chunk_length; self.remaining_chunk_size = chunk_length; - callback!( + event_with_range!( on_chunk_length, chunk_length_start, chunk_length_end - chunk_length_start @@ -940,13 +1056,14 @@ impl Parser { move_to!(chunk_extensions); } else { self.continue_without_data = true; - advance!(cr + 2); if self.chunk_size == 0 { - callback!(on_chunk, 3, 0); - callback!(on_body, 3, 0); + event_with_range!(on_chunk, 3, 0); + event_with_range!(on_body, 3, 0); + advance!(cr + 2); move_to!(trailer); } else { + advance!(cr + 2); move_to!(chunk_data); } } @@ -989,15 +1106,15 @@ impl Parser { // No value if name_end == cr || data[name_end_raw] == b';' { - callback!(on_chunk_extension_name, name_start, name_end - name_start); + event_with_range!(on_chunk_extension_name, name_start, name_end - name_start); if name_end_raw == cr { - advance!(cr + 2); - if self.chunk_size == 0 { - callback!(on_body); + event_with_range!(on_body, 0, 0); + advance!(cr + 2); move_to!(trailer); } else { + advance!(cr + 2); move_to!(chunk_data); } } else { @@ -1073,12 +1190,12 @@ impl Parser { } } - callback!(on_chunk_extension_name, name_start, name_end - name_start); + event_with_range!(on_chunk_extension_name, name_start, name_end - name_start); if quoted { - callback!(on_chunk_extension_value, quote_start, value_end - quote_start + 1); + event_with_range!(on_chunk_extension_value, quote_start, value_end - quote_start + 1); } else { - callback!(on_chunk_extension_value, value_start, value_end - value_start); + event_with_range!(on_chunk_extension_value, value_start, value_end - value_start); } let next_semicolon = find_char(data, next_extension, cr, b';').unwrap_or(cr); @@ -1094,12 +1211,12 @@ impl Parser { if next_semicolon < cr { advance!(next_semicolon + 1); } else { - advance!(cr + 2); - if self.chunk_size == 0 { - callback!(on_body); + event_with_range!(on_body, 0, 0); + advance!(cr + 2); move_to!(trailer); } else { + advance!(cr + 2); move_to!(chunk_data); } } @@ -1118,7 +1235,6 @@ impl Parser { STATE_CHUNK_DATA => { let expected = self.remaining_chunk_size; - let available_64 = available as u64; // No more data for this chunk, just wait for the CRLF if expected == 0 { @@ -1130,21 +1246,34 @@ impl Parser { advance!(2); move_to!(chunk_header); } - } else if available_64 < expected { - // Less data than what it is expected for this chunk - self.remaining_chunk_size -= available_64; + } else { + let mut to_consume = expected.min(available as u64); - callback!(on_chunk, 0, available); - callback!(on_data, 0, available); + if max_body_payload != 0 { + if body_payload_read >= max_body_payload { + suspend!(); + } - advance!(available); - } else { - self.remaining_chunk_size = 0; + to_consume = to_consume.min(max_body_payload - body_payload_read); + } - callback!(on_chunk, 0, expected as usize); - callback!(on_data, 0, expected as usize); + if to_consume == 0 { + suspend!(); + } + + let to_consume_usize = to_consume as usize; + body_payload_read += to_consume; + + event_with_range!(on_chunk, 0, to_consume_usize); + event_with_range!(on_data, 0, to_consume_usize); + + if to_consume < expected { + self.remaining_chunk_size -= to_consume; + } else { + self.remaining_chunk_size = 0; + } - advance!(expected as usize); + advance!(to_consume_usize); } } @@ -1164,10 +1293,18 @@ impl Parser { // No more trailers or no trailers at all, message completed if cr == 0 { - callback!(on_trailers, 2, 0); + event_with_range!(on_trailers, 2, 0); self.continue_without_data = true; + if !self.complete_message( + 2, + active_events, + has_complete_events, + has_finish_event, + &mut event_cursor, + ) { + suspend!(); + } advance!(2); - self.complete(2); next!(); } @@ -1181,7 +1318,7 @@ impl Parser { let mut trailer_value_start = trailer_name_end + 1; let mut trailer_value_end = cr; - if has_trailer_value_callback { + if has_trailer_value_event { strip_ows_fast(data, &mut trailer_value_start, &mut trailer_value_end, true); } @@ -1190,16 +1327,16 @@ impl Parser { fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character"); } - if has_trailer_name_callback { - callback!( + if has_trailer_name_event { + event_with_range!( on_trailer_name, trailer_name_start, trailer_name_end - trailer_name_start ); } - if has_trailer_value_callback { - callback!( + if has_trailer_value_event { + event_with_range!( on_trailer_value, trailer_value_start, trailer_value_end - trailer_value_start @@ -1258,7 +1395,17 @@ impl Parser { // Notify the status change #[cfg(any(debug_assertions, feature = "debug"))] if previous_state != self.state { - callback!(on_state_change); + if active_events & EVENT_ACTIVE_ON_STATE_CHANGE != 0 + && !self.try_emit_event_range( + &mut event_cursor, + EVENT_STATE_CHANGE, + self.position, + self.state as usize, + ) + { + parsing = false; + continue 'parser; + } previous_state = self.state; } @@ -1323,16 +1470,77 @@ impl Parser { } } + if has_active_events { + unsafe { + *self.events.add(event_cursor) = EVENT_END; + } + } + + if self.active_callbacks != 0 { + self.invoke_callbacks(); + } + // Return the number of consumed bytes consumed } - // RFC 9110 section 6.4.1 - Message completed + /// Completes the current message without consuming more input. + /// + /// This is intended for integrations that consume opaque body bytes outside + /// Milo after headers were parsed, then need Milo to perform the normal + /// completion transition and emit completion events. + pub fn complete(&mut self) { + match self.state { + STATE_BODY_DECISION + | STATE_TUNNEL + | STATE_BODY_VIA_CONTENT_LENGTH + | STATE_BODY_WITH_NO_LENGTH + | STATE_CHUNK_HEADER + | STATE_TRAILER => {} + _ => { + self.fail(ERROR_UNEXPECTED_STATE, "Invalid state"); + return; + } + } + + let active_events = self.active_events | self.active_callbacks; + let has_complete_events = active_events & (EVENT_ACTIVE_ON_MESSAGE_COMPLETE | EVENT_ACTIVE_ON_RESET) != 0; + let has_finish_event = active_events & EVENT_ACTIVE_ON_FINISH != 0; + let mut event_cursor = 0usize; + + if !self.complete_message( + 0, + active_events, + has_complete_events, + has_finish_event, + &mut event_cursor, + ) { + self.paused = true; + } + + unsafe { + *self.events.add(event_cursor) = EVENT_END; + } + } + + // RFC 9110 section 6.4.1 - Message completed. #[inline(always)] - fn complete(&mut self, offset: usize) { - if self.active_callbacks != 0 { - callback!(on_message_complete, offset, 0); - callback!(on_reset, offset, 0); + fn complete_message( + &mut self, + offset: usize, + active_events: u64, + has_complete_events: bool, + has_finish_event: bool, + event_cursor: &mut usize, + ) -> bool { + if has_complete_events { + if (active_events & EVENT_ACTIVE_ON_MESSAGE_COMPLETE != 0 + && !self.try_emit_event_range(event_cursor, EVENT_MESSAGE_COMPLETE, self.position + offset, 0)) + || (active_events & EVENT_ACTIVE_ON_RESET != 0 + && !self.try_emit_event_range(event_cursor, EVENT_RESET, self.position + offset, 0)) + { + return false; + } } self.continue_without_data = false; @@ -1341,12 +1549,14 @@ impl Parser { if self.has_upgrade && self.is_request { move_to!(tunnel); } else if self.has_connection_close { - if self.active_callbacks != 0 { - callback!(on_finish); + if has_finish_event && !self.try_emit_event_range(event_cursor, EVENT_FINISH, self.position, 0) { + return false; } move_to!(finish); } else { move_to!(start); } + + true } } diff --git a/parser/src/wasm.rs b/parser/src/wasm.rs index 5507278..73d380c 100644 --- a/parser/src/wasm.rs +++ b/parser/src/wasm.rs @@ -70,20 +70,6 @@ pub fn parse(parser: *mut c_void, data: *const c_uchar, limit: usize) -> usize { unsafe { (*(parser as *mut Parser)).parse(data, limit) } } -/// Parses a slice of characters. It returns consumed bytes, or -(consumed + 1) -/// if the parser errored. -#[unsafe(no_mangle)] -pub fn parse_with_error(parser: *mut c_void, data: *const c_uchar, limit: usize) -> i32 { - let parser = unsafe { &mut *(parser as *mut Parser) }; - let consumed = parser.parse(data, limit) as i32; - - if parser.error_code == crate::ERROR_NONE { - consumed - } else { - -(consumed + 1) - } -} - /// Pauses the parser. It will have to be resumed via `resume`. #[unsafe(no_mangle)] pub fn pause(parser: *mut c_void) { unsafe { (*(parser as *mut Parser)).pause() } } @@ -92,6 +78,10 @@ pub fn pause(parser: *mut c_void) { unsafe { (*(parser as *mut Parser)).pause() #[unsafe(no_mangle)] pub fn resume(parser: *mut c_void) { unsafe { (*(parser as *mut Parser)).resume() } } +/// Completes the current message without consuming more input. +#[unsafe(no_mangle)] +pub fn complete(parser: *mut c_void) { unsafe { (*(parser as *mut Parser)).complete() } } + /// Marks the parser as finished. Any new data received via `parse` will /// put the parser in the error state. #[unsafe(no_mangle)] @@ -139,6 +129,10 @@ pub fn is_autodetect(parser: *const c_void) -> bool { unsafe { (*(parser as *con #[unsafe(no_mangle)] pub fn is_request(parser: *const c_void) -> bool { unsafe { (*(parser as *const Parser)).is_request } } +// Get the parser debug property. +#[unsafe(no_mangle)] +pub fn is_debug(parser: *const c_void) -> bool { unsafe { (*(parser as *const Parser)).debug } } + // Get the parser paused property. #[unsafe(no_mangle)] pub fn is_paused(parser: *const c_void) -> bool { unsafe { (*(parser as *const Parser)).paused } } @@ -149,6 +143,12 @@ pub fn should_manage_unconsumed(parser: *const c_void) -> bool { unsafe { (*(parser as *const Parser)).manage_unconsumed } } +// Get the parser suspend_after_headers property. +#[unsafe(no_mangle)] +pub fn should_suspend_after_headers(parser: *const c_void) -> bool { + unsafe { (*(parser as *const Parser)).suspend_after_headers } +} + // Get the parser max_start_line_length property. #[unsafe(no_mangle)] pub fn get_max_start_line_length(parser: *const c_void) -> usize { @@ -161,6 +161,10 @@ pub fn get_max_header_length(parser: *const c_void) -> usize { unsafe { (*(parser as *const Parser)).max_header_length } } +// Get the parser max_body_payload property. +#[unsafe(no_mangle)] +pub fn get_max_body_payload(parser: *const c_void) -> u64 { unsafe { (*(parser as *const Parser)).max_body_payload } } + // Get the parser continue_without_data property. #[unsafe(no_mangle)] pub fn should_continue_without_data(parser: *const c_void) -> bool { @@ -199,14 +203,6 @@ pub fn get_method(parser: *const c_void) -> u8 { unsafe { (*(parser as *const Pa #[unsafe(no_mangle)] pub fn get_status(parser: *const c_void) -> u32 { unsafe { (*(parser as *const Parser)).status } } -// Get the parser version_major property. -#[unsafe(no_mangle)] -pub fn get_version_major(parser: *const c_void) -> u8 { unsafe { (*(parser as *const Parser)).version_major } } - -// Get the parser version_minor property. -#[unsafe(no_mangle)] -pub fn get_version_minor(parser: *const c_void) -> u8 { unsafe { (*(parser as *const Parser)).version_minor } } - // Get the parser has_connection_close property. #[unsafe(no_mangle)] pub fn has_connection_close(parser: *const c_void) -> bool { @@ -269,7 +265,7 @@ pub fn has_trailers(parser: *const c_void) -> bool { unsafe { (*(parser as *cons pub fn get_error_description_raw(parser: *mut c_void) -> u64 { let parser = unsafe { &(*(parser as *const Parser)) }; - let ptr = parser.error_description as u64; + let ptr = parser.error_description.as_ptr() as u64; let len = parser.error_description_len as u64; (ptr << 32) + len @@ -296,6 +292,13 @@ pub fn set_should_manage_unconsumed(parser: *mut c_void, value: bool) { } } +#[unsafe(no_mangle)] +pub fn set_should_suspend_after_headers(parser: *mut c_void, value: bool) { + unsafe { + (*(parser as *mut Parser)).suspend_after_headers = value; + } +} + #[unsafe(no_mangle)] pub fn set_max_start_line_length(parser: *mut c_void, value: usize) { unsafe { @@ -310,6 +313,13 @@ pub fn set_max_header_length(parser: *mut c_void, value: usize) { } } +#[unsafe(no_mangle)] +pub fn set_max_body_payload(parser: *mut c_void, value: u64) { + unsafe { + (*(parser as *mut Parser)).max_body_payload = value; + } +} + #[unsafe(no_mangle)] pub fn set_should_continue_without_data(parser: *mut c_void, value: bool) { unsafe { @@ -331,9 +341,23 @@ pub fn set_is_connect(parser: *mut c_void, value: bool) { } } +#[unsafe(no_mangle)] +pub fn set_debug(parser: *mut c_void, value: bool) { + unsafe { + (*(parser as *mut Parser)).debug = value; + } +} + #[unsafe(no_mangle)] pub fn set_active_callbacks(parser: *mut c_void, value: u64) { unsafe { (*(parser as *mut Parser)).active_callbacks = value; } } + +#[unsafe(no_mangle)] +pub fn set_active_events(parser: *mut c_void, value: u64) { + unsafe { + (*(parser as *mut Parser)).active_events = value; + } +} diff --git a/parser/src/wasm/template.js b/parser/src/wasm/template.js index e8d5126..4d30a68 100644 --- a/parser/src/wasm/template.js +++ b/parser/src/wasm/template.js @@ -30,16 +30,6 @@ function parse (parser, data, limit) { return this.parse(parser, data, limit) >>> 0 } -function parseWithError (parser, data, limit) { - const result = this.parse_with_error(parser, data, limit) | 0 - const errored = result < 0 - - return { - consumed: errored ? -result - 1 : result, - errored - } -} - function fail (parser, code, description) { const len = description.length const ptr = this.alloc(len) @@ -112,10 +102,10 @@ export function setup (env = {}) { create: create.bind(wasm), destroy: destroy.bind(wasm), parse: parse.bind(wasm), - parseWithError: parseWithError.bind(wasm), fail: fail.bind(wasm), hasDebug: hasDebug.bind(wasm), clear: wasm.clear, + complete: wasm.complete, finish: wasm.finish, pause: wasm.pause, reset: wasm.reset, diff --git a/parser/tests/basic.rs b/parser/tests/basic.rs index d80a736..278d194 100644 --- a/parser/tests/basic.rs +++ b/parser/tests/basic.rs @@ -3,10 +3,32 @@ mod helpers; #[allow(unused_imports)] use std::ffi::c_uchar; -use milo_parser::{CALLBACK_ACTIVE_ON_HEADERS, Parser, STATE_ERROR, STATE_FINISH, STATE_HEADER, STATE_START}; +use milo_parser::{ + CALLBACK_ACTIVE_ON_HEADERS, ERROR_NONE, ERROR_UNEXPECTED_CHARACTER, ERROR_UNEXPECTED_STATE, + EVENT_ACTIVE_ON_HEADER_NAME, EVENT_ACTIVE_ON_HEADER_VALUE, STATE_BODY_DECISION, STATE_ERROR, STATE_FINISH, + STATE_HEADER, STATE_START, +}; use crate::helpers::{context, create_parser, http, parse}; +#[test] +fn basic_error_description_is_clamped_and_terminated() { + let mut parser = create_parser(); + let description = "a".repeat(300); + + parser.fail(ERROR_UNEXPECTED_CHARACTER, &description); + + assert_eq!(parser.error_description_len, 254); + assert_eq!(parser.error_description[254], 0); + assert_eq!(parser.error_description_str().len(), 254); + assert!(parser.error_description_str().bytes().all(|byte| byte == b'a')); + + parser.reset(false); + + assert_eq!(parser.error_description_len, 0); + assert_eq!(parser.error_description[0], 0); +} + #[test] fn basic_disable_autodetect() { let mut parser = create_parser(); @@ -218,6 +240,181 @@ fn basic_connection_close() { assert_eq!(parser.state, STATE_FINISH); } +#[test] +fn basic_max_body_payload_content_length() { + let mut parser = create_parser(); + parser.autodetect = false; + parser.is_request = false; + parser.max_body_payload = 3; + + let message = String::from("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nabcdef"); + let body_start = message.find("\r\n\r\n").unwrap() + 4; + + let consumed = parse(&mut parser, &message); + assert_eq!(consumed, body_start + 3); + assert_eq!(parser.remaining_content_length, 3); + assert!(!parser.paused); + + let consumed = parser.parse(unsafe { message.as_ptr().add(body_start + 3) }, 3); + assert_eq!(consumed, 3); + assert_eq!(parser.remaining_content_length, 0); +} + +#[test] +fn basic_max_body_payload_chunked() { + let mut parser = create_parser(); + parser.autodetect = false; + parser.is_request = false; + parser.max_body_payload = 3; + + let message = String::from("HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n6\r\nabcdef\r\n0\r\n\r\n"); + let chunk_data_start = message.find("\r\n\r\n6\r\n").unwrap() + 7; + + let consumed = parse(&mut parser, &message); + assert_eq!(consumed, chunk_data_start + 3); + assert_eq!(parser.remaining_chunk_size, 3); + assert!(!parser.paused); + + let remaining = &message[chunk_data_start + 3..]; + let consumed = parser.parse(remaining.as_ptr(), remaining.len()); + assert_eq!(consumed, remaining.len()); + assert_eq!(parser.remaining_chunk_size, 0); +} + +#[test] +fn basic_max_body_payload_no_length() { + let mut parser = create_parser(); + parser.autodetect = false; + parser.is_request = false; + parser.max_body_payload = 3; + + let message = String::from("HTTP/1.1 200 OK\r\n\r\nabcdef"); + let body_start = message.find("\r\n\r\n").unwrap() + 4; + + let consumed = parse(&mut parser, &message); + assert_eq!(consumed, body_start + 3); + assert!(!parser.paused); + + let consumed = parser.parse(unsafe { message.as_ptr().add(body_start + 3) }, 3); + assert_eq!(consumed, 3); +} + +#[test] +fn basic_max_body_payload_zero_is_unlimited() { + let mut parser = create_parser(); + parser.autodetect = false; + parser.is_request = false; + parser.max_body_payload = 0; + + let message = String::from("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nabcdef"); + let consumed = parse(&mut parser, &message); + assert_eq!(consumed, message.len()); + assert_eq!(parser.remaining_content_length, 0); +} + +#[test] +fn basic_suspend_after_headers_content_length() { + let mut parser = create_parser(); + parser.suspend_after_headers = true; + + let message = String::from("POST / HTTP/1.1\r\nContent-Length: 6\r\n\r\nabcdef"); + let body_start = message.find("\r\n\r\n").unwrap() + 4; + + let consumed = parse(&mut parser, &message); + assert_eq!(consumed, body_start); + assert_eq!(parser.state, STATE_BODY_DECISION); + assert!(!parser.paused); + + let consumed = parser.parse(unsafe { message.as_ptr().add(body_start) }, message.len() - body_start); + assert_eq!(consumed, message.len() - body_start); + assert_eq!(parser.state, STATE_START); +} + +#[test] +fn basic_suspend_after_headers_emits_headers_once() { + let mut parser = create_parser(); + parser.active_callbacks = CALLBACK_ACTIVE_ON_HEADERS; + parser.suspend_after_headers = true; + + let message = String::from("POST / HTTP/1.1\r\nContent-Length: 6\r\n\r\nabcdef"); + let body_start = message.find("\r\n\r\n").unwrap() + 4; + + let consumed = parse(&mut parser, &message); + assert_eq!(consumed, body_start); + + let consumed = parser.parse(unsafe { message.as_ptr().add(body_start) }, message.len() - body_start); + assert_eq!(consumed, message.len() - body_start); + + let context = unsafe { Box::from_raw(parser.context as *mut context::Context) }; + let headers_count = context.output.matches("\"event\": \"headers\"").count(); + let _ = Box::into_raw(context); + + assert_eq!(headers_count, 1); +} + +#[test] +fn basic_suspend_after_headers_zero_body_completes_on_empty_parse() { + let mut parser = create_parser(); + parser.suspend_after_headers = true; + + let message = String::from("HTTP/1.1 204 No Content\r\n\r\n"); + let consumed = parse(&mut parser, &message); + + assert_eq!(consumed, message.len()); + assert_eq!(parser.state, STATE_BODY_DECISION); + + let consumed = parser.parse(message.as_ptr(), 0); + assert_eq!(consumed, 0); + assert_eq!(parser.state, STATE_START); +} + +#[test] +fn basic_complete_after_suspend_after_headers() { + let mut parser = create_parser(); + parser.suspend_after_headers = true; + + let message = String::from("POST / HTTP/1.1\r\nContent-Length: 6\r\n\r\nabcdef"); + let body_start = message.find("\r\n\r\n").unwrap() + 4; + + let consumed = parse(&mut parser, &message); + assert_eq!(consumed, body_start); + assert_eq!(parser.state, STATE_BODY_DECISION); + + parser.complete(); + assert_eq!(parser.state, STATE_START); + assert_eq!(parser.error_code, ERROR_NONE); +} + +#[test] +fn basic_complete_rejects_invalid_state() { + let mut parser = create_parser(); + + parser.complete(); + + assert_eq!(parser.state, STATE_ERROR); + assert_eq!(parser.error_code, ERROR_UNEXPECTED_STATE); + assert_eq!(parser.error_description_str(), "Invalid state"); +} + +#[test] +fn basic_event_buffer_full_stops_parsing() { + let mut parser = milo_parser::Parser::new(); + parser.autodetect = false; + parser.is_request = false; + parser.active_events = EVENT_ACTIVE_ON_HEADER_NAME | EVENT_ACTIVE_ON_HEADER_VALUE; + + let mut message = String::from("HTTP/1.1 200 OK\r\n"); + for i in 0..4000 { + message.push_str(&format!("Header{i}: value\r\n")); + } + message.push_str("\r\n"); + + let consumed = parser.parse(message.as_ptr(), message.len()); + assert!(consumed < message.len()); + assert!(!parser.paused); + assert_eq!(parser.error_code, ERROR_NONE); +} + #[test] fn basic_sample_multiple_responses() { let mut parser = create_parser(); @@ -352,13 +549,7 @@ fn basic_pause_and_resume() { Content-Length: 3\r\n "#, ); - let sample2 = http(r#"\r\nabc"#); // This will be paused before the body - let sample3 = http(r#"abc"#); - - parser.callbacks.on_headers = |p: &mut Parser, _at: usize, _size: usize| { - p.pause(); - }; - parser.active_callbacks |= CALLBACK_ACTIVE_ON_HEADERS; + let sample2 = http(r#"\r\nabc"#); assert_eq!(parser.paused, false); @@ -366,19 +557,18 @@ fn basic_pause_and_resume() { assert_eq!(consumed1, sample1.len()); assert_eq!(parser.paused, false); - let consumed2 = parse(&mut parser, &sample2); - assert_eq!(consumed2, sample2.len() - 3); + parser.pause(); assert_eq!(parser.paused, true); - let consumed3 = parse(&mut parser, &sample3); + let consumed3 = parse(&mut parser, &sample2); assert_eq!(consumed3, 0); assert_eq!(parser.paused, true); parser.resume(); assert_eq!(parser.paused, false); - let consumed4 = parse(&mut parser, &sample3); - assert_eq!(consumed4, sample3.len()); + let consumed4 = parse(&mut parser, &sample2); + assert_eq!(consumed4, sample2.len()); assert_eq!(parser.paused, false); assert_ne!(parser.state, STATE_ERROR); diff --git a/parser/tests/helpers/callbacks.rs b/parser/tests/helpers/callbacks.rs index 83ce46d..bbbf00d 100644 --- a/parser/tests/helpers/callbacks.rs +++ b/parser/tests/helpers/callbacks.rs @@ -2,20 +2,18 @@ use std::{os::unix::process, slice, str}; -use milo_parser::{Parser, milo_has_debug}; +use milo_parser::{Parser, States, milo_has_debug}; use crate::helpers::{context, output}; pub fn on_state_change(parser: &mut Parser, from: usize, size: usize) { + let state = States::try_from(size as u8).unwrap().as_str(); + output::append_output( parser, - format!( - "\"pos\": {}, \"event\": \"state\", \"state\": \"{}\"", - parser.position, - parser.state_str() - ), + format!("\"pos\": {}, \"event\": \"state\", \"state\": \"{}\"", from, state), from, - size, + 0, ); } @@ -24,7 +22,7 @@ pub fn on_message_start(parser: &mut Parser, from: usize, size: usize) { parser, format!( "\"pos\": {}, \"event\": \"begin\", \"configuration\": {{ \"debug\": {} }}", - parser.position, + from, milo_has_debug(), ), from, @@ -41,7 +39,7 @@ pub fn on_error(parser: &mut Parser, from: usize, size: usize) { parser, format!( "\"pos\": {}, \"event\": {}, \"error_code={}, \"error_code_string\": \"{}\", reason=\"{}\"", - parser.position, + from, "error", parser.error_code, parser.error_code_str(), @@ -81,7 +79,7 @@ pub fn on_header_value(parser: &mut Parser, from: usize, size: usize) { pub fn on_headers(parser: &mut Parser, from: usize, size: usize) { let context = unsafe { Box::from_raw(parser.context as *mut context::Context) }; - let position = parser.position; + let position = from; let chunked = parser.has_chunked_transfer_encoding; let content_length = parser.content_length; diff --git a/parser/tests/helpers/llhttp.rs b/parser/tests/helpers/llhttp.rs index cfa46fa..e0f7075 100644 --- a/parser/tests/helpers/llhttp.rs +++ b/parser/tests/helpers/llhttp.rs @@ -164,7 +164,7 @@ fn on_tunnel(parser: &mut Parser, from: usize, size: usize) { on_state_change(parser, from, size); } - if parser.state == STATE_TUNNEL { + if size as u8 == STATE_TUNNEL { add_event(parser, "tunnel", from, size); } } diff --git a/references/cpp/src/reference.cc b/references/cpp/src/reference.cc index e1e9ab0..a8659fa 100644 --- a/references/cpp/src/reference.cc +++ b/references/cpp/src/reference.cc @@ -23,25 +23,22 @@ uchar_t* copy_string(uchar_t* source, usize_t size) { } void on_state_change(milo_parser::Parser* parser, usize_t from, usize_t size) { - EXTRACT_PAYLOAD(data, parser, from, size); - usize_t position = parser->position; - auto state = milo_parser::milo_state_string(parser); + auto state = milo_parser::milo_state_to_string(static_cast(size)); auto message = create_string(); snprintf(reinterpret_cast(message), MAX_FORMAT, "\"pos\": %lu, \"event\": \"state\", \"state\": \"%s\"", - position, state.ptr); + from, state.ptr); milo_parser::milo_free_string(state); - append_output(parser, message, data, from, size); + append_output(parser, message, NULL, from, 0); } void on_message_start(milo_parser::Parser* parser, usize_t from, usize_t size) { EXTRACT_PAYLOAD(data, parser, from, size); - usize_t position = parser->position; auto message = create_string(); snprintf(reinterpret_cast(message), MAX_FORMAT, - "\"pos\": %lu, \"event\": \"begin\", \"configuration\": { \"debug\": %s }", position, + "\"pos\": %lu, \"event\": \"begin\", \"configuration\": { \"debug\": %s }", from, milo_parser::milo_has_debug() ? "true" : "false"); append_output(parser, message, data, from, size); @@ -54,7 +51,6 @@ void on_message_complete(milo_parser::Parser* parser, usize_t from, usize_t size void on_error(milo_parser::Parser* parser, usize_t from, usize_t size) { EXTRACT_PAYLOAD(data, parser, from, size); - usize_t position = parser->position; usize_t error_code = static_cast(parser->error_code); auto error_code_string = milo_parser::milo_error_code_string(parser); auto error_code_description = milo_parser::milo_error_description_string(parser); @@ -62,7 +58,7 @@ void on_error(milo_parser::Parser* parser, usize_t from, usize_t size) { auto message = create_string(); snprintf(reinterpret_cast(message), MAX_FORMAT, "\"pos\": %lu, \"event\": \"error\", \"error_code\": %lu, \"error_code_string\": \"%s\", \"reason\": \"%s\"", - position, error_code, error_code_string.ptr, error_code_description.ptr); + from, error_code, error_code_string.ptr, error_code_description.ptr); milo_parser::milo_free_string(error_code_string); milo_parser::milo_free_string(error_code_description); @@ -126,7 +122,6 @@ void on_header_value(milo_parser::Parser* parser, usize_t from, usize_t size) { void on_headers(milo_parser::Parser* parser, usize_t from, usize_t size) { EXTRACT_PAYLOAD(data, parser, from, size); - usize_t position = parser->position; usize_t content_length = parser->content_length; bool chunked = parser->has_chunked_transfer_encoding; @@ -143,34 +138,34 @@ void on_headers(milo_parser::Parser* parser, usize_t from, usize_t size) { snprintf(reinterpret_cast(message), MAX_FORMAT, "\"pos\": %lu, \"event\": \"headers\", \"type\": \"response\", \"status\": %u, \"protocol\": \"%s\", " "\"version\": \"%s\", \"body\": \"chunked\"", - position, parser->status, protocol, version); + from, parser->status, protocol, version); } else if (content_length > 0) { snprintf(reinterpret_cast(message), MAX_FORMAT, "\"pos\": %lu, \"event\": \"headers\", \"type\": \"response\", \"status\": %u, \"protocol\": \"%s\", " "\"version\": \"%s\", \"body\": %lu", - position, parser->status, protocol, version, content_length); + from, parser->status, protocol, version, content_length); } else { snprintf(reinterpret_cast(message), MAX_FORMAT, "\"pos\": %lu, \"event\": \"headers\", \"type\": \"response\", \"status\": %u, \"protocol\": \"%s\", " "\"version\": \"%s\", \"body\": null", - position, parser->status, protocol, version); + from, parser->status, protocol, version); } } else { if (chunked) { snprintf(reinterpret_cast(message), MAX_FORMAT, "\"pos\": %lu, \"event\": \"headers\", \"type\": \"request\", \"method\": \"%s\", \"url\": \"%s\", " "\"protocol\": \"%s\", \"version\": \"%s\", \"body\": \"chunked\"", - position, method, url, protocol, version); + from, method, url, protocol, version); } else if (content_length > 0) { snprintf(reinterpret_cast(message), MAX_FORMAT, "\"pos\": %lu, \"event\": \"headers\", \"type\": \"request\", \"method\": \"%s\", \"url\": \"%s\", " "\"protocol\": \"%s\", \"version\": \"%s\", \"body\": %lu", - position, method, url, protocol, version, content_length); + from, method, url, protocol, version, content_length); } else { snprintf(reinterpret_cast(message), MAX_FORMAT, "\"pos\": %lu, \"event\": \"headers\", \"type\": \"request\", \"method\": \"%s\", \"url\": \"%s\", " "\"protocol\": \"%s\", \"version\": \"%s\", \"body\": null", - position, method, url, protocol, version); + from, method, url, protocol, version); } } diff --git a/references/fixtures/debug.jsonl b/references/fixtures/debug.jsonl index f51c989..3f18264 100644 --- a/references/fixtures/debug.jsonl +++ b/references/fixtures/debug.jsonl @@ -6,8 +6,8 @@ { "pos": 6, "event": "protocol", "data": "HTTP" } { "pos": 11, "event": "version", "data": "1.1" } { "pos": 16, "event": "state", "state": "HEADER", "data": null } -{ "pos": 18, "event": "state", "state": "BODY_DECISION", "data": null } { "pos": 18, "event": "headers", "type": "request", "method": "GET", "url": "/", "protocol": "HTTP", "version": "1.1", "body": null, "data": null } +{ "pos": 18, "event": "state", "state": "BODY_DECISION", "data": null } { "pos": 18, "event": "complete", "data": null } { "pos": 18, "event": "state", "state": "START", "data": null } { "pos": 18, "consumed": 18, "state": "START" } @@ -26,8 +26,8 @@ { "pos": 36, "event": "header_value", "data": "chunked" } { "pos": 45, "event": "header_name", "data": "Trailer" } { "pos": 54, "event": "header_value", "data": "x-trailer" } -{ "pos": 67, "event": "state", "state": "BODY_DECISION", "data": null } { "pos": 67, "event": "headers", "type": "response", "status": 200, "protocol": "HTTP", "version": "1.1", "body": "chunked", "data": null } +{ "pos": 67, "event": "state", "state": "BODY_DECISION", "data": null } { "pos": 67, "event": "state", "state": "CHUNK_HEADER", "data": null } { "pos": 67, "event": "chunk_length", "data": "c" } { "pos": 69, "event": "state", "state": "CHUNK_EXTENSIONS", "data": null } diff --git a/references/wasm/src/readme.js b/references/wasm/src/readme.js index 81075f0..a9f68f4 100755 --- a/references/wasm/src/readme.js +++ b/references/wasm/src/readme.js @@ -29,13 +29,13 @@ const message = Buffer.from('HTTP/1.1 200 OK\r\nContent-Length: 3\r\n\r\nabc') // Allocate a memory in the WebAssembly space. This speeds up data copying to the WebAssembly layer. const ptr = milo.alloc(message.length) -// Create a buffer we can use normally. -const buffer = Buffer.from(milo.memory.buffer, ptr, message.length) - // Create the parser. const parser = milo.create() milo.setActiveCallbacks(parser, milo.CALLBACK_ACTIVE_ON_DATA) +// Create a buffer we can use normally. +const buffer = Buffer.from(milo.memory.buffer, ptr, message.length) + // Now perform the main parsing using milo.parse. The method returns the number of consumed characters. buffer.set(message, 0) milo.parse(parser, ptr, message.length) diff --git a/references/wasm/src/reference.js b/references/wasm/src/reference.js index b2f9b7c..63d44b1 100755 --- a/references/wasm/src/reference.js +++ b/references/wasm/src/reference.js @@ -39,11 +39,11 @@ function showSpan (name, context, parser, from, size) { function onStateChange (context, parser, from, size) { return appendOutput( - sprintf('"pos": {}, "event": "state", "state": "{}"', from, context.milo.States[context.milo.getState(parser)]), + sprintf('"pos": {}, "event": "state", "state": "{}"', from, context.milo.States[size]), context, parser, from, - size + 0 ) } @@ -125,7 +125,7 @@ function onHeaderValue (context, parser, from, size) { return showSpan('header_value', context, parser, from, size) } -function onHeaders (context, parser, from, size) { +function onHeaders (context, parser, from) { const position = from const chunked = context.milo.hasChunkedTransferEncoding(parser) const contentLength = context.milo.getContentLength(parser) @@ -150,7 +150,7 @@ function onHeaders (context, parser, from, size) { context, parser, from, - size + 0 ) } else if (contentLength > 0) { return appendOutput( @@ -165,7 +165,7 @@ function onHeaders (context, parser, from, size) { context, parser, from, - size + 0 ) } else { return appendOutput( @@ -173,7 +173,7 @@ function onHeaders (context, parser, from, size) { context, parser, from, - size + 0 ) } } else { @@ -192,7 +192,7 @@ function onHeaders (context, parser, from, size) { context, parser, from, - size + 0 ) } else if (contentLength > 0) { return appendOutput( @@ -208,7 +208,7 @@ function onHeaders (context, parser, from, size) { context, parser, from, - size + 0 ) } else { return appendOutput( @@ -223,7 +223,7 @@ function onHeaders (context, parser, from, size) { context, parser, from, - size + 0 ) } } diff --git a/scripts/buildinfo.js b/scripts/buildinfo.js index 3a7b866..b9757e4 100644 --- a/scripts/buildinfo.js +++ b/scripts/buildinfo.js @@ -1,7 +1,12 @@ +import { execFile } from 'node:child_process' import { readFile } from 'node:fs/promises' +import { fileURLToPath } from 'node:url' +import { promisify } from 'node:util' import semver from 'semver' import YAML from 'yaml' +const execFileAsync = promisify(execFile) + async function readYamlList (name) { const raw = await readFile(new URL(`../macros/constants/${name}.yml`, import.meta.url), 'utf-8') return YAML.parse(raw) @@ -30,13 +35,27 @@ async function readVersion () { } } +async function readParserFields () { + const { stdout } = await execFileAsync('cargo', [ + 'run', + '--quiet', + '--manifest-path', + fileURLToPath(new URL('../macros/Cargo.toml', import.meta.url)), + '--bin', + 'milo-parser-fields' + ]) + + return JSON.parse(stdout) +} + export async function getBuildInfo () { - const [version, methods, errors, callbacks, states] = await Promise.all([ + const [version, methods, errors, callbacks, states, parserFields] = await Promise.all([ readVersion(), readYamlList('methods'), readYamlList('errors'), readYamlList('callbacks'), - readYamlList('states') + readYamlList('states'), + readParserFields() ]) const constants = {} @@ -48,14 +67,22 @@ export async function getBuildInfo () { constants[`CALLBACK_${callback.toUpperCase()}`] = i } + constants.EVENT_END = 0 + for (const [i, callback] of callbacks.entries()) { + constants[`EVENT_${callback.replace(/^on_/, '').toUpperCase()}`] = i + 1 + } + let all = 0 constants.CALLBACK_ACTIVE_NONE = 0 + constants.EVENT_ACTIVE_NONE = 0 for (const [i, callback] of callbacks.entries()) { const bit = 1 << i constants[`CALLBACK_ACTIVE_${callback.toUpperCase()}`] = bit + constants[`EVENT_ACTIVE_${callback.toUpperCase()}`] = bit all |= bit } constants.CALLBACK_ACTIVE_ALL = all + constants.EVENT_ACTIVE_ALL = all for (const [i, error] of errors.entries()) { constants[`ERROR_${error}`] = i @@ -65,6 +92,8 @@ export async function getBuildInfo () { constants[`STATE_${state.toUpperCase()}`] = i } + Object.assign(constants, parserFields) + return { version, constants } } diff --git a/scripts/postbuild-wasm.js b/scripts/postbuild-wasm.js index f0150c3..2aacc07 100644 --- a/scripts/postbuild-wasm.js +++ b/scripts/postbuild-wasm.js @@ -10,7 +10,10 @@ const enums = { METHOD: 'Methods', CALLBACK: 'Callbacks', CALLBACK_ACTIVE: 'CallbackActives', - STATE: 'States' + EVENT: 'Events', + EVENT_ACTIVE: 'EventActives', + STATE: 'States', + PARSER_FIELD: 'ParserFields' } const getters = { @@ -18,10 +21,13 @@ const getters = { isRequest: ['bool', 'is_request'], isPaused: ['bool', 'is_paused'], shouldManageUnconsumed: ['bool', 'should_manage_unconsumed'], + shouldSuspendAfterHeaders: ['bool', 'should_suspend_after_headers'], getMaxStartLineLength: ['number', 'get_max_start_line_length'], getMaxHeaderLength: ['number', 'get_max_header_length'], + getMaxBodyPayload: ['bigint', 'get_max_body_payload'], shouldContinueWithoutData: ['bool', 'should_continue_without_data'], isConnect: ['bool', 'is_connect'], + isDebug: ['bool', 'is_debug'], shouldSkipBody: ['bool', 'should_skip_body'], getState: ['number', 'get_state'], getPosition: ['number', 'get_position'], @@ -29,8 +35,6 @@ const getters = { getErrorCode: ['number', 'get_error_code'], getMethod: ['number', 'get_method'], getStatus: ['number', 'get_status'], - getVersionMajor: ['number', 'get_version_major'], - getVersionMinor: ['number', 'get_version_minor'], hasConnectionClose: ['bool', 'has_connection_close'], hasConnectionUpgrade: ['bool', 'has_connection_upgrade'], getContentLength: ['bigint', 'get_content_length'], @@ -50,11 +54,15 @@ const setters = { setShouldContinueWithoutData: 'set_should_continue_without_data', setIsRequest: 'set_is_request', setIsConnect: 'set_is_connect', + setDebug: 'set_debug', setShouldManageUnconsumed: 'set_should_manage_unconsumed', + setShouldSuspendAfterHeaders: 'set_should_suspend_after_headers', setMaxStartLineLength: 'set_max_start_line_length', setMaxHeaderLength: 'set_max_header_length', + setMaxBodyPayload: 'set_max_body_payload', setShouldSkipBody: 'set_should_skip_body', - setActiveCallbacks: 'set_active_callbacks' + setActiveCallbacks: 'set_active_callbacks', + setActiveEvents: 'set_active_events' } function getCallbacks (constants) { @@ -70,7 +78,9 @@ function generateEnums (constants) { let suffix = '' if (selector === 'CALLBACK_') { matching = matching.filter(c => !c.startsWith('CALLBACK_ACTIVE')) - } else if (selector === 'CALLBACK_ACTIVE_') { + } else if (selector === 'EVENT_') { + matching = matching.filter(c => !c.startsWith('EVENT_ACTIVE_')) + } else if (selector === 'CALLBACK_ACTIVE_' || selector === 'EVENT_ACTIVE_') { suffix = 'n' } @@ -94,7 +104,7 @@ function generateConstants (constants) { .map(([k, v]) => { let value = v.toString() - if (k.startsWith('CALLBACK_ACTIVE_')) { + if (k.startsWith('CALLBACK_ACTIVE_') || k.startsWith('EVENT_ACTIVE_')) { value += 'n' }