Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@
rewriter flushes remaining input bytes before propagating a handler error, preserving
the response. Currently exposed via the Rust API only; the C API still uses the original
behavior.
- Added `Settings::append_bail_out_handler()` and the matching `bail_out!` macro,
`BailOut` rewritable unit, and `BailOutHandler` / `BailOutHandlerSend` type aliases.
Bail-out handlers fire immediately before the raw flush of remaining unparsed input on a
graceful bail-out (memory or content-handler error). Handlers receive the
`RewritingError` and a `BailOut` through which they can append final bytes to the sink
via `BailOut::append(content, content_type)`. Intended for handlers that buffer state
across the document (e.g. text-buffering handlers that defer emission) and need to
flush that state on bail-out.
- Marked `RewritingError` as `#[non_exhaustive]` so that future error variants can be added
without a major version bump. External callers can still `match` on it, but must include a
catch-all `_ =>` arm.
- Reworked `Settings`, `MemorySettings` and `RewriteStrSettings` to use a consuming-builder
API. Fields are now private; construction is via `::new()` plus chained `with_*` setters
and `append_*` methods for the content-handler vectors. This makes future field additions
Expand Down
17 changes: 9 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ mod transform_stream;
use cfg_if::cfg_if;

pub use self::rewriter::{
AsciiCompatibleEncoding, CommentHandler, DoctypeHandler, DocumentContentHandlers,
ElementContentHandlers, ElementHandler, EndHandler, EndTagHandler, HandlerResult, HandlerTypes,
HtmlRewriter, LocalHandlerTypes, MemorySettings, RewriteStrSettings, Settings, TextHandler,
rewrite_str,
AsciiCompatibleEncoding, BailOutHandler, CommentHandler, DoctypeHandler,
DocumentContentHandlers, ElementContentHandlers, ElementHandler, EndHandler, EndTagHandler,
HandlerResult, HandlerTypes, HtmlRewriter, LocalHandlerTypes, MemorySettings,
RewriteStrSettings, Settings, TextHandler, rewrite_str,
};
pub use self::selectors_vm::Selector;
pub use self::transform_stream::OutputSink;
Expand All @@ -56,9 +56,10 @@ pub use self::transform_stream::OutputSink;
/// Rewriting is sequential, so there's no benefit from using the `Send`-compatible rewriter.
pub mod send {
pub use crate::rewriter::{
CommentHandlerSend as CommentHandler, DoctypeHandlerSend as DoctypeHandler,
ElementHandlerSend as ElementHandler, EndHandlerSend as EndHandler,
EndTagHandlerSend as EndTagHandler, TextHandlerSend as TextHandler,
BailOutHandlerSend as BailOutHandler, CommentHandlerSend as CommentHandler,
DoctypeHandlerSend as DoctypeHandler, ElementHandlerSend as ElementHandler,
EndHandlerSend as EndHandler, EndTagHandlerSend as EndTagHandler,
TextHandlerSend as TextHandler,
};
pub use crate::rewriter::{IntoHandler, SendHandlerTypes};

Expand Down Expand Up @@ -95,7 +96,7 @@ pub mod errors {
/// HTML content descriptors that can be produced and modified by a rewriter.
pub mod html_content {
pub use super::rewritable_units::{
Attribute, Comment, ContentType, Doctype, DocumentEnd, Element, EndTag, StartTag,
Attribute, BailOut, Comment, ContentType, Doctype, DocumentEnd, Element, EndTag, StartTag,
StreamingHandler, StreamingHandlerSink, TextChunk, UserData,
};

Expand Down
72 changes: 72 additions & 0 deletions src/rewritable_units/bail_out.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
use super::{ContentType, StreamingHandlerSink};
use crate::transform_stream::OutputSink;
use encoding_rs::Encoding;

/// A rewritable unit that represents the moment the rewriter is about to abandon
/// processing through a graceful bail-out.
///
/// Bail-out handlers registered via [`Settings::append_bail_out_handler()`] receive a
/// `&mut BailOut` and can emit final bytes into the output sink via [`append()`]. When
/// the rewriter aborts, this is the only opportunity for content that other handlers
/// have buffered (e.g. text withheld pending a future chunk) to reach the response.
///
/// Bytes appended via this unit are written *before* the rewriter's own raw flush of
/// remaining unparsed input. The resulting sink order is:
///
/// 1. Transformed bytes the rewriter already emitted normally.
/// 2. Bytes appended by bail-out handlers, in registration order.
/// 3. The rewriter's raw flush of the chunk's unparsed suffix.
///
/// [`Settings::append_bail_out_handler()`]: crate::Settings::append_bail_out_handler
/// [`append()`]: Self::append
pub struct BailOut<'a> {
// Destination that receives every chunk produced by `append()`.
output_sink: &'a mut dyn OutputSink,
// Encoding handed to the `StreamingHandlerSink` that `append()` writes through.
// NOTE(review): presumably the document's output encoding; confirm at the call site
// that constructs `BailOut`.
encoding: &'static Encoding,
}

impl<'a> BailOut<'a> {
    /// Creates a bail-out unit that writes into `output_sink`, passing `encoding` to the
    /// streaming sink used by [`append()`](Self::append).
    #[inline]
    #[must_use]
    pub(crate) fn new(output_sink: &'a mut dyn OutputSink, encoding: &'static Encoding) -> Self {
        Self {
            encoding,
            output_sink,
        }
    }

    /// Appends `content` at the bail-out point.
    ///
    /// Repeated calls within the same bail-out invocation add `content` after whatever
    /// was appended previously. When several bail-out handlers are registered, their
    /// `append` calls are concatenated in registration order.
    ///
    /// `content_type` controls how the content is interpreted before it is written to
    /// the sink. See [`ContentType`].
    ///
    /// # Example
    ///
    /// ```
    /// use lol_html::{bail_out, Settings};
    /// use lol_html::errors::RewritingError;
    /// use lol_html::html_content::ContentType;
    ///
    /// // A handler that, on content-handler-error bail-out, drops a notice into the sink
    /// // before the rewriter's own raw flush of remaining unparsed input.
    /// let settings = Settings::new()
    ///     .with_graceful_bail_out_on_content_handler_error(true)
    ///     .append_bail_out_handler(bail_out!(|err, bail_out| {
    ///         if matches!(err, RewritingError::ContentHandlerError(_)) {
    ///             bail_out.append("<!-- bailed out -->", ContentType::Html);
    ///         }
    ///     }));
    /// # let _ = settings;
    /// ```
    #[inline]
    pub fn append(&mut self, content: &str, content_type: ContentType) {
        // Bind both fields up front so the forwarding closure borrows only the sink.
        let encoding = self.encoding;
        let sink = &mut *self.output_sink;

        // Each chunk the streaming sink produces is handed straight to the output sink.
        StreamingHandlerSink::new(encoding, &mut |bytes| {
            sink.handle_chunk(bytes);
        })
        .write_str(content, content_type);
    }
}
2 changes: 2 additions & 0 deletions src/rewritable_units/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub(crate) use self::mutations::{Mutations, StringChunk};
pub(crate) use self::text_decoder::TextDecoder;
pub(crate) use self::text_encoder::{IncompleteUtf8Resync, TextEncoder};

pub use self::bail_out::*;
pub use self::document_end::*;
pub use self::element::*;
pub use self::mutations::{ContentType, StreamingHandler};
Expand Down Expand Up @@ -83,6 +84,7 @@ macro_rules! impl_user_data {
#[macro_use]
mod mutations;

mod bail_out;
mod document_end;
mod element;
mod streaming_sink;
Expand Down
Loading
Loading