diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec53dd3f..a9d8be10 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,7 @@ jobs: - name: Run clippy run: cargo clippy --all --all-targets --target-dir=target - name: Run clippy on integration tests - run: cargo clippy --all --all-targets --features=integration_test --target-dir=target + run: cargo clippy --all --all-targets --features=_integration_test --target-dir=target - name: Run clippy on C API run: cargo clippy --all-targets --manifest-path=c-api/Cargo.toml --target-dir=target - name: Run clippy on JS API diff --git a/CHANGELOG.md b/CHANGELOG.md index 59e45c1a..0a42ab5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,16 +2,34 @@ ## v3.0.0 -- Added `MemorySettings::graceful_bail_out_on_memory_limit_exceeded`: when set, the rewriter - flushes every input byte it has received but not yet emitted to the sink (as-is) before - returning `MemoryLimitExceededError`, so callers can continue the response by writing - subsequent bytes directly to their downstream sink instead of breaking it. -- Added `Settings::graceful_bail_out_on_content_handler_error`: symmetric to the memory flag - above, but for `RewritingError::ContentHandlerError`. When set, the rewriter flushes - remaining input bytes before propagating a handler error, preserving the response. - Currently exposed via the Rust API only; the C API still uses the original behavior. -- Adding new fields to `MemorySettings` and `Settings` is a SemVer-breaking change for - existing struct-literal construction, hence the major version bump. +- Added `MemorySettings::with_graceful_bail_out_on_memory_limit_exceeded()`: when set, the + rewriter flushes every input byte it has received but not yet emitted to the sink (as-is) + before returning `MemoryLimitExceededError`, so callers can continue the response by + writing subsequent bytes directly to their downstream sink instead of breaking it. 
+- Added `Settings::with_graceful_bail_out_on_content_handler_error()`: symmetric to the + memory setting above, but for `RewritingError::ContentHandlerError`. When set, the + rewriter flushes remaining input bytes before propagating a handler error, preserving + the response. Currently exposed via the Rust API only; the C API still uses the original + behavior. +- Reworked `Settings`, `MemorySettings` and `RewriteStrSettings` to use a consuming-builder + API. Fields are now private; construction is via `::new()` plus chained `with_*` setters + and `append_*` methods for the content-handler vectors. This makes future field additions + non-breaking. Migration: + ```rust + // before + Settings { + element_content_handlers: vec![element!("div", |el| { /* ... */ Ok(()) })], + strict: false, + ..Settings::new() + } + // after + Settings::new() + .with_strict(false) + .append_element_content_handler(element!("div", |el| { /* ... */ Ok(()) })) + ``` +- Renamed the internal-use feature `integration_test` to `_integration_test`. The leading + underscore signals to `cargo-semver-checks` and similar tools that the feature is not + part of the public API. ## v2.9.0 diff --git a/Cargo.toml b/Cargo.toml index 489b810a..87d17117 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,8 +20,9 @@ bench = false [features] debug_trace = [] -# Unstable: for internal use only -integration_test = [] +# Unstable: for internal use only. The leading underscore signals "internal" to +# `cargo-semver-checks` and similar tools so they exclude it from the public-API analysis. 
+_integration_test = [] [[bench]] harness = false diff --git a/README.md b/README.md index edf5d0bd..f8fa22a4 100644 --- a/README.md +++ b/README.md @@ -37,22 +37,17 @@ fn main() -> Result<(), Box> { let mut output = vec![]; let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![ - element!("a[href]", |el| { - let href = el - .get_attribute("href") - .expect("href was required") - .replace("http:", "https:"); - - el.set_attribute("href", &href)?; - - Ok(()) - }) - ], - ..Settings::new() - }, - |c: &[u8]| output.extend_from_slice(c) + Settings::new().append_element_content_handler(element!("a[href]", |el| { + let href = el + .get_attribute("href") + .expect("href was required") + .replace("http:", "https:"); + + el.set_attribute("href", &href)?; + + Ok(()) + })), + |c: &[u8]| output.extend_from_slice(c), ); rewriter.write(b"
li", noop_handler!()), - element!("table > tbody td dfn", noop_handler!()), - element!("body table > tbody tr", noop_handler!()), - element!("body [href]", noop_handler!()), - element!("div img", noop_handler!()), - element!("div.note span", noop_handler!()) - ], - ..Settings::new() - } + Settings::new() + .append_element_content_handler(element!("ul", noop_handler!())) + .append_element_content_handler(element!("ul > li", noop_handler!())) + .append_element_content_handler(element!("table > tbody td dfn", noop_handler!())) + .append_element_content_handler(element!("body table > tbody tr", noop_handler!())) + .append_element_content_handler(element!("body [href]", noop_handler!())) + .append_element_content_handler(element!("div img", noop_handler!())) + .append_element_content_handler(element!("div.note span", noop_handler!())) ) ] ); diff --git a/c-api/src/rewriter.rs b/c-api/src/rewriter.rs index 9fdd8dc9..f6f35049 100644 --- a/c-api/src/rewriter.rs +++ b/c-api/src/rewriter.rs @@ -51,21 +51,26 @@ fn lol_html_rewriter_build_inner( let maybe_encoding = encoding_rs::Encoding::for_label_no_replacement(to_bytes!(encoding, encoding_len)); let encoding = maybe_encoding.ok_or(EncodingError::UnknownEncoding)?; - let settings = Settings { - element_content_handlers: handlers.element, - document_content_handlers: handlers.document, - encoding: encoding - .try_into() - .or(Err(EncodingError::NonAsciiCompatibleEncoding))?, - memory_settings, - strict, - enable_esi_tags, - adjust_charset_on_meta_tag: false, - // TODO: expose `graceful_bail_out_on_content_handler_error` through the C API. Adding - // a new parameter to `lol_html_rewriter_build()` is a breaking ABI change, so it - // belongs behind a new function variant or a settings struct. - graceful_bail_out_on_content_handler_error: false, - }; + // NOTE: `graceful_bail_out_on_content_handler_error` is not yet exposed in the C API + // (default `false`). 
Adding a parameter to `lol_html_rewriter_build()` is a breaking ABI + // change, so it belongs behind a new function variant or a settings struct. + let mut settings = Settings::new() + .with_encoding( + encoding + .try_into() + .or(Err(EncodingError::NonAsciiCompatibleEncoding))?, + ) + .with_memory_settings(memory_settings) + .with_strict(strict) + .with_enable_esi_tags(enable_esi_tags); + + for handler in handlers.element { + settings = settings.append_element_content_handler(handler); + } + + for handler in handlers.document { + settings = settings.append_document_content_handler(handler); + } let output_sink = ExternOutputSink::new(output_sink, output_sink_user_data); let rewriter = lol_html::HtmlRewriter::new(settings, output_sink); diff --git a/examples/defer_scripts/main.rs b/examples/defer_scripts/main.rs index 7f1d593d..f76748d7 100644 --- a/examples/defer_scripts/main.rs +++ b/examples/defer_scripts/main.rs @@ -13,16 +13,13 @@ fn main() { // Create the rewriter let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![element!( - "script[src]:not([async]):not([defer])", - |el| { - el.set_attribute("defer", "").unwrap(); - Ok(()) - } - )], - ..Settings::new() - }, + Settings::new().append_element_content_handler(element!( + "script[src]:not([async]):not([defer])", + |el| { + el.set_attribute("defer", "").unwrap(); + Ok(()) + } + )), output_sink, ); diff --git a/examples/mixed_content_rewriter/main.rs b/examples/mixed_content_rewriter/main.rs index 5f2aea0f..0002f563 100644 --- a/examples/mixed_content_rewriter/main.rs +++ b/examples/mixed_content_rewriter/main.rs @@ -23,22 +23,18 @@ fn main() { // Create the rewriter let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![ - element!("a[href], link[rel=stylesheet][href]", |el| { - rewrite_url_in_attr(el, "href"); + Settings::new() + .append_element_content_handler(element!("a[href], link[rel=stylesheet][href]", |el| { + rewrite_url_in_attr(el, "href"); 
+ Ok(()) + })) + .append_element_content_handler(element!( + "script[src], iframe[src], img[src], audio[src], video[src]", + |el| { + rewrite_url_in_attr(el, "src"); Ok(()) - }), - element!( - "script[src], iframe[src], img[src], audio[src], video[src]", - |el| { - rewrite_url_in_attr(el, "src"); - Ok(()) - } - ), - ], - ..Settings::new() - }, + } + )), output_sink, ); diff --git a/fuzz/test_case/src/lib.rs b/fuzz/test_case/src/lib.rs index 66b269b8..bd38a2e4 100644 --- a/fuzz/test_case/src/lib.rs +++ b/fuzz/test_case/src/lib.rs @@ -11,7 +11,7 @@ use std::ffi::{CStr, CString}; use encoding_rs::*; use lol_html::html_content::ContentType; -use lol_html::{HtmlRewriter, MemorySettings, Settings}; +use lol_html::{HtmlRewriter, Settings}; use lol_html::{comments, doc_comments, doc_text, element, streaming, text}; include!(concat!(env!("OUT_DIR"), "/bindings.rs")); @@ -99,87 +99,78 @@ fn get_random_selector() -> &'static str { } fn run_rewriter_iter(data: &[u8], selector: &str, encoding: &'static Encoding) { - let mut rewriter: HtmlRewriter<_> = HtmlRewriter::new( - Settings { - enable_esi_tags: true, - element_content_handlers: vec![ - element!(selector, |el| { - el.before( - &format!(""), - ContentType::Html, - ); - el.after( - &format!(""), - ContentType::Html, - ); - - let replaced = format!(""); - el.streaming_set_inner_content(streaming!(move |sink| { - sink.write_str(&replaced, ContentType::Html); - Ok(()) - })); - - Ok(()) - }), - comments!(selector, |c| { - c.before( - &format!(""), - ContentType::Html, - ); - c.after( - &format!(""), - ContentType::Html, - ); - - Ok(()) - }), - text!(selector, |t| { - t.before(&format!(""), ContentType::Html); - - if t.last_in_text_node() { - t.after(&format!(""), ContentType::Html); - } - - Ok(()) - }), - element!(selector, |el| { - el.replace("hey & ya", ContentType::Html); - - Ok(()) - }), - element!(selector, |el| { - el.remove(); - - Ok(()) - }), - element!(selector, |el| { - el.remove_and_keep_content(); - - Ok(()) - }), - 
], - document_content_handlers: vec![ - doc_comments!(|c| { - c.set_text("123456").unwrap(); - - Ok(()) - }), - doc_text!(|t| { - if t.last_in_text_node() { - t.after("BAZ", ContentType::Text); - } - - Ok(()) - }), - ], - encoding: encoding.try_into().unwrap(), - memory_settings: MemorySettings::new(), - strict: false, - adjust_charset_on_meta_tag: false, - graceful_bail_out_on_content_handler_error: false, - }, - |_: &[u8]| {}, - ); + let settings = Settings::new() + .with_enable_esi_tags(true) + .with_encoding(encoding.try_into().unwrap()) + .with_strict(false) + .append_element_content_handler(element!(selector, |el| { + el.before( + &format!(""), + ContentType::Html, + ); + el.after( + &format!(""), + ContentType::Html, + ); + + let replaced = format!(""); + el.streaming_set_inner_content(streaming!(move |sink| { + sink.write_str(&replaced, ContentType::Html); + Ok(()) + })); + + Ok(()) + })) + .append_element_content_handler(comments!(selector, |c| { + c.before( + &format!(""), + ContentType::Html, + ); + c.after( + &format!(""), + ContentType::Html, + ); + + Ok(()) + })) + .append_element_content_handler(text!(selector, |t| { + t.before(&format!(""), ContentType::Html); + + if t.last_in_text_node() { + t.after(&format!(""), ContentType::Html); + } + + Ok(()) + })) + .append_element_content_handler(element!(selector, |el| { + el.replace("hey & ya", ContentType::Html); + + Ok(()) + })) + .append_element_content_handler(element!(selector, |el| { + el.remove(); + + Ok(()) + })) + .append_element_content_handler(element!(selector, |el| { + el.remove_and_keep_content(); + + Ok(()) + })) + .append_document_content_handler(doc_comments!(|c| { + c.set_text("123456").unwrap(); + + Ok(()) + })) + .append_document_content_handler(doc_text!(|t| { + if t.last_in_text_node() { + t.after("BAZ", ContentType::Text); + } + + Ok(()) + })); + + let mut rewriter: HtmlRewriter<_> = HtmlRewriter::new(settings, |_: &[u8]| {}); rewriter.write(data).unwrap(); rewriter.end().unwrap(); 
diff --git a/js-api/src/html_rewriter.rs b/js-api/src/html_rewriter.rs index 08829bce..d811634f 100644 --- a/js-api/src/html_rewriter.rs +++ b/js-api/src/html_rewriter.rs @@ -61,11 +61,8 @@ impl HTMLRewriter { Ok(Self(RewriterState::Before { output_sink: JsOutputSink::new(output_sink), - settings: Settings { - encoding, - // TODO: accept options bag and parse out here - ..Settings::default() - }, + // TODO: accept options bag and parse out here + settings: Settings::new().with_encoding(encoding), })) } @@ -101,10 +98,11 @@ impl HTMLRewriter { ref mut settings, .. } => { let selector = selector.parse::().into_js_result()?; - - settings - .element_content_handlers - .push((Cow::Owned(selector), handlers.into_native())); + let taken = std::mem::take(settings); + *settings = taken.append_element_content_handler(( + Cow::Owned(selector), + handlers.into_native(), + )); Ok(()) } @@ -118,9 +116,8 @@ impl HTMLRewriter { RewriterState::Before { ref mut settings, .. } => { - settings - .document_content_handlers - .push(handlers.into_native()); + let taken = std::mem::take(settings); + *settings = taken.append_document_content_handler(handlers.into_native()); Ok(()) } _ => Err(JsError::new("Handlers cannot be added after write").into()), diff --git a/scripts/test.sh b/scripts/test.sh index fa5cc5cc..20610743 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -5,7 +5,7 @@ set -e export CARGO_TARGET_DIR=$PWD/target echo "=== Running library tests... ===" -cargo test --features=integration_test "$@" +cargo test --features=_integration_test "$@" echo "=== Running C API tests... ===" (cd ./c-api/ && cargo check --locked) diff --git a/src/html/text_type.rs b/src/html/text_type.rs index 5bb3b71d..7e04b880 100644 --- a/src/html/text_type.rs +++ b/src/html/text_type.rs @@ -60,7 +60,7 @@ impl TextType { } cfg_if! 
{ - if #[cfg(feature = "integration_test")] { + if #[cfg(feature = "_integration_test")] { impl TextType { #[must_use] pub fn should_replace_unsafe_null_in_text(self) -> bool { self != Self::Data && self != Self::CDataSection diff --git a/src/lib.rs b/src/lib.rs index 5a64af5f..e2ff3293 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,8 +21,8 @@ #![allow(clippy::module_name_repetitions)] #![allow(clippy::redundant_pub_crate)] #![deny(rustdoc::broken_intra_doc_links)] -#![cfg_attr(not(any(feature = "integration_test", test)), warn(missing_docs))] -#![cfg_attr(any(feature = "integration_test", test), allow(unnameable_types))] +#![cfg_attr(not(any(feature = "_integration_test", test)), warn(missing_docs))] +#![cfg_attr(any(feature = "_integration_test", test), allow(unnameable_types))] #[macro_use] mod base; @@ -103,7 +103,7 @@ pub mod html_content { pub use super::html::TextType; } -#[cfg(any(test, feature = "integration_test"))] +#[cfg(any(test, feature = "_integration_test"))] pub mod test_utils { use encoding_rs::*; @@ -201,7 +201,7 @@ pub mod test_utils { } cfg_if! { - if #[cfg(feature = "integration_test")] { + if #[cfg(feature = "_integration_test")] { pub mod selectors_vm; pub use self::transform_stream::{ diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6720d9dd..85325c0b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -123,7 +123,7 @@ impl Parser { } cfg_if! { - if #[cfg(feature = "integration_test")] { + if #[cfg(feature = "_integration_test")] { use crate::html::{LocalNameHash, TextType}; #[allow(private_bounds)] diff --git a/src/rewritable_units/document_end.rs b/src/rewritable_units/document_end.rs index c3146199..37233a28 100644 --- a/src/rewritable_units/document_end.rs +++ b/src/rewritable_units/document_end.rs @@ -35,14 +35,11 @@ impl<'a> DocumentEnd<'a> { /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// document_content_handlers: vec![end!(|end| { - /// end.append("", ContentType::Html); - /// end.append("", ContentType::Text); - /// Ok(()) - /// })], - /// ..RewriteStrSettings::new() - /// } + /// RewriteStrSettings::new().append_document_content_handler(end!(|end| { + /// end.append("", ContentType::Html); + /// end.append("", ContentType::Text); + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"
<baz>"#); diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index e99cbd87..c796ee01 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -238,18 +238,13 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("#foo", |el| { - /// el.before("", ContentType::Html); - /// el.before("", ContentType::Html); - /// el.before("", ContentType::Text); + /// RewriteStrSettings::new().append_element_content_handler(element!("#foo", |el| { + /// el.before("", ContentType::Html); + /// el.before("", ContentType::Html); + /// el.before("", ContentType::Text); /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"<quz>
"#); @@ -288,18 +283,13 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("#foo", |el| { - /// el.after("", ContentType::Html); - /// el.after("", ContentType::Html); - /// el.after("", ContentType::Text); + /// RewriteStrSettings::new().append_element_content_handler(element!("#foo", |el| { + /// el.after("", ContentType::Html); + /// el.after("", ContentType::Html); + /// el.after("", ContentType::Text); /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"
<quz>"#); @@ -352,13 +342,9 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("#foo", handler), - /// element!("img", handler), - /// ], - /// ..RewriteStrSettings::new() - /// } + /// RewriteStrSettings::new() + /// .append_element_content_handler(element!("#foo", handler)) + /// .append_element_content_handler(element!("img", handler)) /// ).unwrap(); /// /// assert_eq!(html, r#"
<quz>
"#); @@ -417,13 +403,9 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("#foo", handler), - /// element!("img", handler), - /// ], - /// ..RewriteStrSettings::new() - /// } + /// RewriteStrSettings::new() + /// .append_element_content_handler(element!("#foo", handler)) + /// .append_element_content_handler(element!("img", handler)) /// ).unwrap(); /// /// assert_eq!(html, r#"
<quz>
"#); @@ -476,13 +458,9 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("#foo", handler), - /// element!("img", handler), - /// ], - /// ..RewriteStrSettings::new() - /// } + /// RewriteStrSettings::new() + /// .append_element_content_handler(element!("#foo", handler)) + /// .append_element_content_handler(element!("img", handler)) /// ).unwrap(); /// /// assert_eq!(html, r#"
"#); @@ -532,17 +510,12 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("#foo", |el| { - /// el.replace("", ContentType::Html); - /// el.replace("Hello", ContentType::Text); + /// RewriteStrSettings::new().append_element_content_handler(element!("#foo", |el| { + /// el.replace("", ContentType::Html); + /// el.replace("Hello", ContentType::Text); /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"Hello"#); @@ -591,16 +564,11 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("div", |el| { - /// el.remove_and_keep_content(); + /// RewriteStrSettings::new().append_element_content_handler(element!("div", |el| { + /// el.remove_and_keep_content(); /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#""#); @@ -648,37 +616,33 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// let buffer = std::rc::Rc::new(std::cell::RefCell::new(String::new())); /// let html = rewrite_str( /// "Short13 characters", - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("span", |el: &mut Element| { - /// // Truncate string for each new span. - /// buffer.borrow_mut().clear(); - /// let buffer = buffer.clone(); - /// if let Some(handlers) = el.end_tag_handlers() { - /// handlers.push(Box::new(move |end| { - /// let s = buffer.borrow(); - /// if s.len() == 13 { - /// // add text before the end tag - /// end.before("!", ContentType::Text); - /// } else { - /// // replace the end tag with an uppercase version - /// end.remove(); - /// let name = end.name().to_uppercase(); - /// end.after(&format!("", name), ContentType::Html); - /// } - /// Ok(()) - /// })); - /// } - /// Ok(()) - /// }), - /// text!("span", |t| { - /// // Save the text contents for the end tag handler. - /// buffer.borrow_mut().push_str(t.as_str()); - /// Ok(()) - /// }), - /// ], - /// ..RewriteStrSettings::new() - /// }, + /// RewriteStrSettings::new() + /// .append_element_content_handler(element!("span", |el: &mut Element| { + /// // Truncate string for each new span. 
+ /// buffer.borrow_mut().clear(); + /// let buffer = buffer.clone(); + /// if let Some(handlers) = el.end_tag_handlers() { + /// handlers.push(Box::new(move |end| { + /// let s = buffer.borrow(); + /// if s.len() == 13 { + /// // add text before the end tag + /// end.before("!", ContentType::Text); + /// } else { + /// // replace the end tag with an uppercase version + /// end.remove(); + /// let name = end.name().to_uppercase(); + /// end.after(&format!("", name), ContentType::Html); + /// } + /// Ok(()) + /// })); + /// } + /// Ok(()) + /// })) + /// .append_element_content_handler(text!("span", |t| { + /// // Save the text contents for the end tag handler. + /// buffer.borrow_mut().push_str(t.as_str()); + /// Ok(()) + /// })), /// ) /// .unwrap(); /// @@ -705,18 +669,16 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token, /// use lol_html::{element, end_tag, rewrite_str, text, RewriteStrSettings}; /// let html = rewrite_str( /// "hi", - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// element!("span", |el: &mut Element| { - /// el.on_end_tag(end_tag!(move |end| { - /// end.before("?", ContentType::Text); - /// end.after("!", ContentType::Text); - /// Ok(()) - /// })) - /// }), - /// ], - /// ..RewriteStrSettings::new() - /// }, + /// RewriteStrSettings::new().append_element_content_handler(element!( + /// "span", + /// |el: &mut Element| { + /// el.on_end_tag(end_tag!(move |end| { + /// end.before("?", ContentType::Text); + /// end.after("!", ContentType::Text); + /// Ok(()) + /// })) + /// } + /// )), /// ) /// .unwrap(); /// diff --git a/src/rewritable_units/mod.rs b/src/rewritable_units/mod.rs index f261558e..c26dafb3 100644 --- a/src/rewritable_units/mod.rs +++ b/src/rewritable_units/mod.rs @@ -26,32 +26,28 @@ pub use self::tokens::*; /// /// rewrite_str( /// r#"
"#, -/// RewriteStrSettings { -/// element_content_handlers: vec![ -/// element!("*", |el| { -/// el.set_user_data("Captured by `*`"); +/// RewriteStrSettings::new() +/// .append_element_content_handler(element!("*", |el| { +/// el.set_user_data("Captured by `*`"); /// -/// Ok(()) -/// }), -/// element!("#foo", |el| { -/// let user_data = el.user_data_mut().downcast_mut::<&'static str>().unwrap(); +/// Ok(()) +/// })) +/// .append_element_content_handler(element!("#foo", |el| { +/// let user_data = el.user_data_mut().downcast_mut::<&'static str>().unwrap(); /// -/// assert_eq!(*user_data, "Captured by `*`"); +/// assert_eq!(*user_data, "Captured by `*`"); /// -/// *user_data = "Captured by `#foo`"; +/// *user_data = "Captured by `#foo`"; /// -/// Ok(()) -/// }), -/// element!("div", |el| { -/// let user_data = el.user_data().downcast_ref::<&'static str>().unwrap(); +/// Ok(()) +/// })) +/// .append_element_content_handler(element!("div", |el| { +/// let user_data = el.user_data().downcast_ref::<&'static str>().unwrap(); /// -/// assert_eq!(*user_data, "Captured by `#foo`"); +/// assert_eq!(*user_data, "Captured by `#foo`"); /// -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::new() -/// } +/// Ok(()) +/// })) /// ).unwrap(); /// ``` pub trait UserData { @@ -133,15 +129,18 @@ mod test_utils { let mut output = Output::new(encoding); { - let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers, - document_content_handlers, - encoding: AsciiCompatibleEncoding::new(encoding).unwrap(), - ..Settings::new() - }, - |c: &[u8]| output.push(c), - ); + let mut settings = + Settings::new().with_encoding(AsciiCompatibleEncoding::new(encoding).unwrap()); + + for handler in element_content_handlers { + settings = settings.append_element_content_handler(handler); + } + + for handler in document_content_handlers { + settings = settings.append_document_content_handler(handler); + } + + let mut rewriter = HtmlRewriter::new(settings, |c: &[u8]| 
output.push(c)); for ch in html.chunks(15) { rewriter.write(ch).unwrap(); diff --git a/src/rewritable_units/tokens/comment.rs b/src/rewritable_units/tokens/comment.rs index e8943115..3182c728 100644 --- a/src/rewritable_units/tokens/comment.rs +++ b/src/rewritable_units/tokens/comment.rs @@ -99,17 +99,12 @@ impl<'i> Comment<'i> { /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// comments!("div", |c| { - /// c.before("", ContentType::Html); - /// c.before("bar", ContentType::Text); + /// RewriteStrSettings::new().append_element_content_handler(comments!("div", |c| { + /// c.before("", ContentType::Html); + /// c.before("bar", ContentType::Text); /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"
bar
"#); @@ -147,17 +142,12 @@ impl<'i> Comment<'i> { /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// comments!("div", |c| { - /// c.after("Bar", ContentType::Text); - /// c.after("Qux", ContentType::Text); + /// RewriteStrSettings::new().append_element_content_handler(comments!("div", |c| { + /// c.after("Bar", ContentType::Text); + /// c.after("Qux", ContentType::Text); /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"
QuxBar
"#); @@ -195,17 +185,12 @@ impl<'i> Comment<'i> { /// /// let html = rewrite_str( /// r#"
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// comments!("div", |c| { - /// c.replace("Bar", ContentType::Text); - /// c.replace("Qux", ContentType::Text); + /// RewriteStrSettings::new().append_element_content_handler(comments!("div", |c| { + /// c.replace("Bar", ContentType::Text); + /// c.replace("Qux", ContentType::Text); /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"
Qux
"#); diff --git a/src/rewritable_units/tokens/doctype.rs b/src/rewritable_units/tokens/doctype.rs index 6f9d93ae..28a50a49 100644 --- a/src/rewritable_units/tokens/doctype.rs +++ b/src/rewritable_units/tokens/doctype.rs @@ -18,18 +18,13 @@ use std::fmt::{self, Debug}; /// /// rewrite_str( /// r#" Doctype<'i> { } #[inline] - #[cfg(feature = "integration_test")] + #[cfg(feature = "_integration_test")] #[must_use] pub const fn force_quirks(&self) -> bool { self.force_quirks diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs index bdf31d1c..1f56f71e 100644 --- a/src/rewritable_units/tokens/text_chunk.rs +++ b/src/rewritable_units/tokens/text_chunk.rs @@ -45,21 +45,16 @@ use std::fmt::{self, Debug}; /// /// { /// let mut rewriter = HtmlRewriter::new( -/// Settings { -/// element_content_handlers: vec![ -/// text!("div", |t| { -/// greeting += t.as_str(); +/// Settings::new().append_element_content_handler(text!("div", |t| { +/// greeting += t.as_str(); /// -/// if t.last_in_text_node() { -/// greeting += "!"; -/// } +/// if t.last_in_text_node() { +/// greeting += "!"; +/// } /// -/// Ok(()) -/// }) -/// ], -/// ..Settings::new() -/// }, -/// |_:&[u8]| {} +/// Ok(()) +/// })), +/// |_: &[u8]| {}, /// ); /// /// rewriter.write(b"
He").unwrap(); @@ -158,21 +153,17 @@ impl<'i> TextChunk<'i> { /// /// let html = rewrite_str( /// r#"
Hello
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// text!("div", |t| { - /// assert_eq!(t.text_type(), TextType::Data); - /// - /// Ok(()) - /// }), - /// text!("script", |t| { - /// assert_eq!(t.text_type(), TextType::ScriptData); - /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// RewriteStrSettings::new() + /// .append_element_content_handler(text!("div", |t| { + /// assert_eq!(t.text_type(), TextType::Data); + /// + /// Ok(()) + /// })) + /// .append_element_content_handler(text!("script", |t| { + /// assert_eq!(t.text_type(), TextType::ScriptData); + /// + /// Ok(()) + /// })) /// ).unwrap(); /// ``` #[inline] @@ -206,19 +197,14 @@ impl<'i> TextChunk<'i> { /// /// let html = rewrite_str( /// r#"
world
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// text!("div", |t| { - /// if !t.last_in_text_node(){ - /// t.before("", ContentType::Html); - /// t.before("Hello ", ContentType::Text); - /// } - /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// RewriteStrSettings::new().append_element_content_handler(text!("div", |t| { + /// if !t.last_in_text_node() { + /// t.before("", ContentType::Html); + /// t.before("Hello ", ContentType::Text); + /// } + /// + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"
Hello world
"#); @@ -243,19 +229,14 @@ impl<'i> TextChunk<'i> { /// /// let html = rewrite_str( /// r#"
Foo
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// text!("div", |t| { - /// if t.last_in_text_node(){ - /// t.after("Bar", ContentType::Text); - /// t.after("Qux", ContentType::Text); - /// } - /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// RewriteStrSettings::new().append_element_content_handler(text!("div", |t| { + /// if t.last_in_text_node() { + /// t.after("Bar", ContentType::Text); + /// t.after("Qux", ContentType::Text); + /// } + /// + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"
FooQuxBar
"#); @@ -280,19 +261,14 @@ impl<'i> TextChunk<'i> { /// /// let html = rewrite_str( /// r#"
Foo
"#, - /// RewriteStrSettings { - /// element_content_handlers: vec![ - /// text!("div", |t| { - /// if !t.last_in_text_node(){ - /// t.replace("Bar", ContentType::Text); - /// t.replace("Qux", ContentType::Text); - /// } - /// - /// Ok(()) - /// }) - /// ], - /// ..RewriteStrSettings::new() - /// } + /// RewriteStrSettings::new().append_element_content_handler(text!("div", |t| { + /// if !t.last_in_text_node() { + /// t.replace("Bar", ContentType::Text); + /// t.replace("Qux", ContentType::Text); + /// } + /// + /// Ok(()) + /// })) /// ).unwrap(); /// /// assert_eq!(html, r#"
Qux
"#); diff --git a/src/rewriter/mod.rs b/src/rewriter/mod.rs index 38934fd1..a002a9d8 100644 --- a/src/rewriter/mod.rs +++ b/src/rewriter/mod.rs @@ -100,23 +100,18 @@ pub enum RewritingError { /// /// { /// let mut rewriter = HtmlRewriter::new( -/// Settings { -/// element_content_handlers: vec![ -/// // Rewrite insecure hyperlinks -/// element!("a[href]", |el| { -/// let href = el -/// .get_attribute("href") -/// .unwrap() -/// .replace("http:", "https:"); +/// // Rewrite insecure hyperlinks +/// Settings::new().append_element_content_handler(element!("a[href]", |el| { +/// let href = el +/// .get_attribute("href") +/// .unwrap() +/// .replace("http:", "https:"); /// -/// el.set_attribute("href", &href).unwrap(); +/// el.set_attribute("href", &href).unwrap(); /// -/// Ok(()) -/// }) -/// ], -/// ..Settings::new() -/// }, -/// |c: &[u8]| output.extend_from_slice(c) +/// Ok(()) +/// })), +/// |c: &[u8]| output.extend_from_slice(c), /// ); /// /// rewriter.write(b"
"#, +/// RewriteStrSettings::new().append_element_content_handler(element!("a[href]", |el| { +/// // Rewrite insecure hyperlinks /// let href = el /// .get_attribute("href") /// .unwrap() /// .replace("http:", "https:"); /// -/// el.set_attribute("href", &href).unwrap(); +/// el.set_attribute("href", &href).unwrap(); /// -/// Ok(()) -/// }) -/// ]; -/// let output = rewrite_str( -/// r#"
"#, -/// RewriteStrSettings { -/// element_content_handlers, -/// ..RewriteStrSettings::new() -/// } -/// ).unwrap(); +/// Ok(()) +/// })), +/// ) +/// .unwrap(); /// /// assert_eq!(output, r#"
"#); /// ``` @@ -314,9 +304,10 @@ pub fn rewrite_str<'h, 's, H: HandlerTypes>( html: &str, settings: impl Into>, ) -> Result { - let mut settings = settings.into(); - settings.adjust_charset_on_meta_tag = false; - settings.encoding = AsciiCompatibleEncoding::utf_8(); + let settings = settings + .into() + .with_adjust_charset_on_meta_tag(false) + .with_encoding(AsciiCompatibleEncoding::utf_8()); rewrite_str_utf8(html, settings) } @@ -406,14 +397,14 @@ mod tests { Ok(()) }); - let settings = Settings { - document_content_handlers: vec![doc_handler_static, doc_handler_local], - element_content_handlers: vec![el_handler_static, el_handler_local], - encoding: AsciiCompatibleEncoding::utf_8(), - strict: false, - adjust_charset_on_meta_tag: false, - ..Settings::new() - }; + let settings = Settings::new() + .with_encoding(AsciiCompatibleEncoding::utf_8()) + .with_strict(false) + .with_adjust_charset_on_meta_tag(false) + .append_document_content_handler(doc_handler_static) + .append_document_content_handler(doc_handler_local) + .append_element_content_handler(el_handler_static) + .append_element_content_handler(el_handler_local); let rewriter = HtmlRewriter::new(settings, |_: &[u8]| ()); drop(rewriter); @@ -425,19 +416,15 @@ mod tests { fn rewrite_html_str() { let res = rewrite_str::( "
", - RewriteStrSettings { - element_content_handlers: vec![ - element!("div", |el| { - el.set_tag_name("span").unwrap(); - Ok(()) - }), - comments!("div", |c| { - c.set_text("hello").unwrap(); - Ok(()) - }), - ], - ..RewriteStrSettings::new() - }, + RewriteStrSettings::new() + .append_element_content_handler(element!("div", |el| { + el.set_tag_name("span").unwrap(); + Ok(()) + })) + .append_element_content_handler(comments!("div", |c| { + c.set_text("hello").unwrap(); + Ok(()) + })), ) .unwrap(); @@ -449,15 +436,15 @@ mod tests { let res = rewrite_str::( "


", - RewriteStrSettings { - element_content_handlers: vec![element!("*:not(svg)", |el| { + RewriteStrSettings::new().append_element_content_handler(element!( + "*:not(svg)", + |el| { el.set_attribute("s", if el.is_self_closing() { "y" } else { "n" })?; el.set_attribute("c", if el.can_have_content() { "y" } else { "n" })?; el.append("…", ContentType::Text); Ok(()) - })], - ..RewriteStrSettings::new() - }, + } + )), ) .unwrap(); @@ -479,11 +466,9 @@ mod tests { let text = "前"; let rewritten = rewrite_str( text, - Settings { - encoding: encoding_rs::BIG5.try_into().unwrap(), - adjust_charset_on_meta_tag: true, - ..Settings::new() - }, + Settings::new() + .with_encoding(encoding_rs::BIG5.try_into().unwrap()) + .with_adjust_charset_on_meta_tag(true), ) .unwrap(); assert_eq!(rewritten, text); @@ -503,15 +488,13 @@ mod tests { { let rewriter = HtmlRewriter::new( - Settings { - document_content_handlers: vec![doctype!(|d| { + Settings::new() + // NOTE: unwrap() here is intentional; it also tests `Ascii::new`. + .with_encoding(enc.try_into().unwrap()) + .append_document_content_handler(doctype!(|d| { doctypes.push((d.name(), d.public_id(), d.system_id())); Ok(()) - })], - // NOTE: unwrap() here is intentional; it also tests `Ascii::new`. 
- encoding: enc.try_into().unwrap(), - ..Settings::new() - }, + })), |_: &[u8]| {}, ); @@ -550,15 +533,13 @@ mod tests { let mut output = Output::new(enc); let rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![element!("*", |el| { + Settings::new() + .with_encoding(enc.try_into().unwrap()) + .append_element_content_handler(element!("*", |el| { el.set_attribute("foo", "bar").unwrap(); el.prepend("", ContentType::Html); Ok(()) - })], - encoding: enc.try_into().unwrap(), - ..Settings::new() - }, + })), |c: &[u8]| output.push(c), ); @@ -601,24 +582,19 @@ mod tests { let mut output = Output::new(enc); let rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![], - document_content_handlers: vec![ - doc_comments!(|c| { - c.set_text(&(c.text() + "1337")).unwrap(); - Ok(()) - }), - doc_text!(|c| { - if c.last_in_text_node() { - c.after("BAZ", ContentType::Text); - } - - Ok(()) - }), - ], - encoding: enc.try_into().unwrap(), - ..Settings::new() - }, + Settings::new() + .with_encoding(enc.try_into().unwrap()) + .append_document_content_handler(doc_comments!(|c| { + c.set_text(&(c.text() + "1337")).unwrap(); + Ok(()) + })) + .append_document_content_handler(doc_text!(|c| { + if c.last_in_text_node() { + c.after("BAZ", ContentType::Text); + } + + Ok(()) + })), |c: &[u8]| output.push(c), ); @@ -665,9 +641,9 @@ mod tests { let mut output = Output::new(enc); let rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![], - document_content_handlers: vec![doc_text!(|c| { + Settings::new() + .with_encoding(enc.try_into().unwrap()) + .append_document_content_handler(doc_text!(|c| { let replace = match c.text_type() { TextType::PlainText => 'P', TextType::RCData => 'r', @@ -687,10 +663,7 @@ mod tests { c.set_str(replaced); Ok(()) - })], - encoding: enc.try_into().unwrap(), - ..Settings::new() - }, + })), |c: &[u8]| output.push(c), ); @@ -749,16 +722,12 @@ mod tests { let _res = rewrite_str( "
", - RewriteStrSettings { - element_content_handlers: vec![ - create_handlers!("div span", 0), - create_handlers!("div > span", 1), - create_handlers!("span", 2), - create_handlers!("[foo]", 3), - create_handlers!("div span[foo]", 4), - ], - ..RewriteStrSettings::new() - }, + RewriteStrSettings::new() + .append_element_content_handler(create_handlers!("div span", 0)) + .append_element_content_handler(create_handlers!("div > span", 1)) + .append_element_content_handler(create_handlers!("span", 2)) + .append_element_content_handler(create_handlers!("[foo]", 3)) + .append_element_content_handler(create_handlers!("div span[foo]", 4)), ) .unwrap(); @@ -769,14 +738,12 @@ mod tests { fn write_esi_tags() { let res = rewrite_str( "", - RewriteStrSettings { - element_content_handlers: vec![element!("esi\\:include", |el| { + RewriteStrSettings::new() + .with_enable_esi_tags(true) + .append_element_content_handler(element!("esi\\:include", |el| { el.replace("?", ContentType::Text); Ok(()) - })], - enable_esi_tags: true, - ..RewriteStrSettings::new() - }, + })), ) .unwrap(); @@ -816,10 +783,7 @@ mod tests { let transformed_no_charset_adjustment: Vec = rewrite_html_bytes( &html, - Settings { - document_content_handlers: vec![enthusiastic_text_handler()], - ..Settings::new() - }, + Settings::new().append_document_content_handler(enthusiastic_text_handler()), ); // Without charset adjustment the response has to be corrupted: @@ -827,11 +791,9 @@ mod tests { let transformed_charset_adjustment: Vec = rewrite_html_bytes( &html, - Settings { - document_content_handlers: vec![enthusiastic_text_handler()], - adjust_charset_on_meta_tag: true, - ..Settings::new() - }, + Settings::new() + .with_adjust_charset_on_meta_tag(true) + .append_document_content_handler(enthusiastic_text_handler()), ); // If it adapts the charset according to the meta tag everything will be correctly @@ -864,19 +826,17 @@ mod tests { }; let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: 
vec![element!("[attr]", |el| { + Settings::new() + .with_encoding(AsciiCompatibleEncoding::utf_8()) + .with_adjust_charset_on_meta_tag(true) + .append_element_content_handler(element!("[attr]", |el| { assert_eq!(el.get_attribute("attr").unwrap(), "ð"); Ok(()) - })], - document_content_handlers: vec![doc_text!(|text| { + })) + .append_document_content_handler(doc_text!(|text| { assert!(matches!(text.as_str(), "ð" | "")); Ok(()) - })], - encoding: AsciiCompatibleEncoding::utf_8(), - adjust_charset_on_meta_tag: true, - ..Default::default() - }, + })), &mut sink, ); @@ -898,15 +858,13 @@ mod tests { let html = b"\xC3\xB0"; let rewritten = rewrite_html_bytes( html, - Settings { - document_content_handlers: vec![doc_text!(|text| { + Settings::new() + .with_encoding(AsciiCompatibleEncoding::utf_8()) + .with_adjust_charset_on_meta_tag(true) + .append_document_content_handler(doc_text!(|text| { assert_ne!(text.as_str(), "ð"); Ok(()) - })], - encoding: AsciiCompatibleEncoding::utf_8(), - adjust_charset_on_meta_tag: true, - ..Default::default() - }, + })), ); assert_eq!( "�°", @@ -944,10 +902,7 @@ mod tests { let transformed_no_charset_adjustment: Vec = rewrite_html_bytes( &html, - Settings { - document_content_handlers: vec![enthusiastic_text_handler()], - ..Settings::new() - }, + Settings::new().append_document_content_handler(enthusiastic_text_handler()), ); // Without charset adjustment the response has to be corrupted: @@ -955,11 +910,9 @@ mod tests { let transformed_charset_adjustment: Vec = rewrite_html_bytes( &html, - Settings { - document_content_handlers: vec![enthusiastic_text_handler()], - adjust_charset_on_meta_tag: true, - ..Settings::new() - }, + Settings::new() + .with_adjust_charset_on_meta_tag(true) + .append_document_content_handler(enthusiastic_text_handler()), ); // If it adapts the charset according to the meta tag everything will be correctly @@ -978,15 +931,13 @@ mod tests { output_sink: O, ) -> HtmlRewriter<'static, O> { HtmlRewriter::new( - Settings 
{ - element_content_handlers: vec![element!("*", |_| Ok(()))], - memory_settings: MemorySettings { - max_allowed_memory_usage, - preallocated_parsing_buffer_size: 0, - ..MemorySettings::new() - }, - ..Settings::new() - }, + Settings::new() + .with_memory_settings( + MemorySettings::new() + .with_max_allowed_memory_usage(max_allowed_memory_usage) + .with_preallocated_parsing_buffer_size(0), + ) + .append_element_content_handler(element!("*", |_| Ok(()))), output_sink, ) } @@ -1029,15 +980,14 @@ mod tests { output_sink: O, ) -> HtmlRewriter<'static, O> { HtmlRewriter::new( - Settings { - element_content_handlers: vec![element!("*", |_| Ok(()))], - memory_settings: MemorySettings { - max_allowed_memory_usage, - preallocated_parsing_buffer_size: 0, - graceful_bail_out_on_memory_limit_exceeded: true, - }, - ..Settings::new() - }, + Settings::new() + .with_memory_settings( + MemorySettings::new() + .with_max_allowed_memory_usage(max_allowed_memory_usage) + .with_preallocated_parsing_buffer_size(0) + .with_graceful_bail_out_on_memory_limit_exceeded(true), + ) + .append_element_content_handler(element!("*", |_| Ok(()))), output_sink, ) } @@ -1085,14 +1035,12 @@ mod tests { // No element handlers, so we avoid allocating the selectors VM stack which would // fail first with such a tight limit. 
let mut rewriter = HtmlRewriter::new( - Settings { - memory_settings: MemorySettings { - max_allowed_memory_usage: MAX, - preallocated_parsing_buffer_size: 0, - graceful_bail_out_on_memory_limit_exceeded: true, - }, - ..Settings::new() - }, + Settings::new().with_memory_settings( + MemorySettings::new() + .with_max_allowed_memory_usage(MAX) + .with_preallocated_parsing_buffer_size(0) + .with_graceful_bail_out_on_memory_limit_exceeded(true), + ), |c: &[u8]| output.extend_from_slice(c), ); @@ -1152,19 +1100,18 @@ mod tests { let mut output = Vec::::new(); let mut rewriter = HtmlRewriter::new( - Settings { - document_content_handlers: vec![doc_comments!(|c| { + Settings::new() + .with_memory_settings( + MemorySettings::new() + .with_max_allowed_memory_usage(MAX) + .with_preallocated_parsing_buffer_size(0) + .with_graceful_bail_out_on_memory_limit_exceeded(true), + ) + .append_document_content_handler(doc_comments!(|c| { let text = c.text(); c.set_text(&format!("REWRITTEN-{text}")).unwrap(); Ok(()) - })], - memory_settings: MemorySettings { - max_allowed_memory_usage: MAX, - preallocated_parsing_buffer_size: 0, - graceful_bail_out_on_memory_limit_exceeded: true, - }, - ..Settings::new() - }, + })), |c: &[u8]| output.extend_from_slice(c), ); @@ -1256,11 +1203,10 @@ mod tests { // closing marker, not the whole section), so it doesn't cause Arena growth. fn bail_out_settings(max_memory: usize) -> MemorySettings { - MemorySettings { - max_allowed_memory_usage: max_memory, - preallocated_parsing_buffer_size: 0, - graceful_bail_out_on_memory_limit_exceeded: true, - } + MemorySettings::new() + .with_max_allowed_memory_usage(max_memory) + .with_preallocated_parsing_buffer_size(0) + .with_graceful_bail_out_on_memory_limit_exceeded(true) } /// Feeds `html` to a graceful-bail-out rewriter in `chunk_size`-byte pieces. 
When @@ -1328,11 +1274,9 @@ mod tests { let reconstructed = reconstruct_response_on_oom( html.as_bytes(), 512, - Settings { - element_content_handlers: vec![element!("*", |_| Ok(()))], - memory_settings: bail_out_settings(8192), - ..Settings::new() - }, + Settings::new() + .with_memory_settings(bail_out_settings(8192)) + .append_element_content_handler(element!("*", |_| Ok(()))), ); assert_eq!( @@ -1355,11 +1299,9 @@ mod tests { let reconstructed = reconstruct_response_on_oom( html.as_bytes(), 512, - Settings { - element_content_handlers: vec![element!("*", |_| Ok(()))], - memory_settings: bail_out_settings(8192), - ..Settings::new() - }, + Settings::new() + .with_memory_settings(bail_out_settings(8192)) + .append_element_content_handler(element!("*", |_| Ok(()))), ); assert_eq!( @@ -1380,11 +1322,9 @@ mod tests { let reconstructed = reconstruct_response_on_oom( html.as_bytes(), 512, - Settings { - element_content_handlers: vec![comments!("div", |_| Ok(()))], - memory_settings: bail_out_settings(8192), - ..Settings::new() - }, + Settings::new() + .with_memory_settings(bail_out_settings(8192)) + .append_element_content_handler(comments!("div", |_| Ok(()))), ); assert_eq!( @@ -1406,11 +1346,9 @@ mod tests { let reconstructed = reconstruct_response_on_oom( html.as_bytes(), 512, - Settings { - element_content_handlers: vec![element!("*", |_| Ok(()))], - memory_settings: bail_out_settings(4096), - ..Settings::new() - }, + Settings::new() + .with_memory_settings(bail_out_settings(4096)) + .append_element_content_handler(element!("*", |_| Ok(()))), ); assert_eq!( @@ -1432,11 +1370,9 @@ mod tests { let reconstructed = reconstruct_response_on_oom( html.as_bytes(), 512, - Settings { - element_content_handlers: vec![element!("*", |_| Ok(()))], - memory_settings: bail_out_settings(4096), - ..Settings::new() - }, + Settings::new() + .with_memory_settings(bail_out_settings(4096)) + .append_element_content_handler(element!("*", |_| Ok(()))), ); assert_eq!( @@ -1462,17 +1398,15 @@ 
mod tests { let mut output = Vec::::new(); let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![ - element!("a", |el| { - el.set_attribute("rewritten", "yes").unwrap(); - Ok(()) - }), - element!("stop", |_| Err("handler refused".into())), - ], - graceful_bail_out_on_content_handler_error: true, - ..Settings::new() - }, + Settings::new() + .with_graceful_bail_out_on_content_handler_error(true) + .append_element_content_handler(element!("a", |el| { + el.set_attribute("rewritten", "yes").unwrap(); + Ok(()) + })) + .append_element_content_handler(element!("stop", |_| Err( + "handler refused".into() + ))), |c: &[u8]| output.extend_from_slice(c), ); @@ -1505,12 +1439,9 @@ mod tests { let mut output = Vec::::new(); let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![element!("stop", |_| Err( - "handler refused".into() - ))], - ..Settings::new() - }, + Settings::new().append_element_content_handler(element!("stop", |_| Err( + "handler refused".into() + ))), |c: &[u8]| output.extend_from_slice(c), ); @@ -1531,13 +1462,11 @@ mod tests { let mut output = Vec::::new(); let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![comments!("div", |_| { + Settings::new() + .with_graceful_bail_out_on_content_handler_error(true) + .append_element_content_handler(comments!("div", |_| { Err("comment refused".into()) - })], - graceful_bail_out_on_content_handler_error: true, - ..Settings::new() - }, + })), |c: &[u8]| output.extend_from_slice(c), ); @@ -1557,11 +1486,9 @@ mod tests { let mut output = Vec::::new(); let mut rewriter = HtmlRewriter::new( - Settings { - document_content_handlers: vec![end!(|_| Err("end refused".into()))], - graceful_bail_out_on_content_handler_error: true, - ..Settings::new() - }, + Settings::new() + .with_graceful_bail_out_on_content_handler_error(true) + .append_document_content_handler(end!(|_| Err("end refused".into()))), |c: &[u8]| output.extend_from_slice(c), ); @@ 
-1582,13 +1509,11 @@ mod tests { let mut output = Vec::::new(); let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![element!("div", |_| { + Settings::new() + .with_graceful_bail_out_on_content_handler_error(true) + .append_element_content_handler(element!("div", |_| { Err("div refused".into()) - })], - graceful_bail_out_on_content_handler_error: true, - ..Settings::new() - }, + })), |c: &[u8]| output.extend_from_slice(c), ); @@ -1609,16 +1534,15 @@ mod tests { const MAX: usize = 100; let mut output = Vec::::new(); let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![element!("*", |_| Ok(()))], - memory_settings: MemorySettings { - max_allowed_memory_usage: MAX, - preallocated_parsing_buffer_size: 0, - graceful_bail_out_on_memory_limit_exceeded: false, - }, - graceful_bail_out_on_content_handler_error: true, - ..Settings::new() - }, + Settings::new() + .with_memory_settings( + MemorySettings::new() + .with_max_allowed_memory_usage(MAX) + .with_preallocated_parsing_buffer_size(0) + .with_graceful_bail_out_on_memory_limit_exceeded(false), + ) + .with_graceful_bail_out_on_content_handler_error(true) + .append_element_content_handler(element!("*", |_| Ok(()))), |c: &[u8]| output.extend_from_slice(c), ); @@ -1644,14 +1568,12 @@ mod tests { use std::borrow::Cow; let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![( + Settings::new() + .append_element_content_handler(( Cow::Owned("*".parse().unwrap()), element_handlers, - )], - document_content_handlers: vec![document_handlers], - ..Settings::new() - }, + )) + .append_document_content_handler(document_handlers), |_: &[u8]| {}, ); @@ -1732,8 +1654,9 @@ mod tests { rewrite_str::( html, - RewriteStrSettings { - element_content_handlers: vec![element!("div", move |el| { + RewriteStrSettings::new().append_element_content_handler(element!( + "div", + move |el| { for attr in el.attributes() { let name_loc = attr.name_source_location(); 
let value_loc = attr.value_source_location(); @@ -1745,9 +1668,8 @@ mod tests { )); } Ok(()) - })], - ..RewriteStrSettings::new() - }, + } + )), ) .unwrap(); @@ -1775,25 +1697,22 @@ mod tests { fn attribute_source_locations_none_for_programmatic_attributes() { rewrite_str::( "
", - RewriteStrSettings { - element_content_handlers: vec![element!("div", |el| { - el.set_attribute("added", "val").unwrap(); - for attr in el.attributes() { - if attr.name() == "added" { - assert!( - attr.name_source_location().is_none(), - "programmatic attribute should have no name source location", - ); - assert!( - attr.value_source_location().is_none(), - "programmatic attribute should have no value source location", - ); - } + RewriteStrSettings::new().append_element_content_handler(element!("div", |el| { + el.set_attribute("added", "val").unwrap(); + for attr in el.attributes() { + if attr.name() == "added" { + assert!( + attr.name_source_location().is_none(), + "programmatic attribute should have no name source location", + ); + assert!( + attr.value_source_location().is_none(), + "programmatic attribute should have no value source location", + ); } - Ok(()) - })], - ..RewriteStrSettings::new() - }, + } + Ok(()) + })), ) .unwrap(); } diff --git a/src/rewriter/settings.rs b/src/rewriter/settings.rs index 8adcbfde..eb64da4e 100644 --- a/src/rewriter/settings.rs +++ b/src/rewriter/settings.rs @@ -418,16 +418,11 @@ macro_rules! __element_content_handler { /// /// let html = rewrite_str( /// r#""#, -/// RewriteStrSettings { -/// element_content_handlers: vec![ -/// element!("#foo", |el| { -/// el.set_inner_content("Hello!", ContentType::Text); -/// -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::new() -/// } +/// RewriteStrSettings::new().append_element_content_handler(element!("#foo", |el| { +/// el.set_inner_content("Hello!", ContentType::Text); +/// +/// Ok(()) +/// })) /// ).unwrap(); /// /// assert_eq!(html, r#"Hello!"#); @@ -483,18 +478,13 @@ macro_rules! 
end_tag { /// /// let html = rewrite_str( /// r#"Hello"#, -/// RewriteStrSettings { -/// element_content_handlers: vec![ -/// text!("span", |t| { -/// if t.last_in_text_node() { -/// t.after(" world", ContentType::Text); -/// } -/// -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::new() -/// } +/// RewriteStrSettings::new().append_element_content_handler(text!("span", |t| { +/// if t.last_in_text_node() { +/// t.after(" world", ContentType::Text); +/// } +/// +/// Ok(()) +/// })) /// ).unwrap(); /// /// assert_eq!(html, r#"Hello world"#); @@ -528,16 +518,11 @@ macro_rules! text { /// /// let html = rewrite_str( /// r#""#, -/// RewriteStrSettings { -/// element_content_handlers: vec![ -/// comments!("span", |c| { -/// c.set_text("Hello!").unwrap(); -/// -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::new() -/// } +/// RewriteStrSettings::new().append_element_content_handler(comments!("span", |c| { +/// c.set_text("Hello!").unwrap(); +/// +/// Ok(()) +/// })) /// ).unwrap(); /// /// assert_eq!(html, r#""#); @@ -571,19 +556,14 @@ macro_rules! comments { /// use lol_html::{element, streaming, RewriteStrSettings}; /// use lol_html::html_content::ContentType; /// -/// RewriteStrSettings { -/// element_content_handlers: vec![ -/// element!("div", |element| { -/// element.streaming_replace(streaming!(|sink| { -/// sink.write_str("…", ContentType::Html); -/// sink.write_str("…", ContentType::Html); -/// Ok(()) -/// })); -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::default() -/// }; +/// RewriteStrSettings::new().append_element_content_handler(element!("div", |element| { +/// element.streaming_replace(streaming!(|sink| { +/// sink.write_str("…", ContentType::Html); +/// sink.write_str("…", ContentType::Html); +/// Ok(()) +/// })); +/// Ok(()) +/// })); /// ``` /// /// Note: if you get "implementation of `FnOnce` is not general enough" error, add explicit argument @@ -627,16 +607,11 @@ macro_rules! 
__document_content_handler { /// /// rewrite_str( /// r#""#, -/// RewriteStrSettings { -/// document_content_handlers: vec![ -/// doctype!(|d| { -/// assert_eq!(d.name().unwrap(), "html"); -/// -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::new() -/// } +/// RewriteStrSettings::new().append_document_content_handler(doctype!(|d| { +/// assert_eq!(d.name().unwrap(), "html"); +/// +/// Ok(()) +/// })) /// ).unwrap(); /// ``` /// @@ -668,18 +643,13 @@ macro_rules! doctype { /// /// let html = rewrite_str( /// r#"HelloHelloHello"#, -/// RewriteStrSettings { -/// document_content_handlers: vec![ -/// doc_text!(|t| { -/// if t.last_in_text_node() { -/// t.after(" world", ContentType::Text); -/// } -/// -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::new() -/// } +/// RewriteStrSettings::new().append_document_content_handler(doc_text!(|t| { +/// if t.last_in_text_node() { +/// t.after(" world", ContentType::Text); +/// } +/// +/// Ok(()) +/// })) /// ).unwrap(); /// /// assert_eq!(html, r#"Hello worldHello worldHello world"#); @@ -709,16 +679,11 @@ macro_rules! doc_text { /// /// let html = rewrite_str( /// r#""#, -/// RewriteStrSettings { -/// document_content_handlers: vec![ -/// doc_comments!(|c| { -/// c.set_text("Hello!").unwrap(); -/// -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::new() -/// } +/// RewriteStrSettings::new().append_document_content_handler(doc_comments!(|c| { +/// c.set_text("Hello!").unwrap(); +/// +/// Ok(()) +/// })) /// ).unwrap(); /// /// assert_eq!(html, r#""#); @@ -750,23 +715,17 @@ macro_rules! doc_comments { /// /// let html = rewrite_str( /// r#"foo"#, -/// RewriteStrSettings { -/// element_content_handlers: vec![ -/// element!("span", |el| { -/// el.append("bar", ContentType::Text); -/// -/// Ok(()) -/// }) -/// ], -/// document_content_handlers: vec![ -/// end!(|end| { -/// end.append("
baz
", ContentType::Html); -/// -/// Ok(()) -/// }) -/// ], -/// ..RewriteStrSettings::new() -/// } +/// RewriteStrSettings::new() +/// .append_element_content_handler(element!("span", |el| { +/// el.append("bar", ContentType::Text); +/// +/// Ok(()) +/// })) +/// .append_document_content_handler(end!(|end| { +/// end.append("
baz
", ContentType::Html); +/// +/// Ok(()) +/// })) /// ).unwrap(); /// /// assert_eq!(html, r#"foobar
baz
"#); @@ -789,11 +748,38 @@ macro_rules! end { /// Specifies the memory settings for [`HtmlRewriter`]. /// +/// Construct with [`MemorySettings::new()`] (or [`MemorySettings::default()`]) and configure the +/// individual values via the `with_*` builder methods. +/// /// [`HtmlRewriter`]: struct.HtmlRewriter.html // NOTE: exposed in C API as well, thus repr(C). #[repr(C)] pub struct MemorySettings { - /// Specifies the number of bytes that should be preallocated on [`HtmlRewriter`] instantiation + pub(crate) preallocated_parsing_buffer_size: usize, + pub(crate) max_allowed_memory_usage: usize, + pub(crate) graceful_bail_out_on_memory_limit_exceeded: bool, +} + +impl Default for MemorySettings { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl MemorySettings { + /// Create a new [`MemorySettings`] with default values. + #[inline] + #[must_use] + pub const fn new() -> Self { + Self { + preallocated_parsing_buffer_size: 1024, + max_allowed_memory_usage: usize::MAX, + graceful_bail_out_on_memory_limit_exceeded: false, + } + } + + /// Sets the number of bytes that should be preallocated on [`HtmlRewriter`] instantiation /// for the internal parsing buffer. /// /// In some cases (e.g. when rewriter encounters a start tag represented by two or more input @@ -808,10 +794,15 @@ pub struct MemorySettings { /// /// ### Default /// - /// `1024` bytes when constructed with `MemorySettings::new()`. + /// `1024` bytes. /// /// [`HtmlRewriter`]: struct.HtmlRewriter.html - pub preallocated_parsing_buffer_size: usize, + #[inline] + #[must_use] + pub const fn with_preallocated_parsing_buffer_size(mut self, size: usize) -> Self { + self.preallocated_parsing_buffer_size = size; + self + } /// Sets a hard limit in bytes on memory consumption of a [`HtmlRewriter`] instance. /// @@ -824,15 +815,20 @@ pub struct MemorySettings { /// /// ### Default /// - /// [`std::usize::MAX`] when constructed with `MemorySettings::new()`. + /// [`usize::MAX`]. 
/// /// [`HtmlRewriter`]: struct.HtmlRewriter.html - /// [`std::usize::MAX`]: https://doc.rust-lang.org/std/usize/constant.MAX.html + /// [`usize::MAX`]: https://doc.rust-lang.org/std/usize/constant.MAX.html /// [`write`]: struct.HtmlRewriter.html#method.write /// [`end`]: struct.HtmlRewriter.html#method.end - pub max_allowed_memory_usage: usize, + #[inline] + #[must_use] + pub const fn with_max_allowed_memory_usage(mut self, bytes: usize) -> Self { + self.max_allowed_memory_usage = bytes; + self + } - /// Controls how the rewriter recovers when [`max_allowed_memory_usage`] is exceeded. + /// Controls how the rewriter recovers when [the memory limit] is exceeded. /// /// When `false` (the default), the rewriter aborts processing the response, returns /// [`MemoryLimitExceededError`], and leaves the output sink in a potentially inconsistent state @@ -859,97 +855,166 @@ pub struct MemorySettings { /// /// ### Default /// - /// `false` when constructed with `MemorySettings::new()`. + /// `false`. /// + /// [the memory limit]: #method.with_max_allowed_memory_usage /// [`MemoryLimitExceededError`]: struct.MemoryLimitExceededError.html - /// [`max_allowed_memory_usage`]: #structfield.max_allowed_memory_usage /// [`Element::remove()`]: html_content/struct.Element.html#method.remove - pub graceful_bail_out_on_memory_limit_exceeded: bool, + #[inline] + #[must_use] + pub const fn with_graceful_bail_out_on_memory_limit_exceeded(mut self, value: bool) -> Self { + self.graceful_bail_out_on_memory_limit_exceeded = value; + self + } } -impl Default for MemorySettings { +/// Specifies settings for [`HtmlRewriter`]. +/// +/// Construct with [`Settings::new()`] / [`Settings::new_send()`] (or [`Settings::default()`]) and +/// configure the values via the `with_*` builder methods, plus +/// [`append_element_content_handler()`] and [`append_document_content_handler()`] for adding +/// handlers. 
+/// +/// ### Example +/// +/// ``` +/// use lol_html::{Settings, element, comments}; +/// use lol_html::html_content::{Comment, Element}; +/// +/// let settings = Settings::new() +/// .append_element_content_handler(element!("div[foo]", |el: &mut Element| { +/// // ... +/// Ok(()) +/// })) +/// .append_element_content_handler(comments!("body", |c: &mut Comment| { +/// // ... +/// Ok(()) +/// })); +/// ``` +/// +/// [`HtmlRewriter`]: struct.HtmlRewriter.html +/// [`append_element_content_handler()`]: #method.append_element_content_handler +/// [`append_document_content_handler()`]: #method.append_document_content_handler +pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> { + pub(crate) element_content_handlers: Vec<( + Cow<'selectors, Selector>, + ElementContentHandlers<'handlers, H>, + )>, + pub(crate) document_content_handlers: Vec>, + pub(crate) encoding: AsciiCompatibleEncoding, + pub(crate) memory_settings: MemorySettings, + pub(crate) strict: bool, + pub(crate) enable_esi_tags: bool, + pub(crate) adjust_charset_on_meta_tag: bool, + pub(crate) graceful_bail_out_on_content_handler_error: bool, +} + +impl Default for Settings<'_, '_, LocalHandlerTypes> { #[inline] fn default() -> Self { - Self { - preallocated_parsing_buffer_size: 1024, - max_allowed_memory_usage: usize::MAX, - graceful_bail_out_on_memory_limit_exceeded: false, - } + Self::new() } } -impl MemorySettings { - /// Create a new [`MemorySettings`] with default values. +impl Settings<'_, '_, LocalHandlerTypes> { + /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. + #[inline] #[must_use] pub fn new() -> Self { - Self::default() + Self::new_for_handler_types() } } -/// Specifies settings for [`HtmlRewriter`]. -/// -/// [`HtmlRewriter`]: struct.HtmlRewriter.html -pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> { - /// Specifies CSS selectors and rewriting handlers for elements and their inner content. 
+impl Settings<'_, '_, SendHandlerTypes> { + /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. + #[inline] + #[must_use] + pub fn new_send() -> Self { + Self::new_for_handler_types() + } +} + +impl<'handlers, 'selectors, H: HandlerTypes> Settings<'handlers, 'selectors, H> { + /// Creates [`Settings`]. + #[inline] + #[must_use] + pub fn new_for_handler_types() -> Self { + Settings { + element_content_handlers: vec![], + document_content_handlers: vec![], + encoding: AsciiCompatibleEncoding(encoding_rs::UTF_8), + memory_settings: MemorySettings::new(), + strict: true, + enable_esi_tags: false, + adjust_charset_on_meta_tag: false, + graceful_bail_out_on_content_handler_error: false, + } + } + + /// Appends a `(selector, handlers)` tuple to the list of element content handlers. + /// + /// The handlers specify CSS selectors and rewriting handlers for elements and their inner + /// content. /// /// ### Hint /// - /// [`element`], [`comments`] and [`text`] convenience macros can be used to construct a - /// `(Selector, ElementContentHandlers)` tuple. + /// The [`element`], [`comments`] and [`text`] convenience macros expand to the expected + /// `(Selector, ElementContentHandlers)` tuple, so they can be passed directly: /// - /// ### Example /// ``` - /// use std::borrow::Cow; - /// use lol_html::{ElementContentHandlers, Settings}; - /// use lol_html::html_content::{Comment, Element}; - /// - /// let settings = Settings { - /// element_content_handlers: vec! [ - /// ( - /// Cow::Owned("div[foo]".parse().unwrap()), - /// ElementContentHandlers::default().element(|el: &mut Element| { - /// // ... - /// - /// Ok(()) - /// }) - /// ), - /// ( - /// Cow::Owned("body".parse().unwrap()), - /// ElementContentHandlers::default().comments(|c: &mut Comment| { - /// // ... 
- /// - /// Ok(()) - /// }) - /// ) - /// ], - /// ..Settings::new() - /// }; + /// use lol_html::{Settings, element}; + /// use lol_html::html_content::Element; + /// + /// let settings = Settings::new() + /// .append_element_content_handler(element!("div[foo]", |el: &mut Element| { + /// // ... + /// Ok(()) + /// })); /// ``` /// /// [`element`]: macro.element.html /// [`comments`]: macro.comments.html /// [`text`]: macro.text.html - pub element_content_handlers: Vec<( - Cow<'selectors, Selector>, - ElementContentHandlers<'handlers, H>, - )>, + #[inline] + #[must_use] + pub fn append_element_content_handler( + mut self, + handler: ( + Cow<'selectors, Selector>, + ElementContentHandlers<'handlers, H>, + ), + ) -> Self { + self.element_content_handlers.push(handler); + self + } - /// Specifies rewriting handlers for the content without associating it to a particular - /// CSS selector. + /// Appends a [`DocumentContentHandlers`] to the list of document content handlers. + /// + /// Document content handlers specify rewriting handlers for the content without associating + /// it to a particular CSS selector. /// /// Refer to [`DocumentContentHandlers`] documentation for more information. /// /// ### Hint - /// [`doctype`], [`doc_comments`] and [`doc_text`] convenience macros can be used to construct - /// items of this vector. + /// + /// The [`doctype`], [`doc_comments`] and [`doc_text`] convenience macros return values of the + /// expected type, so they can be passed directly. 
/// /// [`DocumentContentHandlers`]: struct.DocumentContentHandlers.html /// [`doctype`]: macro.doctype.html /// [`doc_comments`]: macro.doc_comments.html /// [`doc_text`]: macro.doc_text.html - pub document_content_handlers: Vec>, + #[inline] + #[must_use] + pub fn append_document_content_handler( + mut self, + handler: DocumentContentHandlers<'handlers, H>, + ) -> Self { + self.document_content_handlers.push(handler); + self + } - /// Specifies the [character encoding] for the input and the output of the rewriter. + /// Sets the [character encoding] for the input and the output of the rewriter. /// /// Can be a [label] for any of the web-compatible encodings with an exception for `UTF-16LE`, /// `UTF-16BE`, `ISO-2022-JP` and `replacement` (these non-ASCII-compatible encodings @@ -960,11 +1025,21 @@ pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> /// /// ### Default /// - /// `"utf-8"` when constructed with `Settings::new()`. - pub encoding: AsciiCompatibleEncoding, + /// `"utf-8"`. + #[inline] + #[must_use] + pub const fn with_encoding(mut self, encoding: AsciiCompatibleEncoding) -> Self { + self.encoding = encoding; + self + } - /// Specifies the memory settings. - pub memory_settings: MemorySettings, + /// Sets the memory settings. + #[inline] + #[must_use] + pub const fn with_memory_settings(mut self, memory_settings: MemorySettings) -> Self { + self.memory_settings = memory_settings; + self + } /// If set to `true` the rewriter bails out if it encounters markup that drives the HTML parser /// into ambiguous state. @@ -997,17 +1072,29 @@ pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> /// /// ### Default /// - /// `true` when constructed with `Settings::new()`. - pub strict: bool, + /// `true`. 
+ #[inline] + #[must_use] + pub const fn with_strict(mut self, strict: bool) -> Self { + self.strict = strict; + self + } /// If enabled the rewriter enables support for [Edge Side Includes] tags, treating them as /// [void elements] and allowing them to be replaced with desired content. /// - /// `false` when constructed with `Settings::new()`. + /// ### Default + /// + /// `false`. /// /// [Edge Side Includes]: https://www.w3.org/TR/esi-lang/ /// [void elements]: https://developer.mozilla.org/en-US/docs/Glossary/Void_element - pub enable_esi_tags: bool, + #[inline] + #[must_use] + pub const fn with_enable_esi_tags(mut self, enable: bool) -> Self { + self.enable_esi_tags = enable; + self + } /// If enabled the rewriter will dynamically change the charset when it encounters a `meta` tag /// that specifies the charset. @@ -1032,8 +1119,13 @@ pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> /// /// ### Default /// - /// `false` when constructed with `Settings::new()`. - pub adjust_charset_on_meta_tag: bool, + /// `false`. + #[inline] + #[must_use] + pub const fn with_adjust_charset_on_meta_tag(mut self, adjust: bool) -> Self { + self.adjust_charset_on_meta_tag = adjust; + self + } /// Controls how the rewriter recovers when a content handler returns an `Err`. /// @@ -1054,9 +1146,9 @@ pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> /// of this setting. 
/// /// This is symmetric with - /// [`MemorySettings::graceful_bail_out_on_memory_limit_exceeded`], but kept as a separate - /// flag because the underlying error has different semantics: a memory limit is an - /// environmental constraint, whereas a content handler returning `Err` is an explicit + /// [`MemorySettings::with_graceful_bail_out_on_memory_limit_exceeded`], but kept as a + /// separate flag because the underlying error has different semantics: a memory limit is + /// an environmental constraint, whereas a content handler returning `Err` is an explicit /// signal from the application that something is wrong with the input. Some callers will /// want graceful recovery for one but not the other. /// @@ -1074,134 +1166,165 @@ pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> /// /// ### Default /// - /// `false` when constructed with `Settings::new()`. + /// `false`. /// - /// [`MemorySettings::graceful_bail_out_on_memory_limit_exceeded`]: - /// struct.MemorySettings.html#structfield.graceful_bail_out_on_memory_limit_exceeded + /// [`MemorySettings::with_graceful_bail_out_on_memory_limit_exceeded`]: + /// struct.MemorySettings.html#method.with_graceful_bail_out_on_memory_limit_exceeded /// [`RewritingError::ContentHandlerError`]: /// errors/enum.RewritingError.html#variant.ContentHandlerError /// [`Element::remove()`]: html_content/struct.Element.html#method.remove - pub graceful_bail_out_on_content_handler_error: bool, + #[inline] + #[must_use] + pub const fn with_graceful_bail_out_on_content_handler_error(mut self, value: bool) -> Self { + self.graceful_bail_out_on_content_handler_error = value; + self + } } -impl Default for Settings<'_, '_, LocalHandlerTypes> { +impl<'h, 's, H: HandlerTypes> From<RewriteStrSettings<'h, 's, H>> for Settings<'h, 's, H> { + #[inline] + fn from(settings: RewriteStrSettings<'h, 's, H>) -> Self { + Settings { + element_content_handlers: settings.element_content_handlers, + document_content_handlers:
settings.document_content_handlers, + strict: settings.strict, + enable_esi_tags: settings.enable_esi_tags, + ..Settings::new_for_handler_types() + } + } +} + +/// Specifies settings for the [`rewrite_str`] function. +/// +/// Construct with [`RewriteStrSettings::new()`] / [`RewriteStrSettings::new_send()`] (or +/// [`RewriteStrSettings::default()`]) and configure the values via the `with_*` builder methods, +/// plus [`append_element_content_handler()`] and [`append_document_content_handler()`] for adding +/// handlers. +/// +/// ### Example +/// +/// ``` +/// use lol_html::{RewriteStrSettings, element}; +/// use lol_html::html_content::Element; +/// +/// let settings = RewriteStrSettings::new() +/// .append_element_content_handler(element!("div[foo]", |el: &mut Element| { +/// // ... +/// Ok(()) +/// })); +/// ``` +/// +/// [`rewrite_str`]: fn.rewrite_str.html +/// [`append_element_content_handler()`]: #method.append_element_content_handler +/// [`append_document_content_handler()`]: #method.append_document_content_handler +pub struct RewriteStrSettings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> { + pub(crate) element_content_handlers: Vec<( + Cow<'selectors, Selector>, + ElementContentHandlers<'handlers, H>, + )>, + pub(crate) document_content_handlers: Vec<DocumentContentHandlers<'handlers, H>>, + pub(crate) strict: bool, + pub(crate) enable_esi_tags: bool, +} + +impl Default for RewriteStrSettings<'_, '_, LocalHandlerTypes> { #[inline] fn default() -> Self { Self::new() } } -impl Settings<'_, '_, LocalHandlerTypes> { - /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. +impl RewriteStrSettings<'_, '_, LocalHandlerTypes> { + /// Creates [`RewriteStrSettings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
#[inline] #[must_use] - pub fn new() -> Self { + pub const fn new() -> Self { Self::new_for_handler_types() } } -impl Settings<'_, '_, SendHandlerTypes> { - /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. +impl RewriteStrSettings<'_, '_, SendHandlerTypes> { + /// Creates [`RewriteStrSettings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. #[inline] #[must_use] - pub fn new_send() -> Self { + pub const fn new_send() -> Self { Self::new_for_handler_types() } } -impl Settings<'_, '_, H> { - /// Creates [`Settings`]. +impl<'handlers, 'selectors, H: HandlerTypes> RewriteStrSettings<'handlers, 'selectors, H> { + /// Creates [`RewriteStrSettings`]. #[inline] #[must_use] - pub fn new_for_handler_types() -> Self { - Settings { + pub const fn new_for_handler_types() -> Self { + RewriteStrSettings { element_content_handlers: vec![], document_content_handlers: vec![], - encoding: AsciiCompatibleEncoding(encoding_rs::UTF_8), - memory_settings: MemorySettings::default(), strict: true, - enable_esi_tags: false, - adjust_charset_on_meta_tag: false, - graceful_bail_out_on_content_handler_error: false, - } - } -} - -impl<'h, 's, H: HandlerTypes> From> for Settings<'h, 's, H> { - #[inline] - fn from(settings: RewriteStrSettings<'h, 's, H>) -> Self { - Settings { - element_content_handlers: settings.element_content_handlers, - document_content_handlers: settings.document_content_handlers, - strict: settings.strict, - enable_esi_tags: settings.enable_esi_tags, - ..Settings::new_for_handler_types() + enable_esi_tags: true, } } -} -/// Specifies settings for the [`rewrite_str`] function. -/// -/// [`rewrite_str`]: fn.rewrite_str.html -pub struct RewriteStrSettings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> { - /// Specifies CSS selectors and rewriting handlers for elements and their inner content. + /// Appends a `(selector, handlers)` tuple to the list of element content handlers. 
+ /// + /// The handlers specify CSS selectors and rewriting handlers for elements and their inner + /// content. /// /// ### Hint /// - /// [`element`], [`comments`] and [`text`] convenience macros can be used to construct a - /// `(Selector, ElementContentHandlers)` tuple. + /// The [`element`], [`comments`] and [`text`] convenience macros expand to the expected + /// `(Selector, ElementContentHandlers)` tuple, so they can be passed directly: /// - /// ### Example /// ``` - /// use std::borrow::Cow; - /// use lol_html::{ElementContentHandlers, RewriteStrSettings}; - /// use lol_html::html_content::{Comment, Element}; - /// - /// let settings = RewriteStrSettings { - /// element_content_handlers: vec! [ - /// ( - /// Cow::Owned("div[foo]".parse().unwrap()), - /// ElementContentHandlers::default().element(|el: &mut Element| { - /// // ... - /// - /// Ok(()) - /// }) - /// ), - /// ( - /// Cow::Owned("div[foo]".parse().unwrap()), - /// ElementContentHandlers::default().comments(|c: &mut Comment| { - /// // ... - /// - /// Ok(()) - /// }) - /// ) - /// ], - /// ..RewriteStrSettings::new() - /// }; + /// use lol_html::{RewriteStrSettings, element}; + /// use lol_html::html_content::Element; + /// + /// let settings = RewriteStrSettings::new() + /// .append_element_content_handler(element!("div[foo]", |el: &mut Element| { + /// // ... + /// Ok(()) + /// })); /// ``` /// /// [`element`]: macro.element.html /// [`comments`]: macro.comments.html /// [`text`]: macro.text.html - pub element_content_handlers: Vec<( - Cow<'selectors, Selector>, - ElementContentHandlers<'handlers, H>, - )>, + #[inline] + #[must_use] + pub fn append_element_content_handler( + mut self, + handler: ( + Cow<'selectors, Selector>, + ElementContentHandlers<'handlers, H>, + ), + ) -> Self { + self.element_content_handlers.push(handler); + self + } - /// Specifies rewriting handlers for the content without associating it to a particular - /// CSS selector. 
+ /// Appends a [`DocumentContentHandlers`] to the list of document content handlers. /// - /// Refer to [`DocumentContentHandlers`] documentation for more information. + /// Document content handlers specify rewriting handlers for the content without associating + /// it to a particular CSS selector. /// /// ### Hint - /// [`doctype`], [`doc_comments`] and [`doc_text`] convenience macros can be used to construct - /// items of this vector. + /// + /// The [`doctype`], [`doc_comments`] and [`doc_text`] convenience macros return values of the + /// expected type, so they can be passed directly. /// /// [`DocumentContentHandlers`]: struct.DocumentContentHandlers.html /// [`doctype`]: macro.doctype.html /// [`doc_comments`]: macro.doc_comments.html /// [`doc_text`]: macro.doc_text.html - pub document_content_handlers: Vec>, + #[inline] + #[must_use] + pub fn append_document_content_handler( + mut self, + handler: DocumentContentHandlers<'handlers, H>, + ) -> Self { + self.document_content_handlers.push(handler); + self + } /// If set to `true` the rewriter bails out if it encounters markup that drives the HTML parser /// into ambiguous state. @@ -1234,54 +1357,27 @@ pub struct RewriteStrSettings<'handlers, 'selectors, H: HandlerTypes = LocalHand /// /// ### Default /// - /// `true` when constructed with `Settings::new()`. - pub strict: bool, + /// `true`. + #[inline] + #[must_use] + pub const fn with_strict(mut self, strict: bool) -> Self { + self.strict = strict; + self + } /// If enabled the rewriter enables support for [Edge Side Includes] tags, treating them as /// [void elements] and allowing them to be replaced with desired content. /// - /// `true` when constructed with `RewriteStrSettings::new()`. + /// ### Default + /// + /// `true`. 
/// /// [Edge Side Includes]: https://www.w3.org/TR/esi-lang/ /// [void elements]: https://developer.mozilla.org/en-US/docs/Glossary/Void_element - pub enable_esi_tags: bool, -} - -impl Default for RewriteStrSettings<'_, '_, LocalHandlerTypes> { - #[inline] - fn default() -> Self { - Self::new() - } -} - -impl RewriteStrSettings<'_, '_, LocalHandlerTypes> { - /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. - #[inline] - #[must_use] - pub const fn new() -> Self { - Self::new_for_handler_types() - } -} - -impl RewriteStrSettings<'_, '_, SendHandlerTypes> { - /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. #[inline] #[must_use] - pub const fn new_send() -> Self { - Self::new_for_handler_types() - } -} - -impl RewriteStrSettings<'_, '_, H> { - /// Creates [`RewriteStrSettings`]. - #[inline] - #[must_use] - pub const fn new_for_handler_types() -> Self { - RewriteStrSettings { - element_content_handlers: vec![], - document_content_handlers: vec![], - strict: true, - enable_esi_tags: true, - } + pub const fn with_enable_esi_tags(mut self, enable: bool) -> Self { + self.enable_esi_tags = enable; + self } } diff --git a/src/transform_stream/mod.rs b/src/transform_stream/mod.rs index d42fafcb..9d9fe932 100644 --- a/src/transform_stream/mod.rs +++ b/src/transform_stream/mod.rs @@ -196,7 +196,7 @@ where self.parser.get_dispatcher().finish(chunk) } - #[cfg(feature = "integration_test")] + #[cfg(feature = "_integration_test")] #[allow(private_interfaces)] pub fn parser(&mut self) -> &mut Parser> { &mut self.parser diff --git a/tests/fixtures/element_content_replacement.rs b/tests/fixtures/element_content_replacement.rs index acfa84a1..8c7f9d9c 100644 --- a/tests/fixtures/element_content_replacement.rs +++ b/tests/fixtures/element_content_replacement.rs @@ -20,18 +20,16 @@ impl TestFixture for ElementContentReplacementTests { { let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: 
vec![element!(test.selector, |el| { + Settings::new() + .with_encoding(encoding) + .append_element_content_handler(element!(test.selector, |el| { el.set_inner_content( &format!("", test.selector), ContentType::Html, ); Ok(()) - })], - encoding, - ..Settings::new() - }, + })), |c: &[u8]| output.push(c), ); diff --git a/tests/fixtures/selector_matching.rs b/tests/fixtures/selector_matching.rs index 79cc561d..01668e8f 100644 --- a/tests/fixtures/selector_matching.rs +++ b/tests/fixtures/selector_matching.rs @@ -18,58 +18,54 @@ impl TestFixture for SelectorMatchingTests { { let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![ - element!(test.selector, |el| { - el.before( - &format!("", test.selector), + Settings::new() + .with_encoding(encoding) + .append_element_content_handler(element!(test.selector, |el| { + el.before( + &format!("", test.selector), + ContentType::Html, + ); + + el.after( + &format!("", test.selector), + ContentType::Html, + ); + + Ok(()) + })) + .append_element_content_handler(comments!(test.selector, |c| { + c.before( + &format!("", test.selector), + ContentType::Html, + ); + c.after( + &format!("", test.selector), + ContentType::Html, + ); + + Ok(()) + })) + .append_element_content_handler(text!(test.selector, |t| { + if first_text_chunk_expected { + t.before( + &format!("", test.selector), ContentType::Html, ); - el.after( - &format!("", test.selector), - ContentType::Html, - ); + first_text_chunk_expected = false; + } - Ok(()) - }), - comments!(test.selector, |c| { - c.before( - &format!("", test.selector), - ContentType::Html, - ); - c.after( - &format!("", test.selector), + if t.last_in_text_node() { + t.after( + &format!("", test.selector), ContentType::Html, ); - Ok(()) - }), - text!(test.selector, |t| { - if first_text_chunk_expected { - t.before( - &format!("", test.selector), - ContentType::Html, - ); - - first_text_chunk_expected = false; - } - - if t.last_in_text_node() { - t.after( - &format!("", 
test.selector), - ContentType::Html, - ); - - first_text_chunk_expected = true; - } - - Ok(()) - }), - ], - encoding, - ..Settings::new() - }, + first_text_chunk_expected = true; + } + + Ok(()) + })), |c: &[u8]| output.push(c), ); diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index f6f451fc..c03277df 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1,4 +1,4 @@ -#![cfg(feature = "integration_test")] +#![cfg(feature = "_integration_test")] #[macro_use] mod harness; diff --git a/tools/parser_trace/Cargo.toml b/tools/parser_trace/Cargo.toml index bf94aa60..9c97f787 100644 --- a/tools/parser_trace/Cargo.toml +++ b/tools/parser_trace/Cargo.toml @@ -6,5 +6,5 @@ publish = false [dependencies] encoding_rs = "0.8.35" -lol_html = { path = "../../", features=["integration_test", "debug_trace"] } +lol_html = { path = "../../", features=["_integration_test", "debug_trace"] } getopts = "0.2.23" diff --git a/tools/selectors_ast/Cargo.toml b/tools/selectors_ast/Cargo.toml index 4bee8f30..99d342d3 100644 --- a/tools/selectors_ast/Cargo.toml +++ b/tools/selectors_ast/Cargo.toml @@ -6,5 +6,5 @@ edition = "2024" publish = false [dependencies] -lol_html = { path = "../../", features=["integration_test"] } +lol_html = { path = "../../", features=["_integration_test"] } serde_json = "1.0.140"