Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
- name: Run clippy
run: cargo clippy --all --all-targets --target-dir=target
- name: Run clippy on integration tests
run: cargo clippy --all --all-targets --features=integration_test --target-dir=target
run: cargo clippy --all --all-targets --features=_integration_test --target-dir=target
- name: Run clippy on C API
run: cargo clippy --all-targets --manifest-path=c-api/Cargo.toml --target-dir=target
- name: Run clippy on JS API
Expand Down
38 changes: 28 additions & 10 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,34 @@

## v3.0.0

- Added `MemorySettings::graceful_bail_out_on_memory_limit_exceeded`: when set, the rewriter
flushes every input byte it has received but not yet emitted to the sink (as-is) before
returning `MemoryLimitExceededError`, so callers can continue the response by writing
subsequent bytes directly to their downstream sink instead of breaking it.
- Added `Settings::graceful_bail_out_on_content_handler_error`: symmetric to the memory flag
above, but for `RewritingError::ContentHandlerError`. When set, the rewriter flushes
remaining input bytes before propagating a handler error, preserving the response.
Currently exposed via the Rust API only; the C API still uses the original behavior.
- Adding new fields to `MemorySettings` and `Settings` is a SemVer-breaking change for
existing struct-literal construction, hence the major version bump.
- Added `MemorySettings::with_graceful_bail_out_on_memory_limit_exceeded()`: when set, the
rewriter flushes every input byte it has received but not yet emitted to the sink (as-is)
before returning `MemoryLimitExceededError`, so callers can continue the response by
writing subsequent bytes directly to their downstream sink instead of breaking it.
- Added `Settings::with_graceful_bail_out_on_content_handler_error()`: symmetric to the
memory setting above, but for `RewritingError::ContentHandlerError`. When set, the
rewriter flushes remaining input bytes before propagating a handler error, preserving
the response. Currently exposed via the Rust API only; the C API still uses the original
behavior.
- Reworked `Settings`, `MemorySettings` and `RewriteStrSettings` to use a consuming-builder
API. Fields are now private: construct a value with `::new()`, chain `with_*` setters for
individual options, and call the `append_*` methods to add content handlers. Because callers
no longer use struct literals, future field additions are non-breaking. Migration:
```rust
// before
Settings {
element_content_handlers: vec![element!("div", |el| { /* ... */ Ok(()) })],
strict: false,
..Settings::new()
}
// after
Settings::new()
.with_strict(false)
.append_element_content_handler(element!("div", |el| { /* ... */ Ok(()) }))
```
- Renamed the internal-use feature `integration_test` to `_integration_test`. The leading
underscore signals to `cargo-semver-checks` and similar tools that the feature is not
part of the public API.

## v2.9.0

Expand Down
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ bench = false

[features]
debug_trace = []
# Unstable: for internal use only
integration_test = []
# Unstable: for internal use only. The leading underscore signals "internal" to
# `cargo-semver-checks` and similar tools so they exclude it from the public-API analysis.
_integration_test = []

[[bench]]
harness = false
Expand Down
27 changes: 11 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,17 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut output = vec![];

let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![
element!("a[href]", |el| {
let href = el
.get_attribute("href")
.expect("href was required")
.replace("http:", "https:");

el.set_attribute("href", &href)?;

Ok(())
})
],
..Settings::new()
},
|c: &[u8]| output.extend_from_slice(c)
Settings::new().append_element_content_handler(element!("a[href]", |el| {
let href = el
.get_attribute("href")
.expect("href was required")
.replace("http:", "https:");

el.set_attribute("href", &href)?;

Ok(())
})),
|c: &[u8]| output.extend_from_slice(c),
);

rewriter.write(b"<div><a href=")?;
Expand Down
10 changes: 2 additions & 8 deletions benches/cases/parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@ define_group!(
// NOTE: this switches parser to the lexer mode and doesn't
// trigger token production for anything, except doctype. So,
// we can get relatively fair comparison.
Settings {
document_content_handlers: vec![doctype!(noop_handler!())],
..Settings::new()
}
Settings::new().append_document_content_handler(doctype!(noop_handler!()))
),
(
"Text rewritable unit parsing and decoding",
Expand All @@ -21,10 +18,7 @@ define_group!(
// sequence of bytes for the given character encoding. So, if there is a text
// handler in the selector matching scope, we need to slice and decode all
// incoming chunks to produce correct text chunk rewritable units.
Settings {
document_content_handlers: vec![doc_text!(noop_handler!())],
..Settings::new()
}
Settings::new().append_document_content_handler(doc_text!(noop_handler!()))
)
]
);
24 changes: 9 additions & 15 deletions benches/cases/rewriting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,20 @@ define_group!(
[
(
"Modification of tags of an element with lots of content",
Settings {
element_content_handlers: vec![element!("body", |el| {
el.set_tag_name("body1").unwrap();
el.after("test", ContentType::Text);
Settings::new().append_element_content_handler(element!("body", |el| {
el.set_tag_name("body1").unwrap();
el.after("test", ContentType::Text);

Ok(())
})],
..Settings::new()
}
Ok(())
}))
),
(
"Remove content of an element",
Settings {
element_content_handlers: vec![element!("ul", |el| {
el.set_inner_content("", ContentType::Text);
Settings::new().append_element_content_handler(element!("ul", |el| {
el.set_inner_content("", ContentType::Text);

Ok(())
})],
..Settings::new()
}
Ok(())
}))
)
]
);
40 changes: 12 additions & 28 deletions benches/cases/selector_matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,46 +5,30 @@ define_group!(
[
(
"Match-all selector",
Settings {
element_content_handlers: vec![element!("*", noop_handler!())],
..Settings::new()
}
Settings::new().append_element_content_handler(element!("*", noop_handler!()))
),
(
"Tag name selector",
Settings {
element_content_handlers: vec![element!("div", noop_handler!())],
..Settings::new()
}
Settings::new().append_element_content_handler(element!("div", noop_handler!()))
),
(
"Class selector",
Settings {
element_content_handlers: vec![element!(".note", noop_handler!())],
..Settings::new()
}
Settings::new().append_element_content_handler(element!(".note", noop_handler!()))
),
(
"Attribute selector",
Settings {
element_content_handlers: vec![element!("[href]", noop_handler!())],
..Settings::new()
}
Settings::new().append_element_content_handler(element!("[href]", noop_handler!()))
),
(
"Multiple selectors",
Settings {
element_content_handlers: vec![
element!("ul", noop_handler!()),
element!("ul > li", noop_handler!()),
element!("table > tbody td dfn", noop_handler!()),
element!("body table > tbody tr", noop_handler!()),
element!("body [href]", noop_handler!()),
element!("div img", noop_handler!()),
element!("div.note span", noop_handler!())
],
..Settings::new()
}
Settings::new()
.append_element_content_handler(element!("ul", noop_handler!()))
.append_element_content_handler(element!("ul > li", noop_handler!()))
.append_element_content_handler(element!("table > tbody td dfn", noop_handler!()))
.append_element_content_handler(element!("body table > tbody tr", noop_handler!()))
.append_element_content_handler(element!("body [href]", noop_handler!()))
.append_element_content_handler(element!("div img", noop_handler!()))
.append_element_content_handler(element!("div.note span", noop_handler!()))
)
]
);
35 changes: 20 additions & 15 deletions c-api/src/rewriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,26 @@ fn lol_html_rewriter_build_inner(
let maybe_encoding =
encoding_rs::Encoding::for_label_no_replacement(to_bytes!(encoding, encoding_len));
let encoding = maybe_encoding.ok_or(EncodingError::UnknownEncoding)?;
let settings = Settings {
element_content_handlers: handlers.element,
document_content_handlers: handlers.document,
encoding: encoding
.try_into()
.or(Err(EncodingError::NonAsciiCompatibleEncoding))?,
memory_settings,
strict,
enable_esi_tags,
adjust_charset_on_meta_tag: false,
// TODO: expose `graceful_bail_out_on_content_handler_error` through the C API. Adding
// a new parameter to `lol_html_rewriter_build()` is a breaking ABI change, so it
// belongs behind a new function variant or a settings struct.
graceful_bail_out_on_content_handler_error: false,
};
// NOTE: `graceful_bail_out_on_content_handler_error` is not yet exposed in the C API, so it
// keeps its default (`false`) here. Adding a parameter to `lol_html_rewriter_build()` is a
// breaking ABI change, so exposing the flag needs a new function variant or a settings struct.
let mut settings = Settings::new()
.with_encoding(
encoding
.try_into()
.or(Err(EncodingError::NonAsciiCompatibleEncoding))?,
)
.with_memory_settings(memory_settings)
.with_strict(strict)
.with_enable_esi_tags(enable_esi_tags);

for handler in handlers.element {
settings = settings.append_element_content_handler(handler);
}

for handler in handlers.document {
settings = settings.append_document_content_handler(handler);
}

let output_sink = ExternOutputSink::new(output_sink, output_sink_user_data);
let rewriter = lol_html::HtmlRewriter::new(settings, output_sink);
Expand Down
17 changes: 7 additions & 10 deletions examples/defer_scripts/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,13 @@ fn main() {

// Create the rewriter
let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![element!(
"script[src]:not([async]):not([defer])",
|el| {
el.set_attribute("defer", "").unwrap();
Ok(())
}
)],
..Settings::new()
},
Settings::new().append_element_content_handler(element!(
"script[src]:not([async]):not([defer])",
|el| {
el.set_attribute("defer", "").unwrap();
Ok(())
}
)),
output_sink,
);

Expand Down
26 changes: 11 additions & 15 deletions examples/mixed_content_rewriter/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,18 @@ fn main() {

// Create the rewriter
let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![
element!("a[href], link[rel=stylesheet][href]", |el| {
rewrite_url_in_attr(el, "href");
Settings::new()
.append_element_content_handler(element!("a[href], link[rel=stylesheet][href]", |el| {
rewrite_url_in_attr(el, "href");
Ok(())
}))
.append_element_content_handler(element!(
"script[src], iframe[src], img[src], audio[src], video[src]",
|el| {
rewrite_url_in_attr(el, "src");
Ok(())
}),
element!(
"script[src], iframe[src], img[src], audio[src], video[src]",
|el| {
rewrite_url_in_attr(el, "src");
Ok(())
}
),
],
..Settings::new()
},
}
)),
output_sink,
);

Expand Down
Loading
Loading