Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.vscode/settings.json seems unrelated to "Further fixes."
Perhaps this should be a separate commit or PR?

"git.ignoreLimitWarning": true,
"files.eol": "\n",
"editor.formatOnSave": true,
"files.exclude": {
"tmp/**": true
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"[markdown]": {
"editor.formatOnSave": true,
"editor.formatOnPaste": true
},
"markdownlint.config": {
"MD013": false,
"MD024": false
},
"cSpell.diagnosticLevel": "Hint",
}
42 changes: 28 additions & 14 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,18 +1,30 @@
[package]
name = "encoding_rs"
name = "encoding_rs2"
description = "A Gecko-oriented implementation of the Encoding Standard"
version = "0.8.35"
edition = '2018'
version = "0.8.36"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You probably shouldn't bump the version number in a pull request, especially in a commit with changes other than just bumping the version.

hsivonen has previously bumped the version while tagging a release, uploading to crates.io, and so on:

edition = '2024'
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
license = "(Apache-2.0 OR MIT) AND BSD-3-Clause"
include = ["src/**/*.rs", "/data", "Cargo.toml", "COPYRIGHT", "LICENSE*", "README.md"]
include = [
"src/**/*.rs",
"/data",
"Cargo.toml",
"COPYRIGHT",
"LICENSE*",
"README.md",
]
readme = "README.md"
documentation = "https://docs.rs/encoding_rs/"
homepage = "https://docs.rs/encoding_rs/"
repository = "https://github.com/hsivonen/encoding_rs"
repository = "https://github.com/brmmm3/encoding_rs"
keywords = ["encoding", "web", "unicode", "charset"]
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
rust-version = "1.40"
categories = [
"text-processing",
"encoding",
"web-programming",
"internationalization",
]
rust-version = "1.86"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the rationale for bumping to rust-version 1.86? Is there an important feature there?

Note that Debian stable (trixie) currently ships rustc 1.85 (https://packages.debian.org/trixie/rustc), that 1 in 5 requests to crates.io are made with a stable Rust version older than 1.86 (https://lib.rs/stats#rustc), and that Rust Edition 2024 shipped with Rust 1.85.

Please remember to update the README's references to the MSRV when bumping it, as was done previously in the bump from 1.36 to 1.40: #111


[features]
default = ["alloc"]
Expand All @@ -26,20 +38,22 @@ fast-hanja-encode = []
fast-kanji-encode = []
fast-gb-hanzi-encode = []
fast-big5-hanzi-encode = []
fast-legacy-encode = ["fast-hangul-encode",
"fast-hanja-encode",
"fast-kanji-encode",
"fast-gb-hanzi-encode",
"fast-big5-hanzi-encode"]
fast-legacy-encode = [
"fast-hangul-encode",
"fast-hanja-encode",
"fast-kanji-encode",
"fast-gb-hanzi-encode",
"fast-big5-hanzi-encode",
]

[dependencies]
cfg-if = "1.0"
serde = { version = "1.0", optional = true }
any_all_workaround = { version = "0.1.0" , optional = true }
any_all_workaround = { version = "0.1.0", optional = true }

[dev-dependencies]
serde_derive = "1.0"
bincode = "1.0"
bincode = "2.0"
serde_json = "1.0"

[profile.release]
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
[![crates.io](https://img.shields.io/crates/v/encoding_rs.svg)](https://crates.io/crates/encoding_rs)
[![docs.rs](https://docs.rs/encoding_rs/badge.svg)](https://docs.rs/encoding_rs/)

This is a fork of hsivonen/encoding_rs with some updates.

encoding_rs is an implementation of the (non-JavaScript parts of) the
[Encoding Standard](https://encoding.spec.whatwg.org/) written in Rust.

Expand Down Expand Up @@ -53,7 +55,7 @@ Specifically, encoding_rs does the following:
workloads than the standard library; hopefully will get upstreamed some
day) and ASCII.

Additionally, `encoding_rs::mem` does the following:
Additionally, `encoding_rs2::mem` does the following:

* Checks if a byte buffer contains only ASCII.
* Checks if a potentially-invalid UTF-16 buffer contains only Basic Latin (ASCII).
Expand Down
128 changes: 65 additions & 63 deletions fuzz/fuzzers/fuzz_encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,48 +12,50 @@
extern crate libfuzzer_sys;
extern crate encoding_rs;

use encoding_rs::*;
use encoding_rs2::*;

// Doesn't include ISO-8859-8 (ISO-8859-8-I is in the list instead).
static ENCODINGS: [&'static Encoding; 39] = [&UTF_8_INIT,
&REPLACEMENT_INIT,
&GBK_INIT,
&BIG5_INIT,
&EUC_JP_INIT,
&GB18030_INIT,
&UTF_16BE_INIT,
&UTF_16LE_INIT,
&SHIFT_JIS_INIT,
&EUC_KR_INIT,
&ISO_2022_JP_INIT,
&X_USER_DEFINED_INIT,
&WINDOWS_1250_INIT,
&WINDOWS_1251_INIT,
&WINDOWS_1252_INIT,
&WINDOWS_1253_INIT,
&WINDOWS_1254_INIT,
&WINDOWS_1255_INIT,
&WINDOWS_1256_INIT,
&WINDOWS_1257_INIT,
&WINDOWS_1258_INIT,
&KOI8_U_INIT,
&MACINTOSH_INIT,
&IBM866_INIT,
&KOI8_R_INIT,
&ISO_8859_2_INIT,
&ISO_8859_3_INIT,
&ISO_8859_4_INIT,
&ISO_8859_5_INIT,
&ISO_8859_6_INIT,
&ISO_8859_7_INIT,
&ISO_8859_10_INIT,
&ISO_8859_13_INIT,
&ISO_8859_14_INIT,
&WINDOWS_874_INIT,
&ISO_8859_15_INIT,
&ISO_8859_16_INIT,
&ISO_8859_8_I_INIT,
&X_MAC_CYRILLIC_INIT];
static ENCODINGS: [&'static Encoding; 39] = [
&UTF_8_INIT,
&REPLACEMENT_INIT,
&GBK_INIT,
&BIG5_INIT,
&EUC_JP_INIT,
&GB18030_INIT,
&UTF_16BE_INIT,
&UTF_16LE_INIT,
&SHIFT_JIS_INIT,
&EUC_KR_INIT,
&ISO_2022_JP_INIT,
&X_USER_DEFINED_INIT,
&WINDOWS_1250_INIT,
&WINDOWS_1251_INIT,
&WINDOWS_1252_INIT,
&WINDOWS_1253_INIT,
&WINDOWS_1254_INIT,
&WINDOWS_1255_INIT,
&WINDOWS_1256_INIT,
&WINDOWS_1257_INIT,
&WINDOWS_1258_INIT,
&KOI8_U_INIT,
&MACINTOSH_INIT,
&IBM866_INIT,
&KOI8_R_INIT,
&ISO_8859_2_INIT,
&ISO_8859_3_INIT,
&ISO_8859_4_INIT,
&ISO_8859_5_INIT,
&ISO_8859_6_INIT,
&ISO_8859_7_INIT,
&ISO_8859_10_INIT,
&ISO_8859_13_INIT,
&ISO_8859_14_INIT,
&WINDOWS_874_INIT,
&ISO_8859_15_INIT,
&ISO_8859_16_INIT,
&ISO_8859_8_I_INIT,
&X_MAC_CYRILLIC_INIT,
];

fn check_utf8(data: &[u8]) {
if let Err(_) = ::std::str::from_utf8(data) {
Expand Down Expand Up @@ -146,8 +148,7 @@ fn encode_from_utf8(encoding: &'static Encoding, data: &[u8]) {
} else {
let mut total_read = 0;
loop {
if let Some(needed) = encoder
.max_buffer_length_from_utf8_if_no_unmappables(
if let Some(needed) = encoder.max_buffer_length_from_utf8_if_no_unmappables(
string.len() - total_read,
) {
dst.resize(needed, 0);
Expand All @@ -164,9 +165,9 @@ fn encode_from_utf8(encoding: &'static Encoding, data: &[u8]) {
}
let mut total_read = 0;
loop {
if let Some(needed) = encoder.max_buffer_length_from_utf8_if_no_unmappables(
string.len() - total_read,
) {
if let Some(needed) =
encoder.max_buffer_length_from_utf8_if_no_unmappables(string.len() - total_read)
{
dst.resize(needed, 0);
let (result, read, _, _) =
encoder.encode_from_utf8(&string[total_read..], &mut dst, false);
Expand Down Expand Up @@ -206,7 +207,8 @@ fn encode_from_utf8_without_replacement(encoding: &'static Encoding, data: &[u8]
string.push(c);
} else {
if let Some(needed) =
encoder.max_buffer_length_from_utf8_without_replacement(string.len()) {
encoder.max_buffer_length_from_utf8_without_replacement(string.len())
{
dst.resize(needed, 0);
let (result, _, _) =
encoder.encode_from_utf8_without_replacement(&string, &mut dst, true);
Expand All @@ -216,7 +218,8 @@ fn encode_from_utf8_without_replacement(encoding: &'static Encoding, data: &[u8]
}
}
if let Some(needed) =
encoder.max_buffer_length_from_utf8_without_replacement(string.len()) {
encoder.max_buffer_length_from_utf8_without_replacement(string.len())
{
dst.resize(needed, 0);
let (result, _, _) =
encoder.encode_from_utf8_without_replacement(&string, &mut dst, false);
Expand Down Expand Up @@ -260,7 +263,8 @@ fn encode_from_utf16(encoding: &'static Encoding, data: &[u8]) {
let mut total_read = 0;
loop {
if let Some(needed) =
encoder.max_buffer_length_from_utf16_if_no_unmappables(chunk.len() - total_read) {
encoder.max_buffer_length_from_utf16_if_no_unmappables(chunk.len() - total_read)
{
dst.resize(needed, 0);
let (result, read, _, _) =
encoder.encode_from_utf16(&chunk[total_read..], &mut dst, last);
Expand Down Expand Up @@ -301,11 +305,11 @@ fn encode_from_utf16_without_replacement(encoding: &'static Encoding, data: &[u8
let new_offset = offset + chunk_size;
let chunk = &s[offset..new_offset];
offset = new_offset;
if let Some(needed) = encoder
.max_buffer_length_from_utf16_without_replacement(chunk.len()) {
if let Some(needed) = encoder.max_buffer_length_from_utf16_without_replacement(chunk.len())
{
dst.resize(needed, 0);
let (result, _, _) = encoder
.encode_from_utf16_without_replacement(&chunk, &mut dst, last);
let (result, _, _) =
encoder.encode_from_utf16_without_replacement(&chunk, &mut dst, last);
match result {
EncoderResult::InputEmpty => {
if last {
Expand Down Expand Up @@ -524,16 +528,14 @@ fn dispatch_test(encoding: &'static Encoding, data: &[u8]) {
}
}

fuzz_target!(
|data: &[u8]| {
if let Some(first) = data.first() {
let index = *first as usize;
if index >= ENCODINGS.len() {
return;
}
let encoding = ENCODINGS[index];
dispatch_test(encoding, &data[1..]);
fuzz_target!(|data: &[u8]| {
if let Some(first) = data.first() {
let index = *first as usize;
if index >= ENCODINGS.len() {
return;
}
// Comment to make rustfmt not introduce a compilation error
let encoding = ENCODINGS[index];
dispatch_test(encoding, &data[1..]);
}
);
// Comment to make rustfmt not introduce a compilation error
});
5 changes: 3 additions & 2 deletions fuzz/fuzzers/fuzz_labels.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#![no_main]
#[macro_use] extern crate libfuzzer_sys;
#[macro_use]
extern crate libfuzzer_sys;
extern crate encoding_rs;
use encoding_rs::*;
use encoding_rs2::*;

fuzz_target!(|data: &[u8]| {
Encoding::for_label(data);
Expand Down
Loading
Loading