From e828a089a51135d60caba932ccc63d56b92ca975 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Wed, 28 Jan 2026 16:03:09 +0530 Subject: [PATCH 1/9] feat(rust): make PyO3 optional to fix Python 3.14 fuzz linking - Make pyo3 dependency optional behind 'python' feature flag - Add rlib crate-type to support both cdylib (Python) and rlib (fuzzing) - Gate all Python-specific code with #[cfg(feature = "python")] - Make pure Rust functions (escape_xml, wrap_cdata, etc.) public for fuzzing - Add comprehensive unit tests for XML utility functions Fixes linker error with Python 3.14 where PyUnicode_DATA and PyUnicode_KIND symbols are now inline macros, not exported functions. Fuzz targets can now build without linking against Python. Amp-Thread-ID: https://ampcode.com/threads/T-019c0425-ac62-76d8-9d59-4d6aba3edf45 Co-authored-by: Amp --- rust/Cargo.toml | 8 +- rust/fuzz/Cargo.toml | 51 ++++ .../71853c6197a6a7f222db0f1978c7cb232b87c5ee | 2 + .../a70fb6a9415d3c34a9ea5510ba7e3a055d88aef9 | 1 + rust/fuzz/fuzz_targets/fuzz_escape_xml.rs | 23 ++ .../fuzz_targets/fuzz_is_valid_xml_name.rs | 33 +++ .../fuzz_targets/fuzz_make_attr_string.rs | 35 +++ .../fuzz_targets/fuzz_make_valid_xml_name.rs | 30 +++ rust/fuzz/fuzz_targets/fuzz_wrap_cdata.rs | 21 ++ rust/src/lib.rs | 254 +++++++++++++++++- 10 files changed, 451 insertions(+), 7 deletions(-) create mode 100644 rust/fuzz/Cargo.toml create mode 100644 rust/fuzz/corpus/fuzz_escape_xml/71853c6197a6a7f222db0f1978c7cb232b87c5ee create mode 100644 rust/fuzz/corpus/fuzz_escape_xml/a70fb6a9415d3c34a9ea5510ba7e3a055d88aef9 create mode 100644 rust/fuzz/fuzz_targets/fuzz_escape_xml.rs create mode 100644 rust/fuzz/fuzz_targets/fuzz_is_valid_xml_name.rs create mode 100644 rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs create mode 100644 rust/fuzz/fuzz_targets/fuzz_make_valid_xml_name.rs create mode 100644 rust/fuzz/fuzz_targets/fuzz_wrap_cdata.rs diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 756aadc..54c1e60 100644 --- a/rust/Cargo.toml 
+++ b/rust/Cargo.toml @@ -7,10 +7,14 @@ license = "Apache-2.0" [lib] name = "json2xml_rs" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] + +[features] +default = ["python"] +python = ["pyo3/extension-module", "dep:pyo3"] [dependencies] -pyo3 = { version = "0.27", features = ["extension-module"] } +pyo3 = { version = "0.27", optional = true } [profile.release] lto = true diff --git a/rust/fuzz/Cargo.toml b/rust/fuzz/Cargo.toml new file mode 100644 index 0000000..8d7c025 --- /dev/null +++ b/rust/fuzz/Cargo.toml @@ -0,0 +1,51 @@ +[package] +name = "json2xml_rs-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +arbitrary = { version = "1", features = ["derive"] } + +[dependencies.json2xml_rs] +path = ".." +default-features = false + +[[bin]] +name = "fuzz_escape_xml" +path = "fuzz_targets/fuzz_escape_xml.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_wrap_cdata" +path = "fuzz_targets/fuzz_wrap_cdata.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_is_valid_xml_name" +path = "fuzz_targets/fuzz_is_valid_xml_name.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_make_valid_xml_name" +path = "fuzz_targets/fuzz_make_valid_xml_name.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_make_attr_string" +path = "fuzz_targets/fuzz_make_attr_string.rs" +test = false +doc = false +bench = false diff --git a/rust/fuzz/corpus/fuzz_escape_xml/71853c6197a6a7f222db0f1978c7cb232b87c5ee b/rust/fuzz/corpus/fuzz_escape_xml/71853c6197a6a7f222db0f1978c7cb232b87c5ee new file mode 100644 index 0000000..139597f --- /dev/null +++ b/rust/fuzz/corpus/fuzz_escape_xml/71853c6197a6a7f222db0f1978c7cb232b87c5ee @@ -0,0 +1,2 @@ + + diff --git a/rust/fuzz/corpus/fuzz_escape_xml/a70fb6a9415d3c34a9ea5510ba7e3a055d88aef9 b/rust/fuzz/corpus/fuzz_escape_xml/a70fb6a9415d3c34a9ea5510ba7e3a055d88aef9 new file mode 100644 index 
0000000..b8635bc --- /dev/null +++ b/rust/fuzz/corpus/fuzz_escape_xml/a70fb6a9415d3c34a9ea5510ba7e3a055d88aef9 @@ -0,0 +1 @@ +ë \ No newline at end of file diff --git a/rust/fuzz/fuzz_targets/fuzz_escape_xml.rs b/rust/fuzz/fuzz_targets/fuzz_escape_xml.rs new file mode 100644 index 0000000..23951ed --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_escape_xml.rs @@ -0,0 +1,23 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use json2xml_rs::escape_xml; + +fuzz_target!(|data: &str| { + let result = escape_xml(data); + + // Verify invariants: + // 1. Result should not contain unescaped special chars + assert!(!result.contains('&') || result.contains("&amp;") || result.contains("&quot;") + || result.contains("&apos;") || result.contains("&lt;") || result.contains("&gt;")); + + // 2. Result should be valid (no panics occurred) + // 3. If input had no special chars, output equals input + if !data.contains('&') && !data.contains('"') && !data.contains('\'') + && !data.contains('<') && !data.contains('>') { + assert_eq!(result, data); + } + + // 4. Output length should be >= input length (escaping only adds chars) + assert!(result.len() >= data.len()); +}); diff --git a/rust/fuzz/fuzz_targets/fuzz_is_valid_xml_name.rs b/rust/fuzz/fuzz_targets/fuzz_is_valid_xml_name.rs new file mode 100644 index 0000000..45b59f0 --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_is_valid_xml_name.rs @@ -0,0 +1,33 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use json2xml_rs::is_valid_xml_name; + +fuzz_target!(|data: &str| { + let result = is_valid_xml_name(data); + + // Verify invariants: + // 1. Empty string is always invalid + if data.is_empty() { + assert!(!result); + } + + // 2. String starting with digit is invalid + if let Some(first) = data.chars().next() { + if first.is_ascii_digit() { + assert!(!result); + } + } + + // 3. String starting with "xml" (case-insensitive) is invalid + if data.to_lowercase().starts_with("xml") { + assert!(!result); + } + + // 4. 
String containing spaces is invalid + if data.contains(' ') { + assert!(!result); + } + + // 5. Function should never panic - reaching here means it didn't +}); diff --git a/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs b/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs new file mode 100644 index 0000000..294ad88 --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs @@ -0,0 +1,35 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use arbitrary::Arbitrary; +use json2xml_rs::make_attr_string; + +#[derive(Arbitrary, Debug)] +struct AttrInput { + attrs: Vec<(String, String)>, +} + +fuzz_target!(|input: AttrInput| { + let result = make_attr_string(&input.attrs); + + // Verify invariants: + // 1. Empty attrs should produce empty string + if input.attrs.is_empty() { + assert!(result.is_empty()); + return; + } + + // 2. Result should start with space (for XML formatting) + assert!(result.starts_with(' '), "Attribute string should start with space"); + + // 3. Each attribute should produce key="value" format + for (key, _value) in &input.attrs { + // Key should appear in the result + assert!(result.contains(key), "Key '{}' should appear in result", key); + } + + // 4. Values should be escaped (no raw & < > " ' in values) + // The make_attr_string calls escape_xml on values + + // 5. Function should never panic - reaching here means it didn't +}); diff --git a/rust/fuzz/fuzz_targets/fuzz_make_valid_xml_name.rs b/rust/fuzz/fuzz_targets/fuzz_make_valid_xml_name.rs new file mode 100644 index 0000000..bdf417b --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_make_valid_xml_name.rs @@ -0,0 +1,30 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use json2xml_rs::make_valid_xml_name; + +fuzz_target!(|data: &str| { + let (name, attr) = make_valid_xml_name(data); + + // Verify invariants: + // 1. 
The returned name must be a valid XML name OR be "key" with an attribute + if name != "key" { + // If we didn't fall back to "key", the name should be valid + // (though it might have been transformed) + assert!(!name.is_empty(), "Name should not be empty"); + } + + // 2. If attr is Some, name should be "key" + if attr.is_some() { + assert_eq!(name, "key", "Fallback name should be 'key'"); + let (attr_name, _attr_value) = attr.unwrap(); + assert_eq!(attr_name, "name", "Attribute key should be 'name'"); + } + + // 3. Purely numeric input should get 'n' prefix + if !data.is_empty() && data.chars().all(|c| c.is_ascii_digit()) { + assert!(name.starts_with('n'), "Numeric keys should get 'n' prefix"); + } + + // 4. Function should never panic - reaching here means it didn't +}); diff --git a/rust/fuzz/fuzz_targets/fuzz_wrap_cdata.rs b/rust/fuzz/fuzz_targets/fuzz_wrap_cdata.rs new file mode 100644 index 0000000..f159747 --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_wrap_cdata.rs @@ -0,0 +1,21 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use json2xml_rs::wrap_cdata; + +fuzz_target!(|data: &str| { + let result = wrap_cdata(data); + + // Verify invariants: + // 1. Result must start with CDATA opening + assert!(result.starts_with("<![CDATA[")); + + // 2. Result must end with CDATA closing + assert!(result.ends_with("]]>")); + + // 3. The ]]> sequence in input must be properly escaped + // (split into ]]]]><![CDATA[>) + + // 4. Result should be longer than or equal to input + CDATA wrapper (12 chars) + assert!(result.len() >= data.len() + 12); +}); diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 6cff327..ea38ea5 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -3,14 +3,16 @@ //! This module provides a high-performance Rust implementation of dicttoxml //! that can be used as a drop-in replacement for the pure Python version. +#[cfg(feature = "python")] use pyo3::prelude::*; +#[cfg(feature = "python")] use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString}; use std::fmt::Write; /// Escape special XML characters in a string. 
/// This is one of the hottest paths - optimized for single-pass processing. #[inline] -fn escape_xml(s: &str) -> String { +pub fn escape_xml(s: &str) -> String { let mut result = String::with_capacity(s.len() + s.len() / 10); for c in s.chars() { match c { @@ -27,14 +29,14 @@ fn escape_xml(s: &str) -> String { /// Wrap content in CDATA section #[inline] -fn wrap_cdata(s: &str) -> String { +pub fn wrap_cdata(s: &str) -> String { let escaped = s.replace("]]>", "]]]]><![CDATA[>"); format!("<![CDATA[{}]]>", escaped) } /// Check if a key is a valid XML element name (simplified check) /// Full validation would require XML parsing, but this catches common issues -fn is_valid_xml_name(key: &str) -> bool { +pub fn is_valid_xml_name(key: &str) -> bool { if key.is_empty() { return false; } @@ -59,7 +61,7 @@ fn is_valid_xml_name(key: &str) -> bool { } /// Make a valid XML name from a key, returning the key and any attributes -fn make_valid_xml_name(key: &str) -> (String, Option<(String, String)>) { +pub fn make_valid_xml_name(key: &str) -> (String, Option<(String, String)>) { let escaped = escape_xml(key); // Already valid @@ -83,7 +85,7 @@ fn make_valid_xml_name(key: &str) -> (String, Option<(String, String)>) { } /// Build an attribute string from key-value pairs -fn make_attr_string(attrs: &[(String, String)]) -> String { +pub fn make_attr_string(attrs: &[(String, String)]) -> String { if attrs.is_empty() { return String::new(); } @@ -95,6 +97,7 @@ fn make_attr_string(attrs: &[(String, String)]) -> String { } /// Configuration for XML conversion +#[cfg(feature = "python")] struct ConvertConfig { attr_type: bool, cdata: bool, @@ -102,7 +105,11 @@ struct ConvertConfig { list_headers: bool, } +#[cfg(feature = "python")] +use pyo3::PyResult; + /// Convert a Python value to XML string +#[cfg(feature = "python")] fn convert_value( py: Python<'_>, obj: &Bound<'_, PyAny>, @@ -167,6 +174,7 @@ fn convert_value( } /// Convert a string value to XML +#[cfg(feature = "python")] fn convert_string(key: &str, 
val: &str, config: &ConvertConfig) -> PyResult { let (xml_key, name_attr) = make_valid_xml_name(key); let mut attrs = Vec::new(); @@ -192,6 +200,7 @@ fn convert_string(key: &str, val: &str, config: &ConvertConfig) -> PyResult PyResult { let (xml_key, name_attr) = make_valid_xml_name(key); let mut attrs = Vec::new(); @@ -233,6 +243,7 @@ fn convert_bool(key: &str, val: bool, config: &ConvertConfig) -> PyResult PyResult { let (xml_key, name_attr) = make_valid_xml_name(key); let mut attrs = Vec::new(); @@ -249,6 +260,7 @@ fn convert_none(key: &str, config: &ConvertConfig) -> PyResult { } /// Convert a dictionary to XML +#[cfg(feature = "python")] fn convert_dict( py: Python<'_>, dict: &Bound<'_, PyDict>, @@ -426,6 +438,7 @@ fn convert_dict( } /// Convert a list to XML +#[cfg(feature = "python")] fn convert_list( py: Python<'_>, list: &Bound<'_, PyList>, @@ -602,6 +615,7 @@ fn convert_list( /// /// Returns: /// bytes: The XML representation of the input object +#[cfg(feature = "python")] #[pyfunction] #[pyo3(signature = (obj, root=true, custom_root="root", attr_type=true, item_wrap=true, cdata=false, list_headers=false))] #[allow(clippy::too_many_arguments)] @@ -647,18 +661,21 @@ fn dicttoxml( /// Fast XML string escaping. /// /// Escapes &, ", ', <, > characters for XML. +#[cfg(feature = "python")] #[pyfunction] fn escape_xml_py(s: &str) -> String { escape_xml(s) } /// Wrap a string in CDATA section. +#[cfg(feature = "python")] #[pyfunction] fn wrap_cdata_py(s: &str) -> String { wrap_cdata(s) } /// A Python module implemented in Rust. 
+#[cfg(feature = "python")] #[pymodule] fn json2xml_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(dicttoxml, m)?)?; @@ -666,3 +683,230 @@ fn json2xml_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(wrap_cdata_py, m)?)?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + mod escape_xml_tests { + use super::*; + + #[test] + fn escapes_ampersand() { + assert_eq!(escape_xml("foo & bar"), "foo &amp; bar"); + } + + #[test] + fn escapes_double_quote() { + assert_eq!(escape_xml("say \"hello\""), "say &quot;hello&quot;"); + } + + #[test] + fn escapes_single_quote() { + assert_eq!(escape_xml("it's fine"), "it&apos;s fine"); + } + + #[test] + fn escapes_less_than() { + assert_eq!(escape_xml("a < b"), "a &lt; b"); + } + + #[test] + fn escapes_greater_than() { + assert_eq!(escape_xml("a > b"), "a &gt; b"); + } + + #[test] + fn escapes_all_special_chars() { + assert_eq!( + escape_xml("<tag attr=\"val\" & 'x'>"), + "&lt;tag attr=&quot;val&quot; &amp; &apos;x&apos;&gt;" + ); + } + + #[test] + fn handles_empty_string() { + assert_eq!(escape_xml(""), ""); + } + + #[test] + fn handles_no_special_chars() { + assert_eq!(escape_xml("hello world 123"), "hello world 123"); + } + + #[test] + fn handles_unicode() { + assert_eq!(escape_xml("café & thé"), "café &amp; thé"); + } + } + + mod wrap_cdata_tests { + use super::*; + + #[test] + fn wraps_simple_string() { + assert_eq!(wrap_cdata("hello"), "<![CDATA[hello]]>"); + } + + #[test] + fn wraps_empty_string() { + assert_eq!(wrap_cdata(""), "<![CDATA[]]>"); + } + + #[test] + fn escapes_cdata_end_sequence() { + assert_eq!(wrap_cdata("foo]]>bar"), "<![CDATA[foo]]]]><![CDATA[>bar]]>"); + } + + #[test] + fn handles_multiple_cdata_end_sequences() { + assert_eq!( + wrap_cdata("a]]>b]]>c"), + "<![CDATA[a]]]]><![CDATA[>b]]]]><![CDATA[>c]]>" + ); + } + + #[test] + fn handles_special_xml_chars() { + assert_eq!(wrap_cdata(""), "]]>"); + } + } + + mod is_valid_xml_name_tests { + use super::*; + + #[test] + fn accepts_simple_name() { + assert!(is_valid_xml_name("element")); + } + + #[test] + fn accepts_name_with_underscore_prefix() { + 
assert!(is_valid_xml_name("_element")); + } + + #[test] + fn accepts_name_with_numbers() { + assert!(is_valid_xml_name("item123")); + } + + #[test] + fn accepts_name_with_hyphens() { + assert!(is_valid_xml_name("my-element")); + } + + #[test] + fn accepts_name_with_dots() { + assert!(is_valid_xml_name("my.element")); + } + + #[test] + fn accepts_name_with_colons() { + assert!(is_valid_xml_name("ns:element")); + } + + #[test] + fn rejects_empty_string() { + assert!(!is_valid_xml_name("")); + } + + #[test] + fn rejects_name_starting_with_number() { + assert!(!is_valid_xml_name("123element")); + } + + #[test] + fn rejects_name_starting_with_hyphen() { + assert!(!is_valid_xml_name("-element")); + } + + #[test] + fn rejects_name_with_spaces() { + assert!(!is_valid_xml_name("my element")); + } + + #[test] + fn rejects_xml_prefix_lowercase() { + assert!(!is_valid_xml_name("xmlelement")); + } + + #[test] + fn rejects_xml_prefix_uppercase() { + assert!(!is_valid_xml_name("XMLelement")); + } + + #[test] + fn rejects_xml_prefix_mixed_case() { + assert!(!is_valid_xml_name("XmLelement")); + } + } + + mod make_valid_xml_name_tests { + use super::*; + + #[test] + fn returns_valid_name_unchanged() { + let (name, attr) = make_valid_xml_name("element"); + assert_eq!(name, "element"); + assert!(attr.is_none()); + } + + #[test] + fn prepends_n_to_numeric_key() { + let (name, attr) = make_valid_xml_name("123"); + assert_eq!(name, "n123"); + assert!(attr.is_none()); + } + + #[test] + fn replaces_spaces_with_underscores() { + let (name, attr) = make_valid_xml_name("my element"); + assert_eq!(name, "my_element"); + assert!(attr.is_none()); + } + + #[test] + fn falls_back_to_key_with_name_attr() { + let (name, attr) = make_valid_xml_name("-invalid"); + assert_eq!(name, "key"); + assert_eq!(attr, Some(("name".to_string(), "-invalid".to_string()))); + } + + #[test] + fn escapes_special_chars_in_name() { + let (name, attr) = make_valid_xml_name("tag&name"); + assert_eq!(name, "key"); + 
assert_eq!(attr, Some(("name".to_string(), "tag&amp;name".to_string()))); + } + } + + mod make_attr_string_tests { + use super::*; + + #[test] + fn returns_empty_for_empty_attrs() { + assert_eq!(make_attr_string(&[]), ""); + } + + #[test] + fn formats_single_attr() { + let attrs = vec![("type".to_string(), "str".to_string())]; + assert_eq!(make_attr_string(&attrs), " type=\"str\""); + } + + #[test] + fn formats_multiple_attrs() { + let attrs = vec![ + ("name".to_string(), "foo".to_string()), + ("type".to_string(), "int".to_string()), + ]; + assert_eq!(make_attr_string(&attrs), " name=\"foo\" type=\"int\""); + } + + #[test] + fn escapes_attr_values() { + let attrs = vec![("name".to_string(), "foo & bar".to_string())]; + assert_eq!(make_attr_string(&attrs), " name=\"foo &amp; bar\""); + } + } +} From 889d6b58c73763b301a6626822616db90240e414 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Wed, 28 Jan 2026 16:04:00 +0530 Subject: [PATCH 2/9] ci: add Rust unit tests to CI workflow Amp-Thread-ID: https://ampcode.com/threads/T-019c0425-ac62-76d8-9d59-4d6aba3edf45 Co-authored-by: Amp --- .github/workflows/rust-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 7ad6a06..4bc0627 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -38,6 +38,10 @@ jobs: working-directory: rust run: cargo clippy --all-targets --all-features -- -D warnings + - name: Run Rust unit tests + working-directory: rust + run: cargo test --no-default-features + rust-test: name: Build & Test (${{ matrix.os }}, Python ${{ matrix.python-version }}) runs-on: ${{ matrix.os }} From 607a093102802eef623898f389fe66654f3885ba Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Wed, 28 Jan 2026 16:07:27 +0530 Subject: [PATCH 3/9] refactor: reformat code with rust fmt --- rust/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ea38ea5..6345de1 100644 
--- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -768,7 +768,10 @@ mod tests { #[test] fn handles_special_xml_chars() { - assert_eq!(wrap_cdata(""), "]]>"); + assert_eq!( + wrap_cdata(""), + "]]>" + ); } } From 29131ca2dd993a2aeb0d6b9d84baa2cd808e4439 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Wed, 28 Jan 2026 16:09:03 +0530 Subject: [PATCH 4/9] fix: improve fuzz test assertion for make_attr_string Use more specific pattern matching (` key="`) instead of just checking if key exists as substring. This avoids false positives with overlapping keys (e.g. 'a' vs 'aa') or malformed attribute formatting. Suggested-by: sourcery-ai Amp-Thread-ID: https://ampcode.com/threads/T-019c0425-ac62-76d8-9d59-4d6aba3edf45 Co-authored-by: Amp --- .../23dbd0963ab0e20509f61d3bbab1aab8c0acc1ce | 1 + .../2cac163ebc8063f5adb15a7ede5fab5c2a9b9e23 | 1 + .../55cdfedcb4f4369cf383ac30deee62d55cbea7f3 | 1 + .../71261c370e317f7ebcb6d903dba6f5dc1396384a | 1 + .../8206f3e251d21e9c9a1b29ff459c7fb6addd8ae6 | 1 + .../de603c91038f329cce1cca8a30ea161b2271e2f6 | 1 + rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs | 13 ++++++++++--- 7 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 rust/fuzz/corpus/fuzz_make_attr_string/23dbd0963ab0e20509f61d3bbab1aab8c0acc1ce create mode 100644 rust/fuzz/corpus/fuzz_make_attr_string/2cac163ebc8063f5adb15a7ede5fab5c2a9b9e23 create mode 100644 rust/fuzz/corpus/fuzz_make_attr_string/55cdfedcb4f4369cf383ac30deee62d55cbea7f3 create mode 100644 rust/fuzz/corpus/fuzz_make_attr_string/71261c370e317f7ebcb6d903dba6f5dc1396384a create mode 100644 rust/fuzz/corpus/fuzz_make_attr_string/8206f3e251d21e9c9a1b29ff459c7fb6addd8ae6 create mode 100644 rust/fuzz/corpus/fuzz_make_attr_string/de603c91038f329cce1cca8a30ea161b2271e2f6 diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/23dbd0963ab0e20509f61d3bbab1aab8c0acc1ce b/rust/fuzz/corpus/fuzz_make_attr_string/23dbd0963ab0e20509f61d3bbab1aab8c0acc1ce new file mode 100644 index 0000000..850a8b6 --- /dev/null +++ 
b/rust/fuzz/corpus/fuzz_make_attr_string/23dbd0963ab0e20509f61d3bbab1aab8c0acc1ce @@ -0,0 +1 @@ +]I]] \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/2cac163ebc8063f5adb15a7ede5fab5c2a9b9e23 b/rust/fuzz/corpus/fuzz_make_attr_string/2cac163ebc8063f5adb15a7ede5fab5c2a9b9e23 new file mode 100644 index 0000000..9f5daf2 --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/2cac163ebc8063f5adb15a7ede5fab5c2a9b9e23 @@ -0,0 +1 @@ +£¶¢§ \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/55cdfedcb4f4369cf383ac30deee62d55cbea7f3 b/rust/fuzz/corpus/fuzz_make_attr_string/55cdfedcb4f4369cf383ac30deee62d55cbea7f3 new file mode 100644 index 0000000..0c5c35f --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/55cdfedcb4f4369cf383ac30deee62d55cbea7f3 @@ -0,0 +1 @@ +]]J( \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/71261c370e317f7ebcb6d903dba6f5dc1396384a b/rust/fuzz/corpus/fuzz_make_attr_string/71261c370e317f7ebcb6d903dba6f5dc1396384a new file mode 100644 index 0000000..cae5eaa --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/71261c370e317f7ebcb6d903dba6f5dc1396384a @@ -0,0 +1 @@ +]]( \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/8206f3e251d21e9c9a1b29ff459c7fb6addd8ae6 b/rust/fuzz/corpus/fuzz_make_attr_string/8206f3e251d21e9c9a1b29ff459c7fb6addd8ae6 new file mode 100644 index 0000000..edbee49 --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/8206f3e251d21e9c9a1b29ff459c7fb6addd8ae6 @@ -0,0 +1 @@ +]J( \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/de603c91038f329cce1cca8a30ea161b2271e2f6 b/rust/fuzz/corpus/fuzz_make_attr_string/de603c91038f329cce1cca8a30ea161b2271e2f6 new file mode 100644 index 0000000..079b579 --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/de603c91038f329cce1cca8a30ea161b2271e2f6 @@ -0,0 +1 @@ +] diff --git a/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs 
b/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs index 294ad88..250eb47 100644 --- a/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs +++ b/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs @@ -22,10 +22,17 @@ fuzz_target!(|input: AttrInput| { // 2. Result should start with space (for XML formatting) assert!(result.starts_with(' '), "Attribute string should start with space"); - // 3. Each attribute should produce key="value" format + // 3. Each attribute should produce a ` key="value"`-like fragment. + // We check for the more specific pattern ` {key}="` to avoid + // passing on overlapping keys (e.g. "a" vs "aa") or malformed formatting. for (key, _value) in &input.attrs { - // Key should appear in the result - assert!(result.contains(key), "Key '{}' should appear in result", key); + let expected_fragment = format!(" {}=\"", key); + assert!( + result.contains(&expected_fragment), + "Attribute fragment '{}' should appear in result '{}'", + expected_fragment, + result + ); } // 4. Values should be escaped (no raw & < > " ' in values) From c7208c5ac7b0f3ec58ff9c377b48b79a411916d9 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Wed, 28 Jan 2026 16:38:48 +0530 Subject: [PATCH 5/9] Update benchmarks with latest results (Jan 28, 2026) Amp-Thread-ID: https://ampcode.com/threads/T-019c0438-3a17-7033-9c8b-a5dd94144dd5 Co-authored-by: Amp --- BENCHMARKS.md | 64 +++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 4f4c6d7..53e80c3 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -6,7 +6,7 @@ Comprehensive performance comparison between all json2xml implementations. - **Machine**: Apple Silicon (M-series, aarch64) - **OS**: macOS -- **Date**: January 16, 2026 +- **Date**: January 28, 2026 ### Implementations Tested @@ -14,7 +14,7 @@ Comprehensive performance comparison between all json2xml implementations. 
|----------------|------|-------| | Python | Library | Pure Python (json2xml) | | Rust | Library | Native extension via PyO3 (json2xml-rs) | -| Go | CLI | Standalone binary (json2xml-go) | +| Go | CLI | Standalone binary (json2xml-go v1.0.0) | | Zig | CLI | Standalone binary (json2xml-zig) | ## Test Data @@ -22,10 +22,10 @@ Comprehensive performance comparison between all json2xml implementations. | Size | Description | Bytes | |------|-------------|-------| | Small | Simple object `{"name": "John", "age": 30, "city": "New York"}` | 47 | -| Medium | 10 generated records with nested structures | 3,212 | +| Medium | 10 generated records with nested structures | ~3,208 | | bigexample.json | Real-world patent data | 2,018 | -| Large | 100 generated records with nested structures | 32,226 | -| Very Large | 1,000 generated records with nested structures | 323,126 | +| Large | 100 generated records with nested structures | ~32,205 | +| Very Large | 1,000 generated records with nested structures | ~323,119 | ## Results @@ -33,50 +33,54 @@ Comprehensive performance comparison between all json2xml implementations. 
| Test Case | Python | Rust | Go | Zig | |-----------|--------|------|-----|-----| -| Small (47B) | 40.12µs | 1.45µs | 4.65ms | 3.74ms | -| Medium (3.2KB) | 2.14ms | 71.28µs | 4.07ms | 3.28ms | -| bigexample (2KB) | 819.46µs | 32.88µs | 4.02ms | 2.96ms | -| Large (32KB) | 21.08ms | 739.89µs | 4.05ms | 6.11ms | -| Very Large (323KB) | 212.61ms | 7.55ms | 4.38ms | 33.24ms | +| Small (47B) | 41.88µs | 1.66µs | 4.52ms | 2.80ms | +| Medium (3.2KB) | 2.19ms | 71.85µs | 4.33ms | 2.18ms | +| bigexample (2KB) | 854.38µs | 30.89µs | 4.28ms | 2.12ms | +| Large (32KB) | 21.57ms | 672.96µs | 4.47ms | 2.48ms | +| Very Large (323KB) | 216.52ms | 6.15ms | 4.44ms | 5.54ms | ### Speedup vs Pure Python | Test Case | Rust | Go | Zig | |-----------|------|-----|-----| -| Small (47B) | **27.6x** | 0.0x* | 0.0x* | -| Medium (3.2KB) | **30.0x** | 0.5x* | 0.7x* | -| bigexample (2KB) | **24.9x** | 0.2x* | 0.3x* | -| Large (32KB) | **28.5x** | 5.2x | 3.5x | -| Very Large (323KB) | **28.2x** | **48.5x** | 6.4x | +| Small (47B) | **25.2x** | 0.0x* | 0.0x* | +| Medium (3.2KB) | **30.5x** | 0.5x* | 1.0x* | +| bigexample (2KB) | **27.7x** | 0.2x* | 0.4x* | +| Large (32KB) | **32.1x** | 4.8x | **8.7x** | +| Very Large (323KB) | **35.2x** | **48.8x** | **39.1x** | -*CLI tools have process spawn overhead (~3-4ms) which dominates for small inputs +*CLI tools have process spawn overhead (~2-4ms) which dominates for small inputs ## Key Observations ### 1. Rust Extension is the Best Choice for Python Users 🦀 The Rust extension (json2xml-rs) provides: -- **~28x faster** than pure Python consistently across all input sizes +- **~25-35x faster** than pure Python consistently across all input sizes - **Zero process overhead** - called directly from Python - **Automatic fallback** - pure Python used if Rust unavailable - **Easy install**: `pip install json2xml[fast]` -### 2. Go Excels for Large CLI Workloads 🚀 +### 2. 
Go Excels for Very Large CLI Workloads 🚀 For very large inputs (323KB+): -- **48.5x faster** than Python -- But ~3-4ms startup overhead hurts small file performance +- **48.8x faster** than Python +- But ~4ms startup overhead hurts small file performance - Best for batch processing or large file conversions -### 3. Zig is Competitive but Has Trade-offs +### 3. Zig is Now Highly Competitive ⚡ -- Consistent ~3ms startup overhead -- Good for medium-large files (3-6x faster than Python) -- Less optimized than Go for very large inputs +After recent optimizations: +- **39.1x faster** than Python for very large files +- **8.7x faster** for large files (32KB) +- Faster startup than Go (~2ms vs ~4ms) +- Best balance of startup time and throughput ### 4. Process Spawn Overhead Matters -CLI tools (Go, Zig) have ~3-4ms process spawn overhead: +CLI tools (Go, Zig) have process spawn overhead: +- Go: ~4ms startup overhead +- Zig: ~2ms startup overhead - Dominates for small inputs (makes them appear slower than Python!) 
- Negligible for large inputs where actual work dominates - Rust extension avoids this entirely by being a native Python module @@ -85,9 +89,9 @@ CLI tools (Go, Zig) have ~3-4ms process spawn overhead: | Use Case | Recommended | Why | |----------|-------------|-----| -| Python library calls | **Rust** (`pip install json2xml[fast]`) | 28x faster, no overhead | -| Small files via CLI | **Rust** via Python | CLI overhead dominates | -| Large files via CLI | **Go** (json2xml-go) | 48x faster for 300KB+ | +| Python library calls | **Rust** (`pip install json2xml[fast]`) | 25-35x faster, no overhead | +| Small files via CLI | **Zig** (json2xml-zig) | Fastest startup (~2ms) | +| Large files via CLI | **Go** or **Zig** | Both excellent (Go slightly faster) | | Batch processing | **Go** or **Rust** | Both excellent | | Pure Python required | **Python** (json2xml) | Always available | @@ -104,7 +108,7 @@ pip install json2xml[fast] go install github.com/vinitkumar/json2xml-go@latest # Zig CLI -# See: github.com/nicholasgriffintn/json2xml-zig +# See: github.com/vinitkumar/json2xml-zig ``` ## Running the Benchmarks @@ -130,4 +134,4 @@ python benchmark_multi_python.py ## Related Projects - **Go version**: [github.com/vinitkumar/json2xml-go](https://github.com/vinitkumar/json2xml-go) -- **Zig version**: [github.com/nicholasgriffintn/json2xml-zig](https://github.com/nicholasgriffintn/json2xml-zig) +- **Zig version**: [github.com/vinitkumar/json2xml-zig](https://github.com/vinitkumar/json2xml-zig) From 59a65d6e6c527734a8cecd9372862891b79911e3 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Thu, 29 Jan 2026 12:47:23 +0530 Subject: [PATCH 6/9] fix: move dev dependencies to optional-dependencies pytest, pytest-cov, coverage, and setuptools were incorrectly listed as runtime dependencies, causing them to be installed with 'pip install json2xml'. Moved them to [project.optional-dependencies] under 'dev' group, so users get only defusedxml, urllib3, and xmltodict at runtime. 
Developers can install with 'pip install json2xml[dev]'. fixes #272 --- pyproject.toml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2ea980c..0a6bdc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,10 +31,6 @@ dependencies = [ "defusedxml", "urllib3", "xmltodict>=0.12.0", - "pytest", - "pytest-cov", - "coverage", - "setuptools", ] [project.urls] @@ -47,8 +43,11 @@ json2xml-py = "json2xml.cli:main" include = ["json2xml"] [project.optional-dependencies] -test = [ +dev = [ "pytest>=8.4.1", + "pytest-cov", + "coverage", + "setuptools", ] fast = ["json2xml-rs>=0.1.0"] From 6f5e399d770daade10b432d27b730484313dc9e5 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Thu, 29 Jan 2026 12:50:15 +0530 Subject: [PATCH 7/9] fix: update deps --- uv.lock | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/uv.lock b/uv.lock index e35f3aa..485952a 100644 --- a/uv.lock +++ b/uv.lock @@ -147,39 +147,37 @@ wheels = [ [[package]] name = "json2xml" -version = "6.0.1" +version = "6.0.2" source = { editable = "." 
} dependencies = [ - { name = "coverage" }, { name = "defusedxml" }, - { name = "pytest" }, - { name = "pytest-cov" }, - { name = "setuptools" }, { name = "urllib3" }, { name = "xmltodict" }, ] [package.optional-dependencies] +dev = [ + { name = "coverage" }, + { name = "pytest" }, + { name = "pytest-cov" }, + { name = "setuptools" }, +] fast = [ { name = "json2xml-rs" }, ] -test = [ - { name = "pytest" }, -] [package.metadata] requires-dist = [ - { name = "coverage" }, + { name = "coverage", marker = "extra == 'dev'" }, { name = "defusedxml" }, { name = "json2xml-rs", marker = "extra == 'fast'", specifier = ">=0.1.0" }, - { name = "pytest" }, - { name = "pytest", marker = "extra == 'test'", specifier = ">=8.4.1" }, - { name = "pytest-cov" }, - { name = "setuptools" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.4.1" }, + { name = "pytest-cov", marker = "extra == 'dev'" }, + { name = "setuptools", marker = "extra == 'dev'" }, { name = "urllib3" }, { name = "xmltodict", specifier = ">=0.12.0" }, ] -provides-extras = ["test", "fast"] +provides-extras = ["dev", "fast"] [[package]] name = "json2xml-rs" From e62fefa6fd79661798f5a05d35ae1660e80fb6a3 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Thu, 29 Jan 2026 12:53:31 +0530 Subject: [PATCH 8/9] fix: deps --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 63fb222..a8ad82b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -44,7 +44,7 @@ jobs: - name: Install dependencies run: | uv venv - uv pip install -e . 
+ uv pip install -e json2xml[dev] - name: Run ty run: | source .venv/bin/activate From cd76e3659d0b5b1250ac01e1723cd61aea345844 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Thu, 29 Jan 2026 13:34:31 +0530 Subject: [PATCH 9/9] fix: update --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a8ad82b..ae1c20e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -44,7 +44,7 @@ jobs: - name: Install dependencies run: | uv venv - uv pip install -e json2xml[dev] + uv pip install -e ".[dev]" - name: Run ty run: | source .venv/bin/activate