diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 7ad6a06..4bc0627 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -38,6 +38,10 @@ jobs: working-directory: rust run: cargo clippy --all-targets --all-features -- -D warnings + - name: Run Rust unit tests + working-directory: rust + run: cargo test --no-default-features + rust-test: name: Build & Test (${{ matrix.os }}, Python ${{ matrix.python-version }}) runs-on: ${{ matrix.os }} diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 756aadc..54c1e60 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -7,10 +7,14 @@ license = "Apache-2.0" [lib] name = "json2xml_rs" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] + +[features] +default = ["python"] +python = ["pyo3/extension-module", "dep:pyo3"] [dependencies] -pyo3 = { version = "0.27", features = ["extension-module"] } +pyo3 = { version = "0.27", optional = true } [profile.release] lto = true diff --git a/rust/fuzz/Cargo.toml b/rust/fuzz/Cargo.toml new file mode 100644 index 0000000..8d7c025 --- /dev/null +++ b/rust/fuzz/Cargo.toml @@ -0,0 +1,51 @@ +[package] +name = "json2xml_rs-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +arbitrary = { version = "1", features = ["derive"] } + +[dependencies.json2xml_rs] +path = ".." +default-features = false + +[[bin]] +name = "fuzz_escape_xml" +path = "fuzz_targets/fuzz_escape_xml.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_wrap_cdata" +path = "fuzz_targets/fuzz_wrap_cdata.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_is_valid_xml_name" +path = "fuzz_targets/fuzz_is_valid_xml_name.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_make_valid_xml_name" +path = "fuzz_targets/fuzz_make_valid_xml_name.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_make_attr_string" +path = "fuzz_targets/fuzz_make_attr_string.rs" +test = false +doc = false +bench = false diff --git a/rust/fuzz/corpus/fuzz_escape_xml/71853c6197a6a7f222db0f1978c7cb232b87c5ee b/rust/fuzz/corpus/fuzz_escape_xml/71853c6197a6a7f222db0f1978c7cb232b87c5ee new file mode 100644 index 0000000..139597f --- /dev/null +++ b/rust/fuzz/corpus/fuzz_escape_xml/71853c6197a6a7f222db0f1978c7cb232b87c5ee @@ -0,0 +1,2 @@ + + diff --git a/rust/fuzz/corpus/fuzz_escape_xml/a70fb6a9415d3c34a9ea5510ba7e3a055d88aef9 b/rust/fuzz/corpus/fuzz_escape_xml/a70fb6a9415d3c34a9ea5510ba7e3a055d88aef9 new file mode 100644 index 0000000..b8635bc --- /dev/null +++ b/rust/fuzz/corpus/fuzz_escape_xml/a70fb6a9415d3c34a9ea5510ba7e3a055d88aef9 @@ -0,0 +1 @@ +ë \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/23dbd0963ab0e20509f61d3bbab1aab8c0acc1ce b/rust/fuzz/corpus/fuzz_make_attr_string/23dbd0963ab0e20509f61d3bbab1aab8c0acc1ce new file mode 100644 index 0000000..850a8b6 --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/23dbd0963ab0e20509f61d3bbab1aab8c0acc1ce @@ -0,0 +1 @@ +]I]] \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/2cac163ebc8063f5adb15a7ede5fab5c2a9b9e23 b/rust/fuzz/corpus/fuzz_make_attr_string/2cac163ebc8063f5adb15a7ede5fab5c2a9b9e23 new file mode 100644 index 0000000..9f5daf2 --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/2cac163ebc8063f5adb15a7ede5fab5c2a9b9e23 @@ -0,0 +1 @@ +ٶ˘§ \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/55cdfedcb4f4369cf383ac30deee62d55cbea7f3 b/rust/fuzz/corpus/fuzz_make_attr_string/55cdfedcb4f4369cf383ac30deee62d55cbea7f3 new file mode 100644 index 0000000..0c5c35f --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/55cdfedcb4f4369cf383ac30deee62d55cbea7f3 @@ -0,0 +1 @@ +]]J( \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/71261c370e317f7ebcb6d903dba6f5dc1396384a b/rust/fuzz/corpus/fuzz_make_attr_string/71261c370e317f7ebcb6d903dba6f5dc1396384a new file mode 100644 index 0000000..cae5eaa --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/71261c370e317f7ebcb6d903dba6f5dc1396384a @@ -0,0 +1 @@ +]]( \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/8206f3e251d21e9c9a1b29ff459c7fb6addd8ae6 b/rust/fuzz/corpus/fuzz_make_attr_string/8206f3e251d21e9c9a1b29ff459c7fb6addd8ae6 new file mode 100644 index 0000000..edbee49 --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/8206f3e251d21e9c9a1b29ff459c7fb6addd8ae6 @@ -0,0 +1 @@ +]J( \ No newline at end of file diff --git a/rust/fuzz/corpus/fuzz_make_attr_string/de603c91038f329cce1cca8a30ea161b2271e2f6 b/rust/fuzz/corpus/fuzz_make_attr_string/de603c91038f329cce1cca8a30ea161b2271e2f6 new file mode 100644 index 0000000..079b579 --- /dev/null +++ b/rust/fuzz/corpus/fuzz_make_attr_string/de603c91038f329cce1cca8a30ea161b2271e2f6 @@ -0,0 +1 @@ +] diff --git a/rust/fuzz/fuzz_targets/fuzz_escape_xml.rs b/rust/fuzz/fuzz_targets/fuzz_escape_xml.rs new file mode 100644 index 0000000..23951ed --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_escape_xml.rs @@ -0,0 +1,23 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use json2xml_rs::escape_xml; + +fuzz_target!(|data: &str| { + let result = escape_xml(data); + + // Verify invariants: + // 1. Result should not contain unescaped special chars + assert!(!result.contains('&') || result.contains("&") || result.contains(""") + || result.contains("'") || result.contains("<") || result.contains(">")); + + // 2. Result should be valid (no panics occurred) + // 3. If input had no special chars, output equals input + if !data.contains('&') && !data.contains('"') && !data.contains('\'') + && !data.contains('<') && !data.contains('>') { + assert_eq!(result, data); + } + + // 4. Output length should be >= input length (escaping only adds chars) + assert!(result.len() >= data.len()); +}); diff --git a/rust/fuzz/fuzz_targets/fuzz_is_valid_xml_name.rs b/rust/fuzz/fuzz_targets/fuzz_is_valid_xml_name.rs new file mode 100644 index 0000000..45b59f0 --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_is_valid_xml_name.rs @@ -0,0 +1,33 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use json2xml_rs::is_valid_xml_name; + +fuzz_target!(|data: &str| { + let result = is_valid_xml_name(data); + + // Verify invariants: + // 1. Empty string is always invalid + if data.is_empty() { + assert!(!result); + } + + // 2. String starting with digit is invalid + if let Some(first) = data.chars().next() { + if first.is_ascii_digit() { + assert!(!result); + } + } + + // 3. String starting with "xml" (case-insensitive) is invalid + if data.to_lowercase().starts_with("xml") { + assert!(!result); + } + + // 4. String containing spaces is invalid + if data.contains(' ') { + assert!(!result); + } + + // 5. Function should never panic - reaching here means it didn't +}); diff --git a/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs b/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs new file mode 100644 index 0000000..250eb47 --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_make_attr_string.rs @@ -0,0 +1,42 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use arbitrary::Arbitrary; +use json2xml_rs::make_attr_string; + +#[derive(Arbitrary, Debug)] +struct AttrInput { + attrs: Vec<(String, String)>, +} + +fuzz_target!(|input: AttrInput| { + let result = make_attr_string(&input.attrs); + + // Verify invariants: + // 1. Empty attrs should produce empty string + if input.attrs.is_empty() { + assert!(result.is_empty()); + return; + } + + // 2. Result should start with space (for XML formatting) + assert!(result.starts_with(' '), "Attribute string should start with space"); + + // 3. Each attribute should produce a ` key="value"`-like fragment. + // We check for the more specific pattern ` {key}="` to avoid + // passing on overlapping keys (e.g. "a" vs "aa") or malformed formatting. + for (key, _value) in &input.attrs { + let expected_fragment = format!(" {}=\"", key); + assert!( + result.contains(&expected_fragment), + "Attribute fragment '{}' should appear in result '{}'", + expected_fragment, + result + ); + } + + // 4. Values should be escaped (no raw & < > " ' in values) + // The make_attr_string calls escape_xml on values + + // 5. Function should never panic - reaching here means it didn't +}); diff --git a/rust/fuzz/fuzz_targets/fuzz_make_valid_xml_name.rs b/rust/fuzz/fuzz_targets/fuzz_make_valid_xml_name.rs new file mode 100644 index 0000000..bdf417b --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_make_valid_xml_name.rs @@ -0,0 +1,30 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use json2xml_rs::make_valid_xml_name; + +fuzz_target!(|data: &str| { + let (name, attr) = make_valid_xml_name(data); + + // Verify invariants: + // 1. The returned name must be a valid XML name OR be "key" with an attribute + if name != "key" { + // If we didn't fall back to "key", the name should be valid + // (though it might have been transformed) + assert!(!name.is_empty(), "Name should not be empty"); + } + + // 2. If attr is Some, name should be "key" + if attr.is_some() { + assert_eq!(name, "key", "Fallback name should be 'key'"); + let (attr_name, _attr_value) = attr.unwrap(); + assert_eq!(attr_name, "name", "Attribute key should be 'name'"); + } + + // 3. Purely numeric input should get 'n' prefix + if !data.is_empty() && data.chars().all(|c| c.is_ascii_digit()) { + assert!(name.starts_with('n'), "Numeric keys should get 'n' prefix"); + } + + // 4. Function should never panic - reaching here means it didn't +}); diff --git a/rust/fuzz/fuzz_targets/fuzz_wrap_cdata.rs b/rust/fuzz/fuzz_targets/fuzz_wrap_cdata.rs new file mode 100644 index 0000000..f159747 --- /dev/null +++ b/rust/fuzz/fuzz_targets/fuzz_wrap_cdata.rs @@ -0,0 +1,21 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use json2xml_rs::wrap_cdata; + +fuzz_target!(|data: &str| { + let result = wrap_cdata(data); + + // Verify invariants: + // 1. Result must start with CDATA opening + assert!(result.starts_with("")); + + // 3. The ]]> sequence in input must be properly escaped + // (split into ]]]]>) + + // 4. Result should be longer than or equal to input + CDATA wrapper (12 chars) + assert!(result.len() >= data.len() + 12); +}); diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 6cff327..6345de1 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -3,14 +3,16 @@ //! This module provides a high-performance Rust implementation of dicttoxml //! that can be used as a drop-in replacement for the pure Python version. +#[cfg(feature = "python")] use pyo3::prelude::*; +#[cfg(feature = "python")] use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString}; use std::fmt::Write; /// Escape special XML characters in a string. /// This is one of the hottest paths - optimized for single-pass processing. #[inline] -fn escape_xml(s: &str) -> String { +pub fn escape_xml(s: &str) -> String { let mut result = String::with_capacity(s.len() + s.len() / 10); for c in s.chars() { match c { @@ -27,14 +29,14 @@ fn escape_xml(s: &str) -> String { /// Wrap content in CDATA section #[inline] -fn wrap_cdata(s: &str) -> String { +pub fn wrap_cdata(s: &str) -> String { let escaped = s.replace("]]>", "]]]]>"); format!("", escaped) } /// Check if a key is a valid XML element name (simplified check) /// Full validation would require XML parsing, but this catches common issues -fn is_valid_xml_name(key: &str) -> bool { +pub fn is_valid_xml_name(key: &str) -> bool { if key.is_empty() { return false; } @@ -59,7 +61,7 @@ fn is_valid_xml_name(key: &str) -> bool { } /// Make a valid XML name from a key, returning the key and any attributes -fn make_valid_xml_name(key: &str) -> (String, Option<(String, String)>) { +pub fn make_valid_xml_name(key: &str) -> (String, Option<(String, String)>) { let escaped = escape_xml(key); // Already valid @@ -83,7 +85,7 @@ fn make_valid_xml_name(key: &str) -> (String, Option<(String, String)>) { } /// Build an attribute string from key-value pairs -fn make_attr_string(attrs: &[(String, String)]) -> String { +pub fn make_attr_string(attrs: &[(String, String)]) -> String { if attrs.is_empty() { return String::new(); } @@ -95,6 +97,7 @@ fn make_attr_string(attrs: &[(String, String)]) -> String { } /// Configuration for XML conversion +#[cfg(feature = "python")] struct ConvertConfig { attr_type: bool, cdata: bool, @@ -102,7 +105,11 @@ struct ConvertConfig { list_headers: bool, } +#[cfg(feature = "python")] +use pyo3::PyResult; + /// Convert a Python value to XML string +#[cfg(feature = "python")] fn convert_value( py: Python<'_>, obj: &Bound<'_, PyAny>, @@ -167,6 +174,7 @@ fn convert_value( } /// Convert a string value to XML +#[cfg(feature = "python")] fn convert_string(key: &str, val: &str, config: &ConvertConfig) -> PyResult { let (xml_key, name_attr) = make_valid_xml_name(key); let mut attrs = Vec::new(); @@ -192,6 +200,7 @@ fn convert_string(key: &str, val: &str, config: &ConvertConfig) -> PyResult PyResult { let (xml_key, name_attr) = make_valid_xml_name(key); let mut attrs = Vec::new(); @@ -233,6 +243,7 @@ fn convert_bool(key: &str, val: bool, config: &ConvertConfig) -> PyResult PyResult { let (xml_key, name_attr) = make_valid_xml_name(key); let mut attrs = Vec::new(); @@ -249,6 +260,7 @@ fn convert_none(key: &str, config: &ConvertConfig) -> PyResult { } /// Convert a dictionary to XML +#[cfg(feature = "python")] fn convert_dict( py: Python<'_>, dict: &Bound<'_, PyDict>, @@ -426,6 +438,7 @@ fn convert_dict( } /// Convert a list to XML +#[cfg(feature = "python")] fn convert_list( py: Python<'_>, list: &Bound<'_, PyList>, @@ -602,6 +615,7 @@ fn convert_list( /// /// Returns: /// bytes: The XML representation of the input object +#[cfg(feature = "python")] #[pyfunction] #[pyo3(signature = (obj, root=true, custom_root="root", attr_type=true, item_wrap=true, cdata=false, list_headers=false))] #[allow(clippy::too_many_arguments)] @@ -647,18 +661,21 @@ fn dicttoxml( /// Fast XML string escaping. /// /// Escapes &, ", ', <, > characters for XML. +#[cfg(feature = "python")] #[pyfunction] fn escape_xml_py(s: &str) -> String { escape_xml(s) } /// Wrap a string in CDATA section. +#[cfg(feature = "python")] #[pyfunction] fn wrap_cdata_py(s: &str) -> String { wrap_cdata(s) } /// A Python module implemented in Rust. +#[cfg(feature = "python")] #[pymodule] fn json2xml_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(dicttoxml, m)?)?; @@ -666,3 +683,233 @@ fn json2xml_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(wrap_cdata_py, m)?)?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + mod escape_xml_tests { + use super::*; + + #[test] + fn escapes_ampersand() { + assert_eq!(escape_xml("foo & bar"), "foo & bar"); + } + + #[test] + fn escapes_double_quote() { + assert_eq!(escape_xml("say \"hello\""), "say "hello""); + } + + #[test] + fn escapes_single_quote() { + assert_eq!(escape_xml("it's fine"), "it's fine"); + } + + #[test] + fn escapes_less_than() { + assert_eq!(escape_xml("a < b"), "a < b"); + } + + #[test] + fn escapes_greater_than() { + assert_eq!(escape_xml("a > b"), "a > b"); + } + + #[test] + fn escapes_all_special_chars() { + assert_eq!( + escape_xml(""), + "<tag attr="val" & 'x'>" + ); + } + + #[test] + fn handles_empty_string() { + assert_eq!(escape_xml(""), ""); + } + + #[test] + fn handles_no_special_chars() { + assert_eq!(escape_xml("hello world 123"), "hello world 123"); + } + + #[test] + fn handles_unicode() { + assert_eq!(escape_xml("cafĂ© & thĂ©"), "cafĂ© & thĂ©"); + } + } + + mod wrap_cdata_tests { + use super::*; + + #[test] + fn wraps_simple_string() { + assert_eq!(wrap_cdata("hello"), ""); + } + + #[test] + fn wraps_empty_string() { + assert_eq!(wrap_cdata(""), ""); + } + + #[test] + fn escapes_cdata_end_sequence() { + assert_eq!(wrap_cdata("foo]]>bar"), "bar]]>"); + } + + #[test] + fn handles_multiple_cdata_end_sequences() { + assert_eq!( + wrap_cdata("a]]>b]]>c"), + "b]]]]>c]]>" + ); + } + + #[test] + fn handles_special_xml_chars() { + assert_eq!( + wrap_cdata(""), + "]]>" + ); + } + } + + mod is_valid_xml_name_tests { + use super::*; + + #[test] + fn accepts_simple_name() { + assert!(is_valid_xml_name("element")); + } + + #[test] + fn accepts_name_with_underscore_prefix() { + assert!(is_valid_xml_name("_element")); + } + + #[test] + fn accepts_name_with_numbers() { + assert!(is_valid_xml_name("item123")); + } + + #[test] + fn accepts_name_with_hyphens() { + assert!(is_valid_xml_name("my-element")); + } + + #[test] + fn accepts_name_with_dots() { + assert!(is_valid_xml_name("my.element")); + } + + #[test] + fn accepts_name_with_colons() { + assert!(is_valid_xml_name("ns:element")); + } + + #[test] + fn rejects_empty_string() { + assert!(!is_valid_xml_name("")); + } + + #[test] + fn rejects_name_starting_with_number() { + assert!(!is_valid_xml_name("123element")); + } + + #[test] + fn rejects_name_starting_with_hyphen() { + assert!(!is_valid_xml_name("-element")); + } + + #[test] + fn rejects_name_with_spaces() { + assert!(!is_valid_xml_name("my element")); + } + + #[test] + fn rejects_xml_prefix_lowercase() { + assert!(!is_valid_xml_name("xmlelement")); + } + + #[test] + fn rejects_xml_prefix_uppercase() { + assert!(!is_valid_xml_name("XMLelement")); + } + + #[test] + fn rejects_xml_prefix_mixed_case() { + assert!(!is_valid_xml_name("XmLelement")); + } + } + + mod make_valid_xml_name_tests { + use super::*; + + #[test] + fn returns_valid_name_unchanged() { + let (name, attr) = make_valid_xml_name("element"); + assert_eq!(name, "element"); + assert!(attr.is_none()); + } + + #[test] + fn prepends_n_to_numeric_key() { + let (name, attr) = make_valid_xml_name("123"); + assert_eq!(name, "n123"); + assert!(attr.is_none()); + } + + #[test] + fn replaces_spaces_with_underscores() { + let (name, attr) = make_valid_xml_name("my element"); + assert_eq!(name, "my_element"); + assert!(attr.is_none()); + } + + #[test] + fn falls_back_to_key_with_name_attr() { + let (name, attr) = make_valid_xml_name("-invalid"); + assert_eq!(name, "key"); + assert_eq!(attr, Some(("name".to_string(), "-invalid".to_string()))); + } + + #[test] + fn escapes_special_chars_in_name() { + let (name, attr) = make_valid_xml_name("tag&name"); + assert_eq!(name, "key"); + assert_eq!(attr, Some(("name".to_string(), "tag&name".to_string()))); + } + } + + mod make_attr_string_tests { + use super::*; + + #[test] + fn returns_empty_for_empty_attrs() { + assert_eq!(make_attr_string(&[]), ""); + } + + #[test] + fn formats_single_attr() { + let attrs = vec![("type".to_string(), "str".to_string())]; + assert_eq!(make_attr_string(&attrs), " type=\"str\""); + } + + #[test] + fn formats_multiple_attrs() { + let attrs = vec![ + ("name".to_string(), "foo".to_string()), + ("type".to_string(), "int".to_string()), + ]; + assert_eq!(make_attr_string(&attrs), " name=\"foo\" type=\"int\""); + } + + #[test] + fn escapes_attr_values() { + let attrs = vec![("name".to_string(), "foo & bar".to_string())]; + assert_eq!(make_attr_string(&attrs), " name=\"foo & bar\""); + } + } +}