From 4804bf0d7a0cde3b4176a042dd4e6768d6f53b47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 15 Jan 2026 02:39:28 +0100 Subject: [PATCH 1/3] Add unit tests for pretty_print.rs --- src/pretty_print.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/pretty_print.rs b/src/pretty_print.rs index 625283f8..39fb8a34 100644 --- a/src/pretty_print.rs +++ b/src/pretty_print.rs @@ -513,3 +513,68 @@ fn need_quotes(string: &str) -> bool { || string.parse::().is_ok() || string.parse::().is_ok() } + +#[cfg(test)] +mod tests { + use super::*; + use sxd_document::dom::{ChildOfElement, ChildOfRoot}; + use sxd_document::parser; + + /// helper function + fn first_element(package: &sxd_document::Package) -> Element<'_> { + let doc = package.as_document(); + for child in doc.root().children() { + if let ChildOfRoot::Element(e) = child { + return e; + } + } + panic!("No root element found"); + } + + #[test] + /// Escapes XML entities and invisible characters for safe display. + /// Tests the method on a few hardcoded characters. + fn handle_special_chars_escapes() { + let input = "& < > \" ' \u{2061} \u{2062} \u{2063} \u{2064} x"; + let expected = "& < > " ' ⁡ ⁢ ⁣ ⁤ x"; + assert_eq!(handle_special_chars(input), expected); + } + + #[test] + /// Formats a leaf element as a single line with escaped text. + fn format_element_leaf_text() { + let package = parser::parse("&").unwrap(); + let math = first_element(&package); + let mi = math + .children() + .iter() + .find_map(|c| match c { + ChildOfElement::Element(e) => Some(*e), + _ => None, + }) + .unwrap(); + assert_eq!(format_element(mi, 0), " &\n"); + } + + #[test] + /// Formats a nested element with indentation and newlines. + fn format_element_nested() { + let package = parser::parse("x+").unwrap(); + let math = first_element(&package); + let rendered = format_element(math, 0); + assert!(rendered.starts_with(" \n")); + assert!(rendered.contains("\n x\n")); + assert!(rendered.contains("\n +\n")); + assert!(rendered.ends_with("\n")); + } + + #[test] + /// Escapes special characters in attribute values. + fn format_attrs_escapes() { + let package = parser::parse("").unwrap(); + let math = first_element(&package); + let rendered = format_attrs(&math.attributes()); + assert!(rendered.contains(" a='&'")); + assert!(rendered.contains(" b='<'")); + } +} \ No newline at end of file From fb089ec66098313deaec9e955ae79a2f23fbe65e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 15 Jan 2026 18:09:14 +0100 Subject: [PATCH 2/3] Extend unit tests in `pretty_print.rs` to handle non-BMP characters and XPath evaluation --- src/pretty_print.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/src/pretty_print.rs b/src/pretty_print.rs index 39fb8a34..f81cd157 100644 --- a/src/pretty_print.rs +++ b/src/pretty_print.rs @@ -577,4 +577,70 @@ mod tests { assert!(rendered.contains(" a='&'")); assert!(rendered.contains(" b='<'")); } -} \ No newline at end of file + + #[test] + /// Preserves non-BMP characters from a literal XML form. + fn format_element_non_bmp_character_literal() { + let package = parser::parse("𝞪").unwrap(); + let math = first_element(&package); + let mi = math + .children() + .iter() + .find_map(|c| match c { + ChildOfElement::Element(e) => Some(*e), + _ => None, + }) + .unwrap(); + let rendered = format_element(mi, 0); + assert!(rendered.contains("𝞪")); + } + + #[test] + /// Preserves non-BMP characters from a numeric XML form. + fn format_element_non_bmp_character_numeric() { + let package = parser::parse("𝞪").unwrap(); + let math = first_element(&package); + let mi = math + .children() + .iter() + .find_map(|c| match c { + ChildOfElement::Element(e) => Some(*e), + _ => None, + }) + .unwrap(); + let rendered = format_element(mi, 0); + assert!(rendered.contains("𝞪")); + } + + #[test] + /// Evaluates non-BMP literal text through sxd_xpath. + fn xpath_non_bmp_literal() { + use sxd_xpath::{Factory, Value}; + + let package = parser::parse("𝞪").unwrap(); + let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap(); + let context = sxd_xpath::Context::new(); + + let value = xpath.evaluate(&context, first_element(&package)).unwrap(); + match value { + Value::String(s) => assert_eq!(s, "𝞪"), + _ => panic!("Expected string value from xpath"), + } + } + + #[test] + /// Evaluates non-BMP numeric text through sxd_xpath. + fn xpath_non_bmp_numeric() { + use sxd_xpath::{Factory, Value}; + + let package = parser::parse("𝞪").unwrap(); + let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap(); + let context = sxd_xpath::Context::new(); + + let value = xpath.evaluate(&context, first_element(&package)).unwrap(); + match value { + Value::String(s) => assert_eq!(s, "𝞪"), + _ => panic!("Expected string value from xpath"), + } + } +} From 12e8f14ccbc13a6d6ce511102eb58557bd2b5f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 15 Jan 2026 18:20:43 +0100 Subject: [PATCH 3/3] Add unit tests for non-BMP character handling with MathML and XPath --- src/pretty_print.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/pretty_print.rs b/src/pretty_print.rs index f81cd157..9768a667 100644 --- a/src/pretty_print.rs +++ b/src/pretty_print.rs @@ -643,4 +643,70 @@ mod tests { _ => panic!("Expected string value from xpath"), } } + + #[test] + /// Evaluates non-BMP literal text with a MathML namespace-qualified XPath. + fn xpath_non_bmp_namespace_literal() { + use sxd_xpath::{Factory, Value}; + + let xml = "𝞪"; + let package = parser::parse(xml).unwrap(); + let xpath = Factory::new() + .build("string(/m:math/m:mi)") + .unwrap() + .unwrap(); + let mut context = sxd_xpath::Context::new(); + context.set_namespace("m", "http://www.w3.org/1998/Math/MathML"); + + let value = xpath.evaluate(&context, first_element(&package)).unwrap(); + match value { + Value::String(s) => assert_eq!(s, "𝞪"), + _ => panic!("Expected string value from xpath"), + } + } + + #[test] + /// Evaluates non-BMP numeric text with a MathML namespace-qualified XPath. + fn xpath_non_bmp_namespace_numeric() { + use sxd_xpath::{Factory, Value}; + + let xml = "𝞪"; + let package = parser::parse(xml).unwrap(); + let xpath = Factory::new() + .build("string(/m:math/m:mi)") + .unwrap() + .unwrap(); + let mut context = sxd_xpath::Context::new(); + context.set_namespace("m", "http://www.w3.org/1998/Math/MathML"); + + let value = xpath.evaluate(&context, first_element(&package)).unwrap(); + match value { + Value::String(s) => assert_eq!(s, "𝞪"), + _ => panic!("Expected string value from xpath"), + } + } + + #[test] + /// Extracts a text node via XPath (nodeset result) and verifies the non-BMP character survives. + fn xpath_non_bmp_text_nodeset() { + use sxd_xpath::{Factory, Value}; + + let xml = "𝞪"; + let package = parser::parse(xml).unwrap(); + let xpath = Factory::new().build("/m:math/m:mi/text()").unwrap().unwrap(); + let mut context = sxd_xpath::Context::new(); + context.set_namespace("m", "http://www.w3.org/1998/Math/MathML"); + + let value = xpath.evaluate(&context, first_element(&package)).unwrap(); + match value { + Value::Nodeset(nodes) => { + let ordered = nodes.document_order(); + let node = ordered.first().expect("Expected one text node"); + let text = node.text().expect("Expected text node"); + assert_eq!(text.text(), "𝞪"); + assert_eq!(ordered.len(), 1); + } + _ => panic!("Expected nodeset value from xpath"), + } + } }