Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 80 additions & 25 deletions crates/oxc_angular_compiler/src/output/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1227,9 +1227,11 @@ fn is_nullish_coalesce(expr: &OutputExpression<'_>) -> bool {
/// Escape a string for JavaScript output.
///
/// Uses double quotes to match Angular's output style.
/// Only escapes control characters (`"`, `\`, `\n`, `\r`, and `$` when requested).
/// Non-ASCII printable characters (e.g. `×`, `é`, `α`) are emitted as literal UTF-8,
/// matching Angular's `escapeIdentifier` behavior.
/// Escapes `"`, `\`, `\n`, `\r`, `$` (when requested), ASCII control characters,
/// and all non-ASCII characters (code point > 0x7E) as `\uNNNN` sequences.
/// Characters above the BMP (U+10000+) are encoded as UTF-16 surrogate pairs
/// (`\uXXXX\uXXXX`). This matches TypeScript's emitter behavior, which escapes
/// non-ASCII characters in string literals.
fn escape_string(input: &str, escape_dollar: bool) -> String {
let mut result = String::with_capacity(input.len() + 2);
result.push('"');
Expand All @@ -1240,19 +1242,37 @@ fn escape_string(input: &str, escape_dollar: bool) -> String {
'\n' => result.push_str("\\n"),
'\r' => result.push_str("\\r"),
'$' if escape_dollar => result.push_str("\\$"),
// Escape ASCII control characters (0x00-0x1F, 0x7F) other than \n and \r
c if c.is_ascii_control() => {
// ASCII printable characters (0x20-0x7E) are emitted literally
c if (' '..='\x7E').contains(&c) => result.push(c),
// Everything else (ASCII control chars, non-ASCII) is escaped as \uNNNN.
// Characters above the BMP are encoded as UTF-16 surrogate pairs.
c => {
let code = c as u32;
result.push_str(&format!("\\u{code:04X}"));
if code <= 0xFFFF {
push_unicode_escape(&mut result, code);
} else {
let hi = 0xD800 + ((code - 0x10000) >> 10);
let lo = 0xDC00 + ((code - 0x10000) & 0x3FF);
push_unicode_escape(&mut result, hi);
push_unicode_escape(&mut result, lo);
}
}
// All other characters (including non-ASCII printable) are emitted literally
_ => result.push(c),
}
}
result.push('"');
result
}

/// Push a `\uXXXX` escape sequence for a 16-bit code unit.
fn push_unicode_escape(buf: &mut String, code: u32) {
const HEX: &[u8; 16] = b"0123456789ABCDEF";
buf.push_str("\\u");
buf.push(HEX[((code >> 12) & 0xF) as usize] as char);
buf.push(HEX[((code >> 8) & 0xF) as usize] as char);
buf.push(HEX[((code >> 4) & 0xF) as usize] as char);
buf.push(HEX[(code & 0xF) as usize] as char);
}

/// Escape an identifier for use as a property key.
fn escape_identifier(input: &Atom<'_>, escape_dollar: bool, always_quote: bool) -> String {
// Check if the identifier is a valid JavaScript identifier
Expand Down Expand Up @@ -1487,35 +1507,35 @@ mod tests {

#[test]
fn test_escape_string_unicode_literals() {
// Non-ASCII printable characters should be emitted as literal UTF-8,
// matching Angular's escapeIdentifier behavior.
// Non-ASCII characters should be escaped as \uNNNN to match
// TypeScript's emitter behavior.

// &times; (multiplication sign U+00D7) -> literal ×
assert_eq!(escape_string("\u{00D7}", false), "\"\u{00D7}\"");
// &times; (multiplication sign U+00D7) -> \u00D7
assert_eq!(escape_string("\u{00D7}", false), "\"\\u00D7\"");

// &nbsp; (non-breaking space U+00A0) -> literal
assert_eq!(escape_string("\u{00A0}", false), "\"\u{00A0}\"");
// &nbsp; (non-breaking space U+00A0) -> \u00A0
assert_eq!(escape_string("\u{00A0}", false), "\"\\u00A0\"");

// Mixed ASCII and non-ASCII
assert_eq!(escape_string("a\u{00D7}b", false), "\"a\u{00D7}b\"");
assert_eq!(escape_string("a\u{00D7}b", false), "\"a\\u00D7b\"");

// Multiple non-ASCII characters
assert_eq!(escape_string("\u{00D7}\u{00A0}", false), "\"\u{00D7}\u{00A0}\"");
assert_eq!(escape_string("\u{00D7}\u{00A0}", false), "\"\\u00D7\\u00A0\"");

// Characters outside BMP (emoji) -> emitted literally
assert_eq!(escape_string("\u{1F600}", false), "\"\u{1F600}\"");
// Characters outside BMP (emoji) -> surrogate pair
assert_eq!(escape_string("\u{1F600}", false), "\"\\uD83D\\uDE00\"");

// Common HTML entities -> all emitted literally
assert_eq!(escape_string("\u{00A9}", false), "\"\u{00A9}\""); // &copy; ©
assert_eq!(escape_string("\u{00AE}", false), "\"\u{00AE}\""); // &reg; ®
assert_eq!(escape_string("\u{2014}", false), "\"\u{2014}\""); // &mdash; —
assert_eq!(escape_string("\u{2013}", false), "\"\u{2013}\""); // &ndash; –
// Common HTML entities -> all escaped as \uNNNN
assert_eq!(escape_string("\u{00A9}", false), "\"\\u00A9\""); // &copy; ©
assert_eq!(escape_string("\u{00AE}", false), "\"\\u00AE\""); // &reg; ®
assert_eq!(escape_string("\u{2014}", false), "\"\\u2014\""); // &mdash; —
assert_eq!(escape_string("\u{2013}", false), "\"\\u2013\""); // &ndash; –

// Greek letter alpha
assert_eq!(escape_string("\u{03B1}", false), "\"\u{03B1}\""); // α
assert_eq!(escape_string("\u{03B1}", false), "\"\\u03B1\""); // α

// Accented Latin letter
assert_eq!(escape_string("\u{00E9}", false), "\"\u{00E9}\""); // é
assert_eq!(escape_string("\u{00E9}", false), "\"\\u00E9\""); // é
}

#[test]
Expand All @@ -1533,6 +1553,41 @@ mod tests {
assert_eq!(escape_string("\r", false), "\"\\r\"");
}

#[test]
fn test_escape_string_non_ascii_as_unicode_escapes() {
// Non-ASCII characters should be escaped as \uNNNN to match
// TypeScript's emitter behavior (which escapes non-ASCII in string literals).

// Non-breaking space U+00A0
assert_eq!(escape_string("\u{00A0}", false), "\"\\u00A0\"");

// En dash U+2013
assert_eq!(escape_string("\u{2013}", false), "\"\\u2013\"");

// Trademark U+2122
assert_eq!(escape_string("\u{2122}", false), "\"\\u2122\"");

// Infinity U+221E
assert_eq!(escape_string("\u{221E}", false), "\"\\u221E\"");

// Mixed ASCII and non-ASCII
assert_eq!(escape_string("a\u{00D7}b", false), "\"a\\u00D7b\"");

// Multiple non-ASCII characters
assert_eq!(escape_string("\u{00D7}\u{00A0}", false), "\"\\u00D7\\u00A0\"");

// Characters above BMP should use surrogate pairs
// U+1F600 (grinning face) = surrogate pair D83D DE00
assert_eq!(escape_string("\u{1F600}", false), "\"\\uD83D\\uDE00\"");

// U+10000 (first supplementary char) = surrogate pair D800 DC00
assert_eq!(escape_string("\u{10000}", false), "\"\\uD800\\uDC00\"");

// ASCII printable chars (0x20-0x7E) should remain literal
assert_eq!(escape_string(" ~", false), "\" ~\"");
assert_eq!(escape_string("abc123!@#", false), "\"abc123!@#\"");
}

// ========================================================================
// Source Map Tests
// ========================================================================
Expand Down
28 changes: 14 additions & 14 deletions crates/oxc_angular_compiler/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,26 +145,26 @@ fn test_multiple_interpolations() {

#[test]
fn test_html_entity_between_interpolations() {
// HTML entity &times; between two interpolations should produce literal UTF-8 in the output
// HTML entity &times; between two interpolations should produce \u00D7 in the output
let js = compile_template_to_js("<div>{{ a }}&times;{{ b }}</div>", "TestComponent");
// Should produce: textInterpolate2("", ctx.a, "\u{00D7}", ctx.b)
// Note: × (multiplication sign) = U+00D7, emitted as literal UTF-8
// Should produce: textInterpolate2("", ctx.a, "\u00D7", ctx.b)
// Note: × (multiplication sign) = U+00D7, escaped as \u00D7
assert!(
js.contains("textInterpolate2(\"\",ctx.a,\"\u{00D7}\",ctx.b)"),
"Expected textInterpolate2 with literal times character. Got:\n{js}"
js.contains(r#"textInterpolate2("",ctx.a,"\u00D7",ctx.b)"#),
"Expected textInterpolate2 with escaped times character. Got:\n{js}"
);
}

#[test]
fn test_html_entity_at_start_of_interpolation() {
// Entity at start: &times;{{ a }}
let js = compile_template_to_js("<div>&times;{{ a }}</div>", "TestComponent");
// Should produce: textInterpolate1("\u{00D7}", ctx.a)
// Note: × (multiplication sign) = U+00D7, emitted as literal UTF-8
// Should produce: textInterpolate1("\u00D7", ctx.a)
// Note: × (multiplication sign) = U+00D7, escaped as \u00D7
assert!(
js.contains("textInterpolate1(\"\u{00D7}\",ctx.a)")
|| js.contains("textInterpolate(\"\u{00D7}\",ctx.a)"),
"Expected textInterpolate with literal times character at start. Got:\n{js}"
js.contains(r#"textInterpolate1("\u00D7",ctx.a)"#)
|| js.contains(r#"textInterpolate("\u00D7",ctx.a)"#),
"Expected textInterpolate with escaped times character at start. Got:\n{js}"
);
}

Expand All @@ -173,11 +173,11 @@ fn test_multiple_html_entities_between_interpolations() {
// Multiple entities: {{ a }}&nbsp;&times;&nbsp;{{ b }}
let js =
compile_template_to_js("<div>{{ a }}&nbsp;&times;&nbsp;{{ b }}</div>", "TestComponent");
// Should produce: textInterpolate2("", ctx.a, "\u{00A0}\u{00D7}\u{00A0}", ctx.b)
// Note: &nbsp; = U+00A0, &times; = U+00D7, both emitted as literal UTF-8
// Should produce: textInterpolate2("", ctx.a, "\u00A0\u00D7\u00A0", ctx.b)
// Note: &nbsp; = U+00A0, &times; = U+00D7, both escaped as \uNNNN
assert!(
js.contains("textInterpolate2(\"\",ctx.a,\"\u{00A0}\u{00D7}\u{00A0}\",ctx.b)"),
"Expected textInterpolate2 with literal Unicode entities. Got:\n{js}"
js.contains(r#"textInterpolate2("",ctx.a,"\u00A0\u00D7\u00A0",ctx.b)"#),
"Expected textInterpolate2 with escaped Unicode entities. Got:\n{js}"
);
}

Expand Down
Loading