From da9d7fb93f796d90d36b495f2a13dd5fa5335db3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 00:37:38 +0000 Subject: [PATCH 1/5] Initial plan From abe9474557bdcb4fbdf66124c8e9f464f0c2311b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 00:46:11 +0000 Subject: [PATCH 2/5] feat: support unicode string escapes and padding Co-authored-by: JosunLP <20913954+JosunLP@users.noreply.github.com> --- hypnoscript-compiler/hypnoscript_output | Bin 15792 -> 15792 bytes hypnoscript-lexer-parser/src/lexer.rs | 68 +++++++++++++++++++++ hypnoscript-runtime/src/string_builtins.rs | 14 ++++- 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/hypnoscript-compiler/hypnoscript_output b/hypnoscript-compiler/hypnoscript_output index a4b50103b22b54a56f1f887cf93fa26179de07fb..0749d4b387a390a9a106cdf4488325e341df6076 100755 GIT binary patch delta 70 zcmV-M0J;CLd$4=3Xaf|`Uafu)0ui{%%^3meVb$kJvyeaOX(LCSOpc&Bs diff --git a/hypnoscript-lexer-parser/src/lexer.rs b/hypnoscript-lexer-parser/src/lexer.rs index f157872..96c2dec 100644 --- a/hypnoscript-lexer-parser/src/lexer.rs +++ b/hypnoscript-lexer-parser/src/lexer.rs @@ -435,6 +435,8 @@ impl Lexer { 'r' => string.push('\r'), '\\' => string.push('\\'), '"' => string.push('"'), + 'u' => string.push(self.read_hex_escape(4, "\\u")?), + 'x' => string.push(self.read_hex_escape(2, "\\x")?), _ => string.push(escaped), } } @@ -451,6 +453,52 @@ impl Lexer { Err(format!("Unterminated string at line {}", self.line)) } + fn read_hex_escape(&mut self, digits: usize, escape_prefix: &str) -> Result { + let mut hex = String::with_capacity(digits); + + for _ in 0..digits { + if self.is_at_end() { + return Err(format!( + "Unterminated {} escape at line {}, column {}", + escape_prefix, self.line, self.column + )); + } + + let digit = self.advance(); + if !digit.is_ascii_hexdigit() { + return Err(format!( + "Invalid {} escape digit '{}' at line {}, column {}", + escape_prefix, + digit, + self.line, + self.column.saturating_sub(1) + )); + } + + hex.push(digit); + } + + let value = u32::from_str_radix(&hex, 16).map_err(|_| { + format!( + "Invalid {} escape '{}' at line {}, column {}", + escape_prefix, + hex, + self.line, + self.column.saturating_sub(digits) + ) + })?; + + char::from_u32(value).ok_or_else(|| { + format!( + "Invalid Unicode scalar value for {} escape '{}' at line {}, column {}", + escape_prefix, + hex, + self.line, + self.column.saturating_sub(digits) + ) + }) + } + fn keyword_or_identifier(&self, s: &str) -> (TokenType, String) { if let Some(definition) = TokenType::keyword_definition(s) { (definition.token, definition.canonical_lexeme.to_string()) @@ -480,6 +528,26 @@ mod tests { assert_eq!(tokens[0].lexeme, "Hello, World!"); } + #[test] + fn test_string_literal_unicode_escapes() { + let mut unicode_lexer = Lexer::new(r#""\u0041\u0042\u0043""#); + let unicode_tokens = unicode_lexer.lex().unwrap(); + assert_eq!(unicode_tokens[0].token_type, TokenType::StringLiteral); + assert_eq!(unicode_tokens[0].lexeme, "ABC"); + + let mut hex_lexer = Lexer::new(r#""Hello\x20World\x21""#); + let hex_tokens = hex_lexer.lex().unwrap(); + assert_eq!(hex_tokens[0].token_type, TokenType::StringLiteral); + assert_eq!(hex_tokens[0].lexeme, "Hello World!"); + } + + #[test] + fn test_string_literal_invalid_unicode_escape() { + let mut lexer = Lexer::new(r#""\u12G4""#); + let error = lexer.lex().unwrap_err(); + assert!(error.contains("Invalid \\u escape digit")); + } + #[test] fn test_operator_synonym_tokenization() { let mut lexer = Lexer::new("if (a youAreFeelingVerySleepy b) { }"); diff --git a/hypnoscript-runtime/src/string_builtins.rs b/hypnoscript-runtime/src/string_builtins.rs index 17668dd..faabdfd 100644 --- a/hypnoscript-runtime/src/string_builtins.rs +++ b/hypnoscript-runtime/src/string_builtins.rs @@ -199,13 +199,13 @@ impl StringBuiltins { /// Pad left with character pub fn pad_left(s: &str, total_width: usize, pad_char: char) -> String { - let padding = total_width.saturating_sub(s.len()); + let padding = total_width.saturating_sub(s.chars().count()); format!("{}{}", pad_char.to_string().repeat(padding), s) } /// Pad right with character pub fn pad_right(s: &str, total_width: usize, pad_char: char) -> String { - let padding = total_width.saturating_sub(s.len()); + let padding = total_width.saturating_sub(s.chars().count()); format!("{}{}", s, pad_char.to_string().repeat(padding)) } @@ -435,4 +435,14 @@ mod tests { let lines = StringBuiltins::wrap_text(text, 20); assert!(lines.iter().all(|line| line.chars().count() <= 20)); } + + #[test] + fn test_padding_is_unicode_aware() { + assert_eq!(StringBuiltins::pad_left("hello", 10, '-'), "-----hello"); + assert_eq!(StringBuiltins::pad_right("hello", 10, '-'), "hello-----"); + assert_eq!(StringBuiltins::pad_left("🎯", 4, '-'), "---🎯"); + assert_eq!(StringBuiltins::pad_right("🎯", 4, '-'), "🎯---"); + assert_eq!(StringBuiltins::pad_left("café", 6, ' '), " café"); + assert_eq!(StringBuiltins::pad_right("café", 6, ' '), "café "); + } } From 2db17529a1be4cb45e04331d30ae78d3ea7995d2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 00:47:42 +0000 Subject: [PATCH 3/5] chore: ignore generated native output Co-authored-by: JosunLP <20913954+JosunLP@users.noreply.github.com> --- .gitignore | 1 + hypnoscript-compiler/hypnoscript_output | Bin 15792 -> 0 bytes 2 files changed, 1 insertion(+) delete mode 100755 hypnoscript-compiler/hypnoscript_output diff --git a/.gitignore b/.gitignore index 813cac6..c07420e 100644 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,4 @@ hypnoscript-docs/static/install.sh *.dylib *.wasm *.wat +hypnoscript-compiler/hypnoscript_output* diff --git a/hypnoscript-compiler/hypnoscript_output b/hypnoscript-compiler/hypnoscript_output deleted file mode 100755 index 0749d4b387a390a9a106cdf4488325e341df6076..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15792 zcmeHOZEPGz8J;^QjYC81xD7E$+N{7u3Dg^Z#R{Y*=lE-loTMZU1Vpl0`_}fM`{Hho z+EEmO0#Z#CHK3|3LXoNp0TSW|QY(=7!IeNokw8g*K>23@k%CmH5d|44e7x_@`<&OS zGm!{C_%UPMJ~PkzJ~OlL&hE{5XTC5pF`7&y6jGD=sNzzpRWm6u6#FGZkWz=$yXp9l z+Nhwl(JvIy9M98{!&NRTinx+r4!5b@-g@aM3^ zGme(7MFHcwv!aXRu;_r?2B*`vgY~{lP=2Oz% zM%STL;t!PYs)O0gRDa*WY`Qa>$yeq(=MVRH_V?N4g547~&Hdswcub9*IHQu6Rh2pD zIsYJiIIL{HdH#EsT9P|{`1Y$5MYdQ(}S>YB|J z@>092T^l-b1QX$(9^eTobAK^DD!VgVIqHuJ_tICNr7$Py38xe8Tih?!4$ZH~{S1Cd zDr`<^J9w1eII4NRhI~zsmLh!gy5EWLy60N49N~HVAgx4r9xo=D2$%?%2$%?%2$%?% z2$%?%2sDboUt1pjr*q}aX6LERzdfjw^W|!gtlf03{IvN-=w3VYBGI)+zDdWHeJRTQ zgmN?2P#5dF|1`dM>%6o0JLk$DmLH!S>8^Ia=3E{41^Lu=mdX9?8M|fQmqH2gUnj=7 zy7_AiA6^M|Qq!N{JT<+ww$!rkGCST7B>zn4e`t!~z^b#j?7Z^lqs}WUO-|xf=hkZQ zAXV^)RM1>onrYcLyr$>=T^?Yws(k2-b7dgOS!eNfu+4dD;65tcIZ&(B?xgAP>gIpZ zukOS-YP;S(>;L!!8PAZWx9TjudG1C%X7wm>k4-WWFcB~jFcB~jFcB~jFcB~jFcB~j zFcB~j`2URnzYk07X*yz27x_KY-sM_tlJHT&i-eC6K1RszGC3tKoKlJT*2JD|Tbi%Y zi!A2(9sSB%wb}>RVR%dUCai<9ee|gk9}?SJN4K|stmXbEnlG!PyN`UZwdrsQU%m1$PPpM5d~;9&(EqENKY+s0vSuKf{Fj9+oov-l^&SQk+6+Rm#iKUwShf zo)6+kKbOPulGy(tW%Ns`|K9=Dhu>q3Sf@HRH1rW`|Cy;uKB!pTy>_qN)!APOi#?Zm z`s}VgyZe9`{!^12k4@??wYU$*J3*F<5)exqcpY&gf4&|`wOtiuJc_%rgnuE%_X>~k z4gYb|+mcjkY}~5T)1{!>F3`$$Zz|&k-mD^`98_jz>}j>O3f>KJ?liB1FOz4w z;Lc_XQ(o3h2Zd7E^(ymfx{xbo{lHJ#U5!0>1%Af$N+oZ>_47e#LCuuBobRS9x!eLt zA|;nRgLSTR3&nh)JYC8Z1GiWz%#xBMqo)RskGLZzhFw~UKm4f^gU82*h^9Ra?ua8+ zXZVzI$0kl58=P=Yj*gxlnRF)yk4=n_BEC}~g$)6B+XUdJy}(o92~jOv&^rRCuzq`h zn=TjJIWM2)tpnpH$t#`7yOpw^Ruls|xKrh_Xl1VgZ8yLf@jirxn+w+OXsCCa_c4T_ zqg_1ZgBxhFJPToD zZGTRN!(5v5C~3VO7g~WlRqo|7)6^ZlFg68M*)$+?G|r+o!9C=8zCpJTJ)3oEm+uiE z<{M%TeUWE;KYe&k0R3Un11+*e6XpTncpF?)e^&Is4mb+p`2?E3hoX9X4+1SX3dZ%H zB>wL-r$GNPj{>57%%kG{_ejsrH0UwU0$!7LxsM!Y=rKRa5XYwsJ?3pd{(g)mt|OX0 zM+KfULBAv?2*mso{f8g$66tvkhBD@1z%FSl-hcGrDrLAy=rJz@R>cwR$It&cvhz8? zUVo0u^BnVGsU~a-dB*GXiRvxU0i{@%pbUI5rtcC15dB33CcYRvX*f#~aV5KBA-Eu+ TB^v@yD;w$0AJSgi?xO!Ur}mR_ From 79f49858ee7e713b609643d4da70db8608f323ad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 00:48:59 +0000 Subject: [PATCH 4/5] test: cover unicode escape edge cases Co-authored-by: JosunLP <20913954+JosunLP@users.noreply.github.com> --- hypnoscript-lexer-parser/src/lexer.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hypnoscript-lexer-parser/src/lexer.rs b/hypnoscript-lexer-parser/src/lexer.rs index 96c2dec..36abffe 100644 --- a/hypnoscript-lexer-parser/src/lexer.rs +++ b/hypnoscript-lexer-parser/src/lexer.rs @@ -548,6 +548,24 @@ mod tests { assert!(error.contains("Invalid \\u escape digit")); } + #[test] + fn test_string_literal_unterminated_unicode_escape() { + let mut unicode_lexer = Lexer::new("\"\\u12"); + let unicode_error = unicode_lexer.lex().unwrap_err(); + assert!(unicode_error.contains("Unterminated \\u escape")); + + let mut hex_lexer = Lexer::new("\"\\x4"); + let hex_error = hex_lexer.lex().unwrap_err(); + assert!(hex_error.contains("Unterminated \\x escape")); + } + + #[test] + fn test_string_literal_invalid_unicode_scalar_escape() { + let mut lexer = Lexer::new(r#""\uD800""#); + let error = lexer.lex().unwrap_err(); + assert!(error.contains("Invalid Unicode scalar value")); + } + #[test] fn test_operator_synonym_tokenization() { let mut lexer = Lexer::new("if (a youAreFeelingVerySleepy b) { }"); From 0543b7e881d07d9b0d82b7c0b18831e0de2d0e72 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 00:54:39 +0000 Subject: [PATCH 5/5] refactor: document unicode escape helper Co-authored-by: JosunLP <20913954+JosunLP@users.noreply.github.com> --- hypnoscript-lexer-parser/src/lexer.rs | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/hypnoscript-lexer-parser/src/lexer.rs b/hypnoscript-lexer-parser/src/lexer.rs index 36abffe..e0147c5 100644 --- a/hypnoscript-lexer-parser/src/lexer.rs +++ b/hypnoscript-lexer-parser/src/lexer.rs @@ -453,6 +453,13 @@ impl Lexer { Err(format!("Unterminated string at line {}", self.line)) } + /// Reads a fixed-width hexadecimal escape sequence from the current position. + /// + /// `digits` controls how many hexadecimal digits are consumed after the + /// escape prefix (for example 4 for `\uXXXX` and 2 for `\xXX`). The method + /// returns the decoded Unicode scalar value or an error if the escape is + /// truncated, contains non-hex digits, or decodes to an invalid scalar such + /// as a UTF-16 surrogate. fn read_hex_escape(&mut self, digits: usize, escape_prefix: &str) -> Result { let mut hex = String::with_capacity(digits); @@ -478,15 +485,18 @@ impl Lexer { hex.push(digit); } - let value = u32::from_str_radix(&hex, 16).map_err(|_| { - format!( - "Invalid {} escape '{}' at line {}, column {}", + // Safe because each digit was already validated with `is_ascii_hexdigit`. + let value = u32::from_str_radix(&hex, 16).unwrap(); + + if (0xD800..=0xDFFF).contains(&value) { + return Err(format!( + "Invalid Unicode scalar value for {} escape '{}' at line {}, column {}: surrogate code points (U+D800 to U+DFFF) are not valid scalar values", escape_prefix, hex, self.line, self.column.saturating_sub(digits) - ) - })?; + )); + } char::from_u32(value).ok_or_else(|| { format!(