Kink-Development-Group · JosunLP · Mar 21, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
diff --git a/.gitignore b/.gitignore
@@ -88,3 +88,4 @@ hypnoscript-docs/static/install.sh
 *.dylib
 *.wasm
 *.wat
+hypnoscript-compiler/hypnoscript_output*
diff --git a/hypnoscript-compiler/hypnoscript_output b/hypnoscript-compiler/hypnoscript_output
diff --git a/hypnoscript-lexer-parser/src/lexer.rs b/hypnoscript-lexer-parser/src/lexer.rs
@@ -435,6 +435,8 @@ impl Lexer {
                         'r' => string.push('\r'),
                         '\\' => string.push('\\'),
                         '"' => string.push('"'),
+                        'u' => string.push(self.read_hex_escape(4, "\\u")?),
+                        'x' => string.push(self.read_hex_escape(2, "\\x")?),
                         _ => string.push(escaped),
                     }
                 }
@@ -451,6 +453,62 @@ impl Lexer {
         Err(format!("Unterminated string at line {}", self.line))
     }
 
+    /// Decodes a fixed-width hexadecimal escape such as `\uXXXX` or `\xXX`.
+    ///
+    /// Takes exactly `digits` characters from `advance`, requires each one to be
+    /// an ASCII hexadecimal digit, and converts the collected digits to a `char`.
+    /// `escape_prefix` (for example `"\\u"`) is only used to label error messages.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message carrying the current line and column when:
+    /// - `is_at_end` reports the end of input before all digits were read,
+    /// - a character is not an ASCII hexadecimal digit, or
+    /// - the digits do not form a Unicode scalar value (a UTF-16 surrogate, or
+    ///   a digit count that cannot be parsed into a `u32`).
+    fn read_hex_escape(&mut self, digits: usize, escape_prefix: &str) -> Result<char, String> {
+        let mut hex = String::with_capacity(digits);
+
+        for _ in 0..digits {
+            if self.is_at_end() {
+                return Err(format!(
+                    "Unterminated {} escape at line {}, column {}",
+                    escape_prefix, self.line, self.column
+                ));
+            }
+
+            // The character is taken before it is validated, so the column is
+            // moved back by one below (presumably to point at the rejected
+            // character; confirm against how `advance` updates `column`).
+            let digit = self.advance();
+            if !digit.is_ascii_hexdigit() {
+                return Err(format!(
+                    "Invalid {} escape digit '{}' at line {}, column {}",
+                    escape_prefix,
+                    digit,
+                    self.line,
+                    self.column.saturating_sub(1)
+                ));
+            }
+
+            hex.push(digit);
+        }
+
+        // Single builder for every "not a scalar value" error so the wording and
+        // the column adjustment (back over the `digits` characters just read)
+        // cannot drift apart between the failure paths.
+        let invalid_scalar = |detail: &str| {
+            format!(
+                "Invalid Unicode scalar value for {} escape '{}' at line {}, column {}{}",
+                escape_prefix,
+                hex,
+                self.line,
+                self.column.saturating_sub(digits),
+                detail
+            )
+        };
+
+        // Every collected character is a hex digit, so parsing can only fail when
+        // `digits` is 0 (empty string) or large enough to overflow `u32`; report
+        // that as an error instead of panicking.
+        let value = u32::from_str_radix(&hex, 16).map_err(|_| invalid_scalar(""))?;
+
+        // Surrogates get a dedicated explanation; `char::from_u32` would reject
+        // them as well, but without saying why.
+        if (0xD800..=0xDFFF).contains(&value) {
+            return Err(invalid_scalar(
+                ": surrogate code points (U+D800 to U+DFFF) are not valid scalar values",
+            ));
+        }
+
+        char::from_u32(value).ok_or_else(|| invalid_scalar(""))
+    }
+
     fn keyword_or_identifier(&self, s: &str) -> (TokenType, String) {
         if let Some(definition) = TokenType::keyword_definition(s) {
             (definition.token, definition.canonical_lexeme.to_string())
@@ -480,6 +538,44 @@ mod tests {
         assert_eq!(tokens[0].lexeme, "Hello, World!");
     }
 
+    #[test]
+    fn test_string_literal_unicode_escapes() {
+        // Lexes `source` and expects a leading string literal whose text is `expected`.
+        let check = |source: &str, expected: &str| {
+            let mut lexer = Lexer::new(source);
+            let tokens = lexer.lex().unwrap();
+            assert_eq!(tokens[0].token_type, TokenType::StringLiteral);
+            assert_eq!(tokens[0].lexeme, expected);
+        };
+
+        // Four-digit `\u` escapes.
+        check(r#""\u0041\u0042\u0043""#, "ABC");
+        // Two-digit `\x` escapes mixed with ordinary characters.
+        check(r#""Hello\x20World\x21""#, "Hello World!");
+    }
+
+    #[test]
+    fn test_string_literal_invalid_unicode_escape() {
+        // `G` is not a hexadecimal digit, so lexing the literal must fail.
+        let mut bad_digit_lexer = Lexer::new(r#""\u12G4""#);
+        let outcome = bad_digit_lexer.lex();
+        assert!(outcome.unwrap_err().contains("Invalid \\u escape digit"));
+    }
+
+    #[test]
+    fn test_string_literal_unterminated_unicode_escape() {
+        // Lexes `source` and returns the error it is expected to produce.
+        let lex_error = |source: &str| {
+            let mut lexer = Lexer::new(source);
+            lexer.lex().unwrap_err()
+        };
+
+        // Both inputs end before the escape has received all of its hex digits.
+        assert!(lex_error("\"\\u12").contains("Unterminated \\u escape"));
+        assert!(lex_error("\"\\x4").contains("Unterminated \\x escape"));
+    }
+
+    #[test]
+    fn test_string_literal_invalid_unicode_scalar_escape() {
+        // U+D800 is a surrogate code point, which is not a Unicode scalar value.
+        let mut surrogate_lexer = Lexer::new(r#""\uD800""#);
+        let outcome = surrogate_lexer.lex();
+        assert!(outcome.unwrap_err().contains("Invalid Unicode scalar value"));
+    }
+
     #[test]
     fn test_operator_synonym_tokenization() {
         let mut lexer = Lexer::new("if (a youAreFeelingVerySleepy b) { }");

diff --git a/hypnoscript-runtime/src/string_builtins.rs b/hypnoscript-runtime/src/string_builtins.rs
@@ -199,13 +199,13 @@ impl StringBuiltins {
 
     /// Pad left with character
     pub fn pad_left(s: &str, total_width: usize, pad_char: char) -> String {
-        let padding = total_width.saturating_sub(s.len());
+        let padding = total_width.saturating_sub(s.chars().count()); // width counted in chars, not UTF-8 bytes
         format!("{}{}", pad_char.to_string().repeat(padding), s)
     }
 
     /// Pad right with character
     pub fn pad_right(s: &str, total_width: usize, pad_char: char) -> String {
-        let padding = total_width.saturating_sub(s.len());
+        let padding = total_width.saturating_sub(s.chars().count()); // width counted in chars, not UTF-8 bytes
         format!("{}{}", s, pad_char.to_string().repeat(padding))
     }
 
@@ -435,4 +435,14 @@ mod tests {
         let lines = StringBuiltins::wrap_text(text, 20);
         assert!(lines.iter().all(|line| line.chars().count() <= 20));
     }
+
+    #[test]
+    fn test_padding_is_unicode_aware() {
+        // Each row: (text, target width, pad char, expected pad_left, expected pad_right).
+        // The emoji and the accented word contain characters that take more than
+        // one UTF-8 byte, so byte-based width would under-pad them.
+        let cases = [
+            ("hello", 10, '-', "-----hello", "hello-----"),
+            ("🎯", 4, '-', "---🎯", "🎯---"),
+            ("café", 6, ' ', "  café", "café  "),
+        ];
+
+        for &(text, width, pad, left, right) in cases.iter() {
+            assert_eq!(StringBuiltins::pad_left(text, width, pad), left);
+            assert_eq!(StringBuiltins::pad_right(text, width, pad), right);
+        }
+    }
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -88,3 +88,4 @@ hypnoscript-docs/static/install.sh @@
     *.dylib
     *.wasm
     *.wat
+    hypnoscript-compiler/hypnoscript_output*