Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
283 changes: 149 additions & 134 deletions src/sexp/ansi_c.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,154 +11,169 @@
/// `chars` is the full character array, `pos` points to the first char after `$'`.
/// Returns the processed content (without surrounding quotes).
/// Advances `pos` past the closing `'`.
#[allow(clippy::too_many_lines)]
pub fn process_ansi_c_content(chars: &[char], pos: &mut usize) -> String {
let mut out = String::new();
while *pos < chars.len() {
let c = chars[*pos];
if c == '\'' {
*pos += 1; // skip closing '
*pos += 1;
return out;
}
if c == '\\' && *pos + 1 < chars.len() {
*pos += 1;
let esc = chars[*pos];
*pos += 1;
match esc {
'n' => out.push('\n'),
't' => out.push('\t'),
'r' => out.push('\r'),
'a' => out.push('\x07'),
'b' => out.push('\x08'),
'f' => out.push('\x0C'),
'v' => out.push('\x0B'),
'e' | 'E' => out.push('\x1B'),
'\\' => out.push('\\'),
'c' => {
// Control character: \cX → chr(X & 0x1F)
if *pos < chars.len() {
let ctrl = chars[*pos];
*pos += 1;
let val = (ctrl as u32) & 0x1F;
if val > 0
&& let Some(ch) = char::from_u32(val)
{
out.push(ch);
}
// \c@ or val==0 → NUL, which is dropped
} else {
// \c at end of string — output literal \c
out.push('\\');
out.push('c');
}
}
'\'' => {
// Escaped single quote: output as '\\''
out.push('\'');
out.push('\\');
out.push('\'');
out.push('\'');
return process_ansi_c_continue(chars, pos, out);
}
'"' => out.push('"'),
'x' => {
// Hex escape: \xNN — if no valid hex digits, output literal \x
let before = *pos;
let hex = read_hex(chars, pos, 2);
if *pos == before {
// No hex digits consumed — output literal \x
out.push('\\');
out.push('x');
} else if hex == 0 {
// NUL byte truncates the string
skip_to_closing_quote(chars, pos);
return out;
} else if hex > 0x7F {
// High bytes are invalid standalone UTF-8 — replacement char
out.push('\u{FFFD}');
} else if let Some(ch) = char::from_u32(hex) {
// Bash prefixes CTLESC (0x01) and CTLNUL (0x7F) with
// CTLESC in its internal representation
if ch == '\x01' || ch == '\x7F' {
out.push('\x01');
}
out.push(ch);
}
}
'u' => {
// Unicode: \uNNNN — if no hex digits, output literal \u
let before = *pos;
let val = read_hex(chars, pos, 4);
if *pos == before {
out.push('\\');
out.push('u');
} else if val > 0
&& let Some(ch) = char::from_u32(val)
{
out.push(ch);
}
// val==0 with digits → NUL, truncate
else if val == 0 && *pos > before {
skip_to_closing_quote(chars, pos);
return out;
}
}
'U' => {
// Unicode long: \UNNNNNNNN — if no hex digits, output literal \U
let before = *pos;
let val = read_hex(chars, pos, 8);
if *pos == before {
out.push('\\');
out.push('U');
} else if val > 0
&& let Some(ch) = char::from_u32(val)
{
out.push(ch);
}
// val==0 with digits → NUL, truncate
else if val == 0 && *pos > before {
skip_to_closing_quote(chars, pos);
return out;
}
}
'0'..='7' => {
// Octal escape — NUL terminates the string (bash behavior)
let mut val = u32::from(esc as u8 - b'0');
for _ in 0..2 {
if *pos < chars.len() && chars[*pos] >= '0' && chars[*pos] <= '7' {
val = val * 8 + u32::from(chars[*pos] as u8 - b'0');
*pos += 1;
}
}
if val == 0 {
skip_to_closing_quote(chars, pos);
return out;
}
if let Some(ch) = char::from_u32(val) {
if ch == '\x01' || ch == '\x7F' {
out.push('\x01');
}
out.push(ch);
}
}
_ => {
out.push('\\');
out.push(esc);
}
}
} else {
if c != '\\' || *pos + 1 >= chars.len() {
out.push(c);
*pos += 1;
continue;
}
*pos += 1;
let esc = chars[*pos];
*pos += 1;
if handle_escape(chars, pos, esc, &mut out) {
// NUL-valued escape truncated the string; skip_to_closing_quote
// was already called by the specific handler.
return out;
}
}
out
}

/// Continue processing after an escaped quote split.
fn process_ansi_c_continue(chars: &[char], pos: &mut usize, mut out: String) -> String {
// After \' we output '\\'' and need to continue in a new quote context
out.push_str(&process_ansi_c_content(chars, pos));
out
/// Dispatch a single escape sequence. Returns `true` when the sequence
/// NUL-truncated the quoted string so the caller can exit the loop.
fn handle_escape(chars: &[char], pos: &mut usize, esc: char, out: &mut String) -> bool {
if let Some(ch) = simple_escape(esc) {
out.push(ch);
return false;
}
match esc {
'c' => handle_control(chars, pos, out),
'\'' => push_escaped_quote(out),
'x' => return handle_hex(chars, pos, out),
'u' => return handle_unicode(chars, pos, out, 4),
'U' => return handle_unicode(chars, pos, out, 8),
'0'..='7' => return handle_octal(chars, pos, esc, out),
_ => {
out.push('\\');
out.push(esc);
}
}
false
}

/// Table of single-character escapes. Returns `None` for escapes that
/// need context (hex/unicode/octal/control) or are unrecognised.
const fn simple_escape(esc: char) -> Option<char> {
Some(match esc {
'n' => '\n',
't' => '\t',
'r' => '\r',
'a' => '\x07',
'b' => '\x08',
'f' => '\x0C',
'v' => '\x0B',
'e' | 'E' => '\x1B',
'\\' => '\\',
'"' => '"',
_ => return None,
})
}

/// `\xNN` — up to 2 hex digits. Returns `true` on NUL truncation.
fn handle_hex(chars: &[char], pos: &mut usize, out: &mut String) -> bool {
let before = *pos;
let hex = read_hex(chars, pos, 2);
if *pos == before {
out.push('\\');
out.push('x');
return false;
}
if hex == 0 {
skip_to_closing_quote(chars, pos);
return true;
}
if hex > 0x7F {
// High bytes are invalid standalone UTF-8 — replacement char.
out.push('\u{FFFD}');
} else if let Some(ch) = char::from_u32(hex) {
push_with_ctlesc(out, ch);
}
false
}

/// `\uNNNN` (width=4) or `\UNNNNNNNN` (width=8). Returns `true` on NUL
/// truncation.
fn handle_unicode(chars: &[char], pos: &mut usize, out: &mut String, width: usize) -> bool {
let before = *pos;
let val = read_hex(chars, pos, width);
if *pos == before {
out.push('\\');
out.push(if width == 4 { 'u' } else { 'U' });
return false;
}
if val == 0 {
skip_to_closing_quote(chars, pos);
return true;
}
if let Some(ch) = char::from_u32(val) {
out.push(ch);
}
false
}

/// `\0`–`\7` followed by up to 2 additional octal digits. Returns `true`
/// on NUL truncation.
fn handle_octal(chars: &[char], pos: &mut usize, first: char, out: &mut String) -> bool {
let mut val = u32::from(first as u8 - b'0');
for _ in 0..2 {
if *pos < chars.len() && chars[*pos] >= '0' && chars[*pos] <= '7' {
val = val * 8 + u32::from(chars[*pos] as u8 - b'0');
*pos += 1;
}
}
if val == 0 {
skip_to_closing_quote(chars, pos);
return true;
}
if let Some(ch) = char::from_u32(val) {
push_with_ctlesc(out, ch);
}
false
}

/// `\cX` — emits `chr(X & 0x1F)`. `\c@` (value 0) is silently dropped,
/// matching the existing behavior (not a NUL-truncation case).
fn handle_control(chars: &[char], pos: &mut usize, out: &mut String) {
if *pos >= chars.len() {
// `\c` at end of input — output literal backslash + c.
out.push('\\');
out.push('c');
return;
}
let ctrl = chars[*pos];
*pos += 1;
let val = (ctrl as u32) & 0x1F;
if val > 0
&& let Some(ch) = char::from_u32(val)
{
out.push(ch);
}
}

/// Pushes `ch` to `out`, prefixing `0x01` (CTLESC) for bytes that bash
/// escapes internally (`0x01` and `0x7F`).
fn push_with_ctlesc(out: &mut String, ch: char) {
if ch == '\x01' || ch == '\x7F' {
out.push('\x01');
}
out.push(ch);
}

/// `\'` inside `$'...'` expands to the 4-character sequence `'\''` — close
/// the current quote, escape a single quote, reopen. The outer loop
/// continues from the next character; no recursive re-entry needed.
fn push_escaped_quote(out: &mut String) {
out.push('\'');
out.push('\\');
out.push('\'');
out.push('\'');
}

/// Read up to `max` hex digits from chars at pos.
Expand Down
Loading