diff --git a/crates/llmbc/src/opcode.rs b/crates/llmbc/src/opcode.rs index f4be8ac..4287030 100644 --- a/crates/llmbc/src/opcode.rs +++ b/crates/llmbc/src/opcode.rs @@ -175,6 +175,47 @@ pub enum Op { /// Pop a List, push reversed List. ListReverse, + // String built-ins (0x9A+) + /// Pop two strings (string, sep), push List. + StringSplit, + + /// Pop three strings (string, from, to), push String. + StringReplace, + + /// Pop a String and two Ints (start, end), push String slice. + StringSlice, + + /// Pop a String, push Option. + IntParse, + + /// Pop a String, push Option. + FloatParse, + + /// Pop a Bool, push String ("true" or "false"). + BoolToString, + + // Map built-ins (0xA0+) + /// Pop a Map and a String key, push Option. + MapGet, + + /// Pop a Map, a String key, and a Value; push new Map with key set. + MapSet, + + /// Pop a Map and a String key; push new Map with key removed. + MapRemove, + + /// Pop a Map and a String key; push Bool. + MapContainsKey, + + /// Pop a Map; push List of keys. + MapKeys, + + /// Pop a Map; push List of values. + MapValues, + + /// Pop a Map; push Int length. + MapLen, + /// No operation. Nop, @@ -250,6 +291,19 @@ impl Op { Op::ListAppend => 0x97, Op::ListConcat => 0x98, Op::ListReverse => 0x99, + Op::StringSplit => 0x9A, + Op::StringReplace => 0x9B, + Op::StringSlice => 0x9C, + Op::IntParse => 0x9D, + Op::FloatParse => 0x9E, + Op::BoolToString => 0x9F, + Op::MapGet => 0xA0, + Op::MapSet => 0xA1, + Op::MapRemove => 0xA2, + Op::MapContainsKey => 0xA3, + Op::MapKeys => 0xA4, + Op::MapValues => 0xA5, + Op::MapLen => 0xA6, Op::Nop => 0xFE, Op::Halt => 0xFF, } diff --git a/crates/llmc/src/codegen.rs b/crates/llmc/src/codegen.rs index 876cbaa..7d39434 100644 --- a/crates/llmc/src/codegen.rs +++ b/crates/llmc/src/codegen.rs @@ -390,6 +390,81 @@ impl Emitter { fe.code.push(Op::ListReverse); return Ok(()); } + "__builtin_string_split" if args.len() == 2 => { + self.emit_expr(&args[0], fe)?; + self.emit_expr(&args[1], fe)?; + fe.code.push(Op::StringSplit); + return Ok(()); + } + "__builtin_string_replace" if args.len() == 3 => { + self.emit_expr(&args[0], fe)?; + self.emit_expr(&args[1], fe)?; + self.emit_expr(&args[2], fe)?; + fe.code.push(Op::StringReplace); + return Ok(()); + } + "__builtin_string_slice" if args.len() == 3 => { + self.emit_expr(&args[0], fe)?; + self.emit_expr(&args[1], fe)?; + self.emit_expr(&args[2], fe)?; + fe.code.push(Op::StringSlice); + return Ok(()); + } + "__builtin_int_parse" if args.len() == 1 => { + self.emit_expr(&args[0], fe)?; + fe.code.push(Op::IntParse); + return Ok(()); + } + "__builtin_float_parse" if args.len() == 1 => { + self.emit_expr(&args[0], fe)?; + fe.code.push(Op::FloatParse); + return Ok(()); + } + "__builtin_bool_to_string" if args.len() == 1 => { + self.emit_expr(&args[0], fe)?; + fe.code.push(Op::BoolToString); + return Ok(()); + } + "__builtin_map_get" if args.len() == 2 => { + self.emit_expr(&args[0], fe)?; + self.emit_expr(&args[1], fe)?; + fe.code.push(Op::MapGet); + return Ok(()); + } + "__builtin_map_set" if args.len() == 3 => { + self.emit_expr(&args[0], fe)?; + self.emit_expr(&args[1], fe)?; + self.emit_expr(&args[2], fe)?; + fe.code.push(Op::MapSet); + return Ok(()); + } + "__builtin_map_remove" if args.len() == 2 => { + self.emit_expr(&args[0], fe)?; + self.emit_expr(&args[1], fe)?; + fe.code.push(Op::MapRemove); + return Ok(()); + } + "__builtin_map_contains_key" if args.len() == 2 => { + self.emit_expr(&args[0], fe)?; + self.emit_expr(&args[1], fe)?; + fe.code.push(Op::MapContainsKey); + return Ok(()); + } + "__builtin_map_keys" if args.len() == 1 => { + self.emit_expr(&args[0], fe)?; + fe.code.push(Op::MapKeys); + return Ok(()); + } + "__builtin_map_values" if args.len() == 1 => { + self.emit_expr(&args[0], fe)?; + fe.code.push(Op::MapValues); + return Ok(()); + } + "__builtin_map_len" if args.len() == 1 => { + self.emit_expr(&args[0], fe)?; + fe.code.push(Op::MapLen); + return Ok(()); + } // 0.3-S14: builtin `step_input(name)` reads a // workflow step's resolved upstream output. // Emits `Op::CapCall(StepInput, 1)` which diff --git a/crates/llmc/src/typeck.rs b/crates/llmc/src/typeck.rs index 22977f7..15cf285 100644 --- a/crates/llmc/src/typeck.rs +++ b/crates/llmc/src/typeck.rs @@ -71,6 +71,19 @@ impl TypeChecker { functions.insert("__builtin_list_append".to_string(), 2); functions.insert("__builtin_list_concat".to_string(), 2); functions.insert("__builtin_list_reverse".to_string(), 1); + functions.insert("__builtin_string_split".to_string(), 2); + functions.insert("__builtin_string_replace".to_string(), 3); + functions.insert("__builtin_string_slice".to_string(), 3); + functions.insert("__builtin_int_parse".to_string(), 1); + functions.insert("__builtin_float_parse".to_string(), 1); + functions.insert("__builtin_bool_to_string".to_string(), 1); + functions.insert("__builtin_map_get".to_string(), 2); + functions.insert("__builtin_map_set".to_string(), 3); + functions.insert("__builtin_map_remove".to_string(), 2); + functions.insert("__builtin_map_contains_key".to_string(), 2); + functions.insert("__builtin_map_keys".to_string(), 1); + functions.insert("__builtin_map_values".to_string(), 1); + functions.insert("__builtin_map_len".to_string(), 1); // 0.3-S14: read a workflow step's resolved input value at // runtime. Compiles to `Op::CapCall(StepInput, 1)` which // dispatches through the gateway's StepInputHandler. Returns diff --git a/crates/llmvm/src/tests.rs b/crates/llmvm/src/tests.rs index c91f639..147a9f6 100644 --- a/crates/llmvm/src/tests.rs +++ b/crates/llmvm/src/tests.rs @@ -2363,4 +2363,303 @@ mod tests { Value::List(vec![Value::Int(3), Value::Int(2), Value::Int(1)]) ); } + + // ── New string/map built-ins ── + + #[test] + fn test_string_split() { + let module = simple_module( + vec![Op::PushConst(0), Op::PushConst(1), Op::StringSplit, Op::Ret], + vec![Value::String("a,b,c".into()), Value::String(",".into())], + ); + assert_eq!( + run_module(module).unwrap(), + Value::List(vec![ + Value::String("a".into()), + Value::String("b".into()), + Value::String("c".into()), + ]) + ); + } + + #[test] + fn test_string_split_empty_sep() { + let module = simple_module( + vec![Op::PushConst(0), Op::PushConst(1), Op::StringSplit, Op::Ret], + vec![Value::String("ab".into()), Value::String("".into())], + ); + assert_eq!( + run_module(module).unwrap(), + Value::List(vec![ + Value::String("a".into()), + Value::String("b".into()), + ]) + ); + } + + #[test] + fn test_string_replace() { + let module = simple_module( + vec![ + Op::PushConst(0), + Op::PushConst(1), + Op::PushConst(2), + Op::StringReplace, + Op::Ret, + ], + vec![ + Value::String("hello world".into()), + Value::String("world".into()), + Value::String("Rust".into()), + ], + ); + assert_eq!( + run_module(module).unwrap(), + Value::String("hello Rust".into()) + ); + } + + #[test] + fn test_string_slice() { + let module = simple_module( + vec![ + Op::PushConst(0), + Op::PushConst(1), + Op::PushConst(2), + Op::StringSlice, + Op::Ret, + ], + vec![ + Value::String("hello".into()), + Value::Int(1), + Value::Int(4), + ], + ); + assert_eq!( + run_module(module).unwrap(), + Value::String("ell".into()) + ); + } + + #[test] + fn test_string_slice_out_of_bounds() { + let module = simple_module( + vec![ + Op::PushConst(0), + Op::PushConst(1), + Op::PushConst(2), + Op::StringSlice, + Op::Ret, + ], + vec![ + Value::String("hi".into()), + Value::Int(0), + Value::Int(100), + ], + ); + assert_eq!(run_module(module).unwrap(), Value::String("".into())); + } + + #[test] + fn test_int_parse_ok() { + let module = simple_module( + vec![Op::PushConst(0), Op::IntParse, Op::Ret], + vec![Value::String("42".into())], + ); + assert_eq!( + run_module(module).unwrap(), + Value::Some(Box::new(Value::Int(42))) + ); + } + + #[test] + fn test_int_parse_fail() { + let module = simple_module( + vec![Op::PushConst(0), Op::IntParse, Op::Ret], + vec![Value::String("abc".into())], + ); + assert_eq!(run_module(module).unwrap(), Value::None); + } + + #[test] + fn test_float_parse_ok() { + let module = simple_module( + vec![Op::PushConst(0), Op::FloatParse, Op::Ret], + vec![Value::String("3.14".into())], + ); + assert_eq!( + run_module(module).unwrap(), + Value::Some(Box::new(Value::Float(3.14))) + ); + } + + #[test] + fn test_float_parse_fail() { + let module = simple_module( + vec![Op::PushConst(0), Op::FloatParse, Op::Ret], + vec![Value::String("nope".into())], + ); + assert_eq!(run_module(module).unwrap(), Value::None); + } + + #[test] + fn test_bool_to_string_true() { + let module = simple_module( + vec![Op::PushConst(0), Op::BoolToString, Op::Ret], + vec![Value::Bool(true)], + ); + assert_eq!(run_module(module).unwrap(), Value::String("true".into())); + } + + #[test] + fn test_bool_to_string_false() { + let module = simple_module( + vec![Op::PushConst(0), Op::BoolToString, Op::Ret], + vec![Value::Bool(false)], + ); + assert_eq!(run_module(module).unwrap(), Value::String("false".into())); + } + + #[test] + fn test_map_get_some() { + use std::collections::BTreeMap; + let mut m = BTreeMap::new(); + m.insert("key".to_string(), Value::Int(99)); + let module = simple_module( + vec![Op::PushConst(0), Op::PushConst(1), Op::MapGet, Op::Ret], + vec![Value::Map(m), Value::String("key".into())], + ); + assert_eq!( + run_module(module).unwrap(), + Value::Some(Box::new(Value::Int(99))) + ); + } + + #[test] + fn test_map_get_none() { + use std::collections::BTreeMap; + let m: BTreeMap = BTreeMap::new(); + let module = simple_module( + vec![Op::PushConst(0), Op::PushConst(1), Op::MapGet, Op::Ret], + vec![Value::Map(m), Value::String("missing".into())], + ); + assert_eq!(run_module(module).unwrap(), Value::None); + } + + #[test] + fn test_map_set() { + use std::collections::BTreeMap; + let m: BTreeMap = BTreeMap::new(); + let module = simple_module( + vec![ + Op::PushConst(0), + Op::PushConst(1), + Op::PushConst(2), + Op::MapSet, + Op::Ret, + ], + vec![ + Value::Map(m), + Value::String("x".into()), + Value::Int(5), + ], + ); + let mut expected = BTreeMap::new(); + expected.insert("x".to_string(), Value::Int(5)); + assert_eq!(run_module(module).unwrap(), Value::Map(expected)); + } + + #[test] + fn test_map_remove() { + use std::collections::BTreeMap; + let mut m = BTreeMap::new(); + m.insert("a".to_string(), Value::Int(1)); + m.insert("b".to_string(), Value::Int(2)); + let module = simple_module( + vec![Op::PushConst(0), Op::PushConst(1), Op::MapRemove, Op::Ret], + vec![Value::Map(m), Value::String("a".into())], + ); + let mut expected = BTreeMap::new(); + expected.insert("b".to_string(), Value::Int(2)); + assert_eq!(run_module(module).unwrap(), Value::Map(expected)); + } + + #[test] + fn test_map_contains_key_true() { + use std::collections::BTreeMap; + let mut m = BTreeMap::new(); + m.insert("k".to_string(), Value::Bool(true)); + let module = simple_module( + vec![ + Op::PushConst(0), + Op::PushConst(1), + Op::MapContainsKey, + Op::Ret, + ], + vec![Value::Map(m), Value::String("k".into())], + ); + assert_eq!(run_module(module).unwrap(), Value::Bool(true)); + } + + #[test] + fn test_map_contains_key_false() { + use std::collections::BTreeMap; + let m: BTreeMap = BTreeMap::new(); + let module = simple_module( + vec![ + Op::PushConst(0), + Op::PushConst(1), + Op::MapContainsKey, + Op::Ret, + ], + vec![Value::Map(m), Value::String("missing".into())], + ); + assert_eq!(run_module(module).unwrap(), Value::Bool(false)); + } + + #[test] + fn test_map_keys() { + use std::collections::BTreeMap; + let mut m = BTreeMap::new(); + m.insert("a".to_string(), Value::Int(1)); + m.insert("b".to_string(), Value::Int(2)); + let module = simple_module( + vec![Op::PushConst(0), Op::MapKeys, Op::Ret], + vec![Value::Map(m)], + ); + assert_eq!( + run_module(module).unwrap(), + Value::List(vec![Value::String("a".into()), Value::String("b".into())]) + ); + } + + #[test] + fn test_map_values() { + use std::collections::BTreeMap; + let mut m = BTreeMap::new(); + m.insert("a".to_string(), Value::Int(10)); + m.insert("b".to_string(), Value::Int(20)); + let module = simple_module( + vec![Op::PushConst(0), Op::MapValues, Op::Ret], + vec![Value::Map(m)], + ); + assert_eq!( + run_module(module).unwrap(), + Value::List(vec![Value::Int(10), Value::Int(20)]) + ); + } + + #[test] + fn test_map_len() { + use std::collections::BTreeMap; + let mut m = BTreeMap::new(); + m.insert("x".to_string(), Value::Unit); + m.insert("y".to_string(), Value::Unit); + m.insert("z".to_string(), Value::Unit); + let module = simple_module( + vec![Op::PushConst(0), Op::MapLen, Op::Ret], + vec![Value::Map(m)], + ); + assert_eq!(run_module(module).unwrap(), Value::Int(3)); + } } diff --git a/crates/llmvm/src/vm.rs b/crates/llmvm/src/vm.rs index 018d23e..1d2ab8a 100644 --- a/crates/llmvm/src/vm.rs +++ b/crates/llmvm/src/vm.rs @@ -1193,6 +1193,262 @@ impl Vm { } } } + Op::StringSplit => { + let sep = self.pop()?; + let s = self.pop()?; + match (s, sep) { + (Value::String(s), Value::String(sep)) => { + let parts: Vec = if sep.is_empty() { + s.chars().map(|c| Value::String(c.to_string())).collect() + } else { + s.split(sep.as_str()) + .map(|p| Value::String(p.to_string())) + .collect() + }; + self.push(Value::List(parts))?; + } + (Value::String(_), b) => { + return Err(VmError::TypeError { + expected: "String", + got: b.type_name(), + }) + } + (a, _) => { + return Err(VmError::TypeError { + expected: "String", + got: a.type_name(), + }) + } + } + } + Op::StringReplace => { + let to = self.pop()?; + let from = self.pop()?; + let s = self.pop()?; + match (s, from, to) { + (Value::String(s), Value::String(from), Value::String(to)) => { + self.push(Value::String(s.replace(from.as_str(), to.as_str())))?; + } + _ => { + return Err(VmError::TypeError { + expected: "String", + got: "non-String", + }) + } + } + } + Op::StringSlice => { + let end = self.pop()?; + let start = self.pop()?; + let s = self.pop()?; + match (s, start, end) { + (Value::String(s), Value::Int(start), Value::Int(end)) => { + let slice = s + .get(start as usize..end as usize) + .unwrap_or("") + .to_string(); + self.push(Value::String(slice))?; + } + _ => { + return Err(VmError::TypeError { + expected: "String, Int, Int", + got: "wrong types", + }) + } + } + } + Op::IntParse => { + let val = self.pop()?; + match val { + Value::String(s) => { + let result = match s.parse::() { + std::result::Result::Ok(n) => Value::Some(Box::new(Value::Int(n))), + std::result::Result::Err(_) => Value::None, + }; + self.push(result)?; + } + _ => { + return Err(VmError::TypeError { + expected: "String", + got: val.type_name(), + }) + } + } + } + Op::FloatParse => { + let val = self.pop()?; + match val { + Value::String(s) => { + let result = match s.parse::() { + std::result::Result::Ok(f) => { + Value::Some(Box::new(Value::Float(f))) + } + std::result::Result::Err(_) => Value::None, + }; + self.push(result)?; + } + _ => { + return Err(VmError::TypeError { + expected: "String", + got: val.type_name(), + }) + } + } + } + Op::BoolToString => { + let val = self.pop()?; + match val { + Value::Bool(b) => { + self.push(Value::String(if b { + "true".to_string() + } else { + "false".to_string() + }))?; + } + _ => { + return Err(VmError::TypeError { + expected: "Bool", + got: val.type_name(), + }) + } + } + } + Op::MapGet => { + let key = self.pop()?; + let map = self.pop()?; + match (map, key) { + (Value::Map(m), Value::String(k)) => { + let result = match m.get(&k) { + Some(v) => Value::Some(Box::new(v.clone())), + Option::None => Value::None, + }; + self.push(result)?; + } + (Value::Map(_), k) => { + return Err(VmError::TypeError { + expected: "String", + got: k.type_name(), + }) + } + (m, _) => { + return Err(VmError::TypeError { + expected: "Map", + got: m.type_name(), + }) + } + } + } + Op::MapSet => { + let val = self.pop()?; + let key = self.pop()?; + let map = self.pop()?; + match (map, key) { + (Value::Map(mut m), Value::String(k)) => { + m.insert(k, val); + self.push(Value::Map(m))?; + } + (Value::Map(_), k) => { + return Err(VmError::TypeError { + expected: "String", + got: k.type_name(), + }) + } + (m, _) => { + return Err(VmError::TypeError { + expected: "Map", + got: m.type_name(), + }) + } + } + } + Op::MapRemove => { + let key = self.pop()?; + let map = self.pop()?; + match (map, key) { + (Value::Map(mut m), Value::String(k)) => { + m.remove(&k); + self.push(Value::Map(m))?; + } + (Value::Map(_), k) => { + return Err(VmError::TypeError { + expected: "String", + got: k.type_name(), + }) + } + (m, _) => { + return Err(VmError::TypeError { + expected: "Map", + got: m.type_name(), + }) + } + } + } + Op::MapContainsKey => { + let key = self.pop()?; + let map = self.pop()?; + match (map, key) { + (Value::Map(m), Value::String(k)) => { + self.push(Value::Bool(m.contains_key(&k)))?; + } + (Value::Map(_), k) => { + return Err(VmError::TypeError { + expected: "String", + got: k.type_name(), + }) + } + (m, _) => { + return Err(VmError::TypeError { + expected: "Map", + got: m.type_name(), + }) + } + } + } + Op::MapKeys => { + let val = self.pop()?; + match val { + Value::Map(m) => { + let keys: Vec = + m.keys().map(|k| Value::String(k.clone())).collect(); + self.push(Value::List(keys))?; + } + _ => { + return Err(VmError::TypeError { + expected: "Map", + got: val.type_name(), + }) + } + } + } + Op::MapValues => { + let val = self.pop()?; + match val { + Value::Map(m) => { + let values: Vec = m.values().cloned().collect(); + self.push(Value::List(values))?; + } + _ => { + return Err(VmError::TypeError { + expected: "Map", + got: val.type_name(), + }) + } + } + } + Op::MapLen => { + let val = self.pop()?; + match val { + Value::Map(m) => { + self.push(Value::Int(m.len() as i64))?; + } + _ => { + return Err(VmError::TypeError { + expected: "Map", + got: val.type_name(), + }) + } + } + } Op::Nop => {} Op::Halt => { return Ok(self.stack.pop().unwrap_or(Value::Unit));