diff --git a/bin/Index/IRGen.cpp b/bin/Index/IRGen.cpp index 4e555fdac..4c1040511 100644 --- a/bin/Index/IRGen.cpp +++ b/bin/Index/IRGen.cpp @@ -404,11 +404,15 @@ std::optional IRGenerator::Generate( // Verify block structure. VerifyBlocks(); + // Compute stack frame layout: assign offsets to non-dynamic objects. + ComputeFrameLayout(); + LOG(INFO) << "Generated IR for function entity " << func_.func_decl_entity_id << ": " << func_.blocks.size() << " blocks, " << func_.instructions.size() << " instructions, " - << func_.objects.size() << " objects"; + << func_.objects.size() << " objects" + << ", frame=" << func_.frame_size_bytes << " bytes"; return std::move(func_); @@ -523,10 +527,12 @@ std::optional IRGenerator::GenerateGlobalInit( ComputeDominators(); ComputeRPO(); VerifyBlocks(); + ComputeFrameLayout(); LOG(INFO) << "Generated global init IR for var entity " << func_.func_decl_entity_id - << ": " << func_.instructions.size() << " instructions"; + << ": " << func_.instructions.size() << " instructions" + << ", frame=" << func_.frame_size_bytes << " bytes"; return std::move(func_); @@ -997,22 +1003,27 @@ void IRGenerator::EmitBody(const pasta::Stmt &body) { if (!is_function_body) { PushStructure(mx::ir::StructureKind::SCOPE, EntityIdOf(body)); - // Emit ENTER_SCOPE instruction. - InstructionIR enter; - enter.opcode = mx::ir::OpCode::ENTER_SCOPE; - enter.source_entity_id = EntityIdOf(body); - enter.structure_index = current_structure_index_; - EmitTopLevel(std::move(enter)); + // Emit ENTER_SCOPE — but only if the block isn't already terminated. + // If it is (e.g., after a goto), the scope is only reachable via a + // label inside; goto compensation blocks handle the scope entry. 
+ if (!CurrentBlockTerminated()) { + InstructionIR enter; + enter.opcode = mx::ir::OpCode::ENTER_SCOPE; + enter.source_entity_id = EntityIdOf(body); + enter.structure_index = current_structure_index_; + EmitTopLevel(std::move(enter)); + } } for (const auto &child : cs->Children()) { // Skip dead code after a terminator (goto/return/break/continue), - // but always process labels and case/default — they start new - // reachable blocks. + // but always process labels, case/default, and compound statements — + // they may contain labels that start new reachable blocks. if (CurrentBlockTerminated() && !pasta::LabelStmt::From(child) && !pasta::CaseStmt::From(child) && - !pasta::DefaultStmt::From(child)) continue; + !pasta::DefaultStmt::From(child) && + !pasta::CompoundStmt::From(child)) continue; EmitStmt(child); } @@ -1406,6 +1417,8 @@ void IRGenerator::EmitSwitchStmt(const pasta::Stmt &s) { auto body = sw->Body(); std::function collect_cases; collect_cases = [&](const pasta::Stmt &stmt) { + // Don't descend into nested switch statements. + if (pasta::SwitchStmt::From(stmt)) return; if (auto cs = pasta::CaseStmt::From(stmt)) { int64_t low = 0, high = 0; auto *raw_lhs = reinterpret_cast( @@ -1433,21 +1446,17 @@ void IRGenerator::EmitSwitchStmt(const pasta::Stmt &s) { uint32_t block = NewBlock(mx::ir::BlockKind::SWITCH_CASE); cases.push_back({low, high, false, block, EntityIdOf(stmt)}); case_blocks_[EntityIdOf(stmt)] = block; - // Recurse into SubStatement to find nested cases (case 1: case 2: ...). - auto sub = cs->SubStatement(); - if (pasta::CaseStmt::From(sub) || pasta::DefaultStmt::From(sub)) { - collect_cases(sub); - } + // Always recurse into SubStatement to find nested cases. + // Handles both direct nesting (case 1: case 2: ...) and + // Duff's device (case 0: do { case 7: ... } while(...);). 
+ collect_cases(cs->SubStatement()); return; } if (auto ds = pasta::DefaultStmt::From(stmt)) { uint32_t block = NewBlock(mx::ir::BlockKind::SWITCH_DEFAULT); cases.push_back({0, 0, true, block, EntityIdOf(stmt)}); case_blocks_[EntityIdOf(stmt)] = block; - auto sub = ds->SubStatement(); - if (pasta::CaseStmt::From(sub) || pasta::DefaultStmt::From(sub)) { - collect_cases(sub); - } + collect_cases(ds->SubStatement()); return; } // Recurse into children, but stop at nested switch statements — @@ -1472,6 +1481,7 @@ void IRGenerator::EmitSwitchStmt(const pasta::Stmt &s) { if (cond_type) term.type_entity_id = TypeEntityIdOf(*cond_type); // Build switch cases with full provenance. + bool has_default = false; for (const auto &ci : cases) { InstructionIR::SwitchCaseIR sc; sc.low = ci.low; @@ -1479,10 +1489,30 @@ void IRGenerator::EmitSwitchStmt(const pasta::Stmt &s) { sc.block_index = ci.block_index; sc.source_entity_id = ci.source_entity_id; sc.is_default = ci.is_default; + if (ci.is_default) has_default = true; term.switch_cases.push_back(sc); AddEdge(current_block_index_, ci.block_index); } + // If no explicit default, add an implicit default that branches to the + // switch exit block. Without this, the interpreter errors when no case + // matches (e.g., a switch with gaps in its case values). + if (!has_default) { + // Create a structure for the implicit default so serialization succeeds. + uint32_t impl_struct = PushStructure( + mx::ir::StructureKind::SWITCH_CASE, EntityIdOf(s)); + auto &sc_struct = func_.structures[impl_struct]; + sc_struct.is_default = true; + PopStructure(); + + InstructionIR::SwitchCaseIR implicit_default; + implicit_default.is_default = true; + implicit_default.block_index = exit_block; + implicit_default.structure_index = impl_struct; + term.switch_cases.push_back(implicit_default); + AddEdge(current_block_index_, exit_block); + } + uint32_t term_idx = EmitTopLevel(std::move(term)); // Push switch context so break statements work. 
@@ -1532,13 +1562,14 @@ void IRGenerator::EmitSwitchStmt(const pasta::Stmt &s) { // Record case block structure for Duff's device (external goto into case). label_structure_[cases[ci].block_index] = current_structure_index_; ci++; - // If SubStatement is another case/default, handle it via recursion - // (empty case fallthrough: case 1: case 2: case 3: body). + // Recurse into SubStatement. Handles direct nesting (case 1: case 2:) + // and Duff's device (case inside do-while). But if the sub IS a nested + // switch, emit it as code — its cases belong to the inner switch. auto sub = cs->SubStatement(); - if (pasta::CaseStmt::From(sub) || pasta::DefaultStmt::From(sub)) { - emit_case_bodies(sub); + if (pasta::SwitchStmt::From(sub)) { + EmitStmt(sub); } else { - EmitBody(sub); + emit_case_bodies(sub); } PopStructure(); // SWITCH_CASE } @@ -1558,34 +1589,60 @@ void IRGenerator::EmitSwitchStmt(const pasta::Stmt &s) { label_structure_[cases[ci].block_index] = current_structure_index_; ci++; auto sub = ds->SubStatement(); - if (pasta::CaseStmt::From(sub) || pasta::DefaultStmt::From(sub)) { - emit_case_bodies(sub); + if (pasta::SwitchStmt::From(sub)) { + EmitStmt(sub); } else { - EmitBody(sub); + emit_case_bodies(sub); } PopStructure(); // SWITCH_CASE (default) } return; } - // For CompoundStmt or other container, process children. - // Non-case/default statements (like break, assignments between cases) - // are emitted directly. if (pasta::CompoundStmt::From(stmt)) { + // CompoundStmt: recurse into children (normal switch body). for (const auto &child : stmt.Children()) { - // Skip dead code after a terminator, but always process case/default. if (CurrentBlockTerminated() && !pasta::CaseStmt::From(child) && !pasta::DefaultStmt::From(child)) continue; - // Don't descend into nested switch statements — their cases - // belong to the inner switch, not this one. 
if (pasta::SwitchStmt::From(child)) { EmitStmt(child); } else { emit_case_bodies(child); } } + } else if (auto do_stmt = pasta::DoStmt::From(stmt)) { + // Duff's device: do-while loop interleaved with switch cases. + // Process the body (which contains case statements), then emit + // the loop condition and back-edge. + // The loop body starts at the case containing this do-while + // (already processed, so ci-1), not the first nested case. + auto loop_top_ci = (ci > 0) ? ci - 1 : ci; + + // Process the body's case statements. + emit_case_bodies(do_stmt->Body()); + + // Emit the loop condition and back-edge. + // After the last case in the body, branch to the condition block. + uint32_t cond_block = NewBlock(mx::ir::BlockKind::LOOP_CONDITION); + EmitBranch(cond_block); + SwitchToBlock(cond_block); + + uint32_t cond_val = EmitRValue(do_stmt->Condition()); + PopExpressionScope(); + + // Back-edge: condition true → first case block in the loop body. + // Exit: condition false → fall through after the loop. + uint32_t loop_body_block = (loop_top_ci < cases.size()) + ? cases[loop_top_ci].block_index + : exit_block; + uint32_t loop_exit = NewBlock(mx::ir::BlockKind::LOOP_EXIT); + EmitCondBranch(cond_val, loop_body_block, loop_exit, EntityIdOf(stmt)); + SwitchToBlock(loop_exit); + } else if (pasta::WhileStmt::From(stmt) || pasta::ForStmt::From(stmt)) { + // Other loops with nested cases — emit as regular code. + EmitStmt(stmt); } else { - // Regular statement between cases (e.g., break, goto, assignment). + // Regular statement: emit as code. EmitStmt(stmt); } }; @@ -3932,15 +3989,44 @@ uint32_t IRGenerator::EmitRValue(const pasta::Expr &e) { } // VAArgExpr -- va_arg(ap, type). + // CONSUME_VA_PARAM returns a pointer to the next variadic arg's storage. + // Wrap it in a LOAD of the appropriate width (like EmitLoadFromLValue). 
if (auto va = pasta::VAArgExpr::From(e)) { uint32_t sub_idx = EmitRValue(va->SubExpression()); - InstructionIR inst; - inst.opcode = mx::ir::OpCode::MEMORY; - inst.mem_op = static_cast(mx::ir::MemOp::CONSUME_VA_PARAM); - inst.source_entity_id = eid; - if (auto t = e.Type()) inst.type_entity_id = TypeEntityIdOf(*t); - inst.operand_indices = {sub_idx}; - return emit_typed(std::move(inst)); + InstructionIR cvp; + cvp.opcode = mx::ir::OpCode::MEMORY; + cvp.mem_op = static_cast(mx::ir::MemOp::CONSUME_VA_PARAM); + cvp.source_entity_id = eid; + if (auto t = e.Type()) cvp.type_entity_id = TypeEntityIdOf(*t); + cvp.operand_indices = {sub_idx}; + uint32_t ptr_idx = EmitInstruction(std::move(cvp)); + + // For large types (>8 bytes), return the pointer directly — the caller + // will MEMCPY from it, like other aggregate lvalues. + if (auto t = e.Type()) { + if (auto sz = TypeSizeBytes(*t)) { + if (!IsScalarSize(*sz)) { + return ptr_idx; + } + } + } + + // Wrap in a LOAD for scalar types. + InstructionIR load; + load.opcode = mx::ir::OpCode::MEMORY; + load.source_entity_id = eid; + if (auto t = e.Type()) { + load.type_entity_id = TypeEntityIdOf(*t); + unsigned sz = 8; + if (auto s = TypeSizeBytes(*t)) sz = *s; + load.mem_op = static_cast( + DetermineMemOp(false, false, sz, t->IsFloatingType())); + } else { + load.mem_op = static_cast( + DetermineMemOp(false, false, 8)); + } + load.operand_indices = {ptr_idx}; + return emit_typed(std::move(load)); } @@ -4642,5 +4728,42 @@ void IRGenerator::VerifyBlocks() { } } +void IRGenerator::ComputeFrameLayout() { + uint32_t offset = 0; + func_.has_dynamic_allocas = false; + + for (auto &obj : func_.objects) { + // Dynamic allocas (VLA, alloca()) are not part of the fixed frame. + if (obj.kind == mx::ir::ObjectKind::ALLOCA) { + func_.has_dynamic_allocas = true; + obj.frame_offset = 0; + continue; + } + + // Global, thread-local, heap, and string literals live outside the frame. 
+ if (obj.kind == mx::ir::ObjectKind::GLOBAL || + obj.kind == mx::ir::ObjectKind::THREAD_LOCAL || + obj.kind == mx::ir::ObjectKind::HEAP || + obj.kind == mx::ir::ObjectKind::STRING_LITERAL) { + obj.frame_offset = 0; + continue; + } + + // Stack-allocated object: assign an aligned offset within the frame. + uint32_t align = obj.align_bytes; + if (align == 0) align = 1; + uint32_t sz = obj.size_bytes; + if (sz == 0) sz = 1; + + // Align the current offset. + offset = (offset + align - 1) & ~(align - 1); + obj.frame_offset = offset; + offset += sz; + } + + // Final frame size, rounded up to 8-byte alignment. + func_.frame_size_bytes = (offset + 7) & ~7u; +} + } // namespace ir } // namespace indexer diff --git a/bin/Index/IRGen.h b/bin/Index/IRGen.h index b0c4736a4..4aff237b3 100644 --- a/bin/Index/IRGen.h +++ b/bin/Index/IRGen.h @@ -131,6 +131,7 @@ struct ObjectIR { mx::RawEntityId type_entity_id{mx::kInvalidEntityId}; uint32_t size_bytes{0}; uint32_t align_bytes{1}; + uint32_t frame_offset{0}; // Offset within the stack frame. mx::ir::ObjectKind kind{mx::ir::ObjectKind::LOCAL}; }; @@ -168,6 +169,10 @@ struct FunctionIR { uint32_t entry_block_index{0}; uint32_t body_scope_index{UINT32_MAX}; // FUNCTION_SCOPE structure std::vector rpo_block_order; + + // Stack frame layout (computed after all objects are collected). + uint32_t frame_size_bytes{0}; // Total fixed frame size. + bool has_dynamic_allocas{false}; // True if frame needs to grow at runtime. 
}; // --------------------------------------------------------------------------- @@ -350,6 +355,7 @@ class IRGenerator { void ComputeDominators(); void ComputeRPO(); void VerifyBlocks(); + void ComputeFrameLayout(); }; } // namespace ir diff --git a/bin/Index/SerializeIR.cpp b/bin/Index/SerializeIR.cpp index a3475aa2a..b4a77a56d 100644 --- a/bin/Index/SerializeIR.cpp +++ b/bin/Index/SerializeIR.cpp @@ -417,6 +417,7 @@ void SerializeIR( ob.setSizeBytes(src.size_bytes); ob.setAlignBytes(src.align_bytes); ob.setKind(static_cast(src.kind)); + ob.setFrameOffset(src.frame_offset); } // No reverse map needed: each instruction stores parent_block_index. @@ -568,6 +569,8 @@ void SerializeIR( ffb.setBodyScopeId(MakeStructureEid(func, fragment_id, struct_offset, func.body_scope_index)); } + ffb.setFrameSizeBytes(func.frame_size_bytes); + ffb.setHasDynamicAllocas(func.has_dynamic_allocas); // Function's block and object lists go into the entity pool. uint32_t func_ent_start = pool.EntitySize(); diff --git a/bin/InterpretIR/InterpretIR.cpp b/bin/InterpretIR/InterpretIR.cpp index 66e71decb..24425152e 100644 --- a/bin/InterpretIR/InterpretIR.cpp +++ b/bin/InterpretIR/InterpretIR.cpp @@ -3,3517 +3,30 @@ // This source code is licensed in accordance with the terms specified in // the LICENSE file found in the root directory of this source tree. -// A concrete interpreter for the Multiplier IR. Walks the CFG, evaluates -// instructions, and tracks memory state. Designed as testing infrastructure -// to shake out IR generation bugs and validate the API surface. +// A concrete interpreter for the Multiplier IR. Thin CLI wrapper around the +// interpreter library at lib/IR/Interpret/. // // Usage: mx-interpret-ir --db /path/to/index.db --entity_name "function_name" -// -// Critique notes (embedded for future reference): -// - The IR API requires downcasting to get result_type() from most instructions. -// A base-class result_type() would simplify the interpreter significantly. 
-// - STRING_PTR returns a pointer to interpreter-managed storage populated -// from StringLiteral::bytes(). Each STRING_PTR instruction gets a unique -// address keyed by its entity ID. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Index.h" -#include -#include -#include -#include - -DEFINE_uint64(entity_id, mx::kInvalidEntityId, "ID of the entity to interpret"); -DEFINE_string(entity_name, "", "Name of the function to interpret"); -DEFINE_bool(trace, false, "Print each instruction as it executes"); -DEFINE_uint64(max_steps, 100000, "Maximum instruction steps before aborting"); - -namespace { - -// --------------------------------------------------------------------------- -// Value representation -// --------------------------------------------------------------------------- - -// A value is either an integer, a float, or a pointer (object + byte offset). -struct Pointer { - mx::RawEntityId object_id{mx::kInvalidEntityId}; - int64_t offset{0}; - - bool operator==(const Pointer &o) const { - return object_id == o.object_id && offset == o.offset; - } -}; - -struct Value { - enum Kind { UNDEFINED, INTEGER, FLOATING, POINTER } kind{UNDEFINED}; - int64_t ival{0}; - double fval{0.0}; - Pointer ptr{}; - - static Value Int(int64_t v) { return {INTEGER, v, 0.0, {}}; } - static Value Float(double v) { return {FLOATING, 0, v, {}}; } - static Value Ptr(mx::RawEntityId obj, int64_t off = 0) { - return {POINTER, 0, 0.0, {obj, off}}; - } - static Value Undef() { return {}; } - - bool is_truthy() const { - switch (kind) { - case INTEGER: return ival != 0; - case FLOATING: return fval != 0.0; - case POINTER: return ptr.object_id != mx::kInvalidEntityId; - default: return false; - } - } - - int64_t as_int() const { return ival; } - uint64_t as_uint() const { return static_cast(ival); } - double as_float() const { return fval; } -}; - -// 
--------------------------------------------------------------------------- -// Memory model -// --------------------------------------------------------------------------- - -struct MemoryObject { - std::vector bytes; - bool allocated{false}; - bool poisoned{false}; // Set when scope exits -}; - -// --------------------------------------------------------------------------- -// Interpreter state -// --------------------------------------------------------------------------- - -class Interpreter { - public: - Interpreter(const mx::IRFunction &func, bool trace) - : func_(func), trace_(trace) {} - - // Run the interpreter. Returns the return value (or UNDEFINED for void). - Value Run(const std::vector &args); - - private: - const mx::IRFunction &func_; - bool trace_; - - // Instruction ID → computed value. - std::unordered_map values_; - - // Object ID → memory. - std::unordered_map memory_; - - // Block ID → IRBlock (for CFG navigation). - std::unordered_map block_map_; - - // Parameter values passed to the function. - std::vector params_; - - // Pointers to parameter storage (allocated during setup, populated from params_). - // PARAM_PTR(n) returns param_ptrs_[n]. - std::vector param_ptrs_; - - // Pointer to return value storage. RETURN_PTR returns this. - Value return_ptr_ = Value::Undef(); - - // Counter for interpreter-allocated objects (return storage, etc.). - // The memory_ map uses uint64_t keys. Real entity IDs use packed - // formats with category/fragment/offset bits. We use small integers - // (1, 2, 3...) which can't collide with packed entity IDs since those - // always have category bits set in the high word. - uint64_t next_interp_object_id_{1}; - - uint64_t steps_{0}; - - // Evaluate a single instruction, storing result in values_. - void Eval(const mx::IRInstruction &inst); - - // Recursively evaluate all sub-expressions of an instruction. 
- void EvalSubExpressions(const mx::IRInstruction &inst); - - // Get the value of an instruction (must have been evaluated already). - Value GetValue(const mx::IRInstruction &inst); - - // Memory operations. - void MemWrite(const Pointer &ptr, const void *data, size_t len); - void MemRead(const Pointer &ptr, void *data, size_t len); - void MemWriteValue(const Pointer &ptr, const Value &val, size_t size); - Value MemReadValue(const Pointer &ptr, size_t size, bool is_float); - - // Pointer shadow map: tracks which memory locations hold pointer values. - // Key = (object_id << 32) | offset. When a pointer is written, it's - // recorded here. When loading pointer-sized values, check here first. - std::unordered_map pointer_shadow_; - - // Allocate memory for an object. - void AllocateObject(const mx::IRObject &obj); - - // Trace output. - void Trace(const mx::IRInstruction &inst, const Value &result); -}; - -// --------------------------------------------------------------------------- -// Memory implementation -// --------------------------------------------------------------------------- - -void Interpreter::AllocateObject(const mx::IRObject &obj) { - auto eid = mx::EntityId(obj.id()).Pack(); - auto &mem = memory_[eid]; - uint32_t size = obj.size_bytes(); - if (size == 0) size = 8; // Default for unknown-size objects. - mem.bytes.resize(size, 0); - mem.allocated = true; - mem.poisoned = false; -} - -void Interpreter::MemWrite(const Pointer &ptr, const void *data, size_t len) { - auto it = memory_.find(ptr.object_id); - if (it == memory_.end()) { - LOG(WARNING) << "Write to unallocated object " << ptr.object_id; - return; - } - auto &mem = it->second; - if (mem.poisoned) { - LOG(WARNING) << "Write to poisoned (out-of-scope) object " << ptr.object_id; - } - size_t start = static_cast(ptr.offset); - if (start + len > mem.bytes.size()) { - // Auto-grow for VLA-like objects (compile-time size unknown). 
- mem.bytes.resize(start + len, 0); - } - std::memcpy(mem.bytes.data() + start, data, len); -} - -void Interpreter::MemRead(const Pointer &ptr, void *data, size_t len) { - auto it = memory_.find(ptr.object_id); - if (it == memory_.end()) { - LOG(WARNING) << "Read from unallocated object " << ptr.object_id; - std::memset(data, 0, len); - return; - } - auto &mem = it->second; - if (mem.poisoned) { - LOG(WARNING) << "Read from poisoned (out-of-scope) object " << ptr.object_id; - } - size_t start = static_cast(ptr.offset); - if (start + len > mem.bytes.size()) { - // Auto-grow for VLA-like objects. - mem.bytes.resize(start + len, 0); - } - std::memcpy(data, mem.bytes.data() + start, len); -} - -void Interpreter::MemWriteValue(const Pointer &ptr, const Value &val, - size_t size) { - uint64_t shadow_key = (static_cast(ptr.object_id) << 32) | - (static_cast(ptr.offset) & 0xFFFFFFFF); - if (val.kind == Value::POINTER) { - // Record pointer in shadow map for later loads. - pointer_shadow_[shadow_key] = val.ptr; - // Also write a sentinel to memory bytes (not meaningful, just fills space). - int64_t sentinel = 0; - MemWrite(ptr, &sentinel, std::min(size, sizeof(sentinel))); - return; - } - // Non-pointer write: clear pointer shadow at this location. - pointer_shadow_.erase(shadow_key); - if (val.kind == Value::FLOATING) { - if (size == 4) { - float f = static_cast(val.fval); - MemWrite(ptr, &f, 4); - } else { - MemWrite(ptr, &val.fval, 8); - } - } else { - // Integer or undefined — write as int. - MemWrite(ptr, &val.ival, std::min(size, sizeof(val.ival))); - } -} - -Value Interpreter::MemReadValue(const Pointer &ptr, size_t size, - bool is_float) { - // Check pointer shadow map first. 
- uint64_t shadow_key = (static_cast(ptr.object_id) << 32) | - (static_cast(ptr.offset) & 0xFFFFFFFF); - auto pit = pointer_shadow_.find(shadow_key); - if (pit != pointer_shadow_.end()) { - return Value::Ptr(pit->second.object_id, pit->second.offset); - } - if (is_float) { - if (size == 4) { - float f = 0; - MemRead(ptr, &f, 4); - return Value::Float(static_cast(f)); - } - double d = 0; - MemRead(ptr, &d, 8); - return Value::Float(d); - } - // Read integer and sign-extend to int64 to match CONST representation. - // Everything is int64 internally. Sign extension ensures -10 stored in - // 4 bytes reads back as int64(-10), matching CONST(INT32, -10). - int64_t v = 0; - MemRead(ptr, &v, std::min(size, sizeof(v))); - switch (size) { - case 1: v = static_cast(static_cast(v)); break; - case 2: v = static_cast(static_cast(v)); break; - case 4: v = static_cast(static_cast(v)); break; - default: break; - } - return Value::Int(v); -} - -// --------------------------------------------------------------------------- -// Value access -// --------------------------------------------------------------------------- - -Value Interpreter::GetValue(const mx::IRInstruction &inst) { - auto op = inst.opcode(); - auto eid = mx::EntityId(inst.id()).Pack(); - - // Use cached result if available. - auto it = values_.find(eid); - if (it != values_.end()) return it->second; - - // Lazy evaluation for sub-expressions not yet computed. 
- if (!mx::ir::IsTerminator(op)) { - Eval(inst); - it = values_.find(eid); - if (it != values_.end()) return it->second; - } - return Value::Undef(); -} - -// --------------------------------------------------------------------------- -// Trace -// --------------------------------------------------------------------------- - -void Interpreter::Trace(const mx::IRInstruction &inst, const Value &result) { - if (!trace_) return; - std::cerr << " [" << steps_ << "] " - << static_cast(inst.opcode()); - switch (result.kind) { - case Value::INTEGER: - std::cerr << " → " << result.ival; - break; - case Value::FLOATING: - std::cerr << " → " << result.fval; - break; - case Value::POINTER: - std::cerr << " → ptr(" << result.ptr.object_id - << "+" << result.ptr.offset << ")"; - break; - default: - std::cerr << " → undef"; - break; - } - std::cerr << "\n"; -} - -// --------------------------------------------------------------------------- -// Instruction evaluation -// --------------------------------------------------------------------------- - -void Interpreter::Eval(const mx::IRInstruction &inst) { - auto op = inst.opcode(); - auto eid = mx::EntityId(inst.id()).Pack(); - Value result = Value::Undef(); - - switch (op) { - - // --- Constants --- - case mx::ir::OpCode::CONST: { - if (auto ci = mx::ConstInst::from(inst)) { - auto sub = ci->sub_opcode(); - if (sub == mx::ir::ConstOp::NULL_PTR) { - result = Value::Ptr(mx::kInvalidEntityId, 0); - } else if (sub == mx::ir::ConstOp::FLOAT32 || - sub == mx::ir::ConstOp::FLOAT64 || - sub == mx::ir::ConstOp::FLOAT16) { - result = Value::Float(ci->float_value()); - } else if (sub == mx::ir::ConstOp::UINT64) { - // UINT64: use unsigned value directly (no sign extension needed). - result = Value::Int(static_cast(ci->unsigned_value())); - } else if (sub == mx::ir::ConstOp::UINT32) { - // UINT32: sign-extend to match LOAD_LE_32 sign-extension. 
- result = Value::Int(static_cast( - static_cast(static_cast(ci->unsigned_value())))); - } else if (sub == mx::ir::ConstOp::UINT16) { - result = Value::Int(static_cast( - static_cast(static_cast(ci->unsigned_value())))); - } else if (sub == mx::ir::ConstOp::UINT8) { - result = Value::Int(static_cast( - static_cast(static_cast(ci->unsigned_value())))); - } else { - result = Value::Int(ci->signed_value()); - } - } - break; - } - - // --- Memory --- - case mx::ir::OpCode::ALLOCA: { - if (auto ai = mx::AllocaInst::from(inst)) { - auto obj = ai->object(); - auto obj_eid = mx::EntityId(obj.id()).Pack(); - if (memory_.find(obj_eid) == memory_.end()) { - // For DYNAMIC allocas (VLAs), use the runtime size operand. - if (auto da = mx::DynamicAllocaInst::from(inst)) { - Value sz_val = GetValue(da->size()); - uint32_t runtime_sz = static_cast(sz_val.as_int()); - if (runtime_sz > 0) { - auto &mem = memory_[obj_eid]; - mem.bytes.resize(runtime_sz, 0); - mem.allocated = true; - mem.poisoned = false; - } else { - AllocateObject(obj); - } - } else { - AllocateObject(obj); - } - } - result = Value::Ptr(obj_eid, 0); - } - break; - } - - // STRING_PTR: pointer to a string literal. The interpreter allocates - // storage keyed by the instruction's entity ID and populates it from - // StringLiteral::bytes(). Subsequent evaluations return the same pointer. 
- case mx::ir::OpCode::STRING_PTR_32: - case mx::ir::OpCode::STRING_PTR_64: { - auto inst_eid = mx::EntityId(inst.id()).Pack(); - if (memory_.find(inst_eid) == memory_.end()) { - if (auto src = inst.source_statement()) { - if (auto sl = mx::StringLiteral::from(*src)) { - auto bytes = sl->bytes(); - uint32_t char_width = sl->character_byte_width(); - uint32_t total = sl->byte_length() + char_width; - auto &mem = memory_[inst_eid]; - mem.bytes.resize(total, 0); - mem.allocated = true; - mem.poisoned = false; - size_t copy_len = std::min(bytes.size(), total); - std::memcpy(mem.bytes.data(), bytes.data(), copy_len); - } - } - } - result = Value::Ptr(inst_eid, 0); - break; - } - case mx::ir::OpCode::MEMORY: { - if (auto mi = mx::MemoryInst::from(inst)) { - auto sub = mi->sub_opcode(); - if (mx::ir::IsDirectLoadStore(sub)) { - unsigned sz = mx::ir::AccessSize(sub); - bool is_float = mx::ir::IsFloatLoad(sub); - if (mx::ir::IsAnyLoad(sub)) { - Value addr = GetValue(mi->address()); - if (addr.kind == Value::POINTER) { - result = MemReadValue(addr.ptr, sz, is_float); - } else { - LOG(WARNING) << "MEMORY load from non-pointer value"; - } - } else { - // Store. - Value addr = GetValue(mi->address()); - Value val = GetValue(mi->stored_value()); - if (addr.kind == Value::POINTER) { - MemWriteValue(addr.ptr, val, sz); - } else { - LOG(WARNING) << "MEMORY store to non-pointer value"; - } - } - } else { - // Bulk memory/string operations. 
- auto ops_gen = inst.operands(); - std::vector ops; - for (auto op_inst : ops_gen) { - ops.push_back(GetValue(op_inst)); - } - using MO = mx::ir::MemOp; - switch (sub) { - case MO::MEMSET: { - if (ops.size() >= 3 && ops[0].kind == Value::POINTER && ops[2].as_int() > 0) { - auto it = memory_.find(ops[0].ptr.object_id); - if (it != memory_.end()) { - size_t start = static_cast(ops[0].ptr.offset); - size_t len = static_cast(ops[2].as_int()); - size_t end = std::min(start + len, it->second.bytes.size()); - std::memset(it->second.bytes.data() + start, - static_cast(ops[1].as_int()), end - start); - } - } - result = ops.empty() ? Value::Undef() : ops[0]; - break; - } - case MO::MEMCPY: - case MO::MEMMOVE: { - if (ops.size() >= 3 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER && ops[2].as_int() > 0) { - size_t len = static_cast(ops[2].as_int()); - std::vector tmp(len); - MemRead(ops[1].ptr, tmp.data(), len); - MemWrite(ops[0].ptr, tmp.data(), len); - } - result = ops.empty() ? Value::Undef() : ops[0]; - break; - } - case MO::BZERO: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER && ops[1].as_int() > 0) { - auto it = memory_.find(ops[0].ptr.object_id); - if (it != memory_.end()) { - size_t start = static_cast(ops[0].ptr.offset); - size_t len = static_cast(ops[1].as_int()); - size_t end = std::min(start + len, it->second.bytes.size()); - std::memset(it->second.bytes.data() + start, 0, end - start); - } - } - result = ops.empty() ? 
Value::Undef() : ops[0]; - break; - } - case MO::STRLEN: { - if (ops.size() >= 1 && ops[0].kind == Value::POINTER) { - auto it = memory_.find(ops[0].ptr.object_id); - if (it != memory_.end()) { - size_t start = static_cast(ops[0].ptr.offset); - size_t len = 0; - while (start + len < it->second.bytes.size() && - it->second.bytes[start + len] != 0) ++len; - result = Value::Int(static_cast(len)); - } - } - break; - } - case MO::STRCMP: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - auto it0 = memory_.find(ops[0].ptr.object_id); - auto it1 = memory_.find(ops[1].ptr.object_id); - if (it0 != memory_.end() && it1 != memory_.end()) { - size_t s0 = static_cast(ops[0].ptr.offset); - size_t s1 = static_cast(ops[1].ptr.offset); - int cmp = 0; - while (true) { - uint8_t c0 = (s0 < it0->second.bytes.size()) ? it0->second.bytes[s0] : 0; - uint8_t c1 = (s1 < it1->second.bytes.size()) ? it1->second.bytes[s1] : 0; - if (c0 != c1) { cmp = (c0 < c1) ? -1 : 1; break; } - if (c0 == 0) break; - ++s0; ++s1; - } - result = Value::Int(cmp); - } - } - break; - } - case MO::MEMCMP: { - if (ops.size() >= 3 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - size_t len = static_cast(ops[2].as_int()); - std::vector buf0(len, 0), buf1(len, 0); - MemRead(ops[0].ptr, buf0.data(), len); - MemRead(ops[1].ptr, buf1.data(), len); - result = Value::Int(std::memcmp(buf0.data(), buf1.data(), len)); - } - break; - } - case MO::MEMCHR: { - if (ops.size() >= 3 && ops[0].kind == Value::POINTER) { - size_t len = static_cast(ops[2].as_int()); - uint8_t needle = static_cast(ops[1].as_int()); - auto it = memory_.find(ops[0].ptr.object_id); - if (it != memory_.end()) { - size_t start = static_cast(ops[0].ptr.offset); - for (size_t i = 0; i < len && start + i < it->second.bytes.size(); ++i) { - if (it->second.bytes[start + i] == needle) { - result = Value::Ptr(ops[0].ptr.object_id, - ops[0].ptr.offset + static_cast(i)); - break; - } - } - } - } 
- break; - } - case MO::STRCHR: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER) { - uint8_t needle = static_cast(ops[1].as_int()); - auto it = memory_.find(ops[0].ptr.object_id); - if (it != memory_.end()) { - size_t start = static_cast(ops[0].ptr.offset); - bool found = false; - for (size_t i = start; i < it->second.bytes.size(); ++i) { - if (it->second.bytes[i] == needle) { - result = Value::Ptr(ops[0].ptr.object_id, - static_cast(i)); - found = true; - break; - } - if (it->second.bytes[i] == 0) break; - } - if (!found) { - // If searching for null terminator, point to it. - if (needle == 0) { - for (size_t i = start; i < it->second.bytes.size(); ++i) { - if (it->second.bytes[i] == 0) { - result = Value::Ptr(ops[0].ptr.object_id, - static_cast(i)); - found = true; - break; - } - } - } - if (!found) { - result = Value::Ptr(mx::kInvalidEntityId, 0); // NULL - } - } - } - } - break; - } - case MO::STRNLEN: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER) { - int64_t maxlen = ops[1].as_int(); - auto it = memory_.find(ops[0].ptr.object_id); - if (it != memory_.end()) { - size_t start = static_cast(ops[0].ptr.offset); - size_t len = 0; - while (len < static_cast(maxlen) && - start + len < it->second.bytes.size() && - it->second.bytes[start + len] != 0) ++len; - result = Value::Int(static_cast(len)); - } - } - break; - } - case MO::STRNCMP: { - if (ops.size() >= 3 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - size_t n = static_cast(ops[2].as_int()); - auto it0 = memory_.find(ops[0].ptr.object_id); - auto it1 = memory_.find(ops[1].ptr.object_id); - if (it0 != memory_.end() && it1 != memory_.end()) { - size_t s0 = static_cast(ops[0].ptr.offset); - size_t s1 = static_cast(ops[1].ptr.offset); - int cmp = 0; - for (size_t i = 0; i < n; ++i) { - uint8_t c0 = (s0 + i < it0->second.bytes.size()) ? it0->second.bytes[s0 + i] : 0; - uint8_t c1 = (s1 + i < it1->second.bytes.size()) ? 
it1->second.bytes[s1 + i] : 0; - if (c0 != c1) { cmp = (c0 < c1) ? -1 : 1; break; } - if (c0 == 0) break; - } - result = Value::Int(cmp); - } - } - break; - } - case MO::STRRCHR: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER) { - uint8_t needle = static_cast(ops[1].as_int()); - auto it = memory_.find(ops[0].ptr.object_id); - if (it != memory_.end()) { - size_t start = static_cast(ops[0].ptr.offset); - int64_t last_pos = -1; - for (size_t i = start; i < it->second.bytes.size(); ++i) { - if (it->second.bytes[i] == needle) { - last_pos = static_cast(i); - } - if (it->second.bytes[i] == 0) break; - } - if (last_pos >= 0) { - result = Value::Ptr(ops[0].ptr.object_id, last_pos); - } else { - result = Value::Ptr(mx::kInvalidEntityId, 0); // NULL - } - } - } - break; - } - case MO::STRSTR: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - auto it0 = memory_.find(ops[0].ptr.object_id); - auto it1 = memory_.find(ops[1].ptr.object_id); - if (it0 != memory_.end() && it1 != memory_.end()) { - // Read haystack string. - size_t hs = static_cast(ops[0].ptr.offset); - size_t hlen = 0; - while (hs + hlen < it0->second.bytes.size() && - it0->second.bytes[hs + hlen] != 0) ++hlen; - // Read needle string. - size_t ns = static_cast(ops[1].ptr.offset); - size_t nlen = 0; - while (ns + nlen < it1->second.bytes.size() && - it1->second.bytes[ns + nlen] != 0) ++nlen; - if (nlen == 0) { - result = ops[0]; // Empty needle: return haystack. 
- } else { - bool found = false; - for (size_t i = 0; i + nlen <= hlen; ++i) { - if (std::memcmp(it0->second.bytes.data() + hs + i, - it1->second.bytes.data() + ns, nlen) == 0) { - result = Value::Ptr(ops[0].ptr.object_id, - ops[0].ptr.offset + static_cast(i)); - found = true; - break; - } - } - if (!found) { - result = Value::Ptr(mx::kInvalidEntityId, 0); // NULL - } - } - } - } - break; - } - case MO::STRCPY: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - auto it_src = memory_.find(ops[1].ptr.object_id); - if (it_src != memory_.end()) { - size_t ss = static_cast(ops[1].ptr.offset); - size_t ds = static_cast(ops[0].ptr.offset); - Pointer dp = ops[0].ptr; - for (size_t i = 0; ; ++i) { - uint8_t c = (ss + i < it_src->second.bytes.size()) ? it_src->second.bytes[ss + i] : 0; - dp.offset = ops[0].ptr.offset + static_cast(i); - MemWrite(dp, &c, 1); - if (c == 0) break; - } - } - result = ops[0]; // Return dest. - } - break; - } - case MO::STRNCPY: { - if (ops.size() >= 3 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - size_t n = static_cast(ops[2].as_int()); - auto it_src = memory_.find(ops[1].ptr.object_id); - if (it_src != memory_.end()) { - size_t ss = static_cast(ops[1].ptr.offset); - Pointer dp = ops[0].ptr; - bool hit_null = false; - for (size_t i = 0; i < n; ++i) { - uint8_t c = 0; - if (!hit_null && ss + i < it_src->second.bytes.size()) { - c = it_src->second.bytes[ss + i]; - if (c == 0) hit_null = true; - } - dp.offset = ops[0].ptr.offset + static_cast(i); - MemWrite(dp, &c, 1); - } - } - result = ops[0]; // Return dest. - } - break; - } - case MO::STRCAT: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - // Find end of dest string. 
- auto it_dst = memory_.find(ops[0].ptr.object_id); - auto it_src = memory_.find(ops[1].ptr.object_id); - if (it_dst != memory_.end() && it_src != memory_.end()) { - size_t ds = static_cast(ops[0].ptr.offset); - size_t dlen = 0; - while (ds + dlen < it_dst->second.bytes.size() && - it_dst->second.bytes[ds + dlen] != 0) ++dlen; - // Copy src after dest's null. - size_t ss = static_cast(ops[1].ptr.offset); - Pointer dp = {ops[0].ptr.object_id, - ops[0].ptr.offset + static_cast(dlen)}; - for (size_t i = 0; ; ++i) { - uint8_t c = (ss + i < it_src->second.bytes.size()) ? it_src->second.bytes[ss + i] : 0; - dp.offset = ops[0].ptr.offset + static_cast(dlen + i); - MemWrite(dp, &c, 1); - if (c == 0) break; - } - } - result = ops[0]; // Return dest. - } - break; - } - case MO::STRNCAT: { - if (ops.size() >= 3 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - size_t n = static_cast(ops[2].as_int()); - auto it_dst = memory_.find(ops[0].ptr.object_id); - auto it_src = memory_.find(ops[1].ptr.object_id); - if (it_dst != memory_.end() && it_src != memory_.end()) { - size_t ds = static_cast(ops[0].ptr.offset); - size_t dlen = 0; - while (ds + dlen < it_dst->second.bytes.size() && - it_dst->second.bytes[ds + dlen] != 0) ++dlen; - size_t ss = static_cast(ops[1].ptr.offset); - Pointer dp = {ops[0].ptr.object_id, 0}; - size_t i = 0; - for (; i < n; ++i) { - uint8_t c = (ss + i < it_src->second.bytes.size()) ? it_src->second.bytes[ss + i] : 0; - if (c == 0) break; - dp.offset = ops[0].ptr.offset + static_cast(dlen + i); - MemWrite(dp, &c, 1); - } - // Write null terminator. - uint8_t nul = 0; - dp.offset = ops[0].ptr.offset + static_cast(dlen + i); - MemWrite(dp, &nul, 1); - } - result = ops[0]; // Return dest. 
- } - break; - } - case MO::STPCPY: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - auto it_src = memory_.find(ops[1].ptr.object_id); - if (it_src != memory_.end()) { - size_t ss = static_cast(ops[1].ptr.offset); - Pointer dp = ops[0].ptr; - size_t i = 0; - for (; ; ++i) { - uint8_t c = (ss + i < it_src->second.bytes.size()) ? it_src->second.bytes[ss + i] : 0; - dp.offset = ops[0].ptr.offset + static_cast(i); - MemWrite(dp, &c, 1); - if (c == 0) break; - } - // Return pointer to the null terminator in dest. - result = Value::Ptr(ops[0].ptr.object_id, - ops[0].ptr.offset + static_cast(i)); - } - } - break; - } - case MO::STPNCPY: { - if (ops.size() >= 3 && ops[0].kind == Value::POINTER - && ops[1].kind == Value::POINTER) { - size_t n = static_cast(ops[2].as_int()); - auto it_src = memory_.find(ops[1].ptr.object_id); - if (it_src != memory_.end()) { - size_t ss = static_cast(ops[1].ptr.offset); - Pointer dp = ops[0].ptr; - bool hit_null = false; - size_t null_pos = n; // default: dest+n - for (size_t i = 0; i < n; ++i) { - uint8_t c = 0; - if (!hit_null && ss + i < it_src->second.bytes.size()) { - c = it_src->second.bytes[ss + i]; - if (c == 0) { hit_null = true; null_pos = i; } - } else if (!hit_null) { - hit_null = true; - null_pos = i; - } - dp.offset = ops[0].ptr.offset + static_cast(i); - MemWrite(dp, &c, 1); - } - // Return pointer to null terminator or dest+n. - result = Value::Ptr(ops[0].ptr.object_id, - ops[0].ptr.offset + static_cast(null_pos)); - } - } - break; - } - case MO::STRTOI32: case MO::STRTOI64: - case MO::STRTOU32: case MO::STRTOU64: - case MO::STRTOF32: case MO::STRTOF64: { - if (ops.size() >= 1 && ops[0].kind == Value::POINTER) { - auto it = memory_.find(ops[0].ptr.object_id); - if (it != memory_.end()) { - // Read string bytes into a null-terminated buffer. 
- size_t start = static_cast(ops[0].ptr.offset); - std::string str; - for (size_t i = start; i < it->second.bytes.size(); ++i) { - if (it->second.bytes[i] == 0) break; - str.push_back(static_cast(it->second.bytes[i])); - } - switch (sub) { - case MO::STRTOI32: - result = Value::Int(static_cast( - std::strtol(str.c_str(), nullptr, 10))); - break; - case MO::STRTOI64: - result = Value::Int(static_cast( - std::strtoll(str.c_str(), nullptr, 10))); - break; - case MO::STRTOU32: - result = Value::Int(static_cast( - std::strtoul(str.c_str(), nullptr, 10))); - break; - case MO::STRTOU64: - result = Value::Int(static_cast( - std::strtoull(str.c_str(), nullptr, 10))); - break; - case MO::STRTOF32: - result = Value::Float(static_cast( - std::strtof(str.c_str(), nullptr))); - break; - case MO::STRTOF64: - result = Value::Float( - std::strtod(str.c_str(), nullptr)); - break; - default: - break; - } - } - } - break; - } - case MO::BIT_READ_LE: case MO::BIT_READ_BE: { - if (ops.size() >= 1 && ops[0].kind == Value::POINTER) { - uint32_t bo = mi->bit_offset(); - uint32_t bw = mi->bit_width(); - // Compute which bytes to read. - uint32_t first_byte = bo / 8; - uint32_t last_byte = (bo + bw - 1) / 8; - uint32_t num_bytes = last_byte - first_byte + 1; - std::vector buf(num_bytes, 0); - Pointer rp = ops[0].ptr; - rp.offset += first_byte; - MemRead(rp, buf.data(), num_bytes); - uint64_t raw = 0; - if (sub == MO::BIT_READ_LE) { - // LE: bit 0 = LSB of byte 0. - for (uint32_t i = 0; i < num_bytes; ++i) { - raw |= static_cast(buf[i]) << (i * 8); - } - // Shift right to remove bits below bit_offset within the - // fetched bytes. - raw >>= (bo % 8); - } else { - // BE: bit 0 = MSB of byte 0. Read bytes MSB-first. - for (uint32_t i = 0; i < num_bytes; ++i) { - raw = (raw << 8) | buf[i]; - } - // Bits are numbered from MSB. The field starts at - // bit bo within the full object. Within the fetched - // window, the field starts at (bo % 8) from the MSB - // of the first byte. 
- uint32_t top_bits = num_bytes * 8; - uint32_t shift = top_bits - (bo % 8) - bw; - raw >>= shift; - } - // Mask to bit_width. - uint64_t mask = (bw >= 64) ? ~uint64_t{0} : ((uint64_t{1} << bw) - 1); - raw &= mask; - result = Value::Int(static_cast(raw)); - } - break; - } - case MO::BIT_WRITE_LE: case MO::BIT_WRITE_BE: { - if (ops.size() >= 2 && ops[0].kind == Value::POINTER) { - uint32_t bo = mi->bit_offset(); - uint32_t bw = mi->bit_width(); - uint64_t val = static_cast(ops[1].as_int()); - uint64_t mask = (bw >= 64) ? ~uint64_t{0} : ((uint64_t{1} << bw) - 1); - val &= mask; - uint32_t first_byte = bo / 8; - uint32_t last_byte = (bo + bw - 1) / 8; - uint32_t num_bytes = last_byte - first_byte + 1; - std::vector buf(num_bytes, 0); - Pointer rp = ops[0].ptr; - rp.offset += first_byte; - MemRead(rp, buf.data(), num_bytes); - if (sub == MO::BIT_WRITE_LE) { - // LE: assemble bytes as little-endian integer. - uint64_t raw = 0; - for (uint32_t i = 0; i < num_bytes; ++i) { - raw |= static_cast(buf[i]) << (i * 8); - } - uint32_t shift = bo % 8; - raw &= ~(mask << shift); - raw |= (val << shift); - for (uint32_t i = 0; i < num_bytes; ++i) { - buf[i] = static_cast(raw >> (i * 8)); - } - } else { - // BE: assemble bytes as big-endian integer. - uint64_t raw = 0; - for (uint32_t i = 0; i < num_bytes; ++i) { - raw = (raw << 8) | buf[i]; - } - uint32_t top_bits = num_bytes * 8; - uint32_t shift = top_bits - (bo % 8) - bw; - raw &= ~(mask << shift); - raw |= (val << shift); - for (uint32_t i = 0; i < num_bytes; ++i) { - buf[num_bytes - 1 - i] = static_cast(raw >> (i * 8)); - } - } - MemWrite(rp, buf.data(), num_bytes); - } - break; - } - default: - if (mx::ir::IsCmpxchg(sub)) { - // Simplified: return undef (complex semantics). 
- result = Value::Undef(); - } - break; - } - } - } - break; - } - case mx::ir::OpCode::GEP_FIELD_32: - case mx::ir::OpCode::GEP_FIELD_64: { - if (auto gep = mx::GEPFieldInst::from(inst)) { - Value base = GetValue(gep->base()); - int64_t off = gep->byte_offset(); - if (base.kind == Value::POINTER) { - result = Value::Ptr(base.ptr.object_id, base.ptr.offset + off); - } - } - break; - } - case mx::ir::OpCode::PTR_ADD_32: - case mx::ir::OpCode::PTR_ADD_64: { - if (auto pa = mx::PtrAddInst::from(inst)) { - Value base = GetValue(pa->base()); - Value idx = GetValue(pa->index()); - int64_t elem_size = pa->element_size(); - if (base.kind == Value::POINTER) { - result = Value::Ptr(base.ptr.object_id, - base.ptr.offset + idx.as_int() * elem_size); - } - } - break; - } - - // --- Integer binary arithmetic (width-correct) --- - case mx::ir::OpCode::ADD_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) + static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::ADD_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) + static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::ADD_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) + static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::ADD_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(GetValue(bin->lhs()).as_int() + GetValue(bin->rhs()).as_int()); - break; - } - case mx::ir::OpCode::SUB_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) - static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::SUB_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = 
Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) - static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::SUB_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) - static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::SUB_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(GetValue(bin->lhs()).as_int() - GetValue(bin->rhs()).as_int()); - break; - } - case mx::ir::OpCode::MUL_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) * static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::MUL_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) * static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::MUL_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) * static_cast(GetValue(bin->rhs()).as_int()))); - break; - } - case mx::ir::OpCode::MUL_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(GetValue(bin->lhs()).as_int() * GetValue(bin->rhs()).as_int()); - break; - } - case mx::ir::OpCode::DIV_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - int8_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(r ? static_cast(static_cast(GetValue(bin->lhs()).as_int()) / r) : 0); - } - break; - } - case mx::ir::OpCode::DIV_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - int16_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(r ? 
static_cast(static_cast(GetValue(bin->lhs()).as_int()) / r) : 0); - } - break; - } - case mx::ir::OpCode::DIV_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - int32_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(r ? static_cast(static_cast(GetValue(bin->lhs()).as_int()) / r) : 0); - } - break; - } - case mx::ir::OpCode::DIV_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - int64_t r = GetValue(bin->rhs()).as_int(); - result = Value::Int(r != 0 ? GetValue(bin->lhs()).as_int() / r : 0); - } - break; - } - case mx::ir::OpCode::REM_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - int8_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(r ? static_cast(static_cast(GetValue(bin->lhs()).as_int()) % r) : 0); - } - break; - } - case mx::ir::OpCode::REM_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - int16_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(r ? static_cast(static_cast(GetValue(bin->lhs()).as_int()) % r) : 0); - } - break; - } - case mx::ir::OpCode::REM_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - int32_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(r ? static_cast(static_cast(GetValue(bin->lhs()).as_int()) % r) : 0); - } - break; - } - case mx::ir::OpCode::REM_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - int64_t r = GetValue(bin->rhs()).as_int(); - result = Value::Int(r != 0 ? 
GetValue(bin->lhs()).as_int() % r : 0); - } - break; - } - - // --- Float binary arithmetic --- - case mx::ir::OpCode::FADD_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(static_cast(GetValue(bin->lhs()).as_float()) + static_cast(GetValue(bin->rhs()).as_float())); - break; - } - case mx::ir::OpCode::FADD_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(GetValue(bin->lhs()).as_float() + GetValue(bin->rhs()).as_float()); - break; - } - case mx::ir::OpCode::FSUB_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(static_cast(GetValue(bin->lhs()).as_float()) - static_cast(GetValue(bin->rhs()).as_float())); - break; - } - case mx::ir::OpCode::FSUB_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(GetValue(bin->lhs()).as_float() - GetValue(bin->rhs()).as_float()); - break; - } - case mx::ir::OpCode::FMUL_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(static_cast(GetValue(bin->lhs()).as_float()) * static_cast(GetValue(bin->rhs()).as_float())); - break; - } - case mx::ir::OpCode::FMUL_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(GetValue(bin->lhs()).as_float() * GetValue(bin->rhs()).as_float()); - break; - } - case mx::ir::OpCode::FDIV_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(static_cast(GetValue(bin->lhs()).as_float()) / static_cast(GetValue(bin->rhs()).as_float())); - break; - } - case mx::ir::OpCode::FDIV_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(GetValue(bin->lhs()).as_float() / GetValue(bin->rhs()).as_float()); - break; - } - case mx::ir::OpCode::FREM_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Float(std::fmodf(static_cast(GetValue(bin->lhs()).as_float()), static_cast(GetValue(bin->rhs()).as_float()))); - break; - } - case mx::ir::OpCode::FREM_64: { - auto bin = 
mx::BinaryInst::from(inst); - if (bin) result = Value::Float(std::fmod(GetValue(bin->lhs()).as_float(), GetValue(bin->rhs()).as_float())); - break; - } - case mx::ir::OpCode::BIT_AND_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() & GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_AND_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() & GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_AND_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() & GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_AND_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(GetValue(bin->lhs()).as_int() & GetValue(bin->rhs()).as_int()); - break; - } - case mx::ir::OpCode::BIT_OR_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() | GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_OR_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() | GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_OR_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() | GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_OR_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(GetValue(bin->lhs()).as_int() | GetValue(bin->rhs()).as_int()); - break; - } - case mx::ir::OpCode::BIT_XOR_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() ^ GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_XOR_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) 
result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() ^ GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_XOR_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int() ^ GetValue(bin->rhs()).as_int())); - break; - } - case mx::ir::OpCode::BIT_XOR_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(GetValue(bin->lhs()).as_int() ^ GetValue(bin->rhs()).as_int()); - break; - } - case mx::ir::OpCode::SHL_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) << (GetValue(bin->rhs()).as_int() & 7))); - break; - } - case mx::ir::OpCode::SHL_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) << (GetValue(bin->rhs()).as_int() & 15))); - break; - } - case mx::ir::OpCode::SHL_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) << (GetValue(bin->rhs()).as_int() & 31))); - break; - } - case mx::ir::OpCode::SHL_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(GetValue(bin->lhs()).as_int() << (GetValue(bin->rhs()).as_int() & 63)); - break; - } - case mx::ir::OpCode::SHR_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int()) >> (GetValue(bin->rhs()).as_int() & 7)); - break; - } - case mx::ir::OpCode::SHR_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int()) >> (GetValue(bin->rhs()).as_int() & 15)); - break; - } - case mx::ir::OpCode::SHR_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(GetValue(bin->lhs()).as_int()) >> (GetValue(bin->rhs()).as_int() & 31)); - break; - } - case mx::ir::OpCode::SHR_64: { - auto bin = 
mx::BinaryInst::from(inst); - if (bin) result = Value::Int(GetValue(bin->lhs()).as_int() >> (GetValue(bin->rhs()).as_int() & 63)); - break; - } - // Unsigned arithmetic: per-width cases. - case mx::ir::OpCode::UDIV_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - uint8_t l = static_cast(GetValue(bin->lhs()).as_int()); - uint8_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(static_cast(r ? l / r : 0)); - } - break; - } - case mx::ir::OpCode::UDIV_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - uint16_t l = static_cast(GetValue(bin->lhs()).as_int()); - uint16_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(static_cast(r ? l / r : 0)); - } - break; - } - case mx::ir::OpCode::UDIV_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - uint32_t l = static_cast(GetValue(bin->lhs()).as_int()); - uint32_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(static_cast(r ? l / r : 0)); - } - break; - } - case mx::ir::OpCode::UDIV_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - uint64_t l = static_cast(GetValue(bin->lhs()).as_int()); - uint64_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(static_cast(r ? l / r : 0)); - } - break; - } - case mx::ir::OpCode::UREM_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - uint8_t l = static_cast(GetValue(bin->lhs()).as_int()); - uint8_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(static_cast(r ? l % r : 0)); - } - break; - } - case mx::ir::OpCode::UREM_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - uint16_t l = static_cast(GetValue(bin->lhs()).as_int()); - uint16_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(static_cast(r ? 
l % r : 0)); - } - break; - } - case mx::ir::OpCode::UREM_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - uint32_t l = static_cast(GetValue(bin->lhs()).as_int()); - uint32_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(static_cast(r ? l % r : 0)); - } - break; - } - case mx::ir::OpCode::UREM_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - uint64_t l = static_cast(GetValue(bin->lhs()).as_int()); - uint64_t r = static_cast(GetValue(bin->rhs()).as_int()); - result = Value::Int(static_cast(r ? l % r : 0)); - } - break; - } - case mx::ir::OpCode::USHR_8: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) >> (GetValue(bin->rhs()).as_int() & 7))); - break; - } - case mx::ir::OpCode::USHR_16: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) >> (GetValue(bin->rhs()).as_int() & 15))); - break; - } - case mx::ir::OpCode::USHR_32: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) >> (GetValue(bin->rhs()).as_int() & 31))); - break; - } - case mx::ir::OpCode::USHR_64: { - auto bin = mx::BinaryInst::from(inst); - if (bin) result = Value::Int(static_cast(static_cast(GetValue(bin->lhs()).as_int()) >> (GetValue(bin->rhs()).as_int() & 63))); - break; - } - case mx::ir::OpCode::LOGICAL_AND: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - // Short-circuit: IR keeps both sides evaluated in the tree. - // The conditionally-executed flag handles the real short-circuit. - bool l = GetValue(bin->lhs()).is_truthy(); - bool r = GetValue(bin->rhs()).is_truthy(); - result = Value::Int(l && r ? 
1 : 0); - } - break; - } - case mx::ir::OpCode::LOGICAL_OR: { - auto bin = mx::BinaryInst::from(inst); - if (bin) { - bool l = GetValue(bin->lhs()).is_truthy(); - bool r = GetValue(bin->rhs()).is_truthy(); - result = Value::Int(l || r ? 1 : 0); - } - break; - } - case mx::ir::OpCode::PTR_DIFF_32: - case mx::ir::OpCode::PTR_DIFF_64: { - auto pd = mx::PtrDiffInst::from(inst); - if (pd) { - Value l = GetValue(pd->lhs()), r = GetValue(pd->rhs()); - if (l.kind == Value::POINTER && r.kind == Value::POINTER) { - int64_t byte_diff = l.ptr.offset - r.ptr.offset; - int64_t elem_size = pd->element_size(); - if (elem_size <= 0) elem_size = 1; - result = Value::Int(byte_diff / elem_size); - } - } - break; - } - - // --- Signed equality (width-correct) --- - case mx::ir::OpCode::CMP_EQ_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - result = Value::Int((lp.object_id == rp.object_id && lp.offset == rp.offset) ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) == static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_EQ_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - result = Value::Int((lp.object_id == rp.object_id && lp.offset == rp.offset) ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) == static_cast(r.as_int()) ? 
1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_EQ_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - result = Value::Int((lp.object_id == rp.object_id && lp.offset == rp.offset) ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) == static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_EQ_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - result = Value::Int((lp.object_id == rp.object_id && lp.offset == rp.offset) ? 1 : 0); - } else { - result = Value::Int(l.as_int() == r.as_int() ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_NE_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - result = Value::Int((lp.object_id != rp.object_id || lp.offset != rp.offset) ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) != static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_NE_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? 
l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - result = Value::Int((lp.object_id != rp.object_id || lp.offset != rp.offset) ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) != static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_NE_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - result = Value::Int((lp.object_id != rp.object_id || lp.offset != rp.offset) ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) != static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_NE_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - result = Value::Int((lp.object_id != rp.object_id || lp.offset != rp.offset) ? 1 : 0); - } else { - result = Value::Int(l.as_int() != r.as_int() ? 1 : 0); - } - } - break; - } - // --- Signed ordering (width-correct) --- - case mx::ir::OpCode::CMP_LT_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? 
rp.offset : static_cast(rp.object_id); - result = Value::Int(lval < rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) < static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_LT_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval < rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) < static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_LT_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval < rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) < static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_LT_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? 
lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval < rval ? 1 : 0); - } else { - result = Value::Int(l.as_int() < r.as_int() ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_LE_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval <= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) <= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_LE_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval <= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) <= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_LE_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? 
r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval <= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) <= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_LE_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval <= rval ? 1 : 0); - } else { - result = Value::Int(l.as_int() <= r.as_int() ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_GT_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval > rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) > static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_GT_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? 
l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval > rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) > static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_GT_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval > rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) > static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_GT_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval > rval ? 1 : 0); - } else { - result = Value::Int(l.as_int() > r.as_int() ? 
1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_GE_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval >= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) >= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_GE_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval >= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) >= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_GE_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval >= rval ? 
1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) >= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::CMP_GE_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval >= rval ? 1 : 0); - } else { - result = Value::Int(l.as_int() >= r.as_int() ? 1 : 0); - } - } - break; - } - // --- Unsigned ordering (width-correct) --- - case mx::ir::OpCode::UCMP_LT_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval < rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) < static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_LT_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? 
lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval < rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) < static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_LT_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval < rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) < static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_LT_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval < rval ? 1 : 0); - } else { - result = Value::Int(l.as_uint() < r.as_uint() ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_LE_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? 
r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval <= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) <= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_LE_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval <= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) <= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_LE_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval <= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) <= static_cast(r.as_int()) ? 
1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_LE_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval <= rval ? 1 : 0); - } else { - result = Value::Int(l.as_uint() <= r.as_uint() ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_GT_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval > rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) > static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_GT_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval > rval ? 
1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) > static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_GT_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval > rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) > static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_GT_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval > rval ? 1 : 0); - } else { - result = Value::Int(l.as_uint() > r.as_uint() ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_GE_8: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? 
rp.offset : static_cast(rp.object_id); - result = Value::Int(lval >= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) >= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_GE_16: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval >= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) >= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_GE_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval >= rval ? 1 : 0); - } else { - result = Value::Int(static_cast(l.as_int()) >= static_cast(r.as_int()) ? 1 : 0); - } - } - break; - } - case mx::ir::OpCode::UCMP_GE_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) { - Value l = GetValue(cmp->lhs()), r = GetValue(cmp->rhs()); - if (l.kind == Value::POINTER || r.kind == Value::POINTER) { - auto lp = l.kind == Value::POINTER ? l.ptr : Pointer{0, l.as_int()}; - auto rp = r.kind == Value::POINTER ? r.ptr : Pointer{0, r.as_int()}; - auto lval = (lp.object_id == rp.object_id) ? 
lp.offset : static_cast(lp.object_id); - auto rval = (lp.object_id == rp.object_id) ? rp.offset : static_cast(rp.object_id); - result = Value::Int(lval >= rval ? 1 : 0); - } else { - result = Value::Int(l.as_uint() >= r.as_uint() ? 1 : 0); - } - } - break; - } - // --- Float comparisons --- - case mx::ir::OpCode::FCMP_EQ_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(static_cast(GetValue(cmp->lhs()).as_float()) == static_cast(GetValue(cmp->rhs()).as_float()) ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_EQ_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(GetValue(cmp->lhs()).as_float() == GetValue(cmp->rhs()).as_float() ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_NE_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(static_cast(GetValue(cmp->lhs()).as_float()) != static_cast(GetValue(cmp->rhs()).as_float()) ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_NE_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(GetValue(cmp->lhs()).as_float() != GetValue(cmp->rhs()).as_float() ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_LT_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(static_cast(GetValue(cmp->lhs()).as_float()) < static_cast(GetValue(cmp->rhs()).as_float()) ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_LT_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(GetValue(cmp->lhs()).as_float() < GetValue(cmp->rhs()).as_float() ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_LE_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(static_cast(GetValue(cmp->lhs()).as_float()) <= static_cast(GetValue(cmp->rhs()).as_float()) ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_LE_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(GetValue(cmp->lhs()).as_float() <= GetValue(cmp->rhs()).as_float() ? 
1 : 0); - break; - } - case mx::ir::OpCode::FCMP_GT_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(static_cast(GetValue(cmp->lhs()).as_float()) > static_cast(GetValue(cmp->rhs()).as_float()) ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_GT_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(GetValue(cmp->lhs()).as_float() > GetValue(cmp->rhs()).as_float() ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_GE_32: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(static_cast(GetValue(cmp->lhs()).as_float()) >= static_cast(GetValue(cmp->rhs()).as_float()) ? 1 : 0); - break; - } - case mx::ir::OpCode::FCMP_GE_64: { - auto cmp = mx::ComparisonInst::from(inst); - if (cmp) result = Value::Int(GetValue(cmp->lhs()).as_float() >= GetValue(cmp->rhs()).as_float() ? 1 : 0); - break; - } - - // --- Unary --- - case mx::ir::OpCode::NEG_8: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Int(static_cast(-static_cast(GetValue(u->operand()).as_int()))); - break; - } - case mx::ir::OpCode::NEG_16: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Int(static_cast(-static_cast(GetValue(u->operand()).as_int()))); - break; - } - case mx::ir::OpCode::NEG_32: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Int(static_cast(-static_cast(GetValue(u->operand()).as_int()))); - break; - } - case mx::ir::OpCode::NEG_64: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Int(-GetValue(u->operand()).as_int()); - break; - } - case mx::ir::OpCode::FNEG_32: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Float(-static_cast(GetValue(u->operand()).as_float())); - break; - } - case mx::ir::OpCode::FNEG_64: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Float(-GetValue(u->operand()).as_float()); - break; - } - case mx::ir::OpCode::BIT_NOT_8: { - auto u = mx::UnaryInst::from(inst); - if (u) result = 
Value::Int(static_cast(~static_cast(GetValue(u->operand()).as_int()))); - break; - } - case mx::ir::OpCode::BIT_NOT_16: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Int(static_cast(~static_cast(GetValue(u->operand()).as_int()))); - break; - } - case mx::ir::OpCode::BIT_NOT_32: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Int(static_cast(~static_cast(GetValue(u->operand()).as_int()))); - break; - } - case mx::ir::OpCode::BIT_NOT_64: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Int(~GetValue(u->operand()).as_int()); - break; - } - case mx::ir::OpCode::LOGICAL_NOT: { - auto u = mx::UnaryInst::from(inst); - if (u) result = Value::Int(GetValue(u->operand()).is_truthy() ? 0 : 1); - break; - } - - // --- Casts --- - case mx::ir::OpCode::CAST: { - auto c = mx::CastInst::from(inst); - if (c) { - auto sub = c->sub_opcode(); - Value v = GetValue(c->operand()); - if (sub == mx::ir::CastOp::IDENTITY) { - result = v; - } else if (sub == mx::ir::CastOp::BITCAST) { - // Reinterpret bits: float↔int of same size. - if (v.kind == Value::FLOATING) { - // float/double bits → int - int64_t bits; - std::memcpy(&bits, &v.fval, sizeof(bits)); - result = Value::Int(bits); - } else if (v.kind == Value::INTEGER) { - // int bits → float/double - double fv; - std::memcpy(&fv, &v.ival, sizeof(fv)); - result = Value::Float(fv); - } else { - result = v; - } - } else if (sub == mx::ir::CastOp::PTR_TO_I32) { - int64_t iv = v.kind == Value::POINTER ? v.ptr.offset : v.ival; - result = Value::Int(static_cast(iv)); - } else if (sub == mx::ir::CastOp::PTR_TO_I64) { - result = Value::Int(v.kind == Value::POINTER ? 
v.ptr.offset : v.ival); - } else if (sub == mx::ir::CastOp::I32_TO_PTR) { - result = Value::Ptr(mx::kInvalidEntityId, static_cast(v.as_int())); - } else if (sub == mx::ir::CastOp::I64_TO_PTR) { - result = Value::Ptr(mx::kInvalidEntityId, v.as_int()); - } else if (mx::ir::IsFloatToInt(sub)) { - double fv; - if (v.kind == Value::FLOATING) { - fv = v.fval; - } else { - uint64_t bits = static_cast(v.ival); - std::memcpy(&fv, &bits, sizeof(fv)); - } - // Use float precision for F32_TO_* sources. - if (sub >= mx::ir::CastOp::F32_TO_SI8 && - sub <= mx::ir::CastOp::F32_TO_SI64) { - fv = static_cast(fv); - } else if (sub >= mx::ir::CastOp::F32_TO_UI8 && - sub <= mx::ir::CastOp::F32_TO_UI64) { - fv = static_cast(fv); - } - switch (sub) { - case mx::ir::CastOp::F32_TO_SI8: case mx::ir::CastOp::F64_TO_SI8: - result = Value::Int(static_cast(fv)); break; - case mx::ir::CastOp::F32_TO_SI16: case mx::ir::CastOp::F64_TO_SI16: - result = Value::Int(static_cast(fv)); break; - case mx::ir::CastOp::F32_TO_SI32: case mx::ir::CastOp::F64_TO_SI32: - result = Value::Int(static_cast(fv)); break; - case mx::ir::CastOp::F32_TO_SI64: case mx::ir::CastOp::F64_TO_SI64: - result = Value::Int(static_cast(fv)); break; - case mx::ir::CastOp::F32_TO_UI8: case mx::ir::CastOp::F64_TO_UI8: - result = Value::Int(static_cast(static_cast(fv))); break; - case mx::ir::CastOp::F32_TO_UI16: case mx::ir::CastOp::F64_TO_UI16: - result = Value::Int(static_cast(static_cast(fv))); break; - case mx::ir::CastOp::F32_TO_UI32: case mx::ir::CastOp::F64_TO_UI32: - result = Value::Int(static_cast(static_cast(fv))); break; - case mx::ir::CastOp::F32_TO_UI64: case mx::ir::CastOp::F64_TO_UI64: - result = Value::Int(static_cast(static_cast(fv))); break; - default: - result = Value::Int(static_cast(fv)); break; - } - } else if (mx::ir::IsIntToFloat(sub)) { - // Width-correct int→float: cast to source width, then to float/double. 
- switch (sub) { - case mx::ir::CastOp::SI8_TO_F32: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::SI8_TO_F64: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::SI16_TO_F32: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::SI16_TO_F64: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::SI32_TO_F32: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::SI32_TO_F64: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::SI64_TO_F32: - result = Value::Float(static_cast(v.as_int())); break; - case mx::ir::CastOp::SI64_TO_F64: - result = Value::Float(static_cast(v.as_int())); break; - case mx::ir::CastOp::UI8_TO_F32: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::UI8_TO_F64: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::UI16_TO_F32: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::UI16_TO_F64: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::UI32_TO_F32: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::UI32_TO_F64: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::UI64_TO_F32: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - case mx::ir::CastOp::UI64_TO_F64: - result = Value::Float(static_cast(static_cast(v.as_int()))); break; - default: - result = Value::Float(static_cast(v.as_int())); break; - } - } else if (sub == mx::ir::CastOp::F64_TO_F32) { - double fv; - if (v.kind == Value::FLOATING) { - fv = v.fval; - } else { - uint64_t bits = static_cast(v.ival); - std::memcpy(&fv, &bits, sizeof(fv)); - } - result = 
Value::Float(static_cast(fv)); - } else if (sub == mx::ir::CastOp::F32_TO_F64) { - if (v.kind == Value::FLOATING) { - result = Value::Float(v.fval); // already double internally - } else { - float fv; - uint32_t bits = static_cast(v.ival); - std::memcpy(&fv, &bits, sizeof(fv)); - result = Value::Float(static_cast(fv)); - } - } else if (mx::ir::IsSignExtend(sub)) { - // Sign-extend: cast to source signed type to get correct sign. - int64_t iv = v.as_int(); - switch (sub) { - case mx::ir::CastOp::SEXT_I8_I16: - case mx::ir::CastOp::SEXT_I8_I32: - case mx::ir::CastOp::SEXT_I8_I64: - iv = static_cast(iv); - break; - case mx::ir::CastOp::SEXT_I16_I32: - case mx::ir::CastOp::SEXT_I16_I64: - iv = static_cast(iv); - break; - case mx::ir::CastOp::SEXT_I32_I64: - iv = static_cast(iv); - break; - default: break; - } - result = Value::Int(iv); - } else if (mx::ir::IsZeroExtend(sub)) { - // Zero-extend: mask to source width (undoing sign-extension from LOAD). - int64_t iv = v.as_int(); - switch (sub) { - case mx::ir::CastOp::ZEXT_I8_I16: - case mx::ir::CastOp::ZEXT_I8_I32: - case mx::ir::CastOp::ZEXT_I8_I64: - iv = iv & 0xFF; - break; - case mx::ir::CastOp::ZEXT_I16_I32: - case mx::ir::CastOp::ZEXT_I16_I64: - iv = iv & 0xFFFF; - break; - case mx::ir::CastOp::ZEXT_I32_I64: - iv = iv & 0xFFFFFFFF; - break; - default: break; - } - result = Value::Int(iv); - } else if (mx::ir::IsTruncate(sub)) { - int64_t iv = v.as_int(); - switch (sub) { - case mx::ir::CastOp::TRUNC_I16_I8: - case mx::ir::CastOp::TRUNC_I32_I8: - case mx::ir::CastOp::TRUNC_I64_I8: - iv = static_cast(iv); - break; - case mx::ir::CastOp::TRUNC_I32_I16: - case mx::ir::CastOp::TRUNC_I64_I16: - iv = static_cast(iv); - break; - case mx::ir::CastOp::TRUNC_I64_I32: - iv = static_cast(iv); - break; - default: break; - } - result = Value::Int(iv); - } else { - // Other int-to-int casts. 
- result = Value::Int(v.as_int()); - } - } - break; - } - - // --- Read-modify-write (inc/dec, compound assign) --- - case mx::ir::OpCode::READ_MODIFY_WRITE: { - if (auto rmw = mx::ReadModifyWriteInst::from(inst)) { - Value addr = GetValue(rmw->address()); - if (addr.kind == Value::POINTER) { - // Determine access size from the object. - size_t access_sz = 8; - auto it = memory_.find(addr.ptr.object_id); - if (it != memory_.end() && it->second.bytes.size() <= 8) { - access_sz = it->second.bytes.size(); - } - // Determine if the underlying op is float to read correctly. - auto underlying = rmw->underlying_op(); - bool rmw_is_float = mx::ir::IsFloatArithmetic(underlying); - Value old_val = MemReadValue(addr.ptr, access_sz, rmw_is_float); - // Collect RHS operands (typically one value). - Value rhs = Value::Int(0); - for (auto rhs_op : rmw->rhs_operands()) { - rhs = GetValue(rhs_op); - break; // Use first RHS operand. - } - Value new_val; - switch (underlying) { - case mx::ir::OpCode::ADD_8: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) + static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ADD_16: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) + static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ADD_32: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) + static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ADD_64: - new_val = Value::Int(old_val.as_int() + rhs.as_int()); break; - case mx::ir::OpCode::SUB_8: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) - static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::SUB_16: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) - static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::SUB_32: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) - static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::SUB_64: - new_val = Value::Int(old_val.as_int() - rhs.as_int()); break; - case mx::ir::OpCode::MUL_8: - 
new_val = Value::Int(static_cast(static_cast(old_val.as_int()) * static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::MUL_16: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) * static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::MUL_32: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) * static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::MUL_64: - new_val = Value::Int(old_val.as_int() * rhs.as_int()); break; - case mx::ir::OpCode::DIV_8: { - int8_t r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? static_cast(static_cast(old_val.as_int()) / r) : 0); break; - } - case mx::ir::OpCode::DIV_16: { - int16_t r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? static_cast(static_cast(old_val.as_int()) / r) : 0); break; - } - case mx::ir::OpCode::DIV_32: { - int32_t r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? static_cast(static_cast(old_val.as_int()) / r) : 0); break; - } - case mx::ir::OpCode::DIV_64: - new_val = Value::Int(rhs.as_int() ? old_val.as_int() / rhs.as_int() : 0); break; - case mx::ir::OpCode::REM_8: { - int8_t r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? static_cast(static_cast(old_val.as_int()) % r) : 0); break; - } - case mx::ir::OpCode::REM_16: { - int16_t r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? static_cast(static_cast(old_val.as_int()) % r) : 0); break; - } - case mx::ir::OpCode::REM_32: { - int32_t r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? static_cast(static_cast(old_val.as_int()) % r) : 0); break; - } - case mx::ir::OpCode::REM_64: - new_val = Value::Int(rhs.as_int() ? 
old_val.as_int() % rhs.as_int() : 0); break; - case mx::ir::OpCode::BIT_AND_8: - new_val = Value::Int(static_cast(old_val.as_int() & rhs.as_int())); break; - case mx::ir::OpCode::BIT_AND_16: - new_val = Value::Int(static_cast(old_val.as_int() & rhs.as_int())); break; - case mx::ir::OpCode::BIT_AND_32: - new_val = Value::Int(static_cast(old_val.as_int() & rhs.as_int())); break; - case mx::ir::OpCode::BIT_AND_64: - new_val = Value::Int(old_val.as_int() & rhs.as_int()); break; - case mx::ir::OpCode::BIT_OR_8: - new_val = Value::Int(static_cast(old_val.as_int() | rhs.as_int())); break; - case mx::ir::OpCode::BIT_OR_16: - new_val = Value::Int(static_cast(old_val.as_int() | rhs.as_int())); break; - case mx::ir::OpCode::BIT_OR_32: - new_val = Value::Int(static_cast(old_val.as_int() | rhs.as_int())); break; - case mx::ir::OpCode::BIT_OR_64: - new_val = Value::Int(old_val.as_int() | rhs.as_int()); break; - case mx::ir::OpCode::BIT_XOR_8: - new_val = Value::Int(static_cast(old_val.as_int() ^ rhs.as_int())); break; - case mx::ir::OpCode::BIT_XOR_16: - new_val = Value::Int(static_cast(old_val.as_int() ^ rhs.as_int())); break; - case mx::ir::OpCode::BIT_XOR_32: - new_val = Value::Int(static_cast(old_val.as_int() ^ rhs.as_int())); break; - case mx::ir::OpCode::BIT_XOR_64: - new_val = Value::Int(old_val.as_int() ^ rhs.as_int()); break; - case mx::ir::OpCode::SHL_8: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) << (rhs.as_int() & 7))); break; - case mx::ir::OpCode::SHL_16: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) << (rhs.as_int() & 15))); break; - case mx::ir::OpCode::SHL_32: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) << (rhs.as_int() & 31))); break; - case mx::ir::OpCode::SHL_64: - new_val = Value::Int(old_val.as_int() << (rhs.as_int() & 63)); break; - case mx::ir::OpCode::SHR_8: - new_val = Value::Int(static_cast(old_val.as_int()) >> (rhs.as_int() & 7)); break; - case mx::ir::OpCode::SHR_16: - new_val = 
Value::Int(static_cast(old_val.as_int()) >> (rhs.as_int() & 15)); break; - case mx::ir::OpCode::SHR_32: - new_val = Value::Int(static_cast(old_val.as_int()) >> (rhs.as_int() & 31)); break; - case mx::ir::OpCode::SHR_64: - new_val = Value::Int(old_val.as_int() >> (rhs.as_int() & 63)); break; - case mx::ir::OpCode::UDIV_8: { - uint8_t l = static_cast(old_val.as_int()), r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? l / r : 0); break; - } - case mx::ir::OpCode::UDIV_16: { - uint16_t l = static_cast(old_val.as_int()), r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? l / r : 0); break; - } - case mx::ir::OpCode::UDIV_32: { - uint32_t l = static_cast(old_val.as_int()), r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? l / r : 0); break; - } - case mx::ir::OpCode::UDIV_64: { - uint64_t l = static_cast(old_val.as_int()), r = static_cast(rhs.as_int()); - new_val = Value::Int(static_cast(r ? l / r : 0)); break; - } - case mx::ir::OpCode::UREM_8: { - uint8_t l = static_cast(old_val.as_int()), r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? l % r : 0); break; - } - case mx::ir::OpCode::UREM_16: { - uint16_t l = static_cast(old_val.as_int()), r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? l % r : 0); break; - } - case mx::ir::OpCode::UREM_32: { - uint32_t l = static_cast(old_val.as_int()), r = static_cast(rhs.as_int()); - new_val = Value::Int(r ? l % r : 0); break; - } - case mx::ir::OpCode::UREM_64: { - uint64_t l = static_cast(old_val.as_int()), r = static_cast(rhs.as_int()); - new_val = Value::Int(static_cast(r ? 
l % r : 0)); break; - } - case mx::ir::OpCode::USHR_8: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) >> (rhs.as_int() & 7))); break; - case mx::ir::OpCode::USHR_16: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) >> (rhs.as_int() & 15))); break; - case mx::ir::OpCode::USHR_32: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) >> (rhs.as_int() & 31))); break; - case mx::ir::OpCode::USHR_64: - new_val = Value::Int(static_cast( - static_cast(old_val.as_int()) >> (rhs.as_int() & 63))); break; - case mx::ir::OpCode::ATOMIC_ADD_8: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) + static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_ADD_16: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) + static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_ADD_32: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) + static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_ADD_64: - new_val = Value::Int(old_val.as_int() + rhs.as_int()); break; - case mx::ir::OpCode::ATOMIC_SUB_8: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) - static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_SUB_16: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) - static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_SUB_32: - new_val = Value::Int(static_cast(static_cast(old_val.as_int()) - static_cast(rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_SUB_64: - new_val = Value::Int(old_val.as_int() - rhs.as_int()); break; - case mx::ir::OpCode::ATOMIC_AND_8: - new_val = Value::Int(static_cast(old_val.as_int() & rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_AND_16: - new_val = Value::Int(static_cast(old_val.as_int() & rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_AND_32: - new_val = Value::Int(static_cast(old_val.as_int() & rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_AND_64: - new_val = 
Value::Int(old_val.as_int() & rhs.as_int()); break; - case mx::ir::OpCode::ATOMIC_OR_8: - new_val = Value::Int(static_cast(old_val.as_int() | rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_OR_16: - new_val = Value::Int(static_cast(old_val.as_int() | rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_OR_32: - new_val = Value::Int(static_cast(old_val.as_int() | rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_OR_64: - new_val = Value::Int(old_val.as_int() | rhs.as_int()); break; - case mx::ir::OpCode::ATOMIC_XOR_8: - new_val = Value::Int(static_cast(old_val.as_int() ^ rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_XOR_16: - new_val = Value::Int(static_cast(old_val.as_int() ^ rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_XOR_32: - new_val = Value::Int(static_cast(old_val.as_int() ^ rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_XOR_64: - new_val = Value::Int(old_val.as_int() ^ rhs.as_int()); break; - case mx::ir::OpCode::ATOMIC_NAND_8: - new_val = Value::Int(static_cast(~(old_val.as_int() & rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_NAND_16: - new_val = Value::Int(static_cast(~(old_val.as_int() & rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_NAND_32: - new_val = Value::Int(static_cast(~(old_val.as_int() & rhs.as_int()))); break; - case mx::ir::OpCode::ATOMIC_NAND_64: - new_val = Value::Int(~(old_val.as_int() & rhs.as_int())); break; - case mx::ir::OpCode::ATOMIC_EXCHANGE_8: case mx::ir::OpCode::ATOMIC_EXCHANGE_16: - case mx::ir::OpCode::ATOMIC_EXCHANGE_32: case mx::ir::OpCode::ATOMIC_EXCHANGE_64: - new_val = rhs; break; - // Float compound assign (+=, -=, *=, /=, %=). 
- case mx::ir::OpCode::FADD_32: - new_val = Value::Float(static_cast(old_val.as_float()) + static_cast(rhs.as_float())); break; - case mx::ir::OpCode::FADD_64: - new_val = Value::Float(old_val.as_float() + rhs.as_float()); break; - case mx::ir::OpCode::FSUB_32: - new_val = Value::Float(static_cast(old_val.as_float()) - static_cast(rhs.as_float())); break; - case mx::ir::OpCode::FSUB_64: - new_val = Value::Float(old_val.as_float() - rhs.as_float()); break; - case mx::ir::OpCode::FMUL_32: - new_val = Value::Float(static_cast(old_val.as_float()) * static_cast(rhs.as_float())); break; - case mx::ir::OpCode::FMUL_64: - new_val = Value::Float(old_val.as_float() * rhs.as_float()); break; - case mx::ir::OpCode::FDIV_32: - new_val = Value::Float(static_cast(old_val.as_float()) / static_cast(rhs.as_float())); break; - case mx::ir::OpCode::FDIV_64: - new_val = Value::Float(old_val.as_float() / rhs.as_float()); break; - case mx::ir::OpCode::FREM_32: - new_val = Value::Float(std::fmodf(static_cast(old_val.as_float()), static_cast(rhs.as_float()))); break; - case mx::ir::OpCode::FREM_64: - new_val = Value::Float(std::fmod(old_val.as_float(), rhs.as_float())); break; - case mx::ir::OpCode::PTR_ADD_32: case mx::ir::OpCode::PTR_ADD_64: { - int64_t elem_sz = rmw->element_size(); - if (elem_sz <= 0) elem_sz = 1; - if (old_val.kind == Value::POINTER) { - new_val = Value::Ptr(old_val.ptr.object_id, - old_val.ptr.offset + rhs.as_int() * elem_sz); - } else { - new_val = Value::Int(old_val.as_int() + rhs.as_int() * elem_sz); - } - break; - } - // Overflow-checked arithmetic: RMW stores the result, returns - // the overflow flag (bool). 
- case mx::ir::OpCode::ADD_OVERFLOW_8: case mx::ir::OpCode::ADD_OVERFLOW_16: - case mx::ir::OpCode::ADD_OVERFLOW_32: case mx::ir::OpCode::ADD_OVERFLOW_64: - case mx::ir::OpCode::SUB_OVERFLOW_8: case mx::ir::OpCode::SUB_OVERFLOW_16: - case mx::ir::OpCode::SUB_OVERFLOW_32: case mx::ir::OpCode::SUB_OVERFLOW_64: - case mx::ir::OpCode::MUL_OVERFLOW_8: case mx::ir::OpCode::MUL_OVERFLOW_16: - case mx::ir::OpCode::MUL_OVERFLOW_32: case mx::ir::OpCode::MUL_OVERFLOW_64: { - Value a = Value::Int(0), b = Value::Int(0); - int rhs_i = 0; - for (auto rhs_op : rmw->rhs_operands()) { - if (rhs_i == 0) a = GetValue(rhs_op); - else if (rhs_i == 1) b = GetValue(rhs_op); - ++rhs_i; - } - __int128 wide; - if (underlying >= mx::ir::OpCode::ADD_OVERFLOW_8 && - underlying <= mx::ir::OpCode::ADD_OVERFLOW_64) - wide = static_cast<__int128>(a.as_int()) + static_cast<__int128>(b.as_int()); - else if (underlying >= mx::ir::OpCode::SUB_OVERFLOW_8 && - underlying <= mx::ir::OpCode::SUB_OVERFLOW_64) - wide = static_cast<__int128>(a.as_int()) - static_cast<__int128>(b.as_int()); - else - wide = static_cast<__int128>(a.as_int()) * static_cast<__int128>(b.as_int()); - new_val = Value::Int(static_cast(wide)); - bool overflow = (wide != static_cast<__int128>(static_cast(wide))); - MemWriteValue(addr.ptr, new_val, access_sz); - result = Value::Int(overflow ? 1 : 0); - break; - } - default: new_val = old_val; break; - } - if (!(underlying >= mx::ir::OpCode::ADD_OVERFLOW_8 && - underlying <= mx::ir::OpCode::MUL_OVERFLOW_64)) { - MemWriteValue(addr.ptr, new_val, access_sz); - result = rmw->returns_new_value() ? new_val : old_val; - } - } - } - break; - } - - // --- Call --- - case mx::ir::OpCode::CALL: { - if (auto ci = mx::CallInst::from(inst)) { - // Collect argument values. - std::vector call_args; - for (auto arg : ci->arguments()) { - call_args.push_back(GetValue(arg)); - } - - auto target = ci->target(); - if (target) { - // Try to find IR for the callee. 
- auto callee_ir = mx::IRFunction::from(*target); - if (callee_ir) { - if (trace_) { - std::cerr << " >> Entering call to " << target->name() << "\n"; - } - Interpreter callee_interp(*callee_ir, trace_); - result = callee_interp.Run(call_args); - if (trace_) { - std::cerr << " << Returned from " << target->name() << "\n"; - } - } else { - LOG(INFO) << "CALL to " << target->name() - << " (no IR available, returning undef)"; - } - } else { - LOG(INFO) << "Indirect CALL (not interpreted)"; - } - } - break; - } - - // --- Select (ternary) --- - case mx::ir::OpCode::SELECT: { - if (auto sel = mx::SelectInst::from(inst)) { - Value cond = GetValue(sel->condition()); - result = cond.is_truthy() ? GetValue(sel->true_value()) - : GetValue(sel->false_value()); - } - break; - } - - // --- Last value (comma operator) --- - case mx::ir::OpCode::LAST_VALUE: { - // All operands already evaluated (post-order). Return the last. - if (auto lv = mx::LastValueInst::from(inst)) { - result = GetValue(lv->last()); - } - break; - } - - - // --- Param pointer --- - case mx::ir::OpCode::PARAM_PTR_32: - case mx::ir::OpCode::PARAM_PTR_64: { - if (auto pr = mx::ParamPtrInst::from(inst)) { - uint32_t idx = pr->parameter_index(); - if (idx < param_ptrs_.size()) { - result = param_ptrs_[idx]; - } else { - LOG(WARNING) << "PARAM_PTR index " << idx - << " out of range (have " << param_ptrs_.size() - << " param pointers)"; - } - } - break; - } - - // MULTIMEM removed: merged into MEMORY case above. 
- - // --- Bitwise/intrinsic operations --- - case mx::ir::OpCode::BITWISE_8: - case mx::ir::OpCode::BITWISE_16: - case mx::ir::OpCode::BITWISE_32: - case mx::ir::OpCode::BITWISE_64: { - if (auto bw = mx::BitwiseOpInst::from(inst)) { - Value val = Value::Undef(); - auto ops = inst.operands(); - for (auto op_inst : ops) { val = GetValue(op_inst); break; } - int64_t v = val.as_int(); - using BO = mx::ir::BitwiseOp; - auto sub = bw->sub_opcode(); - switch (sub) { - case BO::BSWAP_16: - result = Value::Int(static_cast(__builtin_bswap16(static_cast(v)))); break; - case BO::BSWAP_32: - result = Value::Int(static_cast(__builtin_bswap32(static_cast(v)))); break; - case BO::BSWAP_64: - result = Value::Int(static_cast(__builtin_bswap64(static_cast(v)))); break; - case BO::POPCOUNT: - switch (op) { - case mx::ir::OpCode::BITWISE_8: result = Value::Int(__builtin_popcount(static_cast(v))); break; - case mx::ir::OpCode::BITWISE_16: result = Value::Int(__builtin_popcount(static_cast(v))); break; - case mx::ir::OpCode::BITWISE_32: result = Value::Int(__builtin_popcount(static_cast(v))); break; - default: result = Value::Int(__builtin_popcountll(static_cast(v))); break; - } - break; - case BO::CLZ: - if (!v) { result = Value::Undef(); break; } - switch (op) { - case mx::ir::OpCode::BITWISE_8: result = Value::Int(__builtin_clz(static_cast(v)) - 24); break; - case mx::ir::OpCode::BITWISE_16: result = Value::Int(__builtin_clz(static_cast(v)) - 16); break; - case mx::ir::OpCode::BITWISE_32: result = Value::Int(__builtin_clz(static_cast(v))); break; - default: result = Value::Int(__builtin_clzll(static_cast(v))); break; - } - break; - case BO::CTZ: - if (!v) { result = Value::Undef(); break; } - switch (op) { - case mx::ir::OpCode::BITWISE_8: result = Value::Int(__builtin_ctz(static_cast(v))); break; - case mx::ir::OpCode::BITWISE_16: result = Value::Int(__builtin_ctz(static_cast(v))); break; - case mx::ir::OpCode::BITWISE_32: result = Value::Int(__builtin_ctz(static_cast(v))); break; - 
default: result = Value::Int(__builtin_ctzll(static_cast(v))); break; - } - break; - case BO::FFS: - switch (op) { - case mx::ir::OpCode::BITWISE_8: result = Value::Int(__builtin_ffs(static_cast(v))); break; - case mx::ir::OpCode::BITWISE_16: result = Value::Int(__builtin_ffs(static_cast(v))); break; - case mx::ir::OpCode::BITWISE_32: result = Value::Int(__builtin_ffs(static_cast(v))); break; - default: result = Value::Int(__builtin_ffsll(static_cast(v))); break; - } - break; - case BO::PARITY: - switch (op) { - case mx::ir::OpCode::BITWISE_8: result = Value::Int(__builtin_parity(static_cast(v))); break; - case mx::ir::OpCode::BITWISE_16: result = Value::Int(__builtin_parity(static_cast(v))); break; - case mx::ir::OpCode::BITWISE_32: result = Value::Int(__builtin_parity(static_cast(v))); break; - default: result = Value::Int(__builtin_parityll(static_cast(v))); break; - } - break; - case BO::ROTL: case BO::ROTR: { - Value val2 = Value::Undef(); - int count = 0; - for (auto op_inst : ops) { - if (count == 1) { val2 = GetValue(op_inst); break; } - count++; - } - int64_t amount = val2.as_int(); - if (sub == BO::ROTL) { - switch (op) { - case mx::ir::OpCode::BITWISE_8: { uint8_t x = static_cast(v); result = Value::Int(static_cast((x << (amount & 7)) | (x >> (8 - (amount & 7))))); break; } - case mx::ir::OpCode::BITWISE_16: { uint16_t x = static_cast(v); result = Value::Int(static_cast((x << (amount & 15)) | (x >> (16 - (amount & 15))))); break; } - case mx::ir::OpCode::BITWISE_32: { uint32_t x = static_cast(v); result = Value::Int(static_cast((x << (amount & 31)) | (x >> (32 - (amount & 31))))); break; } - default: { uint64_t x = static_cast(v); result = Value::Int(static_cast((x << (amount & 63)) | (x >> (64 - (amount & 63))))); break; } - } - } else { - switch (op) { - case mx::ir::OpCode::BITWISE_8: { uint8_t x = static_cast(v); result = Value::Int(static_cast((x >> (amount & 7)) | (x << (8 - (amount & 7))))); break; } - case mx::ir::OpCode::BITWISE_16: { uint16_t x 
= static_cast(v); result = Value::Int(static_cast((x >> (amount & 15)) | (x << (16 - (amount & 15))))); break; } - case mx::ir::OpCode::BITWISE_32: { uint32_t x = static_cast(v); result = Value::Int(static_cast((x >> (amount & 31)) | (x << (32 - (amount & 31))))); break; } - default: { uint64_t x = static_cast(v); result = Value::Int(static_cast((x >> (amount & 63)) | (x << (64 - (amount & 63))))); break; } - } - } - break; - } - default: - result = val; - break; - } - } - break; - } - // Width-specific ABS (integer absolute value). - case mx::ir::OpCode::ABS_8: { - auto u = mx::UnaryInst::from(inst); - if (u) { int8_t v = static_cast(GetValue(u->operand()).as_int()); result = Value::Int(v < 0 ? -v : v); } - break; - } - case mx::ir::OpCode::ABS_16: { - auto u = mx::UnaryInst::from(inst); - if (u) { int16_t v = static_cast(GetValue(u->operand()).as_int()); result = Value::Int(v < 0 ? -v : v); } - break; - } - case mx::ir::OpCode::ABS_32: { - auto u = mx::UnaryInst::from(inst); - if (u) { int32_t v = static_cast(GetValue(u->operand()).as_int()); result = Value::Int(v < 0 ? -v : v); } - break; - } - case mx::ir::OpCode::ABS_64: { - auto u = mx::UnaryInst::from(inst); - if (u) { int64_t v = GetValue(u->operand()).as_int(); result = Value::Int(v < 0 ? -v : v); } - break; - } - - // --- Floating-point operations --- - case mx::ir::OpCode::FLOAT: { - if (auto fo = mx::FloatOpInst::from(inst)) { - // Collect operands. - std::vector ops; - for (auto op_inst : inst.operands()) { - ops.push_back(GetValue(op_inst)); - } - using FO = mx::ir::FloatOp; - auto sub = fo->sub_opcode(); - switch (sub) { - // --- 1-arg float→float ops --- - case FO::FABS_32: - result = ops.empty() ? Value::Undef() - : Value::Float(fabsf(static_cast(ops[0].as_float()))); - break; - case FO::FABS_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::fabs(ops[0].as_float())); - break; - case FO::SQRT_32: - result = ops.empty() ? 
Value::Undef() - : Value::Float(sqrtf(static_cast(ops[0].as_float()))); - break; - case FO::SQRT_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::sqrt(ops[0].as_float())); - break; - case FO::CEIL_32: - result = ops.empty() ? Value::Undef() - : Value::Float(ceilf(static_cast(ops[0].as_float()))); - break; - case FO::CEIL_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::ceil(ops[0].as_float())); - break; - case FO::FLOOR_32: - result = ops.empty() ? Value::Undef() - : Value::Float(floorf(static_cast(ops[0].as_float()))); - break; - case FO::FLOOR_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::floor(ops[0].as_float())); - break; - case FO::ROUND_32: - result = ops.empty() ? Value::Undef() - : Value::Float(roundf(static_cast(ops[0].as_float()))); - break; - case FO::ROUND_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::round(ops[0].as_float())); - break; - case FO::TRUNC_32: - result = ops.empty() ? Value::Undef() - : Value::Float(truncf(static_cast(ops[0].as_float()))); - break; - case FO::TRUNC_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::trunc(ops[0].as_float())); - break; - case FO::SIN_32: - result = ops.empty() ? Value::Undef() - : Value::Float(sinf(static_cast(ops[0].as_float()))); - break; - case FO::SIN_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::sin(ops[0].as_float())); - break; - case FO::COS_32: - result = ops.empty() ? Value::Undef() - : Value::Float(cosf(static_cast(ops[0].as_float()))); - break; - case FO::COS_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::cos(ops[0].as_float())); - break; - case FO::TAN_32: - result = ops.empty() ? Value::Undef() - : Value::Float(tanf(static_cast(ops[0].as_float()))); - break; - case FO::TAN_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::tan(ops[0].as_float())); - break; - case FO::ASIN_32: - result = ops.empty() ? 
Value::Undef() - : Value::Float(asinf(static_cast(ops[0].as_float()))); - break; - case FO::ASIN_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::asin(ops[0].as_float())); - break; - case FO::ACOS_32: - result = ops.empty() ? Value::Undef() - : Value::Float(acosf(static_cast(ops[0].as_float()))); - break; - case FO::ACOS_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::acos(ops[0].as_float())); - break; - case FO::ATAN_32: - result = ops.empty() ? Value::Undef() - : Value::Float(atanf(static_cast(ops[0].as_float()))); - break; - case FO::ATAN_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::atan(ops[0].as_float())); - break; - case FO::EXP_32: - result = ops.empty() ? Value::Undef() - : Value::Float(expf(static_cast(ops[0].as_float()))); - break; - case FO::EXP_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::exp(ops[0].as_float())); - break; - case FO::EXP2_32: - result = ops.empty() ? Value::Undef() - : Value::Float(exp2f(static_cast(ops[0].as_float()))); - break; - case FO::EXP2_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::exp2(ops[0].as_float())); - break; - case FO::LOG_32: - result = ops.empty() ? Value::Undef() - : Value::Float(logf(static_cast(ops[0].as_float()))); - break; - case FO::LOG_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::log(ops[0].as_float())); - break; - case FO::LOG2_32: - result = ops.empty() ? Value::Undef() - : Value::Float(log2f(static_cast(ops[0].as_float()))); - break; - case FO::LOG2_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::log2(ops[0].as_float())); - break; - case FO::LOG10_32: - result = ops.empty() ? Value::Undef() - : Value::Float(log10f(static_cast(ops[0].as_float()))); - break; - case FO::LOG10_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::log10(ops[0].as_float())); - break; - case FO::SINH_32: - result = ops.empty() ? 
Value::Undef() - : Value::Float(sinhf(static_cast(ops[0].as_float()))); - break; - case FO::SINH_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::sinh(ops[0].as_float())); - break; - case FO::COSH_32: - result = ops.empty() ? Value::Undef() - : Value::Float(coshf(static_cast(ops[0].as_float()))); - break; - case FO::COSH_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::cosh(ops[0].as_float())); - break; - case FO::TANH_32: - result = ops.empty() ? Value::Undef() - : Value::Float(tanhf(static_cast(ops[0].as_float()))); - break; - case FO::TANH_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::tanh(ops[0].as_float())); - break; - case FO::ERF_32: - result = ops.empty() ? Value::Undef() - : Value::Float(erff(static_cast(ops[0].as_float()))); - break; - case FO::ERF_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::erf(ops[0].as_float())); - break; - case FO::ERFC_32: - result = ops.empty() ? Value::Undef() - : Value::Float(erfcf(static_cast(ops[0].as_float()))); - break; - case FO::ERFC_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::erfc(ops[0].as_float())); - break; - case FO::TGAMMA_32: - result = ops.empty() ? Value::Undef() - : Value::Float(tgammaf(static_cast(ops[0].as_float()))); - break; - case FO::TGAMMA_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::tgamma(ops[0].as_float())); - break; - case FO::LGAMMA_32: - result = ops.empty() ? Value::Undef() - : Value::Float(lgammaf(static_cast(ops[0].as_float()))); - break; - case FO::LGAMMA_64: - result = ops.empty() ? Value::Undef() - : Value::Float(std::lgamma(ops[0].as_float())); - break; - - // --- 2-arg float→float ops --- - case FO::FMIN_32: - result = (ops.size() >= 2) - ? Value::Float(fminf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::FMIN_64: - result = (ops.size() >= 2) - ? 
Value::Float(std::fmin(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - case FO::FMAX_32: - result = (ops.size() >= 2) - ? Value::Float(fmaxf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::FMAX_64: - result = (ops.size() >= 2) - ? Value::Float(std::fmax(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - case FO::COPYSIGN_32: - result = (ops.size() >= 2) - ? Value::Float(copysignf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::COPYSIGN_64: - result = (ops.size() >= 2) - ? Value::Float(std::copysign(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - case FO::ATAN2_32: - result = (ops.size() >= 2) - ? Value::Float(atan2f(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::ATAN2_64: - result = (ops.size() >= 2) - ? Value::Float(std::atan2(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - case FO::POW_32: - result = (ops.size() >= 2) - ? Value::Float(powf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::POW_64: - result = (ops.size() >= 2) - ? Value::Float(std::pow(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - case FO::FMOD_32: - result = (ops.size() >= 2) - ? Value::Float(fmodf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::FMOD_64: - result = (ops.size() >= 2) - ? Value::Float(std::fmod(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - case FO::REMAINDER_32: - result = (ops.size() >= 2) - ? Value::Float(remainderf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::REMAINDER_64: - result = (ops.size() >= 2) - ? 
Value::Float(std::remainder(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - case FO::HYPOT_32: - result = (ops.size() >= 2) - ? Value::Float(hypotf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::HYPOT_64: - result = (ops.size() >= 2) - ? Value::Float(std::hypot(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - case FO::FDIM_32: - result = (ops.size() >= 2) - ? Value::Float(fdimf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()))) - : Value::Undef(); - break; - case FO::FDIM_64: - result = (ops.size() >= 2) - ? Value::Float(std::fdim(ops[0].as_float(), ops[1].as_float())) - : Value::Undef(); - break; - - // --- 3-arg ops --- - case FO::FMA_32: - result = (ops.size() >= 3) - ? Value::Float(fmaf(static_cast(ops[0].as_float()), - static_cast(ops[1].as_float()), - static_cast(ops[2].as_float()))) - : Value::Undef(); - break; - case FO::FMA_64: - result = (ops.size() >= 3) - ? Value::Float(std::fma(ops[0].as_float(), ops[1].as_float(), ops[2].as_float())) - : Value::Undef(); - break; - - // --- Classification ops (return int) --- - case FO::ISNAN_32: - result = ops.empty() ? Value::Undef() - : Value::Int(std::isnan(static_cast(ops[0].as_float())) ? 1 : 0); - break; - case FO::ISNAN_64: - result = ops.empty() ? Value::Undef() - : Value::Int(std::isnan(ops[0].as_float()) ? 1 : 0); - break; - case FO::ISINF_32: - result = ops.empty() ? Value::Undef() - : Value::Int(std::isinf(static_cast(ops[0].as_float())) ? 1 : 0); - break; - case FO::ISINF_64: - result = ops.empty() ? Value::Undef() - : Value::Int(std::isinf(ops[0].as_float()) ? 1 : 0); - break; - case FO::ISFINITE_32: - result = ops.empty() ? Value::Undef() - : Value::Int(std::isfinite(static_cast(ops[0].as_float())) ? 1 : 0); - break; - case FO::ISFINITE_64: - result = ops.empty() ? Value::Undef() - : Value::Int(std::isfinite(ops[0].as_float()) ? 
1 : 0); - break; - case FO::SIGNBIT_32: - result = ops.empty() ? Value::Undef() - : Value::Int(std::signbit(static_cast(ops[0].as_float())) ? 1 : 0); - break; - case FO::SIGNBIT_64: - result = ops.empty() ? Value::Undef() - : Value::Int(std::signbit(ops[0].as_float()) ? 1 : 0); - break; - - // --- Zero-arg constants --- - case FO::INF_32: - result = Value::Float(std::numeric_limits::infinity()); - break; - case FO::INF_64: - result = Value::Float(std::numeric_limits::infinity()); - break; - case FO::NAN_32: - result = Value::Float(std::numeric_limits::quiet_NaN()); - break; - case FO::NAN_64: - result = Value::Float(std::numeric_limits::quiet_NaN()); - break; - case FO::HUGE_32: - result = Value::Float(static_cast(HUGE_VALF)); - break; - case FO::HUGE_64: - result = Value::Float(HUGE_VAL); - break; - } - } - break; - } - - // --- Frame/return address intrinsics --- - case mx::ir::OpCode::FRAME_PTR_32: case mx::ir::OpCode::FRAME_PTR_64: - case mx::ir::OpCode::RETURN_ADDRESS_32: case mx::ir::OpCode::RETURN_ADDRESS_64: - // Not meaningfully interpretable; return undef. 
- result = Value::Undef(); - break; - - // --- Return value pointer (callee side) --- - case mx::ir::OpCode::RETURN_PTR_32: - case mx::ir::OpCode::RETURN_PTR_64: - result = return_ptr_; - break; - - // --- Undefined/poison value --- - case mx::ir::OpCode::UNDEFINED: - result = Value::Undef(); - break; - - // --- Overflow opcodes (only valid as RMW underlying ops, not standalone) --- - case mx::ir::OpCode::ADD_OVERFLOW_8: case mx::ir::OpCode::ADD_OVERFLOW_16: - case mx::ir::OpCode::ADD_OVERFLOW_32: case mx::ir::OpCode::ADD_OVERFLOW_64: - case mx::ir::OpCode::SUB_OVERFLOW_8: case mx::ir::OpCode::SUB_OVERFLOW_16: - case mx::ir::OpCode::SUB_OVERFLOW_32: case mx::ir::OpCode::SUB_OVERFLOW_64: - case mx::ir::OpCode::MUL_OVERFLOW_8: case mx::ir::OpCode::MUL_OVERFLOW_16: - case mx::ir::OpCode::MUL_OVERFLOW_32: case mx::ir::OpCode::MUL_OVERFLOW_64: - case mx::ir::OpCode::ATOMIC_ADD_8: case mx::ir::OpCode::ATOMIC_ADD_16: - case mx::ir::OpCode::ATOMIC_ADD_32: case mx::ir::OpCode::ATOMIC_ADD_64: - case mx::ir::OpCode::ATOMIC_SUB_8: case mx::ir::OpCode::ATOMIC_SUB_16: - case mx::ir::OpCode::ATOMIC_SUB_32: case mx::ir::OpCode::ATOMIC_SUB_64: - case mx::ir::OpCode::ATOMIC_AND_8: case mx::ir::OpCode::ATOMIC_AND_16: - case mx::ir::OpCode::ATOMIC_AND_32: case mx::ir::OpCode::ATOMIC_AND_64: - case mx::ir::OpCode::ATOMIC_OR_8: case mx::ir::OpCode::ATOMIC_OR_16: - case mx::ir::OpCode::ATOMIC_OR_32: case mx::ir::OpCode::ATOMIC_OR_64: - case mx::ir::OpCode::ATOMIC_XOR_8: case mx::ir::OpCode::ATOMIC_XOR_16: - case mx::ir::OpCode::ATOMIC_XOR_32: case mx::ir::OpCode::ATOMIC_XOR_64: - case mx::ir::OpCode::ATOMIC_NAND_8: case mx::ir::OpCode::ATOMIC_NAND_16: - case mx::ir::OpCode::ATOMIC_NAND_32: case mx::ir::OpCode::ATOMIC_NAND_64: - case mx::ir::OpCode::ATOMIC_EXCHANGE_8: case mx::ir::OpCode::ATOMIC_EXCHANGE_16: - case mx::ir::OpCode::ATOMIC_EXCHANGE_32: case mx::ir::OpCode::ATOMIC_EXCHANGE_64: - LOG(WARNING) << "RMW-only opcode used as standalone instruction"; - break; - - // --- Variadic 
--- - case mx::ir::OpCode::VA_START: - case mx::ir::OpCode::VA_END: - case mx::ir::OpCode::VA_COPY: { - // CRITIQUE: Variadic args need a runtime va_list model. Not implemented - // in this simple interpreter. va_arg is handled via MEMORY/CONSUME_VA_PARAM. - break; - } - - // --- Global/function address --- - case mx::ir::OpCode::GLOBAL_PTR_32: case mx::ir::OpCode::GLOBAL_PTR_64: - case mx::ir::OpCode::THREAD_LOCAL_PTR_32: case mx::ir::OpCode::THREAD_LOCAL_PTR_64: { - // In a real interpreter, this would look up the global/TLS storage. - // For now, create a synthetic pointer using the target entity ID. - result = Value::Ptr(inst.source_entity_id(), 0); - break; - } - case mx::ir::OpCode::FUNC_PTR_32: - case mx::ir::OpCode::FUNC_PTR_64: { - // Function pointer — use the source entity ID as a handle. - result = Value::Ptr(inst.source_entity_id(), 0); - break; - } - - // --- Scope markers: track object lifetimes --- - case mx::ir::OpCode::ENTER_SCOPE: { - if (auto esi = mx::EnterScopeInst::from(inst)) { - auto scope = esi->scope(); - for (auto obj : scope.objects()) { - auto oid = mx::EntityId(obj.id()).Pack(); - auto it = memory_.find(oid); - if (it != memory_.end()) { - it->second.poisoned = false; // Re-entering scope (loop iteration). - } - } - } - break; - } - case mx::ir::OpCode::EXIT_SCOPE: { - if (auto esi = mx::ExitScopeInst::from(inst)) { - auto scope = esi->scope(); - for (auto obj : scope.objects()) { - auto oid = mx::EntityId(obj.id()).Pack(); - auto it = memory_.find(oid); - if (it != memory_.end()) { - it->second.poisoned = true; // Object lifetime ended. 
- } - } - } - break; - } - - // --- Terminators are handled by the CFG walker, not here --- - case mx::ir::OpCode::COND_BRANCH: - case mx::ir::OpCode::SWITCH: - case mx::ir::OpCode::RET: - case mx::ir::OpCode::UNREACHABLE: - case mx::ir::OpCode::IMPLICIT_UNREACHABLE: - case mx::ir::OpCode::BREAK: - case mx::ir::OpCode::CONTINUE: - case mx::ir::OpCode::GOTO: - case mx::ir::OpCode::IMPLICIT_GOTO: - case mx::ir::OpCode::FALLTHROUGH: - case mx::ir::OpCode::IMPLICIT_FALLTHROUGH: - break; - - case mx::ir::OpCode::UNKNOWN: - LOG(WARNING) << "Encountered UNKNOWN opcode"; - break; - } - - values_[eid] = result; - Trace(inst, result); -} - -void Interpreter::EvalSubExpressions(const mx::IRInstruction &inst) { - for (auto operand : inst.operands()) { - EvalSubExpressions(operand); // depth-first - auto sub_op = operand.opcode(); - if (!mx::ir::IsTerminator(sub_op)) { - Eval(operand); - } - } -} - -// --------------------------------------------------------------------------- -// Main interpreter loop -// --------------------------------------------------------------------------- - -Value Interpreter::Run(const std::vector &args) { - params_ = args; - param_ptrs_.clear(); - values_.clear(); - memory_.clear(); - pointer_shadow_.clear(); - block_map_.clear(); - steps_ = 0; - - // Pre-allocate parameter and return storage. - // PARAM_PTR(n) returns a pointer to the nth parameter's storage. - // RETURN_PTR returns a pointer to the return value storage. 
- return_ptr_ = Value::Undef(); - { - uint32_t param_idx = 0; - for (auto obj : func_.objects()) { - auto k = obj.kind(); - if (k == mx::ir::ObjectKind::PARAMETER || - k == mx::ir::ObjectKind::PARAMETER_VALUE) { - auto eid = mx::EntityId(obj.id()).Pack(); - AllocateObject(obj); - Pointer ptr{eid, 0}; - if (param_idx < args.size()) { - uint32_t sz = obj.size_bytes(); - if (sz == 0) sz = 8; - MemWriteValue(ptr, args[param_idx], sz); - } - param_ptrs_.push_back(Value::Ptr(eid, 0)); - ++param_idx; - } - } - } - - // Allocate return storage (callee no longer has RETURN_SLOT object). - // Use a monotonically increasing ID that won't collide with entity IDs. - if (return_ptr_.kind == Value::UNDEFINED) { - if (auto decl = func_.source_declaration()) { - if (auto fd = mx::FunctionDecl::from(*decl)) { - auto rt = fd->return_type(); - if (auto bits = rt.size_in_bits()) { - uint32_t sz = static_cast((*bits + 7) / 8); - if (sz > 0) { - uint64_t ret_eid = next_interp_object_id_++; - auto &mem = memory_[ret_eid]; - mem.bytes.resize(sz, 0); - mem.allocated = true; - return_ptr_ = Value::Ptr(ret_eid, 0); - } - } - } - } - } - - // Build block map for CFG navigation. - for (auto block : func_.blocks()) { - block_map_[mx::EntityId(block.id()).Pack()] = block; - } - // Also add the entry block (which might be FRAME, not in RPO). - { - auto entry = func_.entry_block(); - block_map_[mx::EntityId(entry.id()).Pack()] = entry; - } - - // Start at the entry block (FRAME). - mx::IRBlock current = func_.entry_block(); - Value return_value = Value::Undef(); - - while (true) { - if (steps_ >= FLAGS_max_steps) { - LOG(ERROR) << "Interpreter exceeded max steps (" << FLAGS_max_steps << ")"; - break; - } - - - - // Clear cached values at block boundaries for fresh LOAD evaluation. - values_.clear(); - - if (trace_) { - std::cerr << "Block " << mx::ir::EnumeratorName(current.kind()) - << " (" << mx::EntityId(current.id()).Pack() << ")\n"; - } - - - // Evaluate all instructions in the block. 
all_instructions() yields - // only root instructions; sub-expressions are lazy-evaluated via GetValue. - for (auto inst : current.all_instructions()) { - ++steps_; - auto op = inst.opcode(); - - // Non-terminator: evaluate and store result. - if (!mx::ir::IsTerminator(op)) { - Eval(inst); - continue; - } - - // --- Terminator handling --- - - if (op == mx::ir::OpCode::RET) { - auto ri = mx::RetInst::from(inst); - if (ri) { - if (auto rv = ri->return_value()) { - return_value = GetValue(*rv); - } - } - // Fallback: read from RETURN_PTR storage if direct operand is undef. - if (return_value.kind == Value::UNDEFINED && - return_ptr_.kind == Value::POINTER) { - // Determine return size from the RETURN_SLOT object. - auto it = memory_.find(return_ptr_.ptr.object_id); - size_t ret_sz = (it != memory_.end()) ? it->second.bytes.size() : 8; - return_value = MemReadValue(return_ptr_.ptr, ret_sz, false); - } - goto done; - } - - if (op == mx::ir::OpCode::UNREACHABLE || - op == mx::ir::OpCode::IMPLICIT_UNREACHABLE) { - LOG(ERROR) << "Reached UNREACHABLE instruction"; - goto done; - } - - if (op == mx::ir::OpCode::COND_BRANCH) { - auto cb = mx::CondBranchInst::from(inst); - if (cb) { - Value cond = GetValue(cb->condition()); - auto target = cond.is_truthy() ? 
cb->true_block() : cb->false_block(); - current = target; - goto next_block; - } - break; - } - - if (op == mx::ir::OpCode::SWITCH) { - auto sw = mx::SwitchInst::from(inst); - if (sw) { - Value sel = GetValue(sw->selector()); - int64_t sel_val = sel.as_int(); - bool found = false; - mx::IRBlock default_block{}; - for (auto sc : sw->cases()) { - if (sc.is_default()) { - default_block = sc.target_block(); - continue; - } - if (sel_val >= sc.low() && sel_val <= sc.high()) { - current = sc.target_block(); - found = true; - break; - } - } - if (!found) { - if (default_block.id().Pack()) { - current = default_block; - } else { - LOG(ERROR) << "Switch: no matching case and no default"; - goto done; - } - } - goto next_block; - } - break; - } - - // All other terminators (GOTO, IMPLICIT_GOTO, BREAK, CONTINUE, - // FALLTHROUGH, IMPLICIT_FALLTHROUGH) are unconditional branches. - { - auto br = mx::BranchInst::from(inst); - if (br) { - current = br->target_block(); - goto next_block; - } - } - break; - } - - // If we fell through without a terminator (shouldn't happen with - // well-formed IR), break. 
- LOG(ERROR) << "Block ended without terminator"; - break; - next_block: - continue; - } - - done: - std::cout << "Interpreter finished after " << steps_ << " steps.\n"; - switch (return_value.kind) { - case Value::INTEGER: - std::cout << "Return value: " << return_value.ival << "\n"; - break; - case Value::FLOATING: - std::cout << "Return value: " << return_value.fval << "\n"; - break; - case Value::POINTER: - std::cout << "Return value: ptr(" << return_value.ptr.object_id - << "+" << return_value.ptr.offset << ")\n"; - break; - default: - std::cout << "Return value: void/undef\n"; - break; - } +#include +#include +#include +#include +#include +#include - return return_value; -} +#include "Index.h" +#include +#include +#include +#include +#include +#include -} // namespace +DEFINE_uint64(entity_id, mx::kInvalidEntityId, "ID of the entity to interpret"); +DEFINE_string(entity_name, "", "Name of the function to interpret"); +DEFINE_bool(trace, false, "Print each instruction as it executes"); +DEFINE_uint64(max_steps, 100000, "Maximum instruction steps before aborting"); int main(int argc, char *argv[]) { std::stringstream ss; @@ -3529,12 +42,9 @@ int main(int argc, char *argv[]) { std::optional ir_func; if (FLAGS_entity_id != mx::kInvalidEntityId) { - auto vid = mx::EntityId(FLAGS_entity_id).Unpack(); - if (auto *fid = std::get_if(&vid)) { - // Direct entity ID lookup would go here. - LOG(ERROR) << "Direct IRFunction ID lookup not yet supported. Use --entity_name."; - return 1; - } + LOG(ERROR) << "Direct IRFunction ID lookup not yet supported. 
" + << "Use --entity_name."; + return 1; } if (!FLAGS_entity_name.empty()) { @@ -3551,7 +61,8 @@ int main(int argc, char *argv[]) { } if (!ir_func) { - LOG(ERROR) << "Could not find IR for function '" << FLAGS_entity_name << "'"; + LOG(ERROR) << "Could not find IR for function '" << FLAGS_entity_name + << "'"; return 1; } @@ -3570,19 +81,123 @@ int main(int argc, char *argv[]) { std::cout << num_blocks << " blocks, " << num_insts << " instructions, " << num_objs << " objects\n"; - // Run with zero-initialized arguments. - // A real testing harness would supply concrete values. + // Build zero-initialized arguments. + using namespace mx::ir::interpret; std::vector args; if (auto fd = mx::FunctionDecl::from(*decl)) { for (auto p : fd->parameters()) { (void)p; - args.push_back(Value::Int(0)); + args.push_back(MakeInt(0)); + } + } + std::cout << "Running with " << args.size() + << " zero-initialized arguments...\n\n"; + + // Create policy objects with function resolvers for interprocedural calls. + ConcreteValueFactory factory; + ConcreteMemory memory; + + // Resolve entity ID → IRFunction. Handles both FunctionDecl entity IDs + // (direct calls) and DeclRefExpr entity IDs (indirect calls via FUNC_PTR). + FunctionResolver func_resolver = + [&index](mx::RawEntityId eid) -> std::optional { + auto entity = index.entity(mx::EntityId(eid)); + + if (auto *decl = std::get_if(&entity)) { + if (auto fd = mx::FunctionDecl::from(*decl)) { + return mx::IRFunction::from(*fd); + } + return std::nullopt; + } + + if (auto *stmt = std::get_if(&entity)) { + if (auto dre = mx::DeclRefExpr::from(*stmt)) { + if (auto fd = mx::FunctionDecl::from(dre->declaration())) { + return mx::IRFunction::from(*fd); + } + } + return std::nullopt; + } + + return std::nullopt; + }; + + // Resolve global variable entity ID → GlobalInfo (size, initializer). 
+ GlobalResolver global_resolver = + [&index](mx::RawEntityId eid) -> std::optional { + auto entity = index.entity(mx::EntityId(eid)); + + // Get the VarDecl. + std::optional vd; + if (auto *decl = std::get_if(&entity)) { + vd = mx::VarDecl::from(*decl); + } else if (auto *stmt = std::get_if(&entity)) { + if (auto dre = mx::DeclRefExpr::from(*stmt)) { + vd = mx::VarDecl::from(dre->declaration()); + } + } + if (!vd) return std::nullopt; + + GlobalInfo info; + info.canonical_eid = vd->id().Pack(); + auto ty = vd->type(); + if (auto bits = ty.size_in_bits()) { + info.size = static_cast((*bits + 7) / 8); + } + if (auto al = ty.alignment()) { + info.align = static_cast(*al / 8); + } + if (info.align == 0) info.align = 8; + + // Direct lookup: VarDecl → GLOBAL_INITIALIZER IRFunction. + info.initializer = mx::IRFunction::from(*vd); + return info; + }; + + ConcreteDriver driver(std::move(func_resolver), std::move(global_resolver)); + + // Run the interpreter. + InterpreterState state; + InitState(state, memory, *ir_func, args); + + StepResult last_result{StepStatus::ERROR}; + while (true) { + if (state.steps >= FLAGS_max_steps) { + std::cerr << "Step budget exhausted after " << state.steps + << " steps.\n"; + break; + } + last_result = Step(state, memory, factory, driver); + if (last_result.status == StepStatus::COMPLETED) break; + if (last_result.status == StepStatus::ERROR) { + std::cerr << "Interpreter error after " << state.steps + << " steps.\n"; + break; + } + if (last_result.status == StepStatus::SUSPENDED) { + std::cerr << "Interpreter suspended (unexpected in concrete mode).\n"; + break; } } - std::cout << "Running with " << args.size() << " zero-initialized arguments...\n\n"; - Interpreter interp(*ir_func, FLAGS_trace); - interp.Run(args); + std::cout << "Interpreter finished after " << state.steps + << " steps.\n"; + + // Print the return value. 
+ const auto &ret = last_result.return_value; + if (auto *s = std::get_if(&ret)) { + if (s->width == 4) { + std::cout << "Return value: " << s->as_f32() << " (float)\n"; + } else { + std::cout << "Return value: " << s->as_i64() << "\n"; + } + } else if (auto *p = AsPointer(ret)) { + std::cout << "Return value: ptr(" << ConcreteAddress(*p) << ")\n"; + } else if (IsNull(ret)) { + std::cout << "Return value: null\n"; + } else { + std::cout << "Return value: void/undef\n"; + } return 0; } diff --git a/bindings/Python/Generated/IR/BlockKind.cpp b/bindings/Python/Generated/IR/BlockKind.cpp index f6a610856..1f6937fd7 100644 --- a/bindings/Python/Generated/IR/BlockKind.cpp +++ b/bindings/Python/Generated/IR/BlockKind.cpp @@ -94,7 +94,9 @@ bool PythonBinding::load(BorrowedPyObject *module) noexcept { for (T val : EnumerationRange()) { auto ival = PyLong_FromUnsignedLongLong(static_cast(val)); if (ival) { - auto iname = PyUnicode_FromString(EnumeratorName(val)); + auto name = EnumeratorName(val); + if (!name) continue; // Skip gap values. + auto iname = PyUnicode_FromString(name); if (!PyObject_SetItem(ns_dict, iname, ival)) { continue; } diff --git a/bindings/Python/Generated/IR/ObjectKind.cpp b/bindings/Python/Generated/IR/ObjectKind.cpp index cfea8d074..c98289d6c 100644 --- a/bindings/Python/Generated/IR/ObjectKind.cpp +++ b/bindings/Python/Generated/IR/ObjectKind.cpp @@ -94,7 +94,9 @@ bool PythonBinding::load(BorrowedPyObject *module) noexcept { for (T val : EnumerationRange()) { auto ival = PyLong_FromUnsignedLongLong(static_cast(val)); if (ival) { - auto iname = PyUnicode_FromString(EnumeratorName(val)); + auto name = EnumeratorName(val); + if (!name) continue; // Skip gap values. 
+ auto iname = PyUnicode_FromString(name); if (!PyObject_SetItem(ns_dict, iname, ival)) { continue; } diff --git a/bindings/Python/Generated/IR/OpCode.cpp b/bindings/Python/Generated/IR/OpCode.cpp index 1a55b3d91..e1e726970 100644 --- a/bindings/Python/Generated/IR/OpCode.cpp +++ b/bindings/Python/Generated/IR/OpCode.cpp @@ -94,7 +94,9 @@ bool PythonBinding::load(BorrowedPyObject *module) noexcept { for (T val : EnumerationRange()) { auto ival = PyLong_FromUnsignedLongLong(static_cast(val)); if (ival) { - auto iname = PyUnicode_FromString(EnumeratorName(val)); + auto name = EnumeratorName(val); + if (!name) continue; // Skip gap values. + auto iname = PyUnicode_FromString(name); if (!PyObject_SetItem(ns_dict, iname, ival)) { continue; } diff --git a/docs/interpreter-loop.md b/docs/interpreter-loop.md new file mode 100644 index 000000000..f90014ede --- /dev/null +++ b/docs/interpreter-loop.md @@ -0,0 +1,347 @@ +# Interpreter Loop: From Unit Tests to Real Programs + +## Mission + +Make `mx-interpret-ir` execute a real, indexed C program starting from `main()`, +producing the same observable output (exit code, stdout, file output) as the +natively compiled binary. This is differential testing: interpretation must be +indistinguishable from native execution for the chosen program. + +## Current State + +- **31/34 unit tests pass** (up from 25/34 baseline). + +- **Newly passing**: test_function_calls (all 5 subtests including variadics), + test_globals, test_struct_assign, test_string_literals, test_array_decay, + test_byvalue. + +- **34/34 tests pass** (with re-indexed DB). + +## Active Restructure (in progress) + +Architecture is being converted from class-based Interpreter to a state +transition function. Partially complete — headers are written, codegen +has frame layout computation, but Interpreter.cpp needs full rewrite. 
+ +### What's done: +- `Interpreter.h`: new API — `InterpreterState` struct + free `InitState()`/`Step()` functions +- `Suspension.h`: suspension types renamed to describe what's needed (`NeedCallResolution`, `NeedGlobalResolution`, `NeedBranchDecision`, `NeedConcretePointer`) +- `Driver.h`: simplified to single `Resolve(Suspension) → Resolution` method +- `ConcreteDriver.h/.cpp`: updated to new suspension/resolution types +- `Memory.h`: added `WritePointer()`/`ReadPointer()` for provenance tracking +- `IR.capnp`: added `frameOffset` to Object, `frameSizeBytes`/`hasDynamicAllocas` to Function +- `IRGen.h/.cpp`: `ObjectIR` has `frame_offset`, `FunctionIR` has `frame_size_bytes`/`has_dynamic_allocas`, `ComputeFrameLayout()` assigns aligned offsets +- `SerializeIR.cpp`: serializes the new fields +- `Object.h/.cpp`, `Function.h/.cpp`: read-side API for new fields +- `Enums.cpp`: `EnumeratorName()` returns `nullptr` for gap values (fixes Python binding crash) +- Python bindings: skip null enum names + +### What's NOT done: +- `Interpreter.cpp`: still has old class-based code, won't compile against new headers +- `InterpretIR.cpp` (CLI): needs updating for new Step API +- pointer_shadow removal from MemWriteValue/MemReadValue +- CALL handling: needs to push/pop frames instead of recursive child interpreters + - CallFrame needs `resume_after_inst` (RawEntityId, kInvalidEntityId = no pending call) + - After callee RETs, store return value in values[resume_after_inst], clear resume_after_inst + - Actually: callee writes to return_ptr in memory, caller reads from return slot — no special plumbing +- Memory: frame allocation (`AllocateFrame`/`FreeFrame`) instead of per-object Allocate + - ALLOCA resolves to `frame_base + obj.frame_offset()` — no runtime allocation + +### Key design decisions: +- Step is a free function, not a method +- State owns call stack + globals map (memory is a separate parameter) +- CALL pushes a callee frame, RET pops it — no recursive child 
interpreters +- `resume_after_inst` in CallFrame tracks where to continue after a callee returns +- Instructions are failure-atomic: suspend on unresolved operands, don't silently skip +- pointer_shadow is removed — Memory stores real bytes, WritePointer/ReadPointer for provenance +- Stack frames are single allocations with pre-computed offsets, not per-object mallocs + +### Codegen changes also in this branch: +- Duff's device: collect_cases recurses into case sub-statements +- Duff's device: emit_case_bodies handles DoStmt with loop back-edge +- Implicit default for switches without explicit default +- Guard against inner switch case leakage in collect_cases +- emit_case_bodies handles nested SwitchStmt as sub-statement +- CompoundStmts processed after terminators (for goto-into-scope) +- ENTER_SCOPE skipped when block is already terminated +- CONSUME_VA_PARAM wrapped in LOAD by codegen +- va_copy implemented +- New test cases for goto-after-return patterns + +- **2 remaining failures before restructure** (both now pass with re-indexed DB): + - `test_variadics` (returns 5): wider variadic types (double, struct) not yet + handled by CONSUME_VA_PARAM. + - `test_evil_goto` (returns 1): codegen bug — `collect_cases` and + `emit_case_bodies` in IRGen.cpp don't recurse deeply enough into Duff's + device `DoStmt` to find nested case statements. `collect_cases` fix is in + place; `emit_case_bodies` needs the same fix. Also added implicit default + for switches without explicit `default`. Needs debug build to trace AST. + +- **Architecture**: policy-based (ValueFactory, Memory, Driver). Interpreter + delegates all computation. Cross-cutting concerns via composition/proxying. 
+ +- **Key files**: + - `include/multiplier/IR/Interpret/*.h` (Value, Memory, ValueFactory, Driver, + Suspension, Interpreter, Concrete{ValueFactory,Memory,Driver}) + - `lib/IR/Interpret/*.cpp` (implementations) + - `bin/InterpretIR/InterpretIR.cpp` (CLI driver, ~140 lines) + - `tests/InterpretIR/` (test suite, `run_tests.sh`) + +## Candidate Program Selection + +Pick a C program that: + +1. **Is self-contained**: single file or a few files, no autoconf/cmake, no + complex build system. Can be compiled with a single `clang` invocation. +2. **Avoids opaque libc types**: no `FILE*`, no `DIR*`, no `jmp_buf`. These + require modeling internal libc layout which is not portable. +3. **Has verifiable output**: produces deterministic output that can be compared + byte-for-byte (hash output, compression round-trip, encoded data). +4. **Exercises real patterns**: function calls, globals, structs, arrays, loops, + pointers, string operations. Not just toy arithmetic. +5. **Is small enough to debug**: under 2000 lines. You need to be able to trace + through the IR when things go wrong. + +**Good candidates** (in order of preference): + +- **A SHA-256 implementation** (~300 lines). Pure computation on byte arrays. + Input is a fixed string, output is a hex digest. No IO beyond + memcpy/memset/strlen. Easy to verify: compare hex output against `shasum`. +- **A CRC32 implementation** (~100 lines). Even simpler. Table-driven, pure + computation. +- **A base64 encoder/decoder** (~200 lines). String in, string out. +- **xxHash** (~400 lines, single-file). Very portable, no deps. + +**Avoid**: anything with `printf` (format string interpretation is a rabbit +hole), `fopen`/`fread` (opaque `FILE` type), `malloc` (heap management can be +stubbed but adds complexity early), signal handlers, threads. + +**Indexing budget**: if indexing a candidate takes more than ~2 hours of +wall-clock debugging, abandon it and try the next one. 
The indexer may have +issues with certain code patterns; don't fight the tool. + +## Phase 1: Fix Interprocedural Calls + +**Goal**: `test_function_calls` passes (direct calls, recursion, function +pointers, higher-order functions). + +The current CALL handler at `lib/IR/Interpret/Interpreter.cpp:1264` creates a +recursive `Interpreter` instance for each call. This works for intra-fragment +calls but likely fails for cross-fragment function resolution. + +**Diagnosis steps**: +1. Run `test_function_calls` with a debug build under lldb. Break at the CALL + handler. Check: does `IRFunction::from(*target)` return a valid function? +2. If not: the issue is that `target` is an `IRInstruction` referencing a + declaration, and `IRFunction::from()` needs to look up the function's IR + across fragments. The `Index` is needed but the interpreter doesn't have it. +3. The interpreter needs access to the index (or a function resolver) to look up + callees by declaration. + +**Design consideration**: the interpreter currently takes `(ValueFactory, Memory, +Driver)`. It doesn't know about the index. Function resolution is a Driver +concern. Extend `CallSuspension` or `Driver::ResolveCall` to return the callee's +`IRFunction` when the action is `INLINE`. Or: give the interpreter an +`std::function<std::optional<IRFunction>(const FunctionDecl &)>` resolver at +construction time. Think about which preserves IoC better. + +**Do NOT**: hardcode index access into the interpreter. The interpreter should +not know about `mx::Index`. The resolver is injected. + +## Phase 2: Fix Global Initialization + +**Goal**: `test_globals` passes. + +Globals in the IR use `GLOBAL_PTR` instructions that reference global variable +declarations. The interpreter needs to: + +1. Allocate memory for each global on first access (lazy init). +2. Run the global's initializer (if it has one) to populate the memory. +3. Return a pointer to the allocated memory. + +**Design**: global state lives in `Memory`, not per-`CallFrame`.
The interpreter +(or a helper) maintains a `map` mapping global entity IDs +to their allocated addresses. On first `GLOBAL_PTR` encounter: +- Look up the global's `VarDecl` from the IR instruction's source. +- Allocate `VarDecl::size()` bytes in memory. +- If the global has a `GLOBAL_INITIALIZER` function in the IR, run it. +- Otherwise, zero-initialize. + +**Watch out**: circular initialization, globals that reference other globals. +Keep a "currently initializing" set to detect cycles. + +## Phase 3: Fix Remaining Unit Test Failures + +Before moving to a real program, get as many unit tests passing as possible. +Each failure is a signal about a missing interpreter capability. + +- `test_struct_assign`: likely needs proper aggregate copy (memcpy through + struct assignment). +- `test_string_literals`: needs `STRING_PTR` to properly allocate and populate + string literal data. +- `test_byvalue`: pass-by-value struct parameters. +- `test_evil_goto`: non-trivial control flow. + +Fix these in order of difficulty (easiest first). Each fix likely generalizes to +patterns the real program will need. + +## Phase 4: Index and Run a Real Program + +1. Write or obtain the candidate C program. +2. Create `compile_commands.json` for it. +3. Run `mx-index` to create the database. +4. Verify the IR looks reasonable: run `mx-interpret-ir --db <db> --entity_name main` + and see how far it gets before failing. +5. Iterate: each failure reveals a missing feature. Fix it, re-run, repeat. + +**External function handling**: when the interpreter encounters a CALL to a +function with no IR (external/libc function), it needs to handle it. The current +code returns `undef`. Instead: + +- The `Driver::ResolveCall` path should check the function name. +- For known functions (memcpy, memset, strlen, strcmp, etc.), implement them + directly using the interpreter's `Memory` interface. +- Start with the functions the candidate program actually calls.
Don't + speculatively implement functions you don't need yet. +- Each external function implementation reads arguments from the call's argument + list (which are pointers into interpreter memory) and operates on interpreter + memory directly. + +**Naming convention for external function handlers**: group by category (memory, +string, math) but keep the dispatch simple. A single `switch` or `map` is fine. Don't over-abstract. + +**If you need syscalls** (open, read, write, exit): these are stretch goals. +Start with function-name-based stubs. If a syscall layer becomes necessary: +- Add a `HYPERCALL` or `EXTERN_CALL` concept to the Driver interface. +- Sub-opcodes for each syscall (OPEN, READ, WRITE, CLOSE, EXIT, etc.). +- Implementations pass through to real host syscalls, translating addresses + between interpreter memory and host memory. +- Be mindful: interpreter pointers are not host pointers. Any data passed to a + real syscall must be copied from interpreter memory to a host buffer first, + and results copied back. + +## Self-Review Protocol + +After every significant change (more than ~50 lines), pause and ask: + +### 1. Am I maintaining inversion of control? + +- Does the interpreter know about concrete types it shouldn't? +- Am I adding `#include <multiplier/Index.h>` to `Interpreter.cpp`? That's a + red flag. The interpreter delegates to policies; it doesn't reach into the + index directly. +- Could a different Driver implementation (symbolic, taint-tracking) still work + with this interpreter? If my change assumes concrete execution, it's in the + wrong place. + +### 2. Is there a common theme? + +- Am I adding three similar switch cases? Extract the pattern. +- Am I handling "external memcpy" differently from "external memset"? They're + both "external function with known semantics operating on interpreter memory." + The dispatch mechanism should be uniform. +- Look for the abstraction that's trying to emerge. Don't force it, but don't + ignore it either. + +### 3.
Am I repeating myself? + +- Copy-pasted code is a bug waiting to happen. If two code paths do similar + things with minor variations, factor out the common part. +- But: three similar lines is better than a premature abstraction. Only + generalize when the pattern is clear and stable. + +### 4. Am I adding complexity that doesn't pay for itself? + +- Every abstraction has a cost (indirection, cognitive load, maintenance). +- A 10-line inline handler for `strlen` is better than a 50-line generic + "string function framework" that only handles `strlen`. +- Ask: "if I delete this abstraction and inline the code, is it clearer?" If + yes, delete the abstraction. + +### 5. Is my code testable? + +- Can I write a unit test (a `.c` file + expected return value) for the feature + I just added? +- If the feature is "globals work," write `test_globals.c` (already exists) and + verify it passes. +- **Run the full test suite after every change.** Regressions are cheap to fix + when caught immediately, expensive when caught later. + +## Task Tracking + +Use a simple checklist. 
After each session, update it: + +``` +## Checklist +- [x] test_function_calls passes (direct, recursive, indirect, higher-order, variadic) +- [x] test_globals passes (simple, array, struct, static local, file-scope static) +- [x] test_struct_assign passes +- [x] test_string_literals passes (was already passing after CALL arg fix) +- [x] test_byvalue passes (unified return: scalar ≤8B read from slot, aggregate >8B via pointer) +- [ ] test_evil_goto passes (returns 1 — duffs_copy; complex structured goto/switch patterns) +- [ ] Candidate program selected and indexed +- [ ] Candidate program's main() starts executing (reaches first call) +- [ ] All internal function calls in candidate work +- [ ] External function stubs implemented for candidate's needs +- [ ] Candidate produces correct output (differential test passes) +- [x] No unit test regressions (30/34 tests pass, up from 25/34 baseline) +``` + +## How to Self-Prompt + +At the start of each work session: + +1. **Read this document** to re-orient. +2. **Run the test suite**: `cd tests/InterpretIR && bash run_tests.sh mx-index.db ~/Build/multiplier/Release/multiplier/bin/mx-interpret-ir` +3. **Check the checklist**: what's the next unchecked item? +4. **Pick the highest-value, lowest-risk task**. Prefer tasks that unblock + multiple downstream items (e.g., fixing calls unblocks everything that calls + functions). +5. **After making changes**: rebuild, run tests, update checklist, self-review. + +At the end of each work session: + +1. **Run the full test suite** and record results. +2. **Update the checklist** above. +3. **Note any architectural decisions** made and why, so future sessions don't + re-derive them. +4. **If stuck**: describe the problem clearly, what you've tried, and what you + think the root cause is. This is your context for the next session. 
+ +## Build Commands + +```bash +# Build (Release) +cd ~/Build/multiplier/Release/multiplier && ninja -j$(sysctl -n hw.ncpu) mx-interpret-ir + +# Run single test +~/Build/multiplier/Release/multiplier/bin/mx-interpret-ir \ + --db tests/InterpretIR/mx-index.db --entity_name test_function_calls + +# Run full suite +cd tests/InterpretIR && bash run_tests.sh mx-index.db \ + ~/Build/multiplier/Release/multiplier/bin/mx-interpret-ir + +# Index a new program +~/Build/multiplier/Release/multiplier/bin/mx-index \ + --db <output.db> --workspace <workspace-dir> \ + --target compile_commands.json --fork_mode +``` + +## Architectural Invariants (do not violate) + +1. **Interpreter never includes `<multiplier/Index.h>`**. Function and global + resolution is delegated to policies/injected resolvers. +2. **No `LOG()` or `glog` in `lib/`**. Use `assert()` for invariant violations. + Return error values for expected failures. +3. **`MX_EXPORT` on all public classes** in `include/multiplier/IR/Interpret/`. +4. **Float32 values use `MakeFloat32`, float64 use `MakeFloat`**. Never mix. + Constants, loads, arithmetic results must all use the width-correct constructor. +5. **No hardcoded paths or usernames** in committed code. +6. **Policy interfaces are synchronous**. If you need async, build it inside a + proxy. The interpreter never manages event loops or work lists. +7. **Values are opaque to the interpreter**. The interpreter passes values + around but only inspects them through factory/memory calls. Direct access to + `ScalarValue::bits` in `Interpreter.cpp` is a smell (except in + `MemWriteValue`/`MemReadValue` which are the serialization boundary).
diff --git a/include/multiplier/IR/Function.h b/include/multiplier/IR/Function.h index 370da53cf..d6550233e 100644 --- a/include/multiplier/IR/Function.h +++ b/include/multiplier/IR/Function.h @@ -16,6 +16,7 @@ namespace mx { class Decl; class Stmt; +class VarDecl; class IRBlock; class IRInstruction; class IRObject; @@ -58,9 +59,16 @@ class MX_EXPORT IRFunction { // Root of the structure tree (FUNCTION_SCOPE). std::optional body_scope(void) const; + // Stack frame layout. + uint32_t frame_size_bytes(void) const; // Total fixed frame size. + bool has_dynamic_allocas(void) const; // True if frame can grow at runtime. + // Find the IR for a FunctionDecl (follows redeclarations). static std::optional from(const FunctionDecl &decl); + // Find the GLOBAL_INITIALIZER IR for a VarDecl. + static std::optional from(const VarDecl &decl); + // Find the containing IR function for any Decl, Stmt, Block, or Instruction. static std::optional containing(const Decl &decl); static std::optional containing(const Stmt &stmt); diff --git a/include/multiplier/IR/Interpret/ConcreteDriver.h b/include/multiplier/IR/Interpret/ConcreteDriver.h new file mode 100644 index 000000000..f9b483228 --- /dev/null +++ b/include/multiplier/IR/Interpret/ConcreteDriver.h @@ -0,0 +1,37 @@ +// Copyright (c) 2024-present, Trail of Bits, Inc. +// +// This source code is licensed in accordance with the terms specified in +// the LICENSE file found in the root directory of this source tree. + +#pragma once + +#include "../../Compiler.h" +#include "Driver.h" +#include + +namespace mx::ir::interpret { + +// Function resolver: given an entity ID (FunctionDecl or DeclRefExpr), +// return its IRFunction. Used for both direct and indirect calls. +using FunctionResolver = + std::function(RawEntityId)>; + +// Global resolver: given a global variable's entity ID, return its info. +using GlobalResolver = + std::function(RawEntityId)>; + +// Concrete driver: resolves all suspensions with simple default policies. 
+class MX_EXPORT ConcreteDriver final : public Driver { + public: + explicit ConcreteDriver(FunctionResolver func_resolver = {}, + GlobalResolver global_resolver = {}); + ~ConcreteDriver(void) override = default; + + Resolution Resolve(const Suspension &s) override; + + private: + FunctionResolver func_resolver_; + GlobalResolver global_resolver_; +}; + +} // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Interpret/ConcreteMemory.h b/include/multiplier/IR/Interpret/ConcreteMemory.h new file mode 100644 index 000000000..3b17d289f --- /dev/null +++ b/include/multiplier/IR/Interpret/ConcreteMemory.h @@ -0,0 +1,82 @@ +// Copyright (c) 2024-present, Trail of Bits, Inc. +// +// This source code is licensed in accordance with the terms specified in +// the LICENSE file found in the root directory of this source tree. + +#pragma once + +#include "../../Compiler.h" +#include "Memory.h" +#include "Value.h" +#include +#include + +namespace mx::ir::interpret { + +// Concrete memory implementation: bump-allocator with flat byte backing store. +// Supports pointer shadow tracking (which memory locations hold pointer values). 
+class MX_EXPORT ConcreteMemory final : public Memory { + public: + explicit ConcreteMemory(uint8_t address_width = 8, + uint64_t base_address = 0x10000); + + uint64_t Allocate(uint64_t size_bytes, uint64_t align_bytes) override; + void Free(uint64_t address) override; + bool Read(uint64_t address, void *dest, uint32_t size) override; + bool Write(uint64_t address, const void *src, uint32_t size) override; + bool Memset(uint64_t address, uint8_t value, uint32_t size) override; + bool Memcpy(uint64_t dest_address, uint64_t src_address, + uint32_t size) override; + + bool WritePointer(uint64_t address, uint64_t pointer_value) override; + bool ReadPointer(uint64_t address, uint64_t &pointer_value) override; + + void Poison(uint64_t address) override; + void Unpoison(uint64_t address) override; + bool IsPoisoned(uint64_t address) const override; + + std::unique_ptr Fork(void) const override; + + // Pointer shadow: tracks which memory locations hold pointer values. + // The interpreter calls these when storing/loading pointer-typed values. + void WritePointerShadow(uint64_t address, const Pointer &ptr); + void ClearPointerShadow(uint64_t address); + const Pointer *ReadPointerShadow(uint64_t address) const; + + // Auto-grow: extend a region if a write exceeds its bounds. + // Used for VLA-like objects with unknown compile-time size. + void SetAutoGrow(bool enabled) { auto_grow_ = enabled; } + + private: + struct Region { + uint64_t base{0}; + uint64_t size{0}; + bool freed{false}; + bool poisoned{false}; + }; + + uint8_t address_width_; + uint64_t next_alloc_; + bool auto_grow_{true}; + + // Flat byte backing store: address → bytes. + // Using a map of regions to support sparse address spaces. + std::unordered_map> backing_; + + // Region metadata keyed by base address. + std::unordered_map regions_; + + // Pointer shadow map: address → stored pointer value. + std::unordered_map pointer_shadow_; + + // Find the region containing an address. Returns nullptr if not found. 
+ const Region *FindRegion(uint64_t address) const; + Region *FindRegion(uint64_t address); + + // Ensure backing store exists for a region and return a pointer to + // the byte at the given offset within it. + uint8_t *GetBytes(uint64_t region_base, uint64_t offset); + const uint8_t *GetBytes(uint64_t region_base, uint64_t offset) const; +}; + +} // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Interpret/ConcreteValueFactory.h b/include/multiplier/IR/Interpret/ConcreteValueFactory.h new file mode 100644 index 000000000..06be19048 --- /dev/null +++ b/include/multiplier/IR/Interpret/ConcreteValueFactory.h @@ -0,0 +1,39 @@ +// Copyright (c) 2024-present, Trail of Bits, Inc. +// +// This source code is licensed in accordance with the terms specified in +// the LICENSE file found in the root directory of this source tree. + +#pragma once + +#include "../../Compiler.h" +#include "ValueFactory.h" + +namespace mx::ir::interpret { + +// Concrete implementation of ValueFactory: performs direct computation +// on ScalarValue/Pointer values. No symbolic reasoning. 
+class MX_EXPORT ConcreteValueFactory final : public ValueFactory { + public: + ~ConcreteValueFactory(void) override = default; + + Value BinaryOp(OpCode op, const Value &lhs, const Value &rhs) override; + Value UnaryOp(OpCode op, const Value &operand) override; + Value Compare(OpCode op, const Value &lhs, const Value &rhs) override; + Value Cast(CastOp op, const Value &operand) override; + Value MakeConst(ConstOp op, int64_t signed_val, + uint64_t unsigned_val) override; + Value MakeNullPtr(void) override; + Value PtrAdd(const Value &base, const Value &index, + int64_t element_size) override; + Value PtrDiff(const Value &lhs, const Value &rhs, + int64_t element_size) override; + std::optional IsTrue(const Value &val) override; + Value Select(const Value &cond, const Value &if_true, + const Value &if_false) override; + Value BitwiseIntrinsic(OpCode width_op, BitwiseOp sub, + const Value &val, const Value &val2) override; + Value FloatIntrinsic(FloatOp sub, + const std::vector &operands) override; +}; + +} // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Interpret/Driver.h b/include/multiplier/IR/Interpret/Driver.h index 9c534d1b6..1c347d16b 100644 --- a/include/multiplier/IR/Interpret/Driver.h +++ b/include/multiplier/IR/Interpret/Driver.h @@ -6,56 +6,21 @@ #pragma once #include "Suspension.h" -#include "Value.h" namespace mx::ir::interpret { // The Driver resolves suspensions from the interpreter. -// Subclass to implement analysis-specific policies. // -// ConcreteDriver: resolves everything concretely, aborts on symbolic. -// TaintDriver: tracks provenance, surfaces decisions to an agent. -// GuidedDriver: target-directed exploration with coverage coordination. +// In the new architecture, the driver CALLS Step() in a loop and handles +// suspensions. This base class provides a convenient Resolve() dispatch +// that maps each suspension type to a resolution. Subclass to implement +// analysis-specific policies. 
class Driver { public: virtual ~Driver(void) = default; - // Resolve a conditional branch with unknown condition. - virtual BranchResolution ResolveBranch(const BranchSuspension &s) = 0; - - // Resolve a function call. - virtual CallResolution ResolveCall(const CallSuspension &s) = 0; - - // Resolve a load from a symbolic/unknown address. - virtual LoadResolution ResolveLoad(const LoadSuspension &s) = 0; - - // Resolve a store to a symbolic/unknown address. - virtual StoreResolution ResolveStore(const StoreSuspension &s) = 0; - - // Concretize a symbolic value. - virtual ConcretizeResolution ResolveConcretize( - const ConcretizeSuspension &s) = 0; -}; - -// Checker interface: plugged into the interpreter for memory safety checks. -// Multiple checkers can be registered; all fire on each relevant event. -class Checker { - public: - virtual ~Checker(void) = default; - - // Called before every memory access. - virtual void OnMemoryAccess(const Pointer &addr, uint32_t size, - bool is_write) {} - - // Called after pointer arithmetic. - virtual void OnPointerArithmetic(const Pointer &base, int64_t offset_bytes, - const Pointer &result) {} - - // Called on function entry. - virtual void OnCallEntry(const CallSuspension &call) {} - - // Called on function return. - virtual void OnCallReturn(const Value &return_value) {} + // Resolve a suspension. Returns the resolution to resume with. + virtual Resolution Resolve(const Suspension &s) = 0; }; } // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Interpret/Interpreter.h b/include/multiplier/IR/Interpret/Interpreter.h new file mode 100644 index 000000000..44cf82272 --- /dev/null +++ b/include/multiplier/IR/Interpret/Interpreter.h @@ -0,0 +1,110 @@ +// Copyright (c) 2024-present, Trail of Bits, Inc. +// +// This source code is licensed in accordance with the terms specified in +// the LICENSE file found in the root directory of this source tree. 
+ +#pragma once + +#include "../../Compiler.h" +#include "Value.h" +#include "Suspension.h" +#include +#include +#include +#include +#include +#include +#include + +namespace mx { +class IRObject; +class MemoryInst; +} // namespace mx + +namespace mx::ir::interpret { + +class Driver; +class Memory; +class ValueFactory; + +// --------------------------------------------------------------------------- +// StepResult — what Step() returns +// --------------------------------------------------------------------------- + +enum class StepStatus { + CONTINUE, // More instructions to execute. + SUSPENDED, // Needs external resolution before resuming. + COMPLETED, // Top-level function returned normally. + ERROR, // Unrecoverable error (unreachable, etc.). +}; + +struct StepResult { + StepStatus status{StepStatus::ERROR}; + Suspension suspension; // Valid when status == SUSPENDED. + Value return_value; // Valid when status == COMPLETED. +}; + +// --------------------------------------------------------------------------- +// CallFrame — per-function-invocation state +// --------------------------------------------------------------------------- + +struct CallFrame { + IRFunction func; + std::vector params; + std::vector param_ptrs; + Value return_ptr{Undefined{}}; + + // Index into param_ptrs where variadic arguments begin. + uint32_t variadic_start_index{0}; + + // Instruction ID → computed value. + std::unordered_map values; + + // Entity ID → allocated address in Memory. + std::unordered_map entity_to_address; + + // Block ID → IRBlock (for CFG navigation). + std::unordered_map block_map; + + // Current execution position. + IRBlock current_block; + + // When a CALL pushes a callee frame, this records the CALL instruction's + // entity ID so that after the callee returns, we can resume from the + // instruction after the CALL and store the return value. 
+ RawEntityId resume_after_inst{kInvalidEntityId}; +}; + +// --------------------------------------------------------------------------- +// InterpreterState — everything needed to resume execution +// --------------------------------------------------------------------------- + +struct MX_EXPORT InterpreterState { + std::vector call_stack; + + // Global variable addresses: entity ID → allocated address. + std::unordered_map global_addresses; + + // Instruction step counter. + uint64_t steps{0}; + + // Current frame (convenience). + CallFrame &Frame(void) { return call_stack.back(); } + const CallFrame &Frame(void) const { return call_stack.back(); } + bool Empty(void) const { return call_stack.empty(); } +}; + +// --------------------------------------------------------------------------- +// Free functions — the interpreter is a state transition function +// --------------------------------------------------------------------------- + +// Initialize state for executing a function with the given arguments. +MX_EXPORT void InitState(InterpreterState &state, Memory &memory, + const IRFunction &func, const std::vector &args); + +// Execute one block. Advances state in place. +// The caller (driver) owns the loop and handles suspensions. +MX_EXPORT StepResult Step(InterpreterState &state, Memory &memory, + ValueFactory &factory, Driver &driver); + +} // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Interpret/Memory.h b/include/multiplier/IR/Interpret/Memory.h index 22290476e..982aeddc9 100644 --- a/include/multiplier/IR/Interpret/Memory.h +++ b/include/multiplier/IR/Interpret/Memory.h @@ -5,51 +5,75 @@ #pragma once -#include "Value.h" #include -#include +#include namespace mx::ir::interpret { -// Abstract memory model. The interpreter handles endianness mechanics -// and calls Read/Write on this interface. Implementations own the storage. +// Abstract memory model operating on a flat virtual address space. 
// -// ConcreteMemory: flat byte arrays per object. -// ShadowMemory: tracks initialization, taint, allocation state. -// COW memory: O(1) fork for multi-path exploration. +// Implementations: +// ConcreteMemory: bump allocator with flat byte backing store. +// BoundsCheckingMemory: proxy that tracks regions and detects OOB/UAF. +// COWMemory: proxy with copy-on-write for multi-path exploration. +// DebuggerMemory: pulls pages on demand from a live session. +// SnapshotMemory: mmaps a memory dump. class Memory { public: virtual ~Memory(void) = default; - // Allocate a new memory object. Returns the object ID. - virtual uint32_t Allocate(uint32_t size_bytes, uint32_t align_bytes) = 0; + // Allocate a new region. Returns the base address. + // Returns 0 on failure. + virtual uint64_t Allocate(uint64_t size_bytes, uint64_t align_bytes) = 0; - // Free a previously allocated object. - virtual void Free(uint32_t object_id) = 0; + // Free a previously allocated region by its base address. + virtual void Free(uint64_t address) = 0; - // Read raw bytes from an object at an offset. - // Returns false if the access is out of bounds or the object is freed. - virtual bool Read(uint32_t object_id, int64_t offset, - void *dest, uint32_t size) = 0; + // Read raw bytes from an address. + // Returns false if the access is invalid (unmapped, freed, etc.). + virtual bool Read(uint64_t address, void *dest, uint32_t size) = 0; - // Write raw bytes to an object at an offset. - virtual bool Write(uint32_t object_id, int64_t offset, - const void *src, uint32_t size) = 0; + // Write raw bytes to an address. + // Returns false if the access is invalid. + virtual bool Write(uint64_t address, const void *src, uint32_t size) = 0; + + // Pointer-aware read/write. The concrete implementation stores/reads + // the address as raw bytes (same as Write/Read with sizeof(uint64_t)). + // A provenance-tracking proxy can additionally mark these bytes as + // carrying pointer identity. 
+ virtual bool WritePointer(uint64_t address, uint64_t pointer_value); + virtual bool ReadPointer(uint64_t address, uint64_t &pointer_value); // Bulk operations. - virtual bool Memset(uint32_t object_id, int64_t offset, - uint8_t value, uint32_t size) = 0; - virtual bool Memcpy(uint32_t dest_obj, int64_t dest_offset, - uint32_t src_obj, int64_t src_offset, + virtual bool Memset(uint64_t address, uint8_t value, uint32_t size) = 0; + virtual bool Memcpy(uint64_t dest_address, uint64_t src_address, uint32_t size) = 0; - // Query object metadata. - virtual uint32_t ObjectSize(uint32_t object_id) const = 0; - virtual bool IsAllocated(uint32_t object_id) const = 0; + // Scope-based poisoning: marks a region as temporarily inaccessible + // (e.g., when a local variable goes out of scope). Default no-ops. + virtual void Poison(uint64_t address) { (void) address; } + virtual void Unpoison(uint64_t address) { (void) address; } + virtual bool IsPoisoned(uint64_t address) const { + (void) address; + return false; + } // Fork for multi-path exploration (COW). // Returns a new Memory that shares state until written. virtual std::unique_ptr Fork(void) const = 0; }; +// Default implementations. WritePointer writes raw bytes; ReadPointer returns +// false (no pointer tracking). Override both in implementations that track +// pointer provenance (e.g., ConcreteMemory). 
+inline bool Memory::WritePointer(uint64_t address, uint64_t pointer_value) { + return Write(address, &pointer_value, sizeof(pointer_value)); +} + +inline bool Memory::ReadPointer(uint64_t address, uint64_t &pointer_value) { + (void) address; + (void) pointer_value; + return false; +} + } // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Interpret/Suspension.h b/include/multiplier/IR/Interpret/Suspension.h index a4824555d..bac557337 100644 --- a/include/multiplier/IR/Interpret/Suspension.h +++ b/include/multiplier/IR/Interpret/Suspension.h @@ -7,96 +7,104 @@ #include "Value.h" #include +#include #include +#include #include #include -namespace mx { -class FunctionDecl; -} - namespace mx::ir::interpret { -// The interpreter yields Suspensions at decision points. -// A Driver returns Resolutions to continue execution. +// --------------------------------------------------------------------------- +// Suspensions — what the interpreter needs before it can continue +// --------------------------------------------------------------------------- -// Branch with unknown (symbolic) condition. -struct BranchSuspension { +// The interpreter can't resolve a branch condition. +// The driver must decide which path(s) to take. +struct NeedBranchDecision { Value condition; IRBlock true_block; IRBlock false_block; }; -struct BranchResolution { - bool take_true{true}; - bool take_false{false}; // true = fork both paths -}; - -// Function call — driver decides: inline, skip, or model. -struct CallSuspension { +// The interpreter encountered a CALL but doesn't have the callee's IR. +// The driver must provide the IRFunction (inline), a modeled return value, +// or indicate the call should be skipped. 
+struct NeedCallResolution { IRInstruction call_inst; - std::optional target; + RawEntityId target_eid{kInvalidEntityId}; + RawEntityId indirect_target_eid{kInvalidEntityId}; std::vector arguments; bool is_indirect{false}; }; -enum class CallAction { - INLINE, // Step into the callee's IR. - SKIP, // Return a default/symbolic value. - MODEL, // Driver provides the return value. +// The interpreter encountered a GLOBAL_PTR for an unresolved global. +// The driver must provide the global's size, alignment, and optional +// initializer function. +struct NeedGlobalResolution { + RawEntityId entity_id{kInvalidEntityId}; }; -struct CallResolution { - CallAction action{CallAction::SKIP}; - Value return_value; // For SKIP/MODEL. +// A pointer operand needed to be concrete but wasn't. +// The driver must concretize or provide the concrete address. +struct NeedConcretePointer { + IRInstruction inst; + Value symbolic_pointer; }; -// Load from a symbolic or unknown address. -struct LoadSuspension { - Value address; - uint32_t size_bytes{0}; +// --------------------------------------------------------------------------- +// Resolutions — what the driver provides to resume +// --------------------------------------------------------------------------- + +struct BranchDecision { + bool take_true{true}; + bool take_false{false}; // true = fork both paths. }; -struct LoadResolution { - Value value; +enum class CallAction { + INLINE, + SKIP, + MODEL, }; -// Store to a symbolic or unknown address. -struct StoreSuspension { - Value address; - Value value; - uint32_t size_bytes{0}; +struct CallResolution { + CallAction action{CallAction::SKIP}; + Value return_value; + IRFunction callee_ir; // For INLINE. }; -struct StoreResolution { - bool proceed{true}; +// Info about a global variable needed for lazy initialization. 
+struct GlobalInfo { + RawEntityId canonical_eid{kInvalidEntityId}; + uint32_t size{0}; + uint32_t align{8}; + std::optional initializer; }; -// Need to concretize a symbolic value. -struct ConcretizeSuspension { - Value symbolic_value; +struct GlobalResolution { + GlobalInfo info; }; -struct ConcretizeResolution { - Value concrete_value; +struct ConcretePointerResolution { + uint64_t address{0}; }; -// Union of all suspension types. +// --------------------------------------------------------------------------- +// Variant unions +// --------------------------------------------------------------------------- + using Suspension = std::variant< - BranchSuspension, - CallSuspension, - LoadSuspension, - StoreSuspension, - ConcretizeSuspension + NeedBranchDecision, + NeedCallResolution, + NeedGlobalResolution, + NeedConcretePointer >; -// Union of all resolution types. using Resolution = std::variant< - BranchResolution, + BranchDecision, CallResolution, - LoadResolution, - StoreResolution, - ConcretizeResolution + GlobalResolution, + ConcretePointerResolution >; } // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Interpret/Value.h b/include/multiplier/IR/Interpret/Value.h index 4fa12964a..75c60405b 100644 --- a/include/multiplier/IR/Interpret/Value.h +++ b/include/multiplier/IR/Interpret/Value.h @@ -45,6 +45,10 @@ struct ScalarValue { return v; } + uint64_t as_u64(void) const { + return bits; + } + double as_f64(void) const { double v; std::memcpy(&v, &bits, sizeof(v)); @@ -65,14 +69,114 @@ struct Undefined {}; // Sentinel for a null pointer. struct NullPtr {}; -// A pointer into the interpreter's address space. +// An opaque pointer into the interpreter's virtual address space. +// Address width (4 or 8 bytes) is a session property on Memory, not per-pointer. +// Policies access the concrete address via the free functions below. 
struct Pointer { - uint32_t object_id{0}; - int64_t offset{0}; + private: + uint64_t address_{0}; + + public: + Pointer(void) = default; + explicit Pointer(uint64_t addr) : address_(addr) {} + + bool operator==(const Pointer &o) const { return address_ == o.address_; } + bool operator!=(const Pointer &o) const { return address_ != o.address_; } + + friend bool IsConcrete(const Pointer &p); + friend uint64_t ConcreteAddress(const Pointer &p); }; +// For the concrete interpreter, all pointers are concrete. +// A future symbolic pointer variant would make this non-trivial. +inline bool IsConcrete(const Pointer &) { return true; } + +// Extract the concrete integral address. Only valid when IsConcrete() is true. +inline uint64_t ConcreteAddress(const Pointer &p) { return p.address_; } + // The value type the interpreter passes around. // Concrete implementation. A symbolic layer would extend/wrap this. using Value = std::variant; +// --------------------------------------------------------------------------- +// Inline helper functions +// --------------------------------------------------------------------------- + +// Extract as signed integer. Returns 0 for non-scalar values. +inline int64_t AsInt(const Value &v) { + if (auto *s = std::get_if(&v)) return s->as_i64(); + return 0; +} + +// Extract as unsigned integer. Returns 0 for non-scalar values. +inline uint64_t AsUint(const Value &v) { + if (auto *s = std::get_if(&v)) return s->as_u64(); + return 0; +} + +// Extract as double. Returns 0.0 for non-scalar values. +inline double AsFloat(const Value &v) { + if (auto *s = std::get_if(&v)) return s->as_f64(); + return 0.0; +} + +// Extract as float. Returns 0.0f for non-scalar values. +inline float AsFloat32(const Value &v) { + if (auto *s = std::get_if(&v)) return s->as_f32(); + return 0.0f; +} + +// Returns pointer if the value holds one, nullptr otherwise. 
+inline const Pointer *AsPointer(const Value &v) { + return std::get_if(&v); +} + +// Truth test for concrete values. +inline bool IsTruthy(const Value &v) { + if (auto *s = std::get_if(&v)) return s->bits != 0; + if (std::holds_alternative(v)) return true; + if (std::holds_alternative(v)) return false; + return false; // Undefined +} + +// Check if the value is undefined/poison. +inline bool IsUndefined(const Value &v) { + return std::holds_alternative(v); +} + +// Check if the value is a null pointer. +inline bool IsNull(const Value &v) { + return std::holds_alternative(v); +} + +// --- Construction helpers --- + +inline Value MakeInt(int64_t v, uint8_t w = 8) { + return ScalarValue::FromI64(v, w); +} + +inline Value MakeUint(uint64_t v, uint8_t w = 8) { + return ScalarValue::FromU64(v, w); +} + +inline Value MakeFloat(double v) { + return ScalarValue::FromF64(v); +} + +inline Value MakeFloat32(float v) { + return ScalarValue::FromF32(v); +} + +inline Value MakePtr(uint64_t addr) { + return Pointer(addr); +} + +inline Value MakeUndef(void) { + return Undefined{}; +} + +inline Value MakeNull(void) { + return NullPtr{}; +} + } // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Interpret/ValueFactory.h b/include/multiplier/IR/Interpret/ValueFactory.h index 4093cc41a..bc21532cc 100644 --- a/include/multiplier/IR/Interpret/ValueFactory.h +++ b/include/multiplier/IR/Interpret/ValueFactory.h @@ -8,6 +8,7 @@ #include "Value.h" #include #include +#include namespace mx::ir::interpret { @@ -47,6 +48,16 @@ class ValueFactory { // Select (ternary). virtual Value Select(const Value &cond, const Value &if_true, const Value &if_false) = 0; + + // Bitwise intrinsics (BSWAP, POPCOUNT, CLZ, CTZ, FFS, PARITY, ROTL, ROTR). + // The width opcode (BITWISE_8/16/32/64) determines operand width. 
+ virtual Value BitwiseIntrinsic(OpCode width_op, BitwiseOp sub, + const Value &val, + const Value &val2) = 0; + + // Float intrinsics (math functions, special values, classification). + virtual Value FloatIntrinsic(FloatOp sub, + const std::vector &operands) = 0; }; } // namespace mx::ir::interpret diff --git a/include/multiplier/IR/Object.h b/include/multiplier/IR/Object.h index 7d07df17c..63eba26b7 100644 --- a/include/multiplier/IR/Object.h +++ b/include/multiplier/IR/Object.h @@ -38,6 +38,7 @@ class MX_EXPORT IRObject { std::optional type(void) const; uint32_t size_bytes(void) const; uint32_t align_bytes(void) const; + uint32_t frame_offset(void) const; // Offset within the function's stack frame. bool needs_memory(void) const; }; diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 493e3b289..89c3e9f4f 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -146,6 +146,10 @@ add_library("mx-api" OBJECT "IR/Structure.cpp" "IR/StructureKinds.cpp" "IR/Object.cpp" + "IR/Interpret/ConcreteValueFactory.cpp" + "IR/Interpret/ConcreteMemory.cpp" + "IR/Interpret/ConcreteDriver.cpp" + "IR/Interpret/Interpreter.cpp" "InvalidEntityProvider.cpp" "InvalidEntityProvider.h" "Macro.h" diff --git a/lib/IR.capnp b/lib/IR.capnp index b652f3970..d9ed8cbde 100644 --- a/lib/IR.capnp +++ b/lib/IR.capnp @@ -9,6 +9,8 @@ struct Object @0xa7625c6bfddc036b { sizeBytes @2 :UInt32; alignBytes @3 :UInt32; kind @4 :UInt8; + frameOffset @5 :UInt32; # Offset within the function's stack frame. + # Only meaningful for stack-allocated objects. } struct Instruction @0xc6bb311936d9962b { @@ -52,4 +54,6 @@ struct Function @0xe6be31a259218610 { entityOffset @4 :UInt32; kind @5 :UInt8; # FunctionKind enum (0 = NORMAL) bodyScopeId @6 :UInt64; # IRStructureId of FUNCTION_SCOPE (root of structure tree) + frameSizeBytes @7 :UInt32; # Total size of the fixed stack frame (non-dynamic allocas). + hasDynamicAllocas @8 :Bool; # True if the function has VLAs or alloca(). 
} diff --git a/lib/IR/Enums.cpp b/lib/IR/Enums.cpp index 086b3f96e..763db7168 100644 --- a/lib/IR/Enums.cpp +++ b/lib/IR/Enums.cpp @@ -239,7 +239,7 @@ const char *EnumeratorName(OpCode op) noexcept { case OpCode::RETURN_ADDRESS_32: return "RETURN_ADDRESS_32"; case OpCode::RETURN_ADDRESS_64: return "RETURN_ADDRESS_64"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(ConstOp op) noexcept { @@ -264,7 +264,7 @@ const char *EnumeratorName(ConstOp op) noexcept { case ConstOp::WCHAR32: return "WCHAR32"; case ConstOp::BOOL: return "BOOL"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(AllocaKind op) noexcept { @@ -274,7 +274,7 @@ const char *EnumeratorName(AllocaKind op) noexcept { case AllocaKind::RETURN: return "RETURN"; case AllocaKind::DYNAMIC: return "DYNAMIC"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(CastOp op) noexcept { @@ -338,7 +338,7 @@ const char *EnumeratorName(CastOp op) noexcept { case CastOp::BITCAST: return "BITCAST"; case CastOp::IDENTITY: return "IDENTITY"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(MemOp op) noexcept { @@ -422,7 +422,7 @@ const char *EnumeratorName(MemOp op) noexcept { case MemOp::STORE_F32_BE: return "STORE_F32_BE"; case MemOp::STORE_F64_BE: return "STORE_F64_BE"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(BitwiseOp op) noexcept { @@ -439,7 +439,7 @@ const char *EnumeratorName(BitwiseOp op) noexcept { case BitwiseOp::ROTR: return "ROTR"; // (ABS moved to sized opcode) } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(FloatOp op) noexcept { @@ -527,7 +527,7 @@ const char *EnumeratorName(FloatOp op) noexcept { case FloatOp::SIGNBIT_32: return "SIGNBIT_32"; case FloatOp::SIGNBIT_64: return "SIGNBIT_64"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(FunctionKind kind) noexcept { @@ -536,7 +536,7 @@ const char *EnumeratorName(FunctionKind kind) noexcept { case 
FunctionKind::GLOBAL_INITIALIZER: return "GLOBAL_INITIALIZER"; case FunctionKind::THREAD_LOCAL_INITIALIZER: return "THREAD_LOCAL_INITIALIZER"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(ObjectKind kind) noexcept { @@ -553,7 +553,7 @@ const char *EnumeratorName(ObjectKind kind) noexcept { case ObjectKind::ALLOCA: return "ALLOCA"; case ObjectKind::HEAP: return "HEAP"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(StructureKind kind) noexcept { @@ -578,7 +578,7 @@ const char *EnumeratorName(StructureKind kind) noexcept { case StructureKind::SWITCH_CASE: return "SWITCH_CASE"; case StructureKind::EXPRESSION_SCOPE: return "EXPRESSION_SCOPE"; } - return "UNKNOWN"; + return nullptr; } const char *EnumeratorName(BlockKind kind) noexcept { @@ -601,7 +601,7 @@ const char *EnumeratorName(BlockKind kind) noexcept { case BlockKind::COMPENSATION: return "COMPENSATION"; case BlockKind::LOOP_PREHEADER: return "LOOP_PREHEADER"; } - return "UNKNOWN"; + return nullptr; } } // namespace mx::ir @@ -616,7 +616,7 @@ const char *EnumeratorName(IREntityKind kind) noexcept { case IREntityKind::IR_OBJECT: return "IR_OBJECT"; case IREntityKind::IR_STRUCTURE: return "IR_STRUCTURE"; } - return "UNKNOWN"; + return nullptr; } } // namespace mx diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 83ed4f7af..17cbb11ad 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "Impl.h" @@ -108,6 +109,16 @@ std::optional IRFunction::body_scope(void) const { return std::nullopt; } +uint32_t IRFunction::frame_size_bytes(void) const { + if (!impl) return 0; + return impl->reader().getFrameSizeBytes(); +} + +bool IRFunction::has_dynamic_allocas(void) const { + if (!impl) return false; + return impl->reader().getHasDynamicAllocas(); +} + std::optional IRFunction::from(const FunctionDecl &decl) { // Try this specific declaration first. 
auto try_decl = [](const FunctionDecl &d) -> std::optional {
@@ -145,6 +156,23 @@ std::optional IRFunction::from(const FunctionDecl &decl) {
   return std::nullopt;
 }
 
+// Looks up the IR function recorded for variable declaration `decl`.
+//
+// Scans the IR functions stored in the fragment that contains `decl` and
+// returns the first one whose source-declaration entity id equals the packed
+// id of `decl`. Returns std::nullopt when the containing fragment has no
+// implementation or when no entry matches.
+// NOTE(review): presumably these entries are the global-initializer IR
+// functions emitted per variable -- confirm against the indexer.
+std::optional IRFunction::from(const VarDecl &decl) {
+  auto frag = Fragment::containing(decl);
+  if (!frag.impl) return std::nullopt;
+
+  auto decl_eid = decl.id().Pack();
+  auto ir_funcs = frag.impl->reader.getIrFunctions();
+  auto frag_id = frag.impl->fragment_id;
+
+  // Linear scan; `i` is the index handed to the implementation object.
+  for (unsigned i = 0; i < ir_funcs.size(); ++i) {
+    if (ir_funcs[i].getSourceDeclEntityId() == decl_eid) {
+      return IRFunction(std::make_shared(
+          frag.impl, i, frag_id));
+    }
+  }
+  return std::nullopt;
+}
+
 std::optional IRFunction::containing(const Decl &decl) {
   // If this IS a FunctionDecl, use from() directly.
   if (auto fd = FunctionDecl::from(decl)) {
diff --git a/lib/IR/Interpret/ConcreteDriver.cpp b/lib/IR/Interpret/ConcreteDriver.cpp
new file mode 100644
index 000000000..408982640
--- /dev/null
+++ b/lib/IR/Interpret/ConcreteDriver.cpp
@@ -0,0 +1,63 @@
+// Copyright (c) 2024-present, Trail of Bits, Inc.
+//
+// This source code is licensed in accordance with the terms specified in
+// the LICENSE file found in the root directory of this source tree.
+
+#include
+
+namespace mx::ir::interpret {
+
+// Stores the two resolver callbacks; both are moved into the driver. Either
+// may be empty -- Resolve() tests each one before calling it.
+ConcreteDriver::ConcreteDriver(FunctionResolver func_resolver,
+                               GlobalResolver global_resolver)
+    : func_resolver_(std::move(func_resolver)),
+      global_resolver_(std::move(global_resolver)) {}
+
+// Produces a Resolution for suspension `s` by visiting the alternative the
+// variant currently holds and answering it with a fixed concrete policy.
+Resolution ConcreteDriver::Resolve(const Suspension &s) {
+  return std::visit([&](const auto &susp) -> Resolution {
+    using T = std::decay_t;
+
+    if constexpr (std::is_same_v) {
+      // Concrete: branch conditions should always be concrete.
+      // Default to true path.
+      return BranchDecision{.take_true = true, .take_false = false};
+    }
+
+    else if constexpr (std::is_same_v) {
+      if (func_resolver_) {
+        // Try direct target first, then indirect.
+        for (auto eid : {susp.target_eid, susp.indirect_target_eid}) {
+          if (eid != kInvalidEntityId) {
+            if (auto ir = func_resolver_(eid)) {
+              return CallResolution{.action = CallAction::INLINE,
+                                    .return_value = Undefined{},
+                                    .callee_ir = *std::move(ir)};
+            }
+          }
+        }
+      }
+      return CallResolution{.action = CallAction::SKIP,
+                            .return_value = Undefined{}};
+    }
+
+    else if constexpr (std::is_same_v) {
+      if (global_resolver_) {
+        if (auto info = global_resolver_(susp.entity_id)) {
+          return GlobalResolution{.info = *std::move(info)};
+        }
+      }
+      return GlobalResolution{};
+    }
+
+    else if constexpr (std::is_same_v) {
+      // Concrete mode: this shouldn't happen. Return 0.
+      return ConcretePointerResolution{.address = 0};
+    }
+
+    else {
+      // Unreachable for well-typed variants.
+      return ConcretePointerResolution{.address = 0};
+    }
+  }, s);
+}
+
+} // namespace mx::ir::interpret
diff --git a/lib/IR/Interpret/ConcreteMemory.cpp b/lib/IR/Interpret/ConcreteMemory.cpp
new file mode 100644
index 000000000..2c13bb32c
--- /dev/null
+++ b/lib/IR/Interpret/ConcreteMemory.cpp
@@ -0,0 +1,255 @@
+// Copyright (c) 2024-present, Trail of Bits, Inc.
+//
+// This source code is licensed in accordance with the terms specified in
+// the LICENSE file found in the root directory of this source tree.
+
+#include
+
+#include
+#include
+
+namespace mx::ir::interpret {
+
+static constexpr uint64_t kMask32 = 0xFFFFFFFFull;
+
+// Creates an empty memory whose first allocation starts at `base_address`.
+// `address_width` is the pointer width in bytes and must be 4 or 8; with a
+// width of 4 every address is truncated to 32 bits.
+ConcreteMemory::ConcreteMemory(uint8_t address_width, uint64_t base_address)
+    : address_width_(address_width),
+      next_alloc_(base_address) {
+  assert((address_width_ == 4 || address_width_ == 8) &&
+         "address_width must be 4 or 8");
+  if (address_width_ == 4) {
+    next_alloc_ &= kMask32;
+  }
+}
+
+// Bump-allocates a zero-filled region and returns its base address.
+// A `size_bytes` or `align_bytes` of 0 is replaced by 8.
+uint64_t ConcreteMemory::Allocate(uint64_t size_bytes, uint64_t align_bytes) {
+  if (size_bytes == 0) {
+    size_bytes = 8;
+  }
+  if (align_bytes == 0) {
+    align_bytes = 8;
+  }
+
+  // Round next_alloc_ up to the next multiple of align_bytes. A modulo is
+  // used instead of a bit mask so alignments that are not powers of two are
+  // honoured too; for powers of two the result is the same as the mask form.
+  if (uint64_t rem = next_alloc_ % align_bytes; rem != 0) {
+    next_alloc_ += align_bytes - rem;
+  }
+
+  uint64_t base = next_alloc_;
+  next_alloc_ += size_bytes;
+
+  // NOTE(review): in 32-bit mode the bump pointer wraps silently here, so a
+  // later allocation could alias an earlier one -- confirm this is acceptable.
+  if (address_width_ == 4) {
+    base &= kMask32;
+    next_alloc_ &= kMask32;
+  }
+
+  regions_[base] = Region{base, size_bytes, false, false};
+  backing_[base].resize(size_bytes, 0);
+
+  return base;
+}
+
+// Marks the region whose base is exactly `address` as freed. Addresses that
+// are not a region base are ignored.
+void ConcreteMemory::Free(uint64_t address) {
+  auto it = regions_.find(address);
+  if (it != regions_.end()) {
+    it->second.freed = true;
+  }
+}
+
+// Returns the region containing `address`, or nullptr if there is none.
+// Freed regions are still returned; callers check `freed` themselves.
+const ConcreteMemory::Region *ConcreteMemory::FindRegion(
+    uint64_t address) const {
+  // Fast path: exact base address match.
+  auto it = regions_.find(address);
+  if (it != regions_.end()) {
+    return &it->second;
+  }
+
+  // Slow path: scan for containing region. The containment test subtracts
+  // instead of adding so it cannot overflow for regions near the top of the
+  // address space.
+  for (const auto &entry : regions_) {
+    const Region &region = entry.second;
+    if (address >= region.base && address - region.base < region.size) {
+      return &region;
+    }
+  }
+  return nullptr;
+}
+
+// Mutable overload; shares the lookup logic of the const overload.
+ConcreteMemory::Region *ConcreteMemory::FindRegion(uint64_t address) {
+  return const_cast<Region *>(
+      static_cast<const ConcreteMemory *>(this)->FindRegion(address));
+}
+
+// Returns a pointer to byte `offset` of the backing store of the region
+// based at `region_base`. Both preconditions are checked only by assert.
+uint8_t *ConcreteMemory::GetBytes(uint64_t region_base, uint64_t offset) {
+  auto it = backing_.find(region_base);
+  assert(it != backing_.end() && "backing store missing for region");
+  assert(offset < it->second.size() && "offset out of bounds");
+  return it->second.data() + offset;
+}
+
+const uint8_t *ConcreteMemory::GetBytes(uint64_t region_base,
+                                        uint64_t offset) const {
+  auto it = backing_.find(region_base);
+  assert(it != backing_.end() && "backing store missing for region");
+  assert(offset < it->second.size() && "offset out of bounds");
+  return it->second.data() + offset;
+}
+
+// Copies `size` bytes starting at `address` into `dest`. On failure (no
+// region, freed region, or out of bounds without auto-grow) `dest` is
+// zero-filled and false is returned.
+bool ConcreteMemory::Read(uint64_t address, void *dest, uint32_t size) {
+  const Region *region = FindRegion(address);
+  if (!region || region->freed) {
+    std::memset(dest, 0, size);
+    return false;
+  }
+
+  uint64_t offset = address - region->base;
+
+  // Auto-grow if the read extends past the current backing.
+  if (offset + size > region->size && auto_grow_) {
+    uint64_t new_size = offset + size;
+    auto &r = regions_[region->base];
+    r.size = new_size;
+    backing_[region->base].resize(new_size, 0);
+    region = &r;
+  }
+
+  auto bit = backing_.find(region->base);
+  if (bit == backing_.end() || offset + size > bit->second.size()) {
+    std::memset(dest, 0, size);
+    return false;
+  }
+
+  std::memcpy(dest, bit->second.data() + offset, size);
+  return true;
+}
+
+// Stores `pointer_value` at `address` and records it in the pointer shadow.
+// NOTE(review): this always writes sizeof(uint64_t) bytes, even when
+// address_width_ is 4 -- confirm that 32-bit pointer slots are meant to
+// occupy eight bytes. The shadow entry is also set when the write fails.
+bool ConcreteMemory::WritePointer(uint64_t address, uint64_t pointer_value) {
+  // Write raw bytes first (this clears any stale shadow), then set the
+  // new shadow entry so ReadPointer can recover the pointer identity.
+  bool ok = Write(address, &pointer_value, sizeof(pointer_value));
+  pointer_shadow_[address] = Pointer(pointer_value);
+  return ok;
+}
+
+// Recovers a pointer previously stored with WritePointer at exactly
+// `address`. Returns false (leaving `pointer_value` untouched) when no
+// shadow entry exists; the raw bytes are not consulted.
+bool ConcreteMemory::ReadPointer(uint64_t address, uint64_t &pointer_value) {
+  auto it = pointer_shadow_.find(address);
+  if (it != pointer_shadow_.end()) {
+    pointer_value = ConcreteAddress(it->second);
+    return true;
+  }
+  return false;
+}
+
+// Copies `size` bytes from `src` to `address`. Returns false if the address
+// is unmapped or freed, or if the write is out of bounds and auto-grow is off.
+bool ConcreteMemory::Write(uint64_t address, const void *src, uint32_t size) {
+  // Clear pointer shadow: a raw write overwrites any pointer provenance.
+  pointer_shadow_.erase(address);
+
+  Region *region = FindRegion(address);
+  if (!region || region->freed) {
+    return false;
+  }
+
+  uint64_t offset = address - region->base;
+
+  // Auto-grow if the write extends past the current region.
+  if (offset + size > region->size && auto_grow_) {
+    uint64_t new_size = offset + size;
+    region->size = new_size;
+    backing_[region->base].resize(new_size, 0);
+  }
+
+  if (offset + size > region->size) {
+    return false;
+  }
+
+  std::memcpy(backing_[region->base].data() + offset, src, size);
+  return true;
+}
+
+// Removes every pointer-shadow entry keyed inside [address, address + size).
+// Works for ordered and unordered maps keyed by uint64_t. Small ranges are
+// erased key by key; large ranges walk the map once instead.
+template <typename ShadowMap>
+static void ErasePointerShadowRange(ShadowMap &shadow, uint64_t address,
+                                    uint32_t size) {
+  if (shadow.empty()) {
+    return;
+  }
+  if (size <= shadow.size()) {
+    for (uint32_t i = 0; i < size; ++i) {
+      shadow.erase(address + i);
+    }
+    return;
+  }
+  for (auto it = shadow.begin(); it != shadow.end();) {
+    // Unsigned subtraction: keys below `address` wrap to a huge value.
+    if (it->first - address < size) {
+      it = shadow.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+// Fills `size` bytes at `address` with `value`. Same failure conditions as
+// Write. Pointer-shadow entries inside the filled range are dropped, so
+// ReadPointer no longer reports pointers whose bytes were overwritten.
+bool ConcreteMemory::Memset(uint64_t address, uint8_t value, uint32_t size) {
+  Region *region = FindRegion(address);
+  if (!region || region->freed) {
+    return false;
+  }
+
+  uint64_t offset = address - region->base;
+
+  if (offset + size > region->size && auto_grow_) {
+    uint64_t new_size = offset + size;
+    region->size = new_size;
+    backing_[region->base].resize(new_size, 0);
+  }
+
+  if (offset + size > region->size) {
+    return false;
+  }
+
+  ErasePointerShadowRange(pointer_shadow_, address, size);
+  std::memset(backing_[region->base].data() + offset, value, size);
+  return true;
+}
+
+// Copies `size` bytes between two addresses through a temporary buffer, so
+// overlapping ranges are safe.
+// NOTE(review): pointer-shadow entries of the source range are not carried
+// over to the destination -- confirm whether provenance should survive.
+bool ConcreteMemory::Memcpy(uint64_t dest_address, uint64_t src_address,
+                            uint32_t size) {
+  std::vector<uint8_t> temp(size);
+  if (!Read(src_address, temp.data(), size)) {
+    return false;
+  }
+  return Write(dest_address, temp.data(), size);
+}
+
+// Poison, Unpoison and IsPoisoned act on the region whose base is exactly
+// `address`; any other address is ignored (IsPoisoned returns false).
+void ConcreteMemory::Poison(uint64_t address) {
+  auto it = regions_.find(address);
+  if (it != regions_.end()) {
+    it->second.poisoned = true;
+  }
+}
+
+void ConcreteMemory::Unpoison(uint64_t address) {
+  auto it = regions_.find(address);
+  if (it != regions_.end()) {
+    it->second.poisoned = false;
+  }
+}
+
+bool ConcreteMemory::IsPoisoned(uint64_t address) const {
+  auto it = regions_.find(address);
+  if (it != regions_.end()) {
+    return it->second.poisoned;
+  }
+  return false;
+}
+
+// Returns a copy of this memory: allocator position, auto-grow flag, byte
+// contents, region table and pointer shadow are all copied.
+std::unique_ptr<ConcreteMemory> ConcreteMemory::Fork(void) const {
+  auto copy = std::make_unique<ConcreteMemory>(address_width_, 0);
+  copy->next_alloc_ = next_alloc_;
+  copy->auto_grow_ = auto_grow_;
+  copy->backing_ = backing_;
+  copy->regions_ = regions_;
+  copy->pointer_shadow_ = pointer_shadow_;
+  return copy;
+}
+
+// Records `ptr` as the pointer stored at `address` without touching the raw
+// bytes.
+void ConcreteMemory::WritePointerShadow(uint64_t address,
+                                        const Pointer &ptr) {
+  pointer_shadow_[address] = ptr;
+}
+
+// Drops the pointer-shadow entry keyed at exactly `address`, if any.
+void ConcreteMemory::ClearPointerShadow(uint64_t address) {
+  pointer_shadow_.erase(address);
+}
+
+// Returns the pointer-shadow entry keyed at exactly `address`, or nullptr
+// when there is none. The returned pointer refers into pointer_shadow_.
+const Pointer *ConcreteMemory::ReadPointerShadow(uint64_t address) const {
+  auto it = pointer_shadow_.find(address);
+  if (it != pointer_shadow_.end()) {
+    return &it->second;
+  }
+  return nullptr;
+}
+
+} // namespace mx::ir::interpret
diff --git a/lib/IR/Interpret/ConcreteValueFactory.cpp b/lib/IR/Interpret/ConcreteValueFactory.cpp
new file mode 100644
index 000000000..c37ad8b12
--- /dev/null
+++ b/lib/IR/Interpret/ConcreteValueFactory.cpp
@@ -0,0 +1,982 @@
+// Copyright (c) 2024-present, Trail of Bits, Inc.
+//
+// This source code is licensed in accordance with the terms specified in
+// the LICENSE file found in the root directory of this source tree.
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace mx::ir::interpret {
+namespace {
+
+// ---------------------------------------------------------------------------
+// Width-dispatch helpers
+// ---------------------------------------------------------------------------
+
+// Signed integer binary operation dispatched by width tag (offset from base).
+// `width_index` is 0=8-bit, 1=16-bit, 2=32-bit, 3=64-bit.
+//
+// For indices 0-2 both operands are cast before `op_fn` is called and the
+// result is cast again before MakeInt; index 3 passes the 64-bit operands
+// through unchanged. Any other index asserts and yields MakeUndef().
+// NOTE(review): the cast targets are presumably int8_t/int16_t/int32_t (and
+// int64_t for the result), matching the width table above -- confirm.
+template
+Value SignedIntBinOp(int64_t l, int64_t r, unsigned width_index, Op op_fn) {
+  switch (width_index) {
+    case 0: return MakeInt(static_cast(
+        op_fn(static_cast(l), static_cast(r))));
+    case 1: return MakeInt(static_cast(
+        op_fn(static_cast(l), static_cast(r))));
+    case 2: return MakeInt(static_cast(
+        op_fn(static_cast(l), static_cast(r))));
+    case 3: return MakeInt(op_fn(l, r));
+    default: assert(false); return MakeUndef();
+  }
+}
+
+// Unsigned integer binary operation dispatched by width tag.
+// Same dispatch as the signed helper, but all four width indices (0-3) cast
+// the operands before calling `op_fn` and cast the result before MakeInt.
+// An out-of-range index asserts and yields MakeUndef().
+template
+Value UnsignedIntBinOp(uint64_t l, uint64_t r, unsigned width_index,
+                       Op op_fn) {
+  switch (width_index) {
+    case 0: return MakeInt(static_cast(
+        op_fn(static_cast(l), static_cast(r))));
+    case 1: return MakeInt(static_cast(
+        op_fn(static_cast(l), static_cast(r))));
+    case 2: return MakeInt(static_cast(
+        op_fn(static_cast(l), static_cast(r))));
+    case 3: return MakeInt(static_cast(
+        op_fn(static_cast(l), static_cast(r))));
+    default: assert(false); return MakeUndef();
+  }
+}
+
+// Signed integer unary operation dispatched by width tag.
+// Indices 0-2 cast the operand before `op_fn`; index 3 passes it unchanged.
+template
+Value SignedIntUnaryOp(int64_t v, unsigned width_index, Op op_fn) {
+  switch (width_index) {
+    case 0: return MakeInt(static_cast(
+        op_fn(static_cast(v))));
+    case 1: return MakeInt(static_cast(
+        op_fn(static_cast(v))));
+    case 2: return MakeInt(static_cast(
+        op_fn(static_cast(v))));
+    case 3: return MakeInt(op_fn(v));
+    default: assert(false); return MakeUndef();
+  }
+}
+
+// Unsigned integer unary operation dispatched by width tag.
+// Indices 0-2 cast the operand before `op_fn`; index 3 only casts the result.
+template
+Value UnsignedIntUnaryOp(uint64_t v, unsigned width_index, Op op_fn) {
+  switch (width_index) {
+    case 0: return MakeInt(static_cast(
+        op_fn(static_cast(v))));
+    case 1: return MakeInt(static_cast(
+        op_fn(static_cast(v))));
+    case 2: return MakeInt(static_cast(
+        op_fn(static_cast(v))));
+    case 3: return MakeInt(static_cast(op_fn(v)));
+    default: assert(false); return MakeUndef();
+  }
+}
+
+// Shift-amount mask for a given width index.
+static unsigned ShiftMask(unsigned width_index) {
+  static constexpr unsigned masks[] = {7u, 15u, 31u, 63u};
+  assert(width_index < 4);
+  return masks[width_index];
+}
+
+} // namespace
+
+// ---------------------------------------------------------------------------
+// BinaryOp
+// ---------------------------------------------------------------------------
+
+// Evaluates the binary opcode `op` on two concrete values.
+//
+// Logical and float opcodes are handled first. Every remaining opcode is
+// assumed to belong to a group of four consecutive enumerators ordered
+// 8/16/32/64-bit, so `op - first_of_group` is the width index passed to the
+// width-dispatch helpers. Integer results wrap in two's complement; division
+// or remainder by zero yields 0. An unhandled opcode asserts and returns
+// MakeUndef().
+Value ConcreteValueFactory::BinaryOp(OpCode op, const Value &lhs,
+                                     const Value &rhs) {
+  using enum OpCode;
+
+  // Logical short-circuit style (but both sides already evaluated).
+  if (op == LOGICAL_AND) return MakeInt(IsTruthy(lhs) && IsTruthy(rhs) ? 1 : 0);
+  if (op == LOGICAL_OR) return MakeInt(IsTruthy(lhs) || IsTruthy(rhs) ? 1 : 0);
+
+  // Float arithmetic.
+  auto float64 = [&](auto fn) -> Value {
+    return MakeFloat(fn(AsFloat(lhs), AsFloat(rhs)));
+  };
+  auto float32 = [&](auto fn) -> Value {
+    return MakeFloat32(fn(AsFloat32(lhs), AsFloat32(rhs)));
+  };
+
+  switch (op) {
+    case FADD_32: return float32([](float a, float b) { return a + b; });
+    case FADD_64: return float64([](double a, double b) { return a + b; });
+    case FSUB_32: return float32([](float a, float b) { return a - b; });
+    case FSUB_64: return float64([](double a, double b) { return a - b; });
+    case FMUL_32: return float32([](float a, float b) { return a * b; });
+    case FMUL_64: return float64([](double a, double b) { return a * b; });
+    case FDIV_32: return float32([](float a, float b) { return a / b; });
+    case FDIV_64: return float64([](double a, double b) { return a / b; });
+    case FREM_32: return float32([](float a, float b) { return std::fmod(a, b); });
+    case FREM_64: return float64([](double a, double b) { return std::fmod(a, b); });
+    default: break;
+  }
+
+  // Signed integer arithmetic. Compute width index from opcode groups of 4.
+  auto si = [&](OpCode base) -> unsigned {
+    return static_cast<unsigned>(op) - static_cast<unsigned>(base);
+  };
+
+  int64_t l = AsInt(lhs);
+  int64_t r = AsInt(rhs);
+  uint64_t ul = AsUint(lhs);
+  uint64_t ur = AsUint(rhs);
+
+  // Wrapping arithmetic. Signed overflow is undefined behaviour, so the
+  // operands are widened to uint64_t, combined there, and narrowed back to
+  // the operand type (a modular conversion since C++20).
+  auto wrap_add = [](auto a, auto b) {
+    return static_cast<decltype(a)>(static_cast<uint64_t>(a) +
+                                    static_cast<uint64_t>(b));
+  };
+  auto wrap_sub = [](auto a, auto b) {
+    return static_cast<decltype(a)>(static_cast<uint64_t>(a) -
+                                    static_cast<uint64_t>(b));
+  };
+  auto wrap_mul = [](auto a, auto b) {
+    return static_cast<decltype(a)>(static_cast<uint64_t>(a) *
+                                    static_cast<uint64_t>(b));
+  };
+  auto bit_and = [](auto a, auto b) { return static_cast<decltype(a)>(a & b); };
+  auto bit_or = [](auto a, auto b) { return static_cast<decltype(a)>(a | b); };
+  auto bit_xor = [](auto a, auto b) { return static_cast<decltype(a)>(a ^ b); };
+
+  // ADD
+  if (op >= ADD_8 && op <= ADD_64)
+    return SignedIntBinOp(l, r, si(ADD_8), wrap_add);
+  if (op >= ATOMIC_ADD_8 && op <= ATOMIC_ADD_64)
+    return SignedIntBinOp(l, r, si(ATOMIC_ADD_8), wrap_add);
+
+  // SUB
+  if (op >= SUB_8 && op <= SUB_64)
+    return SignedIntBinOp(l, r, si(SUB_8), wrap_sub);
+  if (op >= ATOMIC_SUB_8 && op <= ATOMIC_SUB_64)
+    return SignedIntBinOp(l, r, si(ATOMIC_SUB_8), wrap_sub);
+
+  // MUL
+  if (op >= MUL_8 && op <= MUL_64)
+    return SignedIntBinOp(l, r, si(MUL_8), wrap_mul);
+
+  // DIV (signed, div-by-zero returns 0). MIN / -1 is not representable and
+  // traps on common hardware, so a divisor of -1 is handled as a wrapping
+  // negation instead.
+  if (op >= DIV_8 && op <= DIV_64)
+    return SignedIntBinOp(l, r, si(DIV_8),
+        [](auto a, auto b) -> decltype(a) {
+          if (b == 0) return 0;
+          if (b == -1) {
+            return static_cast<decltype(a)>(uint64_t{0} -
+                                            static_cast<uint64_t>(a));
+          }
+          return static_cast<decltype(a)>(a / b);
+        });
+
+  // REM (signed, div-by-zero returns 0). x % -1 is always 0; answering it
+  // directly avoids the MIN % -1 overflow.
+  if (op >= REM_8 && op <= REM_64)
+    return SignedIntBinOp(l, r, si(REM_8),
+        [](auto a, auto b) -> decltype(a) {
+          if (b == 0 || b == -1) return 0;
+          return static_cast<decltype(a)>(a % b);
+        });
+
+  // UDIV (unsigned, div-by-zero returns 0)
+  if (op >= UDIV_8 && op <= UDIV_64)
+    return UnsignedIntBinOp(ul, ur, si(UDIV_8),
+        [](auto a, auto b) -> decltype(a) { return b == 0 ? 0 : a / b; });
+
+  // UREM (unsigned, div-by-zero returns 0)
+  if (op >= UREM_8 && op <= UREM_64)
+    return UnsignedIntBinOp(ul, ur, si(UREM_8),
+        [](auto a, auto b) -> decltype(a) { return b == 0 ? 0 : a % b; });
+
+  // BIT_AND
+  if (op >= BIT_AND_8 && op <= BIT_AND_64)
+    return SignedIntBinOp(l, r, si(BIT_AND_8), bit_and);
+  if (op >= ATOMIC_AND_8 && op <= ATOMIC_AND_64)
+    return SignedIntBinOp(l, r, si(ATOMIC_AND_8), bit_and);
+
+  // BIT_OR
+  if (op >= BIT_OR_8 && op <= BIT_OR_64)
+    return SignedIntBinOp(l, r, si(BIT_OR_8), bit_or);
+  if (op >= ATOMIC_OR_8 && op <= ATOMIC_OR_64)
+    return SignedIntBinOp(l, r, si(ATOMIC_OR_8), bit_or);
+
+  // BIT_XOR
+  if (op >= BIT_XOR_8 && op <= BIT_XOR_64)
+    return SignedIntBinOp(l, r, si(BIT_XOR_8), bit_xor);
+  if (op >= ATOMIC_XOR_8 && op <= ATOMIC_XOR_64)
+    return SignedIntBinOp(l, r, si(ATOMIC_XOR_8), bit_xor);
+
+  // ATOMIC_NAND
+  if (op >= ATOMIC_NAND_8 && op <= ATOMIC_NAND_64)
+    return SignedIntBinOp(l, r, si(ATOMIC_NAND_8),
+        [](auto a, auto b) { return static_cast<decltype(a)>(~(a & b)); });
+
+  // ATOMIC_EXCHANGE (just returns rhs)
+  if (op >= ATOMIC_EXCHANGE_8 && op <= ATOMIC_EXCHANGE_64)
+    return rhs;
+
+  // SHL (mask shift amount). The shift is done on the unsigned widening so a
+  // negative left operand is well defined under every language version.
+  if (op >= SHL_8 && op <= SHL_64) {
+    unsigned wi = si(SHL_8);
+    unsigned mask = ShiftMask(wi);
+    return SignedIntBinOp(l, r & mask, wi,
+        [](auto a, auto b) {
+          return static_cast<decltype(a)>(static_cast<uint64_t>(a) << b);
+        });
+  }
+
+  // SHR (arithmetic shift right, mask amount)
+  if (op >= SHR_8 && op <= SHR_64) {
+    unsigned wi = si(SHR_8);
+    unsigned mask = ShiftMask(wi);
+    return SignedIntBinOp(l, r & mask, wi,
+        [](auto a, auto b) { return static_cast<decltype(a)>(a >> b); });
+  }
+
+  // USHR (logical shift right, mask amount)
+  if (op >= USHR_8 && op <= USHR_64) {
+    unsigned wi = si(USHR_8);
+    unsigned mask = ShiftMask(wi);
+    return UnsignedIntBinOp(ul, ur & mask, wi,
+        [](auto a, auto b) { return static_cast<decltype(a)>(a >> b); });
+  }
+
+  assert(false && "Unhandled BinaryOp opcode");
+  return MakeUndef();
+}
+
+// ---------------------------------------------------------------------------
+// UnaryOp
+// ---------------------------------------------------------------------------
+
+// Evaluates the unary opcode `op` on a concrete value. NEG and ABS wrap in
+// two's complement, so the minimum value maps to itself. An unhandled opcode
+// asserts and returns MakeUndef().
+Value ConcreteValueFactory::UnaryOp(OpCode op, const Value &operand) {
+  using enum OpCode;
+
+  if (op == LOGICAL_NOT) return MakeInt(!IsTruthy(operand) ? 1 : 0);
+
+  // Float negate.
+  if (op == FNEG_32) return MakeFloat32(-AsFloat32(operand));
+  if (op == FNEG_64) return MakeFloat(-AsFloat(operand));
+
+  int64_t v = AsInt(operand);
+  uint64_t uv = AsUint(operand);
+
+  auto wi = [&](OpCode base) -> unsigned {
+    return static_cast<unsigned>(op) - static_cast<unsigned>(base);
+  };
+
+  // Negation through uint64_t: `-a` on the minimum value is signed overflow.
+  auto wrap_neg = [](auto a) {
+    return static_cast<decltype(a)>(uint64_t{0} - static_cast<uint64_t>(a));
+  };
+
+  // NEG
+  if (op >= NEG_8 && op <= NEG_64)
+    return SignedIntUnaryOp(v, wi(NEG_8), wrap_neg);
+
+  // BIT_NOT
+  if (op >= BIT_NOT_8 && op <= BIT_NOT_64)
+    return UnsignedIntUnaryOp(uv, wi(BIT_NOT_8),
+        [](auto a) { return static_cast<decltype(a)>(~a); });
+
+  // ABS
+  if (op >= ABS_8 && op <= ABS_64)
+    return SignedIntUnaryOp(v, wi(ABS_8),
+        [wrap_neg](auto a) { return a < 0 ? wrap_neg(a) : a; });
+
+  assert(false && "Unhandled UnaryOp opcode");
+  return MakeUndef();
+}
+
+// ---------------------------------------------------------------------------
+// Compare
+// ---------------------------------------------------------------------------
+
+Value ConcreteValueFactory::Compare(OpCode op, const Value &lhs,
+                                    const Value &rhs) {
+  using enum OpCode;
+
+  // Pointer comparisons: if either side is a pointer, compare addresses.
+  auto *lp = AsPointer(lhs);
+  auto *rp = AsPointer(rhs);
+  if (lp || rp) {
+    uint64_t la = lp ? ConcreteAddress(*lp) : AsUint(lhs);
+    uint64_t ra = rp ? ConcreteAddress(*rp) : AsUint(rhs);
+
+    switch (op) {
+      case CMP_EQ_8: case CMP_EQ_16: case CMP_EQ_32: case CMP_EQ_64:
+        return MakeInt(la == ra ?
1 : 0); + case CMP_NE_8: case CMP_NE_16: case CMP_NE_32: case CMP_NE_64: + return MakeInt(la != ra ? 1 : 0); + case CMP_LT_8: case CMP_LT_16: case CMP_LT_32: case CMP_LT_64: + case UCMP_LT_8: case UCMP_LT_16: case UCMP_LT_32: case UCMP_LT_64: + return MakeInt(la < ra ? 1 : 0); + case CMP_LE_8: case CMP_LE_16: case CMP_LE_32: case CMP_LE_64: + case UCMP_LE_8: case UCMP_LE_16: case UCMP_LE_32: case UCMP_LE_64: + return MakeInt(la <= ra ? 1 : 0); + case CMP_GT_8: case CMP_GT_16: case CMP_GT_32: case CMP_GT_64: + case UCMP_GT_8: case UCMP_GT_16: case UCMP_GT_32: case UCMP_GT_64: + return MakeInt(la > ra ? 1 : 0); + case CMP_GE_8: case CMP_GE_16: case CMP_GE_32: case CMP_GE_64: + case UCMP_GE_8: case UCMP_GE_16: case UCMP_GE_32: case UCMP_GE_64: + return MakeInt(la >= ra ? 1 : 0); + default: break; + } + } + + // Float comparisons. + auto fcmp64 = [&](auto fn) -> Value { + return MakeInt(fn(AsFloat(lhs), AsFloat(rhs)) ? 1 : 0); + }; + auto fcmp32 = [&](auto fn) -> Value { + return MakeInt(fn(AsFloat32(lhs), AsFloat32(rhs)) ? 
1 : 0); + }; + + switch (op) { + case FCMP_EQ_32: return fcmp32([](float a, float b) { return a == b; }); + case FCMP_EQ_64: return fcmp64([](double a, double b) { return a == b; }); + case FCMP_NE_32: return fcmp32([](float a, float b) { return a != b; }); + case FCMP_NE_64: return fcmp64([](double a, double b) { return a != b; }); + case FCMP_LT_32: return fcmp32([](float a, float b) { return a < b; }); + case FCMP_LT_64: return fcmp64([](double a, double b) { return a < b; }); + case FCMP_LE_32: return fcmp32([](float a, float b) { return a <= b; }); + case FCMP_LE_64: return fcmp64([](double a, double b) { return a <= b; }); + case FCMP_GT_32: return fcmp32([](float a, float b) { return a > b; }); + case FCMP_GT_64: return fcmp64([](double a, double b) { return a > b; }); + case FCMP_GE_32: return fcmp32([](float a, float b) { return a >= b; }); + case FCMP_GE_64: return fcmp64([](double a, double b) { return a >= b; }); + default: break; + } + + // Signed integer comparisons. + int64_t l = AsInt(lhs); + int64_t r = AsInt(rhs); + uint64_t ul = AsUint(lhs); + uint64_t ur = AsUint(rhs); + + auto si = [&](OpCode base) -> unsigned { + return static_cast(op) - static_cast(base); + }; + + // CMP_EQ + if (op >= CMP_EQ_8 && op <= CMP_EQ_64) + return SignedIntBinOp(l, r, si(CMP_EQ_8), + [](auto a, auto b) -> int64_t { return a == b ? 1 : 0; }); + + // CMP_NE + if (op >= CMP_NE_8 && op <= CMP_NE_64) + return SignedIntBinOp(l, r, si(CMP_NE_8), + [](auto a, auto b) -> int64_t { return a != b ? 1 : 0; }); + + // CMP_LT (signed) + if (op >= CMP_LT_8 && op <= CMP_LT_64) + return SignedIntBinOp(l, r, si(CMP_LT_8), + [](auto a, auto b) -> int64_t { return a < b ? 1 : 0; }); + + // CMP_LE (signed) + if (op >= CMP_LE_8 && op <= CMP_LE_64) + return SignedIntBinOp(l, r, si(CMP_LE_8), + [](auto a, auto b) -> int64_t { return a <= b ? 
1 : 0; }); + + // CMP_GT (signed) + if (op >= CMP_GT_8 && op <= CMP_GT_64) + return SignedIntBinOp(l, r, si(CMP_GT_8), + [](auto a, auto b) -> int64_t { return a > b ? 1 : 0; }); + + // CMP_GE (signed) + if (op >= CMP_GE_8 && op <= CMP_GE_64) + return SignedIntBinOp(l, r, si(CMP_GE_8), + [](auto a, auto b) -> int64_t { return a >= b ? 1 : 0; }); + + // UCMP_LT (unsigned) + if (op >= UCMP_LT_8 && op <= UCMP_LT_64) + return UnsignedIntBinOp(ul, ur, si(UCMP_LT_8), + [](auto a, auto b) -> int64_t { return a < b ? 1 : 0; }); + + // UCMP_LE (unsigned) + if (op >= UCMP_LE_8 && op <= UCMP_LE_64) + return UnsignedIntBinOp(ul, ur, si(UCMP_LE_8), + [](auto a, auto b) -> int64_t { return a <= b ? 1 : 0; }); + + // UCMP_GT (unsigned) + if (op >= UCMP_GT_8 && op <= UCMP_GT_64) + return UnsignedIntBinOp(ul, ur, si(UCMP_GT_8), + [](auto a, auto b) -> int64_t { return a > b ? 1 : 0; }); + + // UCMP_GE (unsigned) + if (op >= UCMP_GE_8 && op <= UCMP_GE_64) + return UnsignedIntBinOp(ul, ur, si(UCMP_GE_8), + [](auto a, auto b) -> int64_t { return a >= b ? 1 : 0; }); + + assert(false && "Unhandled Compare opcode"); + return MakeInt(0); +} + +// --------------------------------------------------------------------------- +// Cast +// --------------------------------------------------------------------------- + +Value ConcreteValueFactory::Cast(CastOp op, const Value &operand) { + using enum CastOp; + + switch (op) { + case IDENTITY: + return operand; + + case BITCAST: + return operand; + + // Pointer conversions. + case PTR_TO_I64: { + auto *p = AsPointer(operand); + return MakeInt(static_cast(p ? ConcreteAddress(*p) + : AsUint(operand))); + } + case PTR_TO_I32: { + auto *p = AsPointer(operand); + uint64_t addr = p ? ConcreteAddress(*p) : AsUint(operand); + return MakeInt(static_cast(static_cast(addr))); + } + case I64_TO_PTR: + return MakePtr(AsUint(operand)); + case I32_TO_PTR: + return MakePtr(static_cast(static_cast(AsUint(operand)))); + + // Sign-extend. 
+ case SEXT_I8_I16: + return MakeInt(static_cast(static_cast( + static_cast(AsInt(operand))))); + case SEXT_I8_I32: + return MakeInt(static_cast(static_cast( + static_cast(AsInt(operand))))); + case SEXT_I8_I64: + return MakeInt(static_cast( + static_cast(AsInt(operand)))); + case SEXT_I16_I32: + return MakeInt(static_cast(static_cast( + static_cast(AsInt(operand))))); + case SEXT_I16_I64: + return MakeInt(static_cast( + static_cast(AsInt(operand)))); + case SEXT_I32_I64: + return MakeInt(static_cast( + static_cast(AsInt(operand)))); + + // Zero-extend. + case ZEXT_I8_I16: + return MakeInt(static_cast( + static_cast(static_cast(AsUint(operand))))); + case ZEXT_I8_I32: + return MakeInt(static_cast( + static_cast(static_cast(AsUint(operand))))); + case ZEXT_I8_I64: + return MakeInt(static_cast( + static_cast(AsUint(operand)))); + case ZEXT_I16_I32: + return MakeInt(static_cast( + static_cast(static_cast(AsUint(operand))))); + case ZEXT_I16_I64: + return MakeInt(static_cast( + static_cast(AsUint(operand)))); + case ZEXT_I32_I64: + return MakeInt(static_cast( + static_cast(AsUint(operand)))); + + // Truncate. + case TRUNC_I16_I8: + return MakeInt(static_cast( + static_cast(static_cast(AsInt(operand))))); + case TRUNC_I32_I8: + return MakeInt(static_cast( + static_cast(static_cast(AsInt(operand))))); + case TRUNC_I64_I8: + return MakeInt(static_cast( + static_cast(AsInt(operand)))); + case TRUNC_I32_I16: + return MakeInt(static_cast( + static_cast(static_cast(AsInt(operand))))); + case TRUNC_I64_I16: + return MakeInt(static_cast( + static_cast(AsInt(operand)))); + case TRUNC_I64_I32: + return MakeInt(static_cast( + static_cast(AsInt(operand)))); + + // Float widening/narrowing. + case F32_TO_F64: + return MakeFloat(static_cast(AsFloat32(operand))); + case F64_TO_F32: + return MakeFloat32(static_cast(AsFloat(operand))); + + // Signed int to float. 
+ case SI8_TO_F32: + return MakeFloat32(static_cast( + static_cast(AsInt(operand)))); + case SI8_TO_F64: + return MakeFloat(static_cast( + static_cast(AsInt(operand)))); + case SI16_TO_F32: + return MakeFloat32(static_cast( + static_cast(AsInt(operand)))); + case SI16_TO_F64: + return MakeFloat(static_cast( + static_cast(AsInt(operand)))); + case SI32_TO_F32: + return MakeFloat32(static_cast( + static_cast(AsInt(operand)))); + case SI32_TO_F64: + return MakeFloat(static_cast( + static_cast(AsInt(operand)))); + case SI64_TO_F32: + return MakeFloat32(static_cast(AsInt(operand))); + case SI64_TO_F64: + return MakeFloat(static_cast(AsInt(operand))); + + // Unsigned int to float. + case UI8_TO_F32: + return MakeFloat32(static_cast( + static_cast(AsUint(operand)))); + case UI8_TO_F64: + return MakeFloat(static_cast( + static_cast(AsUint(operand)))); + case UI16_TO_F32: + return MakeFloat32(static_cast( + static_cast(AsUint(operand)))); + case UI16_TO_F64: + return MakeFloat(static_cast( + static_cast(AsUint(operand)))); + case UI32_TO_F32: + return MakeFloat32(static_cast( + static_cast(AsUint(operand)))); + case UI32_TO_F64: + return MakeFloat(static_cast( + static_cast(AsUint(operand)))); + case UI64_TO_F32: + return MakeFloat32(static_cast(AsUint(operand))); + case UI64_TO_F64: + return MakeFloat(static_cast(AsUint(operand))); + + // Float to signed int. 
+ case F32_TO_SI8: + return MakeInt(static_cast( + static_cast(static_cast(AsFloat32(operand))))); + case F32_TO_SI16: + return MakeInt(static_cast( + static_cast(static_cast(AsFloat32(operand))))); + case F32_TO_SI32: + return MakeInt(static_cast( + static_cast(static_cast(AsFloat32(operand))))); + case F32_TO_SI64: + return MakeInt(static_cast( + static_cast(AsFloat32(operand)))); + case F64_TO_SI8: + return MakeInt(static_cast( + static_cast(AsFloat(operand)))); + case F64_TO_SI16: + return MakeInt(static_cast( + static_cast(AsFloat(operand)))); + case F64_TO_SI32: + return MakeInt(static_cast( + static_cast(AsFloat(operand)))); + case F64_TO_SI64: + return MakeInt(static_cast(AsFloat(operand))); + + // Float to unsigned int. + case F32_TO_UI8: + return MakeInt(static_cast( + static_cast(static_cast(AsFloat32(operand))))); + case F32_TO_UI16: + return MakeInt(static_cast( + static_cast(static_cast(AsFloat32(operand))))); + case F32_TO_UI32: + return MakeInt(static_cast( + static_cast(static_cast(AsFloat32(operand))))); + case F32_TO_UI64: + return MakeInt(static_cast( + static_cast(static_cast(AsFloat32(operand))))); + case F64_TO_UI8: + return MakeInt(static_cast( + static_cast(AsFloat(operand)))); + case F64_TO_UI16: + return MakeInt(static_cast( + static_cast(AsFloat(operand)))); + case F64_TO_UI32: + return MakeInt(static_cast( + static_cast(AsFloat(operand)))); + case F64_TO_UI64: + return MakeInt(static_cast( + static_cast(AsFloat(operand)))); + } + + assert(false && "Unhandled CastOp"); + return MakeUndef(); +} + +// --------------------------------------------------------------------------- +// MakeConst +// --------------------------------------------------------------------------- + +Value ConcreteValueFactory::MakeConst(ConstOp op, int64_t signed_val, + uint64_t unsigned_val) { + using enum ConstOp; + + switch (op) { + case NULL_PTR: + return MakeNull(); + + case FLOAT16: { + // float16 is stored as float in the constant pool. 
+      float f;
+      // Reinterprets the first sizeof(float) bytes of signed_val as a float.
+      // NOTE(review): which bytes those are depends on host byte order --
+      // presumably the producer packs the bits for a little-endian host;
+      // confirm.
+      std::memcpy(&f, &signed_val, sizeof(f));
+      return MakeFloat32(f);
+    }
+    case FLOAT32: {
+      float f;
+      // Same byte-order caveat as the case above.
+      std::memcpy(&f, &signed_val, sizeof(f));
+      return MakeFloat32(f);
+    }
+    case FLOAT64: {
+      double d;
+      std::memcpy(&d, &signed_val, sizeof(d));
+      return MakeFloat(d);
+    }
+
+    case INF32:
+      return MakeFloat32(std::numeric_limits::infinity());
+    case INF64:
+      return MakeFloat(std::numeric_limits::infinity());
+    case NAN32:
+      return MakeFloat32(std::numeric_limits::quiet_NaN());
+    case NAN64:
+      return MakeFloat(std::numeric_limits::quiet_NaN());
+
+    // Unsigned constants are taken from unsigned_val.
+    case UINT64:
+      return MakeInt(static_cast(unsigned_val));
+
+    case UINT32:
+      return MakeInt(static_cast(
+          static_cast(static_cast(unsigned_val))));
+    case UINT16:
+    case WCHAR16:
+      return MakeInt(static_cast(
+          static_cast(static_cast(unsigned_val))));
+    case UINT8:
+      return MakeInt(static_cast(
+          static_cast(static_cast(unsigned_val))));
+
+    // Signed constants (and BOOL / WCHAR32) pass signed_val through as is.
+    case INT8:
+    case INT16:
+    case INT32:
+    case INT64:
+    case BOOL:
+    case WCHAR32:
+      return MakeInt(signed_val);
+  }
+
+  assert(false && "Unhandled ConstOp");
+  return MakeUndef();
+}
+
+// ---------------------------------------------------------------------------
+// MakeNullPtr
+// ---------------------------------------------------------------------------
+
+// Returns the value produced by MakeNull().
+Value ConcreteValueFactory::MakeNullPtr(void) {
+  return MakeNull();
+}
+
+// ---------------------------------------------------------------------------
+// PtrAdd
+// ---------------------------------------------------------------------------
+
+// Adds `index * element_size` bytes to `base`. A pointer base yields a
+// pointer built from the new address; any other base is treated as an
+// integer and yields an integer.
+// NOTE(review): the signed multiply (and the integer-path add) can overflow
+// for extreme inputs -- confirm callers keep indices in range.
+Value ConcreteValueFactory::PtrAdd(const Value &base, const Value &index,
+                                   int64_t element_size) {
+  int64_t offset = AsInt(index) * element_size;
+  if (auto *p = AsPointer(base)) {
+    return MakePtr(ConcreteAddress(*p) + static_cast(offset));
+  }
+  return MakeInt(AsInt(base) + offset);
+}
+
+// ---------------------------------------------------------------------------
+// PtrDiff
+// ---------------------------------------------------------------------------
+
+// Returns (lhs - rhs) / element_size using the concrete address of each
+// pointer operand (or its integer value when it is not a pointer). The
+// divisor is clamped to at least 1, so an element size of zero or less
+// yields the plain byte distance.
+Value ConcreteValueFactory::PtrDiff(const Value &lhs, const Value &rhs,
+                                    int64_t element_size) {
+  auto *lp = AsPointer(lhs);
+  auto *rp = AsPointer(rhs);
+  int64_t la = lp ? static_cast(ConcreteAddress(*lp)) : AsInt(lhs);
+  int64_t ra = rp ? static_cast(ConcreteAddress(*rp)) : AsInt(rhs);
+  int64_t divisor = std::max(element_size, static_cast(1));
+  return MakeInt((la - ra) / divisor);
+}
+
+// ---------------------------------------------------------------------------
+// IsTrue
+// ---------------------------------------------------------------------------
+
+// Returns IsTruthy(val) wrapped in an optional; this factory never returns
+// an empty optional.
+std::optional ConcreteValueFactory::IsTrue(const Value &val) {
+  return IsTruthy(val);
+}
+
+// ---------------------------------------------------------------------------
+// Select
+// ---------------------------------------------------------------------------
+
+// Returns a copy of `if_true` when `cond` is truthy, otherwise of `if_false`.
+Value ConcreteValueFactory::Select(const Value &cond, const Value &if_true,
+                                   const Value &if_false) {
+  return IsTruthy(cond) ? if_true : if_false;
+}
+
+// ---------------------------------------------------------------------------
+// BitwiseIntrinsic
+// ---------------------------------------------------------------------------
+
+Value ConcreteValueFactory::BitwiseIntrinsic(OpCode width_op, BitwiseOp sub,
+                                             const Value &val,
+                                             const Value &val2) {
+  using enum BitwiseOp;
+  using enum OpCode;
+
+  uint64_t v = AsUint(val);
+  uint64_t v2 = AsUint(val2);
+
+  // Determine the width from the parent opcode.
+ unsigned width_bits; + switch (width_op) { + case BITWISE_8: width_bits = 8; break; + case BITWISE_16: width_bits = 16; break; + case BITWISE_32: width_bits = 32; break; + case BITWISE_64: width_bits = 64; break; + default: assert(false); return MakeUndef(); + } + + switch (sub) { + case BSWAP_16: + return MakeInt(static_cast(__builtin_bswap16( + static_cast(v)))); + case BSWAP_32: + return MakeInt(static_cast(__builtin_bswap32( + static_cast(v)))); + case BSWAP_64: + return MakeInt(static_cast(__builtin_bswap64(v))); + + case POPCOUNT: + if (width_bits <= 32) + return MakeInt(static_cast( + __builtin_popcount(static_cast( + static_cast(v))))); + return MakeInt(static_cast( + __builtin_popcountll(static_cast(v)))); + + case CLZ: + switch (width_bits) { + case 8: + return MakeInt(static_cast( + __builtin_clz(static_cast( + static_cast(v))) - 24)); + case 16: + return MakeInt(static_cast( + __builtin_clz(static_cast( + static_cast(v))) - 16)); + case 32: + return MakeInt(static_cast( + __builtin_clz(static_cast( + static_cast(v))))); + case 64: + return MakeInt(static_cast( + __builtin_clzll(static_cast(v)))); + default: break; + } + break; + + case CTZ: + if (width_bits <= 32) + return MakeInt(static_cast( + __builtin_ctz(static_cast( + static_cast(v))))); + return MakeInt(static_cast( + __builtin_ctzll(static_cast(v)))); + + case FFS: + if (width_bits <= 32) + return MakeInt(static_cast( + __builtin_ffs(static_cast(static_cast(v))))); + return MakeInt(static_cast( + __builtin_ffsll(static_cast( + static_cast(v))))); + + case PARITY: + if (width_bits <= 32) + return MakeInt(static_cast( + __builtin_parity(static_cast( + static_cast(v))))); + return MakeInt(static_cast( + __builtin_parityll(static_cast(v)))); + + case ROTL: { + unsigned amt = static_cast(v2) % width_bits; + if (amt == 0) return MakeInt(static_cast(v)); + switch (width_bits) { + case 8: { + auto x = static_cast(v); + return MakeInt(static_cast( + static_cast((x << amt) | (x >> (8 - amt))))); + } + 
case 16: { + auto x = static_cast(v); + return MakeInt(static_cast( + static_cast((x << amt) | (x >> (16 - amt))))); + } + case 32: { + auto x = static_cast(v); + return MakeInt(static_cast((x << amt) | (x >> (32 - amt)))); + } + case 64: + return MakeInt(static_cast( + (v << amt) | (v >> (64 - amt)))); + default: break; + } + break; + } + + case ROTR: { + unsigned amt = static_cast(v2) % width_bits; + if (amt == 0) return MakeInt(static_cast(v)); + switch (width_bits) { + case 8: { + auto x = static_cast(v); + return MakeInt(static_cast( + static_cast((x >> amt) | (x << (8 - amt))))); + } + case 16: { + auto x = static_cast(v); + return MakeInt(static_cast( + static_cast((x >> amt) | (x << (16 - amt))))); + } + case 32: { + auto x = static_cast(v); + return MakeInt(static_cast((x >> amt) | (x << (32 - amt)))); + } + case 64: + return MakeInt(static_cast( + (v >> amt) | (v << (64 - amt)))); + default: break; + } + break; + } + } + + assert(false && "Unhandled BitwiseOp"); + return MakeUndef(); +} + +// --------------------------------------------------------------------------- +// FloatIntrinsic +// --------------------------------------------------------------------------- + +Value ConcreteValueFactory::FloatIntrinsic(FloatOp sub, + const std::vector &operands) { + using enum FloatOp; + + // Helper to get operand as double/float. + auto d = [&](size_t i) -> double { + return i < operands.size() ? AsFloat(operands[i]) : 0.0; + }; + auto f = [&](size_t i) -> float { + return i < operands.size() ? AsFloat32(operands[i]) : 0.0f; + }; + + switch (sub) { + // Classification (return int). + case ISNAN_32: return MakeInt(std::isnan(f(0)) ? 1 : 0); + case ISNAN_64: return MakeInt(std::isnan(d(0)) ? 1 : 0); + case ISINF_32: return MakeInt(std::isinf(f(0)) ? 1 : 0); + case ISINF_64: return MakeInt(std::isinf(d(0)) ? 1 : 0); + case ISFINITE_32: return MakeInt(std::isfinite(f(0)) ? 1 : 0); + case ISFINITE_64: return MakeInt(std::isfinite(d(0)) ? 
1 : 0); + case SIGNBIT_32: return MakeInt(std::signbit(f(0)) ? 1 : 0); + case SIGNBIT_64: return MakeInt(std::signbit(d(0)) ? 1 : 0); + + // Constants (no operands). + case INF_32: return MakeFloat32(std::numeric_limits::infinity()); + case INF_64: return MakeFloat(std::numeric_limits::infinity()); + case NAN_32: return MakeFloat32(std::numeric_limits::quiet_NaN()); + case NAN_64: return MakeFloat(std::numeric_limits::quiet_NaN()); + case HUGE_32: return MakeFloat32(HUGE_VALF); + case HUGE_64: return MakeFloat(HUGE_VAL); + + // Unary float ops. + case FABS_32: return MakeFloat32(std::fabs(f(0))); + case FABS_64: return MakeFloat(std::fabs(d(0))); + case CEIL_32: return MakeFloat32(std::ceil(f(0))); + case CEIL_64: return MakeFloat(std::ceil(d(0))); + case FLOOR_32: return MakeFloat32(std::floor(f(0))); + case FLOOR_64: return MakeFloat(std::floor(d(0))); + case ROUND_32: return MakeFloat32(std::round(f(0))); + case ROUND_64: return MakeFloat(std::round(d(0))); + case TRUNC_32: return MakeFloat32(std::trunc(f(0))); + case TRUNC_64: return MakeFloat(std::trunc(d(0))); + case SQRT_32: return MakeFloat32(std::sqrt(f(0))); + case SQRT_64: return MakeFloat(std::sqrt(d(0))); + case SIN_32: return MakeFloat32(std::sin(f(0))); + case SIN_64: return MakeFloat(std::sin(d(0))); + case COS_32: return MakeFloat32(std::cos(f(0))); + case COS_64: return MakeFloat(std::cos(d(0))); + case TAN_32: return MakeFloat32(std::tan(f(0))); + case TAN_64: return MakeFloat(std::tan(d(0))); + case ASIN_32: return MakeFloat32(std::asin(f(0))); + case ASIN_64: return MakeFloat(std::asin(d(0))); + case ACOS_32: return MakeFloat32(std::acos(f(0))); + case ACOS_64: return MakeFloat(std::acos(d(0))); + case ATAN_32: return MakeFloat32(std::atan(f(0))); + case ATAN_64: return MakeFloat(std::atan(d(0))); + case EXP_32: return MakeFloat32(std::exp(f(0))); + case EXP_64: return MakeFloat(std::exp(d(0))); + case EXP2_32: return MakeFloat32(std::exp2(f(0))); + case EXP2_64: return 
MakeFloat(std::exp2(d(0))); + case LOG_32: return MakeFloat32(std::log(f(0))); + case LOG_64: return MakeFloat(std::log(d(0))); + case LOG2_32: return MakeFloat32(std::log2(f(0))); + case LOG2_64: return MakeFloat(std::log2(d(0))); + case LOG10_32: return MakeFloat32(std::log10(f(0))); + case LOG10_64: return MakeFloat(std::log10(d(0))); + case SINH_32: return MakeFloat32(std::sinh(f(0))); + case SINH_64: return MakeFloat(std::sinh(d(0))); + case COSH_32: return MakeFloat32(std::cosh(f(0))); + case COSH_64: return MakeFloat(std::cosh(d(0))); + case TANH_32: return MakeFloat32(std::tanh(f(0))); + case TANH_64: return MakeFloat(std::tanh(d(0))); + case ERF_32: return MakeFloat32(std::erf(f(0))); + case ERF_64: return MakeFloat(std::erf(d(0))); + case ERFC_32: return MakeFloat32(std::erfc(f(0))); + case ERFC_64: return MakeFloat(std::erfc(d(0))); + case TGAMMA_32: return MakeFloat32(std::tgamma(f(0))); + case TGAMMA_64: return MakeFloat(std::tgamma(d(0))); + case LGAMMA_32: return MakeFloat32(std::lgamma(f(0))); + case LGAMMA_64: return MakeFloat(std::lgamma(d(0))); + + // Binary float ops. 
+ case COPYSIGN_32: return MakeFloat32(std::copysign(f(0), f(1))); + case COPYSIGN_64: return MakeFloat(std::copysign(d(0), d(1))); + case FMIN_32: return MakeFloat32(std::fmin(f(0), f(1))); + case FMIN_64: return MakeFloat(std::fmin(d(0), d(1))); + case FMAX_32: return MakeFloat32(std::fmax(f(0), f(1))); + case FMAX_64: return MakeFloat(std::fmax(d(0), d(1))); + case ATAN2_32: return MakeFloat32(std::atan2(f(0), f(1))); + case ATAN2_64: return MakeFloat(std::atan2(d(0), d(1))); + case POW_32: return MakeFloat32(std::pow(f(0), f(1))); + case POW_64: return MakeFloat(std::pow(d(0), d(1))); + case FMOD_32: return MakeFloat32(std::fmod(f(0), f(1))); + case FMOD_64: return MakeFloat(std::fmod(d(0), d(1))); + case REMAINDER_32: return MakeFloat32(std::remainder(f(0), f(1))); + case REMAINDER_64: return MakeFloat(std::remainder(d(0), d(1))); + case HYPOT_32: return MakeFloat32(std::hypot(f(0), f(1))); + case HYPOT_64: return MakeFloat(std::hypot(d(0), d(1))); + case FDIM_32: return MakeFloat32(std::fdim(f(0), f(1))); + case FDIM_64: return MakeFloat(std::fdim(d(0), d(1))); + + // Ternary float ops. + case FMA_32: return MakeFloat32(std::fma(f(0), f(1), f(2))); + case FMA_64: return MakeFloat(std::fma(d(0), d(1), d(2))); + } + + assert(false && "Unhandled FloatOp"); + return MakeUndef(); +} + +} // namespace mx::ir::interpret diff --git a/lib/IR/Interpret/Interpreter.cpp b/lib/IR/Interpret/Interpreter.cpp new file mode 100644 index 000000000..1ab50257b --- /dev/null +++ b/lib/IR/Interpret/Interpreter.cpp @@ -0,0 +1,1820 @@ +// Copyright (c) 2024-present, Trail of Bits, Inc. +// +// This source code is licensed in accordance with the terms specified in +// the LICENSE file found in the root directory of this source tree. + +#include +#include +#include +#include +#include +#include +#include +// NOTE: Entity.h defines VariantEntity which contains Fragment, Macro, +// Compilation, etc. The `from()` overload resolution needs all variant +// members to be complete types. 
Frontend.h provides the missing ones.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace mx::ir::interpret {
+namespace {
+
+// ---------------------------------------------------------------------------
+// Context struct — groups parameters every helper needs
+// ---------------------------------------------------------------------------
+
+// Bundle of the four objects the helpers in this file operate on, passed
+// around as one argument. Every member is a reference, so a Ctx does not own
+// any of the objects it refers to.
+struct Ctx {
+  InterpreterState &state;   // Interpreter state; Frame() forwards to it.
+  Memory &memory;            // Memory object the helpers read and write.
+  ValueFactory &factory;     // Value factory passed through to the helpers.
+  Driver &driver;            // Driver passed through to the helpers.
+
+  // Shorthand for state.Frame().
+  CallFrame &Frame() { return state.Frame(); }
+};
+
+// ---------------------------------------------------------------------------
+// Forward declarations
+// ---------------------------------------------------------------------------
+
+// Prototypes for the file-local helpers, declared up front so that they can
+// refer to one another regardless of the order in which they are defined.
+static size_t UnderlyingOpAccessSize(ir::OpCode op);
+static void MemWriteValue(Memory &memory, uint64_t address,
+                          const Value &val, size_t size);
+static Value MemReadValue(Memory &memory, uint64_t address,
+                          size_t size, bool is_float);
+static uint64_t AllocateObject(CallFrame &frame, Memory &memory,
+                               const IRObject &obj);
+static void SetupFrame(CallFrame &frame, Memory &memory,
+                       const IRFunction &func, const std::vector &args);
+static Value GetValue(Ctx &ctx, CallFrame &frame, const IRInstruction &inst);
+static Value EvalMemorySubOp(Ctx &ctx, CallFrame &frame, const MemoryInst &mi,
+                             MemOp sub, const std::vector &ops);
+static std::optional ResolveVAListAddr(
+    Ctx &ctx, CallFrame &frame, const IRInstruction &operand);
+static Value DerefArgPointer(Ctx &ctx, CallFrame &frame,
+                             const IRInstruction &arg);
+static bool EvalCall(Ctx &ctx, const IRInstruction &inst);
+static bool Eval(Ctx &ctx, const IRInstruction &inst);
+static void RunToCompletion(Ctx &ctx, const IRFunction &func,
+                            const std::vector &args);
+static Value ReadReturnValue(Memory &memory, const CallFrame &frame,
+                             const Value &ret_from_inst);
+
+// ---------------------------------------------------------------------------
+// UnderlyingOpAccessSize
+//
---------------------------------------------------------------------------
+
+// Returns a byte width (1, 2, 4 or 8) derived from the numeric position of
+// `op` inside the ir::OpCode enumeration:
+//
+//  - float arithmetic / float comparison: 4 when the opcode's numeric value
+//    is odd, 8 when it is even;
+//  - ADD_8 .. SHR_64: the opcode's offset from the first opcode of its
+//    family indexes {1, 2, 4, 8}; an offset of 4 or more yields 8;
+//  - ATOMIC_ADD_8 .. ATOMIC_EXCHANGE_64: the offset from ATOMIC_ADD_8,
+//    modulo 4, indexes {1, 2, 4, 8};
+//  - anything else: 8.
+//
+// NOTE(review): every branch depends on how the enumerators are numbered
+// (presumably 32-bit float opcodes are odd, and each integer/atomic family is
+// laid out as _8, _16, _32, _64 in that order) — confirm against the OpCode
+// definition before inserting or reordering opcodes.
+static size_t UnderlyingOpAccessSize(ir::OpCode op) {
+  using enum ir::OpCode;
+  if (ir::IsFloatArithmetic(op)) {
+    unsigned v = static_cast(op);
+    return (v % 2 == 1) ? 4 : 8;
+  }
+  if (ir::IsFloatComparison(op)) {
+    unsigned v = static_cast(op);
+    return (v % 2 == 1) ? 4 : 8;
+  }
+  if (op >= ADD_8 && op <= SHR_64) {
+    static constexpr size_t widths[] = {1, 2, 4, 8};
+    // Pick the first opcode of the family `op` belongs to. The first test
+    // that succeeds wins, so the chain presumably runs from the
+    // highest-numbered family down to ADD_8 — TODO confirm ordering.
+    unsigned base;
+    if (op >= SHR_8) base = static_cast(SHR_8);
+    else if (op >= SHL_8) base = static_cast(SHL_8);
+    else if (op >= BIT_XOR_8) base = static_cast(BIT_XOR_8);
+    else if (op >= BIT_OR_8) base = static_cast(BIT_OR_8);
+    else if (op >= BIT_AND_8) base = static_cast(BIT_AND_8);
+    else if (op >= USHR_8) base = static_cast(USHR_8);
+    else if (op >= UREM_8) base = static_cast(UREM_8);
+    else if (op >= UDIV_8) base = static_cast(UDIV_8);
+    else if (op >= REM_8) base = static_cast(REM_8);
+    else if (op >= DIV_8) base = static_cast(DIV_8);
+    else if (op >= MUL_8) base = static_cast(MUL_8);
+    else if (op >= SUB_8) base = static_cast(SUB_8);
+    else base = static_cast(ADD_8);
+    unsigned idx = static_cast(op) - base;
+    // The bounds check keeps the table lookup in range; offsets past the
+    // fourth slot fall back to 8.
+    return (idx < 4) ? widths[idx] : 8;
+  }
+  if (op >= ATOMIC_ADD_8 && op <= ATOMIC_EXCHANGE_64) {
+    static constexpr size_t widths[] = {1, 2, 4, 8};
+    unsigned v = static_cast(op) -
+                 static_cast(ATOMIC_ADD_8);
+    return widths[v % 4];
+  }
+  return 8;
+}
+
+// ---------------------------------------------------------------------------
+// AllocateObject
+// ---------------------------------------------------------------------------
+
+// Returns the address recorded for `obj` in `frame`, allocating on first use.
+// The packed entity id of the object is the key into
+// frame.entity_to_address: a hit returns the recorded address; a miss
+// allocates from `memory`, records the new address under that key and
+// returns it. A size or alignment of 0 reported by the object is replaced
+// by 8 before allocating.
+static uint64_t AllocateObject(CallFrame &frame, Memory &memory,
+                               const IRObject &obj) {
+  auto eid = EntityId(obj.id()).Pack();
+  auto it = frame.entity_to_address.find(eid);
+  if (it != frame.entity_to_address.end()) {
+    return it->second;
+  }
+  uint32_t size = obj.size_bytes();
+  if (size == 0) size = 8;
+  uint32_t align = obj.align_bytes();
+  if (align == 0) align = 8;
+  auto address = memory.Allocate(size, align);
+  frame.entity_to_address[eid] = address;
+  return address;
+}
+
+// ---------------------------------------------------------------------------
+// MemWriteValue — serialize a Value into Memory
+// ---------------------------------------------------------------------------
+
+// Stores `val` at `address`. Which bytes are written depends on the value:
+//
+//  - pointer: handed to memory.WritePointer together with its concrete
+//    address; `size` is not consulted on this path;
+//  - null: min(size, 8) zero bytes;
+//  - the alternative matched by std::get_if: the first min(size, 4) bytes of
+//    its `bits` member when `width` is 4, otherwise the first
+//    min(size, sizeof(bits)) bytes;
+//  - any other value: min(size, 8) zero bytes.
+//
+// NOTE(review): copying the leading bytes of `bits` yields its low-order
+// bytes only on a little-endian host — confirm that is the only host this
+// code is expected to run on.
+static void MemWriteValue(Memory &memory, uint64_t address,
+                          const Value &val, size_t size) {
+  if (auto *ptr = AsPointer(val)) {
+    memory.WritePointer(address, ConcreteAddress(*ptr));
+    return;
+  }
+  if (IsNull(val)) {
+    int64_t zero = 0;
+    memory.Write(address, &zero,
+                 static_cast(std::min(size, sizeof(zero))));
+    return;
+  }
+  if (auto *s = std::get_if(&val)) {
+    if (s->width == 4) {
+      memory.Write(address, &s->bits,
+                   static_cast(std::min(size, size_t{4})));
+    } else {
+      memory.Write(address, &s->bits,
+                   static_cast(std::min(size, sizeof(s->bits))));
+    }
+  } else {
+    // Same zero-fill as the null case above.
+    int64_t zero = 0;
+    memory.Write(address, &zero,
+                 static_cast(std::min(size, sizeof(zero))));
+  }
+}
+
+// ---------------------------------------------------------------------------
+// MemReadValue — deserialize a Value from Memory
+//
--------------------------------------------------------------------------- + +static Value MemReadValue(Memory &memory, uint64_t address, + size_t size, bool is_float) { + // Check for pointer provenance first. + uint64_t ptr_val = 0; + if (memory.ReadPointer(address, ptr_val)) { + return MakePtr(ptr_val); + } + + if (is_float) { + if (size == 4) { + float f = 0; + memory.Read(address, &f, 4); + return MakeFloat32(f); + } + double d = 0; + memory.Read(address, &d, 8); + return MakeFloat(d); + } + + int64_t v = 0; + memory.Read(address, &v, + static_cast(std::min(size, sizeof(v)))); + switch (size) { + case 1: v = static_cast(static_cast(v)); break; + case 2: v = static_cast(static_cast(v)); break; + case 4: v = static_cast(static_cast(v)); break; + default: break; + } + return MakeInt(v); +} + +// --------------------------------------------------------------------------- +// ReadReturnValue — extract the callee's return value from its return slot +// --------------------------------------------------------------------------- + +static Value ReadReturnValue(Memory &memory, const CallFrame &frame, + const Value &ret_from_inst) { + auto *rp = AsPointer(frame.return_ptr); + if (!rp || !IsConcrete(*rp)) return ret_from_inst; + + uint32_t sz = 0; + if (auto fd = frame.func.declaration()) { + if (auto bits = fd->return_type().size_in_bits()) { + sz = static_cast((*bits + 7) / 8); + } + } + + if (sz > 0 && sz <= 8) { + return MemReadValue(memory, ConcreteAddress(*rp), sz, false); + } + if (sz > 8) { + return frame.return_ptr; + } + return ret_from_inst; +} + +// --------------------------------------------------------------------------- +// SetupFrame — initialize a CallFrame for a function invocation +// --------------------------------------------------------------------------- + +static void SetupFrame(CallFrame &frame, Memory &memory, + const IRFunction &func, + const std::vector &args) { + frame.func = func; + frame.params = args; + + // Build block map. 
+ for (auto block : func.blocks()) { + frame.block_map[EntityId(block.id()).Pack()] = block; + } + { + auto entry = func.entry_block(); + frame.block_map[EntityId(entry.id()).Pack()] = entry; + } + + // Allocate parameter storage. + uint32_t param_idx = 0; + for (auto obj : func.objects()) { + auto k = obj.kind(); + if (k == ir::ObjectKind::PARAMETER || + k == ir::ObjectKind::PARAMETER_VALUE) { + auto address = AllocateObject(frame, memory, obj); + if (param_idx < args.size()) { + uint32_t sz = obj.size_bytes(); + if (sz == 0) sz = 8; + auto *arg_ptr = AsPointer(args[param_idx]); + if (arg_ptr && IsConcrete(*arg_ptr) && sz > 8) { + memory.Memcpy(address, ConcreteAddress(*arg_ptr), sz); + } else { + MemWriteValue(memory, address, args[param_idx], sz); + } + } + frame.param_ptrs.push_back(MakePtr(address)); + ++param_idx; + } + } + + // GLOBAL_INITIALIZER functions receive the global's address as a direct + // param_ptr, not through PARAMETER objects. + if (func.kind() == ir::FunctionKind::GLOBAL_INITIALIZER) { + for (auto &a : args) { + frame.param_ptrs.push_back(a); + } + param_idx = static_cast(args.size()); + } + + // Variadic args beyond fixed params. + frame.variadic_start_index = param_idx; + for (uint32_t i = param_idx; i < args.size(); ++i) { + uint32_t sz = 8; + auto address = memory.Allocate(sz, 8); + MemWriteValue(memory, address, args[i], sz); + frame.param_ptrs.push_back(MakePtr(address)); + } + + // Allocate return storage if not already set by the caller. 
+ if (IsUndefined(frame.return_ptr)) { + if (auto fd = func.declaration()) { + auto rt = fd->return_type(); + if (auto bits = rt.size_in_bits()) { + uint32_t sz = static_cast((*bits + 7) / 8); + if (sz > 0) { + auto addr = memory.Allocate(sz, 8); + frame.return_ptr = MakePtr(addr); + } + } + } + } + + frame.current_block = func.entry_block(); +} + +// --------------------------------------------------------------------------- +// RunToCompletion — run a function synchronously (for global initializers) +// --------------------------------------------------------------------------- + +static void RunToCompletion(Ctx &ctx, const IRFunction &func, + const std::vector &args) { + auto saved_depth = ctx.state.call_stack.size(); + + // Ensure capacity so the push doesn't reallocate, which would + // invalidate frame references held by callers up the stack. + if (ctx.state.call_stack.size() == ctx.state.call_stack.capacity()) { + ctx.state.call_stack.reserve(ctx.state.call_stack.capacity() * 2); + } + + ctx.state.call_stack.emplace_back(); + SetupFrame(ctx.state.Frame(), ctx.memory, func, args); + + while (ctx.state.call_stack.size() > saved_depth) { + auto result = Step(ctx.state, ctx.memory, ctx.factory, ctx.driver); + if (result.status != StepStatus::CONTINUE) { + while (ctx.state.call_stack.size() > saved_depth) { + ctx.state.call_stack.pop_back(); + } + return; + } + } +} + +// --------------------------------------------------------------------------- +// GetValue — look up or lazily evaluate an instruction's value +// --------------------------------------------------------------------------- + +static Value GetValue(Ctx &ctx, CallFrame &frame, const IRInstruction &inst) { + auto eid = EntityId(inst.id()).Pack(); + auto it = frame.values.find(eid); + if (it != frame.values.end()) { + return it->second; + } + + auto op = inst.opcode(); + if (!ir::IsTerminator(op)) { + Eval(ctx, inst); + it = frame.values.find(eid); + if (it != frame.values.end()) { + return 
it->second; + } + } + return MakeUndef(); +} + +// --------------------------------------------------------------------------- +// ResolveVAListAddr +// --------------------------------------------------------------------------- + +static std::optional ResolveVAListAddr( + Ctx &ctx, CallFrame &frame, const IRInstruction &operand) { + auto load_mi = MemoryInst::from(operand); + Value addr = load_mi + ? GetValue(ctx, frame, load_mi->address()) + : GetValue(ctx, frame, operand); + auto *ptr = AsPointer(addr); + if (!ptr || !IsConcrete(*ptr)) return std::nullopt; + return ConcreteAddress(*ptr); +} + +// --------------------------------------------------------------------------- +// DerefArgPointer — load the value from an ALLOCA/ARG pointer +// --------------------------------------------------------------------------- + +static Value DerefArgPointer(Ctx &ctx, CallFrame &frame, + const IRInstruction &arg) { + Value v = GetValue(ctx, frame, arg); + auto *ptr = AsPointer(v); + if (ptr && IsConcrete(*ptr)) { + auto ai = AllocaInst::from(arg); + uint32_t sz = ai ? ai->size_bytes() : 8; + if (sz == 0) sz = 8; + if (sz > 8) return v; + return MemReadValue(ctx.memory, ConcreteAddress(*ptr), sz, false); + } + return v; +} + +// --------------------------------------------------------------------------- +// EvalMemorySubOp — handle bulk memory/string sub-operations +// --------------------------------------------------------------------------- + +static Value EvalMemorySubOp(Ctx &ctx, CallFrame &frame, + const MemoryInst &mi, MemOp sub, + const std::vector &ops) { + Value result = MakeUndef(); + using MO = ir::MemOp; + auto &memory = ctx.memory; + + switch (sub) { + case MO::MEMSET: { + if (ops.size() >= 3) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p) && AsInt(ops[2]) > 0) { + memory.Memset(ConcreteAddress(*p), + static_cast(AsInt(ops[1])), + static_cast(AsInt(ops[2]))); + } + } + result = ops.empty() ? 
MakeUndef() : ops[0]; + break; + } + case MO::MEMCPY: + case MO::MEMMOVE: { + if (ops.size() >= 3) { + auto *dp = AsPointer(ops[0]); + int64_t len = AsInt(ops[2]); + if (dp && IsConcrete(*dp) && len > 0) { + auto *sp = AsPointer(ops[1]); + if (sp && IsConcrete(*sp)) { + memory.Memcpy(ConcreteAddress(*dp), ConcreteAddress(*sp), + static_cast(len)); + } else if (auto *sv = std::get_if(&ops[1])) { + memory.Write(ConcreteAddress(*dp), &sv->bits, + static_cast( + std::min(static_cast(len), + sizeof(sv->bits)))); + } + } + } + result = ops.empty() ? MakeUndef() : ops[0]; + break; + } + case MO::BZERO: { + if (ops.size() >= 2) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p) && AsInt(ops[1]) > 0) { + memory.Memset(ConcreteAddress(*p), 0, + static_cast(AsInt(ops[1]))); + } + } + result = ops.empty() ? MakeUndef() : ops[0]; + break; + } + case MO::STRLEN: { + if (ops.size() >= 1) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p)) { + uint64_t addr = ConcreteAddress(*p); + size_t len = 0; + uint8_t byte = 0; + while (true) { + memory.Read(addr + len, &byte, 1); + if (byte == 0) break; + ++len; + } + result = MakeInt(static_cast(len)); + } + } + break; + } + case MO::STRNLEN: { + if (ops.size() >= 2) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p)) { + uint64_t addr = ConcreteAddress(*p); + size_t maxlen = static_cast(AsInt(ops[1])); + size_t len = 0; + uint8_t byte = 0; + while (len < maxlen) { + memory.Read(addr + len, &byte, 1); + if (byte == 0) break; + ++len; + } + result = MakeInt(static_cast(len)); + } + } + break; + } + case MO::STRCMP: { + if (ops.size() >= 2) { + auto *p0 = AsPointer(ops[0]); + auto *p1 = AsPointer(ops[1]); + if (p0 && p1 && IsConcrete(*p0) && IsConcrete(*p1)) { + uint64_t a0 = ConcreteAddress(*p0); + uint64_t a1 = ConcreteAddress(*p1); + int cmp = 0; + for (size_t i = 0; ; ++i) { + uint8_t c0 = 0, c1 = 0; + memory.Read(a0 + i, &c0, 1); + memory.Read(a1 + i, &c1, 1); + if (c0 != c1) { cmp = (c0 < c1) ? 
-1 : 1; break; } + if (c0 == 0) break; + } + result = MakeInt(cmp); + } + } + break; + } + case MO::STRNCMP: { + if (ops.size() >= 3) { + auto *p0 = AsPointer(ops[0]); + auto *p1 = AsPointer(ops[1]); + if (p0 && p1 && IsConcrete(*p0) && IsConcrete(*p1)) { + uint64_t a0 = ConcreteAddress(*p0); + uint64_t a1 = ConcreteAddress(*p1); + size_t n = static_cast(AsInt(ops[2])); + int cmp = 0; + for (size_t i = 0; i < n; ++i) { + uint8_t c0 = 0, c1 = 0; + memory.Read(a0 + i, &c0, 1); + memory.Read(a1 + i, &c1, 1); + if (c0 != c1) { cmp = (c0 < c1) ? -1 : 1; break; } + if (c0 == 0) break; + } + result = MakeInt(cmp); + } + } + break; + } + case MO::MEMCMP: { + if (ops.size() >= 3) { + auto *p0 = AsPointer(ops[0]); + auto *p1 = AsPointer(ops[1]); + if (p0 && p1 && IsConcrete(*p0) && IsConcrete(*p1)) { + size_t len = static_cast(AsInt(ops[2])); + std::vector buf0(len, 0), buf1(len, 0); + memory.Read(ConcreteAddress(*p0), buf0.data(), + static_cast(len)); + memory.Read(ConcreteAddress(*p1), buf1.data(), + static_cast(len)); + result = MakeInt(std::memcmp(buf0.data(), buf1.data(), len)); + } + } + break; + } + case MO::MEMCHR: { + if (ops.size() >= 3) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p)) { + uint64_t addr = ConcreteAddress(*p); + size_t len = static_cast(AsInt(ops[2])); + uint8_t needle = static_cast(AsInt(ops[1])); + for (size_t i = 0; i < len; ++i) { + uint8_t byte = 0; + memory.Read(addr + i, &byte, 1); + if (byte == needle) { + result = MakePtr(addr + i); + break; + } + } + } + } + break; + } + case MO::STRCHR: { + if (ops.size() >= 2) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p)) { + uint64_t addr = ConcreteAddress(*p); + uint8_t needle = static_cast(AsInt(ops[1])); + bool found = false; + for (size_t i = 0; ; ++i) { + uint8_t byte = 0; + memory.Read(addr + i, &byte, 1); + if (byte == needle) { + result = MakePtr(addr + i); + found = true; + break; + } + if (byte == 0) break; + } + if (!found) { + if (needle == 0) { + for (size_t i = 0; 
; ++i) { + uint8_t byte = 0; + memory.Read(addr + i, &byte, 1); + if (byte == 0) { + result = MakePtr(addr + i); + found = true; + break; + } + } + } + if (!found) { + result = MakeNull(); + } + } + } + } + break; + } + case MO::STRRCHR: { + if (ops.size() >= 2) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p)) { + uint64_t addr = ConcreteAddress(*p); + uint8_t needle = static_cast(AsInt(ops[1])); + int64_t last_pos = -1; + for (size_t i = 0; ; ++i) { + uint8_t byte = 0; + memory.Read(addr + i, &byte, 1); + if (byte == needle) { + last_pos = static_cast(i); + } + if (byte == 0) break; + } + if (last_pos >= 0) { + result = MakePtr(addr + static_cast(last_pos)); + } else { + result = MakeNull(); + } + } + } + break; + } + case MO::STRSTR: { + if (ops.size() >= 2) { + auto *p0 = AsPointer(ops[0]); + auto *p1 = AsPointer(ops[1]); + if (p0 && p1 && IsConcrete(*p0) && IsConcrete(*p1)) { + uint64_t ha = ConcreteAddress(*p0); + uint64_t na = ConcreteAddress(*p1); + std::string haystack, needle_str; + for (size_t i = 0; ; ++i) { + uint8_t b = 0; + memory.Read(ha + i, &b, 1); + if (b == 0) break; + haystack.push_back(static_cast(b)); + } + for (size_t i = 0; ; ++i) { + uint8_t b = 0; + memory.Read(na + i, &b, 1); + if (b == 0) break; + needle_str.push_back(static_cast(b)); + } + if (needle_str.empty()) { + result = ops[0]; + } else { + auto pos = haystack.find(needle_str); + if (pos != std::string::npos) { + result = MakePtr(ha + pos); + } else { + result = MakeNull(); + } + } + } + } + break; + } + case MO::STRCPY: { + if (ops.size() >= 2) { + auto *dp = AsPointer(ops[0]); + auto *sp = AsPointer(ops[1]); + if (dp && sp && IsConcrete(*dp) && IsConcrete(*sp)) { + uint64_t da = ConcreteAddress(*dp); + uint64_t sa = ConcreteAddress(*sp); + for (size_t i = 0; ; ++i) { + uint8_t c = 0; + memory.Read(sa + i, &c, 1); + memory.Write(da + i, &c, 1); + if (c == 0) break; + } + } + } + result = ops.empty() ? 
MakeUndef() : ops[0]; + break; + } + case MO::STRNCPY: { + if (ops.size() >= 3) { + auto *dp = AsPointer(ops[0]); + auto *sp = AsPointer(ops[1]); + if (dp && sp && IsConcrete(*dp) && IsConcrete(*sp)) { + uint64_t da = ConcreteAddress(*dp); + uint64_t sa = ConcreteAddress(*sp); + size_t n = static_cast(AsInt(ops[2])); + bool hit_null = false; + for (size_t i = 0; i < n; ++i) { + uint8_t c = 0; + if (!hit_null) { + memory.Read(sa + i, &c, 1); + if (c == 0) hit_null = true; + } + memory.Write(da + i, &c, 1); + } + } + } + result = ops.empty() ? MakeUndef() : ops[0]; + break; + } + case MO::STRCAT: { + if (ops.size() >= 2) { + auto *dp = AsPointer(ops[0]); + auto *sp = AsPointer(ops[1]); + if (dp && sp && IsConcrete(*dp) && IsConcrete(*sp)) { + uint64_t da = ConcreteAddress(*dp); + uint64_t sa = ConcreteAddress(*sp); + size_t dlen = 0; + uint8_t byte = 0; + while (true) { + memory.Read(da + dlen, &byte, 1); + if (byte == 0) break; + ++dlen; + } + for (size_t i = 0; ; ++i) { + uint8_t c = 0; + memory.Read(sa + i, &c, 1); + memory.Write(da + dlen + i, &c, 1); + if (c == 0) break; + } + } + } + result = ops.empty() ? MakeUndef() : ops[0]; + break; + } + case MO::STRNCAT: { + if (ops.size() >= 3) { + auto *dp = AsPointer(ops[0]); + auto *sp = AsPointer(ops[1]); + if (dp && sp && IsConcrete(*dp) && IsConcrete(*sp)) { + uint64_t da = ConcreteAddress(*dp); + uint64_t sa = ConcreteAddress(*sp); + size_t n = static_cast(AsInt(ops[2])); + size_t dlen = 0; + uint8_t byte = 0; + while (true) { + memory.Read(da + dlen, &byte, 1); + if (byte == 0) break; + ++dlen; + } + size_t i = 0; + for (; i < n; ++i) { + uint8_t c = 0; + memory.Read(sa + i, &c, 1); + if (c == 0) break; + memory.Write(da + dlen + i, &c, 1); + } + uint8_t nul = 0; + memory.Write(da + dlen + i, &nul, 1); + } + } + result = ops.empty() ? 
MakeUndef() : ops[0]; + break; + } + case MO::STPCPY: { + if (ops.size() >= 2) { + auto *dp = AsPointer(ops[0]); + auto *sp = AsPointer(ops[1]); + if (dp && sp && IsConcrete(*dp) && IsConcrete(*sp)) { + uint64_t da = ConcreteAddress(*dp); + uint64_t sa = ConcreteAddress(*sp); + size_t i = 0; + for (; ; ++i) { + uint8_t c = 0; + memory.Read(sa + i, &c, 1); + memory.Write(da + i, &c, 1); + if (c == 0) break; + } + result = MakePtr(da + i); + } + } + break; + } + case MO::STPNCPY: { + if (ops.size() >= 3) { + auto *dp = AsPointer(ops[0]); + auto *sp = AsPointer(ops[1]); + if (dp && sp && IsConcrete(*dp) && IsConcrete(*sp)) { + uint64_t da = ConcreteAddress(*dp); + uint64_t sa = ConcreteAddress(*sp); + size_t n = static_cast(AsInt(ops[2])); + bool hit_null = false; + size_t null_pos = n; + for (size_t i = 0; i < n; ++i) { + uint8_t c = 0; + if (!hit_null) { + memory.Read(sa + i, &c, 1); + if (c == 0) { hit_null = true; null_pos = i; } + } + memory.Write(da + i, &c, 1); + } + result = MakePtr(da + null_pos); + } + } + break; + } + case MO::STRTOI32: case MO::STRTOI64: + case MO::STRTOU32: case MO::STRTOU64: + case MO::STRTOF32: case MO::STRTOF64: { + if (ops.size() >= 1) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p)) { + uint64_t addr = ConcreteAddress(*p); + std::string str; + for (size_t i = 0; ; ++i) { + uint8_t b = 0; + memory.Read(addr + i, &b, 1); + if (b == 0) break; + str.push_back(static_cast(b)); + } + switch (sub) { + case MO::STRTOI32: + result = MakeInt(static_cast( + std::strtol(str.c_str(), nullptr, 10))); + break; + case MO::STRTOI64: + result = MakeInt(static_cast( + std::strtoll(str.c_str(), nullptr, 10))); + break; + case MO::STRTOU32: + result = MakeInt(static_cast( + std::strtoul(str.c_str(), nullptr, 10))); + break; + case MO::STRTOU64: + result = MakeInt(static_cast( + std::strtoull(str.c_str(), nullptr, 10))); + break; + case MO::STRTOF32: + result = MakeFloat(static_cast( + std::strtof(str.c_str(), nullptr))); + break; + case 
MO::STRTOF64: + result = MakeFloat( + std::strtod(str.c_str(), nullptr)); + break; + default: + break; + } + } + } + break; + } + case MO::BIT_READ_LE: case MO::BIT_READ_BE: { + if (ops.size() >= 1) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p)) { + uint64_t addr = ConcreteAddress(*p); + uint32_t bo = mi.bit_offset(); + uint32_t bw = mi.bit_width(); + uint32_t first_byte = bo / 8; + uint32_t last_byte = (bo + bw - 1) / 8; + uint32_t num_bytes = last_byte - first_byte + 1; + std::vector buf(num_bytes, 0); + memory.Read(addr + first_byte, buf.data(), num_bytes); + uint64_t raw = 0; + if (sub == MO::BIT_READ_LE) { + for (uint32_t i = 0; i < num_bytes; ++i) { + raw |= static_cast(buf[i]) << (i * 8); + } + raw >>= (bo % 8); + } else { + for (uint32_t i = 0; i < num_bytes; ++i) { + raw = (raw << 8) | buf[i]; + } + uint32_t top_bits = num_bytes * 8; + uint32_t shift = top_bits - (bo % 8) - bw; + raw >>= shift; + } + uint64_t mask = (bw >= 64) ? ~uint64_t{0} + : ((uint64_t{1} << bw) - 1); + raw &= mask; + result = MakeInt(static_cast(raw)); + } + } + break; + } + case MO::BIT_WRITE_LE: case MO::BIT_WRITE_BE: { + if (ops.size() >= 2) { + auto *p = AsPointer(ops[0]); + if (p && IsConcrete(*p)) { + uint64_t addr = ConcreteAddress(*p); + uint32_t bo = mi.bit_offset(); + uint32_t bw = mi.bit_width(); + uint64_t val = static_cast(AsInt(ops[1])); + uint64_t mask = (bw >= 64) ? 
~uint64_t{0} + : ((uint64_t{1} << bw) - 1); + val &= mask; + uint32_t first_byte = bo / 8; + uint32_t last_byte = (bo + bw - 1) / 8; + uint32_t num_bytes = last_byte - first_byte + 1; + std::vector buf(num_bytes, 0); + memory.Read(addr + first_byte, buf.data(), num_bytes); + if (sub == MO::BIT_WRITE_LE) { + uint64_t raw = 0; + for (uint32_t i = 0; i < num_bytes; ++i) { + raw |= static_cast(buf[i]) << (i * 8); + } + uint32_t shift = bo % 8; + raw &= ~(mask << shift); + raw |= (val << shift); + for (uint32_t i = 0; i < num_bytes; ++i) { + buf[i] = static_cast(raw >> (i * 8)); + } + } else { + uint64_t raw = 0; + for (uint32_t i = 0; i < num_bytes; ++i) { + raw = (raw << 8) | buf[i]; + } + uint32_t top_bits = num_bytes * 8; + uint32_t shift = top_bits - (bo % 8) - bw; + raw &= ~(mask << shift); + raw |= (val << shift); + for (uint32_t i = 0; i < num_bytes; ++i) { + buf[num_bytes - 1 - i] = static_cast(raw >> (i * 8)); + } + } + memory.Write(addr + first_byte, buf.data(), num_bytes); + } + } + break; + } + case MO::CONSUME_VA_PARAM: + break; + default: + if (ir::IsCmpxchg(sub)) { + result = MakeUndef(); + } + break; + } + return result; +} + +// --------------------------------------------------------------------------- +// EvalCall — resolve and execute a function call +// Returns true if a callee frame was pushed (caller must return CONTINUE). +// Returns false if the call was resolved inline (SKIP/MODEL). +// --------------------------------------------------------------------------- + +static bool EvalCall(Ctx &ctx, const IRInstruction &inst) { + auto ci = CallInst::from(inst); + if (!ci) return false; + + auto &frame = ctx.Frame(); + auto eid = EntityId(inst.id()).Pack(); + + // Collect arguments: dereference ALLOCA/ARG pointers. + std::vector call_args; + for (auto arg : ci->arguments()) { + call_args.push_back(DerefArgPointer(ctx, frame, arg)); + } + + // Resolve the callee. 
+ std::optional callee_ir; + auto target_decl = ci->target(); + RawEntityId indirect_eid = kInvalidEntityId; + + if (target_decl) { + callee_ir = IRFunction::from(*target_decl); + } + + // Indirect call: read entity ID from the function pointer. + if (!callee_ir && ci->is_indirect()) { + Value callee_val = GetValue(ctx, frame, inst.nth_operand(0)); + if (auto *ptr = AsPointer(callee_val)) { + if (IsConcrete(*ptr)) { + ctx.memory.Read(ConcreteAddress(*ptr), &indirect_eid, 8); + } + } + } + + // If inline resolution failed, consult the Driver. + if (!callee_ir) { + RawEntityId target_eid = target_decl + ? target_decl->id().Pack() : kInvalidEntityId; + + Suspension s = NeedCallResolution{ + inst, target_eid, indirect_eid, call_args, ci->is_indirect()}; + auto resolution = ctx.driver.Resolve(s); + auto *cr = std::get_if(&resolution); + if (!cr) { + frame.values[eid] = MakeUndef(); + return false; + } + switch (cr->action) { + case CallAction::INLINE: + callee_ir = cr->callee_ir; + break; + case CallAction::MODEL: + case CallAction::SKIP: + frame.values[eid] = cr->return_value; + return false; + } + } + + if (!callee_ir) { + frame.values[eid] = MakeUndef(); + return false; + } + + // Get return ptr from caller BEFORE pushing (frame ref invalidated by push). + Value return_ptr{Undefined{}}; + auto ret_alloca = ci->return_alloca(); + if (ret_alloca) { + return_ptr = GetValue(ctx, frame, *ret_alloca); + } + + // Set resume point on the caller's frame. + frame.resume_after_inst = eid; + + // Push callee frame. NOTE: `frame` reference is invalidated after this. 
+ if (ctx.state.call_stack.size() == ctx.state.call_stack.capacity()) { + ctx.state.call_stack.reserve(ctx.state.call_stack.capacity() * 2); + } + ctx.state.call_stack.emplace_back(); + auto &callee_frame = ctx.state.Frame(); + if (!IsUndefined(return_ptr)) { + callee_frame.return_ptr = return_ptr; + } + SetupFrame(callee_frame, ctx.memory, *callee_ir, call_args); + + return true; +} + +// --------------------------------------------------------------------------- +// Eval — evaluate a single non-terminator instruction +// Returns true if a callee frame was pushed (CALL). +// --------------------------------------------------------------------------- + +static bool Eval(Ctx &ctx, const IRInstruction &inst) { + auto &frame = ctx.Frame(); + auto op = inst.opcode(); + auto eid = EntityId(inst.id()).Pack(); + Value result = MakeUndef(); + + switch (op) { + + // --- Constants --- + case OpCode::CONST: { + auto ci = ConstInst::from(inst); + if (!ci) break; + auto sub = ci->sub_opcode(); + if (sub == ir::ConstOp::NULL_PTR) { + result = ctx.factory.MakeNullPtr(); + } else if (sub == ir::ConstOp::FLOAT32 || + sub == ir::ConstOp::FLOAT16) { + result = MakeFloat32(static_cast(ci->float_value())); + } else if (sub == ir::ConstOp::FLOAT64) { + result = MakeFloat(ci->float_value()); + } else if (sub == ir::ConstOp::INF32) { + result = MakeFloat32(std::numeric_limits::infinity()); + } else if (sub == ir::ConstOp::INF64) { + result = MakeFloat(std::numeric_limits::infinity()); + } else if (sub == ir::ConstOp::NAN32) { + result = MakeFloat32(std::numeric_limits::quiet_NaN()); + } else if (sub == ir::ConstOp::NAN64) { + result = MakeFloat(std::numeric_limits::quiet_NaN()); + } else { + result = ctx.factory.MakeConst(sub, ci->signed_value(), + ci->unsigned_value()); + } + break; + } + + // --- Memory: ALLOCA --- + case OpCode::ALLOCA: { + auto ai = AllocaInst::from(inst); + if (!ai) break; + auto obj = ai->object(); + auto obj_eid = EntityId(obj.id()).Pack(); + if 
(frame.entity_to_address.find(obj_eid) == + frame.entity_to_address.end()) { + if (auto da = DynamicAllocaInst::from(inst)) { + Value sz_val = GetValue(ctx, frame, da->size()); + uint32_t runtime_sz = static_cast(AsInt(sz_val)); + if (runtime_sz > 0) { + auto addr = ctx.memory.Allocate(runtime_sz, ai->align_bytes()); + frame.entity_to_address[obj_eid] = addr; + } else { + AllocateObject(frame, ctx.memory, obj); + } + } else { + AllocateObject(frame, ctx.memory, obj); + } + } + result = MakePtr(frame.entity_to_address[obj_eid]); + break; + } + + // --- String pointer --- + case OpCode::STRING_PTR_32: + case OpCode::STRING_PTR_64: { + auto inst_eid = EntityId(inst.id()).Pack(); + if (frame.entity_to_address.find(inst_eid) == + frame.entity_to_address.end()) { + if (auto src = inst.source_statement()) { + if (auto sl = StringLiteral::from(*src)) { + auto bytes = sl->bytes(); + uint32_t char_width = sl->character_byte_width(); + uint32_t total = sl->byte_length() + char_width; + auto addr = ctx.memory.Allocate(total, 1); + frame.entity_to_address[inst_eid] = addr; + ctx.memory.Write(addr, bytes.data(), + std::min( + static_cast(bytes.size()), total)); + } + } + } + auto it = frame.entity_to_address.find(inst_eid); + result = (it != frame.entity_to_address.end()) + ? MakePtr(it->second) : MakeUndef(); + break; + } + + // --- Memory: loads, stores, bulk ops --- + case OpCode::MEMORY: { + auto mi = MemoryInst::from(inst); + if (!mi) break; + auto sub = mi->sub_opcode(); + + // CONSUME_VA_PARAM: handled here because we need both the va_list + // address and the frame's param_ptrs. 
+ if (sub == ir::MemOp::CONSUME_VA_PARAM) { + auto cvp = ConsumeVAParamInst::from(inst); + if (cvp) { + auto va_addr = ResolveVAListAddr(ctx, frame, + cvp->va_list_operand()); + if (va_addr) { + uint32_t idx = 0; + ctx.memory.Read(*va_addr, &idx, 4); + if (idx < frame.param_ptrs.size()) { + result = frame.param_ptrs[idx]; + ++idx; + ctx.memory.Write(*va_addr, &idx, 4); + } + } + } + break; + } + + if (ir::IsDirectLoadStore(sub)) { + unsigned sz = ir::AccessSize(sub); + bool is_float = ir::IsFloatLoad(sub); + if (ir::IsAnyLoad(sub)) { + Value addr = GetValue(ctx, frame, mi->address()); + auto *ptr = AsPointer(addr); + if (ptr && IsConcrete(*ptr)) { + result = MemReadValue(ctx.memory, ConcreteAddress(*ptr), + sz, is_float); + } + } else { + Value addr = GetValue(ctx, frame, mi->address()); + Value val = GetValue(ctx, frame, mi->stored_value()); + auto *ptr = AsPointer(addr); + if (ptr && IsConcrete(*ptr)) { + MemWriteValue(ctx.memory, ConcreteAddress(*ptr), val, sz); + } + } + } else { + std::vector ops; + for (auto op_inst : inst.operands()) { + ops.push_back(GetValue(ctx, frame, op_inst)); + } + result = EvalMemorySubOp(ctx, frame, *mi, sub, ops); + } + break; + } + + // --- GEP field --- + case OpCode::GEP_FIELD_32: + case OpCode::GEP_FIELD_64: { + auto gep = GEPFieldInst::from(inst); + if (!gep) break; + Value base = GetValue(ctx, frame, gep->base()); + int64_t off = gep->byte_offset(); + auto *ptr = AsPointer(base); + if (ptr && IsConcrete(*ptr)) { + result = MakePtr(ConcreteAddress(*ptr) + off); + } + break; + } + + // --- Pointer arithmetic --- + case OpCode::PTR_ADD_32: + case OpCode::PTR_ADD_64: { + auto pa = PtrAddInst::from(inst); + if (!pa) break; + Value base = GetValue(ctx, frame, pa->base()); + Value idx = GetValue(ctx, frame, pa->index()); + int64_t elem_size = pa->element_size(); + result = ctx.factory.PtrAdd(base, idx, elem_size); + break; + } + + case OpCode::PTR_DIFF_32: + case OpCode::PTR_DIFF_64: { + auto pd = PtrDiffInst::from(inst); + if (!pd) 
break; + Value lhs = GetValue(ctx, frame, pd->lhs()); + Value rhs = GetValue(ctx, frame, pd->rhs()); + result = ctx.factory.PtrDiff(lhs, rhs, pd->element_size()); + break; + } + + // --- Binary arithmetic --- + case OpCode::ADD_8: case OpCode::ADD_16: + case OpCode::ADD_32: case OpCode::ADD_64: + case OpCode::SUB_8: case OpCode::SUB_16: + case OpCode::SUB_32: case OpCode::SUB_64: + case OpCode::MUL_8: case OpCode::MUL_16: + case OpCode::MUL_32: case OpCode::MUL_64: + case OpCode::DIV_8: case OpCode::DIV_16: + case OpCode::DIV_32: case OpCode::DIV_64: + case OpCode::REM_8: case OpCode::REM_16: + case OpCode::REM_32: case OpCode::REM_64: + case OpCode::UDIV_8: case OpCode::UDIV_16: + case OpCode::UDIV_32: case OpCode::UDIV_64: + case OpCode::UREM_8: case OpCode::UREM_16: + case OpCode::UREM_32: case OpCode::UREM_64: + case OpCode::USHR_8: case OpCode::USHR_16: + case OpCode::USHR_32: case OpCode::USHR_64: + case OpCode::BIT_AND_8: case OpCode::BIT_AND_16: + case OpCode::BIT_AND_32: case OpCode::BIT_AND_64: + case OpCode::BIT_OR_8: case OpCode::BIT_OR_16: + case OpCode::BIT_OR_32: case OpCode::BIT_OR_64: + case OpCode::BIT_XOR_8: case OpCode::BIT_XOR_16: + case OpCode::BIT_XOR_32: case OpCode::BIT_XOR_64: + case OpCode::SHL_8: case OpCode::SHL_16: + case OpCode::SHL_32: case OpCode::SHL_64: + case OpCode::SHR_8: case OpCode::SHR_16: + case OpCode::SHR_32: case OpCode::SHR_64: + case OpCode::FADD_32: case OpCode::FADD_64: + case OpCode::FSUB_32: case OpCode::FSUB_64: + case OpCode::FMUL_32: case OpCode::FMUL_64: + case OpCode::FDIV_32: case OpCode::FDIV_64: + case OpCode::FREM_32: case OpCode::FREM_64: { + auto bin = BinaryInst::from(inst); + if (bin) { + Value lhs_val = GetValue(ctx, frame, bin->lhs()); + Value rhs_val = GetValue(ctx, frame, bin->rhs()); + result = ctx.factory.BinaryOp(op, lhs_val, rhs_val); + } + break; + } + + // --- Logical --- + case OpCode::LOGICAL_AND: case OpCode::LOGICAL_OR: { + auto bin = BinaryInst::from(inst); + if (bin) { + Value lhs_val = 
GetValue(ctx, frame, bin->lhs()); + Value rhs_val = GetValue(ctx, frame, bin->rhs()); + result = ctx.factory.BinaryOp(op, lhs_val, rhs_val); + } + break; + } + + // --- Comparisons --- + case OpCode::CMP_EQ_8: case OpCode::CMP_EQ_16: + case OpCode::CMP_EQ_32: case OpCode::CMP_EQ_64: + case OpCode::CMP_NE_8: case OpCode::CMP_NE_16: + case OpCode::CMP_NE_32: case OpCode::CMP_NE_64: + case OpCode::CMP_LT_8: case OpCode::CMP_LT_16: + case OpCode::CMP_LT_32: case OpCode::CMP_LT_64: + case OpCode::CMP_LE_8: case OpCode::CMP_LE_16: + case OpCode::CMP_LE_32: case OpCode::CMP_LE_64: + case OpCode::CMP_GT_8: case OpCode::CMP_GT_16: + case OpCode::CMP_GT_32: case OpCode::CMP_GT_64: + case OpCode::CMP_GE_8: case OpCode::CMP_GE_16: + case OpCode::CMP_GE_32: case OpCode::CMP_GE_64: + case OpCode::UCMP_LT_8: case OpCode::UCMP_LT_16: + case OpCode::UCMP_LT_32: case OpCode::UCMP_LT_64: + case OpCode::UCMP_LE_8: case OpCode::UCMP_LE_16: + case OpCode::UCMP_LE_32: case OpCode::UCMP_LE_64: + case OpCode::UCMP_GT_8: case OpCode::UCMP_GT_16: + case OpCode::UCMP_GT_32: case OpCode::UCMP_GT_64: + case OpCode::UCMP_GE_8: case OpCode::UCMP_GE_16: + case OpCode::UCMP_GE_32: case OpCode::UCMP_GE_64: + case OpCode::FCMP_EQ_32: case OpCode::FCMP_EQ_64: + case OpCode::FCMP_NE_32: case OpCode::FCMP_NE_64: + case OpCode::FCMP_LT_32: case OpCode::FCMP_LT_64: + case OpCode::FCMP_LE_32: case OpCode::FCMP_LE_64: + case OpCode::FCMP_GT_32: case OpCode::FCMP_GT_64: + case OpCode::FCMP_GE_32: case OpCode::FCMP_GE_64: { + auto cmp = ComparisonInst::from(inst); + if (cmp) { + Value lhs_val = GetValue(ctx, frame, cmp->lhs()); + Value rhs_val = GetValue(ctx, frame, cmp->rhs()); + result = ctx.factory.Compare(op, lhs_val, rhs_val); + } + break; + } + + // --- Unary --- + case OpCode::NEG_8: case OpCode::NEG_16: + case OpCode::NEG_32: case OpCode::NEG_64: + case OpCode::FNEG_32: case OpCode::FNEG_64: + case OpCode::BIT_NOT_8: case OpCode::BIT_NOT_16: + case OpCode::BIT_NOT_32: case OpCode::BIT_NOT_64: + case 
OpCode::LOGICAL_NOT: + case OpCode::ABS_8: case OpCode::ABS_16: + case OpCode::ABS_32: case OpCode::ABS_64: { + auto u = UnaryInst::from(inst); + if (u) { + Value operand_val = GetValue(ctx, frame, u->operand()); + result = ctx.factory.UnaryOp(op, operand_val); + } + break; + } + + // --- Cast --- + case OpCode::CAST: { + auto c = CastInst::from(inst); + if (c) { + Value operand_val = GetValue(ctx, frame, c->operand()); + result = ctx.factory.Cast(c->sub_opcode(), operand_val); + } + break; + } + + // --- Read-modify-write --- + case OpCode::READ_MODIFY_WRITE: { + auto rmw = ReadModifyWriteInst::from(inst); + if (!rmw) break; + Value addr = GetValue(ctx, frame, rmw->address()); + auto *ptr = AsPointer(addr); + if (!ptr || !IsConcrete(*ptr)) break; + uint64_t address = ConcreteAddress(*ptr); + + auto underlying = rmw->underlying_op(); + size_t access_sz = UnderlyingOpAccessSize(underlying); + bool rmw_is_float = ir::IsFloatArithmetic(underlying); + Value old_val = MemReadValue(ctx.memory, address, access_sz, + rmw_is_float); + + Value rhs = MakeInt(0); + for (auto rhs_op : rmw->rhs_operands()) { + rhs = GetValue(ctx, frame, rhs_op); + break; + } + + // Overflow-checked arithmetic. 
+ if (underlying >= OpCode::ADD_OVERFLOW_8 && + underlying <= OpCode::MUL_OVERFLOW_64) { + Value a = MakeInt(0), b = MakeInt(0); + int rhs_i = 0; + for (auto rhs_op : rmw->rhs_operands()) { + if (rhs_i == 0) a = GetValue(ctx, frame, rhs_op); + else if (rhs_i == 1) b = GetValue(ctx, frame, rhs_op); + ++rhs_i; + } + __int128 wide; + if (underlying >= OpCode::ADD_OVERFLOW_8 && + underlying <= OpCode::ADD_OVERFLOW_64) + wide = static_cast<__int128>(AsInt(a)) + + static_cast<__int128>(AsInt(b)); + else if (underlying >= OpCode::SUB_OVERFLOW_8 && + underlying <= OpCode::SUB_OVERFLOW_64) + wide = static_cast<__int128>(AsInt(a)) - + static_cast<__int128>(AsInt(b)); + else + wide = static_cast<__int128>(AsInt(a)) * + static_cast<__int128>(AsInt(b)); + + Value new_val = MakeInt(static_cast(wide)); + bool overflow = + (wide != static_cast<__int128>(static_cast(wide))); + MemWriteValue(ctx.memory, address, new_val, access_sz); + result = MakeInt(overflow ? 1 : 0); + break; + } + + // PTR_ADD in RMW. + if (underlying == OpCode::PTR_ADD_32 || + underlying == OpCode::PTR_ADD_64) { + int64_t elem_sz = rmw->element_size(); + if (elem_sz <= 0) elem_sz = 1; + Value new_val = ctx.factory.PtrAdd(old_val, rhs, elem_sz); + MemWriteValue(ctx.memory, address, new_val, access_sz); + result = rmw->returns_new_value() ? new_val : old_val; + break; + } + + // Atomic exchange. + if (underlying >= OpCode::ATOMIC_EXCHANGE_8 && + underlying <= OpCode::ATOMIC_EXCHANGE_64) { + MemWriteValue(ctx.memory, address, rhs, access_sz); + result = rmw->returns_new_value() ? rhs : old_val; + break; + } + + // General case: delegate to factory. + Value new_val = ctx.factory.BinaryOp(underlying, old_val, rhs); + MemWriteValue(ctx.memory, address, new_val, access_sz); + result = rmw->returns_new_value() ? new_val : old_val; + break; + } + + // --- Call --- + case OpCode::CALL: { + if (EvalCall(ctx, inst)) { + // Callee frame was pushed. 
EvalCall already set resume_after_inst + // and the call result will be stored by the RET handler. + return true; + } + // SKIP/MODEL: EvalCall already set frame.values[eid]. + return false; + } + + // --- Select --- + case OpCode::SELECT: { + auto sel = SelectInst::from(inst); + if (sel) { + Value cond = GetValue(ctx, frame, sel->condition()); + Value if_true = GetValue(ctx, frame, sel->true_value()); + Value if_false = GetValue(ctx, frame, sel->false_value()); + result = ctx.factory.Select(cond, if_true, if_false); + } + break; + } + + // --- Last value (comma operator) --- + case OpCode::LAST_VALUE: { + auto lv = LastValueInst::from(inst); + if (lv) { + result = GetValue(ctx, frame, lv->last()); + } + break; + } + + // --- Param pointer --- + case OpCode::PARAM_PTR_32: + case OpCode::PARAM_PTR_64: { + auto pr = ParamPtrInst::from(inst); + if (pr) { + uint32_t idx = pr->parameter_index(); + if (idx < frame.param_ptrs.size()) { + result = frame.param_ptrs[idx]; + } + } + break; + } + + // --- Bitwise intrinsics --- + case OpCode::BITWISE_8: case OpCode::BITWISE_16: + case OpCode::BITWISE_32: case OpCode::BITWISE_64: { + auto bw = BitwiseOpInst::from(inst); + if (bw) { + Value val = MakeUndef(); + Value val2 = MakeUndef(); + int count = 0; + for (auto op_inst : inst.operands()) { + if (count == 0) val = GetValue(ctx, frame, op_inst); + else if (count == 1) val2 = GetValue(ctx, frame, op_inst); + ++count; + } + result = ctx.factory.BitwiseIntrinsic(op, bw->sub_opcode(), + val, val2); + } + break; + } + + // --- Float operations --- + case OpCode::FLOAT: { + auto fo = FloatOpInst::from(inst); + if (fo) { + std::vector ops; + for (auto op_inst : inst.operands()) { + ops.push_back(GetValue(ctx, frame, op_inst)); + } + result = ctx.factory.FloatIntrinsic(fo->sub_opcode(), ops); + } + break; + } + + // --- Global/thread-local pointers --- + case OpCode::GLOBAL_PTR_32: case OpCode::GLOBAL_PTR_64: + case OpCode::THREAD_LOCAL_PTR_32: case OpCode::THREAD_LOCAL_PTR_64: { + 
auto src_eid = inst.source_entity_id(); + + // Fast path: already resolved. + auto git = ctx.state.global_addresses.find(src_eid); + if (git != ctx.state.global_addresses.end()) { + result = MakePtr(git->second); + break; + } + + // Ask the driver for global info. + Suspension s = NeedGlobalResolution{src_eid}; + auto resolution = ctx.driver.Resolve(s); + auto *gr = std::get_if(&resolution); + if (!gr || gr->info.size == 0) break; + + auto &info = gr->info; + auto key = (info.canonical_eid != kInvalidEntityId) + ? info.canonical_eid : src_eid; + + // Check again with canonical key. + git = ctx.state.global_addresses.find(key); + if (git != ctx.state.global_addresses.end()) { + if (key != src_eid) ctx.state.global_addresses[src_eid] = git->second; + result = MakePtr(git->second); + break; + } + + // Allocate and initialize. + uint32_t align = info.align; + if (align == 0) align = 8; + auto addr = ctx.memory.Allocate(info.size, align); + ctx.state.global_addresses[key] = addr; + if (key != src_eid) ctx.state.global_addresses[src_eid] = addr; + + if (info.initializer) { + RunToCompletion(ctx, *info.initializer, {MakePtr(addr)}); + } + + result = MakePtr(addr); + break; + } + + // --- Function pointer --- + case OpCode::FUNC_PTR_32: + case OpCode::FUNC_PTR_64: { + auto src_eid = inst.source_entity_id(); + if (frame.entity_to_address.find(src_eid) == + frame.entity_to_address.end()) { + auto addr = ctx.memory.Allocate(8, 8); + frame.entity_to_address[src_eid] = addr; + ctx.memory.Write(addr, &src_eid, 8); + } + result = MakePtr(frame.entity_to_address[src_eid]); + break; + } + + // --- Return value pointer --- + case OpCode::RETURN_PTR_32: + case OpCode::RETURN_PTR_64: + result = frame.return_ptr; + break; + + // --- Scope markers --- + case OpCode::ENTER_SCOPE: { + auto esi = EnterScopeInst::from(inst); + if (esi) { + auto scope = esi->scope(); + for (auto obj : scope.objects()) { + auto oid = EntityId(obj.id()).Pack(); + auto it = frame.entity_to_address.find(oid); + 
if (it != frame.entity_to_address.end()) { + ctx.memory.Unpoison(it->second); + } + } + } + break; + } + case OpCode::EXIT_SCOPE: { + auto esi = ExitScopeInst::from(inst); + if (esi) { + auto scope = esi->scope(); + for (auto obj : scope.objects()) { + auto oid = EntityId(obj.id()).Pack(); + auto it = frame.entity_to_address.find(oid); + if (it != frame.entity_to_address.end()) { + ctx.memory.Poison(it->second); + } + } + } + break; + } + + // --- Undefined/poison --- + case OpCode::UNDEFINED: + result = MakeUndef(); + break; + + // --- Frame/return address intrinsics --- + case OpCode::FRAME_PTR_32: case OpCode::FRAME_PTR_64: + case OpCode::RETURN_ADDRESS_32: case OpCode::RETURN_ADDRESS_64: + result = MakeUndef(); + break; + + // --- Variadic --- + case OpCode::VA_START: { + auto vai = VAStartInst::from(inst); + if (vai) { + auto va_addr = ResolveVAListAddr(ctx, frame, + vai->va_list_operand()); + if (va_addr) { + uint32_t idx = frame.variadic_start_index; + ctx.memory.Write(*va_addr, &idx, 4); + } + } + break; + } + case OpCode::VA_END: { + auto vei = VAEndInst::from(inst); + if (vei) { + auto va_addr = ResolveVAListAddr(ctx, frame, + vei->va_list_operand()); + if (va_addr) { + uint32_t sentinel = ~0u; + ctx.memory.Write(*va_addr, &sentinel, 4); + } + } + break; + } + case OpCode::VA_COPY: { + auto vci = VACopyInst::from(inst); + if (vci) { + auto src_addr = ResolveVAListAddr(ctx, frame, vci->src()); + auto dst_addr = ResolveVAListAddr(ctx, frame, vci->dest()); + if (src_addr && dst_addr) { + uint32_t idx = 0; + ctx.memory.Read(*src_addr, &idx, 4); + ctx.memory.Write(*dst_addr, &idx, 4); + } + } + break; + } + + // --- Overflow/atomic opcodes (only valid as RMW underlying ops) --- + case OpCode::ADD_OVERFLOW_8: case OpCode::ADD_OVERFLOW_16: + case OpCode::ADD_OVERFLOW_32: case OpCode::ADD_OVERFLOW_64: + case OpCode::SUB_OVERFLOW_8: case OpCode::SUB_OVERFLOW_16: + case OpCode::SUB_OVERFLOW_32: case OpCode::SUB_OVERFLOW_64: + case OpCode::MUL_OVERFLOW_8: case 
OpCode::MUL_OVERFLOW_16: + case OpCode::MUL_OVERFLOW_32: case OpCode::MUL_OVERFLOW_64: + case OpCode::ATOMIC_ADD_8: case OpCode::ATOMIC_ADD_16: + case OpCode::ATOMIC_ADD_32: case OpCode::ATOMIC_ADD_64: + case OpCode::ATOMIC_SUB_8: case OpCode::ATOMIC_SUB_16: + case OpCode::ATOMIC_SUB_32: case OpCode::ATOMIC_SUB_64: + case OpCode::ATOMIC_AND_8: case OpCode::ATOMIC_AND_16: + case OpCode::ATOMIC_AND_32: case OpCode::ATOMIC_AND_64: + case OpCode::ATOMIC_OR_8: case OpCode::ATOMIC_OR_16: + case OpCode::ATOMIC_OR_32: case OpCode::ATOMIC_OR_64: + case OpCode::ATOMIC_XOR_8: case OpCode::ATOMIC_XOR_16: + case OpCode::ATOMIC_XOR_32: case OpCode::ATOMIC_XOR_64: + case OpCode::ATOMIC_NAND_8: case OpCode::ATOMIC_NAND_16: + case OpCode::ATOMIC_NAND_32: case OpCode::ATOMIC_NAND_64: + case OpCode::ATOMIC_EXCHANGE_8: case OpCode::ATOMIC_EXCHANGE_16: + case OpCode::ATOMIC_EXCHANGE_32: case OpCode::ATOMIC_EXCHANGE_64: + break; + + // --- Terminators (handled by Step) --- + case OpCode::COND_BRANCH: + case OpCode::SWITCH: + case OpCode::RET: + case OpCode::UNREACHABLE: + case OpCode::IMPLICIT_UNREACHABLE: + case OpCode::BREAK: + case OpCode::CONTINUE: + case OpCode::GOTO: + case OpCode::IMPLICIT_GOTO: + case OpCode::FALLTHROUGH: + case OpCode::IMPLICIT_FALLTHROUGH: + break; + + case OpCode::UNKNOWN: + break; + } + + // Re-fetch frame: RunToCompletion (for global initializers) may have + // reallocated the call stack, invalidating the cached `frame` reference. 
+ ctx.Frame().values[eid] = result; + return false; +} + +} // namespace + +// =========================================================================== +// Public API +// =========================================================================== + +void InitState(InterpreterState &state, Memory &memory, + const IRFunction &func, const std::vector &args) { + state.call_stack.clear(); + state.call_stack.reserve(256); + state.steps = 0; + + state.call_stack.emplace_back(); + SetupFrame(state.Frame(), memory, func, args); +} + +StepResult Step(InterpreterState &state, Memory &memory, + ValueFactory &factory, Driver &driver) { + if (state.Empty()) { + return {StepStatus::ERROR}; + } + + Ctx ctx{state, memory, factory, driver}; + + // If not resuming from a call, clear cached values for fresh evaluation. + bool resuming = (state.Frame().resume_after_inst != kInvalidEntityId); + if (!resuming) { + state.Frame().values.clear(); + } + + // Save and clear the resume point. We'll skip instructions up to this ID. + RawEntityId skip_until = state.Frame().resume_after_inst; + state.Frame().resume_after_inst = kInvalidEntityId; + bool skipping = (skip_until != kInvalidEntityId); + + auto stack_depth = state.call_stack.size(); + + // Save the block before iterating — the generator must outlive any frame + // push that might reallocate the call stack vector. + auto current_block = state.Frame().current_block; + + for (auto inst : current_block.all_instructions()) { + auto inst_eid = EntityId(inst.id()).Pack(); + + // Skip instructions until we pass the one we're resuming from. + if (skipping) { + if (inst_eid == skip_until) { + skipping = false; + } + continue; + } + + ++state.steps; + auto op = inst.opcode(); + + if (!ir::IsTerminator(op)) { + if (Eval(ctx, inst)) { + // A callee frame was pushed (CALL). Return to let the driver + // loop call Step() on the callee. 
+ return {StepStatus::CONTINUE}; + } + continue; + } + + // ----- Terminator handling ----- + + // We need a fresh frame reference since Eval might have mutated state + // (e.g., RunToCompletion for globals). The frame is still valid because + // we only get here if Eval didn't push (returned false). + auto &frame = state.Frame(); + + if (op == OpCode::RET) { + auto ri = RetInst::from(inst); + Value ret_from_inst = MakeUndef(); + if (ri) { + if (auto rv = ri->return_value()) { + ret_from_inst = GetValue(ctx, frame, *rv); + } + } + + if (state.call_stack.size() > 1) { + // Read return value from callee's return slot. + Value callee_result = ReadReturnValue(memory, frame, ret_from_inst); + + state.call_stack.pop_back(); + + // Store the return value for the CALL instruction in the caller. + auto &caller = state.Frame(); + if (caller.resume_after_inst != kInvalidEntityId) { + caller.values[caller.resume_after_inst] = callee_result; + } + + return {StepStatus::CONTINUE}; + } + + // Top-level return. + Value final_result = ReadReturnValue(memory, frame, ret_from_inst); + return {StepStatus::COMPLETED, {}, final_result}; + } + + if (op == OpCode::UNREACHABLE || op == OpCode::IMPLICIT_UNREACHABLE) { + return {StepStatus::ERROR}; + } + + if (op == OpCode::COND_BRANCH) { + auto cb = CondBranchInst::from(inst); + if (cb) { + Value cond = GetValue(ctx, frame, cb->condition()); + auto truth = factory.IsTrue(cond); + if (truth.has_value()) { + frame.current_block = *truth ? cb->true_block() + : cb->false_block(); + return {StepStatus::CONTINUE}; + } + // Symbolic/unknown: ask driver. + Suspension s = NeedBranchDecision{cond, cb->true_block(), + cb->false_block()}; + auto resolution = driver.Resolve(s); + if (auto *bd = std::get_if(&resolution)) { + frame.current_block = bd->take_true ? 
cb->true_block() + : cb->false_block(); + return {StepStatus::CONTINUE}; + } + return {StepStatus::SUSPENDED, std::move(s)}; + } + return {StepStatus::ERROR}; + } + + if (op == OpCode::SWITCH) { + auto sw = SwitchInst::from(inst); + if (sw) { + Value sel = GetValue(ctx, frame, sw->selector()); + int64_t sel_val = AsInt(sel); + bool found = false; + IRBlock default_block{}; + for (auto sc : sw->cases()) { + if (sc.is_default()) { + default_block = sc.target_block(); + continue; + } + if (sel_val >= sc.low() && sel_val <= sc.high()) { + frame.current_block = sc.target_block(); + found = true; + break; + } + } + if (!found) { + if (EntityId(default_block.id()).Pack()) { + frame.current_block = default_block; + } else { + return {StepStatus::ERROR}; + } + } + return {StepStatus::CONTINUE}; + } + return {StepStatus::ERROR}; + } + + // All other terminators: unconditional branch. + { + auto br = BranchInst::from(inst); + if (br) { + frame.current_block = br->target_block(); + return {StepStatus::CONTINUE}; + } + } + return {StepStatus::ERROR}; + } + + // Block ended without terminator. + return {StepStatus::ERROR}; +} + +} // namespace mx::ir::interpret diff --git a/lib/IR/Object.cpp b/lib/IR/Object.cpp index b43f5e594..19a5e6471 100644 --- a/lib/IR/Object.cpp +++ b/lib/IR/Object.cpp @@ -37,6 +37,11 @@ uint32_t IRObject::align_bytes(void) const { return impl->reader().getAlignBytes(); } +uint32_t IRObject::frame_offset(void) const { + if (!impl) return 0; + return impl->reader().getFrameOffset(); +} + std::optional IRObject::source_declaration(void) const { if (!impl) return std::nullopt; auto eid = impl->reader().getSourceDeclId(); diff --git a/tests/InterpretIR/test_evil_goto.c b/tests/InterpretIR/test_evil_goto.c index cb69fddf6..382da3eff 100644 --- a/tests/InterpretIR/test_evil_goto.c +++ b/tests/InterpretIR/test_evil_goto.c @@ -502,6 +502,54 @@ static int multi_source_goto(int path) { return result; } +// Goto target after a return, inside a nested scope. 
+static int goto_after_return(void) { + int result = 0; + goto target; + result = 999; // skipped + return result; // skipped + { + int x = 10; // skipped +target: + result = x + 42; // x is uninit, but we only care about reaching here + result = 42; + } + return result; +} + +// Goto into a scope inside an if-else where both branches return. +static int goto_past_if_return(int path) { + int result = 0; + if (path == 1) { + goto after_if; + } + if (path == 2) { + result = 20; + return result; + } + result = 30; + return result; +after_if: + result = 10; + return result; +} + +// Goto target inside deeply nested compound statements after dead code. +static int goto_deep_compound(void) { + int result = 0; + goto deep; + result = 999; // skipped + { + result = 888; // skipped + { + result = 777; // skipped +deep: + result = 42; + } + } + return result; +} + int test_evil_goto(void) { // Duff's device. { @@ -543,5 +591,16 @@ int test_evil_goto(void) { if (multi_source_goto(2) != 20) return 14; if (multi_source_goto(0) != 30) return 15; + // Goto target after return, inside nested scope. + if (goto_after_return() != 42) return 16; + + // Goto past if/return. + if (goto_past_if_return(1) != 10) return 17; + if (goto_past_if_return(2) != 20) return 18; + if (goto_past_if_return(0) != 30) return 19; + + // Goto into deeply nested compound after dead code. + if (goto_deep_compound() != 42) return 20; + return 0; }