diff --git a/bindings/Python/Interpreter.cpp b/bindings/Python/Interpreter.cpp index 74abc12ba..6c5f1b92b 100644 --- a/bindings/Python/Interpreter.cpp +++ b/bindings/Python/Interpreter.cpp @@ -69,16 +69,6 @@ Value python_to_value(PyObject *obj) { } return make_int(v); } - if (PyTuple_Check(obj) && PyTuple_Size(obj) == 2) { - // Legacy ("ptr", addr) tuple — treat as integer. - PyObject *tag = PyTuple_GetItem(obj, 0); - if (tag && PyUnicode_Check(tag)) { - const char *s = PyUnicode_AsUTF8(tag); - if (s && std::strcmp(s, "ptr") == 0) { - return make_uint(PyLong_AsUnsignedLongLong(PyTuple_GetItem(obj, 1))); - } - } - } return make_uint(0); } @@ -548,34 +538,52 @@ static PyObject *py_init_state(PyObject *, PyObject *args) { return nullptr; } -// Concrete-path step result — translates the StepOutcome's terminal -// (completed / errored) or suspension shape into the Python dict the -// existing harness expects. +// Concrete-path step result — wraps the StepOutcome's terminal / +// suspension shape in the typed dataclasses defined in +// `multiplier.symex.events`. Class refs are loaded lazily on first +// call; subsequent calls reuse the cached refs. static PyObject *build_result_dict( const ir::interpret::StepOutcome &outcome) { + static PyObject *cls_Completed = nullptr; + static PyObject *cls_Errored = nullptr; + static PyObject *cls_Suspended = nullptr; + if (!cls_Completed) { + PyObject *mod = PyImport_ImportModule("multiplier.symex.events"); + if (!mod) return nullptr; + cls_Completed = PyObject_GetAttrString(mod, "Completed"); + cls_Errored = PyObject_GetAttrString(mod, "Errored"); + cls_Suspended = PyObject_GetAttrString(mod, "Suspended"); + Py_DECREF(mod); + if (!cls_Completed || !cls_Errored || !cls_Suspended) return nullptr; + } + PyObject *result_dict = PyDict_New(); if (!result_dict) return nullptr; - PyObject *result_tuple; + PyObject *result_obj = nullptr; if (outcome.terminal) { auto &term = *outcome.terminal; if (term.kind == TerminalKind::COMPLETED) { PyObject *py_val = value_to_python(term.return_value); - result_tuple = Py_BuildValue("(sN)", "completed", py_val); + result_obj = PyObject_CallFunction(cls_Completed, "(N)", py_val); } else { - result_tuple = Py_BuildValue("(si)", "error", - static_cast(term.error_kind)); + result_obj = PyObject_CallFunction( + cls_Errored, "(i)", static_cast(term.error_kind)); } } else if (!outcome.continuations.empty()) { auto &cont = *outcome.continuations.front(); - PyObject *desc = PyUnicode_FromString(cont.describe().c_str()); - result_tuple = Py_BuildValue("(sN)", "suspended", desc); + result_obj = PyObject_CallFunction( + cls_Suspended, "(s)", cont.describe().c_str()); } else { - result_tuple = Py_BuildValue("(s)", "suspended"); + result_obj = PyObject_CallFunction(cls_Suspended, "(s)", ""); + } + if (!result_obj) { + Py_DECREF(result_dict); + return nullptr; } - PyDict_SetItemString(result_dict, "result", result_tuple); - Py_XDECREF(result_tuple); + PyDict_SetItemString(result_dict, "result", result_obj); + Py_DECREF(result_obj); PyObject *empty_forks = PyList_New(0); PyDict_SetItemString(result_dict, "forks", empty_forks); @@ -625,12 +633,21 @@ static PyObject *py_step(PyObject *, PyObject *args) { } if (budget_hit) { + static PyObject *cls_Budget = nullptr; + if (!cls_Budget) { + PyObject *mod = PyImport_ImportModule("multiplier.symex.events"); + if (!mod) return nullptr; + cls_Budget = PyObject_GetAttrString(mod, "Budget"); + Py_DECREF(mod); + if (!cls_Budget) return nullptr; + } PyObject *result_dict = PyDict_New(); if (!result_dict) return nullptr; - PyObject *budget_tuple = Py_BuildValue( - "(sK)", "budget", concrete->steps); - PyDict_SetItemString(result_dict, "result", budget_tuple); - Py_XDECREF(budget_tuple); + PyObject *budget_obj = PyObject_CallFunction( + cls_Budget, "(K)", concrete->steps); + if (!budget_obj) { Py_DECREF(result_dict); return nullptr; } + PyDict_SetItemString(result_dict, "result", budget_obj); + Py_DECREF(budget_obj); PyObject *empty_forks = PyList_New(0); PyDict_SetItemString(result_dict, "forks", empty_forks); Py_DECREF(empty_forks); @@ -701,10 +718,10 @@ static PyObject *py_resume_addr(PyObject *, PyObject *args) { "resume_addr: symbolic state has no live call frame"); return nullptr; } - PyObject *ptr_tuple = Py_BuildValue("(sK)", "ptr", chosen_addr); - if (!ptr_tuple) return nullptr; - SharedPyPtr ptr_val(ptr_tuple); - Py_DECREF(ptr_tuple); + PyObject *ptr_obj = PyLong_FromUnsignedLongLong(chosen_addr); + if (!ptr_obj) return nullptr; + SharedPyPtr ptr_val(ptr_obj); + Py_DECREF(ptr_obj); symbolic->call_stack.top().values[address_eid] = ptr_val; Py_RETURN_NONE; } @@ -742,6 +759,39 @@ static PyObject *py_resume_addr_symbolic(PyObject *, PyObject *args) { Py_RETURN_NONE; } +// Resume a state suspended on an unresolved global. The snapshot's +// work_stack still holds the suspended COMPUTE_GLOBAL_PTR item, so +// stepping it after this call will re-enter `compute_global_ptr` and +// hit the `state.global_addresses` cache — that cache is what the +// substrate reads first (InterpreterLoop.h:791-795). +// +// resume_global(state, entity_id, concrete_addr) +static PyObject *py_resume_global(PyObject *, PyObject *args) { + PyObject *state_obj; + uint64_t entity_id; + uint64_t chosen_addr; + if (!PyArg_ParseTuple(args, "OKK", &state_obj, &entity_id, + &chosen_addr)) { + return nullptr; + } + if (Py_TYPE(state_obj) != &InterpreterStateType) { + PyErr_SetString(PyExc_TypeError, "Expected InterpreterState"); + return nullptr; + } + auto *sw = reinterpret_cast(state_obj); + auto *symbolic = sw->symbolic_state + ? reinterpret_cast *>( + sw->symbolic_state)->data + : nullptr; + if (!symbolic) { + PyErr_SetString(PyExc_RuntimeError, + "resume_global: symbolic state not initialized"); + return nullptr; + } + symbolic->global_addresses[entity_id] = chosen_addr; + Py_RETURN_NONE; +} + // Read the cached Python value at a given operand entity-id from the // live call frame. Returns None when the slot has not been populated. // Used by Phase 6 P6.0 to validate `resume_addr_symbolic`'s round-trip @@ -977,6 +1027,11 @@ static PyMethodDef InterpreterMethods[] = { " Resume a state suspended on a symbolic SWITCH selector by " "entering the chosen target IRBlock. Path-condition constraints are " "the driver's responsibility."}, + {"resume_global", py_resume_global, METH_VARARGS, + "resume_global(state, entity_id, concrete_addr)\n" + " Resume a state suspended on an unresolved global by writing the " + "chosen address into the global_addresses cache; on the next step " + "the re-pushed COMPUTE_GLOBAL_PTR item cache-hits and proceeds."}, {"get_value_at", py_get_value_at, METH_VARARGS, "get_value_at(state, eid) -> Python value\n" " Read the cached value at an operand entity-id from the live " diff --git a/bindings/Python/SymbolicInterpreter.cpp b/bindings/Python/SymbolicInterpreter.cpp index 02b3ddeb1..bc8f48af1 100644 --- a/bindings/Python/SymbolicInterpreter.cpp +++ b/bindings/Python/SymbolicInterpreter.cpp @@ -61,15 +61,6 @@ Value python_to_value(PyObject *obj) { } return make_int(v); } - if (PyTuple_Check(obj) && PyTuple_Size(obj) == 2) { - PyObject *tag = PyTuple_GetItem(obj, 0); - if (tag && PyUnicode_Check(tag)) { - const char *s = PyUnicode_AsUTF8(tag); - if (s && std::strcmp(s, "ptr") == 0) { - return make_uint(PyLong_AsUnsignedLongLong(PyTuple_GetItem(obj, 1))); - } - } - } return make_uint(0); } @@ -104,15 +95,6 @@ Value python_to_value_f32_aware(PyObject *obj, bool needs_f32) { } return make_int(v); } - if (PyTuple_Check(obj) && PyTuple_Size(obj) == 2) { - PyObject *tag = PyTuple_GetItem(obj, 0); - if (tag && PyUnicode_Check(tag)) { - const char *s = PyUnicode_AsUTF8(tag); - if (s && std::strcmp(s, "ptr") == 0) { - return make_uint(PyLong_AsUnsignedLongLong(PyTuple_GetItem(obj, 1))); - } - } - } return make_uint(0); } @@ -135,17 +117,6 @@ SharedPyPtr float_value_to_shared(const Value &v, uint32_t size) { return result; } -// Check if a SharedPyPtr holds a ("ptr", addr) tuple. -std::optional extract_ptr_tuple(PyObject *obj) { - if (!obj || !PyTuple_Check(obj) || PyTuple_Size(obj) != 2) return std::nullopt; - PyObject *tag = PyTuple_GetItem(obj, 0); - if (!tag || !PyUnicode_Check(tag)) return std::nullopt; - const char *s = PyUnicode_AsUTF8(tag); - if (!s || std::strcmp(s, "ptr") != 0) return std::nullopt; - PyObject *addr_obj = PyTuple_GetItem(obj, 1); - return PyLong_AsUnsignedLongLong(addr_obj); -} - // Wrapper struct layouts — must match Interpreter.cpp exactly. struct ConcreteMemoryWrapper { PyObject_HEAD @@ -304,24 +275,14 @@ static inline PyObject *or_none(PyObject *p) noexcept { // =========================================================================== std::optional PythonPolicy::extract_address(const SharedPyPtr &val) { - if (auto a = extract_ptr_tuple(val.Get())) return a; - // A bare PyLong is also a concrete address — it's what - // `value_to_python` produces for a pointer loaded out of a slot - // (the C-level `Value` carries no "pointer-ness" tag, so the slot - // load surfaces as an int). Treating only `("ptr", N)` tuples as - // concrete sent every loaded-pointer dereference through the - // symbolic-suspension path, which the default address strategy - // collapsed to zero. PyObject *obj = val.Get(); - if (obj && PyLong_Check(obj) && !PyBool_Check(obj)) { - uint64_t v = PyLong_AsUnsignedLongLong(obj); - if (v == static_cast(-1) && PyErr_Occurred()) { - PyErr_Clear(); - return std::nullopt; - } - return v; + if (!obj || !PyLong_Check(obj) || PyBool_Check(obj)) return std::nullopt; + uint64_t v = PyLong_AsUnsignedLongLong(obj); + if (v == static_cast(-1) && PyErr_Occurred()) { + PyErr_Clear(); + return std::nullopt; } - return std::nullopt; + return v; } int64_t PythonPolicy::extract_int(const SharedPyPtr &val) { @@ -358,7 +319,7 @@ SharedPyPtr PythonPolicy::make_literal_int(int64_t v, uint8_t) { } SharedPyPtr PythonPolicy::make_literal_ptr(uint64_t addr) { - PyObject *obj = Py_BuildValue("(sK)", "ptr", addr); + PyObject *obj = PyLong_FromUnsignedLongLong(addr); SharedPyPtr result(obj); Py_XDECREF(obj); return result; @@ -369,7 +330,14 @@ SharedPyPtr PythonPolicy::make_default() { } bool PythonPolicy::has_address(const SharedPyPtr &val) { - return extract_ptr_tuple(val.Get()).has_value(); + PyObject *obj = val.Get(); + if (!obj || !PyLong_Check(obj) || PyBool_Check(obj)) return false; + uint64_t v = PyLong_AsUnsignedLongLong(obj); + if (v == static_cast(-1) && PyErr_Occurred()) { + PyErr_Clear(); + return false; + } + return v != 0; } // =========================================================================== @@ -512,11 +480,6 @@ SharedPyPtr PythonPolicy::ptr_add(const SharedPyPtr &base, if (!result) { capture_exception(); return make_default(); } Py_DECREF(result); } - // ptr_add yields a pointer; preserve the ("ptr", N) tagging so - // downstream `extract_address` can recover it. value_to_shared loses - // the tag (it always returns a PyLong), which would force callers - // through the symbolic-address suspension path with a default - // strategy that resolves to 0 and corrupts later memory ops. Value v = concrete_ptr_add( python_to_value(base.Get()), python_to_value(index.Get()), element_size); return make_literal_ptr(v.u64); @@ -883,38 +846,38 @@ bool PythonPolicy::resolve_call(PythonScheduler &, return false; } if (result != Py_NotImplemented && result != Py_None) { - // Parse ("skip", return_value) or ("model", return_value). - if (PyTuple_Check(result) && PyTuple_Size(result) == 2) { - PyObject *tag = PyTuple_GetItem(result, 0); - PyObject *val = PyTuple_GetItem(result, 1); - if (tag && PyUnicode_Check(tag)) { - const char *action = PyUnicode_AsUTF8(tag); - if (action && std::strcmp(action, "skip") == 0) { - resolution.action = CallAction::SKIP; - if (val && val != Py_None) { - resolution.return_value = SharedPyPtr(val); - } else { - resolution.return_value = make_default(); - } - Py_DECREF(result); - return true; - } - if (action && std::strcmp(action, "model") == 0) { - resolution.action = CallAction::MODEL; - if (val && val != Py_None) { - resolution.return_value = SharedPyPtr(val); - } else { - resolution.return_value = make_default(); - } - Py_DECREF(result); - return true; - } + // Any non-None / non-NotImplemented return is a skip with that + // value as the call's result. A `Skip(value)` instance is the + // disambiguator for "skip even though the value is None"; we + // unwrap it here. Anything else is taken as the value itself. + static PyObject *cls_Skip = nullptr; + if (!cls_Skip) { + PyObject *mod = PyImport_ImportModule("multiplier.symex.events"); + if (mod) { + cls_Skip = PyObject_GetAttrString(mod, "Skip"); + Py_DECREF(mod); + if (!cls_Skip) PyErr_Clear(); + } else { + PyErr_Clear(); } } + resolution.action = CallAction::SKIP; + if (cls_Skip && PyObject_IsInstance(result, cls_Skip) > 0) { + PyObject *val = PyObject_GetAttrString(result, "value"); + if (!val) { + PyErr_Clear(); + resolution.return_value = make_default(); + } else { + resolution.return_value = SharedPyPtr(val); + Py_DECREF(val); + } + } else { + resolution.return_value = SharedPyPtr(result); + } Py_DECREF(result); - } else { - Py_DECREF(result); + return true; } + Py_DECREF(result); } // Fall through to C++ func_resolver_. @@ -1257,23 +1220,63 @@ PyObject *SymbolicStep(PyObject *state_obj, PyObject *memory_obj, sched.outcome.budget_exhausted = budget_hit; sched.outcome.steps = symbolic->steps; + // Lazily resolve the result/fork dataclass refs from the events + // module on first use. Each holds a strong ref for the lifetime of + // the process — they're class objects, not instances. + static PyObject *cls_Completed = nullptr; + static PyObject *cls_Errored = nullptr; + static PyObject *cls_Budget = nullptr; + static PyObject *cls_Branch = nullptr; + static PyObject *cls_Switch = nullptr; + static PyObject *cls_MemAddr = nullptr; + static PyObject *cls_GlobalSusp = nullptr; + static PyObject *cls_Suspended = nullptr; + static PyObject *cls_BranchFork = nullptr; + static PyObject *cls_SwitchFork = nullptr; + static PyObject *cls_MemAddrFork = nullptr; + static PyObject *cls_GlobalFork = nullptr; + if (!cls_Completed) { + PyObject *mod = PyImport_ImportModule("multiplier.symex.events"); + if (!mod) return nullptr; + auto fetch = [&](const char *name) -> PyObject * { + return PyObject_GetAttrString(mod, name); + }; + cls_Completed = fetch("Completed"); + cls_Errored = fetch("Errored"); + cls_Budget = fetch("Budget"); + cls_Branch = fetch("Branch"); + cls_Switch = fetch("Switch"); + cls_MemAddr = fetch("MemAddrSuspension"); + cls_GlobalSusp = fetch("GlobalSuspension"); + cls_Suspended = fetch("Suspended"); + cls_BranchFork = fetch("BranchFork"); + cls_SwitchFork = fetch("SwitchFork"); + cls_MemAddrFork = fetch("MemAddrFork"); + cls_GlobalFork = fetch("GlobalFork"); + Py_DECREF(mod); + if (!cls_Completed || !cls_Errored || !cls_Budget || !cls_Branch || + !cls_Switch || !cls_MemAddr || !cls_GlobalSusp || !cls_Suspended || + !cls_BranchFork || !cls_SwitchFork || !cls_MemAddrFork || + !cls_GlobalFork) { + return nullptr; + } + } + PyObject *result_dict = PyDict_New(); if (!result_dict) return nullptr; + PyObject *result_obj = nullptr; + // Terminal outcomes take precedence over continuations. if (sched.outcome.terminal) { auto &term = *sched.outcome.terminal; if (term.kind == TerminalKind::COMPLETED) { PyObject *py_val = term.return_value.Get(); if (!py_val) py_val = Py_None; - PyObject *result_tuple = Py_BuildValue("(sO)", "completed", py_val); - PyDict_SetItemString(result_dict, "result", result_tuple); - Py_XDECREF(result_tuple); + result_obj = PyObject_CallFunction(cls_Completed, "(O)", py_val); } else { - PyObject *result_tuple = Py_BuildValue( - "(si)", "error", static_cast(term.error_kind)); - PyDict_SetItemString(result_dict, "result", result_tuple); - Py_XDECREF(result_tuple); + result_obj = PyObject_CallFunction( + cls_Errored, "(i)", static_cast(term.error_kind)); } } else if (!sched.outcome.continuations.empty()) { auto *first = sched.outcome.continuations.front().get(); @@ -1282,172 +1285,152 @@ PyObject *SymbolicStep(PyObject *state_obj, PyObject *memory_obj, if (!cond_obj) cond_obj = Py_None; uint64_t tb_eid = EntityId(bc->true_block().id()).Pack(); uint64_t fb_eid = EntityId(bc->false_block().id()).Pack(); - PyObject *result_tuple = Py_BuildValue( - "(sOKK)", "branch", cond_obj, tb_eid, fb_eid); - PyDict_SetItemString(result_dict, "result", result_tuple); - Py_XDECREF(result_tuple); + result_obj = PyObject_CallFunction( + cls_Branch, "(OKK)", cond_obj, tb_eid, fb_eid); } else if (auto *mc = dynamic_cast *>(first)) { PyObject *addr_obj = mc->symbolic_address().Get(); if (!addr_obj) addr_obj = Py_None; - const char *sub_kind; - if (mc->is_call_target()) { - sub_kind = "call-addr"; // Phase 9: symbolic indirect-call callee - } else if (mc->is_write()) { - sub_kind = "store-addr"; - } else { - sub_kind = "load-addr"; - } - PyObject *result_tuple = Py_BuildValue( - "(ssOKIN)", "suspended", - sub_kind, + result_obj = PyObject_CallFunction( + cls_MemAddr, "(OKIOO)", addr_obj, static_cast(mc->address_eid()), static_cast(mc->size_bytes()), - PyBool_FromLong(mc->is_write() ? 1 : 0)); - PyDict_SetItemString(result_dict, "result", result_tuple); - Py_XDECREF(result_tuple); + mc->is_write() ? Py_True : Py_False, + mc->is_call_target() ? Py_True : Py_False); } else if (auto *sc = dynamic_cast *>(first)) { PyObject *sel_obj = sc->selector().Get(); if (!sel_obj) sel_obj = Py_None; - PyObject *result_tuple = Py_BuildValue( - "(sOK)", "switch", sel_obj, + result_obj = PyObject_CallFunction( + cls_Switch, "(OK)", sel_obj, static_cast(sc->selector_eid())); - PyDict_SetItemString(result_dict, "result", result_tuple); - Py_XDECREF(result_tuple); + } else if (auto *gc = dynamic_cast *>(first)) { + result_obj = PyObject_CallFunction( + cls_GlobalSusp, "(KK)", + static_cast(gc->entity_id()), + static_cast(gc->instruction_id())); } else { - PyObject *desc = PyUnicode_FromString(first->describe().c_str()); - PyObject *result_tuple = Py_BuildValue("(sO)", "suspended", desc); - PyDict_SetItemString(result_dict, "result", result_tuple); - Py_XDECREF(result_tuple); - Py_XDECREF(desc); + result_obj = PyObject_CallFunction( + cls_Suspended, "(s)", first->describe().c_str()); } + } else if (sched.outcome.budget_exhausted) { + result_obj = PyObject_CallFunction( + cls_Budget, "(K)", sched.outcome.steps); } else { - if (sched.outcome.budget_exhausted) { - PyObject *budget_tuple = Py_BuildValue( - "(sK)", "budget", sched.outcome.steps); - PyDict_SetItemString(result_dict, "result", budget_tuple); - Py_XDECREF(budget_tuple); - } else { - PyDict_SetItemString(result_dict, "result", Py_None); - } + Py_INCREF(Py_None); + result_obj = Py_None; + } + + if (!result_obj) { + Py_DECREF(result_dict); + return nullptr; } + PyDict_SetItemString(result_dict, "result", result_obj); + Py_DECREF(result_obj); - // Build forks list by enumerating each continuation. Branches walk - // their `next()` enumeration ({false, true}); each Resumption already - // carries a clone with cond_eid bound and DECIDE_COND_BRANCH re-pushed - // — stepping it dispatches the right edge. Memory-address suspensions - // produce a single fork holding the snapshot — the driver picks - // concrete addresses and calls `resume_addr` on each clone. + // Build forks list as typed *Fork dataclass instances. Each + // continuation contributes one or more entries depending on its + // enumeration shape (branches walk `next()` for {false, true}; the + // others produce one snapshot fork). PyObject *forks_list = PyList_New(0); for (auto &cont : sched.outcome.continuations) { if (auto *bc = dynamic_cast *>(cont.get())) { while (auto resumption = bc->next()) { - // resumption->state is already a fresh PyRef; - // hand it to the wrapper, which steals the strong ref. PyObject *state_obj = MakeSymbolicStateWrapper( std::move(resumption->state)); - PyObject *dict = PyDict_New(); - PyDict_SetItemString(dict, "state", state_obj); - Py_DECREF(state_obj); PyObject *dir_str = PyUnicode_FromString(resumption->label.c_str()); - PyDict_SetItemString(dict, "direction", dir_str); + PyObject *fork = PyObject_CallFunction( + cls_BranchFork, "(OO)", state_obj, dir_str); + Py_DECREF(state_obj); Py_DECREF(dir_str); - PyList_Append(forks_list, dict); - Py_DECREF(dict); + if (!fork) { + Py_DECREF(forks_list); + Py_DECREF(result_dict); + return nullptr; + } + PyList_Append(forks_list, fork); + Py_DECREF(fork); } } else if (auto *sc = dynamic_cast *>(cont.get())) { auto snap = sc->snapshot(); if (!snap) continue; - // One fork entry per switch suspension; the driver walks - // `cases` + `default_block_eid` and clones the snapshot per - // child. The snapshot is stashed in `state` (already a fresh - // wrapper, not cloned — the driver clones via _interp.clone_state). PyObject *state_obj = MakeSymbolicStateWrapper(snap->clone()); - PyObject *dict = PyDict_New(); - PyDict_SetItemString(dict, "state", state_obj); - Py_XDECREF(state_obj); - PyObject *kind_str = PyUnicode_FromString("switch"); - PyDict_SetItemString(dict, "kind", kind_str); - Py_DECREF(kind_str); PyObject *sel_obj = sc->selector().Get(); if (!sel_obj) sel_obj = Py_None; - Py_INCREF(sel_obj); - PyDict_SetItemString(dict, "selector", sel_obj); - Py_DECREF(sel_obj); - PyObject *eid_obj = PyLong_FromUnsignedLongLong(sc->selector_eid()); - PyDict_SetItemString(dict, "selector_eid", eid_obj); - Py_DECREF(eid_obj); PyObject *cases_list = PyList_New(0); for (const auto &c : sc->cases()) { uint64_t target_eid = EntityId(c.target_block.id()).Pack(); PyObject *block_obj = ::mx::to_python(c.target_block); - if (!block_obj) { - Py_INCREF(Py_None); - block_obj = Py_None; - } - // (low, high, target_block_eid, target_block_obj). Driver uses - // the eid for events / path-condition records and the IRBlock - // object for `_interp.resume_switch_case`. + if (!block_obj) { Py_INCREF(Py_None); block_obj = Py_None; } PyObject *case_tuple = Py_BuildValue( "(LLKN)", static_cast(c.low), static_cast(c.high), - target_eid, block_obj); // 'N' steals block_obj + target_eid, block_obj); PyList_Append(cases_list, case_tuple); Py_DECREF(case_tuple); } - PyDict_SetItemString(dict, "cases", cases_list); - Py_DECREF(cases_list); + PyObject *def_block = nullptr; + PyObject *def_eid_obj = nullptr; uint64_t default_eid = EntityId(sc->default_block().id()).Pack(); if (default_eid != 0) { - PyObject *def_eid = PyLong_FromUnsignedLongLong(default_eid); - PyDict_SetItemString(dict, "default_block_eid", def_eid); - Py_DECREF(def_eid); - PyObject *def_block = ::mx::to_python(sc->default_block()); - if (!def_block) { - Py_INCREF(Py_None); - def_block = Py_None; - } - PyDict_SetItemString(dict, "default_block", def_block); - Py_DECREF(def_block); + def_eid_obj = PyLong_FromUnsignedLongLong(default_eid); + def_block = ::mx::to_python(sc->default_block()); + if (!def_block) { Py_INCREF(Py_None); def_block = Py_None; } } else { - Py_INCREF(Py_None); - PyDict_SetItemString(dict, "default_block_eid", Py_None); - Py_DECREF(Py_None); - Py_INCREF(Py_None); - PyDict_SetItemString(dict, "default_block", Py_None); - Py_DECREF(Py_None); + Py_INCREF(Py_None); def_eid_obj = Py_None; + Py_INCREF(Py_None); def_block = Py_None; + } + PyObject *fork = PyObject_CallFunction( + cls_SwitchFork, "(OOKOOO)", + state_obj, sel_obj, + static_cast(sc->selector_eid()), + cases_list, def_block, def_eid_obj); + Py_DECREF(state_obj); + Py_DECREF(cases_list); + Py_DECREF(def_block); + Py_DECREF(def_eid_obj); + if (!fork) { + Py_DECREF(forks_list); + Py_DECREF(result_dict); + return nullptr; } - PyList_Append(forks_list, dict); - Py_DECREF(dict); + PyList_Append(forks_list, fork); + Py_DECREF(fork); + } else if (auto *gc = dynamic_cast *>(cont.get())) { + auto snap = gc->snapshot(); + if (!snap) continue; + PyObject *state_obj = MakeSymbolicStateWrapper(snap->clone()); + PyObject *fork = PyObject_CallFunction( + cls_GlobalFork, "(OKK)", state_obj, + static_cast(gc->entity_id()), + static_cast(gc->instruction_id())); + Py_DECREF(state_obj); + if (!fork) { + Py_DECREF(forks_list); + Py_DECREF(result_dict); + return nullptr; + } + PyList_Append(forks_list, fork); + Py_DECREF(fork); } else if (auto *mc = dynamic_cast *>(cont.get())) { auto snap = mc->snapshot(); if (!snap) continue; - // Clone the snapshot so the driver can mutate (resume_addr) without - // disturbing the original continuation's snapshot reference. PyObject *state_obj = MakeSymbolicStateWrapper(snap->clone()); - PyObject *dict = PyDict_New(); - PyDict_SetItemString(dict, "state", state_obj); - Py_XDECREF(state_obj); - PyObject *kind_str = PyUnicode_FromString( - mc->is_write() ? "store-addr" : "load-addr"); - PyDict_SetItemString(dict, "kind", kind_str); - Py_DECREF(kind_str); PyObject *addr_obj = mc->symbolic_address().Get(); if (!addr_obj) addr_obj = Py_None; - Py_INCREF(addr_obj); - PyDict_SetItemString(dict, "address", addr_obj); - Py_DECREF(addr_obj); - PyObject *eid_obj = PyLong_FromUnsignedLongLong(mc->address_eid()); - PyDict_SetItemString(dict, "address_eid", eid_obj); - Py_DECREF(eid_obj); - PyObject *size_obj = PyLong_FromUnsignedLong(mc->size_bytes()); - PyDict_SetItemString(dict, "size", size_obj); - Py_DECREF(size_obj); - PyObject *iw_obj = PyBool_FromLong(mc->is_write() ? 1 : 0); - PyDict_SetItemString(dict, "is_write", iw_obj); - Py_DECREF(iw_obj); - PyList_Append(forks_list, dict); - Py_DECREF(dict); + PyObject *fork = PyObject_CallFunction( + cls_MemAddrFork, "(OOKIO)", + state_obj, addr_obj, + static_cast(mc->address_eid()), + static_cast(mc->size_bytes()), + mc->is_write() ? Py_True : Py_False); + Py_DECREF(state_obj); + if (!fork) { + Py_DECREF(forks_list); + Py_DECREF(result_dict); + return nullptr; + } + PyList_Append(forks_list, fork); + Py_DECREF(fork); } } PyDict_SetItemString(result_dict, "forks", forks_list); diff --git a/bindings/Python/SymbolicInterpreter.h b/bindings/Python/SymbolicInterpreter.h index a30689824..c90cf15fe 100644 --- a/bindings/Python/SymbolicInterpreter.h +++ b/bindings/Python/SymbolicInterpreter.h @@ -252,6 +252,22 @@ class PythonPolicy bool resolve_global(PythonScheduler &sched, RawEntityId entity_id, GlobalResolution &resolution); + // Symbolic-side suspension for unresolved globals: snapshot, push the + // current work item back so resumption retries the COMPUTE_GLOBAL_PTR, + // emit a GlobalContinuation, and clear the live work_stack. + void on_unresolved_global_impl(PythonScheduler &sched, + SymbolicState &state, + RawEntityId src_eid, + RawEntityId instruction_eid) { + auto snap = make_sharable(state); + snap->work_stack.push_back(state.current_item); + auto cont = std::make_unique< + GlobalContinuation>( + std::move(snap), src_eid, instruction_eid); + sched.outcome.continuations.emplace_back(std::move(cont)); + state.work_stack.clear(); + } + // Reverse-direction resolver: address → entity id. RawEntityId entity_for_address_impl(uint64_t addr) { if (entity_by_addr_resolver_) { diff --git a/bindings/Python/symex/ctx.py b/bindings/Python/symex/ctx.py index cdd7a70d4..ce95056a0 100644 --- a/bindings/Python/symex/ctx.py +++ b/bindings/Python/symex/ctx.py @@ -37,14 +37,16 @@ def __init__(self, *, path, mem, args=None, layout=None, solver=None): self.solver = solver def default(self): - """The substrate's "natural default" for the current event. + """The substrate's "natural default" for the current event, + wrapped in `Skip` so the substrate treats it as an explicit + skip-with-this-value rather than a fall-through. - Phase 2 returns `None` for every event; per-event typed - defaults (e.g., width-correct zero for memory reads) can land - in Phase 3 if a use-case demands it. Use this when you want to - short-circuit a call or write without inlining. + Phase 2 uses `None` as the default value for every event; + per-event typed defaults (e.g., width-correct zero for + memory reads) can land later if a use-case demands it. """ - return None + from .events import Skip + return Skip() def stop_path(self): """Mark the current path as stopped. diff --git a/bindings/Python/symex/dispatch.py b/bindings/Python/symex/dispatch.py index 126e03827..eb6312cf0 100644 --- a/bindings/Python/symex/dispatch.py +++ b/bindings/Python/symex/dispatch.py @@ -41,7 +41,7 @@ BRANCH, LOOP, CONCRETIZE, BLOCK_ENTER, INSTRUCTION, ADDRESS_FOR, ADDRESS_RESOLVED, - EventKind, Phase, CallAction, VALUE_TAG_PTR, + EventKind, Phase, ) from .lens import MemView, ArgsView @@ -61,8 +61,8 @@ def _op_range(lo_name, hi_name): class SymExpr: """Default symbolic-value sentinel produced by InterceptorPolicy. - The class shape (not a 2-tuple) avoids the substrate's - `("ptr", N)` heuristic seeing a symbolic value as a pointer. The + A class (not an int) so the substrate's bare-int address heuristic + cannot mistake a symbolic value for a concrete pointer. The `kind`/`args` fields carry provenance for debugging and for Phase 4 z3 lowering. """ @@ -84,12 +84,9 @@ def __bool__(self): def extract_addr(addr): """Pull a concrete address out of the substrate's value form. - The substrate hands the policy `("ptr", N)` for live pointers and - bare ints occasionally; both normalize to an int. Anything else - (e.g., SymExpr) yields None. + Pointers come through as bare ints; anything else (SymExpr, + z3 expression, None) yields None. """ - if isinstance(addr, tuple) and len(addr) == 2 and addr[0] == VALUE_TAG_PTR: - return int(addr[1]) if isinstance(addr, int) and not isinstance(addr, bool): return int(addr) return None @@ -416,11 +413,11 @@ def default(ctx, addr, size): def _make_default_mem_write(is_float, shadow=None, byte_order="little"): """Return the chain bottom for a memory_write event. - Writes concrete bytes via the lens. Handles ints, ("ptr", N) pointer - tuples, raw bytes, and IEEE floats. A z3 write decomposes ``val`` into - per-byte extracts in the shadow dict; a concrete write clears any - covered shadow slots (concrete memory is the source of truth) and - writes the real bytes to ``ConcreteMemory``. + Writes concrete bytes via the lens. Handles ints, raw bytes, and + IEEE floats. A z3 write decomposes ``val`` into per-byte extracts + in the shadow dict; a concrete write clears any covered shadow + slots (concrete memory is the source of truth) and writes the + real bytes to ``ConcreteMemory``. """ def default(ctx, addr, val, size): if isinstance(val, bool): @@ -431,12 +428,6 @@ def default(ctx, addr, val, size): ctx.mem.write_bytes( addr, val.to_bytes(size, byte_order, signed=(val < 0))) return None - if isinstance(val, tuple) and len(val) == 2 and val[0] == VALUE_TAG_PTR: - if shadow is not None: - _shadow_write(shadow, addr, val, size) - ctx.mem.write_bytes( - addr, int(val[1]).to_bytes(size, byte_order, signed=False)) - return None if isinstance(val, _BYTES_TYPES): if shadow is not None: _shadow_write(shadow, addr, val, size) @@ -1114,12 +1105,12 @@ def _default(c, a, v, sz): def _coerce_store_value(self, val, size, z3): """Lift a substrate-shaped store value to a z3 BitVec of `8*size` - bits. Accepts ints, bools, Python floats, `("ptr", N)` tuples, - and z3 BitVecs. Floats pack via the IEEE byte pattern (size 4 → - f32, size 8 → f64); the resulting BitVec is the bit pattern of - the float, matching how concrete float stores land on the - substrate's byte buffer. Returns None for shapes the overlay - can't represent.""" + bits. Accepts ints, bools, Python floats, and z3 BitVecs. + Floats pack via the IEEE byte pattern (size 4 → f32, size 8 → + f64); the resulting BitVec is the bit pattern of the float, + matching how concrete float stores land on the substrate's + byte buffer. Returns None for shapes the overlay can't + represent.""" bits = 8 * int(size) if isinstance(val, bool): return z3.BitVecVal(int(val), bits) @@ -1135,18 +1126,15 @@ def _coerce_store_value(self, val, size, z3): else: return None return z3.BitVecVal(int.from_bytes(packed, byte_order), bits) - if isinstance(val, tuple) and len(val) == 2 and \ - val[0] == VALUE_TAG_PTR: - return z3.BitVecVal(int(val[1]) & ((1 << bits) - 1), bits) if isinstance(val, z3.BitVecRef): return _z3_resize(val, bits) return None def resolve_call(self, call_inst=None, target_eid=0, indirect_eid=0, target_addr=0, args_list=(), is_indirect=False): - # Args land as a Python list of raw values (ints, ("ptr", N) - # tuples, SymExprs, …). Build an ArgsView over them so hooks - # have a consistent lens API. + # Args land as a Python list of raw values (ints, SymExprs, z3 + # expressions, …). Build an ArgsView over them so hooks have a + # consistent lens API. args = list(args_list) ctx = self._make_ctx(args=args) ctx.inst = call_inst @@ -1202,10 +1190,9 @@ def _match(sel): target_addr=target_addr, is_indirect=is_indirect, return_value=chosen, handled=True) - if isinstance(chosen, tuple) and len(chosen) == 2 and \ - chosen[0] in (CallAction.SKIP, CallAction.MODEL): - return chosen - return (CallAction.SKIP, chosen) + # Handlers return their replacement value directly; the substrate + # treats any non-None return as a skip with that value. + return chosen # ----- pure ops: propagate symbolic values, else fall through ----- @@ -1295,16 +1282,12 @@ def ptr_offset(self, base, byte_offset): def _addr_as_z3(self, value, z3): """Normalize a substrate address-shaped value to a z3 BitVec. - Accepts: a z3 ExprRef (passes through), a `("ptr", N)` tuple - (concrete pointer — wrap as a 64-bit BitVecVal), or a Python int - / bool. Anything else returns None so callers can fall back to a - SymExpr. + Accepts: a z3 ExprRef (passes through) or a Python int / bool + (concrete pointer — wrap as a 64-bit BitVecVal). Anything else + returns None so callers can fall back to a SymExpr. """ if isinstance(value, z3.ExprRef): return value - if isinstance(value, tuple) and len(value) == 2 and \ - value[0] == VALUE_TAG_PTR: - return z3.BitVecVal(int(value[1]), 64) if isinstance(value, bool): return z3.BitVecVal(int(value), 64) if isinstance(value, int): @@ -1527,9 +1510,6 @@ def _mirror_concrete_write_to_overlay(self, addr, val, size, region_name): int_val = int(val) elif isinstance(val, int): int_val = val - elif (isinstance(val, tuple) and len(val) == 2 - and val[0] == VALUE_TAG_PTR): - int_val = int(val[1]) elif isinstance(val, _BYTES_TYPES): i = 0 for b in val: @@ -1590,14 +1570,11 @@ def _lookup_name(self, eid): def _is_concrete(value): """A value is concrete if the substrate can interpret it without - policy help: ints, bools, None, and ("ptr", N) tuples.""" + policy help: ints, bools, None.""" if value is None: return True if isinstance(value, _INT_TYPES): return True - if isinstance(value, tuple) and len(value) == 2 and \ - value[0] == VALUE_TAG_PTR: - return True return False diff --git a/bindings/Python/symex/engine.py b/bindings/Python/symex/engine.py index 07b556b75..61e1c0a78 100644 --- a/bindings/Python/symex/engine.py +++ b/bindings/Python/symex/engine.py @@ -29,9 +29,11 @@ from ._types import Endian from .events import ( EventLog, EventKind, BRANCH, BranchDirection, Terminal, StopNow, - StepResultKind, Strategy, _FilterableList, + Strategy, _FilterableList, ADDRESS_FOR, ADDRESS_RESOLVED, INDIRECT_CALL_RESOLVED, SWITCH_CASE, SWITCH_DEFAULT, + Completed, Errored, Budget, Branch, Switch, + MemAddrSuspension, GlobalSuspension, Suspended, ) from .until import ExploreUntil from .dispatch import ( @@ -1125,43 +1127,47 @@ def _step_one(self, path, memory, policy, slice_steps, concretize): if result is None: return [path] - kind = result[0] - if kind == StepResultKind.COMPLETED: + if isinstance(result, Completed): path.terminal = Terminal.COMPLETED - path.return_value = result[1] + path.return_value = result.return_value return [path] - if kind == StepResultKind.ERROR: + if isinstance(result, Errored): path.terminal = Terminal.ERROR - path.error_kind = result[1] + path.error_kind = result.error_kind return [path] - if kind == StepResultKind.BUDGET: + if isinstance(result, Budget): return [path] - if kind == StepResultKind.BRANCH: + if isinstance(result, Branch): return self._handle_branch_forks(path, result, forks) - if kind == StepResultKind.SWITCH: + if isinstance(result, Switch): return self._handle_switch_forks(path, result, forks) - if kind == StepResultKind.SUSPENDED: - sub_kind = forks[0].get("sub_kind") if forks else None - if sub_kind == "call-addr": + if isinstance(result, GlobalSuspension): + return self._handle_global_suspension(path, result, forks) + if isinstance(result, MemAddrSuspension): + if result.is_call_target: return self._handle_symbolic_indirect_call( path, result, forks, concretize) return self._handle_suspension(path, result, forks, concretize) + if isinstance(result, Suspended): + # Generic continuation we don't handle specifically. + path.terminal = Terminal.STUCK_SUSPENSION + return [path] path.terminal = Terminal.UNKNOWN return [path] def _handle_branch_forks(self, path, result, forks): - _, cond, t_eid, f_eid = result if not forks: path.terminal = Terminal.STUCK_BRANCH return [path] + cond = result.condition cond_z3 = _z3_bool(cond) if _is_z3(cond) else None children = [] for entry in forks: - child_state = entry["state"] - direction = entry.get("direction", BranchDirection.UNKNOWN) + child_state = entry.state + direction = entry.direction or BranchDirection.UNKNOWN child = self._fork_child(path, child_state) if cond_z3 is not None: if direction == BranchDirection.TRUE: @@ -1173,8 +1179,8 @@ def _handle_branch_forks(self, path, result, forks): child.events.append({ "kind": BRANCH, "direction": direction, - "true_block": t_eid, - "false_block": f_eid, + "true_block": result.true_block, + "false_block": result.false_block, "step": path.steps, }) children.append(child) @@ -1190,12 +1196,12 @@ def _handle_switch_forks(self, path, result, forks): return [path] fork = forks[0] - sel = fork.get("selector") - sel_eid = int(fork.get("selector_eid") or 0) - cases = fork.get("cases") or [] - default_block_obj = fork.get("default_block") - default_eid = fork.get("default_block_eid") - snapshot_state = fork["state"] + sel = fork.selector + sel_eid = int(fork.selector_eid or 0) + cases = fork.cases or [] + default_block_obj = fork.default_block + default_eid = fork.default_block_eid + snapshot_state = fork.state sel_is_z3 = _is_z3(sel) z3 = _z3_module() if sel_is_z3 else None @@ -1278,6 +1284,39 @@ def _feasible(child): return [path] return children + def _handle_global_suspension(self, path, result, forks): + """Resolve a GLOBAL_PTR suspension by firing the address_for + chain for the entity. On hit: write to global_addresses cache + and re-step from the snapshot (compute_global_ptr cache-hits). + On miss: terminate the path with UNRESOLVED_GLOBAL — nothing + knows where this global lives, so we can't make progress. + """ + if not forks: + path.suspended = result + path.terminal = Terminal.STUCK_SUSPENSION + return [path] + + fork = forks[0] + entity_id = int(fork.entity_id or 0) + snapshot_state = fork.state + + name = None + if entity_id: + try: + decl = self._index.entity(entity_id) + if decl is not None and getattr(decl, "name", None): + name = str(decl.name) + except Exception: + pass + + addr = self._resolve_address_for(entity_id, name, "global", 0, 8) + if addr is None: + path.terminal = Terminal.UNRESOLVED_GLOBAL + return [path] + + _interp.resume_global(snapshot_state, entity_id, int(addr)) + return [self._fork_child(path, snapshot_state)] + def _handle_suspension(self, path, result, forks, strategy): if not forks: path.suspended = result @@ -1286,10 +1325,10 @@ def _handle_suspension(self, path, result, forks, strategy): fork_entry = forks[0] suspension = Suspension( - address_expr=fork_entry.get("address"), - address_eid=int(fork_entry.get("address_eid") or 0), - size=fork_entry.get("size"), - is_write=fork_entry.get("is_write"), + address_expr=fork_entry.address, + address_eid=int(fork_entry.address_eid or 0), + size=fork_entry.size, + is_write=fork_entry.is_write, path=path, layout=self.layout, solver=path.solver, @@ -1317,8 +1356,8 @@ def _handle_suspension(self, path, result, forks, strategy): def _take_state(): if not first_state_used[0]: first_state_used[0] = True - return fork_entry["state"] - return _interp.clone_state(fork_entry["state"]) + return fork_entry.state + return _interp.clone_state(fork_entry.state) for d in decisions: if isinstance(d, ConcretizeTo): @@ -1356,8 +1395,8 @@ def _handle_symbolic_indirect_call(self, path, result, forks, concretize): return [path] fork_entry = forks[0] - addr_expr = fork_entry.get("address") - address_eid = int(fork_entry.get("address_eid") or 0) + addr_expr = fork_entry.address + address_eid = int(fork_entry.address_eid or 0) from .dispatch import _build_chain, _DEFER from .ctx import Ctx @@ -1443,8 +1482,8 @@ def _default_indirect(ctx_, target_expr_): def take_state(): if not first_used[0]: first_used[0] = True - return fork_entry["state"] - return _interp.clone_state(fork_entry["state"]) + return fork_entry.state + return _interp.clone_state(fork_entry.state) z3 = _z3_module() fork_idx = 0 diff --git a/bindings/Python/symex/events.py b/bindings/Python/symex/events.py index 1fc19aca9..83accc3e6 100644 --- a/bindings/Python/symex/events.py +++ b/bindings/Python/symex/events.py @@ -12,7 +12,9 @@ reference the typed members instead of repeating the literals. """ +from dataclasses import dataclass from enum import StrEnum +from typing import Any, Optional class EventKind(StrEnum): @@ -119,20 +121,119 @@ class Terminal(StrEnum): SINK_HIT = "sink-hit" # Phase 9: intercept.indirect_call returned None, refusing the call. UNRESOLVED_CALL = "unresolved-call" + # GLOBAL_PTR suspended on an unresolved entity and the address_for + # chain produced no answer — we can't make forward progress. + UNRESOLVED_GLOBAL = "unresolved-global" # Cosmetic placeholder used only by `path.summary()` when the path # is still live (`path.terminal is None`) — never actually written # to a Path. LIVE = "live" -class StepResultKind(StrEnum): - """Kinds the C++ substrate returns in the `result` tuple.""" - COMPLETED = "completed" - ERROR = "error" - BUDGET = "budget" - BRANCH = "branch" - SWITCH = "switch" - SUSPENDED = "suspended" +# =========================================================================== +# Step result + fork shapes +# +# The C++ substrate's `step()` call hands the Python driver one of these +# typed result objects (in the dict's "result" slot) plus a list of typed +# fork objects (in "forks"). Drivers dispatch via `isinstance`; the type +# IS the discriminator. No string tags, no positional unpacking. +# =========================================================================== + +@dataclass(frozen=True) +class Completed: + return_value: Any + + +@dataclass(frozen=True) +class Errored: + error_kind: int + + +@dataclass(frozen=True) +class Budget: + steps: int + + +@dataclass(frozen=True) +class Branch: + condition: Any + true_block: int + false_block: int + + +@dataclass(frozen=True) +class Switch: + selector: Any + selector_eid: int + + +@dataclass(frozen=True) +class MemAddrSuspension: + address: Any + address_eid: int + size: int + is_write: bool + is_call_target: bool + + +@dataclass(frozen=True) +class GlobalSuspension: + entity_id: int + instruction_id: int + + +@dataclass(frozen=True) +class Suspended: + """Generic fallback for continuation kinds the driver doesn't + recognize specifically — carries only a description.""" + description: str + + +@dataclass(frozen=True) +class Skip: + """Explicit "skip this call, use `value` as the return slot." + + Most call handlers can return their replacement value directly — + the substrate treats any non-None return as a skip with that + value, since None already means "fall through to inlining." + `Skip(value)` is the disambiguator for the rare case where the + intent is to skip with `None` (e.g. `ctx.default()`). + """ + value: Any = None + + +# ---- Fork entries (one per resumption a continuation produces) ---- + +@dataclass +class BranchFork: + state: Any + direction: "BranchDirection" + + +@dataclass +class SwitchFork: + state: Any + selector: Any + selector_eid: int + cases: list # list of (low, high, target_eid, target_block) + default_block: Any + default_block_eid: Optional[int] + + +@dataclass +class MemAddrFork: + state: Any + address: Any + address_eid: int + size: int + is_write: bool + + +@dataclass +class GlobalFork: + state: Any + entity_id: int + instruction_id: int class Strategy(StrEnum): @@ -146,20 +247,6 @@ class Strategy(StrEnum): from ._types import Endian # noqa: E402,F401 -class CallAction(StrEnum): - """Substrate-facing tag for the second slot of resolve_call's - return tuple: ("skip", value) replaces the call with `value`; - ("model", value) is reserved for future modeled-call shapes.""" - SKIP = "skip" - MODEL = "model" - - -# Wire-protocol tag for "this Python value is a pointer to address N". -# The C++ substrate inspects 2-tuples of shape `(VALUE_TAG_PTR, N)` to -# recognise live pointers; keep this string stable. -VALUE_TAG_PTR = "ptr" - - _MISSING = object() diff --git a/bindings/Python/symex/lens.py b/bindings/Python/symex/lens.py index 70a78438d..e800bae92 100644 --- a/bindings/Python/symex/lens.py +++ b/bindings/Python/symex/lens.py @@ -8,9 +8,9 @@ Phase 1 shipped read/write byte access. Phase 2 adds: - MemView.read_struct / write_struct over a (name, offset, size, signed) layout. - - ArgsView constructed from raw call args (ints, ("ptr", N) tuples, - arbitrary symbolic objects). `args[i]` returns the raw value; - typed accessors do the obvious thing. + - ArgsView constructed from raw call args (ints, arbitrary symbolic + objects). `args[i]` returns the raw value; typed accessors do the + obvious thing. - ArgsView.as_string(i) / as_pointer_to(i, type=...) for pointer arguments. """ @@ -23,9 +23,7 @@ def _coerce_addr(value): - """Pull an integer address out of a raw arg or pointer tuple.""" - if isinstance(value, tuple) and len(value) == 2 and value[0] == "ptr": - return int(value[1]) + """Pull an integer address out of a raw arg.""" if isinstance(value, int) and not isinstance(value, bool): return int(value) return None @@ -146,9 +144,9 @@ class ArgsView: Phase 2 unifies the previous mid-block-entry helper with the call- hook view. Constructed from a list of raw arg values; `args[i]` - returns the raw value (an int, a `("ptr", addr)` tuple, or an - arbitrary symbolic object). Typed accessors operate over either - the literal value or the pointed-to memory. + returns the raw value (an int, or an arbitrary symbolic object). + Typed accessors operate over either the literal value or the + pointed-to memory. """ def __init__(self, mem_view, raw_args, sizes=None): diff --git a/bindings/Python/symex/models/libc.py b/bindings/Python/symex/models/libc.py index 0954f2ef7..475b85bba 100644 --- a/bindings/Python/symex/models/libc.py +++ b/bindings/Python/symex/models/libc.py @@ -38,7 +38,7 @@ def _memcpy(ctx, next_hook): if dst is None or src is None: return next_hook(ctx) ctx.mem.write_bytes(dst, ctx.mem.read_bytes(src, size)) - return ("ptr", dst) + return dst def _memset(ctx, next_hook): @@ -48,7 +48,7 @@ def _memset(ctx, next_hook): byte = ctx.args.read_int(1, size=4) & 0xFF size = ctx.args.read_int(2, size=8) ctx.mem.write_bytes(dst, bytes([byte]) * int(size)) - return ("ptr", dst) + return dst def _read(ctx, next_hook): @@ -65,7 +65,7 @@ def _read(ctx, next_hook): def _malloc(ctx, next_hook): size = ctx.args.read_int(0, size=8) - return ("ptr", ctx.layout.memory.allocate(int(size), 8)) + return ctx.layout.memory.allocate(int(size), 8) def _free(ctx, next_hook): diff --git a/include/multiplier/IR/Interpret/Interpreter.h b/include/multiplier/IR/Interpret/Interpreter.h index ccd8a0dc5..4341e6ea8 100644 --- a/include/multiplier/IR/Interpret/Interpreter.h +++ b/include/multiplier/IR/Interpret/Interpreter.h @@ -33,6 +33,7 @@ enum class ErrorKind : uint8_t { UNREACHABLE, EMPTY_STACK, NO_TERMINATOR, + UNRESOLVED_GLOBAL, }; // --------------------------------------------------------------------------- diff --git a/include/multiplier/IR/Interpret/InterpreterLoop.h b/include/multiplier/IR/Interpret/InterpreterLoop.h index fe145d121..082d0dee4 100644 --- a/include/multiplier/IR/Interpret/InterpreterLoop.h +++ b/include/multiplier/IR/Interpret/InterpreterLoop.h @@ -795,8 +795,16 @@ inline void compute_global_ptr(auto &state, PolicyT &policy, } GlobalResolution gr; - if (!policy.resolve_global(sched, src_eid, gr) || gr.info.size == 0) { - frame.values[id] = policy.make_default(); + // Trust an address_hint even when the IR didn't carry a size: the + // hint already nails down `where`, and `place_at` substitutes 8 for + // size==0 internally. Only bail when we have no address and no way + // to allocate one — and when we bail, hand off to the policy: the + // concrete default errors the path; the symbolic policy snapshots + // and emits a GlobalContinuation so a driver can supply an address + // and re-step this instruction. + if (!policy.resolve_global(sched, src_eid, gr) || + (gr.info.size == 0 && !gr.info.address_hint)) { + policy.on_unresolved_global(sched, state, src_eid, id); return; } @@ -839,33 +847,36 @@ inline void compute_global_ptr(auto &state, PolicyT &policy, if (!placed_via_hint) { addr = policy.mem_allocate(sched, info.size, align); } - if (auto a = policy.extract_address(addr)) { - state.global_addresses[key] = *a; - if (key != src_eid) state.global_addresses[src_eid] = *a; - - // Store the result BEFORE pushing the initializer frame, because - // the push may reallocate the segment's vector and invalidate `frame`. - frame.values[id] = addr; - - if (info.initializer) { - CallFrame init_frame; - init_frame.func = *info.initializer; - init_frame.params = {addr}; - init_frame.call_site = kInvalidEntityId; - for (auto obj : info.initializer->objects()) { - auto k = obj.kind(); - if (k == ir::ObjectKind::PARAMETER || - k == ir::ObjectKind::PARAMETER_VALUE) { - init_frame.param_ptrs.push_back(addr); - } - } - if (info.initializer->kind() == ir::FunctionKind::GLOBAL_INITIALIZER) { + auto a = policy.extract_address(addr); + if (!a) { + policy.on_unresolved_global(sched, state, src_eid, id); + return; + } + state.global_addresses[key] = *a; + if (key != src_eid) state.global_addresses[src_eid] = *a; + + // Store the result BEFORE pushing the initializer frame, because + // the push may reallocate the segment's vector and invalidate `frame`. + frame.values[id] = addr; + + if (info.initializer) { + CallFrame init_frame; + init_frame.func = *info.initializer; + init_frame.params = {addr}; + init_frame.call_site = kInvalidEntityId; + for (auto obj : info.initializer->objects()) { + auto k = obj.kind(); + if (k == ir::ObjectKind::PARAMETER || + k == ir::ObjectKind::PARAMETER_VALUE) { init_frame.param_ptrs.push_back(addr); } - state.call_stack.push(std::move(init_frame)); - state.work_stack.push_back({WorkKind::ENTER_BLOCK, - {}, info.initializer->entry_block()}); } + if (info.initializer->kind() == ir::FunctionKind::GLOBAL_INITIALIZER) { + init_frame.param_ptrs.push_back(addr); + } + state.call_stack.push(std::move(init_frame)); + state.work_stack.push_back({WorkKind::ENTER_BLOCK, + {}, info.initializer->entry_block()}); } } @@ -901,7 +912,8 @@ inline void compute_func_ptr(auto &state, PolicyT &policy, ValueT addr = policy.mem_allocate(sched, 8, 8); auto a = policy.extract_address(addr); if (!a) { - frame.values[id] = ValueTraits::default_value(); + sched.on_errored(ErrorKind::UNRESOLVED_GLOBAL, state.clone()); + state.work_stack.clear(); return; } slot_addr = *a; @@ -1178,7 +1190,6 @@ inline void exec_call(auto &state, PolicyT &policy, case CallAction::INLINE: callee_ir = resolution.callee_ir; break; - case CallAction::MODEL: case CallAction::SKIP: frame.values[id] = resolution.return_value; return; diff --git a/include/multiplier/IR/Interpret/Policy.h b/include/multiplier/IR/Interpret/Policy.h index b78e3803a..61ceda5b4 100644 --- a/include/multiplier/IR/Interpret/Policy.h +++ b/include/multiplier/IR/Interpret/Policy.h @@ -470,6 +470,24 @@ struct Policy { void on_global_initialized_impl(SchedT &, const IRFunction &, const ValueT &) {} + // Failure-atomic handler for a GLOBAL_PTR whose backing entity could + // not be resolved (no resolver, no address_hint, no allocatable size). + // Default: terminate the path with ErrorKind::UNRESOLVED_GLOBAL. + // Symbolic policies override `_impl` to snapshot + emit a + // GlobalContinuation so a driver can supply an address and re-step. + template + void on_unresolved_global(SchedT &sched, StateT &state, + RawEntityId src_eid, + RawEntityId instruction_eid) { + self().on_unresolved_global_impl(sched, state, src_eid, instruction_eid); + } + template + void on_unresolved_global_impl(SchedT &sched, StateT &state, + RawEntityId, RawEntityId) { + sched.on_errored(ErrorKind::UNRESOLVED_GLOBAL, state.clone()); + state.work_stack.clear(); + } + // Abort-request gate. PythonPolicy sets this when a Python hook raises // an exception so the loop can exit cleanly after the current item. bool abort_requested() const { diff --git a/include/multiplier/IR/Interpret/Suspension.h b/include/multiplier/IR/Interpret/Suspension.h index 1fb62de39..6405070db 100644 --- a/include/multiplier/IR/Interpret/Suspension.h +++ b/include/multiplier/IR/Interpret/Suspension.h @@ -22,7 +22,6 @@ namespace mx::ir::interpret { enum class CallAction : uint8_t { INLINE, SKIP, - MODEL, }; template diff --git a/tests/symex/conftest.py b/tests/symex/conftest.py index 4b5592447..2a14aa764 100644 --- a/tests/symex/conftest.py +++ b/tests/symex/conftest.py @@ -21,9 +21,8 @@ class PassthroughPolicy: class SymExpr: """Opaque symbolic value used by ForkOnSymbolicBranchPolicy. - A class (not a tuple) so the substrate's address-extraction - heuristic — which matches 2-tuples whose first element is "ptr" — - cannot accidentally see a symbolic compare result as a pointer. + A class (not an int) so the substrate's address-extraction + can't mistake a symbolic compare result for a concrete pointer. """ __slots__ = ("kind", "args") @@ -45,12 +44,10 @@ def __bool__(self): def _extract_addr(addr): """Pull a concrete address out of the substrate's value form. - The substrate hands the policy ("ptr", N) tuples for live pointers - and bare ints occasionally; both should normalize to an int. + Pointers come through as bare ints; anything else (None, symbolic + objects) yields None. """ - if isinstance(addr, tuple) and len(addr) == 2 and addr[0] == "ptr": - return int(addr[1]) - if isinstance(addr, int): + if isinstance(addr, int) and not isinstance(addr, bool): return addr return None @@ -241,18 +238,23 @@ def run_until_terminal(state, mem, policy, func_resolver, global_resolver, max_total_steps=100000, slice_steps=10000): """Drive the symbolic step loop until completion / error / suspended. - Returns the final ``result`` tuple from the last step dict. + Returns the final result dataclass from the last step dict. """ + from multiplier.symex.events import ( + Completed, Errored, Budget, Suspended, + MemAddrSuspension, GlobalSuspension, + ) + terminal_types = (Completed, Errored, Suspended, + MemAddrSuspension, GlobalSuspension) while state.steps < max_total_steps: out = interp.step(state, mem, policy, slice_steps, func_resolver, global_resolver) result = out.get("result") if result is None: return None - status = result[0] - if status in ("completed", "error", "suspended"): + if isinstance(result, terminal_types): return result - if status == "budget": + if isinstance(result, Budget): continue return result return None @@ -287,6 +289,10 @@ def run_via_concrete_policy(index, name, args=None): state = interp.InterpreterState() interp.init_state(state, policy, ir, args) + from multiplier.symex.events import ( + Completed, Errored, Budget, Suspended, + MemAddrSuspension, GlobalSuspension, + ) max_total_steps = 100000 while state.steps < max_total_steps: out = interp.step(state, policy, 1000) @@ -295,12 +301,12 @@ def run_via_concrete_policy(index, name, args=None): result = out.get("result") if isinstance(out, dict) else out if result is None: return None - status = result[0] - if status == "completed": - return result[1] - if status in ("error", "suspended"): + if isinstance(result, Completed): + return result.return_value + if isinstance(result, (Errored, Suspended, + MemAddrSuspension, GlobalSuspension)): return None - if status == "budget": + if isinstance(result, Budget): continue return None return None diff --git a/tests/symex/test_phase0.py b/tests/symex/test_phase0.py index 4a2a92c45..db3a042bc 100644 --- a/tests/symex/test_phase0.py +++ b/tests/symex/test_phase0.py @@ -66,9 +66,11 @@ def test_p0_1_init_state_at_entry_block_runs_to_completion( result = _run_via_init_state_at(index, ir, ir.entry_block, func_resolver=func_resolver, global_resolver=global_resolver) + from multiplier.symex.events import Completed assert result is not None, "interpreter returned no result" - assert result[0] == "completed", f"expected completed, got {result}" - assert result[1] == 0, f"test_arithmetic should return 0, got {result[1]}" + assert isinstance(result, Completed), f"expected Completed, got {result!r}" + assert result.return_value == 0, \ + f"test_arithmetic should return 0, got {result.return_value}" def test_p0_2_init_state_at_non_entry_block_starts_there( @@ -97,8 +99,9 @@ def test_p0_2_init_state_at_non_entry_block_starts_there( # Acceptable terminals: completed (likely with a different return # value than the canonical run), or error (e.g., uninitialized # locals) — we just need the loop to have run from `non_entry`. - assert result[0] in ("completed", "error"), \ - f"unexpected terminal status: {result}" + from multiplier.symex.events import Completed, Errored + assert isinstance(result, (Completed, Errored)), \ + f"unexpected terminal status: {result!r}" # --- P0.3: regression gate marker ---------------------------------------- diff --git a/tests/symex/test_phase2.py b/tests/symex/test_phase2.py index 6fce6cb5d..ed78092c3 100644 --- a/tests/symex/test_phase2.py +++ b/tests/symex/test_phase2.py @@ -113,7 +113,7 @@ def hook(ctx, addr, val, size, next_hook): policy = InterceptorPolicy(engine, _StubPath(), layout=layout, memory=layout.memory) - rv = policy.mem_write(("ptr", 0x20000), 99, 4, False) + rv = policy.mem_write(0x20000, 99, 4, False) assert rv is None # treated-as-handled assert layout.memory.read_bytes(0x20000, 4) == \ (42).to_bytes(4, "little") # unchanged @@ -245,7 +245,7 @@ def hook_b(ctx, addr, size, next_hook): policy = InterceptorPolicy(engine, _StubPath(), layout=layout, memory=layout.memory) - rv = policy.mem_read(("ptr", 0x20000), 4, False) + rv = policy.mem_read(0x20000, 4, False) assert rv == 0xBEEF assert order == ["a", "b"] @@ -268,7 +268,7 @@ def hook(ctx, addr, size, next_hook): policy = InterceptorPolicy(engine, _StubPath(), layout=layout, memory=layout.memory) - rv = policy.mem_read(("ptr", 0x20000), 4, False) + rv = policy.mem_read(0x20000, 4, False) # Chain bottom reads concrete; the underlying memory now holds # 0xC0FFEE because the handler pre-wrote it. assert rv == 0xC0FFEE diff --git a/tests/symex/test_phase8a.py b/tests/symex/test_phase8a.py index 15c259ac4..6bfd5790a 100644 --- a/tests/symex/test_phase8a.py +++ b/tests/symex/test_phase8a.py @@ -248,7 +248,7 @@ def test_p8a_4_overlay_concrete_then_symbolic_read(index): # Concrete write at offset +5: mirrors into the overlay. OFFSET = 5 BYTE = 0xAB - policy.mem_write(("ptr", BUF + OFFSET), BYTE, 1, False) + policy.mem_write(BUF + OFFSET, BYTE, 1, False) # Second symbolic read: constrain the address to the write offset # and verify the model returns BYTE. diff --git a/tests/symex/test_phase8b.py b/tests/symex/test_phase8b.py index 220e31a06..7440d6ea5 100644 --- a/tests/symex/test_phase8b.py +++ b/tests/symex/test_phase8b.py @@ -171,7 +171,7 @@ def test_p8b_4_aggregate_return_still_reads_slot(index): standalone with concrete `base = 7`, then assert: 1. The path completes. - 2. `path.return_value` is a `("ptr", N)` tuple — proves the + 2. `path.return_value` is an int address — proves the substrate took the `sz > 8` branch and returned the slot pointer rather than the RET's (now-undefined) operand. 3. The 20-byte slot decodes to the five field values @@ -189,10 +189,10 @@ def test_p8b_4_aggregate_return_still_reads_slot(index): f"unexpected terminal {p.terminal!r}" rv = p.return_value - assert isinstance(rv, tuple) and len(rv) == 2 and rv[0] == "ptr", \ - f"expected ('ptr', N) tuple from sz>8 RET path; got {rv!r}" + assert isinstance(rv, int) and not isinstance(rv, bool), \ + f"expected an int address from sz>8 RET path; got {rv!r}" - slot_addr = rv[1] + slot_addr = rv assert slot_addr != 0, \ "return slot pointer is null — substrate didn't allocate" data = p.mem.read_bytes(slot_addr, 20) @@ -229,7 +229,7 @@ def trace(ctx, **payload): seen.append(payload) policy = InterceptorPolicy(engine, path=None, layout=engine.layout) - policy.mem_read(("ptr", BUF), 4, False) + policy.mem_read(BUF, 4, False) assert seen, "observe.global_read did not fire for a global access" hit = seen[0] @@ -259,7 +259,7 @@ def trace(ctx, **payload): # Drive a write through the policy directly (no IR run needed). policy = InterceptorPolicy(engine, path=None, layout=engine.layout) - policy.mem_write(("ptr", LOCK), 0xDEADBEEF, 4, False) + policy.mem_write(LOCK, 0xDEADBEEF, 4, False) assert seen, "observe.global_write did not fire for a global write" hit = seen[0] @@ -353,7 +353,7 @@ def trace(ctx, **payload): # mem_read of a function placement: returns concrete (likely 0 # since no memory backed there); we only care about the fan-out # filter. - policy.mem_read(("ptr", FN_ADDR), 4, False) + policy.mem_read(FN_ADDR, 4, False) assert not seen, \ f"observe.global_read fired for a function placement: {seen!r}" @@ -379,8 +379,8 @@ def trace(ctx, **payload): seen.append(payload) policy = InterceptorPolicy(engine, path=None, layout=engine.layout) - policy.mem_read(("ptr", LOCK), 4, False) - policy.mem_read(("ptr", USERS), 4, False) + policy.mem_read(LOCK, 4, False) + policy.mem_read(USERS, 4, False) assert len(seen) == 1, \ f"selector should match only g_users; got {[p.get('name') for p in seen]}" diff --git a/tests/symex/test_phase8c.py b/tests/symex/test_phase8c.py index 60a72297f..8db101c86 100644 --- a/tests/symex/test_phase8c.py +++ b/tests/symex/test_phase8c.py @@ -63,10 +63,10 @@ def test_p8c_1_shadow_roundtrips_symbolic_write(index): sym = z3.BitVec("sym_ret", SIZE * 8) writer = InterceptorPolicy(engine, path=path, layout=engine.layout) - writer.mem_write(("ptr", SLOT), sym, SIZE, False) + writer.mem_write(SLOT, sym, SIZE, False) reader = InterceptorPolicy(engine, path=path, layout=engine.layout) - got = reader.mem_read(("ptr", SLOT), SIZE, False) + got = reader.mem_read(SLOT, SIZE, False) assert _is_z3(got), \ f"expected z3 expression from shadow read; got {type(got).__name__}" diff --git a/tests/symex/test_phase8e.py b/tests/symex/test_phase8e.py index 4d61811c2..10f6d3fda 100644 --- a/tests/symex/test_phase8e.py +++ b/tests/symex/test_phase8e.py @@ -6,18 +6,16 @@ """Phase 8e — close the LOCAL_VALUE alloca regression that Phase 8d surfaced but couldn't fix in scope. -The bug: `PythonPolicy::ptr_offset` and `ptr_add` fall back to the -shared C++ concrete implementation when the InterceptorPolicy returns -NotImplemented (its concrete-input fast path). The fallback used to -funnel results through `value_to_shared`, which lowers a `Value` to a -plain `PyLong`. The pointer "tag" carried by `("ptr", N)` tuples was -lost — and downstream `extract_address` only recognizes tuples, so -every GEP_FIELD-derived address looked symbolic to the substrate. The -default address strategy concretized those to 0, sending all field -stores to address 0 and leaving every `LOCAL_VALUE` slot zero-filled. - -The fix wraps both fallback paths in `make_literal_ptr` instead of -`value_to_shared`, preserving the pointer tag. +The historical bug: `PythonPolicy::ptr_offset` and `ptr_add` used to +fall back through `value_to_shared`, which produced bare PyLongs; +`extract_address` only recognized `("ptr", N)` tuples then, so every +GEP_FIELD-derived address looked symbolic. The default address +strategy concretized those to 0, zero-filling every LOCAL_VALUE. + +Pointer values now travel as bare ints throughout (the kind tag was +removed), and `extract_address` recognizes them directly. The fix +here was wrapping the fallback paths in `make_literal_ptr` so the +pointer flowed back without being routed through `value_to_shared`. Catalog: @@ -44,9 +42,10 @@ def test_p8e_1_local_value_alloca_resolves(index): - """`make_large(7)` returns `("ptr", N)` and the slot decodes to the - five field values. Pre-fix the slot was zero-filled because the - GEP_FIELD-derived addresses were silently concretized to 0.""" + """`make_large(7)` returns the slot address and the slot decodes + to the five field values. Pre-fix the slot was zero-filled + because the GEP_FIELD-derived addresses were silently + concretized to 0.""" engine = SymExEngine(index) engine.layout = Layout() paths = engine.explore("make_large", args=[7]) @@ -57,9 +56,9 @@ def test_p8e_1_local_value_alloca_resolves(index): f"unexpected terminal {p.terminal!r}" rv = p.return_value - assert isinstance(rv, tuple) and len(rv) == 2 and rv[0] == "ptr", \ - f"expected ('ptr', N) tuple; got {rv!r}" - slot_addr = rv[1] + assert isinstance(rv, int) and not isinstance(rv, bool), \ + f"expected an int address; got {rv!r}" + slot_addr = rv assert slot_addr != 0 data = p.mem.read_bytes(slot_addr, 20) diff --git a/tests/symex/test_phase9.py b/tests/symex/test_phase9.py index 90a16df4c..0f9128ea7 100644 --- a/tests/symex/test_phase9.py +++ b/tests/symex/test_phase9.py @@ -399,17 +399,16 @@ def refuse(ctx, target_expr, next_hook): path._layout = engine.layout import z3 + from multiplier.symex.events import MemAddrFork, MemAddrSuspension target_sym = z3.BitVec("sym_fp2", 64) fake_addr = mem.allocate(8, 8) - forks = [{ - "state": state, - "address": target_sym, - "address_eid": fake_addr, - "sub_kind": "call-addr", - }] + forks = [MemAddrFork(state=state, address=target_sym, + address_eid=fake_addr, size=8, is_write=False)] + result = MemAddrSuspension(address=target_sym, address_eid=fake_addr, + size=8, is_write=False, is_call_target=True) children = engine._handle_symbolic_indirect_call( - path, ("suspended", None), forks, engine.address_strategy) + path, result, forks, engine.address_strategy) assert len(children) == 1 assert children[0].terminal == Terminal.UNRESOLVED_CALL