From 1dda933a218f885085198f7a50110a316962ddd2 Mon Sep 17 00:00:00 2001 From: Adam Doupe Date: Sat, 11 Apr 2026 19:16:16 -0700 Subject: [PATCH 1/5] Add MachO support for MH_KEXT_BUNDLE and MH_BUNDLE file types Also fixes several bugs in the dyld chained fixups walker that were exposed by kext binaries: - Use correct `next` field and stride per pointer format. Arm64e packs `next` as 11 bits at bit 51; Generic64 packs it as 12 bits at bit 52. The old code always used generic64.rebase.next, producing garbage chain walks for ARM64E_KERNEL (stride 4) kexts. - Handle file-offset-to-vaddr shift in segments where vmaddr != fileoff (common in kexts' __DATA_CONST, previously only allowed for __ETC). - Add bounds checks: stop chain walks that exceed page boundaries or produce out-of-range bind ordinals instead of crashing. - Add missing _fields_ to dyld_chained_ptr_arm64e_bind24 struct. - Fix PIC detection to use `filetype in (...)` instead of bitwise AND. Tested against all 907 kext binaries in KDK_26.4.1_25E253.kdk. Co-Authored-By: Claude Opus 4.6 (1M context) --- cle/backends/macho/macho.py | 109 +++++++++++++++++++++++++--------- cle/backends/macho/structs.py | 9 +++ 2 files changed, 91 insertions(+), 27 deletions(-) diff --git a/cle/backends/macho/macho.py b/cle/backends/macho/macho.py index 97be4a1fd..6a268c334 100644 --- a/cle/backends/macho/macho.py +++ b/cle/backends/macho/macho.py @@ -35,6 +35,7 @@ dyld_chained_fixups_header, dyld_chained_starts_in_segment, ) +from .structs import DyldChainedPtrFormats as _DCPF from .symbol import AbstractMachOSymbol, DyldBoundSymbol, SymbolTableSymbol log = logging.getLogger(name=__name__) @@ -42,6 +43,25 @@ __all__ = ("MachO", "MachOSection", "MachOSegment", "SymbolList") +class _ChainStride(typing.NamedTuple): + bytes: int + use_arm64e: bool + + +# Per dyld's fixup-chains.h: each pointer format defines a stride (the byte multiplier for the +# `next` field) and which packed-pointer layout to read (Arm64e vs Generic64). 
The two layouts put +# `next` at different bit positions, so picking the wrong one yields garbage walks. +_CHAIN_STRIDE: dict[_DCPF, _ChainStride] = { + _DCPF.DYLD_CHAINED_PTR_ARM64E: _ChainStride(bytes=8, use_arm64e=True), + _DCPF.DYLD_CHAINED_PTR_64: _ChainStride(bytes=4, use_arm64e=False), + _DCPF.DYLD_CHAINED_PTR_64_OFFSET: _ChainStride(bytes=4, use_arm64e=False), + _DCPF.DYLD_CHAINED_PTR_ARM64E_KERNEL: _ChainStride(bytes=4, use_arm64e=True), + _DCPF.DYLD_CHAINED_PTR_ARM64E_USERLAND: _ChainStride(bytes=8, use_arm64e=True), + _DCPF.DYLD_CHAINED_PTR_ARM64E_FIRMWARE: _ChainStride(bytes=4, use_arm64e=True), + _DCPF.DYLD_CHAINED_PTR_ARM64E_USERLAND24: _ChainStride(bytes=8, use_arm64e=True), +} + + # pylint: disable=abstract-method class SymbolList(SortedKeyList): """ @@ -157,8 +177,12 @@ def __init__(self, *args, **kwargs): "7I", binary_file, 0, 28 ) - # Libraries are always implicitly PIC - self.pic = bool(self.flags & MH_flags.MH_PIE) or bool(self.filetype & MachoFiletype.MH_DYLIB) + # Libraries, bundles, and kexts are always implicitly PIC + self.pic = bool(self.flags & MH_flags.MH_PIE) or self.filetype in ( + MachoFiletype.MH_DYLIB, + MachoFiletype.MH_BUNDLE, + MachoFiletype.MH_KEXT_BUNDLE, + ) if not bool(self.flags & MH_flags.MH_TWOLEVEL): # ensure MH_TWOLEVEL log.error( @@ -207,6 +231,9 @@ def __init__(self, *args, **kwargs): # A Library is loaded as a dependency, this is fine, the loader will map it to somewhere above the main # binary, so we don't need to do anything pass + elif self.filetype in (MachoFiletype.MH_BUNDLE, MachoFiletype.MH_KEXT_BUNDLE): + if self.is_main_bin: + self._custom_base_addr = 0 else: # This case is not explicitly supported yet. 
# There are various other MachoFiletypes, which might have different quirks in their loading @@ -1154,60 +1181,88 @@ def _parse_dyld_chained_fixups(self): starts = self._get_struct(dyld_chained_starts_in_segment, starts_addr) seg = self.find_segment_containing(starts.segment_offset) - # There are weird binaries where the offsets inside the file - # and inside the virtual addr space don't match anymore. - # This isn't properly supported yet, and the only known case is the __PII section inside the __ETC segment - # of rare binaries, which isn't that important for most purposes - shift = seg.vaddr - (seg.offset) - if shift != 0: - assert isinstance(seg, MachOSegment) - assert seg.segname == "__ETC", ( - "Only __ETC segments are known to have this shift, please open an" - " issue for this binary so it can be investigated" - ) - log.error("Segment shift detected in, not handling fixups here for now") - continue + # In some binaries (kexts, __ETC segments) the segment's file offset and virtual + # address differ. Chain entries are read at *file* offsets but relocation addresses + # must be virtual (relative to the linked base). Compute the delta once here and + # add it when creating relocations below. + file_to_vaddr_shift = seg.vaddr - seg.offset if seg is not None else 0 page_starts_data = self._read(self._binary_stream, starts_addr + 22, starts.page_count * 2) page_starts = struct.unpack("<" + ("H" * starts.page_count), page_starts_data) pointer_format: DyldChainedPtrFormats = starts.pointer_format log.info("Page has pointer_format: %s", pointer_format) + # Each pointer format has its own (next, stride) layout. Generic64 packs `next` as a + # 12-bit field at bit 52; Arm64e packs it as 11 bits at bit 51. Mixing them up reads + # garbage out of the chain header — see the kext path with DYLD_CHAINED_PTR_ARM64E_KERNEL. 
+ stride = _CHAIN_STRIDE.get(pointer_format) + if stride is None: + raise NotImplementedError(f"Chain stride for pointer format {pointer_format} not known") + is_arm64e = stride.use_arm64e for j, start in enumerate(page_starts): if start == DYLD_CHAINED_PTR_START_NONE: continue - chain_entry_addr = starts.segment_offset + (j * starts.page_size) + start - current_chain_addr = chain_entry_addr + page_base = starts.segment_offset + (j * starts.page_size) + page_end = page_base + starts.page_size + current_chain_addr = page_base + start log.info("Reading chain at %x", current_chain_addr) while True: - chained_rebase_ptr: ChainedFixupPointerOnDisk = self._get_struct( - ChainedFixupPointerOnDisk, current_chain_addr - ) + try: + chained_rebase_ptr: ChainedFixupPointerOnDisk = self._get_struct( + ChainedFixupPointerOnDisk, current_chain_addr + ) + except ValueError: + log.warning("Chain entry at %#x extends past end of file; stopping", current_chain_addr) + break bind = chained_rebase_ptr.isBind(pointer_format) rebase = chained_rebase_ptr.isRebase(pointer_format, self.mapped_base) if bind is not None: libOrdinal, _addend = bind + if libOrdinal >= len(self._dyld_imports): + log.error( + "Chained fixup bind ordinal %d out of range (have %d imports) at %#x; " + "stopping chain walk", + libOrdinal, + len(self._dyld_imports), + current_chain_addr, + ) + break import_symbol = self._dyld_imports[libOrdinal] - reloc = MachOSymbolRelocation(self, import_symbol, current_chain_addr, None) + reloc_addr = current_chain_addr + file_to_vaddr_shift + reloc = MachOSymbolRelocation(self, import_symbol, reloc_addr, None) self.relocs.append(reloc) - # Legacy Code uses bind_xrefs, explicitly add this to make this compatible for now import_symbol.bind_xrefs.append(reloc.dest_addr + self.linked_base) - log.debug("Binding for %s found at %x", import_symbol, current_chain_addr) + log.debug("Binding for %s found at %x", import_symbol, reloc_addr) elif rebase is not None: + reloc_addr = 
current_chain_addr + file_to_vaddr_shift target = self.linked_base + rebase - location: MemoryPointer = self.linked_base + current_chain_addr - anon_reloc = MachOPointerRelocation(owner=self, relative_addr=current_chain_addr, data=rebase) + location: MemoryPointer = self.linked_base + reloc_addr + anon_reloc = MachOPointerRelocation(owner=self, relative_addr=reloc_addr, data=rebase) self.relocs.append(anon_reloc) log.debug("Rebase to %x found at %x", target, location) else: raise CLEInvalidBinaryError("FixupPointer was neither bind nor rebase, that shouldn't happen") - skip = chained_rebase_ptr.generic64.rebase.next * 4 - current_chain_addr += skip + if is_arm64e: + next_count = chained_rebase_ptr.arm64e.rebase.next + else: + next_count = chained_rebase_ptr.generic64.rebase.next + skip = next_count * stride.bytes if skip == 0: break + current_chain_addr += skip + if current_chain_addr >= page_end: + # Chains are per-page; if a malformed chain would walk into the next page, + # stop rather than reinterpreting unrelated data as fixup entries. 
+ log.warning( + "Chain walked past page end at %#x (page %#x..%#x); stopping", + current_chain_addr, + page_base, + page_end, + ) + break def get_symbol_by_address_fuzzy(self, address): """ diff --git a/cle/backends/macho/structs.py b/cle/backends/macho/structs.py index b636931e8..351f31f48 100644 --- a/cle/backends/macho/structs.py +++ b/cle/backends/macho/structs.py @@ -130,6 +130,15 @@ class dyld_chained_ptr_arm64e_bind24(HelperStruct): https://github.com/apple-opensource/dyld/blob/852.2/include/mach-o/fixup-chains.h#L164-L173 """ + _fields_ = [ + ("ordinal", c_uint64, 24), + ("zero", c_uint64, 8), + ("addend", c_uint64, 19), + ("next", c_uint64, 11), + ("bind", c_uint64, 1), + ("auth", c_uint64, 1), + ] + # noinspection PyPep8Naming class dyld_chained_ptr_arm64e_auth_bind24(HelperStruct): From fdb9d60f66d276c919472f9e5f7f6dd95daf594b Mon Sep 17 00:00:00 2001 From: Adam Doupe Date: Sat, 11 Apr 2026 20:44:41 -0700 Subject: [PATCH 2/5] Universal2: load only the first slice when no arch is specified When a universal binary contains multiple architecture slices and no arch= is passed, loading all slices causes address collisions (e.g. multiple MH_EXECUTE slices all mapping to 0x400000). Pick the first slice and log a warning telling the user how to select a specific one. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cle/backends/universal2.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cle/backends/universal2.py b/cle/backends/universal2.py index bf86fc36a..e5c93f31e 100644 --- a/cle/backends/universal2.py +++ b/cle/backends/universal2.py @@ -117,7 +117,7 @@ def __init__(self, *args, arch=None, **kwargs): slices.append((cputype, cpusubtype, offset, size, align)) self._fat_arches = list(slices) - # Filter to requested architecture if specified + # Filter to requested architecture, or pick the first slice if arch is not None: if not isinstance(arch, archinfo.Arch): raise TypeError(f"arch must be an archinfo.Arch instance, got {type(arch).__name__}") @@ -132,6 +132,15 @@ def __init__(self, *args, arch=None, **kwargs): f"Architecture {arch!r} not found in universal binary. Available architectures: {available}" ) slices = filtered + elif len(slices) > 1: + available = [CPU_TYPE_NAMES.get(s[0], f"unknown(0x{s[0]:X})") for s in slices] + log.warning( + "Universal binary contains multiple architectures %s; " + "loading only the first (%s). Pass arch= to select a specific slice.", + available, + available[0], + ) + slices = slices[:1] # Load each slice using _load_object_isolated. # Unlike StaticArchive (where children are .o files), universal binary slices From 197136047943bb9c676f830e1859ee476117227f Mon Sep 17 00:00:00 2001 From: Adam Doupe Date: Sat, 11 Apr 2026 21:23:46 -0700 Subject: [PATCH 3/5] Add kext loading tests and update Universal2 tests Add test_macho_kext.py with 10 tests covering MH_KEXT_BUNDLE loading using the IPwnKit kext binary: filetype detection, PIC, base address, segments, sections, symbols (including IOKit class names), relocations, and code readability. Update test_universal2.py to match the new default behavior of loading only the first architecture slice when no arch= is specified. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_macho_kext.py | 89 ++++++++++++++++++++++++++++++++++++++++ tests/test_universal2.py | 26 +++++------- 2 files changed, 99 insertions(+), 16 deletions(-) create mode 100644 tests/test_macho_kext.py diff --git a/tests/test_macho_kext.py b/tests/test_macho_kext.py new file mode 100644 index 000000000..7278ba35a --- /dev/null +++ b/tests/test_macho_kext.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import os + +import cle +from cle import MachO +from cle.backends.macho.macho_enums import MachoFiletype + +TEST_BASE = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.path.join("..", "..", "binaries")) +KEXT = os.path.join(TEST_BASE, "tests", "aarch64", "IPwnKit.macho.kext") + + +def test_kext_loads(): + ld = cle.Loader(KEXT, auto_load_libs=False) + assert isinstance(ld.main_object, MachO) + assert ld.main_object.filetype == MachoFiletype.MH_KEXT_BUNDLE + + +def test_kext_arch(): + ld = cle.Loader(KEXT, auto_load_libs=False) + assert ld.main_object.arch.name == "AARCH64" + + +def test_kext_pic(): + ld = cle.Loader(KEXT, auto_load_libs=False) + assert ld.main_object.pic is True + + +def test_kext_base_addr(): + ld = cle.Loader(KEXT, auto_load_libs=False) + assert ld.main_object.mapped_base == 0 + + +def test_kext_segments(): + ld = cle.Loader(KEXT, auto_load_libs=False) + mo = ld.main_object + segnames = [s.segname for s in mo.segments] + assert "__TEXT" in segnames + assert "__TEXT_EXEC" in segnames + assert "__DATA" in segnames + assert "__DATA_CONST" in segnames + assert "__LINKEDIT" in segnames + + +def test_kext_sections(): + ld = cle.Loader(KEXT, auto_load_libs=False) + mo = ld.main_object + section_names = set() + for seg in mo.segments: + for sec in seg.sections: + section_names.add((seg.segname, sec.sectname)) + assert ("__TEXT_EXEC", "__text") in section_names + assert ("__TEXT_EXEC", "__auth_stubs") in section_names + assert ("__DATA_CONST", "__auth_got") in section_names + 
assert ("__DATA_CONST", "__got") in section_names + + +def test_kext_symbols(): + ld = cle.Loader(KEXT, auto_load_libs=False) + mo = ld.main_object + assert len(mo.symbols) > 100 + sym_names = {s.name for s in mo.symbols} + assert "_kmod_info" in sym_names + assert "__realmain" in sym_names + assert "_IPwnKit_start" in sym_names + assert "_IPwnKit_stop" in sym_names + + +def test_kext_iokit_class_symbols(): + ld = cle.Loader(KEXT, auto_load_libs=False) + sym_names = {s.name for s in ld.main_object.symbols} + assert "__ZN21io_oooverflow_IPwnKit5startEP9IOService" in sym_names + assert "__ZN31io_oooverflow_IPwnKitUserClient10gMetaClassE" in sym_names + + +def test_kext_relocations(): + ld = cle.Loader(KEXT, auto_load_libs=False) + assert len(ld.main_object.relocs) > 0 + + +def test_kext_code_readable(): + ld = cle.Loader(KEXT, auto_load_libs=False) + mo = ld.main_object + start_sym = [s for s in mo.symbols if s.name == "_IPwnKit_start" and s.relative_addr != 0] + assert len(start_sym) > 0 + addr = start_sym[0].relative_addr + data = mo.memory.load(addr, 4) + assert len(data) == 4 + assert data != b"\x00\x00\x00\x00" diff --git a/tests/test_universal2.py b/tests/test_universal2.py index 6e3bed03d..33610be99 100644 --- a/tests/test_universal2.py +++ b/tests/test_universal2.py @@ -29,8 +29,8 @@ def test_universal2_autodetect(): assert type(ld.main_object) is Universal2 -def test_universal2_load_all_slices(): - """Test loading all architecture slices from a universal binary.""" +def test_universal2_default_first_slice(): + """Test that loading without arch= picks only the first slice.""" ld = cle.Loader(FATBIN, auto_load_libs=False) main = ld.main_object @@ -38,19 +38,14 @@ def test_universal2_load_all_slices(): assert main.is_outer is True assert main.has_memory is False - # Should have two child objects (x86_64 + aarch64) - assert len(main.child_objects) == 2 - assert len(main.slices) == 2 - - # All children should be MachO objects parented to the Universal2 - for 
child in main.child_objects: - assert isinstance(child, MachO) - assert child.parent_object is main + # Should load only the first slice when no arch is specified + assert len(main.child_objects) == 1 + assert len(main.slices) == 1 - # Check that both expected architectures are present - arch_names = {child.arch.name for child in main.child_objects} - assert "AMD64" in arch_names - assert "AARCH64" in arch_names + # The child should be a MachO object parented to the Universal2 + child = main.child_objects[0] + assert isinstance(child, MachO) + assert child.parent_object is main def test_universal2_load_single_arch(): @@ -101,9 +96,8 @@ def test_universal2_available_arches(): def test_universal2_child_names(): """Test that child objects have descriptive names including architecture.""" - ld = cle.Loader(FATBIN, auto_load_libs=False) + ld = cle.Loader(FATBIN, auto_load_libs=False, main_opts={"arch": archinfo.ArchAMD64()}) main = ld.main_object names = {child.binary_basename for child in main.child_objects} assert any("[x64]" in n for n in names) - assert any("[aarch64]" in n for n in names) From f49c805ecf34574f39ec3517ebb87b7d3c228282 Mon Sep 17 00:00:00 2001 From: Adam Doupe Date: Sun, 12 Apr 2026 10:00:19 -0700 Subject: [PATCH 4/5] Universal2: pick main bin's arch when loaded as a dependency Per @rhelmot's review feedback: when a fat binary is loaded as a dependency rather than as the main object, select the slice that matches the main binary's arch instead of the first slice. This keeps dependency loading consistent with the main binary's arch. Also extracts the slice filter into a static helper and adds a unit test for it. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- cle/backends/universal2.py | 50 ++++++++++++++++++++++++-------------- tests/test_universal2.py | 19 +++++++++++++++ 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/cle/backends/universal2.py b/cle/backends/universal2.py index e5c93f31e..8042b8934 100644 --- a/cle/backends/universal2.py +++ b/cle/backends/universal2.py @@ -117,30 +117,32 @@ def __init__(self, *args, arch=None, **kwargs): slices.append((cputype, cpusubtype, offset, size, align)) self._fat_arches = list(slices) - # Filter to requested architecture, or pick the first slice + # Pick which slice(s) to actually load. Loading every slice into memory at once produces + # conflicting placement requirements (multiple MH_EXECUTE slices all want 0x400000) and + # multiple is_main_bin objects, which break downstream consumers. The rules are: + # - If arch= was passed explicitly, honor it. + # - Otherwise, if we are the main binary, pick the first slice and warn. + # - Otherwise (loaded as a dependency), pick the slice matching the main binary's arch. if arch is not None: if not isinstance(arch, archinfo.Arch): raise TypeError(f"arch must be an archinfo.Arch instance, got {type(arch).__name__}") - filtered = [] - for cputype, cpusubtype, offset, size, align in slices: - slice_arch = _cputype_to_arch(cputype) - if slice_arch is not None and isinstance(arch, type(slice_arch)): - filtered.append((cputype, cpusubtype, offset, size, align)) - if not filtered: + slices = self._filter_slices_by_arch(slices, arch) + elif self._is_main_universal: + if len(slices) > 1: available = [CPU_TYPE_NAMES.get(s[0], f"unknown(0x{s[0]:X})") for s in slices] - raise KeyError( - f"Architecture {arch!r} not found in universal binary. Available architectures: {available}" + log.warning( + "Universal binary contains multiple architectures %s; " + "loading only the first (%s). 
Pass arch= to select a specific slice.", + available, + available[0], ) - slices = filtered - elif len(slices) > 1: - available = [CPU_TYPE_NAMES.get(s[0], f"unknown(0x{s[0]:X})") for s in slices] - log.warning( - "Universal binary contains multiple architectures %s; " - "loading only the first (%s). Pass arch= to select a specific slice.", - available, - available[0], - ) slices = slices[:1] + else: + main_arch = self.loader._main_object.arch if self.loader._main_object is not None else None + if main_arch is None: + slices = slices[:1] + else: + slices = self._filter_slices_by_arch(slices, main_arch) # Load each slice using _load_object_isolated. # Unlike StaticArchive (where children are .o files), universal binary slices @@ -181,6 +183,18 @@ def __init__(self, *args, arch=None, **kwargs): if self.loader._main_object is self: self.loader._main_object = None + @staticmethod + def _filter_slices_by_arch(slices, arch): + filtered = [] + for entry in slices: + slice_arch = _cputype_to_arch(entry[0]) + if slice_arch is not None and isinstance(arch, type(slice_arch)): + filtered.append(entry) + if not filtered: + available = [CPU_TYPE_NAMES.get(s[0], f"unknown(0x{s[0]:X})") for s in slices] + raise KeyError(f"Architecture {arch!r} not found in universal binary. 
Available architectures: {available}") + return filtered + @property def available_arches(self): """Return the list of architecture names present in the universal binary's fat header.""" diff --git a/tests/test_universal2.py b/tests/test_universal2.py index 33610be99..583a281d9 100644 --- a/tests/test_universal2.py +++ b/tests/test_universal2.py @@ -101,3 +101,22 @@ def test_universal2_child_names(): names = {child.binary_basename for child in main.child_objects} assert any("[x64]" in n for n in names) + + +def test_universal2_filter_slices_by_arch(): + """The slice-filter helper used by the dependency-loading path picks the matching arch.""" + # (cputype, cpusubtype, offset, size, align) tuples — only the cputype field matters here. + slices = [ + (0x1000007, 0, 0, 0, 0), # x86_64 + (0x100000C, 0, 0, 0, 0), # aarch64 + ] + aarch64 = Universal2._filter_slices_by_arch(slices, archinfo.ArchAArch64()) + assert len(aarch64) == 1 + assert aarch64[0][0] == 0x100000C + + amd64 = Universal2._filter_slices_by_arch(slices, archinfo.ArchAMD64()) + assert len(amd64) == 1 + assert amd64[0][0] == 0x1000007 + + with pytest.raises(KeyError, match="not found in universal binary"): + Universal2._filter_slices_by_arch(slices, archinfo.ArchMIPS32()) From 173267629c962b2aeec3ffce49adadc328116f28 Mon Sep 17 00:00:00 2001 From: Adam Doupe Date: Sun, 12 Apr 2026 11:34:13 -0700 Subject: [PATCH 5/5] Fix CI: lint, typecheck, and binaries branch matching - macho.py: drop duplicate DyldChainedPtrFormats import alias and use the full enum name in _CHAIN_STRIDE; add docstring to _ChainStride to fix pylint missing-class-docstring. - test_macho_kext.py: assert isinstance(MachO)/isinstance(MachOSegment) so the type checker can resolve segname/sections attributes. - macos.yml + windows.yml: check out a same-named branch from angr/binaries when one exists, falling back to master. Lets cross-repo PRs be picked up on macOS and Windows runners (linux CI already does this via PR-body references).
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/macos.yml | 11 +++++++++++ .github/workflows/windows.yml | 11 +++++++++++ cle/backends/macho/macho.py | 20 +++++++++++--------- tests/test_macho_kext.py | 4 ++++ 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index bb798eb93..1f83d5c42 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -12,10 +12,21 @@ jobs: - uses: actions/checkout@v3 with: path: cle + - name: Resolve binaries branch + id: binaries-ref + shell: bash + run: | + BRANCH="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" + if [ -n "$BRANCH" ] && git ls-remote --exit-code --heads https://github.com/angr/binaries.git "$BRANCH" >/dev/null 2>&1; then + echo "ref=$BRANCH" >> "$GITHUB_OUTPUT" + else + echo "ref=master" >> "$GITHUB_OUTPUT" + fi - uses: actions/checkout@v3 with: repository: angr/binaries path: binaries + ref: ${{ steps.binaries-ref.outputs.ref }} - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index f31803a06..d4448eda6 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -12,10 +12,21 @@ jobs: - uses: actions/checkout@v3 with: path: cle + - name: Resolve binaries branch + id: binaries-ref + shell: bash + run: | + BRANCH="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" + if [ -n "$BRANCH" ] && git ls-remote --exit-code --heads https://github.com/angr/binaries.git "$BRANCH" >/dev/null 2>&1; then + echo "ref=$BRANCH" >> "$GITHUB_OUTPUT" + else + echo "ref=master" >> "$GITHUB_OUTPUT" + fi - uses: actions/checkout@v3 with: repository: angr/binaries path: binaries + ref: ${{ steps.binaries-ref.outputs.ref }} - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/cle/backends/macho/macho.py b/cle/backends/macho/macho.py index 6a268c334..de3920537 100644 --- a/cle/backends/macho/macho.py +++ 
b/cle/backends/macho/macho.py @@ -35,7 +35,6 @@ dyld_chained_fixups_header, dyld_chained_starts_in_segment, ) -from .structs import DyldChainedPtrFormats as _DCPF from .symbol import AbstractMachOSymbol, DyldBoundSymbol, SymbolTableSymbol log = logging.getLogger(name=__name__) @@ -44,6 +43,9 @@ class _ChainStride(typing.NamedTuple): + """Stride layout for a dyld chained-pointer format: the byte multiplier for the `next` + field and whether to read the packed pointer through the Arm64e or Generic64 view.""" + bytes: int use_arm64e: bool @@ -51,14 +53,14 @@ class _ChainStride(typing.NamedTuple): # Per dyld's fixup-chains.h: each pointer format defines a stride (the byte multiplier for the # `next` field) and which packed-pointer layout to read (Arm64e vs Generic64). The two layouts put # `next` at different bit positions, so picking the wrong one yields garbage walks. -_CHAIN_STRIDE: dict[_DCPF, _ChainStride] = { - _DCPF.DYLD_CHAINED_PTR_ARM64E: _ChainStride(bytes=8, use_arm64e=True), - _DCPF.DYLD_CHAINED_PTR_64: _ChainStride(bytes=4, use_arm64e=False), - _DCPF.DYLD_CHAINED_PTR_64_OFFSET: _ChainStride(bytes=4, use_arm64e=False), - _DCPF.DYLD_CHAINED_PTR_ARM64E_KERNEL: _ChainStride(bytes=4, use_arm64e=True), - _DCPF.DYLD_CHAINED_PTR_ARM64E_USERLAND: _ChainStride(bytes=8, use_arm64e=True), - _DCPF.DYLD_CHAINED_PTR_ARM64E_FIRMWARE: _ChainStride(bytes=4, use_arm64e=True), - _DCPF.DYLD_CHAINED_PTR_ARM64E_USERLAND24: _ChainStride(bytes=8, use_arm64e=True), +_CHAIN_STRIDE: dict[DyldChainedPtrFormats, _ChainStride] = { + DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E: _ChainStride(bytes=8, use_arm64e=True), + DyldChainedPtrFormats.DYLD_CHAINED_PTR_64: _ChainStride(bytes=4, use_arm64e=False), + DyldChainedPtrFormats.DYLD_CHAINED_PTR_64_OFFSET: _ChainStride(bytes=4, use_arm64e=False), + DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E_KERNEL: _ChainStride(bytes=4, use_arm64e=True), + DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E_USERLAND: _ChainStride(bytes=8, use_arm64e=True), + 
DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E_FIRMWARE: _ChainStride(bytes=4, use_arm64e=True), + DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E_USERLAND24: _ChainStride(bytes=8, use_arm64e=True), } diff --git a/tests/test_macho_kext.py b/tests/test_macho_kext.py index 7278ba35a..d8238679e 100644 --- a/tests/test_macho_kext.py +++ b/tests/test_macho_kext.py @@ -5,6 +5,7 @@ import cle from cle import MachO from cle.backends.macho.macho_enums import MachoFiletype +from cle.backends.macho.segment import MachOSegment TEST_BASE = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.path.join("..", "..", "binaries")) KEXT = os.path.join(TEST_BASE, "tests", "aarch64", "IPwnKit.macho.kext") @@ -34,6 +35,7 @@ def test_kext_base_addr(): def test_kext_segments(): ld = cle.Loader(KEXT, auto_load_libs=False) mo = ld.main_object + assert isinstance(mo, MachO) segnames = [s.segname for s in mo.segments] assert "__TEXT" in segnames assert "__TEXT_EXEC" in segnames @@ -45,8 +47,10 @@ def test_kext_segments(): def test_kext_sections(): ld = cle.Loader(KEXT, auto_load_libs=False) mo = ld.main_object + assert isinstance(mo, MachO) section_names = set() for seg in mo.segments: + assert isinstance(seg, MachOSegment) for sec in seg.sections: section_names.add((seg.segname, sec.sectname)) assert ("__TEXT_EXEC", "__text") in section_names