Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/workflows/macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,21 @@ jobs:
- uses: actions/checkout@v3
with:
path: cle
- name: Resolve binaries branch
id: binaries-ref
shell: bash
run: |
BRANCH="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
if [ -n "$BRANCH" ] && git ls-remote --exit-code --heads https://github.com/angr/binaries.git "$BRANCH" >/dev/null 2>&1; then
echo "ref=$BRANCH" >> "$GITHUB_OUTPUT"
else
echo "ref=master" >> "$GITHUB_OUTPUT"
fi
- uses: actions/checkout@v3
with:
repository: angr/binaries
path: binaries
ref: ${{ steps.binaries-ref.outputs.ref }}
- uses: actions/setup-python@v4
with:
python-version: "3.10"
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,21 @@ jobs:
- uses: actions/checkout@v3
with:
path: cle
- name: Resolve binaries branch
id: binaries-ref
shell: bash
run: |
BRANCH="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
if [ -n "$BRANCH" ] && git ls-remote --exit-code --heads https://github.com/angr/binaries.git "$BRANCH" >/dev/null 2>&1; then
echo "ref=$BRANCH" >> "$GITHUB_OUTPUT"
else
echo "ref=master" >> "$GITHUB_OUTPUT"
fi
- uses: actions/checkout@v3
with:
repository: angr/binaries
path: binaries
ref: ${{ steps.binaries-ref.outputs.ref }}
- uses: actions/setup-python@v4
with:
python-version: "3.10"
Expand Down
111 changes: 84 additions & 27 deletions cle/backends/macho/macho.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,28 @@
__all__ = ("MachO", "MachOSection", "MachOSegment", "SymbolList")


class _ChainStride(typing.NamedTuple):
    """Describe how to step through one dyld chained-pointer format.

    Pairs the byte multiplier that is applied to a pointer's `next` field with the
    packed-pointer view (Arm64e or Generic64) the pointer has to be read through.
    """

    # Number of bytes that one unit of the `next` field stands for.
    bytes: int
    # True to read the packed pointer through the Arm64e view, False for Generic64.
    use_arm64e: bool


# Stride table for the chained-pointer formats, following dyld's fixup-chains.h. Every row names
# a pointer format, the number of bytes one unit of its `next` field stands for, and whether the
# packed pointer is read through the Arm64e view (True) or the Generic64 view (False). Reading
# `next` through the wrong view decodes the wrong bits and sends the chain walk into garbage.
_CHAIN_STRIDE: dict[DyldChainedPtrFormats, _ChainStride] = {
    pointer_format: _ChainStride(bytes=stride_bytes, use_arm64e=arm64e_view)
    for pointer_format, stride_bytes, arm64e_view in (
        (DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E, 8, True),
        (DyldChainedPtrFormats.DYLD_CHAINED_PTR_64, 4, False),
        (DyldChainedPtrFormats.DYLD_CHAINED_PTR_64_OFFSET, 4, False),
        (DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E_KERNEL, 4, True),
        (DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E_USERLAND, 8, True),
        (DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E_FIRMWARE, 4, True),
        (DyldChainedPtrFormats.DYLD_CHAINED_PTR_ARM64E_USERLAND24, 8, True),
    )
}


# pylint: disable=abstract-method
class SymbolList(SortedKeyList):
"""
Expand Down Expand Up @@ -157,8 +179,12 @@ def __init__(self, *args, **kwargs):
"7I", binary_file, 0, 28
)

# Libraries are always implicitly PIC
self.pic = bool(self.flags & MH_flags.MH_PIE) or bool(self.filetype & MachoFiletype.MH_DYLIB)
# Libraries, bundles, and kexts are always implicitly PIC
self.pic = bool(self.flags & MH_flags.MH_PIE) or self.filetype in (
MachoFiletype.MH_DYLIB,
MachoFiletype.MH_BUNDLE,
MachoFiletype.MH_KEXT_BUNDLE,
)

if not bool(self.flags & MH_flags.MH_TWOLEVEL): # ensure MH_TWOLEVEL
log.error(
Expand Down Expand Up @@ -207,6 +233,9 @@ def __init__(self, *args, **kwargs):
# A Library is loaded as a dependency, this is fine, the loader will map it to somewhere above the main
# binary, so we don't need to do anything
pass
elif self.filetype in (MachoFiletype.MH_BUNDLE, MachoFiletype.MH_KEXT_BUNDLE):
if self.is_main_bin:
self._custom_base_addr = 0
else:
# This case is not explicitly supported yet.
# There are various other MachoFiletypes, which might have different quirks in their loading
Expand Down Expand Up @@ -1154,60 +1183,88 @@ def _parse_dyld_chained_fixups(self):
starts = self._get_struct(dyld_chained_starts_in_segment, starts_addr)

seg = self.find_segment_containing(starts.segment_offset)
# There are weird binaries where the offsets inside the file
# and inside the virtual addr space don't match anymore.
# This isn't properly supported yet, and the only known case is the __PII section inside the __ETC segment
# of rare binaries, which isn't that important for most purposes
shift = seg.vaddr - (seg.offset)
if shift != 0:
assert isinstance(seg, MachOSegment)
assert seg.segname == "__ETC", (
"Only __ETC segments are known to have this shift, please open an"
" issue for this binary so it can be investigated"
)
log.error("Segment shift detected in, not handling fixups here for now")
continue
# In some binaries (kexts, __ETC segments) the segment's file offset and virtual
# address differ. Chain entries are read at *file* offsets but relocation addresses
# must be virtual (relative to the linked base). Compute the delta once here and
# add it when creating relocations below.
file_to_vaddr_shift = seg.vaddr - seg.offset if seg is not None else 0

page_starts_data = self._read(self._binary_stream, starts_addr + 22, starts.page_count * 2)
page_starts = struct.unpack("<" + ("H" * starts.page_count), page_starts_data)

pointer_format: DyldChainedPtrFormats = starts.pointer_format
log.info("Page has pointer_format: %s", pointer_format)
# Each pointer format has its own (next, stride) layout. Both views start `next` at bit 51,
# but Generic64 reads 12 bits while Arm64e reads 11 (its bit 62 is the bind flag), so the wrong
# view yields a garbage `next` — see the kext path with DYLD_CHAINED_PTR_ARM64E_KERNEL.
stride = _CHAIN_STRIDE.get(pointer_format)
if stride is None:
raise NotImplementedError(f"Chain stride for pointer format {pointer_format} not known")
is_arm64e = stride.use_arm64e
for j, start in enumerate(page_starts):
if start == DYLD_CHAINED_PTR_START_NONE:
continue
chain_entry_addr = starts.segment_offset + (j * starts.page_size) + start
current_chain_addr = chain_entry_addr
page_base = starts.segment_offset + (j * starts.page_size)
page_end = page_base + starts.page_size
current_chain_addr = page_base + start
log.info("Reading chain at %x", current_chain_addr)

while True:
chained_rebase_ptr: ChainedFixupPointerOnDisk = self._get_struct(
ChainedFixupPointerOnDisk, current_chain_addr
)
try:
chained_rebase_ptr: ChainedFixupPointerOnDisk = self._get_struct(
ChainedFixupPointerOnDisk, current_chain_addr
)
except ValueError:
log.warning("Chain entry at %#x extends past end of file; stopping", current_chain_addr)
break
bind = chained_rebase_ptr.isBind(pointer_format)
rebase = chained_rebase_ptr.isRebase(pointer_format, self.mapped_base)
if bind is not None:
libOrdinal, _addend = bind
if libOrdinal >= len(self._dyld_imports):
log.error(
"Chained fixup bind ordinal %d out of range (have %d imports) at %#x; "
"stopping chain walk",
libOrdinal,
len(self._dyld_imports),
current_chain_addr,
)
break
import_symbol = self._dyld_imports[libOrdinal]
reloc = MachOSymbolRelocation(self, import_symbol, current_chain_addr, None)
reloc_addr = current_chain_addr + file_to_vaddr_shift
reloc = MachOSymbolRelocation(self, import_symbol, reloc_addr, None)
self.relocs.append(reloc)
# Legacy Code uses bind_xrefs, explicitly add this to make this compatible for now
import_symbol.bind_xrefs.append(reloc.dest_addr + self.linked_base)
log.debug("Binding for %s found at %x", import_symbol, current_chain_addr)
log.debug("Binding for %s found at %x", import_symbol, reloc_addr)
elif rebase is not None:
reloc_addr = current_chain_addr + file_to_vaddr_shift
target = self.linked_base + rebase
location: MemoryPointer = self.linked_base + current_chain_addr
anon_reloc = MachOPointerRelocation(owner=self, relative_addr=current_chain_addr, data=rebase)
location: MemoryPointer = self.linked_base + reloc_addr
anon_reloc = MachOPointerRelocation(owner=self, relative_addr=reloc_addr, data=rebase)
self.relocs.append(anon_reloc)
log.debug("Rebase to %x found at %x", target, location)

else:
raise CLEInvalidBinaryError("FixupPointer was neither bind nor rebase, that shouldn't happen")

skip = chained_rebase_ptr.generic64.rebase.next * 4
current_chain_addr += skip
if is_arm64e:
next_count = chained_rebase_ptr.arm64e.rebase.next
else:
next_count = chained_rebase_ptr.generic64.rebase.next
skip = next_count * stride.bytes
if skip == 0:
break
current_chain_addr += skip
if current_chain_addr >= page_end:
# Chains are per-page; if a malformed chain would walk into the next page,
# stop rather than reinterpreting unrelated data as fixup entries.
log.warning(
"Chain walked past page end at %#x (page %#x..%#x); stopping",
current_chain_addr,
page_base,
page_end,
)
break

def get_symbol_by_address_fuzzy(self, address):
"""
Expand Down
9 changes: 9 additions & 0 deletions cle/backends/macho/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ class dyld_chained_ptr_arm64e_bind24(HelperStruct):
https://github.com/apple-opensource/dyld/blob/852.2/include/mach-o/fixup-chains.h#L164-L173
"""

_fields_ = [
("ordinal", c_uint64, 24),
("zero", c_uint64, 8),
("addend", c_uint64, 19),
("next", c_uint64, 11),
("bind", c_uint64, 1),
("auth", c_uint64, 1),
]


# noinspection PyPep8Naming
class dyld_chained_ptr_arm64e_auth_bind24(HelperStruct):
Expand Down
43 changes: 33 additions & 10 deletions cle/backends/universal2.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,21 +117,32 @@ def __init__(self, *args, arch=None, **kwargs):
slices.append((cputype, cpusubtype, offset, size, align))
self._fat_arches = list(slices)

# Filter to requested architecture if specified
# Pick which slice(s) to actually load. Loading every slice into memory at once produces
# conflicting placement requirements (multiple MH_EXECUTE slices all want 0x400000) and
# multiple is_main_bin objects, which break downstream consumers. The rules are:
# - If arch= was passed explicitly, honor it.
# - Otherwise, if we are the main binary, pick the first slice and warn.
# - Otherwise (loaded as a dependency), pick the slice matching the main binary's arch.
if arch is not None:
if not isinstance(arch, archinfo.Arch):
raise TypeError(f"arch must be an archinfo.Arch instance, got {type(arch).__name__}")
filtered = []
for cputype, cpusubtype, offset, size, align in slices:
slice_arch = _cputype_to_arch(cputype)
if slice_arch is not None and isinstance(arch, type(slice_arch)):
filtered.append((cputype, cpusubtype, offset, size, align))
if not filtered:
slices = self._filter_slices_by_arch(slices, arch)
elif self._is_main_universal:
if len(slices) > 1:
available = [CPU_TYPE_NAMES.get(s[0], f"unknown(0x{s[0]:X})") for s in slices]
raise KeyError(
f"Architecture {arch!r} not found in universal binary. Available architectures: {available}"
log.warning(
"Universal binary contains multiple architectures %s; "
"loading only the first (%s). Pass arch= to select a specific slice.",
available,
available[0],
)
slices = filtered
slices = slices[:1]
else:
main_arch = self.loader._main_object.arch if self.loader._main_object is not None else None
if main_arch is None:
slices = slices[:1]
else:
slices = self._filter_slices_by_arch(slices, main_arch)

# Load each slice using _load_object_isolated.
# Unlike StaticArchive (where children are .o files), universal binary slices
Expand Down Expand Up @@ -172,6 +183,18 @@ def __init__(self, *args, arch=None, **kwargs):
if self.loader._main_object is self:
self.loader._main_object = None

@staticmethod
def _filter_slices_by_arch(slices, arch):
    """
    Keep only the fat-header slices whose CPU type corresponds to ``arch``.

    :param slices: Slice entries; the first element of each entry is its cputype.
    :param arch:   The architecture object the kept slices have to match.
    :return:       A new list holding the matching entries, in their original order.
    :raises KeyError: If not a single slice matches ``arch``.
    """

    def _matches(entry):
        # A slice matches when its cputype maps to a known arch and ``arch`` is an
        # instance of that arch's class.
        candidate = _cputype_to_arch(entry[0])
        return candidate is not None and isinstance(arch, type(candidate))

    matching = [entry for entry in slices if _matches(entry)]
    if matching:
        return matching

    # Nothing matched: list what the binary does contain so the caller can pick one.
    available = [CPU_TYPE_NAMES.get(s[0], f"unknown(0x{s[0]:X})") for s in slices]
    raise KeyError(f"Architecture {arch!r} not found in universal binary. Available architectures: {available}")

@property
def available_arches(self):
"""Return the list of architecture names present in the universal binary's fat header."""
Expand Down
93 changes: 93 additions & 0 deletions tests/test_macho_kext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import annotations

import os

import cle
from cle import MachO
from cle.backends.macho.macho_enums import MachoFiletype
from cle.backends.macho.segment import MachOSegment

# The test binaries live in a sibling `binaries` directory, two levels up from this file.
TEST_BASE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "..", "binaries")
# AArch64 kext sample that every test in this module loads.
KEXT = os.path.join(TEST_BASE, "tests", "aarch64", "IPwnKit.macho.kext")


def test_kext_loads():
    """The kext loads as a MachO object and reports the kext-bundle filetype."""
    main = cle.Loader(KEXT, auto_load_libs=False).main_object
    assert isinstance(main, MachO)
    assert main.filetype == MachoFiletype.MH_KEXT_BUNDLE


def test_kext_arch():
    """The loaded kext reports AARCH64 as its architecture name."""
    loader = cle.Loader(KEXT, auto_load_libs=False)
    arch_name = loader.main_object.arch.name
    assert arch_name == "AARCH64"


def test_kext_pic():
    """The loaded kext is flagged as position-independent."""
    kext = cle.Loader(KEXT, auto_load_libs=False).main_object
    assert kext.pic is True


def test_kext_base_addr():
    """A kext loaded as the main object is mapped at base address 0."""
    kext = cle.Loader(KEXT, auto_load_libs=False).main_object
    assert kext.mapped_base == 0


def test_kext_segments():
    """Every segment name expected in the kext shows up among the loaded segments."""
    mo = cle.Loader(KEXT, auto_load_libs=False).main_object
    assert isinstance(mo, MachO)
    present = [segment.segname for segment in mo.segments]
    for expected in ("__TEXT", "__TEXT_EXEC", "__DATA", "__DATA_CONST", "__LINKEDIT"):
        assert expected in present


def test_kext_sections():
    """The expected (segment, section) name pairs are present in the loaded kext."""
    mo = cle.Loader(KEXT, auto_load_libs=False).main_object
    assert isinstance(mo, MachO)

    # Gather every (segment name, section name) pair, checking segment types along the way.
    found = set()
    for segment in mo.segments:
        assert isinstance(segment, MachOSegment)
        found.update((segment.segname, section.sectname) for section in segment.sections)

    for expected in (
        ("__TEXT_EXEC", "__text"),
        ("__TEXT_EXEC", "__auth_stubs"),
        ("__DATA_CONST", "__auth_got"),
        ("__DATA_CONST", "__got"),
    ):
        assert expected in found


def test_kext_symbols():
    """The kext exposes more than 100 symbols, including its well-known entry points."""
    kext = cle.Loader(KEXT, auto_load_libs=False).main_object
    assert len(kext.symbols) > 100
    names = {symbol.name for symbol in kext.symbols}
    for expected in ("_kmod_info", "__realmain", "_IPwnKit_start", "_IPwnKit_stop"):
        assert expected in names


def test_kext_iokit_class_symbols():
    """The mangled C++ symbols of the kext's IOKit classes are part of the symbol table."""
    kext = cle.Loader(KEXT, auto_load_libs=False).main_object
    names = {symbol.name for symbol in kext.symbols}
    for expected in (
        "__ZN21io_oooverflow_IPwnKit5startEP9IOService",
        "__ZN31io_oooverflow_IPwnKitUserClient10gMetaClassE",
    ):
        assert expected in names


def test_kext_relocations():
    """Loading the kext yields at least one relocation."""
    relocs = cle.Loader(KEXT, auto_load_libs=False).main_object.relocs
    assert len(relocs) > 0


def test_kext_code_readable():
    """Four non-zero bytes can be read from memory at the `_IPwnKit_start` symbol."""
    mo = cle.Loader(KEXT, auto_load_libs=False).main_object

    # Only consider definitions of the symbol that carry a non-zero relative address.
    candidates = [sym for sym in mo.symbols if sym.name == "_IPwnKit_start" and sym.relative_addr != 0]
    assert len(candidates) > 0

    first_word = mo.memory.load(candidates[0].relative_addr, 4)
    assert len(first_word) == 4
    assert first_word != b"\x00\x00\x00\x00"
Loading
Loading