Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions pysoot/lifter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class Lifter:
def __init__(
self,
input_file=None,
input_file: str,
input_format="jar",
ir_format="shimple",
additional_jars=None,
Expand Down Expand Up @@ -85,15 +85,10 @@ def _get_ir(self):
for s in settings:
config[s] = str(getattr(self, s, None))

# delayed import
from .soot_manager import SootManager

soot_wrapper = SootManager()
from .soot_manager import run_soot # pylint: disable=import-outside-toplevel

log.info("Running Soot with the following config: " + repr(config))
soot_wrapper.init(**config)
self.classes = soot_wrapper.get_classes()
self._hierarchy = soot_wrapper.compute_hierarchy()
self.classes, self._hierarchy = run_soot(**config)

def getSubclassesOf(self, class_name: str) -> list[str]:
"""Return pre-computed subclasses of the given class name."""
Expand Down
165 changes: 80 additions & 85 deletions pysoot/soot_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pysoot.sootir.soot_class import SootClass


def startJVM():
def _start_jvm():
if jpype.isJVMStarted():
return
jpype.addClassPath(os.path.join(os.path.dirname(__file__), "soot-trunk.jar"))
Expand All @@ -17,87 +17,82 @@ def startJVM():
os.register_at_fork(before=jpype.shutdownJVM)


class SootManager:
    """Run Soot through JPype and expose its results as Python objects.

    Intended call order: construct the manager, call :meth:`init` once, then
    read :meth:`get_classes` and :meth:`compute_hierarchy`. The latter two
    rely on attributes that only :meth:`init` sets.
    """

    def __init__(self):
        # Soot's classes are looked up via JClass, which needs a running JVM.
        # startJVM() returns immediately when one is already started.
        startJVM()

    def init(
        self,
        input_file,
        input_format: str,
        android_sdk: str | None,
        soot_classpath: str | None,
        ir_format: str,
    ):
        """Configure Soot, run its packs, and cache the resulting scene.

        Args:
            input_file: Path handed to Soot as its single process directory.
            input_format: Either ``"apk"`` or ``"jar"``.
            android_sdk: Passed to ``set_android_jars``; only read for
                ``"apk"`` input.
            soot_classpath: Passed to ``set_soot_classpath``; only read for
                ``"jar"`` input.
            ir_format: Either ``"jimple"`` or ``"shimple"``.

        Raises:
            ValueError: If ``input_format`` or ``ir_format`` is not one of
                the supported values.
        """
        # Validate up front so a bad argument fails before any global Soot
        # state is reset or reconfigured.
        if input_format not in ("apk", "jar"):
            raise ValueError("invalid input type")
        if ir_format not in ("jimple", "shimple"):
            raise ValueError("invalid ir format")

        Collections = JClass("java.util.Collections")

        G = JClass("soot.G")
        Hierarchy = JClass("soot.Hierarchy")
        Options = JClass("soot.options.Options")
        PackManager = JClass("soot.PackManager")
        Scene = JClass("soot.Scene")

        G.reset()

        # Fetch the options object once, after the reset, and reuse it.
        options = Options.v()
        options.set_process_dir(Collections.singletonList(input_file))

        if input_format == "apk":
            options.set_android_jars(android_sdk)
            options.set_process_multiple_dex(True)
            options.set_src_prec(Options.src_prec_apk)
        else:  # "jar"
            options.set_soot_classpath(soot_classpath)

        if ir_format == "jimple":
            options.set_output_format(Options.output_format_jimple)
        else:  # "shimple"
            options.set_output_format(Options.output_format_shimple)

        options.set_allow_phantom_refs(True)

        # These options may or may not work.
        options.setPhaseOption("cg", "all-reachable:true")
        options.setPhaseOption("jb.dae", "enabled:false")
        options.setPhaseOption("jb.uce", "enabled:false")
        options.setPhaseOption("jj.dae", "enabled:false")
        options.setPhaseOption("jj.uce", "enabled:false")

        # This avoids an exception in some apks.
        options.set_wrong_staticness(Options.wrong_staticness_ignore)

        Scene.v().loadNecessaryClasses()
        PackManager.v().runPacks()

        self.scene = Scene.v()
        self.raw_classes = self.scene.getClasses()
        self.class_name_map = {c.getName(): c for c in self.raw_classes}
        self.hierarchy = Hierarchy()

    def get_classes(self):
        """Return a dict mapping class name to ``SootClass``.

        Only classes for which ``isApplicationClass()`` is true are converted.
        """
        # TODO with this we only get classes for which we have all the code
        # soot also has classes with lower "resolving levels", but for those we
        # may not have the method list or the code, if we want them we cannot
        # fully translate them
        converted = (
            SootClass.from_ir(raw_class)
            for raw_class in self.raw_classes
            if raw_class.isApplicationClass()
        )
        return {soot_class.name: soot_class for soot_class in converted}

    def compute_hierarchy(self) -> dict[str, list[str]]:
        """Pre-compute subclass relationships for all classes.

        Returns:
            A dict mapping each class name to the names of its subclasses.
            Classes for which the lookup raises are left out of the result.
        """
        import logging  # local import: keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = {}
        for name, raw_class in self.class_name_map.items():
            try:
                subclasses = self.hierarchy.getSubclassesOf(raw_class)
                result[name] = [c.getName() for c in subclasses]
            except Exception as exc:
                # Some classes (e.g. interfaces) may not support
                # getSubclassesOf. Stay best-effort, but leave a trace rather
                # than dropping the class silently.
                logger.debug("No subclass list for %s: %s", name, exc)
        return result
def run_soot(
    input_file: str,
    input_format: str,
    android_sdk: str | None,
    soot_classpath: str | None,
    ir_format: str,
) -> tuple[dict[str, SootClass], dict[str, list[str]]]:
    """Run Soot on the given input and return (classes, hierarchy).

    Args:
        input_file: Path handed to Soot as its single process directory.
        input_format: Either ``"apk"`` or ``"jar"``.
        android_sdk: Passed to ``set_android_jars``; only read for ``"apk"``.
        soot_classpath: Passed to ``set_soot_classpath``; only read for
            ``"jar"``.
        ir_format: Either ``"jimple"`` or ``"shimple"``.

    Returns:
        A ``(classes, hierarchy)`` tuple where

        classes: dict mapping class name to SootClass (application classes only)
        hierarchy: dict mapping class name to list of subclass names; classes
            for which the subclass lookup raises are left out.

    Raises:
        ValueError: If ``input_format`` or ``ir_format`` is not one of the
            supported values.
    """
    import logging  # local import: keeps this block self-contained

    # Validate before starting the JVM or resetting Soot, so a bad argument
    # fails fast and leaves global state untouched.
    if input_format not in ("apk", "jar"):
        raise ValueError("invalid input type")
    if ir_format not in ("jimple", "shimple"):
        raise ValueError("invalid ir format")

    _start_jvm()

    Collections = JClass("java.util.Collections")
    G = JClass("soot.G")
    Hierarchy = JClass("soot.Hierarchy")
    Options = JClass("soot.options.Options")
    PackManager = JClass("soot.PackManager")
    Scene = JClass("soot.Scene")

    G.reset()

    # Fetch the options object once, after the reset, and reuse it.
    options = Options.v()
    options.set_process_dir(Collections.singletonList(input_file))

    if input_format == "apk":
        options.set_android_jars(android_sdk)
        options.set_process_multiple_dex(True)
        options.set_src_prec(Options.src_prec_apk)
    else:  # "jar"
        options.set_soot_classpath(soot_classpath)

    if ir_format == "jimple":
        options.set_output_format(Options.output_format_jimple)
    else:  # "shimple"
        options.set_output_format(Options.output_format_shimple)

    options.set_allow_phantom_refs(True)

    # These options may or may not work.
    options.setPhaseOption("cg", "all-reachable:true")
    options.setPhaseOption("jb.dae", "enabled:false")
    options.setPhaseOption("jb.uce", "enabled:false")
    options.setPhaseOption("jj.dae", "enabled:false")
    options.setPhaseOption("jj.uce", "enabled:false")

    # This avoids an exception in some apks.
    options.set_wrong_staticness(Options.wrong_staticness_ignore)

    Scene.v().loadNecessaryClasses()
    PackManager.v().runPacks()

    raw_classes = Scene.v().getClasses()
    class_name_map = {c.getName(): c for c in raw_classes}

    # Convert application classes to Python IR
    classes = {}
    for raw_class in raw_classes:
        if raw_class.isApplicationClass():
            soot_class = SootClass.from_ir(raw_class)
            classes[soot_class.name] = soot_class

    # Pre-compute subclass relationships
    logger = logging.getLogger(__name__)
    hierarchy_obj = Hierarchy()
    hierarchy = {}
    for name, raw_class in class_name_map.items():
        try:
            subclasses = hierarchy_obj.getSubclassesOf(raw_class)
            hierarchy[name] = [c.getName() for c in subclasses]
        except Exception as exc:
            # Some classes (e.g. interfaces) may not support getSubclassesOf.
            # Stay best-effort, but leave a trace rather than dropping the
            # class silently.
            logger.debug("No subclass list for %s: %s", name, exc)

    return classes, hierarchy
Loading