diff --git a/pysoot/lifter.py b/pysoot/lifter.py index e9de99a..43a7dc5 100644 --- a/pysoot/lifter.py +++ b/pysoot/lifter.py @@ -13,7 +13,7 @@ class Lifter: def __init__( self, - input_file=None, + input_file: str, input_format="jar", ir_format="shimple", additional_jars=None, @@ -85,15 +85,10 @@ def _get_ir(self): for s in settings: config[s] = str(getattr(self, s, None)) - # delayed import - from .soot_manager import SootManager - - soot_wrapper = SootManager() + from .soot_manager import run_soot # pylint: disable=import-outside-toplevel log.info("Running Soot with the following config: " + repr(config)) - soot_wrapper.init(**config) - self.classes = soot_wrapper.get_classes() - self._hierarchy = soot_wrapper.compute_hierarchy() + self.classes, self._hierarchy = run_soot(**config) def getSubclassesOf(self, class_name: str) -> list[str]: """Return pre-computed subclasses of the given class name.""" diff --git a/pysoot/soot_manager.py b/pysoot/soot_manager.py index 6895502..c18d794 100644 --- a/pysoot/soot_manager.py +++ b/pysoot/soot_manager.py @@ -8,7 +8,7 @@ from pysoot.sootir.soot_class import SootClass -def startJVM(): +def _start_jvm(): if jpype.isJVMStarted(): return jpype.addClassPath(os.path.join(os.path.dirname(__file__), "soot-trunk.jar")) @@ -17,87 +17,82 @@ def startJVM(): os.register_at_fork(before=jpype.shutdownJVM) -class SootManager: - def __init__(self): - startJVM() - - def init( - self, - input_file, - input_format: str, - android_sdk: str | None, - soot_classpath: str | None, - ir_format: str, - ): - Collections = JClass("java.util.Collections") - - G = JClass("soot.G") - Hierarchy = JClass("soot.Hierarchy") - Options = JClass("soot.options.Options") - PackManager = JClass("soot.PackManager") - Scene = JClass("soot.Scene") - - G.reset() - - Options.v().set_process_dir(Collections.singletonList(input_file)) - - if input_format == "apk": - Options.v().set_android_jars(android_sdk) - Options.v().set_process_multiple_dex(True) - 
Options.v().set_src_prec(Options.src_prec_apk) - elif input_format == "jar": - Options.v().set_soot_classpath(soot_classpath) - else: - raise Exception("invalid input type") - - if ir_format == "jimple": - Options.v().set_output_format(Options.output_format_jimple) - elif ir_format == "shimple": - Options.v().set_output_format(Options.output_format_shimple) - else: - raise Exception("invalid ir format") - - Options.v().set_allow_phantom_refs(True) - - # this options may or may not work - Options.v().setPhaseOption("cg", "all-reachable:true") - Options.v().setPhaseOption("jb.dae", "enabled:false") - Options.v().setPhaseOption("jb.uce", "enabled:false") - Options.v().setPhaseOption("jj.dae", "enabled:false") - Options.v().setPhaseOption("jj.uce", "enabled:false") - - # this avoids an exception in some apks - Options.v().set_wrong_staticness(Options.wrong_staticness_ignore) - - Scene.v().loadNecessaryClasses() - PackManager.v().runPacks() - - self.scene = Scene.v() - self.raw_classes = self.scene.getClasses() - self.class_name_map = {c.getName(): c for c in self.raw_classes} - self.hierarchy = Hierarchy() - - def get_classes(self): - classes = {} - for raw_class in self.raw_classes: - # TODO with this we only get classes for which we have all the code - # soot also has classes with lower "resolving levels", but for those we may - # not have the method list or the code, if we want them we cannot fully - # translate them - if raw_class.isApplicationClass(): - soot_class = SootClass.from_ir(raw_class) - classes[soot_class.name] = soot_class - return classes - - def compute_hierarchy(self) -> dict[str, list[str]]: - """Pre-compute subclass relationships for all classes.""" - result = {} - for name, raw_class in self.class_name_map.items(): - try: - result[name] = [ - c.getName() for c in self.hierarchy.getSubclassesOf(raw_class) - ] - except Exception: - # Some classes (e.g. 
def run_soot(
    input_file: str,
    input_format: str,
    android_sdk: str | None,
    soot_classpath: str | None,
    ir_format: str,
) -> tuple[dict[str, SootClass], dict[str, list[str]]]:
    """Run Soot on the given input and return ``(classes, hierarchy)``.

    Args:
        input_file: Path handed to Soot as its single process directory.
        input_format: ``"apk"`` or ``"jar"``.
        android_sdk: Passed to ``Options.set_android_jars``; only used when
            ``input_format`` is ``"apk"``.
        soot_classpath: Passed to ``Options.set_soot_classpath``; only used
            when ``input_format`` is ``"jar"``.
        ir_format: ``"jimple"`` or ``"shimple"``.

    Returns:
        A ``(classes, hierarchy)`` tuple where ``classes`` maps class name to
        ``SootClass`` (application classes only) and ``hierarchy`` maps class
        name to the list of its subclass names. Classes for which Soot cannot
        compute subclasses are left out of ``hierarchy``.

    Raises:
        ValueError: If ``input_format`` or ``ir_format`` is not one of the
            supported values.
    """
    # Validate before touching the JVM so that a bad argument fails fast and
    # does not reset or half-configure Soot's global state.
    if input_format not in ("apk", "jar"):
        raise ValueError("invalid input type")
    if ir_format not in ("jimple", "shimple"):
        raise ValueError("invalid ir format")

    _start_jvm()

    Collections = JClass("java.util.Collections")
    G = JClass("soot.G")
    Hierarchy = JClass("soot.Hierarchy")
    Options = JClass("soot.options.Options")
    PackManager = JClass("soot.PackManager")
    Scene = JClass("soot.Scene")

    # NOTE(review): presumably discards Soot singletons left over from a
    # previous run -- confirm against the Soot documentation.
    G.reset()

    # Fetch the options singleton only after the reset above.
    options = Options.v()
    options.set_process_dir(Collections.singletonList(input_file))

    if input_format == "apk":
        options.set_android_jars(android_sdk)
        options.set_process_multiple_dex(True)
        options.set_src_prec(Options.src_prec_apk)
    else:  # "jar", guaranteed by the validation above
        options.set_soot_classpath(soot_classpath)

    if ir_format == "jimple":
        options.set_output_format(Options.output_format_jimple)
    else:  # "shimple", guaranteed by the validation above
        options.set_output_format(Options.output_format_shimple)

    options.set_allow_phantom_refs(True)

    # these options may or may not work
    options.setPhaseOption("cg", "all-reachable:true")
    options.setPhaseOption("jb.dae", "enabled:false")
    options.setPhaseOption("jb.uce", "enabled:false")
    options.setPhaseOption("jj.dae", "enabled:false")
    options.setPhaseOption("jj.uce", "enabled:false")

    # this avoids an exception in some apks
    options.set_wrong_staticness(Options.wrong_staticness_ignore)

    scene = Scene.v()
    scene.loadNecessaryClasses()
    PackManager.v().runPacks()

    raw_classes = scene.getClasses()
    # Snapshot name -> class before converting anything.
    class_name_map = {c.getName(): c for c in raw_classes}

    # Convert application classes to Python IR
    converted = (
        SootClass.from_ir(raw_class)
        for raw_class in raw_classes
        if raw_class.isApplicationClass()
    )
    classes = {soot_class.name: soot_class for soot_class in converted}

    # Pre-compute subclass relationships
    import logging  # pylint: disable=import-outside-toplevel

    logger = logging.getLogger(__name__)
    hierarchy_obj = Hierarchy()
    hierarchy = {}
    for name, raw_class in class_name_map.items():
        try:
            hierarchy[name] = [
                c.getName() for c in hierarchy_obj.getSubclassesOf(raw_class)
            ]
        except Exception:  # best-effort: skip the class, but leave a trace
            # Some classes (e.g. interfaces) may not support getSubclassesOf
            logger.debug(
                "Skipping hierarchy entry for %s", name, exc_info=True
            )

    return classes, hierarchy