From d831f867cc385c215a297ef90ad017b2d2ff6573 Mon Sep 17 00:00:00 2001 From: Kevin Phoenix Date: Tue, 24 Mar 2026 16:44:44 -0700 Subject: [PATCH 1/5] Refactor SootManager class into a single lift function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SootManager class held no meaningful state between its methods — they were always called sequentially. Replace it with a plain `lift()` function that returns `(classes, hierarchy)` directly. Co-Authored-By: Claude Opus 4.6 (1M context) --- pysoot/lifter.py | 8 +- pysoot/soot_manager.py | 165 ++++++++++++++++++++--------------------- 2 files changed, 82 insertions(+), 91 deletions(-) diff --git a/pysoot/lifter.py b/pysoot/lifter.py index e9de99a..c995474 100644 --- a/pysoot/lifter.py +++ b/pysoot/lifter.py @@ -86,14 +86,10 @@ def _get_ir(self): config[s] = str(getattr(self, s, None)) # delayed import - from .soot_manager import SootManager - - soot_wrapper = SootManager() + from .soot_manager import lift log.info("Running Soot with the following config: " + repr(config)) - soot_wrapper.init(**config) - self.classes = soot_wrapper.get_classes() - self._hierarchy = soot_wrapper.compute_hierarchy() + self.classes, self._hierarchy = lift(**config) def getSubclassesOf(self, class_name: str) -> list[str]: """Return pre-computed subclasses of the given class name.""" diff --git a/pysoot/soot_manager.py b/pysoot/soot_manager.py index 6895502..8b232de 100644 --- a/pysoot/soot_manager.py +++ b/pysoot/soot_manager.py @@ -8,7 +8,7 @@ from pysoot.sootir.soot_class import SootClass -def startJVM(): +def _start_jvm(): if jpype.isJVMStarted(): return jpype.addClassPath(os.path.join(os.path.dirname(__file__), "soot-trunk.jar")) @@ -17,87 +17,82 @@ def startJVM(): os.register_at_fork(before=jpype.shutdownJVM) -class SootManager: - def __init__(self): - startJVM() - - def init( - self, - input_file, - input_format: str, - android_sdk: str | None, - soot_classpath: str | None, - ir_format: str, - ): - Collections = JClass("java.util.Collections") - - G = JClass("soot.G") - Hierarchy = JClass("soot.Hierarchy") - Options = JClass("soot.options.Options") - PackManager = JClass("soot.PackManager") - Scene = JClass("soot.Scene") - - G.reset() - - Options.v().set_process_dir(Collections.singletonList(input_file)) - - if input_format == "apk": - Options.v().set_android_jars(android_sdk) - Options.v().set_process_multiple_dex(True) - Options.v().set_src_prec(Options.src_prec_apk) - elif input_format == "jar": - Options.v().set_soot_classpath(soot_classpath) - else: - raise Exception("invalid input type") - - if ir_format == "jimple": - Options.v().set_output_format(Options.output_format_jimple) - elif ir_format == "shimple": - Options.v().set_output_format(Options.output_format_shimple) - else: - raise Exception("invalid ir format") - - Options.v().set_allow_phantom_refs(True) - - # this options may or may not work - Options.v().setPhaseOption("cg", "all-reachable:true") - Options.v().setPhaseOption("jb.dae", "enabled:false") - Options.v().setPhaseOption("jb.uce", "enabled:false") - Options.v().setPhaseOption("jj.dae", "enabled:false") - Options.v().setPhaseOption("jj.uce", "enabled:false") - - # this avoids an exception in some apks - Options.v().set_wrong_staticness(Options.wrong_staticness_ignore) - - Scene.v().loadNecessaryClasses() - PackManager.v().runPacks() - - self.scene = Scene.v() - self.raw_classes = self.scene.getClasses() - self.class_name_map = {c.getName(): c for c in self.raw_classes} - self.hierarchy = Hierarchy() - - def get_classes(self): - classes = {} - for raw_class in self.raw_classes: - # TODO with this we only get classes for which we have all the code - # soot also has classes with lower "resolving levels", but for those we may - # not have the method list or the code, if we want them we cannot fully - # translate them - if raw_class.isApplicationClass(): - soot_class = SootClass.from_ir(raw_class) - classes[soot_class.name] = soot_class - return classes - - def compute_hierarchy(self) -> dict[str, list[str]]: - """Pre-compute subclass relationships for all classes.""" - result = {} - for name, raw_class in self.class_name_map.items(): - try: - result[name] = [ - c.getName() for c in self.hierarchy.getSubclassesOf(raw_class) - ] - except Exception: - # Some classes (e.g. interfaces) may not support getSubclassesOf - pass - return result +def lift( + input_file: str, + input_format: str, + android_sdk: str | None, + soot_classpath: str | None, + ir_format: str, +) -> tuple[dict[str, SootClass], dict[str, list[str]]]: + """Run Soot on the given input and return (classes, hierarchy). + + classes: dict mapping class name to SootClass (application classes only) + hierarchy: dict mapping class name to list of subclass names + """ + _start_jvm() + + Collections = JClass("java.util.Collections") + G = JClass("soot.G") + Hierarchy = JClass("soot.Hierarchy") + Options = JClass("soot.options.Options") + PackManager = JClass("soot.PackManager") + Scene = JClass("soot.Scene") + + G.reset() + + Options.v().set_process_dir(Collections.singletonList(input_file)) + + if input_format == "apk": + Options.v().set_android_jars(android_sdk) + Options.v().set_process_multiple_dex(True) + Options.v().set_src_prec(Options.src_prec_apk) + elif input_format == "jar": + Options.v().set_soot_classpath(soot_classpath) + else: + raise Exception("invalid input type") + + if ir_format == "jimple": + Options.v().set_output_format(Options.output_format_jimple) + elif ir_format == "shimple": + Options.v().set_output_format(Options.output_format_shimple) + else: + raise Exception("invalid ir format") + + Options.v().set_allow_phantom_refs(True) + + # this options may or may not work + Options.v().setPhaseOption("cg", "all-reachable:true") + Options.v().setPhaseOption("jb.dae", "enabled:false") + Options.v().setPhaseOption("jb.uce", "enabled:false") + Options.v().setPhaseOption("jj.dae", "enabled:false") + Options.v().setPhaseOption("jj.uce", "enabled:false") + + # this avoids an exception in some apks + Options.v().set_wrong_staticness(Options.wrong_staticness_ignore) + + Scene.v().loadNecessaryClasses() + PackManager.v().runPacks() + + raw_classes = Scene.v().getClasses() + class_name_map = {c.getName(): c for c in raw_classes} + + # Convert application classes to Python IR + classes = {} + for raw_class in raw_classes: + if raw_class.isApplicationClass(): + soot_class = SootClass.from_ir(raw_class) + classes[soot_class.name] = soot_class + + # Pre-compute subclass relationships + hierarchy_obj = Hierarchy() + hierarchy = {} + for name, raw_class in class_name_map.items(): + try: + hierarchy[name] = [ + c.getName() for c in hierarchy_obj.getSubclassesOf(raw_class) + ] + except Exception: + # Some classes (e.g. interfaces) may not support getSubclassesOf + pass + + return classes, hierarchy From c816443709ae14fdb3c00067733eb6b2e162060b Mon Sep 17 00:00:00 2001 From: Kevin Phoenix Date: Tue, 24 Mar 2026 17:20:49 -0700 Subject: [PATCH 2/5] Rename lift to run_soot Co-Authored-By: Claude Opus 4.6 (1M context) --- pysoot/lifter.py | 4 ++-- pysoot/soot_manager.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pysoot/lifter.py b/pysoot/lifter.py index c995474..8d91a16 100644 --- a/pysoot/lifter.py +++ b/pysoot/lifter.py @@ -86,10 +86,10 @@ def _get_ir(self): config[s] = str(getattr(self, s, None)) # delayed import - from .soot_manager import lift + from .soot_manager import run_soot log.info("Running Soot with the following config: " + repr(config)) - self.classes, self._hierarchy = lift(**config) + self.classes, self._hierarchy = run_soot(**config) def getSubclassesOf(self, class_name: str) -> list[str]: """Return pre-computed subclasses of the given class name.""" diff --git a/pysoot/soot_manager.py b/pysoot/soot_manager.py index 8b232de..c18d794 100644 --- a/pysoot/soot_manager.py +++ b/pysoot/soot_manager.py @@ -17,7 +17,7 @@ def _start_jvm(): os.register_at_fork(before=jpype.shutdownJVM) -def lift( +def run_soot( input_file: str, input_format: str, android_sdk: str | None, From 8d9af36c1cfbddd48537a607c35116d346e084ee Mon Sep 17 00:00:00 2001 From: Kevin Phoenix Date: Tue, 24 Mar 2026 17:50:16 -0700 Subject: [PATCH 3/5] Make input_file a required str parameter Fixes pyright error: input_file defaulting to None is invalid since it's immediately passed to os.path.realpath which requires a str. Co-Authored-By: Claude Opus 4.6 (1M context) --- pysoot/lifter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pysoot/lifter.py b/pysoot/lifter.py index 8d91a16..61fdd16 100644 --- a/pysoot/lifter.py +++ b/pysoot/lifter.py @@ -13,7 +13,7 @@ class Lifter: def __init__( self, - input_file=None, + input_file: str, input_format="jar", ir_format="shimple", additional_jars=None, From 3f934b753b660395713ebfa3f6ee5626e0f6c442 Mon Sep 17 00:00:00 2001 From: Kevin Phoenix Date: Tue, 24 Mar 2026 17:50:37 -0700 Subject: [PATCH 4/5] Move run_soot import to top level No longer needs to be a delayed import since importing soot_manager no longer triggers JVM startup. Co-Authored-By: Claude Opus 4.6 (1M context) --- pysoot/lifter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pysoot/lifter.py b/pysoot/lifter.py index 61fdd16..e4d3644 100644 --- a/pysoot/lifter.py +++ b/pysoot/lifter.py @@ -5,6 +5,7 @@ import subprocess from .errors import JavaNotFoundError, MissingJavaRuntimeJarsError, ParameterError +from .soot_manager import run_soot log = logging.getLogger("pysoot.lifter") @@ -85,9 +86,6 @@ def _get_ir(self): for s in settings: config[s] = str(getattr(self, s, None)) - # delayed import - from .soot_manager import run_soot - log.info("Running Soot with the following config: " + repr(config)) self.classes, self._hierarchy = run_soot(**config) From 8acb44729f61ddc268812a0c6bffc927836eb302 Mon Sep 17 00:00:00 2001 From: Kevin Phoenix Date: Wed, 25 Mar 2026 06:45:17 -0700 Subject: [PATCH 5/5] Keep soot_manager import delayed to avoid importing jpype at top level Importing soot_manager at the top level would pull in jpype on `import pysoot`, which is undesirable. Move it back to a delayed import with a pylint disable comment. Co-Authored-By: Claude Opus 4.6 (1M context) --- pysoot/lifter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pysoot/lifter.py b/pysoot/lifter.py index e4d3644..43a7dc5 100644 --- a/pysoot/lifter.py +++ b/pysoot/lifter.py @@ -5,7 +5,6 @@ import subprocess from .errors import JavaNotFoundError, MissingJavaRuntimeJarsError, ParameterError -from .soot_manager import run_soot log = logging.getLogger("pysoot.lifter") @@ -86,6 +85,8 @@ def _get_ir(self): for s in settings: config[s] = str(getattr(self, s, None)) + from .soot_manager import run_soot # pylint: disable=import-outside-toplevel + log.info("Running Soot with the following config: " + repr(config)) self.classes, self._hierarchy = run_soot(**config)