super-optimizer: allow dumping .smt2 files, tracking construction time vs. solver time

damageboy · damageboy · commit c062e91afc25 · 2026-02-07T16:42:44.000+01:00
diff --git a/vxsort/smallsort/codegen/pyproject.toml b/vxsort/smallsort/codegen/pyproject.toml
@@ -10,6 +10,7 @@ dependencies = [
     "pytest-cov>=7.0.0",
     "tabulate>=0.9.0",
     "rich>=14.3.1",
+    "zstandard>=0.25.0",
 ]
 
 [dependency-groups]
diff --git a/vxsort/smallsort/codegen/src/bitonic_compiler.py b/vxsort/smallsort/codegen/src/bitonic_compiler.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 import argparse
+import tempfile
 
 # Handle both relative and absolute imports
 try:
@@ -105,6 +106,7 @@ def generate_bitonic_sorter(
     top_k: int | None = None,
     output_format: str = "json",
     gadget_depth: int = 3,
+    smt2_dump_dir: str | None = None,
 ):
     """
     Generate bitonic sorter with super-optimized permutation sequences.
@@ -117,6 +119,7 @@ def generate_bitonic_sorter(
         top_k: Number of best solutions to keep. If None, all solutions are kept.
         output_format: Output format ("json" or "asm")
         gadget_depth: Maximum instruction depth per gadget (1-3, default 3)
+        smt2_dump_dir: Directory to dump SMT2 files if requested
 
     Returns:
         List of SolutionNode trees representing different optimized solutions
@@ -128,7 +131,7 @@ def generate_bitonic_sorter(
     )
 
     # Create super-vectorizer
-    super_opt = BitonicSuperVectorizer(num_vecs, type, vm)
+    super_opt = BitonicSuperVectorizer(num_vecs, type, vm, smt2_dump_dir=smt2_dump_dir)
 
     # Synthesize all stages to build solution tree
     print("Synthesizing permutation gadgets...")
@@ -215,13 +218,23 @@ def generate_bitonic_sorter(
         choices=[1, 2, 3],
         help="Maximum instruction depth per gadget (1-3, default: 3)",
     )
+    parser.add_argument(
+        "--dump-smt2",
+        action="store_true",
+        help="Dump SMT2 files from Z3 into compressed tar files in /tmp",
+    )
 
     args = parser.parse_args()
 
     # Convert string arguments to Enum members
     vm = vector_machine[args.vector_machine]
     dtype = primitive_type[args.datatype]
 
+    smt2_dump_dir = None
+    if args.dump_smt2:
+        smt2_dump_dir = tempfile.mkdtemp(prefix="vxsort_smt2_", dir="/tmp")
+        print(f"SMT2 dump directory: {smt2_dump_dir}")
+
     generate_bitonic_sorter(
         args.num_vecs,
         dtype,
@@ -230,4 +243,5 @@ def generate_bitonic_sorter(
         top_k=args.top_k,
         output_format=args.output_format,
         gadget_depth=args.gadget_depth + 1,  # +1 because range is exclusive
+        smt2_dump_dir=smt2_dump_dir,
     )
diff --git a/vxsort/smallsort/codegen/src/bitonic_super_optimizer.py b/vxsort/smallsort/codegen/src/bitonic_super_optimizer.py
@@ -1,9 +1,23 @@
 from __future__ import annotations
+import time
 import copy
+import os
+import tarfile
+import io
+import zstandard as zstd
 from dataclasses import dataclass
 from tabulate import tabulate
 from multiprocessing import Pool
-from z3 import Solver, Context, main_ctx, Extract, BitVecVal, sat, BitVec, Distinct
+from z3 import (
+    Solver,
+    Context,
+    main_ctx,
+    Extract,
+    BitVecVal,
+    sat,
+    BitVec,
+    Distinct,
+)
 
 try:
     from .success_progress import SuccessProgress
@@ -299,7 +313,8 @@ def synthesize_gadget_with_symbolic(
         input_state: VectorState,
         target_pairs: list[tuple[int, int]],
         max_solutions: int | None = None,
-    ) -> list[tuple[PermutationGadget, VectorState]]:
+        solver_callback: callable | None = None,
+    ) -> tuple[list[tuple[PermutationGadget, VectorState]], float, float]:
         """
         Synthesize gadgets using symbolic immediates in Z3.
 
@@ -309,7 +324,8 @@ def synthesize_gadget_with_symbolic(
         The symbolic values are represented using SymbolicPlaceholder for pickling.
         The pickling is required for multiprocessing.
 
-        Returns list of (gadget, output_state) tuples. The output state is computed
+        Returns (results, construction_time, solver_time) where results is
+        list of (gadget, output_state) tuples. The output state is computed
         directly from the satisfying model, avoiding a redundant Z3 solve. The output
         state is in canonical form: for each pair, the lower element index goes to
         the top vector and the higher index goes to bottom, reflecting the
@@ -318,7 +334,9 @@ def synthesize_gadget_with_symbolic(
         Args:
             max_solutions: Optional cap on the number of solutions returned.
                 When ``None`` (the default) all solutions are enumerated.
+            solver_callback: Optional callback receiving the Solver instance.
         """
+        start_construction = time.perf_counter()
         ctx = main_ctx()
         solver = Solver(ctx=ctx)
 
@@ -400,14 +418,19 @@ def maybe_resolve_symbolic_vars(instructions: list[InstructionSpec]):
         # have the same pair_id, losing elements in the process.
         solver.add(Distinct(*output_lanes))
 
+        if solver_callback:
+            solver_callback(solver)
+
         # Collect symbolic variable terms for enumeration
         terms = list(symbolic_vars.values())
-
+        construction_time = time.perf_counter() - start_construction
+        solver_start = time.perf_counter()
         # If no symbolic variables, single check suffices
         if not terms:
             result = solver.check()
             if result != sat:
-                return []
+                solver_time = time.perf_counter() - solver_start
+                return [], construction_time, solver_time
             model = solver.model()
             gadget, output_state = self._extract_solution_from_model(
                 model,
@@ -418,7 +441,8 @@ def maybe_resolve_symbolic_vars(instructions: list[InstructionSpec]):
                 top_instructions_template,
                 bottom_instructions_template,
             )
-            return [(gadget, output_state)]
+            solver_time = time.perf_counter() - solver_start
+            return [(gadget, output_state)], construction_time, solver_time
 
         # Enumerate all solutions over symbolic variables
         results = []
@@ -433,7 +457,8 @@ def maybe_resolve_symbolic_vars(instructions: list[InstructionSpec]):
                 bottom_instructions_template,
             )
             results.append((gadget, output_state))
-        return results
+        solver_time = time.perf_counter() - solver_start
+        return results, construction_time, solver_time
 
     def _extract_solution_from_model(
         self,
@@ -827,22 +852,54 @@ def _validate_gadgets(
             )
 
         try:
-            with Pool() as pool:
-                # Use imap_unordered for streaming results and progress updates
-                for (
-                    gadget_results,
-                    job_input_state,
-                    job_metadata,
-                ) in pool.imap_unordered(_validate_gadget_worker, jobs):
-                    success_inc = 0
-                    for gadget, output_state in gadget_results:
-                        validated_gadgets.append(
-                            (gadget, job_input_state, output_state, job_metadata)
-                        )
-                        success_inc = 1
-
-                    if progress:
-                        progress.update(task_id, advance=1, success=success_inc)
+            pool = Pool()
+            total_construct_time = 0.0
+            total_solve_time = 0.0
+
+            # Use imap_unordered for streaming results and progress updates
+            for (
+                gadget_results,
+                job_input_state,
+                job_metadata,
+                construct_time,
+                solver_time,
+            ) in pool.imap_unordered(_validate_gadget_worker, jobs):
+                total_construct_time += construct_time
+                total_solve_time += solver_time
+
+                success_inc = 0
+                for gadget, output_state in gadget_results:
+                    validated_gadgets.append(
+                        (gadget, job_input_state, output_state, job_metadata)
+                    )
+                    success_inc = 1
+
+                if progress:
+                    progress.update(task_id, advance=1, success=success_inc)
+
+            pool.close()
+            pool.join()
+
+            if jobs:
+                print(
+                    f"TOTAL construction time: {total_construct_time:.2f}s, TOTAL solver time: {total_solve_time:.2f}s"
+                )
+
+            # Phase 2.5: Compress tar files if smt2_dump_dir is set
+            if jobs and "smt2_dump_dir" in jobs[0][6]:
+                smt2_dump_dir = jobs[0][6]["smt2_dump_dir"]
+                stage_idx = jobs[0][6]["stage_idx"]
+                for filename in os.listdir(smt2_dump_dir):
+                    if filename.startswith(f"stage{stage_idx}_") and filename.endswith(
+                        ".tar"
+                    ):
+                        tar_path = os.path.join(smt2_dump_dir, filename)
+                        zst_path = tar_path + ".zst"
+                        cctx = zstd.ZstdCompressor()
+                        with open(tar_path, "rb") as f_in:
+                            with open(zst_path, "wb") as f_out:
+                                cctx.copy_stream(f_in, f_out)
+                        os.remove(tar_path)
         finally:
             if progress:
                 progress.stop()
@@ -979,10 +1036,17 @@ def _enumerate_dual_input_instructions(
 class BitonicSuperVectorizer:
     """Super-optimizer for bitonic sorting networks using Z3-based gadget synthesis."""
 
-    def __init__(self, num_vecs: int, prim_type: primitive_type, vm: vector_machine):
+    def __init__(
+        self,
+        num_vecs: int,
+        prim_type: primitive_type,
+        vm: vector_machine,
+        smt2_dump_dir: str | None = None,
+    ):
         self.num_vecs = num_vecs
         self.prim_type = prim_type
         self.vm = vm
+        self.smt2_dump_dir = smt2_dump_dir
 
         # Calculate total elements and elements per vector
         self.elements_per_vector = width_dict[vm] // int(prim_type.value[0])
@@ -1104,7 +1168,11 @@ def _build_tree_recursive(
             metadata = {
                 "input_state": input_state,
                 "parent_path": parent_path,
+                "stage_idx": stage_idx,
             }
+            if self.smt2_dump_dir:
+                metadata["smt2_dump_dir"] = self.smt2_dump_dir
+
             if max_solutions_per_gadget is not None:
                 metadata["max_solutions"] = max_solutions_per_gadget
 
@@ -1296,13 +1364,17 @@ def node_to_dict(node: SolutionNode) -> dict:
         print(f"Exported {len(roots)} solution trees to {output_path}")
 
 
+_worker_tar = None
+_worker_job_count = 0
+
+
 def _validate_gadget_worker(job):
     """Worker function for parallel gadget validation.
 
-    Returns (gadget_results, input_state, metadata) where gadget_results is
-    a list of (gadget, output_state) tuples. The output_state is computed
-    directly during validation.
+    Returns (gadget_results, input_state, metadata, construction_time, solver_time)
+    where gadget_results is a list of (gadget, output_state) tuples.
     """
+    global _worker_tar, _worker_job_count
     top_seq, bottom_seq, input_state, target_pairs, vm, prim_type, metadata = job
 
     # Create clones of sequences to avoid modifying the ones in the main process
@@ -1314,12 +1386,42 @@ def _validate_gadget_worker(job):
     # Default to 1 for backward compatibility; callers opt in to more via
     # build_solution_tree(max_solutions_per_gadget=N).
     max_solutions = metadata.get("max_solutions", 1)
-    gadget_results = synthesizer.synthesize_gadget_with_symbolic(
-        top_seq_clone,
-        bottom_seq_clone,
-        input_state,
-        target_pairs,
-        max_solutions=max_solutions,
+
+    smt2_dump_dir = metadata.get("smt2_dump_dir")
+    stage_idx = metadata.get("stage_idx")
+
+    def dump_smt2_to_tar(solver):
+        global _worker_tar, _worker_job_count
+        if smt2_dump_dir is None:
+            return
+
+        if _worker_tar is None:
+            pid = os.getpid()
+            # Write to an uncompressed tar file first; we'll compress it in the main process
+            tar_filename = f"stage{stage_idx}_pid_{pid}.tar"
+            tar_path = os.path.join(smt2_dump_dir, tar_filename)
+            _worker_tar = tarfile.open(tar_path, mode="a")
+
+        _worker_job_count += 1
+        smt2_text = "(reset)\n" + solver.sexpr() + "\n(check-sat)\n"
+        smt2_bytes = smt2_text.encode("utf-8")
+
+        tar_info = tarfile.TarInfo(name=f"job_{_worker_job_count}.smt2")
+        tar_info.size = len(smt2_bytes)
+        _worker_tar.addfile(tar_info, io.BytesIO(smt2_bytes))
+        # Ensure it's written to disk
+        _worker_tar.fileobj.flush()
+
+    gadget_results, construction_time, solver_time = (
+        synthesizer.synthesize_gadget_with_symbolic(
+            top_seq_clone,
+            bottom_seq_clone,
+            input_state,
+            target_pairs,
+            max_solutions=max_solutions,
+            solver_callback=dump_smt2_to_tar,
+        )
     )
 
-    return gadget_results, input_state, metadata
+    metadata["worker_pid"] = os.getpid()
+    return gadget_results, input_state, metadata, construction_time, solver_time
diff --git a/vxsort/smallsort/codegen/tests/test_symbolic_synthesis.py b/vxsort/smallsort/codegen/tests/test_symbolic_synthesis.py
@@ -36,8 +36,9 @@ def test_symbolic_synthesis():
     print(f"Target pairs: {target_pairs}")
 
     # Try with no instructions (should succeed)
-    # Returns list of (gadget, output_state) tuples
-    results = synthesizer.synthesize_gadget_with_symbolic(
+    # Returns (results, construction_time, solver_time)
+    # where results is list of (gadget, output_state) tuples
+    results, _, _ = synthesizer.synthesize_gadget_with_symbolic(
         [], [], input_state, target_pairs
     )
 
@@ -82,8 +83,9 @@ def test_symbolic_synthesis():
     print(f"Target pairs: {target_pairs2}")
     print(f"Instruction template: {inst_template.intrinsic_name}")
 
-    # Returns list of (gadget, output_state) tuples
-    results2 = synthesizer.synthesize_gadget_with_symbolic(
+    # Returns (results, construction_time, solver_time)
+    # where results is list of (gadget, output_state) tuples
+    results2, _, _ = synthesizer.synthesize_gadget_with_symbolic(
         [inst_template], [], input_state2, target_pairs2
     )
 
@@ -200,7 +202,7 @@ def test_multi_solution_enumeration():
 
     # Top: apply blend(top, bottom, symbolic_imm8)
     # Bottom: identity (no instructions) — stays as bottom
-    results = synthesizer.synthesize_gadget_with_symbolic(
+    results, _, _ = synthesizer.synthesize_gadget_with_symbolic(
         [blend_template], [], input_state, target_pairs
     )
 
diff --git a/vxsort/smallsort/codegen/uv.lock b/vxsort/smallsort/codegen/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,7 @@ dependencies = [`
`10`	`10`	`"pytest-cov>=7.0.0",`
`11`	`11`	`"tabulate>=0.9.0",`
`12`	`12`	`"rich>=14.3.1",`
	`13`	`+ "zstandard>=0.25.0",`
`13`	`14`	`]`
`14`	`15`
`15`	`16`	`[dependency-groups]`