1 change: 1 addition & 0 deletions changelog.d/tighten-test-batches.fixed.md
@@ -0,0 +1 @@
Split the Full Suite YAML batch grouping in `tests/test_batched.py` so each heavy folder runs in its own isolated subprocess. The previous grouping (three folders per contrib batch, paired gov/ folders in the baseline-other job) pushed peak memory to ~8-9 GB per subprocess; once the policyengine-core 3.24+ overhead landed, this OOMed the 16 GB ubuntu-latest runner and surfaced as `The runner has received a shutdown signal` mid-batch. Each batch now targets a peak of ≤5 GB, eliminating the intermittent Full Suite failures without changing test coverage.
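A minimal sketch of the isolation strategy this entry describes, assuming the harness shells out to pytest once per batch (the real logic lives in `tests/test_batched.py`; the batch paths below are illustrative):

```python
import subprocess
import sys

# One subprocess per batch: the OS reclaims all memory when each batch
# exits, so peaks never accumulate across batches. Paths are illustrative.
batches = [
    ["tests/policy/contrib/federal"],          # heavy folders run alone
    ["tests/policy/contrib/harris"],
    ["tests/policy/contrib/reconciliation",    # light folders may share
     "tests/policy/contrib/local"],
]
for batch in batches:
    subprocess.run([sys.executable, "-m", "pytest", *batch], check=True)
```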
106 changes: 61 additions & 45 deletions policyengine_us/tests/test_batched.py
@@ -38,29 +38,46 @@ def split_into_batches(
if exclude is None:
exclude = []

# Special handling for contrib tests - split into 7 batches by memory usage
# Only apply to policy/contrib (structural tests), not baseline/contrib
# Special handling for contrib tests - one batch per heavy folder.
# Only apply to policy/contrib (structural tests), not baseline/contrib.
#
# Prior grouping (3 folders per batch) pushed peak memory to ~8-9 GB
# per subprocess, which OOMs the 16 GB ubuntu-latest runner once
# policyengine-core 3.24+ overhead is added, surfacing as
# "The runner has received a shutdown signal" mid-batch. Giving each
# heavy folder its own batch keeps peaks under ~5 GB and eliminates
# the intermittent failures without reducing total coverage.
if str(base_path).endswith("policy/contrib"):
# Define batches by memory usage (measured empirically)
BATCH_1 = ["federal", "harris", "treasury"] # ~9.0 GB
BATCH_2 = ["ctc", "snap_ea", "ubi_center"] # ~8.6 GB
BATCH_3 = ["deductions", "aca", "snap"] # ~8.1 GB
BATCH_4 = [
"tax_exempt",
"eitc",
# Heavy folders each get their own batch (~3-5 GB peak each).
HEAVY_BATCHES = [
["federal"],
["harris"],
["treasury"],
["ctc"],
["snap_ea"],
["ubi_center"],
["deductions"],
["aca"],
["snap"],
["tax_exempt"],
["eitc"],
["crfb"],
["congress"],
]
# Small folders/root YAML files pair into two catch-all batches
# rather than one so new unknown folders have somewhere safe to
# land without pushing either group past ~5 GB.
LIGHT_A = [
"state_dependent_exemptions",
"additional_tax_bracket",
] # ~8.0 GB
# Batch 5 is the catch-all for unknown/new folders (~7.8 GB + headroom)
BATCH_5_DEFINED = [
"local",
"dc_single_joint_threshold_ratio.yaml",
]
LIGHT_B = [
"reconciliation",
"dc_kccatc.yaml",
"reported_state_income_tax.yaml",
]
BATCH_6 = ["crfb"] # ~8.9 GB, always alone
BATCH_7 = ["congress"] # ~6.3 GB

# Get all subdirectories (excluding states which is in Heavy job)
subdirs = sorted(
@@ -74,28 +91,28 @@ def split_into_batches(
# Get root level YAML files
root_files = sorted(list(base_path.glob("*.yaml")))

# Build batches
def get_batch_paths(batch_names, subdirs, root_files):
paths = []
for name in batch_names:
# Check if it's a directory
for subdir in subdirs:
if subdir.name == name:
paths.append(str(subdir))
break
# Check if it's a root file
for f in root_files:
if f.name == name:
paths.append(str(f))
break
return paths

# Collect known folders/files
all_known = set(
BATCH_1 + BATCH_2 + BATCH_3 + BATCH_4 + BATCH_5_DEFINED + BATCH_6 + BATCH_7
)

# Find unknown folders/files (new additions go to Batch 5)
all_known = set()
for batch in HEAVY_BATCHES:
all_known.update(batch)
all_known.update(LIGHT_A)
all_known.update(LIGHT_B)

# Find unknown folders/files (new additions land in LIGHT_B —
# keeps LIGHT_A's deterministic grouping stable as the repo grows)
unknown = []
for subdir in subdirs:
if subdir.name not in all_known:
@@ -104,20 +121,20 @@ def get_batch_paths(batch_names, subdirs, root_files):
if f.name not in all_known:
unknown.append(str(f))

# Build all batches
batch1 = get_batch_paths(BATCH_1, subdirs, root_files)
batch2 = get_batch_paths(BATCH_2, subdirs, root_files)
batch3 = get_batch_paths(BATCH_3, subdirs, root_files)
batch4 = get_batch_paths(BATCH_4, subdirs, root_files)
batch5 = get_batch_paths(BATCH_5_DEFINED, subdirs, root_files) + unknown
batch6 = get_batch_paths(BATCH_6, subdirs, root_files)
batch7 = get_batch_paths(BATCH_7, subdirs, root_files)

# Return non-empty batches in order
# Build heavy batches (one per folder)
batches = []
for batch in [batch1, batch2, batch3, batch4, batch5, batch6, batch7]:
if batch:
batches.append(batch)
for batch_names in HEAVY_BATCHES:
paths = get_batch_paths(batch_names, subdirs, root_files)
if paths:
batches.append(paths)

# Add the two light catch-all batches
light_a = get_batch_paths(LIGHT_A, subdirs, root_files)
if light_a:
batches.append(light_a)
light_b = get_batch_paths(LIGHT_B, subdirs, root_files) + unknown
if light_b:
batches.append(light_b)

return batches
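To make the catch-all routing above concrete, here is a hypothetical walkthrough (names abbreviated, directory listing faked) showing that a folder absent from every known list lands in the LIGHT_B batch:

```python
# Hypothetical demo of the catch-all: a new, unlisted contrib folder is
# appended to LIGHT_B, leaving LIGHT_A's grouping untouched.
HEAVY = {"federal", "harris"}                # abbreviated for the demo
LIGHT_A = {"state_dependent_exemptions", "local"}
LIGHT_B = {"reconciliation"}
known = HEAVY | LIGHT_A | LIGHT_B

listing = ["federal", "local", "reconciliation", "brand_new_reform"]
unknown = [name for name in listing if name not in known]
light_b_batch = sorted(LIGHT_B) + unknown
print(light_b_batch)  # ['reconciliation', 'brand_new_reform']
```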

@@ -181,13 +198,11 @@ def get_batch_paths(batch_names, subdirs, root_files):
if "states" in exclude:
gov_path = base_path / "gov"

# Heavy folders get their own batch
BATCH_1 = ["irs"] # ~4.4 GB
BATCH_2 = ["ssa"] # ~4.0 GB
BATCH_3 = ["simulation"] # ~4.0 GB
BATCH_4 = ["usda", "hhs"] # ~3-4 GB combined
# Everything else groups together (~3-4 GB combined)
HEAVY = set(BATCH_1 + BATCH_2 + BATCH_3 + BATCH_4)
# One batch per heavy gov/ folder (~3-4 GB peak each) so no
# subprocess exceeds ~5 GB. Grouping previously-paired folders
# (usda+hhs) pushed peak memory past the 16 GB runner cap once
# policyengine-core 3.24+ overhead landed.
HEAVY = ["irs", "ssa", "simulation", "usda", "hhs"]

def collect_gov_paths(folder_names):
"""Collect paths for specific gov/ subfolders."""
@@ -200,9 +215,10 @@ def collect_gov_paths(folder_names):

# Collect remaining gov/ subfolders and files
remaining = []
heavy_set = set(HEAVY)
for gov_item in gov_path.iterdir():
if gov_item.is_dir():
if gov_item.name not in HEAVY and gov_item.name != "states":
if gov_item.name not in heavy_set and gov_item.name != "states":
remaining.append(str(gov_item))
elif gov_item.suffix == ".yaml":
remaining.append(str(gov_item))
@@ -218,8 +234,8 @@ def collect_gov_paths(folder_names):

# Build batches (only include non-empty ones)
batches = []
for folder_names in [BATCH_1, BATCH_2, BATCH_3, BATCH_4]:
paths = collect_gov_paths(folder_names)
for folder_name in HEAVY:
paths = collect_gov_paths([folder_name])
if paths:
batches.append(paths)
if remaining:
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default.
