PolicyEngine · juaristi22 · Feb 22, 2026 · Feb 8, 2026 · Feb 21, 2026 · Feb 21, 2026
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,5 @@
+- bump: patch
+  changes:
+    changed:
+    - Updated imputing-from-scf-to-cps.ipynb to test SSI policy reforms with different wealth imputations.
+    - Updated paper/ and main.pdf to reflect the new notebook results and discussion, along with general improvements in content and presentation.
diff --git a/microimpute/models/matching.py b/microimpute/models/matching.py
@@ -511,7 +511,11 @@ def _fit(
                         imputed_variables=imputed_variables,
                         imputed_vars_dummy_info=self.imputed_vars_dummy_info,
                         original_predictors=self.original_predictors,
+                        categorical_targets=categorical_targets,
+                        boolean_targets=boolean_targets,
+                        dummy_processor=getattr(self, "dummy_processor", None),
                         seed=self.seed,
+                        log_level=self.log_level,
                         hyperparameters=best_params,
                     ),
                     best_params,
@@ -596,12 +600,6 @@ def objective(trial: optuna.Trial) -> float:
                         "minimax",
                     ],
                 ),
-                "constrained": trial.suggest_categorical(
-                    "constrained", [False, True]
-                ),
-                "constr_alg": trial.suggest_categorical(
-                    "constr_alg", ["hungarian", "lpSolve"]
-                ),
                 "k": trial.suggest_int("k", 1, 10),
             }
 

diff --git a/microimpute/models/mdn.py b/microimpute/models/mdn.py
@@ -59,6 +59,11 @@
         message=".*training batches.*smaller than the logging interval.*",
         module="pytorch_lightning.loops.fit_loop",
     )
+    warnings.filterwarnings(
+        "ignore",
+        message=".*have no logger configured.*",
+        module="pytorch_lightning.core.module",
+    )
 
     # After import, also update the rank_zero_module logger
     from lightning_fabric.utilities.rank_zero import rank_zero_module
@@ -182,7 +187,7 @@ def __init__(
         n_samples: int = 100,
         learning_rate: float = 1e-3,
         max_epochs: int = 100,
-        early_stopping_patience: int = 10,
+        early_stopping_patience: int = 50,
         batch_size: int = 256,
     ):
         self.seed = seed
@@ -360,7 +365,7 @@ def __init__(
         use_batch_norm: bool = False,
         learning_rate: float = 1e-3,
         max_epochs: int = 100,
-        early_stopping_patience: int = 10,
+        early_stopping_patience: int = 50,
         batch_size: int = 256,
     ):
         self.seed = seed
@@ -777,7 +782,7 @@ def __init__(
         # Training config
         learning_rate: float = 1e-3,
         max_epochs: int = 100,
-        early_stopping_patience: int = 10,
+        early_stopping_patience: int = 100,
         batch_size: int = 256,
         # Caching config
         model_dir: str = "./microimpute_models",
@@ -1191,6 +1196,7 @@ def objective(trial: optuna.Trial) -> float:
                         n_samples=self.n_samples,
                         learning_rate=learning_rate,
                         max_epochs=40,  # Reduced for tuning
+                        early_stopping_patience=(self.early_stopping_patience),
                         batch_size=self.batch_size,
                     )
                     model.fit(X_train_fold[predictors], y_train)
@@ -1327,6 +1333,7 @@ def objective(trial: optuna.Trial) -> float:
                         use_batch_norm=self.use_batch_norm,
                         learning_rate=learning_rate,
                         max_epochs=40,  # Reduced for tuning
+                        early_stopping_patience=(self.early_stopping_patience),
                         batch_size=self.batch_size,
                     )
                     model.fit(

diff --git a/microimpute/utils/statmatch_hotdeck.py b/microimpute/utils/statmatch_hotdeck.py
@@ -36,6 +36,17 @@
 from rpy2.robjects.conversion import localconverter
 from rpy2.robjects.packages import importr
 
+# Cache R package imports so they only happen once, avoiding repeated
+# network calls to download CRAN mirror lists during hyperparameter tuning.
+_statmatch_cache = {}
+
+
+def _get_statmatch():
+    """Import StatMatch via rpy2 on first use and reuse it afterwards."""
+    if "StatMatch" in _statmatch_cache:
+        return _statmatch_cache["StatMatch"]
+    return _statmatch_cache.setdefault("StatMatch", importr("StatMatch"))
+
 
 @validate_call(config=VALIDATE_CONFIG)
 def nnd_hotdeck_using_rpy2(
@@ -66,9 +77,7 @@ def nnd_hotdeck_using_rpy2(
         RuntimeError: If there is an unexpected error during the statistical matching process.
     """
 
-    utils = importr("utils")
-    utils.chooseCRANmirror(ind=1)
-    StatMatch = importr("StatMatch")
+    StatMatch = _get_statmatch()
 
     try:
         missing_in_receiver = [

diff --git a/paper/bibliography/references.bib b/paper/bibliography/references.bib
@@ -354,7 +354,7 @@ @inproceedings{lun2019multiple
 
 @article{meinshausen2006quantile,
   title     = {Quantile regression forests},
-  author    = {Meinshausen, Nicolai and Ridgeway, Greg},
+  author    = {Meinshausen, Nicolai},
   journal   = {Journal of Machine Learning Research},
   volume    = {7},
   number    = {Jun},
@@ -432,7 +432,6 @@ @misc{policyengine2025microimpute
   title  = {Microimpute documentation},
   author = {{PolicyEngine}},
   year   = {2025},
-  note   = {Retrieved from},
   url    = {https://policyengine.github.io/microimpute/}
 }
 
@@ -558,3 +557,63 @@ @misc{zillow2024quantile
   year   = {2024},
   url    = {https://zillow.github.io/quantile-forest/}
 }
+
+@techreport{ssa2023ssi,
+  title       = {{SSI} Annual Statistical Report, 2023},
+  author      = {{Social Security Administration}},
+  institution = {Social Security Administration, Office of Retirement and Disability Policy},
+  year        = {2023},
+  url         = {https://www.ssa.gov/policy/docs/statcomps/ssi_asr/2023/}
+}
+
+@techreport{ssa2024ssi,
+  title       = {{SSI} Annual Statistical Report, 2024},
+  author      = {{Social Security Administration}},
+  institution = {Social Security Administration, Office of Retirement and Disability Policy},
+  year        = {2024},
+  url         = {https://www.ssa.gov/policy/docs/statcomps/ssi_asr/}
+}
+
+@misc{ssi_spea_2025,
+  title  = {{SSI} Savings Penalty Elimination Act},
+  author = {{119th United States Congress}},
+  year   = {2025},
+  note   = {S. 1234 / H.R. 2540, introduced April 1, 2025},
+  url    = {https://www.congress.gov/bill/119th-congress/senate-bill/1234}
+}
+
+@misc{policyengine_us,
+  title  = {PolicyEngine {US}},
+  author = {{PolicyEngine}},
+  year   = {2024},
+  url    = {https://github.com/PolicyEngine/policyengine-us},
+  note   = {Open-source tax-benefit microsimulation model for the United States}
+}
+
+@article{vanschoren2014openml,
+  title     = {{OpenML}: Networked science in machine learning},
+  author    = {Vanschoren, Joaquin and van Rijn, Jan N. and Bischl, Bernd and Torgo, Lu{\'\i}s},
+  journal   = {ACM SIGKDD Explorations Newsletter},
+  volume    = {15},
+  number    = {2},
+  pages     = {49--60},
+  year      = {2014}
+}
+
+@inproceedings{akiba2019optuna,
+  title     = {Optuna: A Next-generation Hyperparameter Optimization Framework},
+  author    = {Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
+  booktitle = {Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
+  pages     = {2623--2631},
+  year      = {2019}
+}
+
+@techreport{fed2023scf,
+  title       = {Changes in {U.S.} Family Finances from 2019 to 2022: Evidence from the {Survey of Consumer Finances}},
+  author      = {{Board of Governors of the Federal Reserve System}},
+  year        = {2023},
+  institution = {Federal Reserve Bulletin},
+  month       = {October},
+  volume      = {109},
+  number      = {4}
+}
diff --git a/paper/figures/models_dist_comparison.png b/paper/figures/models_dist_comparison.png
diff --git a/paper/figures/models_loss_benchmark.png b/paper/figures/models_loss_benchmark.png
diff --git a/paper/figures/models_median_decile_comparison.png b/paper/figures/models_median_decile_comparison.png
diff --git a/paper/figures/models_ssi_comparison.png b/paper/figures/models_ssi_comparison.png
diff --git a/paper/figures/models_ssi_reform_comparison.png b/paper/figures/models_ssi_reform_comparison.png