From e4ebcae4aafd4e7b767c74287745ec26810a102c Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Mon, 4 May 2026 13:19:48 -0700
Subject: [PATCH 01/17] Tight coupling for macrophysics so the acoustic solver
 sees these RHS terms.

---
 Source/DataStructs/ERF_DataStruct.H          |   4 +-
 Source/ERF.H                                 |  32 +++---
 Source/Microphysics/Kessler/ERF_Kessler.H    |   2 +-
 Source/Microphysics/Morrison/ERF_Morrison.H  |   2 +-
 Source/Microphysics/SAM/ERF_SAM.H            |   2 +-
 Source/Microphysics/SatAdj/ERF_SatAdj.H      |   2 +-
 Source/Microphysics/WSM6/ERF_WSM6.H          |   2 +-
 Source/SourceTerms/ERF_MakeSources.cpp       | 111 ++++++++++++++-----
 Source/TimeIntegration/ERF_TI_slow_rhs_pre.H |   2 +-
 Source/Utils/ERF_MacroPhysics.H              |  57 ++++++++++
 Source/Utils/Make.package                    |   1 +
 11 files changed, 165 insertions(+), 52 deletions(-)
 create mode 100644 Source/Utils/ERF_MacroPhysics.H

diff --git a/Source/DataStructs/ERF_DataStruct.H b/Source/DataStructs/ERF_DataStruct.H
index 2c6f817297..593d505544 100644
--- a/Source/DataStructs/ERF_DataStruct.H
+++ b/Source/DataStructs/ERF_DataStruct.H
@@ -222,6 +222,7 @@ struct SolverChoice {
             }
 
             pp.query("moisture_tight_coupling",moisture_tight_coupling);
+            pp.query("macrophysics_tight_coupling",macrophysics_tight_coupling);
         }
 
         // Which expression (1,2/3 or 4) to use for buoyancy
@@ -1266,7 +1267,8 @@ struct SolverChoice {
     // Microphysics params
     MoistureComponentIndices moisture_indices;
 
-    bool moisture_tight_coupling {false};
+    bool moisture_tight_coupling     {false};
+    bool macrophysics_tight_coupling {false};
 
     std::string windfarm_loc_table, windfarm_spec_table, windfarm_spec_table_extra;
     std::string windfarm_blade_table, windfarm_airfoil_tables;
diff --git a/Source/ERF.H b/Source/ERF.H
index 08bc5eb4fe..392428662e 100644
--- a/Source/ERF.H
+++ b/Source/ERF.H
@@ -24,7 +24,7 @@
 #include <AMReX_EBFabFactory.H>
 #include <AMReX_EBMultiFabUtil.H>
 
-#include <ERF_EB.H>
+#include "ERF_EB.H"
 
 #ifdef ERF_USE_FFT
 #include <AMReX_FFT_Poisson.H>
@@ -36,20 +36,20 @@
 
 #include "ERF_ProbCommon.H"
 
-#include <ERF_IndexDefines.H>
-#include <ERF_DataStruct.H>
-#include <ERF_TurbPertStruct.H>
-#include <ERF_InputSoundingData.H>
-#include <ERF_InputSpongeData.H>
-#include <ERF_SurfaceLayer.H>
-#include <ERF_Derive.H>
-#include <ERF_ReadBndryPlanes.H>
-#include <ERF_WriteBndryPlanes.H>
-#include <ERF_MRI.H>
-#include <ERF_PhysBCFunct.H>
-#include <ERF_FillPatcher.H>
-#include <ERF_SampleData.H>
-#include <ERF_ForestDrag.H>
+#include "ERF_IndexDefines.H"
+#include "ERF_DataStruct.H"
+#include "ERF_TurbPertStruct.H"
+#include "ERF_InputSoundingData.H"
+#include "ERF_InputSpongeData.H"
+#include "ERF_SurfaceLayer.H"
+#include "ERF_Derive.H"
+#include "ERF_ReadBndryPlanes.H"
+#include "ERF_WriteBndryPlanes.H"
+#include "ERF_MRI.H"
+#include "ERF_PhysBCFunct.H"
+#include "ERF_FillPatcher.H"
+#include "ERF_SampleData.H"
+#include "ERF_ForestDrag.H"
 
 #ifdef ERF_USE_PARTICLES
 #include "ERF_ParticleData.H"
@@ -63,7 +63,7 @@
 #include "ERF_WindFarm.H"
 #endif
 
-#include <ERF_RadiationModels.H>
+#include "ERF_RadiationModels.H"
 
 #ifdef ERF_USE_SHOC
 #include "ERF_ShocInterface.H"
diff --git a/Source/Microphysics/Kessler/ERF_Kessler.H b/Source/Microphysics/Kessler/ERF_Kessler.H
index 86703721f7..e004752d23 100644
--- a/Source/Microphysics/Kessler/ERF_Kessler.H
+++ b/Source/Microphysics/Kessler/ERF_Kessler.H
@@ -55,7 +55,7 @@ public:
     {
         m_fac_cond = lcond / sc.c_p;
         m_axis     = sc.ave_plane;
-        m_do_cond  = (!sc.use_shoc);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
     }
 
     // init
diff --git a/Source/Microphysics/Morrison/ERF_Morrison.H b/Source/Microphysics/Morrison/ERF_Morrison.H
index aacdf6b8a6..de4d8b17a0 100644
--- a/Source/Microphysics/Morrison/ERF_Morrison.H
+++ b/Source/Microphysics/Morrison/ERF_Morrison.H
@@ -72,7 +72,7 @@ public:
     {
         m_axis     = sc.ave_plane;
         m_rdOcp    = sc.rdOcp;
-        m_do_cond  = (!sc.use_shoc);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
     }
 
     // init
diff --git a/Source/Microphysics/SAM/ERF_SAM.H b/Source/Microphysics/SAM/ERF_SAM.H
index a2fdf314fb..02e34687dc 100644
--- a/Source/Microphysics/SAM/ERF_SAM.H
+++ b/Source/Microphysics/SAM/ERF_SAM.H
@@ -82,7 +82,7 @@ public:
         m_fac_sub  = lsub / sc.c_p;
         m_axis     = sc.ave_plane;
         m_rdOcp    = sc.rdOcp;
-        m_do_cond  = (!sc.use_shoc);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
     }
 
     // init
diff --git a/Source/Microphysics/SatAdj/ERF_SatAdj.H b/Source/Microphysics/SatAdj/ERF_SatAdj.H
index 17fb146914..0cc722f04c 100644
--- a/Source/Microphysics/SatAdj/ERF_SatAdj.H
+++ b/Source/Microphysics/SatAdj/ERF_SatAdj.H
@@ -58,7 +58,7 @@ public:
     {
         m_fac_cond = lcond / sc.c_p;
         m_rdOcp    = sc.rdOcp;
-        m_do_cond  = (!sc.use_shoc);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
     }
 
     // init
diff --git a/Source/Microphysics/WSM6/ERF_WSM6.H b/Source/Microphysics/WSM6/ERF_WSM6.H
index 26b0d9ce6d..806cdf1b5f 100644
--- a/Source/Microphysics/WSM6/ERF_WSM6.H
+++ b/Source/Microphysics/WSM6/ERF_WSM6.H
@@ -44,7 +44,7 @@ public:
     void Define(SolverChoice& sc) override
     {
         m_axis = sc.ave_plane;
-        m_do_cond = (!sc.use_shoc);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
     }
 
     void Init(const amrex::MultiFab& cons_in,
diff --git a/Source/SourceTerms/ERF_MakeSources.cpp b/Source/SourceTerms/ERF_MakeSources.cpp
index 3ae049868a..9520a159cf 100644
--- a/Source/SourceTerms/ERF_MakeSources.cpp
+++ b/Source/SourceTerms/ERF_MakeSources.cpp
@@ -4,10 +4,11 @@
 #include <AMReX_TableData.H>
 #include <AMReX_GpuContainers.H>
 
-#include <ERF_NumericalDiffusion.H>
-#include <ERF_SrcHeaders.H>
-#include <ERF_TI_slow_headers.H>
-#include <ERF_MOSTStress.H>
+#include "ERF_NumericalDiffusion.H"
+#include "ERF_SrcHeaders.H"
+#include "ERF_TI_slow_headers.H"
+#include "ERF_MOSTStress.H"
+#include "ERF_MacroPhysics.H"
 
 using namespace amrex;
 
@@ -67,7 +68,11 @@ void make_sources (int level,
         source.setVal(0.0,Rho_comp,2);
     }
 
-    const bool l_use_ndiff      = solverChoice.use_num_diff;
+    Real idt        = amrex::Real(1.0) / dt;
+    Real d_rdOcp    = R_d / Cp_d;
+    Real d_fac_cond = L_v / Cp_d;
+
+    const bool l_use_ndiff = solverChoice.use_num_diff;
 
     TurbChoice tc = solverChoice.turbChoice[level];
     const bool l_use_KE  = tc.use_tke;
@@ -92,6 +97,9 @@ void make_sources (int level,
     // flag for a moisture model
     bool has_moisture = (solverChoice.moisture_type != MoistureType::None);
 
+    // flag for tight macrophysics coupling
+    bool tight_macro = solverChoice.macrophysics_tight_coupling;
+
     // *****************************************************************************
     // Planar averages for subsidence terms
     // *****************************************************************************
@@ -194,18 +202,19 @@ void make_sources (int level,
 
     // *****************************************************************************
     // Define source term for cell-centered conserved variables, from
-    //    one user-defined source terms for (rho theta) and (rho q_t)
-    //    two radiation           for (rho theta)
-    //    three Rayleigh damping    for (rho theta)
-    //    Real(4.) custom forcing      for (rho theta) and (rho Q1)
-    //    Real(5.) custom subsidence   for (rho theta) and (rho Q1)
-    //    Real(6.) numerical diffusion for (rho theta)
-    //    Real(7.) sponging
-    //    Real(8.) turbulent perturbation
-    //    Real(9.) nudging towards input sounding values (only for theta)
+    //    1. user-defined source terms for (rho theta) and (rho q_t)
+    //    2. radiation           for (rho theta)
+    //    3. Rayleigh damping    for (rho theta)
+    //    4. custom forcing      for (rho theta) and (rho Q1)
+    //    5. custom subsidence   for (rho theta) and (rho Q1)
+    //    6. numerical diffusion for (rho theta)
+    //    7. sponging
+    //    8. turbulent perturbation
+    //    9. nudging towards input sounding values (only for theta)
     //   10a. Immersed forcing for terrain
     //   10b. Immersed forcing for buildings
-    //   Real(11.) Four stream radiation source for (rho theta)
+    //   11. Four stream radiation source for (rho theta)
+    //   12. Macrophysics source for (rho theta; rho Q1; rho Q2)
     // *****************************************************************************
 
     // ***********************************************************************************************
@@ -223,18 +232,17 @@ void make_sources (int level,
         const Array4<const Real>& cell_prim  = S_prim.array(mfi);
         const Array4<Real>      & cell_src   = source.array(mfi);
 
-        const Array4<const Real>& r0 = r_hse.const_array(mfi);
+        const Array4<const Real>& r0  = r_hse.const_array(mfi);
         const Array4<const Real>& th0 = th_hse.const_array(mfi);
         const Array4<const Real>& qv0 = qv_hse.const_array(mfi);
 
         const Array4<const Real>& z_cc_arr = z_phys_cc->const_array(mfi);
 
         const Array4<const Real>& t_blank_arr = (terrain_blank) ? terrain_blank->const_array(mfi) :
-                                                               Array4<const Real>{};
-
+                                                                  Array4<const Real>{};
 
         // *************************************************************************************
-        // two Add radiation source terms to (rho theta)
+        // 2. Add radiation source terms to (rho theta)
         // *************************************************************************************
         if (solverChoice.rad_type != RadiationType::None && is_slow_step) {
             auto const& qheating_arr = qheating_rates->const_array(mfi);
@@ -247,7 +255,7 @@ void make_sources (int level,
 
 
         // *************************************************************************************
-        // three Add Rayleigh damping for (rho theta)
+        // 3. Add Rayleigh damping for (rho theta)
         // *************************************************************************************
         Real dampcoef = solverChoice.dampingChoice.rayleigh_dampcoef;
 
@@ -267,7 +275,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // Real(4.) Add custom forcing for (rho theta)
+        // 4. Add custom forcing for (rho theta)
         // *************************************************************************************
         if (solverChoice.custom_rhotheta_forcing && is_slow_step) {
             const int n = RhoTheta_comp;
@@ -303,7 +311,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // Real(4.) Add custom forcing for RhoQ1
+        // 4. Add custom forcing for RhoQ1
         // *************************************************************************************
         if (solverChoice.custom_moisture_forcing && is_slow_step) {
             const int n = RhoQ1_comp;
@@ -339,7 +347,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // Real(5.) Add custom subsidence for (rho theta)
+        // 5. Add custom subsidence for (rho theta)
         // *************************************************************************************
         if (solverChoice.custom_w_subsidence && is_slow_step && solverChoice.do_theta_advection) {
             const int n = RhoTheta_comp;
@@ -366,7 +374,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // Real(5.) Add custom subsidence for RhoQ1 and RhoQ2
+        // 5. Add custom subsidence for RhoQ1 and RhoQ2
         // *************************************************************************************
         if (solverChoice.custom_w_subsidence && (solverChoice.moisture_type != MoistureType::None) && is_slow_step) {
             const int nv = RhoQ1_comp;
@@ -399,7 +407,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // Real(6.) Add numerical diffusion for rho and (rho theta)
+        // 6. Add numerical diffusion for rho and (rho theta)
         // *************************************************************************************
         if (l_use_ndiff && is_slow_step) {
             int sc;
@@ -427,7 +435,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // Real(7.) Add sponging
+        // 7. Add sponging
         // *************************************************************************************
         if(!(solverChoice.spongeChoice.sponge_type == "input_sponge") && is_slow_step){
             const int n_qstate = S_data[IntVars::cons].nComp() - (NDRY + NSCALARS);
@@ -441,7 +449,7 @@ void make_sources (int level,
 
 
         // *************************************************************************************
-        // Real(8.) Add perturbation
+        // 8. Add perturbation
         // *************************************************************************************
         if (solverChoice.use_source_perturbation(level) && is_slow_step) {
             auto m_ixtype = S_data[IntVars::cons].boxArray().ixType(); // Conserved term
@@ -450,7 +458,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // Real(9.) Add nudging towards value specified in input sounding
+        // 9. Add nudging towards value specified in input sounding
         // *************************************************************************************
         if (solverChoice.nudging_from_input_sounding && is_slow_step)
         {
@@ -679,7 +687,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // Real(11.) Add 4 stream radiation src to RhoTheta
+        // 11. Add 4 stream radiation src to RhoTheta
         // *************************************************************************************
         if (solverChoice.four_stream_radiation && has_moisture && is_slow_step)
         {
@@ -736,6 +744,51 @@ void make_sources (int level,
                 }
             });
         }
+
+        // *************************************************************************************
+        // 12. Add macrophysics source for RhoTheta, RhoQ1, and RhoQ2
+        // *************************************************************************************
+        if (is_slow_step && has_moisture && tight_macro) {
+            ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+            {
+                // Conserved CC vars initially
+                Real rho      = cell_data(i,j,k,Rho_comp);
+                Real rt_old   = cell_data(i,j,k,RhoTheta_comp);
+                Real th_old   = rt_old/rho;
+                Real qv_old   = cell_data(i,j,k,RhoQ1_comp   )/rho;
+                Real qc_old   = cell_data(i,j,k,RhoQ2_comp   )/rho;
+                Real qt       = qv_old + qc_old;
+                Real tabs_old = getTgivenRandRTh(rho, rt_old, qv_old);
+
+                // To be updated in place
+                Real qv   = qv_old;
+                Real tabs = tabs_old;
+                Real pres = getPgivenRTh(rt_old, qv_old);
+
+                // Vapor frac at saturation
+                Real qs;
+
+                // Valid Newton iteration
+                erf_qsatw(tabs, pres*Real(0.01), qs);
+                if (qt > qs) {
+                    NewtonIterSat(d_fac_cond, tabs, pres*Real(0.01), qv);
+                }
+                // Put qc into qv and double check Newton iter criteria
+                else {
+                    qv   += qc_old;
+                    tabs -= d_fac_cond * qc_old;
+                    erf_qsatw(tabs, pres*Real(0.01), qs);
+                    if (qv > qs) {
+                        NewtonIterSat(d_fac_cond, tabs, pres*Real(0.01), qv);
+                    }
+                }
+
+                // Populate tendencies
+                cell_src(i,j,k,RhoTheta_comp) += rho * idt * ( getThgivenTandP(tabs, pres, d_rdOcp) - th_old );
+                cell_src(i,j,k,RhoQ1_comp   ) += rho * idt * ( qv - qv_old );
+                cell_src(i,j,k,RhoQ2_comp   ) += -cell_src(i,j,k,RhoQ1_comp);
+            });
+        }
     } // mfi
     } // OMP
 }
diff --git a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
index a90994c38f..dfd025ff8a 100644
--- a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
+++ b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
@@ -107,7 +107,7 @@
                      mapfac[level],
                      rhotheta_src[level].get(), rhoqt_src[level].get(),
                      dptr_wbar_sub, d_rayleigh_ptrs_at_lev, d_sinesq_at_lev,
-                      (solverChoice.hindcast_surface_bcs? &surface_state_interp[level] : nullptr),
+                     (solverChoice.hindcast_surface_bcs? &surface_state_interp[level] : nullptr),
                      input_sounding_data, turbPert, true);
 
         // *****************************************************************************
diff --git a/Source/Utils/ERF_MacroPhysics.H b/Source/Utils/ERF_MacroPhysics.H
new file mode 100644
index 0000000000..a99a763fb4
--- /dev/null
+++ b/Source/Utils/ERF_MacroPhysics.H
@@ -0,0 +1,57 @@
+#ifndef ERF_MACROPHYS_H
+#define ERF_MACROPHYS_H
+
+#include "ERF_MicrophysicsUtils.H"
+
+AMREX_GPU_HOST_DEVICE
+AMREX_FORCE_INLINE
+static void
+NewtonIterSat (const amrex::Real& fac_cond,
+               amrex::Real& tabs,
+               const amrex::Real& pres,
+               amrex::Real& qv)
+{
+    // Solution tolerance
+    amrex::Real tol = amrex::Real(1.0e-8);
+
+    // Saturation moisture fractions
+    amrex::Real qsat, dqsat;
+
+    // Newton iteration vars
+    int  niter;
+    amrex::Real fff, dfff;
+    amrex::Real dtabs;
+
+    // Initial guess for temperature
+    amrex::Real tabs_old = tabs;
+
+    niter = 0;
+    dtabs = 1;
+
+    //==================================================
+    // Newton iteration to qv=qsat (cloud phase only)
+    //==================================================
+    do {
+        // Saturation moisture fractions
+        erf_qsatw(tabs, pres, qsat);
+        erf_dtqsatw(tabs, pres, dqsat);
+
+        // Function for root finding:
+        // 0 = -T_new + T_old + L_v/C_p * (qv - qsat)
+        fff   = -tabs + tabs_old + fac_cond*(qv - qsat);
+
+        // Derivative of function (T_new iterated on)
+        dfff  = -one - fac_cond*dqsat;
+
+        // Update the temperature
+        dtabs = -fff/dfff;
+        tabs += dtabs;
+
+        // Update iteration
+        niter = niter+1;
+    } while(std::abs(dtabs) > tol && niter < 20);
+
+    // Update from last iteration (dq = dq/dt * dt)
+    qv  = qsat + dqsat*dtabs;
+}
+#endif
diff --git a/Source/Utils/Make.package b/Source/Utils/Make.package
index 100bb49e78..c2d0099405 100644
--- a/Source/Utils/Make.package
+++ b/Source/Utils/Make.package
@@ -18,6 +18,7 @@ CEXE_headers += ERF_WaterVaporSaturation.H
 CEXE_headers += ERF_DirectionSelector.H
 CEXE_headers += ERF_StormDiagnostics.H
 CEXE_headers += ERF_HurricaneDiagnostics.H
+CEXE_headers += ERF_MacroPhysics.H
 
 CEXE_sources += ERF_AverageDown.cpp
 CEXE_sources += ERF_ChopGrids.cpp

From 08be1441eeb0294fa06194edde4a34c8f5e45c76 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Mon, 4 May 2026 13:42:56 -0700
Subject: [PATCH 02/17] remove unused.

---
 Source/DataStructs/ERF_DataStruct.H | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Source/DataStructs/ERF_DataStruct.H b/Source/DataStructs/ERF_DataStruct.H
index 593d505544..a12746c672 100644
--- a/Source/DataStructs/ERF_DataStruct.H
+++ b/Source/DataStructs/ERF_DataStruct.H
@@ -679,7 +679,6 @@ struct SolverChoice {
         windfarm_type = WindFarmType::None; // Default
         pp.query_enum_case_insensitive("windfarm_type",windfarm_type);
 
-        static std::string windfarm_loc_type_string = "None";
         windfarm_loc_type = WindFarmLocType::None;
         pp.query_enum_case_insensitive("windfarm_loc_type",windfarm_loc_type);
 

From a67d777106aaa883d85d76fd70a0e8ea63e591bf Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Mon, 4 May 2026 15:08:46 -0700
Subject: [PATCH 03/17] no condensation in relaxation zone.

---
 Source/SourceTerms/ERF_MakeSources.cpp       | 16 ++++++++++++++--
 Source/SourceTerms/ERF_SrcHeaders.H          |  3 ++-
 Source/TimeIntegration/ERF_TI_slow_rhs_pre.H |  2 +-
 Source/TimeIntegration/ERF_TI_substep_fun.H  |  2 +-
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/Source/SourceTerms/ERF_MakeSources.cpp b/Source/SourceTerms/ERF_MakeSources.cpp
index 9520a159cf..51a4ac7a48 100644
--- a/Source/SourceTerms/ERF_MakeSources.cpp
+++ b/Source/SourceTerms/ERF_MakeSources.cpp
@@ -55,7 +55,8 @@ void make_sources (int level,
                    const MultiFab* surface_state_at_lev,
                    InputSoundingData& input_sounding_data,
                    TurbulentPerturbation& turbPert,
-                   bool is_slow_step)
+                   bool is_slow_step,
+                   int real_width)
 {
     BL_PROFILE_REGION("erf_make_sources()");
 
@@ -749,7 +750,18 @@ void make_sources (int level,
         // 12. Add macrophysics source for RhoTheta, RhoQ1, and RhoQ2
         // *************************************************************************************
         if (is_slow_step && has_moisture && tight_macro) {
-            ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+            auto domain = geom.Domain();
+            int i_lo = domain.smallEnd(0);
+            int i_hi = domain.bigEnd(0);
+            int j_lo = domain.smallEnd(1);
+            int j_hi = domain.bigEnd(1);
+
+            auto tbx = bx;
+            if (tbx.smallEnd(0) == i_lo) { tbx.growLo(0,-real_width); }
+            if (tbx.bigEnd(0)   == i_hi) { tbx.growHi(0,-real_width); }
+            if (tbx.smallEnd(1) == j_lo) { tbx.growLo(1,-real_width); }
+            if (tbx.bigEnd(1)   == j_hi) { tbx.growHi(1,-real_width); }
+            ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
             {
                 // Conserved CC vars initially
                 Real rho      = cell_data(i,j,k,Rho_comp);
diff --git a/Source/SourceTerms/ERF_SrcHeaders.H b/Source/SourceTerms/ERF_SrcHeaders.H
index bbe5b29c0c..883110c9cf 100644
--- a/Source/SourceTerms/ERF_SrcHeaders.H
+++ b/Source/SourceTerms/ERF_SrcHeaders.H
@@ -77,7 +77,8 @@ void make_sources (int level, int nrk,
                    const amrex::MultiFab* surface_state_at_lev,
                    InputSoundingData& input_sounding_data,
                    TurbulentPerturbation& turbPert,
-                   bool is_slow_step);
+                   bool is_slow_step,
+                   int real_width);
 
 void make_mom_sources (amrex::Real time,
                        amrex::Real dt,
diff --git a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
index dfd025ff8a..ffb76f89c1 100644
--- a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
+++ b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
@@ -108,7 +108,7 @@
                      rhotheta_src[level].get(), rhoqt_src[level].get(),
                      dptr_wbar_sub, d_rayleigh_ptrs_at_lev, d_sinesq_at_lev,
                      (solverChoice.hindcast_surface_bcs? &surface_state_interp[level] : nullptr),
-                     input_sounding_data, turbPert, true);
+                     input_sounding_data, turbPert, true, real_width);
 
         // *****************************************************************************
         // Define the pressure gradient
diff --git a/Source/TimeIntegration/ERF_TI_substep_fun.H b/Source/TimeIntegration/ERF_TI_substep_fun.H
index 4496dc6785..9e9c7b5b2b 100644
--- a/Source/TimeIntegration/ERF_TI_substep_fun.H
+++ b/Source/TimeIntegration/ERF_TI_substep_fun.H
@@ -68,7 +68,7 @@ auto acoustic_substepping_fun = [&](int fast_step, int n_sub, int nrk,
                      dptr_wbar_sub, d_rayleigh_ptrs_at_lev,
                      d_sinesq_at_lev,
                      (solverChoice.hindcast_surface_bcs? &surface_state_interp[level] : nullptr),
-                     input_sounding_data, turbPert, false);
+                     input_sounding_data, turbPert, false, real_width);
 
         // *************************************************************************
         // Set up flux registers if using two_way coupling

From 8fb87b757246b0acd4e50340f3d300d3384c4bbf Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Tue, 5 May 2026 09:19:35 -0700
Subject: [PATCH 04/17] Fix tendency.

---
 Source/SourceTerms/ERF_MakeSources.cpp       | 50 +++++++++++---------
 Source/SourceTerms/ERF_SrcHeaders.H          |  1 +
 Source/TimeIntegration/ERF_TI_slow_rhs_pre.H |  2 +-
 Source/TimeIntegration/ERF_TI_substep_fun.H  |  2 +-
 4 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/Source/SourceTerms/ERF_MakeSources.cpp b/Source/SourceTerms/ERF_MakeSources.cpp
index 51a4ac7a48..6d844ff78e 100644
--- a/Source/SourceTerms/ERF_MakeSources.cpp
+++ b/Source/SourceTerms/ERF_MakeSources.cpp
@@ -35,6 +35,7 @@ void make_sources (int level,
                    int /*nrk*/,
                    Real dt,
                    Real time,
+                   const Vector<MultiFab>& S_old,
                    const Vector<MultiFab>& S_data,
                    const  MultiFab & S_prim,
                           MultiFab & source,
@@ -229,6 +230,7 @@ void make_sources (int level,
     {
         Box bx  = mfi.tilebox();
 
+        const Array4<const Real>& cell_old   = S_old[IntVars::cons].array(mfi);
         const Array4<const Real>& cell_data  = S_data[IntVars::cons].array(mfi);
         const Array4<const Real>& cell_prim  = S_prim.array(mfi);
         const Array4<Real>      & cell_src   = source.array(mfi);
@@ -750,7 +752,6 @@ void make_sources (int level,
         // 12. Add macrophysics source for RhoTheta, RhoQ1, and RhoQ2
         // *************************************************************************************
         if (is_slow_step && has_moisture && tight_macro) {
-            auto domain = geom.Domain();
             int i_lo = domain.smallEnd(0);
             int i_hi = domain.bigEnd(0);
             int j_lo = domain.smallEnd(1);
@@ -763,41 +764,44 @@ void make_sources (int level,
             if (tbx.bigEnd(1)   == j_hi) { tbx.growHi(1,-real_width); }
             ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
             {
-                // Conserved CC vars initially
-                Real rho      = cell_data(i,j,k,Rho_comp);
-                Real rt_old   = cell_data(i,j,k,RhoTheta_comp);
-                Real th_old   = rt_old/rho;
-                Real qv_old   = cell_data(i,j,k,RhoQ1_comp   )/rho;
-                Real qc_old   = cell_data(i,j,k,RhoQ2_comp   )/rho;
-                Real qt       = qv_old + qc_old;
-                Real tabs_old = getTgivenRandRTh(rho, rt_old, qv_old);
+                // Conserved CC vars at start of step
+                Real rho_old  = cell_old(i,j,k,Rho_comp);
+                Real th_old   = cell_old(i,j,k,RhoTheta_comp) / rho_old;
+                Real qv_old   = cell_old(i,j,k,RhoQ1_comp   ) / rho_old;
 
-                // To be updated in place
-                Real qv   = qv_old;
-                Real tabs = tabs_old;
-                Real pres = getPgivenRTh(rt_old, qv_old);
+                // Conserved CC vars at start of RK stage
+                Real rho_s  = cell_data(i,j,k,Rho_comp);
+                Real rt_s   = cell_data(i,j,k,RhoTheta_comp);
+                Real qv_s   = cell_data(i,j,k,RhoQ1_comp   ) / rho_s;
+                Real qc_s   = cell_data(i,j,k,RhoQ2_comp   ) / rho_s;
 
-                // Vapor frac at saturation
+                // To be updated in place
                 Real qs;
+                Real qv_f   = qv_s;
+                Real tabs_f = getTgivenRandRTh(rho_s, rt_s, qv_s);
+
+                // Invariants
+                Real qt   = qv_s + qc_s;
+                Real pres = getPgivenRTh(rt_s, qv_s);
 
                 // Valid Newton iteration
-                erf_qsatw(tabs, pres*Real(0.01), qs);
+                erf_qsatw(tabs_f, pres*Real(0.01), qs);
                 if (qt > qs) {
-                    NewtonIterSat(d_fac_cond, tabs, pres*Real(0.01), qv);
+                    NewtonIterSat(d_fac_cond, tabs_f, pres*Real(0.01), qv_f);
                 }
                 // Put qc into qv and double check Newton iter criteria
                 else {
-                    qv   += qc_old;
-                    tabs -= d_fac_cond * qc_old;
-                    erf_qsatw(tabs, pres*Real(0.01), qs);
-                    if (qv > qs) {
-                        NewtonIterSat(d_fac_cond, tabs, pres*Real(0.01), qv);
+                    qv_f   += qc_s;
+                    tabs_f -= d_fac_cond * qc_s;
+                    erf_qsatw(tabs_f, pres*Real(0.01), qs);
+                    if (qv_f > qs) {
+                        NewtonIterSat(d_fac_cond, tabs_f, pres*Real(0.01), qv_f);
                     }
                 }
 
                 // Populate tendencies
-                cell_src(i,j,k,RhoTheta_comp) += rho * idt * ( getThgivenTandP(tabs, pres, d_rdOcp) - th_old );
-                cell_src(i,j,k,RhoQ1_comp   ) += rho * idt * ( qv - qv_old );
+                cell_src(i,j,k,RhoTheta_comp) += rho_s * idt * ( getThgivenTandP(tabs_f, pres, d_rdOcp) - th_old );
+                cell_src(i,j,k,RhoQ1_comp   ) += rho_s * idt * ( qv_f - qv_old );
                 cell_src(i,j,k,RhoQ2_comp   ) += -cell_src(i,j,k,RhoQ1_comp);
             });
         }
diff --git a/Source/SourceTerms/ERF_SrcHeaders.H b/Source/SourceTerms/ERF_SrcHeaders.H
index 883110c9cf..221a606825 100644
--- a/Source/SourceTerms/ERF_SrcHeaders.H
+++ b/Source/SourceTerms/ERF_SrcHeaders.H
@@ -57,6 +57,7 @@ void compute_gradp_interpz (const amrex::MultiFab& p,
 
 void make_sources (int level, int nrk,
                    amrex::Real dt, amrex::Real time,
+                   const amrex::Vector<amrex::MultiFab>& S_old,
                    const amrex::Vector<amrex::MultiFab>& S_data,
                    const amrex::MultiFab& S_prim,
                          amrex::MultiFab& cc_source,
diff --git a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
index ffb76f89c1..23de2602a8 100644
--- a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
+++ b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
@@ -100,7 +100,7 @@
         // Construct the source terms for the cell-centered (conserved) variables
         // *****************************************************************************
         make_sources(level, nrk, slow_dt, old_stage_time,
-                     S_data, S_prim, cc_src, base_state[level], zpc_to_use,
+                     S_old, S_data, S_prim, cc_src, base_state[level], zpc_to_use,
                      xvel_new, yvel_new,
                      qheating_rates[level].get(),
                      terrain_blank, fine_geom, solverChoice,
diff --git a/Source/TimeIntegration/ERF_TI_substep_fun.H b/Source/TimeIntegration/ERF_TI_substep_fun.H
index 9e9c7b5b2b..75fc5b4fef 100644
--- a/Source/TimeIntegration/ERF_TI_substep_fun.H
+++ b/Source/TimeIntegration/ERF_TI_substep_fun.H
@@ -59,7 +59,7 @@ auto acoustic_substepping_fun = [&](int fast_step, int n_sub, int nrk,
         }
 
         make_sources(level, nrk, dtau, old_substep_time,
-                     Svec_to_use, S_prim, cc_src, base_state[level], z_phys_cc[level].get(),
+                     S_old, Svec_to_use, S_prim, cc_src, base_state[level], z_phys_cc[level].get(),
                      xvel_new, yvel_new,
                      qheating_rates[level].get(),
                      terrain_blank, fine_geom, solverChoice,

From cc0c0df63dc18bc537631c43c402f1e41c46d114 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Tue, 5 May 2026 14:20:02 -0700
Subject: [PATCH 05/17] macro once per timestep and use operator splitting to
 do theta tend but update moist vars in place; improve qc advection stability.

---
 Source/ERF.H                                 |  1 +
 Source/ERF.cpp                               |  1 +
 Source/ERF_MakeNewArrays.cpp                 | 11 ++-
 Source/SourceTerms/ERF_MakeSources.cpp       | 67 ++--------------
 Source/SourceTerms/ERF_SrcHeaders.H          |  5 +-
 Source/TimeIntegration/ERF_Advance.cpp       | 16 +++-
 Source/TimeIntegration/ERF_TI_slow_rhs_pre.H |  6 +-
 Source/TimeIntegration/ERF_TI_substep_fun.H  |  6 +-
 Source/Utils/ERF_MacroPhysics.H              | 81 +++++++++++++++++++-
 9 files changed, 120 insertions(+), 74 deletions(-)

diff --git a/Source/ERF.H b/Source/ERF.H
index f96b4275c8..3520de9735 100644
--- a/Source/ERF.H
+++ b/Source/ERF.H
@@ -869,6 +869,7 @@ private:
 
     std::unique_ptr<Microphysics> micro;
     amrex::Vector<amrex::Vector<amrex::MultiFab*>> qmoist; // (lev,ncomp) rain_accum, snow_accum, graup_accum
+    amrex::Vector<std::unique_ptr<amrex::MultiFab>> macrophysics_src; // RhoTheta source for macro coupling
 
     // Variables for wind farm parametrization models
 
diff --git a/Source/ERF.cpp b/Source/ERF.cpp
index 6016ad726e..eb1f3d7451 100644
--- a/Source/ERF.cpp
+++ b/Source/ERF.cpp
@@ -2144,6 +2144,7 @@ ERF::initializeMicrophysics (const int& a_nlevsmax /*!< number of AMR levels */)
     }
 
     qmoist.resize(a_nlevsmax);
+    macrophysics_src.resize(a_nlevsmax);
     return;
 }
 
diff --git a/Source/ERF_MakeNewArrays.cpp b/Source/ERF_MakeNewArrays.cpp
index ccb0cae22b..e2e5daedc2 100644
--- a/Source/ERF_MakeNewArrays.cpp
+++ b/Source/ERF_MakeNewArrays.cpp
@@ -43,7 +43,6 @@ ERF::init_stuff (int lev, const BoxArray& ba, const DistributionMapping& dm,
     // ********************************************************************************************
     // Allocate terrain arrays
     // ********************************************************************************************
-
     BoxArray ba_nd(ba);
     ba_nd.surroundingNodes();
 
@@ -465,6 +464,16 @@ ERF::init_stuff (int lev, const BoxArray& ba, const DistributionMapping& dm,
         rad_fluxes[lev]->setVal(0);
     }
 
+    //*********************************************************
+    // Macrophysics heating source terms
+    //*********************************************************
+    if (solverChoice.moisture_type != MoistureType::None &&
+        solverChoice.macrophysics_tight_coupling)
+    {
+        macrophysics_src[lev] = std::make_unique<MultiFab>(ba, dm, 1, 0);
+        macrophysics_src[lev]->setVal(0);
+    }
+
     //*********************************************************
     // Turbulent perturbation region initialization
     //*********************************************************
diff --git a/Source/SourceTerms/ERF_MakeSources.cpp b/Source/SourceTerms/ERF_MakeSources.cpp
index 6d844ff78e..92564edc41 100644
--- a/Source/SourceTerms/ERF_MakeSources.cpp
+++ b/Source/SourceTerms/ERF_MakeSources.cpp
@@ -8,7 +8,6 @@
 #include "ERF_SrcHeaders.H"
 #include "ERF_TI_slow_headers.H"
 #include "ERF_MOSTStress.H"
-#include "ERF_MacroPhysics.H"
 
 using namespace amrex;
 
@@ -35,7 +34,6 @@ void make_sources (int level,
                    int /*nrk*/,
                    Real dt,
                    Real time,
-                   const Vector<MultiFab>& S_old,
                    const Vector<MultiFab>& S_data,
                    const  MultiFab & S_prim,
                           MultiFab & source,
@@ -48,6 +46,7 @@ void make_sources (int level,
                    const Geometry geom,
                    const SolverChoice& solverChoice,
                    Vector<std::unique_ptr<MultiFab>>& mapfac,
+                   const MultiFab* macrophysics_src,
                    const MultiFab* rhotheta_src,
                    const MultiFab* rhoqt_src,
                    const Real* dptr_wbar_sub,
@@ -56,8 +55,7 @@ void make_sources (int level,
                    const MultiFab* surface_state_at_lev,
                    InputSoundingData& input_sounding_data,
                    TurbulentPerturbation& turbPert,
-                   bool is_slow_step,
-                   int real_width)
+                   bool is_slow_step)
 {
     BL_PROFILE_REGION("erf_make_sources()");
 
@@ -70,10 +68,6 @@ void make_sources (int level,
         source.setVal(0.0,Rho_comp,2);
     }
 
-    Real idt        = amrex::Real(1.0) / dt;
-    Real d_rdOcp    = R_d / Cp_d;
-    Real d_fac_cond = L_v / Cp_d;
-
     const bool l_use_ndiff = solverChoice.use_num_diff;
 
     TurbChoice tc = solverChoice.turbChoice[level];
@@ -216,7 +210,7 @@ void make_sources (int level,
     //   10a. Immersed forcing for terrain
     //   10b. Immersed forcing for buildings
     //   11. Four stream radiation source for (rho theta)
-    //   12. Macrophysics source for (rho theta; rho Q1; rho Q2)
+    //   12. Macrophysics source for (rho theta)
     // *****************************************************************************
 
     // ***********************************************************************************************
@@ -230,11 +224,12 @@ void make_sources (int level,
     {
         Box bx  = mfi.tilebox();
 
-        const Array4<const Real>& cell_old   = S_old[IntVars::cons].array(mfi);
         const Array4<const Real>& cell_data  = S_data[IntVars::cons].array(mfi);
         const Array4<const Real>& cell_prim  = S_prim.array(mfi);
         const Array4<Real>      & cell_src   = source.array(mfi);
 
+        const Array4<const Real>& mac_src    = macrophysics_src->const_array(mfi);
+
         const Array4<const Real>& r0  = r_hse.const_array(mfi);
         const Array4<const Real>& th0 = th_hse.const_array(mfi);
         const Array4<const Real>& qv0 = qv_hse.const_array(mfi);
@@ -752,57 +747,9 @@ void make_sources (int level,
         // 12. Add macrophysics source for RhoTheta, RhoQ1, and RhoQ2
         // *************************************************************************************
         if (is_slow_step && has_moisture && tight_macro) {
-            int i_lo = domain.smallEnd(0);
-            int i_hi = domain.bigEnd(0);
-            int j_lo = domain.smallEnd(1);
-            int j_hi = domain.bigEnd(1);
-
-            auto tbx = bx;
-            if (tbx.smallEnd(0) == i_lo) { tbx.growLo(0,-real_width); }
-            if (tbx.bigEnd(0)   == i_hi) { tbx.growHi(0,-real_width); }
-            if (tbx.smallEnd(1) == j_lo) { tbx.growLo(1,-real_width); }
-            if (tbx.bigEnd(1)   == j_hi) { tbx.growHi(1,-real_width); }
-            ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+            ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
             {
-                // Conserved CC vars at start of step
-                Real rho_old  = cell_old(i,j,k,Rho_comp);
-                Real th_old   = cell_old(i,j,k,RhoTheta_comp) / rho_old;
-                Real qv_old   = cell_old(i,j,k,RhoQ1_comp   ) / rho_old;
-
-                // Conserved CC vars at start of RK stage
-                Real rho_s  = cell_data(i,j,k,Rho_comp);
-                Real rt_s   = cell_data(i,j,k,RhoTheta_comp);
-                Real qv_s   = cell_data(i,j,k,RhoQ1_comp   ) / rho_s;
-                Real qc_s   = cell_data(i,j,k,RhoQ2_comp   ) / rho_s;
-
-                // To be updated in place
-                Real qs;
-                Real qv_f   = qv_s;
-                Real tabs_f = getTgivenRandRTh(rho_s, rt_s, qv_s);
-
-                // Invariants
-                Real qt   = qv_s + qc_s;
-                Real pres = getPgivenRTh(rt_s, qv_s);
-
-                // Valid Newton iteration
-                erf_qsatw(tabs_f, pres*Real(0.01), qs);
-                if (qt > qs) {
-                    NewtonIterSat(d_fac_cond, tabs_f, pres*Real(0.01), qv_f);
-                }
-                // Put qc into qv and double check Newton iter criteria
-                else {
-                    qv_f   += qc_s;
-                    tabs_f -= d_fac_cond * qc_s;
-                    erf_qsatw(tabs_f, pres*Real(0.01), qs);
-                    if (qv_f > qs) {
-                        NewtonIterSat(d_fac_cond, tabs_f, pres*Real(0.01), qv_f);
-                    }
-                }
-
-                // Populate tendencies
-                cell_src(i,j,k,RhoTheta_comp) += rho_s * idt * ( getThgivenTandP(tabs_f, pres, d_rdOcp) - th_old );
-                cell_src(i,j,k,RhoQ1_comp   ) += rho_s * idt * ( qv_f - qv_old );
-                cell_src(i,j,k,RhoQ2_comp   ) += -cell_src(i,j,k,RhoQ1_comp);
+                cell_src(i,j,k,RhoTheta_comp) += mac_src(i,j,k);
             });
         }
     } // mfi
diff --git a/Source/SourceTerms/ERF_SrcHeaders.H b/Source/SourceTerms/ERF_SrcHeaders.H
index 221a606825..8cd7caa091 100644
--- a/Source/SourceTerms/ERF_SrcHeaders.H
+++ b/Source/SourceTerms/ERF_SrcHeaders.H
@@ -57,7 +57,6 @@ void compute_gradp_interpz (const amrex::MultiFab& p,
 
 void make_sources (int level, int nrk,
                    amrex::Real dt, amrex::Real time,
-                   const amrex::Vector<amrex::MultiFab>& S_old,
                    const amrex::Vector<amrex::MultiFab>& S_data,
                    const amrex::MultiFab& S_prim,
                          amrex::MultiFab& cc_source,
@@ -70,6 +69,7 @@ void make_sources (int level, int nrk,
                    const amrex::Geometry geom,
                    const SolverChoice& solverChoice,
                    amrex::Vector<std::unique_ptr<amrex::MultiFab>>& mapfac,
+                   const amrex::MultiFab* macrophysics_src,
                    const amrex::MultiFab* rhotheta_src,
                    const amrex::MultiFab* rhoqt_src,
                    const amrex::Real* dptr_wbar_sub,
@@ -78,8 +78,7 @@ void make_sources (int level, int nrk,
                    const amrex::MultiFab* surface_state_at_lev,
                    InputSoundingData& input_sounding_data,
                    TurbulentPerturbation& turbPert,
-                   bool is_slow_step,
-                   int real_width);
+                   bool is_slow_step);
 
 void make_mom_sources (amrex::Real time,
                        amrex::Real dt,
diff --git a/Source/TimeIntegration/ERF_Advance.cpp b/Source/TimeIntegration/ERF_Advance.cpp
index 8df8114c38..3f8a5c2da7 100644
--- a/Source/TimeIntegration/ERF_Advance.cpp
+++ b/Source/TimeIntegration/ERF_Advance.cpp
@@ -1,8 +1,9 @@
-#include <ERF.H>
-#include <ERF_Utils.H>
+#include "ERF.H"
+#include "ERF_Utils.H"
+#include "ERF_MacroPhysics.H"
 
 #ifdef ERF_USE_WINDFARM
-#include <ERF_WindFarm.H>
+#include "ERF_WindFarm.H"
 #endif
 
 using namespace amrex;
@@ -204,6 +205,15 @@ ERF::Advance (int lev, Real time, Real dt_lev, int iteration, int /*ncycle*/)
     state_new.push_back(MultiFab(rV_new[lev], amrex::make_alias, 0,     1)); // ymom
     state_new.push_back(MultiFab(rW_new[lev], amrex::make_alias, 0,     1)); // zmom
 
+    // **************************************************************************************
+    // Macrophysics source term computed pre-step
+    // **************************************************************************************
+    if (solverChoice.moisture_type != MoistureType::None &&
+        solverChoice.macrophysics_tight_coupling)
+    {
+        GetMacroPhysicsSrc(real_width, dt_lev, Geom(lev), macrophysics_src[lev].get(), S_old);
+    }
+
     // **************************************************************************************
     // Tests on the reasonableness of the solution before the dycore
     // **************************************************************************************
diff --git a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
index 23de2602a8..5a7588f2f3 100644
--- a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
+++ b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
@@ -100,15 +100,15 @@
         // Construct the source terms for the cell-centered (conserved) variables
         // *****************************************************************************
         make_sources(level, nrk, slow_dt, old_stage_time,
-                     S_old, S_data, S_prim, cc_src, base_state[level], zpc_to_use,
+                     S_data, S_prim, cc_src, base_state[level], zpc_to_use,
                      xvel_new, yvel_new,
                      qheating_rates[level].get(),
                      terrain_blank, fine_geom, solverChoice,
                      mapfac[level],
-                     rhotheta_src[level].get(), rhoqt_src[level].get(),
+                     macrophysics_src[level].get(), rhotheta_src[level].get(), rhoqt_src[level].get(),
                      dptr_wbar_sub, d_rayleigh_ptrs_at_lev, d_sinesq_at_lev,
                      (solverChoice.hindcast_surface_bcs? &surface_state_interp[level] : nullptr),
-                     input_sounding_data, turbPert, true, real_width);
+                     input_sounding_data, turbPert, true);
 
         // *****************************************************************************
         // Define the pressure gradient
diff --git a/Source/TimeIntegration/ERF_TI_substep_fun.H b/Source/TimeIntegration/ERF_TI_substep_fun.H
index 75fc5b4fef..cb449bd150 100644
--- a/Source/TimeIntegration/ERF_TI_substep_fun.H
+++ b/Source/TimeIntegration/ERF_TI_substep_fun.H
@@ -59,16 +59,16 @@ auto acoustic_substepping_fun = [&](int fast_step, int n_sub, int nrk,
         }
 
         make_sources(level, nrk, dtau, old_substep_time,
-                     S_old, Svec_to_use, S_prim, cc_src, base_state[level], z_phys_cc[level].get(),
+                     Svec_to_use, S_prim, cc_src, base_state[level], z_phys_cc[level].get(),
                      xvel_new, yvel_new,
                      qheating_rates[level].get(),
                      terrain_blank, fine_geom, solverChoice,
                      mapfac[level],
-                     rhotheta_src[level].get(), rhoqt_src[level].get(),
+                     macrophysics_src[level].get(), rhotheta_src[level].get(), rhoqt_src[level].get(),
                      dptr_wbar_sub, d_rayleigh_ptrs_at_lev,
                      d_sinesq_at_lev,
                      (solverChoice.hindcast_surface_bcs? &surface_state_interp[level] : nullptr),
-                     input_sounding_data, turbPert, false, real_width);
+                     input_sounding_data, turbPert, false);
 
         // *************************************************************************
         // Set up flux registers if using two_way coupling
diff --git a/Source/Utils/ERF_MacroPhysics.H b/Source/Utils/ERF_MacroPhysics.H
index a99a763fb4..051f6a375c 100644
--- a/Source/Utils/ERF_MacroPhysics.H
+++ b/Source/Utils/ERF_MacroPhysics.H
@@ -1,6 +1,9 @@
 #ifndef ERF_MACROPHYS_H
 #define ERF_MACROPHYS_H
 
+#include "ERF_EOS.H"
+#include "ERF_Constants.H"
+#include "ERF_IndexDefines.H"
 #include "ERF_MicrophysicsUtils.H"
 
 AMREX_GPU_HOST_DEVICE
@@ -12,7 +15,7 @@ NewtonIterSat (const amrex::Real& fac_cond,
                amrex::Real& qv)
 {
     // Solution tolerance
-    amrex::Real tol = amrex::Real(1.0e-8);
+    amrex::Real tol = amrex::Real(1.0e-6);
 
     // Saturation moisture fractions
     amrex::Real qsat, dqsat;
@@ -54,4 +57,80 @@ NewtonIterSat (const amrex::Real& fac_cond,
     // Update from last iteration (dq = dq/dt * dt)
     qv  = qsat + dqsat*dtabs;
 }
+
+void
+GetMacroPhysicsSrc (int& real_width,
+                    amrex::Real& dt,
+                    amrex::Geometry& geom,
+                    amrex::MultiFab* macro_src,
+                    amrex::MultiFab& cons_data)
+{
+    amrex::Real idt        = amrex::Real(1.) / dt;
+    amrex::Real d_rdOcp    = R_d / Cp_d;
+    amrex::Real d_fac_cond = L_v / Cp_d;
+
+#ifdef _OPENMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+    for (amrex::MFIter mfi(cons_data,TileNoZ()); mfi.isValid(); ++mfi)
+    {
+        amrex::Box tbx = mfi.tilebox();
+
+        const amrex::Array4<amrex::Real>& mac_src   = macro_src->array(mfi);
+        const amrex::Array4<amrex::Real>& cell_data = cons_data.array(mfi);
+
+        // No macrophysics in the nudging region
+        const amrex::Box& domain = geom.Domain();
+        int i_lo = domain.smallEnd(0);
+        int i_hi = domain.bigEnd(0);
+        int j_lo = domain.smallEnd(1);
+        int j_hi = domain.bigEnd(1);
+        if (tbx.smallEnd(0) == i_lo) { tbx.growLo(0,-real_width); }
+        if (tbx.bigEnd(0)   == i_hi) { tbx.growHi(0,-real_width); }
+        if (tbx.smallEnd(1) == j_lo) { tbx.growLo(1,-real_width); }
+        if (tbx.bigEnd(1)   == j_hi) { tbx.growHi(1,-real_width); }
+
+        // Theta tendency and update qv/qc in place
+        ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+            // Conserved CC vars at start of step
+            amrex::Real rho_s = cell_data(i,j,k,Rho_comp);
+            amrex::Real rt_s  = cell_data(i,j,k,RhoTheta_comp);
+            amrex::Real th_s  = cell_data(i,j,k,RhoTheta_comp) / rho_s;
+            amrex::Real qv_s  = cell_data(i,j,k,RhoQ1_comp   ) / rho_s;
+            amrex::Real qc_s  = cell_data(i,j,k,RhoQ2_comp   ) / rho_s;
+
+            // To be updated in place
+            amrex::Real qs;
+            amrex::Real qv_f   = qv_s;
+            amrex::Real tabs_f = getTgivenRandRTh(rho_s, rt_s, qv_s);
+
+            // Invariants
+            amrex::Real qt   = qv_s + qc_s;
+            amrex::Real pres = getPgivenRTh(rt_s, qv_s);
+
+            // Valid Newton iteration
+            erf_qsatw(tabs_f, pres*amrex::Real(0.01), qs);
+            if (qt > qs) {
+                NewtonIterSat(d_fac_cond, tabs_f, pres*amrex::Real(0.01), qv_f);
+            }
+            // Put qc into qv and double check Newton iter criteria
+            else {
+                qv_f   += qc_s;
+                tabs_f -= d_fac_cond * qc_s;
+                erf_qsatw(tabs_f, pres*amrex::Real(0.01), qs);
+                if (qv_f > qs) {
+                    NewtonIterSat(d_fac_cond, tabs_f, pres*amrex::Real(0.01), qv_f);
+                }
+            }
+
+            // Populate tendency and data
+            amrex::Real th_f = getThgivenTandP(tabs_f, pres, d_rdOcp);
+            mac_src(i,j,k) = rho_s * idt * ( th_f - th_s );
+            cell_data(i,j,k,RhoQ1_comp) = rho_s * qv_f;
+            cell_data(i,j,k,RhoQ2_comp) = rho_s * std::max(amrex::Real(0.0),qt - qv_f);
+        });
+    } // mfi
+}
+
 #endif

From c50c2b5145e71d6ef54478fc4e92bf42a19ac2e5 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Tue, 5 May 2026 14:52:10 -0700
Subject: [PATCH 06/17] Fix access of mac src when not allocated.

---
 Source/SourceTerms/ERF_MakeSources.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Source/SourceTerms/ERF_MakeSources.cpp b/Source/SourceTerms/ERF_MakeSources.cpp
index 92564edc41..ba6b316d22 100644
--- a/Source/SourceTerms/ERF_MakeSources.cpp
+++ b/Source/SourceTerms/ERF_MakeSources.cpp
@@ -228,7 +228,8 @@ void make_sources (int level,
         const Array4<const Real>& cell_prim  = S_prim.array(mfi);
         const Array4<Real>      & cell_src   = source.array(mfi);
 
-        const Array4<const Real>& mac_src    = macrophysics_src->const_array(mfi);
+        const Array4<const Real>& mac_src    = (macrophysics_src) ? macrophysics_src->const_array(mfi) :
+                                                                    Array4<const Real>{};
 
         const Array4<const Real>& r0  = r_hse.const_array(mfi);
         const Array4<const Real>& th0 = th_hse.const_array(mfi);
@@ -744,7 +745,7 @@ void make_sources (int level,
         }
 
         // *************************************************************************************
-        // 12. Add macrophysics source for RhoTheta, RhoQ1, and RhoQ2
+        // 12. Add macrophysics source for RhoTheta
         // *************************************************************************************
         if (is_slow_step && has_moisture && tight_macro) {
             ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept

From 7790c41746f933cdea0679f75a4167f50035ed38 Mon Sep 17 00:00:00 2001
From: "Aaron M. Lattanzi" <103702284+AMLattanzi@users.noreply.github.com>
Date: Mon, 11 May 2026 14:35:11 -0700
Subject: [PATCH 07/17] Remora Coupling (#3164)

* remora coupling inputs.

* more descriptive file name.
---
 .Exec_dev/RemoraCoupling/input_sounding      |  3 +
 .Exec_dev/RemoraCoupling/inputs_shear_oneway | 85 ++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 .Exec_dev/RemoraCoupling/input_sounding
 create mode 100644 .Exec_dev/RemoraCoupling/inputs_shear_oneway

diff --git a/.Exec_dev/RemoraCoupling/input_sounding b/.Exec_dev/RemoraCoupling/input_sounding
new file mode 100644
index 0000000000..d2f0614330
--- /dev/null
+++ b/.Exec_dev/RemoraCoupling/input_sounding
@@ -0,0 +1,3 @@
+ 1017.8  289.0  0.0
+    0.0  289.0  0.0  5.0  0.0
+ 5000.0  289.0  0.0  5.0  0.0
diff --git a/.Exec_dev/RemoraCoupling/inputs_shear_oneway b/.Exec_dev/RemoraCoupling/inputs_shear_oneway
new file mode 100644
index 0000000000..1a976fe082
--- /dev/null
+++ b/.Exec_dev/RemoraCoupling/inputs_shear_oneway
@@ -0,0 +1,85 @@
+# ------------------  INPUTS TO MAIN PROGRAM  -------------------
+erf.prob_name = "Undefined"
+
+stop_time = 3600.0
+max_step  = 1
+
+amrex.fpe_trap_invalid = 1
+
+fabarray.mfiter_tile_size = 1024 1024 1024
+
+# PROBLEM SIZE & GEOMETRY
+geometry.prob_extent = 3200.0  3200.0  1600.0
+amr.n_cell           =   32      32      16
+    
+geometry.is_periodic = 1 1 0
+
+zlo.type = "surface_layer"
+erf.surface_layer.flux_type = "bulk_coeff"
+erf.most.Cd  = 0.0 # ustar
+erf.most.Ch  = 0.0 # theta flux
+erf.most.Cq  = 0.0 # qv    flux
+erf.most.surf_temp  = 289.0
+erf.most.surf_moist = 0.0 
+erf.most.z0     = 0.1
+erf.most.zref   = 50.0
+     
+zhi.type = "SlipWall"
+
+
+# TIME STEP CONTROL
+erf.fixed_dt = 1.0
+erf.fixed_mri_dt_ratio = 6
+
+    
+# DIAGNOSTICS & VERBOSITY
+erf.sum_interval   = 1       # timesteps between computing mass
+erf.v              = 1       # verbosity in ERF.cpp
+amr.v              = 1       # verbosity in Amr.cpp
+
+    
+# REFINEMENT / REGRIDDING
+amr.max_level       = 0       # maximum level number allowed
+
+    
+# CHECKPOINT FILES
+erf.check_file      = chk        # root name of checkpoint file
+erf.check_int       = 100       # number of timesteps between checkpoints
+
+    
+# PLOTFILES
+erf.plot_file_1     = plt        # prefix of plotfile name
+erf.plot_int_1      = 100        # number of timesteps between plotfiles
+erf.plot_vars_1     = density rhotheta rhoQ1 rhoQ2 rhoadv_0 x_velocity y_velocity z_velocity pressure theta scalar temp qv qc
+
+    
+# SOLVER CHOICES
+erf.dycore_horiz_adv_type    = "Upwind_3rd"
+erf.dycore_vert_adv_type     = "Upwind_3rd"
+erf.dryscal_horiz_adv_type   = "Upwind_3rd_SL"
+erf.dryscal_vert_adv_type    = "Upwind_3rd_SL"
+erf.moistscal_horiz_adv_type = "Upwind_3rd_SL"
+erf.moistscal_vert_adv_type  = "Upwind_3rd_SL"
+
+erf.implicit_before_substep     = true
+erf.implicit_thermal_diffusion  = true
+erf.implicit_moisture_diffusion = true
+erf.implicit_momentum_diffusion = true
+erf.vert_implicit_fac           = 1. 1. 0.
+
+erf.use_gravity     = true
+erf.use_coriolis    = false      
+
+erf.moisture_model  = "SatAdj"
+
+erf.les_type          = "None"
+erf.molec_diff_type   = "Constant"
+erf.dynamic_viscosity = 5.0 # [kg/(m-s)]
+erf.alpha_T           = 5.0 # [m^2/s]
+erf.alpha_C           = 5.0
+
+
+# INITIAL CONDITIONS
+erf.init_type = "input_sounding"
+erf.input_sounding_file = "input_sounding"
+erf.sounding_type = Ideal
\ No newline at end of file

From 5b047a6ff856b6eeffe593fc96242fc239d7b0b9 Mon Sep 17 00:00:00 2001
From: "Aaron M. Lattanzi" <103702284+AMLattanzi@users.noreply.github.com>
Date: Mon, 11 May 2026 14:47:05 -0700
Subject: [PATCH 08/17] Storm Tagging (#3163)

* add tagging and inputs file.

* oneway coupling.

* Fix refinement criteria and use surf layer with zero bulk coeff to leverage implicit diff.

* Fix supercell query.
---
 .Exec_dev/ThunderStorm_ML/inputs_storm_amr | 117 +++++++++++++++++++++
 Source/ERF_Tagging.cpp                     |  30 +++++-
 Source/Prob/ERF_InitCustomPert_SuperCell.H |   4 +-
 3 files changed, 148 insertions(+), 3 deletions(-)
 create mode 100644 .Exec_dev/ThunderStorm_ML/inputs_storm_amr

diff --git a/.Exec_dev/ThunderStorm_ML/inputs_storm_amr b/.Exec_dev/ThunderStorm_ML/inputs_storm_amr
new file mode 100644
index 0000000000..df374bbe7a
--- /dev/null
+++ b/.Exec_dev/ThunderStorm_ML/inputs_storm_amr
@@ -0,0 +1,117 @@
+# ------------------  INPUTS TO MAIN PROGRAM  -------------------
+erf.prob_name = "SuperCell"
+
+erf.init_type = MoistBaseState
+
+max_step = 28800
+stop_time = 14400.0
+
+amrex.fpe_trap_invalid = 1
+
+fabarray.mfiter_tile_size = 2048 1024 2048
+
+    
+# PROBLEM SIZE & GEOMETRY
+geometry.prob_lo     = -75000. -50000.     0.
+geometry.prob_hi     =  75000.  50000. 24000.
+amr.n_cell           =    300    200      48    # dx=dy=dz=500 m
+
+geometry.is_periodic = 1 0 0
+ylo.type = "Open"
+yhi.type = "Open"
+zhi.type = "SlipWall"
+
+zlo.type = "surface_layer"
+erf.surface_layer.flux_type = "bulk_coeff"
+erf.most.Cd  = 0.0 # ustar
+erf.most.Ch  = 0.0 # theta flux
+erf.most.Cq  = 0.0 # qv    flux
+erf.most.surf_temp  = 300.0
+erf.most.surf_moist = 0.0 
+erf.most.z0   = 0.1
+erf.most.zref = 250.0
+
+    
+# TIME STEP CONTROL
+erf.fixed_dt           = 0.5      # fixed time step [s]
+erf.fixed_mri_dt_ratio = 6
+
+    
+# DIAGNOSTICS & VERBOSITY
+erf.sum_interval   = 1       # timesteps between computing mass
+erf.v              = 1       # verbosity in ERF.cpp
+amr.v              = 1       # verbosity in Amr.cpp
+
+    
+# REFINEMENT / REGRIDDING
+amr.max_level      = 1       # maximum level number allowed
+amr.ref_ratio_vect = 5 5 5
+
+erf.regrid_int    = 1
+erf.coupling_type = OneWay
+erf.refinement_indicators         = hi_helicity hi_reflectivity
+erf.hi_helicity.field_name        = helicity
+erf.hi_helicity.value_greater     = 40.0
+erf.hi_reflectivity.field_name    = max_reflectivity
+erf.hi_reflectivity.value_greater = 40.0
+
+    
+# CHECKPOINT FILES
+erf.check_file      = chk        # root name of checkpoint file
+erf.check_int       = 1000       # number of timesteps between checkpoints
+
+    
+# PLOTFILES
+erf.plot_file_1         = plt        # root name of plotfile
+erf.plot_int_1          = 120         # number of timesteps between plotfiles
+erf.plot_vars_1         = density rhotheta rhoQ1 rhoQ2 rhoQ3 x_velocity y_velocity z_velocity pressure theta temp qv qc qrain rain_accum pert_dens helicity max_reflectivity
+
+    
+# SOLVER CHOICE
+erf.dycore_horiz_adv_type    = Upwind_3rd
+erf.dycore_vert_adv_type     = Upwind_3rd
+erf.dryscal_horiz_adv_type   = Upwind_3rd_SL
+erf.dryscal_vert_adv_type    = Upwind_3rd_SL
+erf.moistscal_horiz_adv_type = Upwind_3rd_SL
+erf.moistscal_vert_adv_type  = Upwind_3rd_SL
+
+erf.use_gravity   = true
+erf.use_coriolis  = false
+erf.buoyancy_type = 1
+
+erf.implicit_before_substep     = true
+erf.implicit_thermal_diffusion  = true
+erf.implicit_moisture_diffusion = true
+erf.implicit_momentum_diffusion = true
+erf.vert_implicit_fac           = 1. 1. 0.
+    
+erf.les_type = "None"
+erf.molec_diff_type   = "ConstantAlpha"
+erf.dynamic_viscosity = 33.33 # [kg/(m-s)]
+erf.alpha_T           = 33.33 # [m^2/s]
+erf.alpha_C           = 33.33
+
+erf.moisture_model = "Morrison"
+
+    
+# PROBLEM PARAMETERS (optional)
+prob.z_tr = 12000.0
+prob.height = 1200.0
+prob.theta_0 = 300.0
+prob.theta_tr = 343.0
+prob.T_tr = 213.0
+prob.x_c = 0.0
+prob.y_c = 0.0
+prob.z_c = 2000.0
+prob.x_r = 10000.0
+prob.y_r = 10000.0
+prob.z_r = 2000.0
+prob.theta_c = 3.0
+
+prob.rayleigh_U_0 = 2.0
+prob.rayleigh_V_0 = 1.0
+prob.rayleigh_W_0 = 0.0
+prob.rayleigh_T_0 = 300.0
+
+prob.use_empirical_psat         = true
+prob.T_from_theta_in_moist_init = true
diff --git a/Source/ERF_Tagging.cpp b/Source/ERF_Tagging.cpp
index 187b94bf4a..d589eed842 100644
--- a/Source/ERF_Tagging.cpp
+++ b/Source/ERF_Tagging.cpp
@@ -232,7 +232,35 @@ ERF::ErrorEst (int levc, TagBoxArray& tags, Real time, int /*ngrow*/)
                     derived::erf_dertheta(bx, dfab, 0, 1, sfab, zfab, Geom(levc), time, nullptr, levc);
                 }
             } // mfi
-        // This allows dynamic refinement based on the value of the density
+        // This allows dynamic refinement based on the value of updraft helicity
+        } else if (ref_tags[j].Field() == "helicity")
+        {
+            for (MFIter mfi(*mf, TilingIfNotGPU()); mfi.isValid(); ++mfi)
+            {
+                const Box& bx = mfi.growntilebox();
+                auto& dfab = (*mf)[mfi];
+                auto& sfab = vars_new[levc][Vars::cons][mfi];
+                auto& zfab = (*z_phys_cc[levc])[mfi];
+
+                derived::erf_derhelicity(bx, dfab, 0, 1, sfab, zfab, Geom(levc), time, nullptr, levc);
+            }
+        } else if (ref_tags[j].Field() == "max_reflectivity")
+        {
+            if (solverChoice.moisture_type == MoistureType::Morrison ||
+                solverChoice.moisture_type == MoistureType::SAM) {
+                for (MFIter mfi(*mf, TilingIfNotGPU()); mfi.isValid(); ++mfi)
+                {
+                    const Box& bx = mfi.growntilebox();
+                    auto& dfab = (*mf)[mfi];
+                    auto& sfab = vars_new[levc][Vars::cons][mfi];
+                    auto& zfab = (*z_phys_cc[levc])[mfi];
+
+                    derived::erf_dermaxreflectivity(bx, dfab, 0, 1, sfab, zfab, Geom(levc), time, nullptr, levc);
+                }
+            } else {
+                Abort("Max reflectivity is only available with Morrison and SAM microphysics.");
+            }
+        // This allows dynamic refinement based on the terrain blanking
         } else if ( (SolverChoice::terrain_type == TerrainType::ImmersedForcing) &&
                     (ref_tags[j].Field() == "terrain_blanking") )
         {
diff --git a/Source/Prob/ERF_InitCustomPert_SuperCell.H b/Source/Prob/ERF_InitCustomPert_SuperCell.H
index 0bb83b1902..3072f37387 100644
--- a/Source/Prob/ERF_InitCustomPert_SuperCell.H
+++ b/Source/Prob/ERF_InitCustomPert_SuperCell.H
@@ -37,7 +37,7 @@
     pp_prob.query("z_tr",z_tr);
 
     Real x_c  = zero  ; pp_prob.query("x_c", x_c);
-    Real y_c  = zero  ; pp_prob.query("x_c", x_c);
+    Real y_c  = zero  ; pp_prob.query("y_c", y_c);
     Real z_c  = amrex::Real(1.5e3); pp_prob.query("z_c", z_c);
 
     Real r_c = one;
@@ -73,7 +73,7 @@
         const Real z        = prob_lo[2] + (k + myhalf) * dx[2];
         Real rad, delta_theta, theta_total, temperature, rho, RH, scalar;
 
-        // Introduce the warm bubble. Assume that the bubble is pressure matche with the background
+        // Introduce the warm bubble. Assume that the bubble is pressure matched with the background
 
         rad = std::pow(std::pow((x - x_c)/x_r,2) + std::pow((y - y_c)/y_r,2) + std::pow((z - z_c)/z_r,2), myhalf);
 

From d8498087fd10f0376df97b0c2ed7ade351b7dd69 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Mon, 11 May 2026 17:21:20 -0700
Subject: [PATCH 09/17] Theta_m pert is not right in substep T.

---
 Source/DataStructs/ERF_DataStruct.H           |   4 +-
 Source/ERF_MakeNewArrays.cpp                  |   2 +-
 Source/Microphysics/Kessler/ERF_Kessler.H     |   2 +-
 Source/Microphysics/Morrison/ERF_Morrison.H   |   2 +-
 Source/Microphysics/SAM/ERF_SAM.H             |   2 +-
 Source/Microphysics/SatAdj/ERF_SatAdj.H       |   2 +-
 Source/Microphysics/WSM6/ERF_WSM6.H           |   2 +-
 Source/SourceTerms/ERF_MakeSources.cpp        |   9 +-
 Source/TimeIntegration/ERF_MakeFastCoeffs.cpp |   8 +-
 Source/TimeIntegration/ERF_SlowRhsPre.cpp     |  67 +++++---
 Source/TimeIntegration/ERF_Substep_T.cpp      | 144 ++++++++++--------
 Source/Utils/ERF_MacroPhysics.H               |   4 +-
 12 files changed, 144 insertions(+), 104 deletions(-)

diff --git a/Source/DataStructs/ERF_DataStruct.H b/Source/DataStructs/ERF_DataStruct.H
index a12746c672..19e4e28ae2 100644
--- a/Source/DataStructs/ERF_DataStruct.H
+++ b/Source/DataStructs/ERF_DataStruct.H
@@ -222,7 +222,7 @@ struct SolverChoice {
             }
 
             pp.query("moisture_tight_coupling",moisture_tight_coupling);
-            pp.query("macrophysics_tight_coupling",macrophysics_tight_coupling);
+            pp.query("macrophysics_acoustic_coupling",macrophysics_acoustic_coupling);
         }
 
         // Which expression (1,2/3 or 4) to use for buoyancy
@@ -1267,7 +1267,7 @@ struct SolverChoice {
     MoistureComponentIndices moisture_indices;
 
     bool moisture_tight_coupling     {false};
-    bool macrophysics_tight_coupling {false};
+    bool macrophysics_acoustic_coupling {false};
 
     std::string windfarm_loc_table, windfarm_spec_table, windfarm_spec_table_extra;
     std::string windfarm_blade_table, windfarm_airfoil_tables;
diff --git a/Source/ERF_MakeNewArrays.cpp b/Source/ERF_MakeNewArrays.cpp
index e2e5daedc2..27c8bd9488 100644
--- a/Source/ERF_MakeNewArrays.cpp
+++ b/Source/ERF_MakeNewArrays.cpp
@@ -470,7 +470,7 @@ ERF::init_stuff (int lev, const BoxArray& ba, const DistributionMapping& dm,
     if (solverChoice.moisture_type != MoistureType::None &&
         solverChoice.macrophysics_tight_coupling)
     {
-        macrophysics_src[lev] = std::make_unique<MultiFab>(ba, dm, 1, 0);
+        macrophysics_src[lev] = std::make_unique<MultiFab>(ba, dm, 2, 0);
         macrophysics_src[lev]->setVal(0);
     }
 
diff --git a/Source/Microphysics/Kessler/ERF_Kessler.H b/Source/Microphysics/Kessler/ERF_Kessler.H
index e004752d23..3e28bae5ac 100644
--- a/Source/Microphysics/Kessler/ERF_Kessler.H
+++ b/Source/Microphysics/Kessler/ERF_Kessler.H
@@ -55,7 +55,7 @@ public:
     {
         m_fac_cond = lcond / sc.c_p;
         m_axis     = sc.ave_plane;
-        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_acoustic_coupling);
     }
 
     // init
diff --git a/Source/Microphysics/Morrison/ERF_Morrison.H b/Source/Microphysics/Morrison/ERF_Morrison.H
index de4d8b17a0..7d0ef5058c 100644
--- a/Source/Microphysics/Morrison/ERF_Morrison.H
+++ b/Source/Microphysics/Morrison/ERF_Morrison.H
@@ -72,7 +72,7 @@ public:
     {
         m_axis     = sc.ave_plane;
         m_rdOcp    = sc.rdOcp;
-        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_acoustic_coupling);
     }
 
     // init
diff --git a/Source/Microphysics/SAM/ERF_SAM.H b/Source/Microphysics/SAM/ERF_SAM.H
index 02e34687dc..118727f9a9 100644
--- a/Source/Microphysics/SAM/ERF_SAM.H
+++ b/Source/Microphysics/SAM/ERF_SAM.H
@@ -82,7 +82,7 @@ public:
         m_fac_sub  = lsub / sc.c_p;
         m_axis     = sc.ave_plane;
         m_rdOcp    = sc.rdOcp;
-        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_acoustic_coupling);
     }
 
     // init
diff --git a/Source/Microphysics/SatAdj/ERF_SatAdj.H b/Source/Microphysics/SatAdj/ERF_SatAdj.H
index 0cc722f04c..3ebbad0cff 100644
--- a/Source/Microphysics/SatAdj/ERF_SatAdj.H
+++ b/Source/Microphysics/SatAdj/ERF_SatAdj.H
@@ -58,7 +58,7 @@ public:
     {
         m_fac_cond = lcond / sc.c_p;
         m_rdOcp    = sc.rdOcp;
-        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_acoustic_coupling);
     }
 
     // init
diff --git a/Source/Microphysics/WSM6/ERF_WSM6.H b/Source/Microphysics/WSM6/ERF_WSM6.H
index 806cdf1b5f..0478b6a5b9 100644
--- a/Source/Microphysics/WSM6/ERF_WSM6.H
+++ b/Source/Microphysics/WSM6/ERF_WSM6.H
@@ -44,7 +44,7 @@ public:
     void Define(SolverChoice& sc) override
     {
         m_axis = sc.ave_plane;
-        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_tight_coupling);
+        m_do_cond  = (!sc.use_shoc && !sc.macrophysics_acoustic_coupling);
     }
 
     void Init(const amrex::MultiFab& cons_in,
diff --git a/Source/SourceTerms/ERF_MakeSources.cpp b/Source/SourceTerms/ERF_MakeSources.cpp
index ba6b316d22..d3fd50beb1 100644
--- a/Source/SourceTerms/ERF_MakeSources.cpp
+++ b/Source/SourceTerms/ERF_MakeSources.cpp
@@ -94,7 +94,7 @@ void make_sources (int level,
     bool has_moisture = (solverChoice.moisture_type != MoistureType::None);
 
     // flag for tight macrophysics coupling
-    bool tight_macro = solverChoice.macrophysics_tight_coupling;
+    bool acoustic_macro = solverChoice.macrophysics_acoustic_coupling;
 
     // *****************************************************************************
     // Planar averages for subsidence terms
@@ -210,7 +210,7 @@ void make_sources (int level,
     //   10a. Immersed forcing for terrain
     //   10b. Immersed forcing for buildings
     //   11. Four stream radiation source for (rho theta)
-    //   12. Macrophysics source for (rho theta)
+    //   12. Macrophysics source for (rho theta & rho Q1)
     // *****************************************************************************
 
     // ***********************************************************************************************
@@ -747,10 +747,11 @@ void make_sources (int level,
         // *************************************************************************************
         // 12. Add macrophysics source for RhoTheta
         // *************************************************************************************
-        if (is_slow_step && has_moisture && tight_macro) {
+        if (is_slow_step && has_moisture && acoustic_macro) {
             ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
             {
-                cell_src(i,j,k,RhoTheta_comp) += mac_src(i,j,k);
+                cell_src(i,j,k,RhoTheta_comp) += mac_src(i,j,k,0);
+                cell_src(i,j,k,RhoQ1_comp   ) += mac_src(i,j,k,1);
             });
         }
     } // mfi
diff --git a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
index 8ec418c351..5dac08cfc4 100644
--- a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
+++ b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
@@ -128,11 +128,11 @@ void make_fast_coeffs (int /*level*/,
 
                  Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_p)
                               +  halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) /
-                               (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) );
+                               (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) * (one + RvOverRd*qv_p) );
 
                  Real coeff_Q =  Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_q)
                               + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) /
-                               ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) );
+                               ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) * (one + RvOverRd*qv_q) );
 
                  coeffP_a(i,j,k) = coeff_P;
                  coeffQ_a(i,j,k) = coeff_Q;
@@ -173,11 +173,11 @@ void make_fast_coeffs (int /*level*/,
 
                  Real coeff_P = -Gamma * R_d * dzi * pi_c * (one + RvOverRd*qv_p)
                               +  halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) /
-                              (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) );
+                              (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) * (one + RvOverRd*qv_p) );
 
                  Real coeff_Q = Gamma * R_d * dzi * pi_c * (one + RvOverRd*qv_q)
                               + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) /
-                              ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) );
+                              ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) * (one + RvOverRd*qv_q) );
 
                  coeffP_a(i,j,k) = coeff_P;
                  coeffQ_a(i,j,k) = coeff_Q;
diff --git a/Source/TimeIntegration/ERF_SlowRhsPre.cpp b/Source/TimeIntegration/ERF_SlowRhsPre.cpp
index 94010f17a6..c64c414d70 100644
--- a/Source/TimeIntegration/ERF_SlowRhsPre.cpp
+++ b/Source/TimeIntegration/ERF_SlowRhsPre.cpp
@@ -562,32 +562,53 @@ void erf_slow_rhs_pre (int level, int finest_level,
                                 dxInv, mf_mx, mf_my, mf_uy, mf_vx,
                                 flx_arr, l_fixed_rho);
             AdvectionSrcForScalars(bx, icomp, ncomp,
-                                avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
-                                cell_prim, cell_rhs,
-                                detJ_arr, dxInv, mf_mx, mf_my,
-                                l_horiz_adv_type, l_vert_adv_type,
-                                l_horiz_upw_frac, l_vert_upw_frac,
-                                flx_arr, domain, bc_ptr_h);
+                                   avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
+                                   cell_prim, cell_rhs,
+                                   detJ_arr, dxInv, mf_mx, mf_my,
+                                   l_horiz_adv_type, l_vert_adv_type,
+                                   l_horiz_upw_frac, l_vert_upw_frac,
+                                   flx_arr, domain, bc_ptr_h);
+            if (l_use_moisture) {
+                AdvectionSrcForScalars(bx, RhoQ1_comp, ncomp,
+                                       avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
+                                       cell_prim, cell_rhs,
+                                       detJ_arr, dxInv, mf_mx, mf_my,
+                                       l_horiz_adv_type, l_vert_adv_type,
+                                       l_horiz_upw_frac, l_vert_upw_frac,
+                                       flx_arr, domain, bc_ptr_h);
+            }
         } else {
             EBAdvectionSrcForRho(bx, cell_rhs,
-                                rho_u, rho_v, omega_arr,
-                                avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
-                                mask_arr, cfg_arr,
-                                ax_arr, ay_arr, az_arr,
-                                fcx_arr, fcy_arr, fcz_arr, detJ_arr,
-                                dxInv, mf_mx, mf_my, mf_uy, mf_vx,
-                                flx_arr, l_fixed_rho,
-                                already_on_centroids);
+                                 rho_u, rho_v, omega_arr,
+                                 avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
+                                 mask_arr, cfg_arr,
+                                 ax_arr, ay_arr, az_arr,
+                                 fcx_arr, fcy_arr, fcz_arr, detJ_arr,
+                                 dxInv, mf_mx, mf_my, mf_uy, mf_vx,
+                                 flx_arr, l_fixed_rho,
+                                 already_on_centroids);
             EBAdvectionSrcForScalars(bx, icomp, ncomp,
-                                avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
-                                cell_prim, cell_rhs,
-                                mask_arr, cfg_arr, ax_arr, ay_arr, az_arr,
-                                fcx_arr, fcy_arr, fcz_arr,
-                                detJ_arr, dxInv, mf_mx, mf_my,
-                                l_horiz_adv_type, l_vert_adv_type,
-                                l_horiz_upw_frac, l_vert_upw_frac,
-                                flx_arr, domain, bc_ptr_h,
-                                already_on_centroids);
+                                     avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
+                                     cell_prim, cell_rhs,
+                                     mask_arr, cfg_arr, ax_arr, ay_arr, az_arr,
+                                     fcx_arr, fcy_arr, fcz_arr,
+                                     detJ_arr, dxInv, mf_mx, mf_my,
+                                     l_horiz_adv_type, l_vert_adv_type,
+                                     l_horiz_upw_frac, l_vert_upw_frac,
+                                     flx_arr, domain, bc_ptr_h,
+                                     already_on_centroids);
+            if (l_use_moisture) {
+                EBAdvectionSrcForScalars(bx, RhoQ1_comp, ncomp,
+                                         avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
+                                         cell_prim, cell_rhs,
+                                         mask_arr, cfg_arr, ax_arr, ay_arr, az_arr,
+                                         fcx_arr, fcy_arr, fcz_arr,
+                                         detJ_arr, dxInv, mf_mx, mf_my,
+                                         l_horiz_adv_type, l_vert_adv_type,
+                                         l_horiz_upw_frac, l_vert_upw_frac,
+                                         flx_arr, domain, bc_ptr_h,
+                                         already_on_centroids);
+            }
         }
 
         if (l_use_diff) {
diff --git a/Source/TimeIntegration/ERF_Substep_T.cpp b/Source/TimeIntegration/ERF_Substep_T.cpp
index fd6b596766..c7467ba800 100644
--- a/Source/TimeIntegration/ERF_Substep_T.cpp
+++ b/Source/TimeIntegration/ERF_Substep_T.cpp
@@ -137,7 +137,7 @@ void erf_substep_T (int step, int /*nrk*/,
         const Array4<Real>& old_drho_u     = Delta_rho_u.array(mfi);
         const Array4<Real>& old_drho_v     = Delta_rho_v.array(mfi);
         const Array4<Real>& old_drho_w     = Delta_rho_w.array(mfi);
-        const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
+        const Array4<Real>& old_drho_thm   = Delta_rho_theta.array(mfi);
 
         const Array4<const Real>&  prev_xmom = S_prev[IntVars::xmom].const_array(mfi);
         const Array4<const Real>&  prev_ymom = S_prev[IntVars::ymom].const_array(mfi);
@@ -154,6 +154,7 @@ void erf_substep_T (int step, int /*nrk*/,
             ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                 cur_cons(i,j,k,Rho_comp)      = prev_cons(i,j,k,Rho_comp);
                 cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
+                cur_cons(i,j,k,RhoQ1_comp)    = prev_cons(i,j,k,RhoQ1_comp);
             });
         } // step = 0
 
@@ -185,22 +186,20 @@ void erf_substep_T (int step, int /*nrk*/,
             old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
         });
 
-        const Array4<Real>& theta_extrap = extrap.array(mfi);
-        const Array4<const Real>& prim   = S_stage_prim.const_array(mfi);
+        const Array4<Real>& thm_extrap = extrap.array(mfi);
+        const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
 
         ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            old_drho(i,j,k)       = cur_cons(i,j,k,Rho_comp)      - stage_cons(i,j,k,Rho_comp);
-            old_drho_theta(i,j,k) = cur_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp);
+            Real qv_cur_fact = (1.0 + RvOverRd *   cur_cons(i,j,k,RhoQ1_comp) /   cur_cons(i,j,k,Rho_comp));
+            Real qv_stg_fact = (1.0 + RvOverRd *       prim(i,j,k,PrimQ1_comp));
+            old_drho(i,j,k)     =   cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
+            old_drho_thm(i,j,k) =   cur_cons(i,j,k,RhoTheta_comp) * qv_cur_fact
+                                - stage_cons(i,j,k,RhoTheta_comp) * qv_stg_fact;
             if (step == 0) {
-                theta_extrap(i,j,k) = old_drho_theta(i,j,k);
+                thm_extrap(i,j,k) = old_drho_thm(i,j,k);
             } else {
-                theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
-                  ( old_drho_theta(i,j,k) - lagged_arr(i,j,k) );
+                thm_extrap(i,j,k) = old_drho_thm(i,j,k) + beta_d * ( old_drho_thm(i,j,k) - lagged_arr(i,j,k) );
             }
-
-            // NOTE: qv is not changing over the fast steps so we use the stage data
-            Real qv = (l_use_moisture) ? prim(i,j,k,PrimQ1_comp) : zero;
-            theta_extrap(i,j,k) *= (one + RvOverRd*qv);
         });
     } // mfi
 
@@ -211,10 +210,10 @@ void erf_substep_T (int step, int /*nrk*/,
     {
         // We define lagged_delta_rt for our next step as the current delta_rt
         Box gbx = mfi.tilebox(); gbx.grow(1);
-        const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
-        const Array4<Real>& lagged_arr     = lagged_delta_rt.array(mfi);
+        const Array4<Real>& old_drho_thm = Delta_rho_theta.array(mfi);
+        const Array4<Real>& lagged_arr   = lagged_delta_rt.array(mfi);
         ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            lagged_arr(i,j,k) = old_drho_theta(i,j,k);
+            lagged_arr(i,j,k) = old_drho_thm(i,j,k);
         });
     } // mfi
 
@@ -258,7 +257,7 @@ void erf_substep_T (int step, int /*nrk*/,
 
         const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
 
-        const Array4<Real>& theta_extrap = extrap.array(mfi);
+        const Array4<Real>& thm_extrap = extrap.array(mfi);
 
         // Map factors
         const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
@@ -285,12 +284,12 @@ void erf_substep_T (int step, int /*nrk*/,
                 // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
                 Real met_h_xi   = Compute_h_xi_AtIface  (i, j, k, dxInv, z_nd);
                 Real met_h_zeta = Compute_h_zeta_AtIface(i, j, k, dxInv, z_nd);
-                Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
+                Real gp_xi = (thm_extrap(i,j,k) - thm_extrap(i-1,j,k)) * dxi;
                 Real gp_zeta_on_iface = (k == 0) ?
-                   myhalf  * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
-                                   - theta_extrap(i-1,j,k  ) - theta_extrap(i,j,k  ) ) :
-                   fourth * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
-                                  - theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
+                   myhalf  * dzi * ( thm_extrap(i-1,j,k+1) + thm_extrap(i,j,k+1)
+                                   - thm_extrap(i-1,j,k  ) - thm_extrap(i,j,k  ) ) :
+                   fourth  * dzi * ( thm_extrap(i-1,j,k+1) + thm_extrap(i,j,k+1)
+                                   - thm_extrap(i-1,j,k-1) - thm_extrap(i,j,k-1) );
                 Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
                 gpx *= mf_ux(i,j,0);
 
@@ -317,12 +316,12 @@ void erf_substep_T (int step, int /*nrk*/,
                 // Add (negative) gradient of (rho theta) multiplied by lagged "pi"
                 Real met_h_eta  = Compute_h_eta_AtJface(i, j, k, dxInv, z_nd);
                 Real met_h_zeta = Compute_h_zeta_AtJface(i, j, k, dxInv, z_nd);
-                Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
+                Real gp_eta = (thm_extrap(i,j,k) - thm_extrap(i,j-1,k)) * dyi;
                 Real gp_zeta_on_jface = (k == 0) ?
-                    myhalf  * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
-                                    - theta_extrap(i,j,k  ) - theta_extrap(i,j-1,k  ) ) :
-                    fourth * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
-                                   - theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
+                    myhalf  * dzi * ( thm_extrap(i,j,k+1) + thm_extrap(i,j-1,k+1)
+                                    - thm_extrap(i,j,k  ) - thm_extrap(i,j-1,k  ) ) :
+                    fourth  * dzi * ( thm_extrap(i,j,k+1) + thm_extrap(i,j-1,k+1)
+                                    - thm_extrap(i,j,k-1) - thm_extrap(i,j-1,k-1) );
                 Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
                 gpy *= mf_vy(i,j,0);
 
@@ -369,11 +368,11 @@ void erf_substep_T (int step, int /*nrk*/,
         const Array4<const Real> & stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi);
         const Array4<const Real> & prim       = S_stage_prim.const_array(mfi);
 
-        const Array4<Real>& old_drho_u     = Delta_rho_u.array(mfi);
-        const Array4<Real>& old_drho_v     = Delta_rho_v.array(mfi);
-        const Array4<Real>& old_drho_w     = Delta_rho_w.array(mfi);
-        const Array4<Real>& old_drho       = Delta_rho.array(mfi);
-        const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
+        const Array4<Real>& old_drho_u   = Delta_rho_u.array(mfi);
+        const Array4<Real>& old_drho_v   = Delta_rho_v.array(mfi);
+        const Array4<Real>& old_drho_w   = Delta_rho_w.array(mfi);
+        const Array4<Real>& old_drho     = Delta_rho.array(mfi);
+        const Array4<Real>& old_drho_thm = Delta_rho_theta.array(mfi);
 
         const Array4<const Real>& slow_rhs_cons  = S_slow_rhs[IntVars::cons].const_array(mfi);
         const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi);
@@ -409,14 +408,17 @@ void erf_substep_T (int step, int /*nrk*/,
         FArrayBox temp_rhs_fab;
         FArrayBox RHS_fab;
         FArrayBox soln_fab;
+        FArrayBox new_qv_fab;
 
         RHS_fab.resize     (tbz,1,The_Async_Arena());
         soln_fab.resize    (tbz,1,The_Async_Arena());
-        temp_rhs_fab.resize(tbz,2,The_Async_Arena());
+        temp_rhs_fab.resize(tbz,3,The_Async_Arena());
+        new_qv_fab.resize(tbz,1,The_Async_Arena());
 
         auto const& RHS_a        =      RHS_fab.array();
         auto const& soln_a       =     soln_fab.array();
         auto const& temp_rhs_arr = temp_rhs_fab.array();
+        auto const& new_qv_arr   = new_qv_fab.array();
 
         auto const&     coeffA_a =     coeff_A_mf.array(mfi);
         auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
@@ -434,6 +436,33 @@ void erf_substep_T (int step, int /*nrk*/,
         const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
             flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
 
+        // *********************************************************************
+        {
+        Box gbxo = mfi.nodaltilebox(2);
+        Box gbxo_mid = gbxo;
+
+        if (gbxo.smallEnd(2) == domlo.z) {
+            Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
+            gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
+            ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
+                omega_arr(i,j,k) = zero;
+            });
+        }
+        if (gbxo.bigEnd(2) == domhi.z+1) {
+            Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
+            gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
+            ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
+                omega_arr(i,j,k) = old_drho_w(i,j,k);
+            });
+        }
+        ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
+            omega_arr(i,j,k) = OmegaFromW(i,j,k,old_drho_w(i,j,k),
+                                          old_drho_u,old_drho_v,
+                                          mf_ux,mf_vy,z_nd,dxInv);
+        });
+        } // end profile
+        // *********************************************************************
+
         // *********************************************************************
         {
         BL_PROFILE("fast_T_making_rho_rhs");
@@ -468,6 +497,19 @@ void erf_substep_T (int step, int /*nrk*/,
                                        xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
                                      ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
                                        yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * myhalf;
+            temp_rhs_arr(i,j,k,2) = (( xflux_hi * (prim(i,j,k,3) + prim(i+1,j,k,3)) -
+                                       xflux_lo * (prim(i,j,k,3) + prim(i-1,j,k,3)) ) * dxi * mfsq+
+                                     ( yflux_hi * (prim(i,j,k,3) + prim(i,j+1,k,3)) -
+                                       yflux_lo * (prim(i,j,k,3) + prim(i,j-1,k,3)) ) * dyi * mfsq) * myhalf;
+
+            // Explicit update for temporary qv
+            Real fast_rhs_rho   = -( temp_rhs_arr(i,j,k,2) +
+                                   ( omega_arr(i,j,k+1) - omega_arr(i,j,k  ) ) * dzi ) / detJ(i,j,k);
+            Real new_rho        = prev_cons(i,j,k,0) + dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
+            Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
+                                   ( omega_arr(i,j,k+1) * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
+                                     omega_arr(i,j,k  ) * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
+            new_qv_arr(i,j,k) = ( prev_cons(i,j,k,4) + dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv) ) / new_rho;
 
             if (l_reflux) {
                 (flx_arr[0])(i,j,k,0) = xflux_lo;
@@ -488,33 +530,6 @@ void erf_substep_T (int step, int /*nrk*/,
         });
         } // end profile
 
-        // *********************************************************************
-        {
-        Box gbxo = mfi.nodaltilebox(2);
-        Box gbxo_mid = gbxo;
-
-        if (gbxo.smallEnd(2) == domlo.z) {
-            Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
-            gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
-            ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-                omega_arr(i,j,k) = zero;
-            });
-        }
-        if (gbxo.bigEnd(2) == domhi.z+1) {
-            Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
-            gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
-            ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-                omega_arr(i,j,k) = old_drho_w(i,j,k);
-            });
-        }
-        ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            omega_arr(i,j,k) = OmegaFromW(i,j,k,old_drho_w(i,j,k),
-                                          old_drho_u,old_drho_v,
-                                          mf_ux,mf_vy,z_nd,dxInv);
-        });
-        } // end profile
-        // *********************************************************************
-
         Box bx_shrunk_in_k = bx;
         int klo = tbz.smallEnd(2);
         int khi = tbz.bigEnd(2);
@@ -531,6 +546,9 @@ void erf_substep_T (int step, int /*nrk*/,
         //Note we don't act on the bottom or top boundaries of the domain
         ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
         {
+            Real new_qv_fact_P = (Real(1.0) + R_v/R_d * new_qv_arr(i,j,k+1));
+            Real new_qv_fact_Q = (Real(1.0) + R_v/R_d * new_qv_arr(i,j,k  ));
+
             Real coeff_P = coeffP_a(i,j,k);
             Real coeff_Q = coeffQ_a(i,j,k);
 
@@ -545,8 +563,8 @@ void erf_substep_T (int step, int /*nrk*/,
             // line 3 residuals (order dtau^2) one <-> beta_2
             Real R1_tmp =  -halfg * (  slow_rhs_cons(i,j,k  ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp) );
 
-                 R1_tmp +=  coeff_P * slow_rhs_cons(i,j,k  ,RhoTheta_comp)
-                          + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp);
+                 R1_tmp +=  coeff_P * slow_rhs_cons(i,j,k  ,RhoTheta_comp) * new_qv_fact_P
+                          + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * new_qv_fact_Q;
 
             Real Omega_kp1 = omega_arr(i,j,k+1);
             Real Omega_k   = omega_arr(i,j,k  );
@@ -559,8 +577,8 @@ void erf_substep_T (int step, int /*nrk*/,
                               + temp_rhs_arr(i,j,k,Rho_comp)/detJ(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ(i,j,k-1) );
 
             // consolidate lines 6&7 (order dtau^2)
-            R1_tmp += -( coeff_P/detJ(i,j,k  ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k  ,RhoTheta_comp) )
-                       + coeff_Q/detJ(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
+            R1_tmp += -( coeff_P/detJ(i,j,k  ) * new_qv_fact_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k  ,RhoTheta_comp) )
+                       + coeff_Q/detJ(i,j,k-1) * new_qv_fact_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
 
             // line 1
             RHS_a(i,j,k) = old_drho_w(i,j,k) + dtau * (slow_rhs_rho_w(i,j,k) + zmom_src_arr(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
diff --git a/Source/Utils/ERF_MacroPhysics.H b/Source/Utils/ERF_MacroPhysics.H
index 051f6a375c..cdec74c9fa 100644
--- a/Source/Utils/ERF_MacroPhysics.H
+++ b/Source/Utils/ERF_MacroPhysics.H
@@ -126,8 +126,8 @@ GetMacroPhysicsSrc (int& real_width,
 
             // Populate tendency and data
             amrex::Real th_f = getThgivenTandP(tabs_f, pres, d_rdOcp);
-            mac_src(i,j,k) = rho_s * idt * ( th_f - th_s );
-            cell_data(i,j,k,RhoQ1_comp) = rho_s * qv_f;
+            mac_src(i,j,k,0) = rho_s * idt * ( th_f - th_s );
+            mac_src(i,j,k,1) = rho_s * idt * ( qv_f - qv_s );
             cell_data(i,j,k,RhoQ2_comp) = rho_s * std::max(amrex::Real(0.0),qt - qv_f);
         });
     } // mfi

From 125fd5ff62e8880e25981da60f8f13a04f3caa66 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Tue, 12 May 2026 12:54:29 -0700
Subject: [PATCH 10/17] compiled in debug. Need to do _NS file.

---
 Source/ERF_MakeNewArrays.cpp                  |   2 +-
 Source/TimeIntegration/ERF_Advance.cpp        |   2 +-
 Source/TimeIntegration/ERF_MakeFastCoeffs.cpp |  26 ++--
 Source/TimeIntegration/ERF_Substep_T.cpp      | 147 +++++++++---------
 4 files changed, 86 insertions(+), 91 deletions(-)

diff --git a/Source/ERF_MakeNewArrays.cpp b/Source/ERF_MakeNewArrays.cpp
index 27c8bd9488..7f7c1d7710 100644
--- a/Source/ERF_MakeNewArrays.cpp
+++ b/Source/ERF_MakeNewArrays.cpp
@@ -468,7 +468,7 @@ ERF::init_stuff (int lev, const BoxArray& ba, const DistributionMapping& dm,
     // Macrophysics heating source terms
     //*********************************************************
     if (solverChoice.moisture_type != MoistureType::None &&
-        solverChoice.macrophysics_tight_coupling)
+        solverChoice.macrophysics_acoustic_coupling)
     {
         macrophysics_src[lev] = std::make_unique<MultiFab>(ba, dm, 2, 0);
         macrophysics_src[lev]->setVal(0);
diff --git a/Source/TimeIntegration/ERF_Advance.cpp b/Source/TimeIntegration/ERF_Advance.cpp
index 3f8a5c2da7..d8abaf1efb 100644
--- a/Source/TimeIntegration/ERF_Advance.cpp
+++ b/Source/TimeIntegration/ERF_Advance.cpp
@@ -209,7 +209,7 @@ ERF::Advance (int lev, Real time, Real dt_lev, int iteration, int /*ncycle*/)
     // Macrophysics source term computed pre-step
     // **************************************************************************************
     if (solverChoice.moisture_type != MoistureType::None &&
-        solverChoice.macrophysics_tight_coupling)
+        solverChoice.macrophysics_acoustic_coupling)
     {
         GetMacroPhysicsSrc(real_width, dt_lev, Geom(lev), macrophysics_src[lev].get(), S_old);
     }
diff --git a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
index 5dac08cfc4..95be46e7a4 100644
--- a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
+++ b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
@@ -126,13 +126,13 @@ void make_fast_coeffs (int /*level*/,
                  Real qv_p = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
                  Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
 
-                 Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_p)
+                 Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c
                               +  halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) /
-                               (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) * (one + RvOverRd*qv_p) );
+                              (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) * (one + RvOverRd*qv_p) );
 
-                 Real coeff_Q =  Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_q)
+                 Real coeff_Q =  Gamma * R_d * dzi * inv_detJ_on_kface * pi_c
                               + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) /
-                               ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) * (one + RvOverRd*qv_q) );
+                              ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) * (one + RvOverRd*qv_q) );
 
                  coeffP_a(i,j,k) = coeff_P;
                  coeffQ_a(i,j,k) = coeff_Q;
@@ -150,10 +150,11 @@ void make_fast_coeffs (int /*level*/,
 
                 // LHS for tri-diagonal system
                 Real D = dtau * dtau * beta_2 * beta_2 * dzi;
-                coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * theta_t_lo );
-                coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * theta_t_hi );
+                coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * (one + RvOverRd*qv_q) * theta_t_lo );
+                coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * (one + RvOverRd*qv_p) * theta_t_hi );
 
-                coeffB_a(i,j,k) = one + D * (coeff_Q/detJ(i,j,k-1) - coeff_P/detJ(i,j,k)) * theta_t_mid;
+                coeffB_a(i,j,k) = one + D * ( coeff_Q * (one + RvOverRd*qv_q) / detJ(i,j,k-1)
+                                            - coeff_P * (one + RvOverRd*qv_p) / detJ(i,j,k  ) ) * theta_t_mid;
             });
 
         } else {
@@ -171,11 +172,11 @@ void make_fast_coeffs (int /*level*/,
                  Real qv_p = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
                  Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
 
-                 Real coeff_P = -Gamma * R_d * dzi * pi_c * (one + RvOverRd*qv_p)
+                 Real coeff_P = -Gamma * R_d * dzi * pi_c
                               +  halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) /
                               (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) * (one + RvOverRd*qv_p) );
 
-                 Real coeff_Q = Gamma * R_d * dzi * pi_c * (one + RvOverRd*qv_q)
+                 Real coeff_Q = Gamma * R_d * dzi * pi_c
                               + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) /
                               ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) * (one + RvOverRd*qv_q) );
 
@@ -195,10 +196,11 @@ void make_fast_coeffs (int /*level*/,
 
                 // LHS for tri-diagonal system
                 Real D = dtau * dtau * beta_2 * beta_2 * dzi;
-                coeffA_a(i,j,k) = D * ( halfg - coeff_Q * theta_t_lo );
-                coeffC_a(i,j,k) = D * (-halfg + coeff_P * theta_t_hi );
+                coeffA_a(i,j,k) = D * ( halfg - coeff_Q * (one + RvOverRd*qv_q) * theta_t_lo );
+                coeffC_a(i,j,k) = D * (-halfg + coeff_P * (one + RvOverRd*qv_p) * theta_t_hi );
 
-                coeffB_a(i,j,k) = one + D * (coeff_Q - coeff_P) * theta_t_mid;
+                coeffB_a(i,j,k) = one + D * ( coeff_Q * (one + RvOverRd*qv_q)
+                                            - coeff_P * (one + RvOverRd*qv_p) ) * theta_t_mid;
             });
         }
 
diff --git a/Source/TimeIntegration/ERF_Substep_T.cpp b/Source/TimeIntegration/ERF_Substep_T.cpp
index c7467ba800..db015d5d98 100644
--- a/Source/TimeIntegration/ERF_Substep_T.cpp
+++ b/Source/TimeIntegration/ERF_Substep_T.cpp
@@ -112,7 +112,6 @@ void erf_substep_T (int step, int /*nrk*/,
     MultiFab     coeff_P_mf(fast_coeffs, make_alias, 3, 1);
     MultiFab     coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
 
-    // *************************************************************************
     // Set gravity as a vector
     const    Array<Real,AMREX_SPACEDIM> grav{zero, zero, -gravity};
     const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
@@ -122,13 +121,12 @@ void erf_substep_T (int step, int /*nrk*/,
     // *************************************************************************
     // First set up some arrays we'll need
     // *************************************************************************
-
 #ifdef _OPENMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
     for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
     {
-        const Array4<Real>       & cur_cons  = S_data[IntVars::cons].array(mfi);
+        const Array4<Real>      &  cur_cons  = S_data[IntVars::cons].array(mfi);
         const Array4<const Real>& prev_cons  = S_prev[IntVars::cons].const_array(mfi);
         const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
         const Array4<Real>& lagged_arr       = lagged_delta_rt.array(mfi);
@@ -150,14 +148,6 @@ void erf_substep_T (int step, int /*nrk*/,
         Box  bx = mfi.validbox();
         Box gbx = mfi.tilebox(); gbx.grow(1);
 
-        if (step == 0) {
-            ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-                cur_cons(i,j,k,Rho_comp)      = prev_cons(i,j,k,Rho_comp);
-                cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
-                cur_cons(i,j,k,RhoQ1_comp)    = prev_cons(i,j,k,RhoQ1_comp);
-            });
-        } // step = 0
-
         Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
         Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
         Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
@@ -165,6 +155,20 @@ void erf_substep_T (int step, int /*nrk*/,
         const auto& bx_lo = lbound(bx);
         const auto& bx_hi = ubound(bx);
 
+        // *************************************************************************
+        // Transfer from S_old to S_data in first step(update in place in S_data)
+        // *************************************************************************
+        if (step == 0) {
+            ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
+                cur_cons(i,j,k,Rho_comp)      = prev_cons(i,j,k,Rho_comp);
+                cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
+                cur_cons(i,j,k,RhoQ1_comp)    = prev_cons(i,j,k,RhoQ1_comp);
+            });
+        }
+
+        // *************************************************************************
+        // Compute perturbational lateral momenta
+        // *************************************************************************
         ParallelFor(gtbx, gtby, gtbz,
         [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
             old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
@@ -186,12 +190,14 @@ void erf_substep_T (int step, int /*nrk*/,
             old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
         });
 
+        // *************************************************************************
+        // Compute pert density, moist potential temperature, and thm extrap
+        // *************************************************************************
         const Array4<Real>& thm_extrap = extrap.array(mfi);
         const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
-
         ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            Real qv_cur_fact = (1.0 + RvOverRd *   cur_cons(i,j,k,RhoQ1_comp) /   cur_cons(i,j,k,Rho_comp));
-            Real qv_stg_fact = (1.0 + RvOverRd *       prim(i,j,k,PrimQ1_comp));
+            Real qv_cur_fact = (Real(1.0) + RvOverRd * cur_cons(i,j,k,RhoQ1_comp) / cur_cons(i,j,k,Rho_comp));
+            Real qv_stg_fact = (Real(1.0) + RvOverRd *     prim(i,j,k,PrimQ1_comp));
             old_drho(i,j,k)     =   cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
             old_drho_thm(i,j,k) =   cur_cons(i,j,k,RhoTheta_comp) * qv_cur_fact
                                 - stage_cons(i,j,k,RhoTheta_comp) * qv_stg_fact;
@@ -200,27 +206,13 @@ void erf_substep_T (int step, int /*nrk*/,
             } else {
                 thm_extrap(i,j,k) = old_drho_thm(i,j,k) + beta_d * ( old_drho_thm(i,j,k) - lagged_arr(i,j,k) );
             }
-        });
-    } // mfi
-
-#ifdef _OPENMP
-#pragma omp parallel if (Gpu::notInLaunchRegion())
-#endif
-    for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
-    {
-        // We define lagged_delta_rt for our next step as the current delta_rt
-        Box gbx = mfi.tilebox(); gbx.grow(1);
-        const Array4<Real>& old_drho_thm = Delta_rho_theta.array(mfi);
-        const Array4<Real>& lagged_arr   = lagged_delta_rt.array(mfi);
-        ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
             lagged_arr(i,j,k) = old_drho_thm(i,j,k);
         });
     } // mfi
 
-    // *************************************************************************
-    // Define updates in the current RK stage
-    // *************************************************************************
-
+    // *********************************************************************
+    // Update lateral momenta (explicit)
+    // *********************************************************************
 #ifdef _OPENMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
@@ -249,7 +241,6 @@ void erf_substep_T (int step, int /*nrk*/,
         const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
         const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
 
-        // These store the advection momenta which we will use to update the slow variables
         const Array4<Real>& avg_xmom_arr = avg_xmom.array(mfi);
         const Array4<Real>& avg_ymom_arr = avg_ymom.array(mfi);
 
@@ -259,19 +250,9 @@ void erf_substep_T (int step, int /*nrk*/,
 
         const Array4<Real>& thm_extrap = extrap.array(mfi);
 
-        // Map factors
         const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
         const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
 
-        // Create old_drho_u/v/w/theta  = U'', V'', W'', Theta'' in the docs
-        // Note that we do the Copy and Subtract including one ghost cell
-        //    so that we don't have to fill ghost cells of the new MultiFabs
-        // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
-        // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
-
-        // *********************************************************************
-        // Define updates in the RHS of {x, y, z}-momentum equations
-        // *********************************************************************
         {
         BL_PROFILE("substep_xymom_T");
 
@@ -362,6 +343,8 @@ void erf_substep_T (int step, int /*nrk*/,
         Box vbx = mfi.validbox();
         const auto& vbx_hi = ubound(vbx);
 
+        const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
+
         const Array4<Real const>& zmom_src_arr   = zmom_src.const_array(mfi);
         const Array4<Real const>& cc_src_arr     = cc_src.const_array(mfi);
 
@@ -383,7 +366,6 @@ void erf_substep_T (int step, int /*nrk*/,
         const Array4<Real>& cur_cons = S_data[IntVars::cons].array(mfi);
         const Array4<Real>& cur_zmom = S_data[IntVars::zmom].array(mfi);
 
-        // These store the advection momenta which we will use to update the slow variables
         const Array4<Real>& avg_zmom_arr = avg_zmom.array(mfi);
 
         const Array4<const Real>& z_nd   = z_phys_nd->const_array(mfi);
@@ -391,7 +373,6 @@ void erf_substep_T (int step, int /*nrk*/,
 
         const Array4<      Real>& omega_arr = Omega.array(mfi);
 
-        // Map factors
         const Array4<const Real>& mf_mx = mapfac[MapFacType::m_x]->const_array(mfi);
         const Array4<const Real>& mf_my = mapfac[MapFacType::m_y]->const_array(mfi);
         const Array4<const Real>& mf_ux = mapfac[MapFacType::u_x]->const_array(mfi);
@@ -399,26 +380,20 @@ void erf_substep_T (int step, int /*nrk*/,
         const Array4<const Real>& mf_vx = mapfac[MapFacType::v_x]->const_array(mfi);
         const Array4<const Real>& mf_vy = mapfac[MapFacType::v_y]->const_array(mfi);
 
-        // Create old_drho_u/v/w/theta  = U'', V'', W'', Theta'' in the docs
-        // Note that we do the Copy and Subtract including one ghost cell
-        //    so that we don't have to fill ghost cells of the new MultiFabs
-        // Initialize New_rho_u/v/w to Delta_rho_u/v/w so that
-        // the ghost cells in New_rho_u/v/w will match old_drho_u/v/w
-
         FArrayBox temp_rhs_fab;
         FArrayBox RHS_fab;
         FArrayBox soln_fab;
-        FArrayBox new_qv_fab;
+        FArrayBox qv_str_fab;
 
         RHS_fab.resize     (tbz,1,The_Async_Arena());
         soln_fab.resize    (tbz,1,The_Async_Arena());
         temp_rhs_fab.resize(tbz,3,The_Async_Arena());
-        new_qv_fab.resize(tbz,1,The_Async_Arena());
+        qv_str_fab.resize(tbz,1,The_Async_Arena());
 
         auto const& RHS_a        =      RHS_fab.array();
         auto const& soln_a       =     soln_fab.array();
         auto const& temp_rhs_arr = temp_rhs_fab.array();
-        auto const& new_qv_arr   = new_qv_fab.array();
+        auto const& qv_str_arr   = qv_str_fab.array();
 
         auto const&     coeffA_a =     coeff_A_mf.array(mfi);
         auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
@@ -436,7 +411,9 @@ void erf_substep_T (int step, int /*nrk*/,
         const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
             flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
 
-        // *********************************************************************
+        // *************************************************************************
+        // Define old contravariant velocity (start of sub-step)
+        // *************************************************************************
         {
         Box gbxo = mfi.nodaltilebox(2);
         Box gbxo_mid = gbxo;
@@ -460,10 +437,11 @@ void erf_substep_T (int step, int /*nrk*/,
                                           old_drho_u,old_drho_v,
                                           mf_ux,mf_vy,z_nd,dxInv);
         });
-        } // end profile
-        // *********************************************************************
+        }
 
-        // *********************************************************************
+        // *************************************************************************
+        // Define lateral update to rho & rho theta & qv^{*}
+        // *************************************************************************
         {
         BL_PROFILE("fast_T_making_rho_rhs");
         ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
@@ -502,14 +480,14 @@ void erf_substep_T (int step, int /*nrk*/,
                                      ( yflux_hi * (prim(i,j,k,3) + prim(i,j+1,k,3)) -
                                        yflux_lo * (prim(i,j,k,3) + prim(i,j-1,k,3)) ) * dyi * mfsq) * myhalf;
 
-            // Explicit update for temporary qv
-            Real fast_rhs_rho   = -( temp_rhs_arr(i,j,k,2) +
+            // Explicit update for star variables
+            Real fast_rhs_rho   = -( temp_rhs_arr(i,j,k,0) +
                                    ( omega_arr(i,j,k+1) - omega_arr(i,j,k  ) ) * dzi ) / detJ(i,j,k);
-            Real new_rho        = prev_cons(i,j,k,0) + dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
+            Real new_rho        = cur_cons(i,j,k,0) + dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
             Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
                                    ( omega_arr(i,j,k+1) * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
                                      omega_arr(i,j,k  ) * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
-            new_qv_arr(i,j,k) = ( prev_cons(i,j,k,4) + dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv) ) / new_rho;
+            qv_str_arr(i,j,k)   = ( cur_cons(i,j,k,4) + dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv) ) / new_rho;
 
             if (l_reflux) {
                 (flx_arr[0])(i,j,k,0) = xflux_lo;
@@ -528,7 +506,7 @@ void erf_substep_T (int step, int /*nrk*/,
                 }
             }
         });
-        } // end profile
+        }
 
         Box bx_shrunk_in_k = bx;
         int klo = tbz.smallEnd(2);
@@ -541,14 +519,14 @@ void erf_substep_T (int step, int /*nrk*/,
         // We define halfg to match the notes (which is why we take the absolute value)
         Real halfg = std::abs(myhalf * grav_gpu[2]);
 
+        // *************************************************************************
+        // RHS for tridiagonal system
+        // *************************************************************************
         {
         BL_PROFILE("fast_loop_on_shrunk_t");
         //Note we don't act on the bottom or top boundaries of the domain
         ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
         {
-            Real new_qv_fact_P = (Real(1.0) + R_v/R_d * new_qv_arr(i,j,k+1));
-            Real new_qv_fact_Q = (Real(1.0) + R_v/R_d * new_qv_arr(i,j,k  ));
-
             Real coeff_P = coeffP_a(i,j,k);
             Real coeff_Q = coeffQ_a(i,j,k);
 
@@ -557,14 +535,22 @@ void erf_substep_T (int step, int /*nrk*/,
             Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
 
             // line 2 last two terms (order dtau)
-            Real R0_tmp  =  -halfg * old_drho(i,j,k  ) + coeff_P * old_drho_theta(i,j,k  )
-                            -halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1);
+            Real qv_str_fact_P  = (Real(1.0) + RvOverRd * qv_str_arr(i,j,k  ));
+            Real qv_str_fact_Q  = (Real(1.0) + RvOverRd * qv_str_arr(i,j,k-1));
+            Real qv_stg_fact_P  = (Real(1.0) + RvOverRd * prim(i,j,k  ,PrimQ1_comp));
+            Real qv_stg_fact_Q  = (Real(1.0) + RvOverRd * prim(i,j,k-1,PrimQ1_comp));
+            Real str_drho_thm_P = cur_cons(i,j,k  ,1) * qv_str_fact_P - stage_cons(i,j,k  ,RhoTheta_comp) * qv_stg_fact_P;
+            Real str_drho_thm_Q = cur_cons(i,j,k-1,1) * qv_str_fact_Q - stage_cons(i,j,k-1,RhoTheta_comp) * qv_stg_fact_Q;
+            Real thm_P  = beta_1 * old_drho_thm(i,j,k  ) + beta_2 * str_drho_thm_P;
+            Real thm_Q  = beta_1 * old_drho_thm(i,j,k-1) + beta_2 * str_drho_thm_Q;
+            Real R0_tmp = -halfg * old_drho(i,j,k  ) + coeff_P * thm_P
+                          -halfg * old_drho(i,j,k-1) + coeff_Q * thm_Q;
 
             // line 3 residuals (order dtau^2) one <-> beta_2
             Real R1_tmp =  -halfg * (  slow_rhs_cons(i,j,k  ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp) );
 
-                 R1_tmp +=  coeff_P * slow_rhs_cons(i,j,k  ,RhoTheta_comp) * new_qv_fact_P
-                          + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * new_qv_fact_Q;
+                 R1_tmp +=  coeff_P * slow_rhs_cons(i,j,k  ,RhoTheta_comp) * qv_str_fact_P
+                          + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * qv_str_fact_Q;
 
             Real Omega_kp1 = omega_arr(i,j,k+1);
             Real Omega_k   = omega_arr(i,j,k  );
@@ -577,8 +563,8 @@ void erf_substep_T (int step, int /*nrk*/,
                               + temp_rhs_arr(i,j,k,Rho_comp)/detJ(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ(i,j,k-1) );
 
             // consolidate lines 6&7 (order dtau^2)
-            R1_tmp += -( coeff_P/detJ(i,j,k  ) * new_qv_fact_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k  ,RhoTheta_comp) )
-                       + coeff_Q/detJ(i,j,k-1) * new_qv_fact_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
+            R1_tmp += -( coeff_P/detJ(i,j,k  ) * qv_str_fact_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k  ,RhoTheta_comp) )
+                       + coeff_Q/detJ(i,j,k-1) * qv_str_fact_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
 
             // line 1
             RHS_a(i,j,k) = old_drho_w(i,j,k) + dtau * (slow_rhs_rho_w(i,j,k) + zmom_src_arr(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
@@ -596,6 +582,9 @@ void erf_substep_T (int step, int /*nrk*/,
         auto const lo = lbound(bx);
         auto const hi = ubound(bx);
 
+        // *************************************************************************
+        // Solve tridiagonal system
+        // *************************************************************************
         {
         BL_PROFILE("substep_b2d_loop_t");
 #ifdef AMREX_USE_GPU
@@ -689,7 +678,7 @@ void erf_substep_T (int step, int /*nrk*/,
         });
 
         // **************************************************************************
-        // Define updates in the RHS of rho and (rho theta)
+        // Define updates in the RHS of rho, rho theta, rho qv
         // **************************************************************************
         {
         BL_PROFILE("fast_rho_final_update");
@@ -713,16 +702,19 @@ void erf_substep_T (int step, int /*nrk*/,
                   }
               }
 
-              Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
-
+              Real fast_rhs_rho  = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
               cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
 
               Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + myhalf *
-                ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
-                  zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
-
+                                        ( zflux_hi * (prim(i,j,k,0) + prim(i,j,k+1,0)) -
+                                          zflux_lo * (prim(i,j,k,0) + prim(i,j,k-1,0)) ) * dzi ) / detJ(i,j,k);
               cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
 
+              Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
+                                     ( zflux_hi * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
+                                       zflux_lo * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
+              cur_cons(i,j,k,4) += dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv);
+
               if (l_reflux) {
                   (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1));
               }
@@ -730,6 +722,7 @@ void erf_substep_T (int step, int /*nrk*/,
               // add in source terms for cell-centered conserved variables
               cur_cons(i,j,k,Rho_comp)      += dtau * cc_src_arr(i,j,k,Rho_comp);
               cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
+              cur_cons(i,j,k,RhoQ1_comp)    += dtau * cc_src_arr(i,j,k,RhoQ1_comp);
         });
         } // end profile
 

From ec11a655380f6951ad095487e7fbfb33f6c72387 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Tue, 12 May 2026 15:46:04 -0700
Subject: [PATCH 11/17] add fix in other PR.

---
 Source/TimeIntegration/ERF_MakeFastCoeffs.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
index 95be46e7a4..8a3b01a8a2 100644
--- a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
+++ b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
@@ -153,8 +153,9 @@ void make_fast_coeffs (int /*level*/,
                 coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * (one + RvOverRd*qv_q) * theta_t_lo );
                 coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * (one + RvOverRd*qv_p) * theta_t_hi );
 
-                coeffB_a(i,j,k) = one + D * ( coeff_Q * (one + RvOverRd*qv_q) / detJ(i,j,k-1)
-                                            - coeff_P * (one + RvOverRd*qv_p) / detJ(i,j,k  ) ) * theta_t_mid;
+                coeffB_a(i,j,k) = one + D * ( ( coeff_Q * (one + RvOverRd*qv_q) / detJ(i,j,k-1)
+                                              - coeff_P * (one + RvOverRd*qv_p) / detJ(i,j,k  ) ) * theta_t_mid
+                                            + halfg * ( Real(1.0)/detJ(i,j,k-1) - Real(1.0)/detJ(i,j,k) ) );
             });
 
         } else {

From 6e7a95ef2b989d00dc698971c47bcd726342756d Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Wed, 13 May 2026 10:24:26 -0700
Subject: [PATCH 12/17] Move Qv to pre and implicit diff and protect no
 moisture in substep T.

---
 Source/Diffusion/ERF_Diffusion.H             | 57 +++++++++---------
 Source/Diffusion/ERF_ImplicitDiff_N.cpp      | 28 ++++-----
 Source/Diffusion/ERF_ImplicitDiff_S.cpp      | 28 ++++-----
 Source/Diffusion/ERF_ImplicitDiff_T.cpp      | 28 ++++-----
 Source/TimeIntegration/ERF_ImplicitPost.H    |  9 +--
 Source/TimeIntegration/ERF_ImplicitPre.H     | 35 +++++++++--
 Source/TimeIntegration/ERF_SlowRhsPost.cpp   | 24 ++++----
 Source/TimeIntegration/ERF_SlowRhsPre.cpp    | 62 ++++++++++++++++++--
 Source/TimeIntegration/ERF_Substep_T.cpp     | 55 +++++++++--------
 Source/TimeIntegration/ERF_TI_slow_rhs_pre.H |  9 ++-
 10 files changed, 213 insertions(+), 122 deletions(-)

diff --git a/Source/Diffusion/ERF_Diffusion.H b/Source/Diffusion/ERF_Diffusion.H
index 631f8e07af..1ae5a3d28d 100644
--- a/Source/Diffusion/ERF_Diffusion.H
+++ b/Source/Diffusion/ERF_Diffusion.H
@@ -456,9 +456,10 @@ void ComputeStrain_T (amrex::Box bxcc, amrex::Box tbxxy,
 
 void ImplicitDiffForStateLU_N (const amrex::Box& bx,
                                const amrex::Box& domain,
-                               const int level,
-                               const int n,
-                               const amrex::Real dt,
+                               const int& level,
+                               const int& n,
+                               const int& qty_index,
+                               const amrex::Real& dt,
                                const amrex::GpuArray<amrex::Real, AMREX_SPACEDIM*2>& bc_neumann_vals,
                                const amrex::Array4<      amrex::Real>& cell_data,
                                const amrex::GpuArray<amrex::Real, AMREX_SPACEDIM>& cellSizeInv,
@@ -466,14 +467,15 @@ void ImplicitDiffForStateLU_N (const amrex::Box& bx,
                                const amrex::Array4<const amrex::Real>& mu_turb,
                                const SolverChoice& solverChoice,
                                const amrex::BCRec* bc_ptr,
-                               const bool use_SurfLayer,
-                               const amrex::Real implicit_fac);
+                               const bool& use_SurfLayer,
+                               const amrex::Real& implicit_fac);
 
 void ImplicitDiffForStateLU_S (const amrex::Box& bx,
                                const amrex::Box& domain,
-                               const int level,
-                               const int n,
-                               const amrex::Real dt,
+                               const int& level,
+                               const int& n,
+                               const int& qty_index,
+                               const amrex::Real& dt,
                                const amrex::GpuArray<amrex::Real, AMREX_SPACEDIM*2>& bc_neumann_vals,
                                const amrex::Array4<      amrex::Real>& cell_data,
                                const amrex::Gpu::DeviceVector<amrex::Real>& stretched_dz_d,
@@ -481,14 +483,15 @@ void ImplicitDiffForStateLU_S (const amrex::Box& bx,
                                const amrex::Array4<const amrex::Real>& mu_turb,
                                const SolverChoice& solverChoice,
                                const amrex::BCRec* bc_ptr,
-                               const bool use_SurfLayer,
-                               const amrex::Real implicit_fac);
+                               const bool& use_SurfLayer,
+                               const amrex::Real& implicit_fac);
 
 void ImplicitDiffForStateLU_T (const amrex::Box& bx,
                                const amrex::Box& domain,
-                               const int level,
-                               const int n,
-                               const amrex::Real dt,
+                               const int& level,
+                               const int& n,
+                               const int& qty_index,
+                               const amrex::Real& dt,
                                const amrex::GpuArray<amrex::Real, AMREX_SPACEDIM*2>& bc_neumann_vals,
                                const amrex::Array4<      amrex::Real>& cell_data,
                                const amrex::Array4<const amrex::Real>& z_nd,
@@ -498,14 +501,14 @@ void ImplicitDiffForStateLU_T (const amrex::Box& bx,
                                const amrex::Array4<const amrex::Real>& mu_turb,
                                const SolverChoice& solverChoice,
                                const amrex::BCRec* bc_ptr,
-                               const bool use_SurfLayer,
-                               const amrex::Real implicit_fac);
+                               const bool& use_SurfLayer,
+                               const amrex::Real& implicit_fac);
 
 template <int stagdir>
 void ImplicitDiffForMomLU_N (const amrex::Box& bx,
                              const amrex::Box& domain,
-                             const int level,
-                             const amrex::Real dt,
+                             const int& level,
+                             const amrex::Real& dt,
                              const amrex::Array4<const amrex::Real>& cell_data,
                              const amrex::Array4<      amrex::Real>& face_data,
                              const amrex::Array4<const amrex::Real>& tau,
@@ -514,14 +517,14 @@ void ImplicitDiffForMomLU_N (const amrex::Box& bx,
                              const amrex::Array4<const amrex::Real>& mu_turb,
                              const SolverChoice& solverChoice,
                              const amrex::BCRec* bc_ptr,
-                             const bool use_SurfLayer,
-                             const amrex::Real implicit_fac);
+                             const bool& use_SurfLayer,
+                             const amrex::Real& implicit_fac);
 
 template <int stagdir>
 void ImplicitDiffForMomLU_S (const amrex::Box& bx,
                              const amrex::Box& domain,
-                             const int level,
-                             const amrex::Real dt,
+                             const int& level,
+                             const amrex::Real& dt,
                              const amrex::Array4<const amrex::Real>& cell_data,
                              const amrex::Array4<      amrex::Real>& face_data,
                              const amrex::Array4<const amrex::Real>& tau,
@@ -530,14 +533,14 @@ void ImplicitDiffForMomLU_S (const amrex::Box& bx,
                              const amrex::Array4<const amrex::Real>& mu_turb,
                              const SolverChoice& solverChoice,
                              const amrex::BCRec* bc_ptr,
-                             const bool use_SurfLayer,
-                             const amrex::Real implicit_fac);
+                             const bool& use_SurfLayer,
+                             const amrex::Real& implicit_fac);
 
 template <int stagdir>
 void ImplicitDiffForMomLU_T (const amrex::Box& bx,
                              const amrex::Box& domain,
-                             const int level,
-                             const amrex::Real dt,
+                             const int& level,
+                             const amrex::Real& dt,
                              const amrex::Array4<const amrex::Real>& cell_data,
                              const amrex::Array4<      amrex::Real>& face_data,
                              const amrex::Array4<const amrex::Real>& tau,
@@ -548,6 +551,6 @@ void ImplicitDiffForMomLU_T (const amrex::Box& bx,
                              const amrex::Array4<const amrex::Real>& mu_turb,
                              const SolverChoice& solverChoice,
                              const amrex::BCRec* bc_ptr,
-                             const bool use_SurfLayer,
-                             const amrex::Real implicit_fac);
+                             const bool& use_SurfLayer,
+                             const amrex::Real& implicit_fac);
 #endif
diff --git a/Source/Diffusion/ERF_ImplicitDiff_N.cpp b/Source/Diffusion/ERF_ImplicitDiff_N.cpp
index 92211b1d98..45f8df1bea 100644
--- a/Source/Diffusion/ERF_ImplicitDiff_N.cpp
+++ b/Source/Diffusion/ERF_ImplicitDiff_N.cpp
@@ -28,9 +28,10 @@ using namespace amrex;
 void
 ImplicitDiffForStateLU_N (const Box& bx,
                           const Box& domain,
-                          const int level,
-                          const int n,
-                          const Real dt,
+                          const int& level,
+                          const int& n,
+                          const int& qty_index,
+                          const Real& dt,
                           const GpuArray<Real, AMREX_SPACEDIM*2>& bc_neumann_vals,
                           const Array4<      Real>& cell_data,
                           const GpuArray<Real, AMREX_SPACEDIM>& cellSizeInv,
@@ -38,14 +39,13 @@ ImplicitDiffForStateLU_N (const Box& bx,
                           const Array4<const Real>& mu_turb,
                           const SolverChoice &solverChoice,
                           const BCRec* bc_ptr,
-                          const bool use_SurfLayer,
-                          const Real implicit_fac)
+                          const bool& use_SurfLayer,
+                          const Real& implicit_fac)
 {
     BL_PROFILE_VAR("ImplicitDiffForState_N()",ImplicitDiffForState_N);
 
     // setup quantities for getRhoAlpha()
 #include "ERF_SetupVertDiff.H"
-    const int qty_index  = n;
     const int prim_index = qty_index - 1;
     const int prim_scal_index = (qty_index >= RhoScalar_comp && qty_index < RhoScalar_comp+NSCALARS) ? PrimScalar_comp : prim_index;
 
@@ -203,8 +203,8 @@ template <int stagdir>
 void
 ImplicitDiffForMomLU_N (const Box& bx,
                         const Box& /*domain*/,
-                        const int level,
-                        const Real dt,
+                        const int& level,
+                        const Real& dt,
                         const Array4<const Real>& cell_data,
                         const Array4<      Real>& face_data,
                         const Array4<const Real>& tau,
@@ -213,8 +213,8 @@ ImplicitDiffForMomLU_N (const Box& bx,
                         const Array4<const Real>& mu_turb,
                         const SolverChoice &solverChoice,
                         const BCRec* bc_ptr,
-                        const bool use_SurfLayer,
-                        const Real implicit_fac)
+                        const bool& use_SurfLayer,
+                        const Real& implicit_fac)
 {
     BL_PROFILE_VAR("ImplicitDiffForMom_N()",ImplicitDiffForMom_N);
 
@@ -430,8 +430,8 @@ ImplicitDiffForMomLU_N (const Box& bx,
     template void ImplicitDiffForMomLU_N<STAGDIR> ( \
         const Box&, \
         const Box&, \
-        const int, \
-        const Real, \
+        const int&, \
+        const Real&, \
         const Array4<const Real>&, \
         const Array4<      Real>&, \
         const Array4<const Real>&, \
@@ -440,8 +440,8 @@ ImplicitDiffForMomLU_N (const Box& bx,
         const Array4<const Real>&, \
         const SolverChoice&, \
         const BCRec*, \
-        const bool, \
-        const Real);
+        const bool&, \
+        const Real&);
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(0)
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(1)
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(2)
diff --git a/Source/Diffusion/ERF_ImplicitDiff_S.cpp b/Source/Diffusion/ERF_ImplicitDiff_S.cpp
index 6fbad2f2cb..02cc35f4e7 100644
--- a/Source/Diffusion/ERF_ImplicitDiff_S.cpp
+++ b/Source/Diffusion/ERF_ImplicitDiff_S.cpp
@@ -26,9 +26,10 @@ using namespace amrex;
 void
 ImplicitDiffForStateLU_S (const Box& bx,
                           const Box& domain,
-                          const int level,
-                          const int n,
-                          const Real dt,
+                          const int& level,
+                          const int& n,
+                          const int& qty_index,
+                          const Real& dt,
                           const GpuArray<Real, AMREX_SPACEDIM*2>& bc_neumann_vals,
                           const Array4<      Real>& cell_data,
                           const Gpu::DeviceVector<Real>& stretched_dz_d,
@@ -36,14 +37,13 @@ ImplicitDiffForStateLU_S (const Box& bx,
                           const Array4<const Real>& mu_turb,
                           const SolverChoice &solverChoice,
                           const BCRec* bc_ptr,
-                          const bool use_SurfLayer,
-                          const Real implicit_fac)
+                          const bool& use_SurfLayer,
+                          const Real& implicit_fac)
 {
     BL_PROFILE_VAR("ImplicitDiffForState_S()",ImplicitDiffForState_S);
 
     // setup quantities for getRhoAlpha()
 #include "ERF_SetupVertDiff.H"
-    const int qty_index  = n;
     const int prim_index = qty_index - 1;
     const int prim_scal_index = (qty_index >= RhoScalar_comp && qty_index < RhoScalar_comp+NSCALARS) ? PrimScalar_comp : prim_index;
 
@@ -213,8 +213,8 @@ template <int stagdir>
 void
 ImplicitDiffForMomLU_S (const Box& bx,
                         const Box& /*domain*/,
-                        const int level,
-                        const Real dt,
+                        const int& level,
+                        const Real& dt,
                         const Array4<const Real>& cell_data,
                         const Array4<      Real>& face_data,
                         const Array4<const Real>& tau,
@@ -223,8 +223,8 @@ ImplicitDiffForMomLU_S (const Box& bx,
                         const Array4<const Real>& mu_turb,
                         const SolverChoice &solverChoice,
                         const BCRec* bc_ptr,
-                        const bool use_SurfLayer,
-                        const Real implicit_fac)
+                        const bool& use_SurfLayer,
+                        const Real& implicit_fac)
 {
     BL_PROFILE_VAR("ImplicitDiffForMom_S()",ImplicitDiffForMom_S);
 
@@ -453,8 +453,8 @@ ImplicitDiffForMomLU_S (const Box& bx,
     template void ImplicitDiffForMomLU_S<STAGDIR> ( \
         const Box&, \
         const Box&, \
-        const int, \
-        const Real, \
+        const int&, \
+        const Real&, \
         const Array4<const Real>&, \
         const Array4<      Real>&, \
         const Array4<const Real>&, \
@@ -463,8 +463,8 @@ ImplicitDiffForMomLU_S (const Box& bx,
         const Array4<const Real>&, \
         const SolverChoice&, \
         const BCRec*, \
-        const bool, \
-        const Real);
+        const bool&, \
+        const Real&);
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(0)
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(1)
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(2)
diff --git a/Source/Diffusion/ERF_ImplicitDiff_T.cpp b/Source/Diffusion/ERF_ImplicitDiff_T.cpp
index 16c937ad34..6c94b326c8 100644
--- a/Source/Diffusion/ERF_ImplicitDiff_T.cpp
+++ b/Source/Diffusion/ERF_ImplicitDiff_T.cpp
@@ -28,9 +28,10 @@ using namespace amrex;
 void
 ImplicitDiffForStateLU_T (const Box& bx,
                           const Box& domain,
-                          const int level,
-                          const int n,
-                          const Real dt,
+                          const int& level,
+                          const int& n,
+                          const int& qty_index,
+                          const Real& dt,
                           const GpuArray<Real, AMREX_SPACEDIM*2>& bc_neumann_vals,
                           const Array4<      Real>& cell_data,
                           const Array4<const Real>& z_nd,
@@ -40,14 +41,13 @@ ImplicitDiffForStateLU_T (const Box& bx,
                           const Array4<const Real>& mu_turb,
                           const SolverChoice &solverChoice,
                           const BCRec* bc_ptr,
-                          const bool use_SurfLayer,
-                          const Real implicit_fac)
+                          const bool& use_SurfLayer,
+                          const Real& implicit_fac)
 {
     BL_PROFILE_VAR("ImplicitDiffForState_T()",ImplicitDiffForState_T);
 
     // setup quantities for getRhoAlpha()
 #include "ERF_SetupVertDiff.H"
-    const int qty_index  = n;
     const int prim_index = qty_index - 1;
     const int prim_scal_index = (qty_index >= RhoScalar_comp && qty_index < RhoScalar_comp+NSCALARS) ? PrimScalar_comp : prim_index;
 
@@ -221,8 +221,8 @@ template <int stagdir>
 void
 ImplicitDiffForMomLU_T (const Box& bx,
                         const Box& /*domain*/,
-                        const int level,
-                        const Real dt,
+                        const int& level,
+                        const Real& dt,
                         const Array4<const Real>& cell_data,
                         const Array4<      Real>& face_data,
                         const Array4<const Real>& tau,
@@ -233,8 +233,8 @@ ImplicitDiffForMomLU_T (const Box& bx,
                         const Array4<const Real>& mu_turb,
                         const SolverChoice &solverChoice,
                         const BCRec* bc_ptr,
-                        const bool use_SurfLayer,
-                        const Real implicit_fac)
+                        const bool& use_SurfLayer,
+                        const Real& implicit_fac)
 {
     BL_PROFILE_VAR("ImplicitDiffForMom_T()",ImplicitDiffForMom_T);
 
@@ -473,8 +473,8 @@ ImplicitDiffForMomLU_T (const Box& bx,
     template void ImplicitDiffForMomLU_T<STAGDIR> ( \
         const Box&, \
         const Box&, \
-        const int, \
-        const Real, \
+        const int&, \
+        const Real&, \
         const Array4<const Real>&, \
         const Array4<      Real>&, \
         const Array4<const Real>&, \
@@ -485,8 +485,8 @@ ImplicitDiffForMomLU_T (const Box& bx,
         const Array4<const Real>&, \
         const SolverChoice&, \
         const BCRec*, \
-        const bool, \
-        const Real);
+        const bool&, \
+        const Real&);
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(0)
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(1)
 INSTANTIATE_IMPLICIT_DIFF_FOR_MOM_LU(2)
diff --git a/Source/TimeIntegration/ERF_ImplicitPost.H b/Source/TimeIntegration/ERF_ImplicitPost.H
index d700060fe4..6ecd6b241d 100644
--- a/Source/TimeIntegration/ERF_ImplicitPost.H
+++ b/Source/TimeIntegration/ERF_ImplicitPost.H
@@ -3,7 +3,8 @@
         // *****************************************************************************
         const bool l_do_implicit_moist = solverChoice.implicit_moisture_diffusion;
         const Real l_vert_implicit_fac = solverChoice.vert_implicit_fac[nrk];
-        if ( (l_do_implicit_moist) &&
+        if ( false &&
+             (l_do_implicit_moist) &&
              (l_vert_implicit_fac > zero) &&
              (solverChoice.moisture_type != MoistureType::None) ) {
 
@@ -34,19 +35,19 @@
                 const Array4<const Real>& q1fx_z = Q1fx3->const_array(mfi);
 
                 if (l_use_stretched_dz) {
-                    ImplicitDiffForStateLU_S(bx, fine_geom.Domain(), level, RhoQ1_comp,
+                    ImplicitDiffForStateLU_S(bx, fine_geom.Domain(), level, RhoQ1_comp, RhoQ1_comp,
                                              slow_dt, l_bc_neumann_vals_d, cell_data,
                                              stretched_dz_d[level], q1fx_z,
                                              mu_turb, solverChoice,
                                              bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
                 } else if (l_use_terrain_fitted_coords) {
-                    ImplicitDiffForStateLU_T(bx, fine_geom.Domain(), level, RhoQ1_comp,
+                    ImplicitDiffForStateLU_T(bx, fine_geom.Domain(), level, RhoQ1_comp, RhoQ1_comp,
                                              slow_dt, l_bc_neumann_vals_d, cell_data,
                                              z_nd_arr, detJ_arr, dxInv, q1fx_z,
                                              mu_turb, solverChoice,
                                              bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
                 } else { // no terrain
-                    ImplicitDiffForStateLU_N(bx, fine_geom.Domain(), level, RhoQ1_comp,
+                    ImplicitDiffForStateLU_N(bx, fine_geom.Domain(), level, RhoQ1_comp, RhoQ1_comp,
                                              slow_dt, l_bc_neumann_vals_d, cell_data,
                                              dxInv, q1fx_z, mu_turb, solverChoice,
                                              bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
diff --git a/Source/TimeIntegration/ERF_ImplicitPre.H b/Source/TimeIntegration/ERF_ImplicitPre.H
index a7a24348f8..7dad86a8ef 100644
--- a/Source/TimeIntegration/ERF_ImplicitPre.H
+++ b/Source/TimeIntegration/ERF_ImplicitPre.H
@@ -6,8 +6,10 @@
 
             if (l_vert_implicit_fac > zero) {
 
-                const bool l_do_implicit_theta = solverChoice.implicit_thermal_diffusion;
-                const bool l_do_implicit_mom = solverChoice.implicit_momentum_diffusion;
+                const bool l_has_moisture         = (solverChoice.moisture_type != MoistureType::None);
+                const bool l_do_implicit_theta    = solverChoice.implicit_thermal_diffusion;
+                const bool l_do_implicit_moisture = solverChoice.implicit_moisture_diffusion;
+                const bool l_do_implicit_mom      = solverChoice.implicit_momentum_diffusion;
 
                 // If we're doing an implicit solve for momenta (u and v only),
                 // then we use the explicit solution at this point to derive
@@ -61,16 +63,24 @@
                     const Array4<const Real> tau13  = Tau[level][TauType::tau13]->array(mfi);
                     const Array4<const Real> tau23  = Tau[level][TauType::tau23]->array(mfi);
                     [[maybe_unused]] const Array4<const Real> tau33  = Tau[level][TauType::tau33]->array(mfi);
-                    const Array4<const Real>& hfx_z = Hfx3->const_array(mfi);
+                    const Array4<const Real>& hfx_z  = Hfx3->const_array(mfi);
+                    const Array4<const Real>& q1fx_z = Q1fx3->const_array(mfi);
 
                     if (l_use_stretched_dz) {
                         if (l_do_implicit_theta) {
-                            ImplicitDiffForStateLU_S(bx, fine_geom.Domain(), level, RhoTheta_comp,
+                            ImplicitDiffForStateLU_S(bx, fine_geom.Domain(), level, 1, RhoTheta_comp,
                                                      slow_dt, l_bc_neumann_vals_d, cell_data,
                                                      stretched_dz_d[level], hfx_z,
                                                      mu_turb, solverChoice,
                                                      bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
                         }
+                        if (l_do_implicit_moisture && l_has_moisture) {
+                          ImplicitDiffForStateLU_S(bx, fine_geom.Domain(), level, 2, RhoQ1_comp,
+                                                     slow_dt, l_bc_neumann_vals_d, cell_data,
+                                                     stretched_dz_d[level], q1fx_z,
+                                                     mu_turb, solverChoice,
+                                                     bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
+                        }
                         if (l_do_implicit_mom) {
                             ImplicitDiffForMomLU_S<0>(tbx, fine_geom.Domain(), level, slow_dt,
                                                       cell_data, rho_u, tau13, tau13_corr,
@@ -93,12 +103,19 @@
                         }
                     } else if (l_use_terrain_fitted_coords) {
                         if (l_do_implicit_theta) {
-                            ImplicitDiffForStateLU_T(bx, fine_geom.Domain(), level, RhoTheta_comp,
+                            ImplicitDiffForStateLU_T(bx, fine_geom.Domain(), level, 1, RhoTheta_comp,
                                                      slow_dt, l_bc_neumann_vals_d, cell_data,
                                                      z_nd_arr, detJ_arr, dxInv, hfx_z,
                                                      mu_turb, solverChoice,
                                                      bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
                         }
+                        if (l_do_implicit_moisture && l_has_moisture) {
+                            ImplicitDiffForStateLU_T(bx, fine_geom.Domain(), level, 2, RhoQ1_comp,
+                                                     slow_dt, l_bc_neumann_vals_d, cell_data,
+                                                     z_nd_arr, detJ_arr, dxInv, q1fx_z,
+                                                     mu_turb, solverChoice,
+                                                     bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
+                        }
                         if (l_do_implicit_mom) {
                             ImplicitDiffForMomLU_T<0>(tbx, fine_geom.Domain(), level, slow_dt,
                                                       cell_data, rho_u, tau13, tau13_corr,
@@ -121,11 +138,17 @@
                         }
                     } else { // no terrain
                         if (l_do_implicit_theta) {
-                            ImplicitDiffForStateLU_N(bx, fine_geom.Domain(), level, RhoTheta_comp,
+                            ImplicitDiffForStateLU_N(bx, fine_geom.Domain(), level, 1, RhoTheta_comp,
                                                      slow_dt, l_bc_neumann_vals_d, cell_data,
                                                      dxInv, hfx_z, mu_turb, solverChoice,
                                                      bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
                         }
+                        if (l_do_implicit_moisture && l_has_moisture) {
+                            ImplicitDiffForStateLU_N(bx, fine_geom.Domain(), level, 2, RhoQ1_comp,
+                                                     slow_dt, l_bc_neumann_vals_d, cell_data,
+                                                     dxInv, q1fx_z, mu_turb, solverChoice,
+                                                     bc_ptr_h, l_use_SurfLayer, l_vert_implicit_fac);
+                        }
                         if (l_do_implicit_mom) {
                             ImplicitDiffForMomLU_N<0>(tbx, fine_geom.Domain(), level, slow_dt,
                                                       cell_data, rho_u, tau13, tau13_corr,
diff --git a/Source/TimeIntegration/ERF_SlowRhsPost.cpp b/Source/TimeIntegration/ERF_SlowRhsPost.cpp
index 4d9b0bc67c..2e97f8700d 100644
--- a/Source/TimeIntegration/ERF_SlowRhsPost.cpp
+++ b/Source/TimeIntegration/ERF_SlowRhsPost.cpp
@@ -175,11 +175,11 @@ void erf_slow_rhs_post (int level, int finest_level,
     }
 
     // Valid vars
-    Vector<int> is_valid_slow_var; is_valid_slow_var.resize(RhoQ1_comp+1,0);
+    Vector<int> is_valid_slow_var; is_valid_slow_var.resize(RhoQ2_comp+1,0);
     if (l_use_KE)    { is_valid_slow_var[    RhoKE_comp] = 1; }
     if (l_do_scalar) { is_valid_slow_var[RhoScalar_comp] = 1; }
     if (solverChoice.moisture_type != MoistureType::None) {
-         is_valid_slow_var[RhoQ1_comp] = 1;
+         is_valid_slow_var[RhoQ2_comp] = 1;
     }
 
     // *************************************************************************
@@ -281,12 +281,12 @@ void erf_slow_rhs_post (int level, int finest_level,
         const GpuArray<int, IntVars::NumTypes> ncomp_slow = {nsv,0,0,0};
 
         // **************************************************************************
-        // Note that here we do copy only the "slow" variables, not (rho) or (rho theta)
+        // Note that here we do copy only the "slow" variables, not (rho, rhoTheta, RhoQ1)
         // **************************************************************************
         ParallelFor(tbx, ncomp_slow[IntVars::cons],
         [=] AMREX_GPU_DEVICE (int i, int j, int k, int nn) {
             const int n = scomp_slow[IntVars::cons] + nn;
-            cur_cons(i,j,k,n) = new_cons(i,j,k,n);
+            if (n != RhoQ1_comp) { cur_cons(i,j,k,n) = new_cons(i,j,k,n); }
         });
 
         // We have projected the velocities stored in S_data but we will use
@@ -372,14 +372,14 @@ void erf_slow_rhs_post (int level, int finest_level,
         //
         // Note that we either advect and diffuse all or none of the moisture variables
         //
-        for (int ivar(RhoKE_comp); ivar<= RhoQ1_comp; ++ivar)
+        for (int ivar(RhoKE_comp); ivar<= RhoQ2_comp; ++ivar)
         {
             if (is_valid_slow_var[ivar])
             {
                 start_comp = ivar;
                 num_comp = 1;
 
-                if (ivar == RhoQ1_comp) {
+                if (ivar == RhoQ2_comp) {
                     horiz_adv_type = ac.moistscal_horiz_adv_type;
                      vert_adv_type = ac.moistscal_vert_adv_type;
                     horiz_upw_frac = ac.moistscal_horiz_upw_frac;
@@ -390,7 +390,7 @@ void erf_slow_rhs_post (int level, int finest_level,
                           vert_adv_type = EfficientAdvType(nrk,ac.moistscal_vert_adv_type);
                     }
 
-                    num_comp = n_qstate;
+                    num_comp = n_qstate - 1;
 
                 } else {
                     horiz_adv_type = ac.dryscal_horiz_adv_type;
@@ -436,7 +436,7 @@ void erf_slow_rhs_post (int level, int finest_level,
                 if (l_use_diff)
                 {
                     // Allow for implicit moisture diffusion
-                    const Real l_vert_implicit_fac = (solverChoice.implicit_moisture_diffusion) ?
+                    const Real l_vert_implicit_fac = (false) ? //(solverChoice.implicit_moisture_diffusion) ?
                                                      solverChoice.vert_implicit_fac[nrk] : zero;
 
                     const Array4<const Real> tm_arr = t_mean_mf ? t_mean_mf->const_array(mfi) : Array4<const Real>{};
@@ -477,7 +477,7 @@ void erf_slow_rhs_post (int level, int finest_level,
         } // loop ivar
 
 #if defined(ERF_USE_NETCDF)
-        if (moist_set_rhs_bool)
+        if (false) //moist_set_rhs_bool)
         {
             Real bdy_factor = solverChoice.bdy_nudge_factor;
             const Array4<const Real> & new_cons_const = S_new[IntVars::cons].const_array(mfi);
@@ -507,14 +507,14 @@ void erf_slow_rhs_post (int level, int finest_level,
 
         auto const& src_arr = source.const_array(mfi);
 
-        for (int ivar(RhoKE_comp); ivar<= RhoQ1_comp; ++ivar)
+        for (int ivar(RhoKE_comp); ivar<= RhoQ2_comp; ++ivar)
         {
             if (is_valid_slow_var[ivar])
             {
                 start_comp = ivar;
                 num_comp = 1;
-                if (ivar == RhoQ1_comp) {
-                    num_comp = nvars - RhoQ1_comp;
+                if (ivar == RhoQ2_comp) {
+                    num_comp = nvars - RhoQ2_comp;
                 } else if (ivar == RhoScalar_comp) {
                     num_comp = NSCALARS;
                 }
diff --git a/Source/TimeIntegration/ERF_SlowRhsPre.cpp b/Source/TimeIntegration/ERF_SlowRhsPre.cpp
index c64c414d70..70667db916 100644
--- a/Source/TimeIntegration/ERF_SlowRhsPre.cpp
+++ b/Source/TimeIntegration/ERF_SlowRhsPre.cpp
@@ -131,6 +131,12 @@ void erf_slow_rhs_pre (int level, int finest_level,
     const AdvType l_vert_adv_type  = solverChoice.advChoice.dycore_vert_adv_type;
     const Real    l_horiz_upw_frac = solverChoice.advChoice.dycore_horiz_upw_frac;
     const Real    l_vert_upw_frac  = solverChoice.advChoice.dycore_vert_upw_frac;
+
+    const AdvType l_moist_horiz_adv_type = solverChoice.advChoice.moistscal_horiz_adv_type;
+    const AdvType l_moist_vert_adv_type  = solverChoice.advChoice.moistscal_vert_adv_type;
+    const Real    l_moist_horiz_upw_frac = solverChoice.advChoice.moistscal_horiz_upw_frac;
+    const Real    l_moist_vert_upw_frac  = solverChoice.advChoice.moistscal_vert_upw_frac;
+
     const bool    l_use_stretched_dz             = (solverChoice.mesh_type == MeshType::StretchedDz);
     const bool    l_use_terrain_fitted_coords    = (solverChoice.mesh_type == MeshType::VariableDz);
     const bool    l_moving_terrain               = (solverChoice.terrain_type == TerrainType::MovingFittedMesh);
@@ -573,8 +579,8 @@ void erf_slow_rhs_pre (int level, int finest_level,
                                        avg_xmom_arr, avg_ymom_arr, avg_zmom_arr,
                                        cell_prim, cell_rhs,
                                        detJ_arr, dxInv, mf_mx, mf_my,
-                                       l_horiz_adv_type, l_vert_adv_type,
-                                       l_horiz_upw_frac, l_vert_upw_frac,
+                                       l_moist_horiz_adv_type, l_moist_vert_adv_type,
+                                       l_moist_horiz_upw_frac, l_moist_vert_upw_frac,
                                        flx_arr, domain, bc_ptr_h);
             }
         } else {
@@ -604,8 +610,8 @@ void erf_slow_rhs_pre (int level, int finest_level,
                                          mask_arr, cfg_arr, ax_arr, ay_arr, az_arr,
                                          fcx_arr, fcy_arr, fcz_arr,
                                          detJ_arr, dxInv, mf_mx, mf_my,
-                                         l_horiz_adv_type, l_vert_adv_type,
-                                         l_horiz_upw_frac, l_vert_upw_frac,
+                                         l_moist_horiz_adv_type, l_moist_vert_adv_type,
+                                         l_moist_horiz_upw_frac, l_moist_vert_upw_frac,
                                          flx_arr, domain, bc_ptr_h,
                                          already_on_centroids);
             }
@@ -650,6 +656,17 @@ void erf_slow_rhs_pre (int level, int finest_level,
                                        hfx_z, q1fx_z, q2fx_z, diss,
                                        mu_turb, solverChoice, level,
                                        tm_arr, grav_gpu, bc_ptr_d, l_use_SurfLayer, l_vert_implicit_fac);
+                if (l_use_moisture) {
+                    DiffusionSrcForState_S(bx, domain, RhoQ1_comp, 1, u, v,
+                                           cell_data, cell_prim, cell_rhs,
+                                           diffflux_x, diffflux_y, diffflux_z,
+                                           stretched_dz_d, dxInv, SmnSmn_a,
+                                           mf_mx, mf_ux, mf_vx,
+                                           mf_my, mf_uy, mf_vy,
+                                           hfx_z, q1fx_z, q2fx_z, diss,
+                                           mu_turb, solverChoice, level,
+                                           tm_arr, grav_gpu, bc_ptr_d, l_use_SurfLayer, l_vert_implicit_fac);
+                }
             } else if (l_use_terrain_fitted_coords) {
                 DiffusionSrcForState_T(bx, domain, n_start, n_comp, l_rotate, u, v,
                                        cell_data, cell_prim, cell_rhs,
@@ -661,6 +678,18 @@ void erf_slow_rhs_pre (int level, int finest_level,
                                        hfx_x, hfx_y, hfx_z, q1fx_x, q1fx_y, q1fx_z, q2fx_z, diss,
                                        mu_turb, solverChoice, level,
                                        tm_arr, grav_gpu, bc_ptr_d, l_use_SurfLayer, l_vert_implicit_fac);
+                if (l_use_moisture) {
+                    DiffusionSrcForState_T(bx, domain, RhoQ1_comp, 1, l_rotate, u, v,
+                                           cell_data, cell_prim, cell_rhs,
+                                           diffflux_x, diffflux_y, diffflux_z,
+                                           z_nd, z_cc, ax_arr, ay_arr, az_arr, detJ_arr,
+                                           dxInv, SmnSmn_a,
+                                           mf_mx, mf_ux, mf_vx,
+                                           mf_my, mf_uy, mf_vy,
+                                           hfx_x, hfx_y, hfx_z, q1fx_x, q1fx_y, q1fx_z, q2fx_z, diss,
+                                           mu_turb, solverChoice, level,
+                                           tm_arr, grav_gpu, bc_ptr_d, l_use_SurfLayer, l_vert_implicit_fac);
+                }
             } else if (l_use_eb) {
                 DiffusionSrcForState_EB(bx, domain, n_start, n_comp, u, v,
                                        cell_data, cell_prim, cell_rhs,
@@ -671,6 +700,17 @@ void erf_slow_rhs_pre (int level, int finest_level,
                                        hfx_z, q1fx_z, q2fx_z, hfx_EB,
                                        mu_turb, solverChoice, level,
                                        bc_ptr_d, l_use_SurfLayer);
+                if (l_use_moisture) {
+                    DiffusionSrcForState_EB(bx, domain, RhoQ1_comp, 1, u, v,
+                                            cell_data, cell_prim, cell_rhs,
+                                            diffflux_x, diffflux_y, diffflux_z,
+                                            cfg_arr, ax_arr, ay_arr, az_arr, detJ_arr,
+                                            barea_arr, bcent_arr,
+                                            dx, dxInv,
+                                            hfx_z, q1fx_z, q2fx_z, hfx_EB,
+                                            mu_turb, solverChoice, level,
+                                            bc_ptr_d, l_use_SurfLayer);
+                }
             } else {
                 DiffusionSrcForState_N(bx, domain, n_start, n_comp, u, v,
                                        cell_data, cell_prim, cell_rhs,
@@ -681,6 +721,17 @@ void erf_slow_rhs_pre (int level, int finest_level,
                                        hfx_z, q1fx_z, q2fx_z, diss,
                                        mu_turb, solverChoice, level,
                                        tm_arr, grav_gpu, bc_ptr_d, l_use_SurfLayer, l_vert_implicit_fac);
+                if (l_use_moisture) {
+                    DiffusionSrcForState_N(bx, domain, RhoQ1_comp, 1, u, v,
+                                       cell_data, cell_prim, cell_rhs,
+                                       diffflux_x, diffflux_y, diffflux_z,
+                                       dxInv, SmnSmn_a,
+                                       mf_mx, mf_ux, mf_vx,
+                                       mf_my, mf_uy, mf_vy,
+                                       hfx_z, q1fx_z, q2fx_z, diss,
+                                       mu_turb, solverChoice, level,
+                                       tm_arr, grav_gpu, bc_ptr_d, l_use_SurfLayer, l_vert_implicit_fac);
+                }
             }
         }
 
@@ -689,6 +740,9 @@ void erf_slow_rhs_pre (int level, int finest_level,
         {
             cell_rhs(i,j,k,Rho_comp)      += source_arr(i,j,k,Rho_comp);
             cell_rhs(i,j,k,RhoTheta_comp) += source_arr(i,j,k,RhoTheta_comp);
+            if (l_use_moisture) {
+               cell_rhs(i,j,k,RhoQ1_comp) += source_arr(i,j,k,RhoQ1_comp);
+            }
         });
 
         // If anelastic and in second RK stage, take average of old-time and new-time source
diff --git a/Source/TimeIntegration/ERF_Substep_T.cpp b/Source/TimeIntegration/ERF_Substep_T.cpp
index db015d5d98..0113598454 100644
--- a/Source/TimeIntegration/ERF_Substep_T.cpp
+++ b/Source/TimeIntegration/ERF_Substep_T.cpp
@@ -196,8 +196,10 @@ void erf_substep_T (int step, int /*nrk*/,
         const Array4<Real>& thm_extrap = extrap.array(mfi);
         const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
         ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            Real qv_cur_fact = (Real(1.0) + RvOverRd * cur_cons(i,j,k,RhoQ1_comp) / cur_cons(i,j,k,Rho_comp));
-            Real qv_stg_fact = (Real(1.0) + RvOverRd *     prim(i,j,k,PrimQ1_comp));
+            Real qv_cur_fact = (!l_use_moisture) ? 0.0 :
+                (Real(1.0) + RvOverRd * cur_cons(i,j,k,RhoQ1_comp) / cur_cons(i,j,k,Rho_comp));
+            Real qv_stg_fact = (!l_use_moisture) ? 0.0 :
+                (Real(1.0) + RvOverRd *     prim(i,j,k,PrimQ1_comp));
             old_drho(i,j,k)     =   cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
             old_drho_thm(i,j,k) =   cur_cons(i,j,k,RhoTheta_comp) * qv_cur_fact
                                 - stage_cons(i,j,k,RhoTheta_comp) * qv_stg_fact;
@@ -475,19 +477,22 @@ void erf_substep_T (int step, int /*nrk*/,
                                        xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
                                      ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
                                        yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * myhalf;
-            temp_rhs_arr(i,j,k,2) = (( xflux_hi * (prim(i,j,k,3) + prim(i+1,j,k,3)) -
-                                       xflux_lo * (prim(i,j,k,3) + prim(i-1,j,k,3)) ) * dxi * mfsq+
-                                     ( yflux_hi * (prim(i,j,k,3) + prim(i,j+1,k,3)) -
-                                       yflux_lo * (prim(i,j,k,3) + prim(i,j-1,k,3)) ) * dyi * mfsq) * myhalf;
-
-            // Explicit update for star variables
-            Real fast_rhs_rho   = -( temp_rhs_arr(i,j,k,0) +
-                                   ( omega_arr(i,j,k+1) - omega_arr(i,j,k  ) ) * dzi ) / detJ(i,j,k);
-            Real new_rho        = cur_cons(i,j,k,0) + dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
-            Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
-                                   ( omega_arr(i,j,k+1) * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
-                                     omega_arr(i,j,k  ) * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
-            qv_str_arr(i,j,k)   = ( cur_cons(i,j,k,4) + dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv) ) / new_rho;
+            if (l_use_moisture) {
+                temp_rhs_arr(i,j,k,2) = (( xflux_hi * (prim(i,j,k,3) + prim(i+1,j,k,3)) -
+                                           xflux_lo * (prim(i,j,k,3) + prim(i-1,j,k,3)) ) * dxi * mfsq+
+                                         ( yflux_hi * (prim(i,j,k,3) + prim(i,j+1,k,3)) -
+                                           yflux_lo * (prim(i,j,k,3) + prim(i,j-1,k,3)) ) * dyi * mfsq) * myhalf;
+
+                // Explicit update for star variables
+                Real fast_rhs_rho   = -( temp_rhs_arr(i,j,k,0) +
+                                       ( omega_arr(i,j,k+1) - omega_arr(i,j,k  ) ) * dzi ) / detJ(i,j,k);
+                Real new_rho        = cur_cons(i,j,k,0) + dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
+                Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
+                                       ( omega_arr(i,j,k+1) * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
+                                         omega_arr(i,j,k  ) * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
+                qv_str_arr(i,j,k)   = ( cur_cons(i,j,k,4) + dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv) ) / new_rho;
+            }
+
 
             if (l_reflux) {
                 (flx_arr[0])(i,j,k,0) = xflux_lo;
@@ -535,10 +540,10 @@ void erf_substep_T (int step, int /*nrk*/,
             Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
 
             // line 2 last two terms (order dtau)
-            Real qv_str_fact_P  = (Real(1.0) + RvOverRd * qv_str_arr(i,j,k  ));
-            Real qv_str_fact_Q  = (Real(1.0) + RvOverRd * qv_str_arr(i,j,k-1));
-            Real qv_stg_fact_P  = (Real(1.0) + RvOverRd * prim(i,j,k  ,PrimQ1_comp));
-            Real qv_stg_fact_Q  = (Real(1.0) + RvOverRd * prim(i,j,k-1,PrimQ1_comp));
+            Real qv_str_fact_P  = (!l_use_moisture) ? Real(1.0) : (Real(1.0) + RvOverRd * qv_str_arr(i,j,k  ));
+            Real qv_str_fact_Q  = (!l_use_moisture) ? Real(1.0) : (Real(1.0) + RvOverRd * qv_str_arr(i,j,k-1));
+            Real qv_stg_fact_P  = (!l_use_moisture) ? Real(1.0) : (Real(1.0) + RvOverRd * prim(i,j,k  ,PrimQ1_comp));
+            Real qv_stg_fact_Q  = (!l_use_moisture) ? Real(1.0) : (Real(1.0) + RvOverRd * prim(i,j,k-1,PrimQ1_comp));
             Real str_drho_thm_P = cur_cons(i,j,k  ,1) * qv_str_fact_P - stage_cons(i,j,k  ,RhoTheta_comp) * qv_stg_fact_P;
             Real str_drho_thm_Q = cur_cons(i,j,k-1,1) * qv_str_fact_Q - stage_cons(i,j,k-1,RhoTheta_comp) * qv_stg_fact_Q;
             Real thm_P  = beta_1 * old_drho_thm(i,j,k  ) + beta_2 * str_drho_thm_P;
@@ -710,10 +715,12 @@ void erf_substep_T (int step, int /*nrk*/,
                                           zflux_lo * (prim(i,j,k,0) + prim(i,j,k-1,0)) ) * dzi ) / detJ(i,j,k);
               cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
 
-              Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
-                                     ( zflux_hi * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
-                                       zflux_lo * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
-              cur_cons(i,j,k,4) += dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv);
+              if (l_use_moisture) {
+                  Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
+                                         ( zflux_hi * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
+                                           zflux_lo * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
+                  cur_cons(i,j,k,4) += dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv);
+              }
 
               if (l_reflux) {
                   (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1));
@@ -722,7 +729,7 @@ void erf_substep_T (int step, int /*nrk*/,
               // add in source terms for cell-centered conserved variables
               cur_cons(i,j,k,Rho_comp)      += dtau * cc_src_arr(i,j,k,Rho_comp);
               cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
-              cur_cons(i,j,k,RhoQ1_comp)    += dtau * cc_src_arr(i,j,k,RhoQ1_comp);
+              if (l_use_moisture) { cur_cons(i,j,k,RhoQ1_comp)    += dtau * cc_src_arr(i,j,k,RhoQ1_comp); }
         });
         } // end profile
 
diff --git a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
index 5a7588f2f3..e98465dca6 100644
--- a/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
+++ b/Source/TimeIntegration/ERF_TI_slow_rhs_pre.H
@@ -169,10 +169,13 @@
                          fr_as_crse, fr_as_fine);
 
         if ((solverChoice.vert_implicit_fac[nrk] > zero) && solverChoice.implicit_before_substep) {
-            MultiFab scratch(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(), 2,
+            MultiFab scratch(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(), 3,
                              S_data[IntVars::cons].nGrowVect());
-            MultiFab::Copy(scratch, S_old[IntVars::cons], 0, 0, 2, S_data[IntVars::cons].nGrowVect()); // scratch := S_old               (for rho, rhotheta)
-            MultiFab::Saxpy(scratch, slow_dt, S_rhs[IntVars::cons], 0, 0, 2, 0);                       // scratch := S_old + slow_dt*Src (for rho, rhotheta)
+            MultiFab::Copy(scratch, S_old[IntVars::cons], 0, 0, 2, S_data[IntVars::cons].nGrowVect());          // scratch := S_old               (for rho, rhotheta)
+            MultiFab::Copy(scratch, S_old[IntVars::cons], RhoQ1_comp, 2, 1, S_data[IntVars::cons].nGrowVect()); // scratch := S_old               (for rhoQ1)
+            MultiFab::Saxpy(scratch, slow_dt, S_rhs[IntVars::cons], 0, 0, 2, 0);                                // scratch := S_old + slow_dt*Src (for rho, rhotheta)
+            MultiFab::Saxpy(scratch, slow_dt, S_rhs[IntVars::cons], RhoQ1_comp, 2, 1, 0);                       // scratch := S_old + slow_dt*Src (for rhoQ1)
+
             scratch.FillBoundary(geom[level].periodicity());
 
             MultiFab scratch_xmom(S_data[IntVars::xmom].boxArray(),

From 1f4e822240662d46dd27b825c21750e9582753a7 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Fri, 15 May 2026 14:36:57 -0700
Subject: [PATCH 13/17] Only copy q1 if we have moisture.

---
 Source/TimeIntegration/ERF_Substep_T.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/TimeIntegration/ERF_Substep_T.cpp b/Source/TimeIntegration/ERF_Substep_T.cpp
index 0113598454..d288fe43ce 100644
--- a/Source/TimeIntegration/ERF_Substep_T.cpp
+++ b/Source/TimeIntegration/ERF_Substep_T.cpp
@@ -162,7 +162,7 @@ void erf_substep_T (int step, int /*nrk*/,
             ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
                 cur_cons(i,j,k,Rho_comp)      = prev_cons(i,j,k,Rho_comp);
                 cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp);
-                cur_cons(i,j,k,RhoQ1_comp)    = prev_cons(i,j,k,RhoQ1_comp);
+                if (l_use_moisture) { cur_cons(i,j,k,RhoQ1_comp) = prev_cons(i,j,k,RhoQ1_comp); }
             });
         }
 

From a5ea9032ec39552a1616334d9ba2a4bbf0323790 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Mon, 18 May 2026 13:24:06 -0700
Subject: [PATCH 14/17] Make qc have tendency as well.

---
 Source/ERF_MakeNewArrays.cpp           | 2 +-
 Source/SourceTerms/ERF_MakeSources.cpp | 1 +
 Source/Utils/ERF_MacroPhysics.H        | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Source/ERF_MakeNewArrays.cpp b/Source/ERF_MakeNewArrays.cpp
index 4a27a67062..6deec653cc 100644
--- a/Source/ERF_MakeNewArrays.cpp
+++ b/Source/ERF_MakeNewArrays.cpp
@@ -470,7 +470,7 @@ ERF::init_stuff (int lev, const BoxArray& ba, const DistributionMapping& dm,
     if (solverChoice.moisture_type != MoistureType::None &&
         solverChoice.macrophysics_acoustic_coupling)
     {
-        macrophysics_src[lev] = std::make_unique<MultiFab>(ba, dm, 2, 0);
+        macrophysics_src[lev] = std::make_unique<MultiFab>(ba, dm, 3, 0);
         macrophysics_src[lev]->setVal(0);
     }
 
diff --git a/Source/SourceTerms/ERF_MakeSources.cpp b/Source/SourceTerms/ERF_MakeSources.cpp
index 59a3c96f2e..baed755915 100644
--- a/Source/SourceTerms/ERF_MakeSources.cpp
+++ b/Source/SourceTerms/ERF_MakeSources.cpp
@@ -752,6 +752,7 @@ void make_sources (int level,
             {
                 cell_src(i,j,k,RhoTheta_comp) += mac_src(i,j,k,0);
                 cell_src(i,j,k,RhoQ1_comp   ) += mac_src(i,j,k,1);
+                cell_src(i,j,k,RhoQ2_comp   ) += mac_src(i,j,k,2);
             });
         }
     } // mfi
diff --git a/Source/Utils/ERF_MacroPhysics.H b/Source/Utils/ERF_MacroPhysics.H
index cdec74c9fa..3b197a97a4 100644
--- a/Source/Utils/ERF_MacroPhysics.H
+++ b/Source/Utils/ERF_MacroPhysics.H
@@ -128,7 +128,7 @@ GetMacroPhysicsSrc (int& real_width,
             amrex::Real th_f = getThgivenTandP(tabs_f, pres, d_rdOcp);
             mac_src(i,j,k,0) = rho_s * idt * ( th_f - th_s );
             mac_src(i,j,k,1) = rho_s * idt * ( qv_f - qv_s );
-            cell_data(i,j,k,RhoQ2_comp) = rho_s * std::max(amrex::Real(0.0),qt - qv_f);
+            mac_src(i,j,k,2) = rho_s * idt * ( amrex::max(amrex::Real(0.0),qt - qv_f) - qc_s );
         });
     } // mfi
 }

From ca9651888254e4356fe0ebd473cf7f0a46cae37e Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Tue, 19 May 2026 16:47:57 -0700
Subject: [PATCH 15/17] first pass.

---
 Source/TimeIntegration/ERF_MakeFastCoeffs.cpp | 29 +++++++-
 Source/TimeIntegration/ERF_Substep_T.cpp      | 74 +++++++++++--------
 2 files changed, 67 insertions(+), 36 deletions(-)

diff --git a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
index 5766268fa4..beab6e1198 100644
--- a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
+++ b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
@@ -148,13 +148,34 @@ void make_fast_coeffs (int /*level*/,
                 Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k  ,PrimTheta_comp) );
                 Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
 
+                Real qv_t_lo  = (l_use_moisture) ? myhalf * ( prim(i,j,k-2,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp) ) : zero;
+                Real qv_t_mid = (l_use_moisture) ? myhalf * ( prim(i,j,k-1,PrimQ1_comp) + prim(i,j,k  ,PrimQ1_comp) ) : zero;
+                Real qv_t_hi  = (l_use_moisture) ? myhalf * ( prim(i,j,k  ,PrimQ1_comp) + prim(i,j,k+1,PrimQ1_comp) ) : zero;
+
+                Real r_t_lo  = myhalf * ( stage_cons(i,j,k-2,Rho_comp) + stage_cons(i,j,k-1,Rho_comp) );
+                Real r_t_mid = myhalf * ( stage_cons(i,j,k-1,Rho_comp) + stage_cons(i,j,k  ,Rho_comp) );
+                Real r_t_hi  = myhalf * ( stage_cons(i,j,k  ,Rho_comp) + stage_cons(i,j,k+1,Rho_comp) );
+
+                Real A_T_hi = (one + RvOverRd*qv_q);
+                Real A_T_lo = (one + RvOverRd*qv_p);
+                Real A_Q_hi = RvOverRd*prim(i,j,k  ,PrimTheta_comp);
+                Real A_Q_lo = RvOverRd*prim(i,j,k-1,PrimTheta_comp);
+                Real A_D_hi = RvOverRd*prim(i,j,k  ,PrimTheta_comp)*qv_p;
+                Real A_D_lo = RvOverRd*prim(i,j,k-1,PrimTheta_comp)*qv_q;
+
+                Real A_sum_hi = A_T_hi * theta_t_hi + A_Q_hi * qv_t_hi + A_D_hi * r_t_hi;
+                Real A_sum_lo = A_T_lo * theta_t_lo + A_Q_lo * qv_t_lo + A_D_lo * r_t_lo;
+
+                Real A_sum_mid_p = A_T_hi * theta_t_mid + A_Q_hi * qv_t_mid + A_D_hi * r_t_mid;
+                Real A_sum_mid_q = A_T_hi * theta_t_mid + A_Q_hi * qv_t_mid + A_D_hi * r_t_mid;
+
                 // LHS for tri-diagonal system
                 Real D = dtau * dtau * beta_2 * beta_2 * dzi;
-                coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * (one + RvOverRd*qv_q) * theta_t_lo );
-                coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * (one + RvOverRd*qv_p) * theta_t_hi );
+                coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * A_sum_lo );
+                coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * A_sum_hi );
 
-                coeffB_a(i,j,k) = one + D * ( ( coeff_Q * (one + RvOverRd*qv_q) / detJ(i,j,k-1)
-                                              - coeff_P * (one + RvOverRd*qv_p) / detJ(i,j,k  ) ) * theta_t_mid
+                coeffB_a(i,j,k) = one + D * ( ( coeff_Q * A_sum_mid_q / detJ(i,j,k-1)
+                                              - coeff_P * A_sum_mid_p / detJ(i,j,k  ) )
                                             + halfg * ( Real(1.0)/detJ(i,j,k) - Real(1.0)/detJ(i,j,k-1) ) );
             });
 
diff --git a/Source/TimeIntegration/ERF_Substep_T.cpp b/Source/TimeIntegration/ERF_Substep_T.cpp
index d288fe43ce..5e1706d408 100644
--- a/Source/TimeIntegration/ERF_Substep_T.cpp
+++ b/Source/TimeIntegration/ERF_Substep_T.cpp
@@ -385,17 +385,14 @@ void erf_substep_T (int step, int /*nrk*/,
         FArrayBox temp_rhs_fab;
         FArrayBox RHS_fab;
         FArrayBox soln_fab;
-        FArrayBox qv_str_fab;
 
         RHS_fab.resize     (tbz,1,The_Async_Arena());
         soln_fab.resize    (tbz,1,The_Async_Arena());
         temp_rhs_fab.resize(tbz,3,The_Async_Arena());
-        qv_str_fab.resize(tbz,1,The_Async_Arena());
 
         auto const& RHS_a        =      RHS_fab.array();
         auto const& soln_a       =     soln_fab.array();
         auto const& temp_rhs_arr = temp_rhs_fab.array();
-        auto const& qv_str_arr   = qv_str_fab.array();
 
         auto const&     coeffA_a =     coeff_A_mf.array(mfi);
         auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
@@ -482,15 +479,8 @@ void erf_substep_T (int step, int /*nrk*/,
                                            xflux_lo * (prim(i,j,k,3) + prim(i-1,j,k,3)) ) * dxi * mfsq+
                                          ( yflux_hi * (prim(i,j,k,3) + prim(i,j+1,k,3)) -
                                            yflux_lo * (prim(i,j,k,3) + prim(i,j-1,k,3)) ) * dyi * mfsq) * myhalf;
-
-                // Explicit update for star variables
-                Real fast_rhs_rho   = -( temp_rhs_arr(i,j,k,0) +
-                                       ( omega_arr(i,j,k+1) - omega_arr(i,j,k  ) ) * dzi ) / detJ(i,j,k);
-                Real new_rho        = cur_cons(i,j,k,0) + dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
-                Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
-                                       ( omega_arr(i,j,k+1) * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
-                                         omega_arr(i,j,k  ) * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
-                qv_str_arr(i,j,k)   = ( cur_cons(i,j,k,4) + dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv) ) / new_rho;
+            } else {
+                temp_rhs_arr(i,j,k,2) = zero;
             }
 
 
@@ -539,23 +529,37 @@ void erf_substep_T (int step, int /*nrk*/,
             Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k  ,PrimTheta_comp) );
             Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
 
+            Real qv_t_lo  = (l_use_moisture) ? myhalf * ( prim(i,j,k-2,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp) ) : zero;
+            Real qv_t_mid = (l_use_moisture) ? myhalf * ( prim(i,j,k-1,PrimQ1_comp) + prim(i,j,k  ,PrimQ1_comp) ) : zero;
+            Real qv_t_hi  = (l_use_moisture) ? myhalf * ( prim(i,j,k  ,PrimQ1_comp) + prim(i,j,k+1,PrimQ1_comp) ) : zero;
+
+            Real qv_p = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
+            Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
+
+            Real A_T_hi = (one + RvOverRd*qv_q);
+            Real A_T_lo = (one + RvOverRd*qv_p);
+            Real A_Q_hi = RvOverRd*prim(i,j,k  ,PrimTheta_comp);
+            Real A_Q_lo = RvOverRd*prim(i,j,k-1,PrimTheta_comp);
+            Real A_D_hi = RvOverRd*prim(i,j,k  ,PrimTheta_comp)*qv_p;
+            Real A_D_lo = RvOverRd*prim(i,j,k-1,PrimTheta_comp)*qv_q;
+
+            Real T_rhs_hi = slow_rhs_cons(i,j,k  ,RhoTheta_comp);
+            Real Q_rhs_hi = (l_use_moisture) ? slow_rhs_cons(i,j,k  ,RhoQ1_comp) : zero;
+            Real D_rhs_hi  = slow_rhs_cons(i,j,k  ,Rho_comp);
+            Real A_sum_rhs_p = A_T_hi * T_rhs_hi + A_Q_hi * Q_rhs_hi + A_D_hi * D_rhs_hi;
+
+            Real T_rhs_lo = slow_rhs_cons(i,j,k-1,RhoTheta_comp);
+            Real Q_rhs_lo = (l_use_moisture) ? slow_rhs_cons(i,j,k-1,RhoQ1_comp) : zero;
+            Real D_rhs_lo = slow_rhs_cons(i,j,k-1,Rho_comp);
+            Real A_sum_rhs_q = A_T_lo * T_rhs_lo + A_Q_lo * Q_rhs_lo + A_D_lo * D_rhs_lo;
+
             // line 2 last two terms (order dtau)
-            Real qv_str_fact_P  = (!l_use_moisture) ? Real(1.0) : (Real(1.0) + RvOverRd * qv_str_arr(i,j,k  ));
-            Real qv_str_fact_Q  = (!l_use_moisture) ? Real(1.0) : (Real(1.0) + RvOverRd * qv_str_arr(i,j,k-1));
-            Real qv_stg_fact_P  = (!l_use_moisture) ? Real(1.0) : (Real(1.0) + RvOverRd * prim(i,j,k  ,PrimQ1_comp));
-            Real qv_stg_fact_Q  = (!l_use_moisture) ? Real(1.0) : (Real(1.0) + RvOverRd * prim(i,j,k-1,PrimQ1_comp));
-            Real str_drho_thm_P = cur_cons(i,j,k  ,1) * qv_str_fact_P - stage_cons(i,j,k  ,RhoTheta_comp) * qv_stg_fact_P;
-            Real str_drho_thm_Q = cur_cons(i,j,k-1,1) * qv_str_fact_Q - stage_cons(i,j,k-1,RhoTheta_comp) * qv_stg_fact_Q;
-            Real thm_P  = beta_1 * old_drho_thm(i,j,k  ) + beta_2 * str_drho_thm_P;
-            Real thm_Q  = beta_1 * old_drho_thm(i,j,k-1) + beta_2 * str_drho_thm_Q;
-            Real R0_tmp = -halfg * old_drho(i,j,k  ) + coeff_P * thm_P
-                          -halfg * old_drho(i,j,k-1) + coeff_Q * thm_Q;
+            Real R0_tmp = -halfg * old_drho(i,j,k  ) + coeff_P * old_drho_thm(i,j,k  )
+                          -halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_thm(i,j,k-1);
 
             // line 3 residuals (order dtau^2) one <-> beta_2
-            Real R1_tmp =  -halfg * (  slow_rhs_cons(i,j,k  ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp) );
-
-                 R1_tmp +=  coeff_P * slow_rhs_cons(i,j,k  ,RhoTheta_comp) * qv_str_fact_P
-                          + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * qv_str_fact_Q;
+            Real R1_tmp  =  -halfg * (  slow_rhs_cons(i,j,k  ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp) );
+                 R1_tmp +=  coeff_P * A_sum_rhs_p + coeff_Q * A_sum_rhs_q;
 
             Real Omega_kp1 = omega_arr(i,j,k+1);
             Real Omega_k   = omega_arr(i,j,k  );
@@ -568,8 +572,14 @@ void erf_substep_T (int step, int /*nrk*/,
                               + temp_rhs_arr(i,j,k,Rho_comp)/detJ(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ(i,j,k-1) );
 
             // consolidate lines 6&7 (order dtau^2)
-            R1_tmp += -( coeff_P/detJ(i,j,k  ) * qv_str_fact_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k  ,RhoTheta_comp) )
-                       + coeff_Q/detJ(i,j,k-1) * qv_str_fact_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
+            R1_tmp += -( coeff_P/detJ(i,j,k  ) * A_T_hi * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid ) + temp_rhs_arr(i,j,k  ,RhoTheta_comp) )
+                       + coeff_Q/detJ(i,j,k-1) * A_T_lo * ( beta_1 * dzi * (Omega_k*theta_t_mid  - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
+
+            R1_tmp += -( coeff_P/detJ(i,j,k  ) * A_Q_hi * ( beta_1 * dzi * (Omega_kp1*qv_t_hi - Omega_k*qv_t_mid ) + temp_rhs_arr(i,j,k  ,RhoTheta_comp+1) )
+                       + coeff_Q/detJ(i,j,k-1) * A_Q_lo * ( beta_1 * dzi * (Omega_k*qv_t_mid  - Omega_km1*qv_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp+1) ) );
+
+            R1_tmp += -( coeff_P/detJ(i,j,k  ) * A_D_hi * ( beta_1 * dzi * (Omega_kp1 - Omega_k  ) + temp_rhs_arr(i,j,k  ,Rho_comp) )
+                       + coeff_Q/detJ(i,j,k-1) * A_D_lo * ( beta_1 * dzi * (Omega_k   - Omega_km1) + temp_rhs_arr(i,j,k-1,Rho_comp) ) );
 
             // line 1
             RHS_a(i,j,k) = old_drho_w(i,j,k) + dtau * (slow_rhs_rho_w(i,j,k) + zmom_src_arr(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
@@ -581,7 +591,7 @@ void erf_substep_T (int step, int /*nrk*/,
         });
         } // end profile
 
-        Box b2d = tbz; // Copy constructor
+        Box b2d = tbz;
         b2d.setRange(2,0);
 
         auto const lo = lbound(bx);
@@ -596,7 +606,7 @@ void erf_substep_T (int step, int /*nrk*/,
         ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
         {
             // w_klo, w_khi given by specified Dirichlet values
-            RHS_a(i,j,lo.z  ) = dtau * (slow_rhs_rho_w(i,j,lo.z) + zmom_src_arr(i,j,lo.z));
+            RHS_a(i,j,lo.z  ) = dtau * (slow_rhs_rho_w(i,j,lo.z  ) + zmom_src_arr(i,j,lo.z  ));
             RHS_a(i,j,hi.z+1) = dtau * (slow_rhs_rho_w(i,j,hi.z+1) + zmom_src_arr(i,j,hi.z+1));
 
             // w = specified Dirichlet value at k = lo.z
@@ -689,7 +699,7 @@ void erf_substep_T (int step, int /*nrk*/,
         BL_PROFILE("fast_rho_final_update");
         ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
         {
-              Real zflux_lo = beta_2 * soln_a(i,j,k  ) + beta_1 * omega_arr(i,j,k);
+              Real zflux_lo = beta_2 * soln_a(i,j,k  ) + beta_1 * omega_arr(i,j,k  );
               Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
 
               // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
@@ -729,7 +739,7 @@ void erf_substep_T (int step, int /*nrk*/,
               // add in source terms for cell-centered conserved variables
               cur_cons(i,j,k,Rho_comp)      += dtau * cc_src_arr(i,j,k,Rho_comp);
               cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
-              if (l_use_moisture) { cur_cons(i,j,k,RhoQ1_comp)    += dtau * cc_src_arr(i,j,k,RhoQ1_comp); }
+              if (l_use_moisture) { cur_cons(i,j,k,RhoQ1_comp) += dtau * cc_src_arr(i,j,k,RhoQ1_comp); }
         });
         } // end profile
 

From 041edc0361bb225f1ff72b43ef3063dd66c6f894 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Wed, 20 May 2026 10:42:46 -0700
Subject: [PATCH 16/17] Unify theory and code.

---
 Source/TimeIntegration/ERF_MakeFastCoeffs.cpp |  57 +++-----
 Source/TimeIntegration/ERF_Substep_T.cpp      | 137 ++++++++++--------
 2 files changed, 97 insertions(+), 97 deletions(-)

diff --git a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
index beab6e1198..531df69e4f 100644
--- a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
+++ b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
@@ -123,16 +123,32 @@ void make_fast_coeffs (int /*level*/,
                  Real     detJ_on_kface = myhalf * (detJ(i,j,k) + detJ(i,j,k-1));
                  Real inv_detJ_on_kface = one / detJ_on_kface;
 
-                 Real qv_p = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
-                 Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
+                 Real thd_km2 = prim(i,j,k-2,PrimTheta_comp);
+                 Real thd_km1 = prim(i,j,k-1,PrimTheta_comp);
+                 Real thd_k   = prim(i,j,k  ,PrimTheta_comp);
+                 Real thd_kp1 = prim(i,j,k+1,PrimTheta_comp);
+
+                 Real qv_km2 = (l_use_moisture) ? prim(i,j,k-2,PrimQ1_comp) : zero;
+                 Real qv_km1 = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
+                 Real qv_k   = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
+                 Real qv_kp1 = (l_use_moisture) ? prim(i,j,k+1,PrimQ1_comp) : zero;
+
+                 Real thm_km2 = thd_km2 * (Real(1.0) + RvOverRd*qv_km2);
+                 Real thm_km1 = thd_km1 * (Real(1.0) + RvOverRd*qv_km1);
+                 Real thm_k   = thd_k   * (Real(1.0) + RvOverRd*qv_k  );
+                 Real thm_kp1 = thd_kp1 * (Real(1.0) + RvOverRd*qv_kp1);
+
+                 Real thm_t_lo  = myhalf * (thm_km2 + thm_km1);
+                 Real thm_t_mid = myhalf * (thm_km1 + thm_k  );
+                 Real thm_t_hi  = myhalf * (thm_k   + thm_kp1);
 
                  Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c
                               +  halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) /
-                              (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) * (one + RvOverRd*qv_p) );
+                              (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) * (one + RvOverRd*qv_k) );
 
                  Real coeff_Q =  Gamma * R_d * dzi * inv_detJ_on_kface * pi_c
                               + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) /
-                              ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) * (one + RvOverRd*qv_q) );
+                              ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) * (one + RvOverRd*qv_km1) );
 
                  coeffP_a(i,j,k) = coeff_P;
                  coeffQ_a(i,j,k) = coeff_Q;
@@ -144,38 +160,13 @@ void make_fast_coeffs (int /*level*/,
                     coeff_Q /= (one + q);
                 }
 
-                Real theta_t_lo  = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
-                Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k  ,PrimTheta_comp) );
-                Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
-
-                Real qv_t_lo  = (l_use_moisture) ? myhalf * ( prim(i,j,k-2,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp) ) : zero;
-                Real qv_t_mid = (l_use_moisture) ? myhalf * ( prim(i,j,k-1,PrimQ1_comp) + prim(i,j,k  ,PrimQ1_comp) ) : zero;
-                Real qv_t_hi  = (l_use_moisture) ? myhalf * ( prim(i,j,k  ,PrimQ1_comp) + prim(i,j,k+1,PrimQ1_comp) ) : zero;
-
-                Real r_t_lo  = myhalf * ( stage_cons(i,j,k-2,Rho_comp) + stage_cons(i,j,k-1,Rho_comp) );
-                Real r_t_mid = myhalf * ( stage_cons(i,j,k-1,Rho_comp) + stage_cons(i,j,k  ,Rho_comp) );
-                Real r_t_hi  = myhalf * ( stage_cons(i,j,k  ,Rho_comp) + stage_cons(i,j,k+1,Rho_comp) );
-
-                Real A_T_hi = (one + RvOverRd*qv_q);
-                Real A_T_lo = (one + RvOverRd*qv_p);
-                Real A_Q_hi = RvOverRd*prim(i,j,k  ,PrimTheta_comp);
-                Real A_Q_lo = RvOverRd*prim(i,j,k-1,PrimTheta_comp);
-                Real A_D_hi = RvOverRd*prim(i,j,k  ,PrimTheta_comp)*qv_p;
-                Real A_D_lo = RvOverRd*prim(i,j,k-1,PrimTheta_comp)*qv_q;
-
-                Real A_sum_hi = A_T_hi * theta_t_hi + A_Q_hi * qv_t_hi + A_D_hi * r_t_hi;
-                Real A_sum_lo = A_T_lo * theta_t_lo + A_Q_lo * qv_t_lo + A_D_lo * r_t_lo;
-
-                Real A_sum_mid_p = A_T_hi * theta_t_mid + A_Q_hi * qv_t_mid + A_D_hi * r_t_mid;
-                Real A_sum_mid_q = A_T_hi * theta_t_mid + A_Q_hi * qv_t_mid + A_D_hi * r_t_mid;
-
                 // LHS for tri-diagonal system
                 Real D = dtau * dtau * beta_2 * beta_2 * dzi;
-                coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * A_sum_lo );
-                coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * A_sum_hi );
+                coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * thm_t_lo );
+                coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * thm_t_hi );
 
-                coeffB_a(i,j,k) = one + D * ( ( coeff_Q * A_sum_mid_q / detJ(i,j,k-1)
-                                              - coeff_P * A_sum_mid_p / detJ(i,j,k  ) )
+                coeffB_a(i,j,k) = one + D * ( ( coeff_Q / detJ(i,j,k-1)
+                                              - coeff_P / detJ(i,j,k  ) ) * thm_t_mid
                                             + halfg * ( Real(1.0)/detJ(i,j,k) - Real(1.0)/detJ(i,j,k-1) ) );
             });
 
diff --git a/Source/TimeIntegration/ERF_Substep_T.cpp b/Source/TimeIntegration/ERF_Substep_T.cpp
index 5e1706d408..cc4b43feb9 100644
--- a/Source/TimeIntegration/ERF_Substep_T.cpp
+++ b/Source/TimeIntegration/ERF_Substep_T.cpp
@@ -196,13 +196,11 @@ void erf_substep_T (int step, int /*nrk*/,
         const Array4<Real>& thm_extrap = extrap.array(mfi);
         const Array4<const Real>& prim = S_stage_prim.const_array(mfi);
         ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            Real qv_cur_fact = (!l_use_moisture) ? 0.0 :
-                (Real(1.0) + RvOverRd * cur_cons(i,j,k,RhoQ1_comp) / cur_cons(i,j,k,Rho_comp));
-            Real qv_stg_fact = (!l_use_moisture) ? 0.0 :
-                (Real(1.0) + RvOverRd *     prim(i,j,k,PrimQ1_comp));
+            Real qv_stg = (l_use_moisture) ?     prim(i,j,k,PrimQ1_comp) : zero;
+            Real qv_cur = (l_use_moisture) ? cur_cons(i,j,k,RhoQ1_comp ) / cur_cons(i,j,k,Rho_comp) : zero;
             old_drho(i,j,k)     =   cur_cons(i,j,k,Rho_comp) - stage_cons(i,j,k,Rho_comp);
-            old_drho_thm(i,j,k) =   cur_cons(i,j,k,RhoTheta_comp) * qv_cur_fact
-                                - stage_cons(i,j,k,RhoTheta_comp) * qv_stg_fact;
+            old_drho_thm(i,j,k) =   cur_cons(i,j,k,RhoTheta_comp) * (Real(1.0) + RvOverRd*qv_cur)
+                                - stage_cons(i,j,k,RhoTheta_comp) * (Real(1.0) + RvOverRd*qv_stg);
             if (step == 0) {
                 thm_extrap(i,j,k) = old_drho_thm(i,j,k);
             } else {
@@ -345,8 +343,6 @@ void erf_substep_T (int step, int /*nrk*/,
         Box vbx = mfi.validbox();
         const auto& vbx_hi = ubound(vbx);
 
-        const Array4<const Real>& stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
-
         const Array4<Real const>& zmom_src_arr   = zmom_src.const_array(mfi);
         const Array4<Real const>& cc_src_arr     = cc_src.const_array(mfi);
 
@@ -468,19 +464,19 @@ void erf_substep_T (int step, int /*nrk*/,
             Real mfsq = mf_mx(i,j,0) * mf_my(i,j,0);
 
             // NOTE: we are saving the (1/J) weighting for later when we add this to rho and theta
-            temp_rhs_arr(i,j,k,0) =  ( xflux_hi - xflux_lo ) * dxi * mfsq +
-                                     ( yflux_hi - yflux_lo ) * dyi * mfsq;
-            temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
-                                       xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
-                                     ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
-                                       yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * myhalf;
+            temp_rhs_arr(i,j,k,Rho_comp)     =  ( xflux_hi - xflux_lo ) * dxi * mfsq +
+                                                ( yflux_hi - yflux_lo ) * dyi * mfsq;
+            temp_rhs_arr(i,j,k,RhoTheta_comp) = (( xflux_hi * (prim(i,j,k) + prim(i+1,j,k)) -
+                                                   xflux_lo * (prim(i,j,k) + prim(i-1,j,k)) ) * dxi * mfsq+
+                                                 ( yflux_hi * (prim(i,j,k) + prim(i,j+1,k)) -
+                                                   yflux_lo * (prim(i,j,k) + prim(i,j-1,k)) ) * dyi * mfsq) * myhalf;
             if (l_use_moisture) {
-                temp_rhs_arr(i,j,k,2) = (( xflux_hi * (prim(i,j,k,3) + prim(i+1,j,k,3)) -
-                                           xflux_lo * (prim(i,j,k,3) + prim(i-1,j,k,3)) ) * dxi * mfsq+
-                                         ( yflux_hi * (prim(i,j,k,3) + prim(i,j+1,k,3)) -
-                                           yflux_lo * (prim(i,j,k,3) + prim(i,j-1,k,3)) ) * dyi * mfsq) * myhalf;
+                temp_rhs_arr(i,j,k,RhoTheta_comp+1) = (( xflux_hi * (prim(i,j,k,PrimQ1_comp) + prim(i+1,j,k,PrimQ1_comp)) -
+                                                         xflux_lo * (prim(i,j,k,PrimQ1_comp) + prim(i-1,j,k,PrimQ1_comp)) ) * dxi * mfsq+
+                                                       ( yflux_hi * (prim(i,j,k,PrimQ1_comp) + prim(i,j+1,k,PrimQ1_comp)) -
+                                                         yflux_lo * (prim(i,j,k,PrimQ1_comp) + prim(i,j-1,k,PrimQ1_comp)) ) * dyi * mfsq) * myhalf;
             } else {
-                temp_rhs_arr(i,j,k,2) = zero;
+                temp_rhs_arr(i,j,k,RhoTheta_comp+1) = zero;
             }
 
 
@@ -525,33 +521,52 @@ void erf_substep_T (int step, int /*nrk*/,
             Real coeff_P = coeffP_a(i,j,k);
             Real coeff_Q = coeffQ_a(i,j,k);
 
-            Real theta_t_lo  = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
-            Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k  ,PrimTheta_comp) );
-            Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
-
-            Real qv_t_lo  = (l_use_moisture) ? myhalf * ( prim(i,j,k-2,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp) ) : zero;
-            Real qv_t_mid = (l_use_moisture) ? myhalf * ( prim(i,j,k-1,PrimQ1_comp) + prim(i,j,k  ,PrimQ1_comp) ) : zero;
-            Real qv_t_hi  = (l_use_moisture) ? myhalf * ( prim(i,j,k  ,PrimQ1_comp) + prim(i,j,k+1,PrimQ1_comp) ) : zero;
-
-            Real qv_p = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
-            Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
-
-            Real A_T_hi = (one + RvOverRd*qv_q);
-            Real A_T_lo = (one + RvOverRd*qv_p);
-            Real A_Q_hi = RvOverRd*prim(i,j,k  ,PrimTheta_comp);
-            Real A_Q_lo = RvOverRd*prim(i,j,k-1,PrimTheta_comp);
-            Real A_D_hi = RvOverRd*prim(i,j,k  ,PrimTheta_comp)*qv_p;
-            Real A_D_lo = RvOverRd*prim(i,j,k-1,PrimTheta_comp)*qv_q;
-
-            Real T_rhs_hi = slow_rhs_cons(i,j,k  ,RhoTheta_comp);
-            Real Q_rhs_hi = (l_use_moisture) ? slow_rhs_cons(i,j,k  ,RhoQ1_comp) : zero;
-            Real D_rhs_hi  = slow_rhs_cons(i,j,k  ,Rho_comp);
-            Real A_sum_rhs_p = A_T_hi * T_rhs_hi + A_Q_hi * Q_rhs_hi + A_D_hi * D_rhs_hi;
-
-            Real T_rhs_lo = slow_rhs_cons(i,j,k-1,RhoTheta_comp);
-            Real Q_rhs_lo = (l_use_moisture) ? slow_rhs_cons(i,j,k-1,RhoQ1_comp) : zero;
-            Real D_rhs_lo = slow_rhs_cons(i,j,k-1,Rho_comp);
-            Real A_sum_rhs_q = A_T_lo * T_rhs_lo + A_Q_lo * Q_rhs_lo + A_D_lo * D_rhs_lo;
+            // Cell center vars for constructing theta_m
+            Real thd_km2 = prim(i,j,k-2,PrimTheta_comp);
+            Real thd_km1 = prim(i,j,k-1,PrimTheta_comp);
+            Real thd_k   = prim(i,j,k  ,PrimTheta_comp);
+            Real thd_kp1 = prim(i,j,k+1,PrimTheta_comp);
+
+            Real qv_km2 = (l_use_moisture) ? prim(i,j,k-2,PrimQ1_comp) : zero;
+            Real qv_km1 = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
+            Real qv_k   = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
+            Real qv_kp1 = (l_use_moisture) ? prim(i,j,k+1,PrimQ1_comp) : zero;
+
+            Real thm_km2 = thd_km2 * (Real(1.0) + RvOverRd*qv_km2);
+            Real thm_km1 = thd_km1 * (Real(1.0) + RvOverRd*qv_km1);
+            Real thm_k   = thd_k   * (Real(1.0) + RvOverRd*qv_k  );
+            Real thm_kp1 = thd_kp1 * (Real(1.0) + RvOverRd*qv_kp1);
+
+            // Stage theta_m at w-faces
+            Real thm_t_lo  = myhalf * (thm_km2 + thm_km1);
+            Real thm_t_mid = myhalf * (thm_km1 + thm_k  );
+            Real thm_t_hi  = myhalf * (thm_k   + thm_kp1);
+
+            // Stage coefficients for linearization
+            Real A_T_k   =  (one + RvOverRd*qv_k  );
+            Real A_T_km1 =  (one + RvOverRd*qv_km1);
+            Real A_Q_k   =  RvOverRd*prim(i,j,k  ,PrimTheta_comp);
+            Real A_Q_km1 =  RvOverRd*prim(i,j,k-1,PrimTheta_comp);
+            Real A_D_k   = -RvOverRd*prim(i,j,k  ,PrimTheta_comp)*qv_k;
+            Real A_D_km1 = -RvOverRd*prim(i,j,k-1,PrimTheta_comp)*qv_km1;
+
+            // Sums of slow RHS
+            Real Q_srhs_k     = (l_use_moisture) ? slow_rhs_cons(i,j,k  ,RhoQ1_comp) : zero;
+            Real Q_srhs_km1   = (l_use_moisture) ? slow_rhs_cons(i,j,k-1,RhoQ1_comp) : zero;
+            Real A_sum_srhs_k   = A_D_k   * slow_rhs_cons(i,j,k  ,Rho_comp)
+                                + A_T_k   * slow_rhs_cons(i,j,k  ,RhoTheta_comp)
+                                + A_Q_k   * Q_srhs_k;
+            Real A_sum_srhs_km1 = A_D_km1 * slow_rhs_cons(i,j,k-1,Rho_comp)
+                                + A_T_km1 * slow_rhs_cons(i,j,k-1,RhoTheta_comp)
+                                + A_Q_km1 * Q_srhs_km1;
+
+            // Sums of the temp RHS
+            Real A_sum_trhs_k   = A_D_k   * temp_rhs_arr(i,j,k  ,Rho_comp)
+                                + A_T_k   * temp_rhs_arr(i,j,k  ,RhoTheta_comp)
+                                + A_Q_k   * temp_rhs_arr(i,j,k  ,RhoTheta_comp+1);
+            Real A_sum_trhs_km1 = A_D_km1 * temp_rhs_arr(i,j,k-1,Rho_comp)
+                                + A_T_km1 * temp_rhs_arr(i,j,k-1,RhoTheta_comp)
+                                + A_Q_km1 * temp_rhs_arr(i,j,k-1,RhoTheta_comp+1);
 
             // line 2 last two terms (order dtau)
             Real R0_tmp = -halfg * old_drho(i,j,k  ) + coeff_P * old_drho_thm(i,j,k  )
@@ -559,7 +574,7 @@ void erf_substep_T (int step, int /*nrk*/,
 
             // line 3 residuals (order dtau^2) one <-> beta_2
             Real R1_tmp  =  -halfg * (  slow_rhs_cons(i,j,k  ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp) );
-                 R1_tmp +=  coeff_P * A_sum_rhs_p + coeff_Q * A_sum_rhs_q;
+                 R1_tmp +=  coeff_P * A_sum_srhs_k + coeff_Q * A_sum_srhs_km1;
 
             Real Omega_kp1 = omega_arr(i,j,k+1);
             Real Omega_k   = omega_arr(i,j,k  );
@@ -572,14 +587,8 @@ void erf_substep_T (int step, int /*nrk*/,
                               + temp_rhs_arr(i,j,k,Rho_comp)/detJ(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ(i,j,k-1) );
 
             // consolidate lines 6&7 (order dtau^2)
-            R1_tmp += -( coeff_P/detJ(i,j,k  ) * A_T_hi * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid ) + temp_rhs_arr(i,j,k  ,RhoTheta_comp) )
-                       + coeff_Q/detJ(i,j,k-1) * A_T_lo * ( beta_1 * dzi * (Omega_k*theta_t_mid  - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
-
-            R1_tmp += -( coeff_P/detJ(i,j,k  ) * A_Q_hi * ( beta_1 * dzi * (Omega_kp1*qv_t_hi - Omega_k*qv_t_mid ) + temp_rhs_arr(i,j,k  ,RhoTheta_comp+1) )
-                       + coeff_Q/detJ(i,j,k-1) * A_Q_lo * ( beta_1 * dzi * (Omega_k*qv_t_mid  - Omega_km1*qv_t_lo) + temp_rhs_arr(i,j,k-1,RhoTheta_comp+1) ) );
-
-            R1_tmp += -( coeff_P/detJ(i,j,k  ) * A_D_hi * ( beta_1 * dzi * (Omega_kp1 - Omega_k  ) + temp_rhs_arr(i,j,k  ,Rho_comp) )
-                       + coeff_Q/detJ(i,j,k-1) * A_D_lo * ( beta_1 * dzi * (Omega_k   - Omega_km1) + temp_rhs_arr(i,j,k-1,Rho_comp) ) );
+            R1_tmp += -( coeff_P/detJ(i,j,k  ) * ( beta_1 * dzi * (Omega_kp1*thm_t_hi - Omega_k*thm_t_mid ) + A_sum_trhs_k  )
+                       + coeff_Q/detJ(i,j,k-1) * ( beta_1 * dzi * (Omega_k*thm_t_mid  - Omega_km1*thm_t_lo) + A_sum_trhs_km1) );
 
             // line 1
             RHS_a(i,j,k) = old_drho_w(i,j,k) + dtau * (slow_rhs_rho_w(i,j,k) + zmom_src_arr(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
@@ -620,7 +629,7 @@ void erf_substep_T (int step, int /*nrk*/,
             cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
 
             for (int k = hi.z; k >= lo.z; k--) {
-                soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
+                soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
             }
         });
 #else
@@ -717,19 +726,19 @@ void erf_substep_T (int step, int /*nrk*/,
                   }
               }
 
-              Real fast_rhs_rho  = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
+              Real fast_rhs_rho  = -(temp_rhs_arr(i,j,k,Rho_comp) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
               cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
 
-              Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + myhalf *
-                                        ( zflux_hi * (prim(i,j,k,0) + prim(i,j,k+1,0)) -
-                                          zflux_lo * (prim(i,j,k,0) + prim(i,j,k-1,0)) ) * dzi ) / detJ(i,j,k);
+              Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,RhoTheta_comp) + myhalf *
+                                        ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
+                                          zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
               cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
 
               if (l_use_moisture) {
-                  Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,2) + myhalf *
-                                         ( zflux_hi * (prim(i,j,k,3) + prim(i,j,k+1,3)) -
-                                           zflux_lo * (prim(i,j,k,3) + prim(i,j,k-1,3)) ) * dzi ) / detJ(i,j,k);
-                  cur_cons(i,j,k,4) += dtau * (slow_rhs_cons(i,j,k,4) + fast_rhs_rhoqv);
+                  Real fast_rhs_rhoqv = -( temp_rhs_arr(i,j,k,RhoTheta_comp+1) + myhalf *
+                                         ( zflux_hi * (prim(i,j,k,PrimQ1_comp) + prim(i,j,k+1,PrimQ1_comp)) -
+                                           zflux_lo * (prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp)) ) * dzi ) / detJ(i,j,k);
+                  cur_cons(i,j,k,RhoQ1_comp) += dtau * (slow_rhs_cons(i,j,k,RhoQ1_comp) + fast_rhs_rhoqv);
               }
 
               if (l_reflux) {

From 78dee9818cf0a700b5d71329d46c40381122e615 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Wed, 20 May 2026 10:56:00 -0700
Subject: [PATCH 17/17] limit qv to zero for safety.

---
 Source/TimeIntegration/ERF_Substep_T.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Source/TimeIntegration/ERF_Substep_T.cpp b/Source/TimeIntegration/ERF_Substep_T.cpp
index cc4b43feb9..f1e338056f 100644
--- a/Source/TimeIntegration/ERF_Substep_T.cpp
+++ b/Source/TimeIntegration/ERF_Substep_T.cpp
@@ -748,7 +748,9 @@ void erf_substep_T (int step, int /*nrk*/,
               // add in source terms for cell-centered conserved variables
               cur_cons(i,j,k,Rho_comp)      += dtau * cc_src_arr(i,j,k,Rho_comp);
               cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
-              if (l_use_moisture) { cur_cons(i,j,k,RhoQ1_comp) += dtau * cc_src_arr(i,j,k,RhoQ1_comp); }
+              if (l_use_moisture) {
+                  cur_cons(i,j,k,RhoQ1_comp) = amrex::max(zero, cur_cons(i,j,k,RhoQ1_comp) + dtau * cc_src_arr(i,j,k,RhoQ1_comp));
+              }
         });
         } // end profile