From 72e5710d6e020a893493b13a71da67c907310ea6 Mon Sep 17 00:00:00 2001
From: Qiming Sun <qiming.sun@bytedance.com>
Date: Tue, 10 Mar 2026 15:35:46 -0700
Subject: [PATCH 1/5] Update dependency to pyscf 2.12

---
 .github/workflows/unittest-dev.yml         |  6 ++--
 gpu4pyscf/pbc/df/tests/test_pbc_aft.py     |  2 +-
 gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py | 14 ++++----
 gpu4pyscf/scf/smearing.py                  | 37 +++++++++++++++++++---
 requirements.txt                           |  4 +--
 5 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/unittest-dev.yml b/.github/workflows/unittest-dev.yml
index e2a966b63..8c1b675e3 100644
--- a/.github/workflows/unittest-dev.yml
+++ b/.github/workflows/unittest-dev.yml
@@ -5,16 +5,16 @@ on:
     inputs:
       pyscf_version:
         required: false
-        default: "2.8.0"
+        default: "2.12.1"
       cupy_versoin:
         required: false
-        default: "13.4.1"
+        default: "13.6.0"
       cutensor_version:
         required: false
         default: "2.2.0"
       libxc_version:
         required: false
-        default: "0.5.0"
+        default: "0.8.1"
       dispersion_version:
         required: false
         default: "1.5.0"
diff --git a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py
index 6f7abd357..02c1b4d3e 100644
--- a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py
+++ b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py
@@ -383,7 +383,7 @@ def test_ek_ip1_kpts(self):
 
         cell.precision = 1e-10
         cell.build(0, 0)
-        myfft = fft_cpu.FFTDF(cell)
+        myfft = fft_cpu.FFTDF(cell, kpts=kpts)
         vk = myfft.get_k_e1(dm, kpts=kpts)
         aoslices = cell.aoslice_by_atom()
         ref = np.empty((cell.natm, 3))
diff --git a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py
index 9aee55e21..c78d86332 100644
--- a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py
+++ b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py
@@ -134,7 +134,7 @@ def test_sr_vk_hermi1_kpts_vs_fft():
     cell.precision = 1e-10
     cell.build(0, 0)
     cell.omega = -rsjk.OMEGA
-    ref = fft.FFTDF(cell).get_jk(dm, with_j=False, kpts=kpts)[1].get()
+    ref = fft.FFTDF(cell, kpts=kpts).get_jk(dm, with_j=False, kpts=kpts)[1].get()
     assert abs(vk - ref).max() < 1e-8
 
 def test_sr_vk_hermi0_gamma_point_vs_fft():
@@ -185,7 +185,7 @@ def test_sr_vk_hermi0_kpts_vs_fft():
     cell.precision = 1e-10
     cell.build(0, 0)
     cell.omega = -rsjk.OMEGA
-    ref = fft.FFTDF(cell).get_jk(dm, hermi=0, kpts=kpts, with_j=False)[1].get()
+    ref = fft.FFTDF(cell, kpts=kpts).get_jk(dm, hermi=0, kpts=kpts, with_j=False)[1].get()
     assert abs(vk - ref).max() < 1e-8
 
 def test_vk_kpts_band_vs_fft():
@@ -234,7 +234,8 @@ def test_vk_hermi1_kpts_vs_fft():
 
     cell.precision = 1e-10
     cell.build(0, 0)
-    ref = fft.FFTDF(cell).get_jk(dm, hermi=1, with_j=False, kpts=kpts, exxdiv='ewald')[1].get()
+    ref = fft.FFTDF(cell, kpts=kpts).get_jk(
+        dm, hermi=1, with_j=False, kpts=kpts, exxdiv='ewald')[1].get()
     assert abs(vk - ref).max() < 1e-8
 
 def test_vk_hermi0_gamma_point_vs_fft():
@@ -283,7 +284,8 @@ def test_vk_hermi0_kpts_vs_fft():
 
     cell.precision = 1e-10
     cell.build(0, 0)
-    ref = fft.FFTDF(cell).get_jk(dm, hermi=0, kpts=kpts, with_j=False)[1].get()
+    ref = fft.FFTDF(cell, kpts=kpts).get_jk(
+        dm, hermi=0, kpts=kpts, with_j=False)[1].get()
     assert abs(vk - ref).max() < 1e-8
 
 def test_ejk_sr_ip1_per_atom_gamma_point():
@@ -336,7 +338,7 @@ def test_ejk_sr_ip1_per_atom_kpts():
     assert abs(ejk.sum(axis=0)).max() < 1e-8
 
     cell.omega = -rsjk.OMEGA
-    vj, vk = fft_cpu.FFTDF(cell).get_jk_e1(dm, kpts=kpts, exxdiv=exxdiv)
+    vj, vk = fft_cpu.FFTDF(cell, kpts=kpts).get_jk_e1(dm, kpts=kpts, exxdiv=exxdiv)
     vhf = vj - vk * .5
     aoslices = cell.aoslice_by_atom()
     ref = np.empty((cell.natm, 3))
@@ -427,7 +429,7 @@ def test_ejk_ip1_per_atom_kpts():
     ejk += with_rsjk._get_ejk_lr_ip1(dm, kpts=kpts, exxdiv=None)
     assert abs(ejk.sum(axis=0)).max() < 1e-8
 
-    vj, vk = fft_cpu.FFTDF(cell).get_jk_e1(dm, kpts=kpts, exxdiv=None)
+    vj, vk = fft_cpu.FFTDF(cell, kpts=kpts).get_jk_e1(dm, kpts=kpts, exxdiv=None)
     vhf = vj - vk * .5
     aoslices = cell.aoslice_by_atom()
     ref = np.empty((cell.natm, 3))
diff --git a/gpu4pyscf/scf/smearing.py b/gpu4pyscf/scf/smearing.py
index 197afad18..df8b3c3fb 100644
--- a/gpu4pyscf/scf/smearing.py
+++ b/gpu4pyscf/scf/smearing.py
@@ -18,7 +18,7 @@
 from pyscf import __config__, lib
 from pyscf.scf import addons as cpu_addons
 from pyscf.scf.addons import (_fermi_smearing_occ, _gaussian_smearing_occ,
-                              _get_fermi, _smearing_optimize)
+                              _get_fermi)
 from pyscf.pbc.tools import print_mo_energy_occ
 from gpu4pyscf.lib import logger
 
@@ -55,6 +55,38 @@ def smearing_(mf, *args, **kwargs):
     return mf
 
 
+def _smearing_optimize(f_occ, mo_es, nocc, sigma):
+    def rootfn(m):
+        mo_occ = f_occ(m, mo_es, sigma)
+        return mo_occ.sum() - nocc
+
+    # it's okay to set small xtol according to the docs.
+    mu = scipy.optimize.bisect(rootfn, mo_es.min()-10., mo_es.max()+10.,
+                               xtol=1e-16, maxiter=10000)
+
+    cur_err = abs(rootfn(mu))
+
+    # Check if we can further improve mu by moving it up/down
+    # by the minimum machine-representable amount.
+    # In many cases with Fermi-type smearing and sigma~1e-6,
+    # the minimum possible error is still >1e-11 because the
+    # smearing function is just so sharp. Because xtol is set to 1e-16 above,
+    # this should not take too many iterations.
+
+    iters, maxiter = 0, 1000
+
+    while abs(rootfn(numpy.nextafter(mu, numpy.inf))) < cur_err and iters < maxiter:
+        mu = numpy.nextafter(mu, numpy.inf)
+        cur_err = abs(rootfn(mu))
+        iters += 1
+
+    while abs(rootfn(numpy.nextafter(mu, -numpy.inf))) < cur_err and iters < maxiter:
+        mu = numpy.nextafter(mu, -numpy.inf)
+        cur_err = abs(rootfn(mu))
+        iters += 1
+
+    return mu, f_occ(mu, mo_es, sigma)
+
 class _SmearingSCF:
 
     __name_mixin__ = "Smearing"
@@ -112,8 +144,6 @@ def get_occ(self, mo_energy=None, mo_coeff=None):
             if self.mu0 is None:
                 mu_a, occa = _smearing_optimize(f_occ, mo_es[0], nocc[0], sigma)
                 mu_b, occb = _smearing_optimize(f_occ, mo_es[1], nocc[1], sigma)
-                mu_a = mu_a[0]
-                mu_b = mu_b[0]
             else:
                 if np.isscalar(self.mu0):
                     mu_a = mu_b = self.mu0
@@ -167,7 +197,6 @@ def get_occ(self, mo_energy=None, mo_coeff=None):
 
             if self.mu0 is None:
                 mu, mo_occs = _smearing_optimize(f_occ, mo_es, nelectron, sigma)
-                mu = mu[0]
             else:
                 # If mu0 is given, fix mu instead of electron number. XXX -Chong Sun
                 mu = self.mu0
diff --git a/requirements.txt b/requirements.txt
index 3543d57b6..d8b5a2024 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 cutensor-cu12==2.2.0
 gpu4pyscf-libxc-cuda12x==0.8.1
 cupy-cuda12x==13.4.1
-pyscf==2.8.0
+pyscf==2.12.1
 basis-set-exchange==0.11
 pyscf-dispersion==1.5.0
-geometric==1.1.0
\ No newline at end of file
+geometric==1.1.0

From dd86c90c546c9a75ce46f3d26c2e0484708af034 Mon Sep 17 00:00:00 2001
From: Qiming Sun <qiming.sun@bytedance.com>
Date: Wed, 18 Mar 2026 00:07:08 -0700
Subject: [PATCH 2/5] Missing import module

---
 gpu4pyscf/scf/smearing.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gpu4pyscf/scf/smearing.py b/gpu4pyscf/scf/smearing.py
index df8b3c3fb..19f29805e 100644
--- a/gpu4pyscf/scf/smearing.py
+++ b/gpu4pyscf/scf/smearing.py
@@ -75,13 +75,13 @@ def rootfn(m):
 
     iters, maxiter = 0, 1000
 
-    while abs(rootfn(numpy.nextafter(mu, numpy.inf))) < cur_err and iters < maxiter:
-        mu = numpy.nextafter(mu, numpy.inf)
+    while abs(rootfn(np.nextafter(mu, np.inf))) < cur_err and iters < maxiter:
+        mu = np.nextafter(mu, np.inf)
         cur_err = abs(rootfn(mu))
         iters += 1
 
-    while abs(rootfn(numpy.nextafter(mu, -numpy.inf))) < cur_err and iters < maxiter:
-        mu = numpy.nextafter(mu, -numpy.inf)
+    while abs(rootfn(np.nextafter(mu, -np.inf))) < cur_err and iters < maxiter:
+        mu = np.nextafter(mu, -np.inf)
         cur_err = abs(rootfn(mu))
         iters += 1
 

From b3804ec8dd6cae83d9d5748c73c9a9efa37558ce Mon Sep 17 00:00:00 2001
From: Qiming Sun <qiming.sun@bytedance.com>
Date: Wed, 18 Mar 2026 08:24:12 -0700
Subject: [PATCH 3/5] Adjust to_gpu

---
 gpu4pyscf/_patch_pyscf.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/gpu4pyscf/_patch_pyscf.py b/gpu4pyscf/_patch_pyscf.py
index e4a985674..c0f5985a9 100644
--- a/gpu4pyscf/_patch_pyscf.py
+++ b/gpu4pyscf/_patch_pyscf.py
@@ -252,22 +252,18 @@ def to_gpu(method, out=None):
 
             from importlib import import_module
             mod = import_module(method.__module__.replace('pyscf', 'gpu4pyscf'))
-            try:
-                cls = getattr(mod, method.__class__.__name__)
-            except AttributeError:
-                if hasattr(cls, 'from_cpu'):
-                    # the customized to_gpu function can be accessed at module
-                    # levelin gpu4pyscf.
-                    return cls.from_cpu(method)
-                raise
-
-            # Allow gpu4pyscf to customize the to_gpu method for PySCF classes.
             if hasattr(mod, 'from_cpu'):
+                # the customized to_gpu function can be accessed at module
+                # levelin gpu4pyscf.
                 return mod.from_cpu(method)
 
             # A temporary GPU instance. This ensures to initialize private
             # attributes that are only available for GPU code.
             cls = getattr(mod, method.__class__.__name__)
+            # Allow gpu4pyscf to customize the to_gpu method for PySCF classes.
+            if hasattr(cls, 'from_cpu'):
+                return cls.from_cpu(method)
+
             out = method.view(cls)
 
         elif hasattr(out, 'from_cpu'):

From ec45cd1d51d7adda3bf54445bd10e4d591332db1 Mon Sep 17 00:00:00 2001
From: Qiming Sun <qiming.sun@bytedance.com>
Date: Wed, 18 Mar 2026 08:55:12 -0700
Subject: [PATCH 4/5] Fix get_coulG and madelung interfaces

---
 gpu4pyscf/pbc/df/aft.py                       |  6 +-
 gpu4pyscf/pbc/df/aft_jk.py                    | 17 +++--
 gpu4pyscf/pbc/df/fft_jk.py                    | 22 ++++--
 gpu4pyscf/pbc/df/grad/krhf.py                 |  4 +-
 gpu4pyscf/pbc/df/grad/kuhf.py                 |  4 +-
 gpu4pyscf/pbc/df/grad/rhf.py                  |  4 +-
 gpu4pyscf/pbc/df/grad/uhf.py                  |  4 +-
 gpu4pyscf/pbc/df/tests/test_pbc_aft.py        |  2 +-
 gpu4pyscf/pbc/scf/hf.py                       |  2 +-
 gpu4pyscf/pbc/scf/j_engine.py                 |  2 +-
 gpu4pyscf/pbc/scf/rsjk.py                     | 31 +++++----
 gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py    |  2 +-
 .../pbc/scf/tests/test_pbc_scf_smearing.py    | 10 +--
 gpu4pyscf/pbc/tools/pbc.py                    | 68 +++++++++++++++----
 gpu4pyscf/scf/tests/test_fermi_smearing.py    |  9 +--
 15 files changed, 122 insertions(+), 65 deletions(-)

diff --git a/gpu4pyscf/pbc/df/aft.py b/gpu4pyscf/pbc/df/aft.py
index 7d25cc6aa..a05a7d0a4 100644
--- a/gpu4pyscf/pbc/df/aft.py
+++ b/gpu4pyscf/pbc/df/aft.py
@@ -118,13 +118,15 @@ class AFTDFMixin:
 
     pw_loop = NotImplemented
 
-    def weighted_coulG(mydf, kpt=None, exx=None, mesh=None, omega=None, kpts=None):
+    def weighted_coulG(mydf, kpt=None, exx=None, mesh=None, omega=None, kmesh=None):
         '''Weighted regular Coulomb kernel'''
         cell = mydf.cell
         if mesh is None:
             mesh = mydf.mesh
         Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
-        coulG = get_coulG(cell, kpt, exx, mesh=mesh, Gv=Gv, omega=omega, kpts=kpts)
+        if kmesh is not None:
+            mydf = None
+        coulG = get_coulG(cell, kpt, exx, mesh=mesh, Gv=Gv, omega=omega, kmesh=kmesh)
         coulG *= kws
         return coulG
 
diff --git a/gpu4pyscf/pbc/df/aft_jk.py b/gpu4pyscf/pbc/df/aft_jk.py
index c1f6703b1..241abf6a5 100644
--- a/gpu4pyscf/pbc/df/aft_jk.py
+++ b/gpu4pyscf/pbc/df/aft_jk.py
@@ -31,6 +31,7 @@
 from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh
 from gpu4pyscf.pbc.df.ft_ao import FTOpt, libpbc
 from gpu4pyscf.pbc.df.fft_jk import _format_dms, _format_jks, _ewald_exxdiv_for_G0
+from gpu4pyscf.pbc.tools import madelung
 from gpu4pyscf.lib.cupy_helper import contract, get_avail_mem, asarray
 from gpu4pyscf.lib import logger
 from gpu4pyscf.scf.jk import SHM_SIZE
@@ -216,7 +217,7 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=None, kpts_band=None,
     log.debug1('Gblksize = %d', Gblksize)
 
     for group_id, (kpt, ki_idx, kj_idx, self_conj) in enumerate(kpt_iters):
-        vkcoulG = mydf.weighted_coulG(kpt, exxdiv, mesh, kpts=kpts) * weight
+        vkcoulG = mydf.weighted_coulG(kpt, exxdiv, mesh, kmesh=bvk_kmesh) * weight
         for p0, p1 in lib.prange(0, ngrids, Gblksize):
             log.debug3('update_vk [%s:%s]', p0, p1)
             Gpq = ft_kern(Gv[p0:p1], kpt, kpts, kj_idx)
@@ -461,7 +462,7 @@ def get_ek_ip1(mydf, dm, kpts=None, exxdiv=None):
     ek = cp.zeros((cell.natm, 3))
     for group_id, (kp, kp_conj, ki_idx, kj_idx) in enumerate(bvk_kk_adapted_iter(kmesh)):
         kpt = kpts[kp]
-        wcoulG = mydf.weighted_coulG(kpt, exxdiv, mydf.mesh, kpts=kpts)
+        wcoulG = mydf.weighted_coulG(kpt, exxdiv, mydf.mesh, kmesh=kmesh)
         swap_2e = kp != kp_conj
         for p0, p1 in lib.prange(0, ngrids, blksize):
             nGv = p1 - p0
@@ -829,7 +830,6 @@ def get_ek_strain_deriv(mydf, dm, kpts=None, exxdiv=None, omega=None):
     if (exxdiv == 'ewald' and
         (cell.dimension == 3 or
          (cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum'))):
-        from pyscf.pbc.tools.pbc import madelung
         from gpu4pyscf.pbc.gto import int1e
         cell = mydf.cell
         s0 = int1e.int1e_ovlp(cell, kpts, kmesh)
@@ -843,13 +843,12 @@ def get_ek_strain_deriv(mydf, dm, kpts=None, exxdiv=None, omega=None):
         for i in range(3):
             for j in range(i+1):
                 cell1, cell2 = rks_stress._finite_diff_cells(cell, i, j, disp)
-                kpts1 = scaled_kpts.dot(cell1.reciprocal_vectors(norm_to=1))
-                kpts2 = scaled_kpts.dot(cell2.reciprocal_vectors(norm_to=1))
-                e1 = nkpts * madelung(cell1, kpts1, omega=omega)
-                e2 = nkpts * madelung(cell2, kpts2, omega=omega)
+                e1 = nkpts * madelung(cell1, omega=omega, kmesh=kmesh)
+                e2 = nkpts * madelung(cell2, omega=omega, kmesh=kmesh)
                 ewald_G0[j,i] = ewald_G0[i,j] = (e1-e2)/(2*disp)
         ewald_G0 *= ek_G0
-        ewald_G0 += int1e.ovlp_strain_deriv(cell, k_dm, kpts) * madelung(cell, kpts, omega=omega)
+        ovlp_strain = int1e.ovlp_strain_deriv(cell, k_dm, kpts)
+        ewald_G0 += ovlp_strain * madelung(cell, omega=omega, kmesh=kmesh)
         sigma += ewald_G0
 
     log.timer_debug1('get_ek_ip1', *cpu0)
@@ -929,7 +928,7 @@ def get_jk(mydf, dm, hermi=1, kpt=np.zeros(3),
         if (exxdiv == 'ewald' and
             (cell.dimension < 2 or  # 0D and 1D are computed with inf_vacuum
              (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))):
-            _ewald_exxdiv_for_G0(cell, kpt, dms, vk)
+            _ewald_exxdiv_for_G0(cell, None, dms, vk)
         if k_real:
             vk = vk.real
         vk = vk.reshape(dm.shape)
diff --git a/gpu4pyscf/pbc/df/fft_jk.py b/gpu4pyscf/pbc/df/fft_jk.py
index e2a428bca..4ea309460 100644
--- a/gpu4pyscf/pbc/df/fft_jk.py
+++ b/gpu4pyscf/pbc/df/fft_jk.py
@@ -29,6 +29,7 @@
 from gpu4pyscf.lib import logger
 from gpu4pyscf.lib.cupy_helper import contract
 from gpu4pyscf.pbc import tools
+from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh
 
 def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None):
     '''Get the Coulomb (J) AO matrix at sampled k-points.
@@ -92,7 +93,7 @@ def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None):
 
     return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
 
-def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None,
+def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=None, kpts_band=None,
                exxdiv=None):
     '''Get the Coulomb (J) and exchange (K) AO matrices at sampled k-points.
 
@@ -130,7 +131,13 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None,
         mo_coeff = None
 
     ni = mydf._numint
-    kpts = np.asarray(kpts)
+    if kpts is None:
+        kpts = np.zeros((1, 3))
+        kmesh = [1] * 3
+    else:
+        kpts = np.asarray(kpts).reshape(-1, 3)
+        kmesh = kpts_to_kmesh(cell, kpts)
+
     dm_kpts = cp.asarray(dm_kpts, order='C')
     dms = _format_dms(dm_kpts, kpts)
     nset, nkpts, nao = dms.shape[:3]
@@ -174,7 +181,7 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None,
         for k1, ao1 in enumerate(ao1_kpts):
             ao1T = ao1.T
             kpt1 = kpts_band[k1]
-            coulG = tools.get_coulG(cell, kpt2-kpt1, exxdiv, mesh, kpts=kpts)
+            coulG = tools.get_coulG(cell, kpt2-kpt1, exxdiv, mesh, kmesh=kmesh)
             if is_zero(kpt1-kpt2):
                 expmikr = cp.array(1.)
             else:
@@ -357,14 +364,17 @@ def get_j_e1_kpts(mydf, dm_kpts, kpts=None):
         ej = ej[0]
     return ej
 
-def get_k_e1_kpts(mydf, dm_kpts, kpts=np.zeros((1,3)), exxdiv=None):
+def get_k_e1_kpts(mydf, dm_kpts, kpts=None, exxdiv=None):
     raise NotImplementedError
 
 def _ewald_exxdiv_for_G0(cell, kpts, dms, vk, kpts_band=None):
-    from pyscf.pbc.tools.pbc import madelung
     from gpu4pyscf.pbc.gto.int1e import int1e_ovlp
     s = int1e_ovlp(cell, kpts=kpts)
-    m = madelung(cell, kpts)
+    if kpts is None:
+        kmesh = [1] * 3
+    else:
+        kmesh = kpts_to_kmesh(cell, kpts)
+    m = tools.madelung(cell, kmesh=kmesh)
     if kpts is None:
         for i,dm in enumerate(dms):
             vk[i] += m * s.dot(dm).dot(s)
diff --git a/gpu4pyscf/pbc/df/grad/krhf.py b/gpu4pyscf/pbc/df/grad/krhf.py
index 9a34a845e..c315461a6 100644
--- a/gpu4pyscf/pbc/df/grad/krhf.py
+++ b/gpu4pyscf/pbc/df/grad/krhf.py
@@ -565,9 +565,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, kpts=None, hermi=0, j_factor=1., k_fact
         # cell.omega is not 0, which can lead to incorrect correction for
         # full-range ewald probe charge correction.
         if with_long_range:
-            weighted_coulG_at_G0 = madelung(cell, kpts, omega=0)
+            weighted_coulG_at_G0 = madelung(cell, omega=0, kmesh=bvk_kmesh)
         else:
-            weighted_coulG_at_G0 = madelung(cell, kpts, omega=-omega)
+            weighted_coulG_at_G0 = madelung(cell, omega=-omega, kmesh=bvk_kmesh)
         # The k_factor was previously scaled by 1/nkpts^2. The ewald term
         # requires a factor of 1/nkpts. Rescale k_factor by nkpts
         k_factor *= nkpts
diff --git a/gpu4pyscf/pbc/df/grad/kuhf.py b/gpu4pyscf/pbc/df/grad/kuhf.py
index 1a559b265..af4e93edc 100644
--- a/gpu4pyscf/pbc/df/grad/kuhf.py
+++ b/gpu4pyscf/pbc/df/grad/kuhf.py
@@ -495,9 +495,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, kpts=None, hermi=0, j_factor=1., k_fact
         # cell.omega is not 0, which can lead to incorrect correction for
         # full-range ewald probe charge correction.
         if with_long_range:
-            weighted_coulG_at_G0 = madelung(cell, kpts, omega=0)
+            weighted_coulG_at_G0 = madelung(cell, omega=0, kmesh=bvk_kmesh)
         else:
-            weighted_coulG_at_G0 = madelung(cell, kpts, omega=-omega)
+            weighted_coulG_at_G0 = madelung(cell, omega=-omega, kmesh=bvk_kmesh)
         # Note the additional minus sign for nabla_A ovlp = -nabla ovlp
         ejk_ewald *= k_factor*nkpts * weighted_coulG_at_G0
         ejk += ejk_ewald
diff --git a/gpu4pyscf/pbc/df/grad/rhf.py b/gpu4pyscf/pbc/df/grad/rhf.py
index 454a30ea6..362769168 100644
--- a/gpu4pyscf/pbc/df/grad/rhf.py
+++ b/gpu4pyscf/pbc/df/grad/rhf.py
@@ -408,9 +408,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, hermi=0, j_factor=1., k_factor=1.,
         # cell.omega is not 0, which can lead to incorrect correction for
         # full-range ewald probe charge correction.
         if with_long_range:
-            weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=0)
+            weighted_coulG_at_G0 = madelung(cell, omega=0)
         else:
-            weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=-omega)
+            weighted_coulG_at_G0 = madelung(cell, omega=-omega)
         # Note the additional minus sign for nabla_A ovlp = -nabla ovlp
         ejk_ewald *= .5 * k_factor * weighted_coulG_at_G0
         ejk += ejk_ewald
diff --git a/gpu4pyscf/pbc/df/grad/uhf.py b/gpu4pyscf/pbc/df/grad/uhf.py
index 281dfcd40..bd6b1f61b 100644
--- a/gpu4pyscf/pbc/df/grad/uhf.py
+++ b/gpu4pyscf/pbc/df/grad/uhf.py
@@ -398,9 +398,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, hermi=0, j_factor=1., k_factor=1.,
         # cell.omega is not 0, which can lead to incorrect correction for
         # full-range ewald probe charge correction.
         if with_long_range:
-            weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=0)
+            weighted_coulG_at_G0 = madelung(cell, omega=0)
         else:
-            weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=-omega)
+            weighted_coulG_at_G0 = madelung(cell, omega=-omega)
         # Note the additional minus sign for nabla_A ovlp = -nabla ovlp
         ejk_ewald *= k_factor * weighted_coulG_at_G0
         ejk += ejk_ewald
diff --git a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py
index 02c1b4d3e..e3b466752 100644
--- a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py
+++ b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py
@@ -355,7 +355,7 @@ def test_ek_ip1_gamma_point(self):
             for i in range(cell.natm):
                 p0, p1 = aoslices[i, 2:]
                 ref[i] = np.einsum('xnpq,nqp->x', vk[:,:,p0:p1], dm[:,:,p0:p1])
-            assert abs(ek_ewald - ref).max() < 1e-8
+            assert abs(ek_ewald - ref).max() < 3e-8
 
     @unittest.skipIf(num_devices > 1, '')
     def test_ek_ip1_kpts(self):
diff --git a/gpu4pyscf/pbc/scf/hf.py b/gpu4pyscf/pbc/scf/hf.py
index f10767224..5cfed5a6c 100644
--- a/gpu4pyscf/pbc/scf/hf.py
+++ b/gpu4pyscf/pbc/scf/hf.py
@@ -193,7 +193,7 @@ def dump_flags(self, verbose=None):
         cell = self.cell
         if ((cell.dimension >= 2 and cell.low_dim_ft_type != 'inf_vacuum') and
             isinstance(self.exxdiv, str) and self.exxdiv.lower() == 'ewald'):
-            madelung = tools.pbc.madelung(cell, self.kpt[None])
+            madelung = tools.pbc.madelung(cell)
             log.info('    madelung (= occupied orbital energy shift) = %s', madelung)
             log.info('    Total energy shift due to Ewald probe charge'
                      ' = -1/2 * Nelec*madelung = %.12g',
diff --git a/gpu4pyscf/pbc/scf/j_engine.py b/gpu4pyscf/pbc/scf/j_engine.py
index 26b85cc54..0b027867d 100644
--- a/gpu4pyscf/pbc/scf/j_engine.py
+++ b/gpu4pyscf/pbc/scf/j_engine.py
@@ -410,7 +410,7 @@ def get_j(self, dm, hermi=0, kpts=None, kpts_band=None, verbose=None):
         vj += self._get_j_lr(dm, hermi, kpts, kpts_band, verbose=verbose)
         return vj
 
-    def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, kpts=None):
+    def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, **kwargs):
         '''weighted LR Coulomb kernel. Mimic AFTDF.weighted_coulG'''
         if mesh is None:
             mesh = self.mesh
diff --git a/gpu4pyscf/pbc/scf/rsjk.py b/gpu4pyscf/pbc/scf/rsjk.py
index cea4110a0..8775eecd4 100644
--- a/gpu4pyscf/pbc/scf/rsjk.py
+++ b/gpu4pyscf/pbc/scf/rsjk.py
@@ -41,7 +41,8 @@
 from gpu4pyscf.pbc.df.fft import _check_kpts
 from gpu4pyscf.pbc.df.fft_jk import _format_dms
 from gpu4pyscf.pbc.dft.multigrid_v2 import _unique_image_pair
-from gpu4pyscf.pbc.tools.pbc import get_coulG, probe_charge_sr_coulomb
+from gpu4pyscf.pbc.tools.pbc import get_coulG, probe_charge_sr_coulomb, madelung
+from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh
 from gpu4pyscf.grad.rhf import _ejk_quartets_scheme
 from gpu4pyscf.pbc.gto import int1e
 
@@ -227,8 +228,10 @@ def _get_k_sr(self, dm, hermi, kpts=None, kpts_band=None, exxdiv=None, verbose=N
 
         if kpts is None:
             kpts = np.zeros((1, 3))
+            kmesh = [1] * 3
         else:
             kpts = kpts.reshape(-1, 3)
+            kmesh = kpts_to_kmesh(cell, kpts, bound_by_supmol=True)
         is_gamma_point = is_zero(kpts)
         if is_gamma_point:
             assert dms.dtype == np.float64
@@ -406,7 +409,7 @@ def proc(dms, dm_cond):
                 # This term rapidly decays to 0 for large k-mesh. In the
                 # FFTDF.get_jk based implementation, this contribution is
                 # included in the short-range part.
-                wcoulG_SR_at_G0 = probe_charge_sr_coulomb(cell, omega, kpts)
+                wcoulG_SR_at_G0 = probe_charge_sr_coulomb(cell, omega, kmesh)
             else:
                 # Remove the G=0 contribution to match the output of FFTDF.get_jk().
                 wcoulG_SR_at_G0 = np.pi / omega**2 / cell.vol
@@ -435,7 +438,8 @@ def _get_k_lr(self, dm, hermi, kpts=None, kpts_band=None, exxdiv=None,
             kpts = kpts[0]
         return get_k_kpts(self, dm, hermi, kpts, kpts_band, exxdiv=exxdiv)
 
-    def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, kpts=None):
+    def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None,
+                       kmesh=None):
         '''weighted LR Coulomb kernel. Mimic AFTDF.weighted_coulG'''
         if mesh is None:
             mesh = self.mesh
@@ -443,25 +447,25 @@ def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, kpts=None):
         omega = self.omega
         Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
         coulG = get_coulG(cell, kpt, exx=None, mesh=mesh, Gv=Gv,
-                          wrap_around=True, omega=omega, kpts=kpts)
+                          wrap_around=True, omega=omega, kmesh=kmesh)
         coulG *= kws
         if kpt is None or not is_zero(kpt):
             return coulG
 
         if exx == 'ewald':
-            Nk = len(kpts)
+            Nk = np.prod(kmesh)
             # In the full-range Coulomb, the ewald correction for get_k_lr is
-            #     +Nk*pbctools.madelung(cell, kpts) - np.pi / omega**2 * kws - probe_charge_sr_coulomb
+            #     +Nk*madelung(cell, kmesh) - np.pi / omega**2 * kws - probe_charge_sr_coulomb
             # The last two terms are included in the get_k_sr. The second term
             # (np.pi/omega**2) removes the contribution of the SR integrals at G=0.
             #
-            # pbctools.madelung(cell, kpts) includes three terms: -2*ewovrl, -2*ewself and -2*ewg.
+            # madelung(cell, kmesh) includes three terms: -2*ewovrl, -2*ewself and -2*ewg.
             # ewself is the sum of ewself_lr_point_charge and ewself_sr_at_G0.
-            # This correction is identical to madelung(cell, kpts, omega=omega),
+            # This correction is identical to madelung(cell, omega, kmesh),
             # which gives -2*(ewself_lr_point_charges + ewg) .
             # ewself_sr_at_G0 in ewovrl cancels out the second term (np.pi/omega**2);
             # -2*ewovrl cancels out the last term (probe_charge_sr_coulomb).
-            coulG[0] += Nk*pbctools.madelung(cell, kpts, omega=omega)
+            coulG[0] += Nk*madelung(cell, omega=omega, kmesh=kmesh)
         return coulG
 
     def _get_ejk_sr_ip1(self, dm, kpts=None, exxdiv=None,
@@ -716,8 +720,10 @@ def _get_ejk_sr_strain_deriv(self, dm, kpts=None, exxdiv=None,
 
         if kpts is None:
             kpts = np.zeros((1, 3))
+            kmesh = [1] * 3
         else:
             kpts = kpts.reshape(-1, 3)
+            kmesh = kpts_to_kmesh(cell, kpts, bound_by_supmol=True)
         is_gamma_point = is_zero(kpts)
         if is_gamma_point:
             assert dms.dtype == np.float64
@@ -906,7 +912,6 @@ def proc(dms, dm_cond):
             sigma += ej_G0 * np.eye(3)
             sigma -= wcoulG_SR_at_G0 * ek_G0 * np.eye(3)
             if exxdiv == 'ewald':
-                from pyscf.pbc.tools.pbc import madelung
                 from gpu4pyscf.pbc.grad.rks_stress import _finite_diff_cells
                 scaled_kpts = kpts.dot(cell.lattice_vectors().T)
                 ewald_G0_response = np.empty((3,3))
@@ -914,10 +919,8 @@ def proc(dms, dm_cond):
                 for i in range(3):
                     for j in range(i+1):
                         cell1, cell2 = _finite_diff_cells(cell, i, j, disp)
-                        kpts1 = scaled_kpts.dot(cell1.reciprocal_vectors(norm_to=1))
-                        kpts2 = scaled_kpts.dot(cell2.reciprocal_vectors(norm_to=1))
-                        e1 = nkpts * madelung(cell1, kpts1, omega=-omega)
-                        e2 = nkpts * madelung(cell2, kpts2, omega=-omega)
+                        e1 = nkpts * madelung(cell1, omega=-omega, kmesh=kmesh)
+                        e2 = nkpts * madelung(cell2, omega=-omega, kmesh=kmesh)
                         ewald_G0_response[j,i] = ewald_G0_response[i,j] = (e1-e2)/(2*disp)
                 ewald_G0_response *= ek_G0
                 sigma -= ewald_G0_response
diff --git a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py
index c78d86332..4ff16256f 100644
--- a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py
+++ b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py
@@ -396,7 +396,7 @@ def test_ejk_ip1_per_atom_gamma_point():
         for i in range(cell.natm):
             p0, p1 = aoslices[i, 2:]
             ref[i] = np.einsum('xnpq,nqp->x', vhf[:,:,p0:p1], dm[:,:,p0:p1])
-        assert abs(ejk - ref).max() < 1e-8
+        assert abs(ejk - ref).max() < 3e-8
     else:
         ejk = with_rsjk._get_ejk_sr_ip1(dm, kpts=kpt, exxdiv=None)
         ejk += with_rsjk._get_ejk_lr_ip1(dm, kpts=kpt, exxdiv=None)
diff --git a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_smearing.py b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_smearing.py
index e0f8580a0..deeaab2b3 100644
--- a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_smearing.py
+++ b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_smearing.py
@@ -17,7 +17,6 @@
 import cupy as cp
 from packaging.version import Version
 import pyscf
-from pyscf.pbc.scf import addons as cpu_addons
 from gpu4pyscf.pbc.scf import smearing
 
 
@@ -99,16 +98,17 @@ def test_uhf_smearing(self):
         mf.get_occ(mo_energy)
         self.assertAlmostEqual(mf.entropy, 0.42189309944541731, 9)
 
-    @unittest.skipIf(Version(pyscf.__version__) < Version('2.12'),
-                     'Require new interface developed in pyscf-2.12')
+    @unittest.skipIf(Version(pyscf.__version__) < Version('2.13'),
+                     'Require new interface developed in pyscf-2.13')
     def test_to_gpu(self):
-        mf = cpu_addons.smearing(cell.RHF(), sigma=0.1)
+        from pyscf.pbc.scf import smearing as cpu_smearing
+        mf = cell.RHF().smearing(sigma=0.1)
         gpu_mf = mf.to_gpu()
         assert isinstance(gpu_mf, smearing._SmearingKSCF)
         assert gpu_mf.sigma == 0.1
 
         mf = gpu_mf.to_cpu()
-        assert isinstance(mf, cpu_addons._SmearingKSCF)
+        assert isinstance(mf, cpu_smearing._SmearingKSCF)
         assert mf.sigma == 0.1
 
 if __name__ == "__main__":
diff --git a/gpu4pyscf/pbc/tools/pbc.py b/gpu4pyscf/pbc/tools/pbc.py
index 1be9da0a1..e117c7026 100644
--- a/gpu4pyscf/pbc/tools/pbc.py
+++ b/gpu4pyscf/pbc/tools/pbc.py
@@ -17,7 +17,7 @@
 from scipy.special import erfc
 from pyscf import lib
 from pyscf.pbc.gto.cell import Cell
-from pyscf.pbc.tools.pbc import madelung, get_monkhorst_pack_size
+from pyscf.pbc.tools.pbc import get_monkhorst_pack_size
 from gpu4pyscf.lib.cupy_helper import asarray
 
 def fft(f, mesh):
@@ -147,7 +147,7 @@ def _Gv_wrap_around(cell, Gv, k, mesh):
     return kG
 
 def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None,
-              wrap_around=True, omega=None, kpts=None, **kwargs):
+              wrap_around=True, omega=None, kmesh=None, **kwargs):
     '''Calculate the Coulomb kernel for all G-vectors, handling G=0 and exchange.
 
     Args:
@@ -181,7 +181,8 @@ def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None,
     elif exx and mf is not None:
         exxdiv = mf.exxdiv
     if exxdiv == 'vcut_sph' or exxdiv == 'vcut_ws':
-        return asarray(get_coulG(cell, k, exx, mf, mesh, Gv, wrap_around, omega, **kwargs))
+        return asarray(get_coulG(cell, k, exx, mf, mesh, Gv, wrap_around, omega,
+                                 kmesh=kmesh, **kwargs))
 
     if mesh is None:
         mesh = cell.mesh
@@ -286,29 +287,70 @@ def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None,
     # cancelled out by Coulomb integrals. Its leading term is calculated
     # using Ewald probe charge (the function madelung below)
     if cell.dimension > 0 and exxdiv == 'ewald' and G0_idx is not None:
-        if kpts is None:
-            kpts = np.zeros((1, 3))
+        if kmesh is None:
+            Nk = 1
             if mf is not None:
                 raise DeprecationWarning(
                     'Accessing kpts via mf.kpts is deprecated. '
                     'kpts should be passed to get_coulG explicitly.')
         else:
-            assert kpts.ndim == 2
-        Nk = len(kpts)
+            Nk = np.prod(kmesh)
         if omega is None or omega == 0:
-            coulG[G0_idx] += Nk*cell.vol*madelung(cell, kpts)
+            coulG[G0_idx] += Nk*cell.vol*madelung(cell, kmesh=kmesh)
         else: # G=0 term should be handled separately in RSGDF and RSJK
             raise NotImplementedError(f'exx=ewald for omega={omega}')
     return coulG
 
-def probe_charge_sr_coulomb(cell, omega, kpts=None):
-    if kpts is None:
-        kmesh = np.array([1, 1, 1])
-    else:
-        kmesh = get_monkhorst_pack_size(cell, kpts)
+def probe_charge_sr_coulomb(cell, omega, kmesh=None):
+    if kmesh is None:
+        kmesh = np.ones(3, dtype=int)
     rcut = (-np.log(cell.precision*1e-3)/omega**2)**.5
     Ls = cell.get_lattice_Ls(rcut=rcut) * kmesh
     r = np.linalg.norm(Ls, axis=1)
     r = r[(r > 1e-10) & (omega * r < 7)]
     ewovrl = .5 * (erfc(omega * r) / r).sum()
     return 2 * ewovrl * np.prod(kmesh)
+
+def madelung(cell, kpts=None, omega=None, kmesh=None):
+    if kmesh is None:
+        if kpts is None:
+            Nk = np.ones(3)
+        else:
+            Nk = get_monkhorst_pack_size(cell, kpts)
+    else:
+        Nk = kmesh
+    ecell = cell.copy(deep=False)
+    ecell._atm = np.array([[1, cell._env.size, 0, 0, 0, 0]])
+    ecell._env = np.append(cell._env, [0., 0., 0.])
+    ecell.unit = 'B'
+    #ecell.verbose = 0
+    ecell.a = a = np.einsum('xi,x->xi', cell.lattice_vectors(), Nk)
+
+    if omega is None:
+        omega = cell.omega
+
+    if omega == 0:
+        return -2*ecell.ewald()
+
+    else:
+        # cell.ewald function does not use the Coulomb kernel function
+        # get_coulG. When computing the nuclear interactions with attenuated
+        # Coulomb operator, the Ewald summation technique is not needed
+        # because the Coulomb kernel 4pi/G^2*exp(-G^2/4/omega**2) decays
+        # quickly.
+        precision = cell.precision
+        Ecut = 10.
+        Ecut = np.log(16*np.pi**2/(2*omega**2*(2*Ecut)**.5) / precision + 1.) * 2*omega**2
+        Ecut = np.log(16*np.pi**2/(2*omega**2*(2*Ecut)**.5) / precision + 1.) * 2*omega**2
+        mesh = cutoff_to_mesh(a, Ecut)
+        Gv, Gvbase, weights = ecell.get_Gv_weights(mesh)
+        wcoulG = get_coulG(ecell, Gv=Gv, omega=abs(omega), exxdiv=None) * weights
+        SI = ecell.get_SI(mesh=mesh)
+        ZSI = SI[0]
+        e_lr = (2*abs(omega)/np.pi**0.5 -
+                np.einsum('i,i,i->', ZSI.conj(), ZSI, wcoulG).real)
+        if omega > 0:
+            return e_lr
+        else:
+            e_fr = -2*ecell.ewald() # The full-range Coulomb
+            return e_fr - e_lr
diff --git a/gpu4pyscf/scf/tests/test_fermi_smearing.py b/gpu4pyscf/scf/tests/test_fermi_smearing.py
index 8ee81c5f2..521f4002f 100644
--- a/gpu4pyscf/scf/tests/test_fermi_smearing.py
+++ b/gpu4pyscf/scf/tests/test_fermi_smearing.py
@@ -61,16 +61,17 @@ def test_df_uhf_gradient(self):
         assert np.allclose(gpu_mf.e_tot, cpu_mf.e_tot, atol=1e-9)
         assert np.allclose(gpu_gradient, cpu_gradient, atol=1e-7)
 
-    @unittest.skipIf(Version(pyscf.__version__) < Version('2.12'),
-                     'Require new interface developed in pyscf-2.12')
+    @unittest.skipIf(Version(pyscf.__version__) < Version('2.13'),
+                     'Require new interface developed in pyscf-2.13')
     def test_to_gpu(self):
-        mf = cpu_addons.smearing(mol.RHF(), sigma=0.1)
+        from pyscf.scf import smearing as cpu_smearing
+        mf = mol.RHF().smearing(sigma=0.1)
         gpu_mf = mf.to_gpu()
         assert isinstance(gpu_mf, smearing._SmearingSCF)
         assert gpu_mf.sigma == 0.1
 
         mf = gpu_mf.to_cpu()
-        assert isinstance(mf, cpu_addons._SmearingSCF)
+        assert isinstance(mf, cpu_smearing._SmearingSCF)
         assert mf.sigma == 0.1
 
 if __name__ == "__main__":

From 4302b746f5a9cca7bf3ce61489ae9d60a7533ca9 Mon Sep 17 00:00:00 2001
From: Qiming Sun <qiming.sun@bytedance.com>
Date: Thu, 26 Mar 2026 14:58:08 -0700
Subject: [PATCH 5/5] Fix error caused by missing function import

---
 gpu4pyscf/pbc/df/aft_jk.py | 1 -
 gpu4pyscf/pbc/tools/pbc.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/gpu4pyscf/pbc/df/aft_jk.py b/gpu4pyscf/pbc/df/aft_jk.py
index 241abf6a5..9505a4e3a 100644
--- a/gpu4pyscf/pbc/df/aft_jk.py
+++ b/gpu4pyscf/pbc/df/aft_jk.py
@@ -837,7 +837,6 @@ def get_ek_strain_deriv(mydf, dm, kpts=None, exxdiv=None, omega=None):
         k_dm = contract('nkpr,nkrs->kps', k_dm, dm0)
         ek_G0 = .5 * cp.einsum('kij,kji->', s0, k_dm).real.get() / nkpts**2
 
-        scaled_kpts = kpts.dot(cell.lattice_vectors().T)
         ewald_G0 = np.empty((3,3))
         disp = max(1e-5, (cell.precision*.1)**.5)
         for i in range(3):
diff --git a/gpu4pyscf/pbc/tools/pbc.py b/gpu4pyscf/pbc/tools/pbc.py
index e117c7026..3f10962d6 100644
--- a/gpu4pyscf/pbc/tools/pbc.py
+++ b/gpu4pyscf/pbc/tools/pbc.py
@@ -17,7 +17,7 @@
 from scipy.special import erfc
 from pyscf import lib
 from pyscf.pbc.gto.cell import Cell
-from pyscf.pbc.tools.pbc import get_monkhorst_pack_size
+from pyscf.pbc.tools.pbc import get_monkhorst_pack_size, cutoff_to_mesh
 from gpu4pyscf.lib.cupy_helper import asarray
 
 def fft(f, mesh):