From 72e5710d6e020a893493b13a71da67c907310ea6 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Tue, 10 Mar 2026 15:35:46 -0700 Subject: [PATCH 1/5] Update dependency to pyscf 2.12 --- .github/workflows/unittest-dev.yml | 6 ++-- gpu4pyscf/pbc/df/tests/test_pbc_aft.py | 2 +- gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py | 14 ++++---- gpu4pyscf/scf/smearing.py | 37 +++++++++++++++++++--- requirements.txt | 4 +-- 5 files changed, 47 insertions(+), 16 deletions(-) diff --git a/.github/workflows/unittest-dev.yml b/.github/workflows/unittest-dev.yml index e2a966b63..8c1b675e3 100644 --- a/.github/workflows/unittest-dev.yml +++ b/.github/workflows/unittest-dev.yml @@ -5,16 +5,16 @@ on: inputs: pyscf_version: required: false - default: "2.8.0" + default: "2.12.1" cupy_versoin: required: false - default: "13.4.1" + default: "13.6.0" cutensor_version: required: false default: "2.2.0" libxc_version: required: false - default: "0.5.0" + default: "0.8.1" dispersion_version: required: false default: "1.5.0" diff --git a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py index 6f7abd357..02c1b4d3e 100644 --- a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py +++ b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py @@ -383,7 +383,7 @@ def test_ek_ip1_kpts(self): cell.precision = 1e-10 cell.build(0, 0) - myfft = fft_cpu.FFTDF(cell) + myfft = fft_cpu.FFTDF(cell, kpts=kpts) vk = myfft.get_k_e1(dm, kpts=kpts) aoslices = cell.aoslice_by_atom() ref = np.empty((cell.natm, 3)) diff --git a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py index 9aee55e21..c78d86332 100644 --- a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py +++ b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py @@ -134,7 +134,7 @@ def test_sr_vk_hermi1_kpts_vs_fft(): cell.precision = 1e-10 cell.build(0, 0) cell.omega = -rsjk.OMEGA - ref = fft.FFTDF(cell).get_jk(dm, with_j=False, kpts=kpts)[1].get() + ref = fft.FFTDF(cell, kpts=kpts).get_jk(dm, with_j=False, kpts=kpts)[1].get() assert abs(vk - ref).max() < 1e-8 def test_sr_vk_hermi0_gamma_point_vs_fft(): @@ -185,7 +185,7 @@ def test_sr_vk_hermi0_kpts_vs_fft(): cell.precision = 1e-10 cell.build(0, 0) cell.omega = -rsjk.OMEGA - ref = fft.FFTDF(cell).get_jk(dm, hermi=0, kpts=kpts, with_j=False)[1].get() + ref = fft.FFTDF(cell, kpts=kpts).get_jk(dm, hermi=0, kpts=kpts, with_j=False)[1].get() assert abs(vk - ref).max() < 1e-8 def test_vk_kpts_band_vs_fft(): @@ -234,7 +234,8 @@ def test_vk_hermi1_kpts_vs_fft(): cell.precision = 1e-10 cell.build(0, 0) - ref = fft.FFTDF(cell).get_jk(dm, hermi=1, with_j=False, kpts=kpts, exxdiv='ewald')[1].get() + ref = fft.FFTDF(cell, kpts=kpts).get_jk( + dm, hermi=1, with_j=False, kpts=kpts, exxdiv='ewald')[1].get() assert abs(vk - ref).max() < 1e-8 def test_vk_hermi0_gamma_point_vs_fft(): @@ -283,7 +284,8 @@ def test_vk_hermi0_kpts_vs_fft(): cell.precision = 1e-10 cell.build(0, 0) - ref = fft.FFTDF(cell).get_jk(dm, hermi=0, kpts=kpts, with_j=False)[1].get() + ref = fft.FFTDF(cell, kpts=kpts).get_jk( + dm, hermi=0, kpts=kpts, with_j=False)[1].get() assert abs(vk - ref).max() < 1e-8 def test_ejk_sr_ip1_per_atom_gamma_point(): @@ -336,7 +338,7 @@ def test_ejk_sr_ip1_per_atom_kpts(): assert abs(ejk.sum(axis=0)).max() < 1e-8 cell.omega = -rsjk.OMEGA - vj, vk = fft_cpu.FFTDF(cell).get_jk_e1(dm, kpts=kpts, exxdiv=exxdiv) + vj, vk = fft_cpu.FFTDF(cell, kpts=kpts).get_jk_e1(dm, kpts=kpts, exxdiv=exxdiv) vhf = vj - vk * .5 aoslices = cell.aoslice_by_atom() ref = np.empty((cell.natm, 3)) @@ -427,7 +429,7 @@ def test_ejk_ip1_per_atom_kpts(): ejk += with_rsjk._get_ejk_lr_ip1(dm, kpts=kpts, exxdiv=None) assert abs(ejk.sum(axis=0)).max() < 1e-8 - vj, vk = fft_cpu.FFTDF(cell).get_jk_e1(dm, kpts=kpts, exxdiv=None) + vj, vk = fft_cpu.FFTDF(cell, kpts=kpts).get_jk_e1(dm, kpts=kpts, exxdiv=None) vhf = vj - vk * .5 aoslices = cell.aoslice_by_atom() ref = np.empty((cell.natm, 3)) diff --git a/gpu4pyscf/scf/smearing.py b/gpu4pyscf/scf/smearing.py index 197afad18..df8b3c3fb 100644 --- a/gpu4pyscf/scf/smearing.py +++ b/gpu4pyscf/scf/smearing.py @@ -18,7 +18,7 @@ from pyscf import __config__, lib from pyscf.scf import addons as cpu_addons from pyscf.scf.addons import (_fermi_smearing_occ, _gaussian_smearing_occ, - _get_fermi, _smearing_optimize) + _get_fermi) from pyscf.pbc.tools import print_mo_energy_occ from gpu4pyscf.lib import logger @@ -55,6 +55,38 @@ def smearing_(mf, *args, **kwargs): return mf +def _smearing_optimize(f_occ, mo_es, nocc, sigma): + def rootfn(m): + mo_occ = f_occ(m, mo_es, sigma) + return mo_occ.sum() - nocc + + # it's okay to set small xtol according to the docs. + mu = scipy.optimize.bisect(rootfn, mo_es.min()-10., mo_es.max()+10., + xtol=1e-16, maxiter=10000) + + cur_err = abs(rootfn(mu)) + + # Check if we can further improve mu by moving it up/down + # by the minimum machine-representable amount. + # In many cases with Fermi-type smearing and sigma~1e-6, + # the minimum possible error is still >1e-11 because the + # smearing function is just so sharp. Because xtol is set to 1e-16 above, + # this should not take too many iterations. + + iters, maxiter = 0, 1000 + + while abs(rootfn(numpy.nextafter(mu, numpy.inf))) < cur_err and iters < maxiter: + mu = numpy.nextafter(mu, numpy.inf) + cur_err = abs(rootfn(mu)) + iters += 1 + + while abs(rootfn(numpy.nextafter(mu, -numpy.inf))) < cur_err and iters < maxiter: + mu = numpy.nextafter(mu, -numpy.inf) + cur_err = abs(rootfn(mu)) + iters += 1 + + return mu, f_occ(mu, mo_es, sigma) + class _SmearingSCF: __name_mixin__ = "Smearing" @@ -112,8 +144,6 @@ def get_occ(self, mo_energy=None, mo_coeff=None): if self.mu0 is None: mu_a, occa = _smearing_optimize(f_occ, mo_es[0], nocc[0], sigma) mu_b, occb = _smearing_optimize(f_occ, mo_es[1], nocc[1], sigma) - mu_a = mu_a[0] - mu_b = mu_b[0] else: if np.isscalar(self.mu0): mu_a = mu_b = self.mu0 @@ -167,7 +197,6 @@ def get_occ(self, mo_energy=None, mo_coeff=None): if self.mu0 is None: mu, mo_occs = _smearing_optimize(f_occ, mo_es, nelectron, sigma) - mu = mu[0] else: # If mu0 is given, fix mu instead of electron number. XXX -Chong Sun mu = self.mu0 diff --git a/requirements.txt b/requirements.txt index 3543d57b6..d8b5a2024 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ cutensor-cu12==2.2.0 gpu4pyscf-libxc-cuda12x==0.8.1 cupy-cuda12x==13.4.1 -pyscf==2.8.0 +pyscf==2.12.1 basis-set-exchange==0.11 pyscf-dispersion==1.5.0 -geometric==1.1.0 \ No newline at end of file +geometric==1.1.0 From dd86c90c546c9a75ce46f3d26c2e0484708af034 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Wed, 18 Mar 2026 00:07:08 -0700 Subject: [PATCH 2/5] Missing import module --- gpu4pyscf/scf/smearing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gpu4pyscf/scf/smearing.py b/gpu4pyscf/scf/smearing.py index df8b3c3fb..19f29805e 100644 --- a/gpu4pyscf/scf/smearing.py +++ b/gpu4pyscf/scf/smearing.py @@ -75,13 +75,13 @@ def rootfn(m): iters, maxiter = 0, 1000 - while abs(rootfn(numpy.nextafter(mu, numpy.inf))) < cur_err and iters < maxiter: - mu = numpy.nextafter(mu, numpy.inf) + while abs(rootfn(np.nextafter(mu, np.inf))) < cur_err and iters < maxiter: + mu = np.nextafter(mu, np.inf) cur_err = abs(rootfn(mu)) iters += 1 - while abs(rootfn(numpy.nextafter(mu, -numpy.inf))) < cur_err and iters < maxiter: - mu = numpy.nextafter(mu, -numpy.inf) + while abs(rootfn(np.nextafter(mu, -np.inf))) < cur_err and iters < maxiter: + mu = np.nextafter(mu, -np.inf) cur_err = abs(rootfn(mu)) iters += 1 From b3804ec8dd6cae83d9d5748c73c9a9efa37558ce Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Wed, 18 Mar 2026 08:24:12 -0700 Subject: [PATCH 3/5] Adjust to_gpu --- gpu4pyscf/_patch_pyscf.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/gpu4pyscf/_patch_pyscf.py b/gpu4pyscf/_patch_pyscf.py index e4a985674..c0f5985a9 100644 --- a/gpu4pyscf/_patch_pyscf.py +++ b/gpu4pyscf/_patch_pyscf.py @@ -252,22 +252,18 @@ def to_gpu(method, out=None): from importlib import import_module mod = import_module(method.__module__.replace('pyscf', 'gpu4pyscf')) - try: - cls = getattr(mod, method.__class__.__name__) - except AttributeError: - if hasattr(cls, 'from_cpu'): - # the customized to_gpu function can be accessed at module - # levelin gpu4pyscf. - return cls.from_cpu(method) - raise - - # Allow gpu4pyscf to customize the to_gpu method for PySCF classes. if hasattr(mod, 'from_cpu'): + # the customized to_gpu function can be accessed at module + # levelin gpu4pyscf. return mod.from_cpu(method) # A temporary GPU instance. This ensures to initialize private # attributes that are only available for GPU code. cls = getattr(mod, method.__class__.__name__) + # Allow gpu4pyscf to customize the to_gpu method for PySCF classes. + if hasattr(cls, 'from_cpu'): + return cls.from_cpu(method) + out = method.view(cls) elif hasattr(out, 'from_cpu'): From ec45cd1d51d7adda3bf54445bd10e4d591332db1 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Wed, 18 Mar 2026 08:55:12 -0700 Subject: [PATCH 4/5] Fix get_coulG and madelung interfaces --- gpu4pyscf/pbc/df/aft.py | 6 +- gpu4pyscf/pbc/df/aft_jk.py | 17 +++-- gpu4pyscf/pbc/df/fft_jk.py | 22 ++++-- gpu4pyscf/pbc/df/grad/krhf.py | 4 +- gpu4pyscf/pbc/df/grad/kuhf.py | 4 +- gpu4pyscf/pbc/df/grad/rhf.py | 4 +- gpu4pyscf/pbc/df/grad/uhf.py | 4 +- gpu4pyscf/pbc/df/tests/test_pbc_aft.py | 2 +- gpu4pyscf/pbc/scf/hf.py | 2 +- gpu4pyscf/pbc/scf/j_engine.py | 2 +- gpu4pyscf/pbc/scf/rsjk.py | 31 +++++---- gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py | 2 +- .../pbc/scf/tests/test_pbc_scf_smearing.py | 10 +-- gpu4pyscf/pbc/tools/pbc.py | 68 +++++++++++++++---- gpu4pyscf/scf/tests/test_fermi_smearing.py | 9 +-- 15 files changed, 122 insertions(+), 65 deletions(-) diff --git a/gpu4pyscf/pbc/df/aft.py b/gpu4pyscf/pbc/df/aft.py index 7d25cc6aa..a05a7d0a4 100644 --- a/gpu4pyscf/pbc/df/aft.py +++ b/gpu4pyscf/pbc/df/aft.py @@ -118,13 +118,15 @@ class AFTDFMixin: pw_loop = NotImplemented - def weighted_coulG(mydf, kpt=None, exx=None, mesh=None, omega=None, kpts=None): + def weighted_coulG(mydf, kpt=None, exx=None, mesh=None, omega=None, kmesh=None): '''Weighted regular Coulomb kernel''' cell = mydf.cell if mesh is None: mesh = mydf.mesh Gv, Gvbase, kws = cell.get_Gv_weights(mesh) - coulG = get_coulG(cell, kpt, exx, mesh=mesh, Gv=Gv, omega=omega, kpts=kpts) + if kmesh is not None: + mydf = None + coulG = get_coulG(cell, kpt, exx, mesh=mesh, Gv=Gv, omega=omega, kmesh=kmesh) coulG *= kws return coulG diff --git a/gpu4pyscf/pbc/df/aft_jk.py b/gpu4pyscf/pbc/df/aft_jk.py index c1f6703b1..241abf6a5 100644 --- a/gpu4pyscf/pbc/df/aft_jk.py +++ b/gpu4pyscf/pbc/df/aft_jk.py @@ -31,6 +31,7 @@ from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh from gpu4pyscf.pbc.df.ft_ao import FTOpt, libpbc from gpu4pyscf.pbc.df.fft_jk import _format_dms, _format_jks, _ewald_exxdiv_for_G0 +from gpu4pyscf.pbc.tools import madelung from gpu4pyscf.lib.cupy_helper import contract, get_avail_mem, asarray from gpu4pyscf.lib import logger from gpu4pyscf.scf.jk import SHM_SIZE @@ -216,7 +217,7 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=None, kpts_band=None, log.debug1('Gblksize = %d', Gblksize) for group_id, (kpt, ki_idx, kj_idx, self_conj) in enumerate(kpt_iters): - vkcoulG = mydf.weighted_coulG(kpt, exxdiv, mesh, kpts=kpts) * weight + vkcoulG = mydf.weighted_coulG(kpt, exxdiv, mesh, kmesh=bvk_kmesh) * weight for p0, p1 in lib.prange(0, ngrids, Gblksize): log.debug3('update_vk [%s:%s]', p0, p1) Gpq = ft_kern(Gv[p0:p1], kpt, kpts, kj_idx) @@ -461,7 +462,7 @@ def get_ek_ip1(mydf, dm, kpts=None, exxdiv=None): ek = cp.zeros((cell.natm, 3)) for group_id, (kp, kp_conj, ki_idx, kj_idx) in enumerate(bvk_kk_adapted_iter(kmesh)): kpt = kpts[kp] - wcoulG = mydf.weighted_coulG(kpt, exxdiv, mydf.mesh, kpts=kpts) + wcoulG = mydf.weighted_coulG(kpt, exxdiv, mydf.mesh, kmesh=kmesh) swap_2e = kp != kp_conj for p0, p1 in lib.prange(0, ngrids, blksize): nGv = p1 - p0 @@ -829,7 +830,6 @@ def get_ek_strain_deriv(mydf, dm, kpts=None, exxdiv=None, omega=None): if (exxdiv == 'ewald' and (cell.dimension == 3 or (cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum'))): - from pyscf.pbc.tools.pbc import madelung from gpu4pyscf.pbc.gto import int1e cell = mydf.cell s0 = int1e.int1e_ovlp(cell, kpts, kmesh) @@ -843,13 +843,12 @@ def get_ek_strain_deriv(mydf, dm, kpts=None, exxdiv=None, omega=None): for i in range(3): for j in range(i+1): cell1, cell2 = rks_stress._finite_diff_cells(cell, i, j, disp) - kpts1 = scaled_kpts.dot(cell1.reciprocal_vectors(norm_to=1)) - kpts2 = scaled_kpts.dot(cell2.reciprocal_vectors(norm_to=1)) - e1 = nkpts * madelung(cell1, kpts1, omega=omega) - e2 = nkpts * madelung(cell2, kpts2, omega=omega) + e1 = nkpts * madelung(cell1, omega=omega, kmesh=kmesh) + e2 = nkpts * madelung(cell2, omega=omega, kmesh=kmesh) ewald_G0[j,i] = ewald_G0[i,j] = (e1-e2)/(2*disp) ewald_G0 *= ek_G0 - ewald_G0 += int1e.ovlp_strain_deriv(cell, k_dm, kpts) * madelung(cell, kpts, omega=omega) + ovlp_strain = int1e.ovlp_strain_deriv(cell, k_dm, kpts) + ewald_G0 += ovlp_strain * madelung(cell, omega=omega, kmesh=kmesh) sigma += ewald_G0 log.timer_debug1('get_ek_ip1', *cpu0) @@ -929,7 +928,7 @@ def get_jk(mydf, dm, hermi=1, kpt=np.zeros(3), if (exxdiv == 'ewald' and (cell.dimension < 2 or # 0D and 1D are computed with inf_vacuum (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))): - _ewald_exxdiv_for_G0(cell, kpt, dms, vk) + _ewald_exxdiv_for_G0(cell, None, dms, vk) if k_real: vk = vk.real vk = vk.reshape(dm.shape) diff --git a/gpu4pyscf/pbc/df/fft_jk.py b/gpu4pyscf/pbc/df/fft_jk.py index e2a428bca..4ea309460 100644 --- a/gpu4pyscf/pbc/df/fft_jk.py +++ b/gpu4pyscf/pbc/df/fft_jk.py @@ -29,6 +29,7 @@ from gpu4pyscf.lib import logger from gpu4pyscf.lib.cupy_helper import contract from gpu4pyscf.pbc import tools +from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None): '''Get the Coulomb (J) AO matrix at sampled k-points. @@ -92,7 +93,7 @@ def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None): return _format_jks(vj_kpts, dm_kpts, input_band, kpts) -def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None, +def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=None, kpts_band=None, exxdiv=None): '''Get the Coulomb (J) and exchange (K) AO matrices at sampled k-points. @@ -130,7 +131,13 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None, mo_coeff = None ni = mydf._numint - kpts = np.asarray(kpts) + if kpts is None: + kpts = np.zeros((1, 3)) + kmesh = [1] * 3 + else: + kpts = np.asarray(kpts).reshape(-1, 3) + kmesh = kpts_to_kmesh(cell, kpts) + dm_kpts = cp.asarray(dm_kpts, order='C') dms = _format_dms(dm_kpts, kpts) nset, nkpts, nao = dms.shape[:3] @@ -174,7 +181,7 @@ def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None, for k1, ao1 in enumerate(ao1_kpts): ao1T = ao1.T kpt1 = kpts_band[k1] - coulG = tools.get_coulG(cell, kpt2-kpt1, exxdiv, mesh, kpts=kpts) + coulG = tools.get_coulG(cell, kpt2-kpt1, exxdiv, mesh, kmesh=kmesh) if is_zero(kpt1-kpt2): expmikr = cp.array(1.) else: @@ -357,14 +364,17 @@ def get_j_e1_kpts(mydf, dm_kpts, kpts=None): ej = ej[0] return ej -def get_k_e1_kpts(mydf, dm_kpts, kpts=np.zeros((1,3)), exxdiv=None): +def get_k_e1_kpts(mydf, dm_kpts, kpts=None, exxdiv=None): raise NotImplementedError def _ewald_exxdiv_for_G0(cell, kpts, dms, vk, kpts_band=None): - from pyscf.pbc.tools.pbc import madelung from gpu4pyscf.pbc.gto.int1e import int1e_ovlp s = int1e_ovlp(cell, kpts=kpts) - m = madelung(cell, kpts) + if kpts is None: + kmesh = [1] * 3 + else: + kmesh = kpts_to_kmesh(cell, kpts) + m = tools.madelung(cell, kmesh=kmesh) if kpts is None: for i,dm in enumerate(dms): vk[i] += m * s.dot(dm).dot(s) diff --git a/gpu4pyscf/pbc/df/grad/krhf.py b/gpu4pyscf/pbc/df/grad/krhf.py index 9a34a845e..c315461a6 100644 --- a/gpu4pyscf/pbc/df/grad/krhf.py +++ b/gpu4pyscf/pbc/df/grad/krhf.py @@ -565,9 +565,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, kpts=None, hermi=0, j_factor=1., k_fact # cell.omega is not 0, which can lead to incorrect correction for # full-range ewald probe charge correction. if with_long_range: - weighted_coulG_at_G0 = madelung(cell, kpts, omega=0) + weighted_coulG_at_G0 = madelung(cell, omega=0, kmesh=bvk_kmesh) else: - weighted_coulG_at_G0 = madelung(cell, kpts, omega=-omega) + weighted_coulG_at_G0 = madelung(cell, omega=-omega, kmesh=bvk_kmesh) # The k_factor was previously scaled by 1/nkpts^2. The ewald term # requires a factor of 1/nkpts. Rescale k_factor by nkpts k_factor *= nkpts diff --git a/gpu4pyscf/pbc/df/grad/kuhf.py b/gpu4pyscf/pbc/df/grad/kuhf.py index 1a559b265..af4e93edc 100644 --- a/gpu4pyscf/pbc/df/grad/kuhf.py +++ b/gpu4pyscf/pbc/df/grad/kuhf.py @@ -495,9 +495,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, kpts=None, hermi=0, j_factor=1., k_fact # cell.omega is not 0, which can lead to incorrect correction for # full-range ewald probe charge correction. if with_long_range: - weighted_coulG_at_G0 = madelung(cell, kpts, omega=0) + weighted_coulG_at_G0 = madelung(cell, omega=0, kmesh=bvk_kmesh) else: - weighted_coulG_at_G0 = madelung(cell, kpts, omega=-omega) + weighted_coulG_at_G0 = madelung(cell, omega=-omega, kmesh=bvk_kmesh) # Note the additional minus sign for nabla_A ovlp = -nabla ovlp ejk_ewald *= k_factor*nkpts * weighted_coulG_at_G0 ejk += ejk_ewald diff --git a/gpu4pyscf/pbc/df/grad/rhf.py b/gpu4pyscf/pbc/df/grad/rhf.py index 454a30ea6..362769168 100644 --- a/gpu4pyscf/pbc/df/grad/rhf.py +++ b/gpu4pyscf/pbc/df/grad/rhf.py @@ -408,9 +408,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, hermi=0, j_factor=1., k_factor=1., # cell.omega is not 0, which can lead to incorrect correction for # full-range ewald probe charge correction. if with_long_range: - weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=0) + weighted_coulG_at_G0 = madelung(cell, omega=0) else: - weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=-omega) + weighted_coulG_at_G0 = madelung(cell, omega=-omega) # Note the additional minus sign for nabla_A ovlp = -nabla ovlp ejk_ewald *= .5 * k_factor * weighted_coulG_at_G0 ejk += ejk_ewald diff --git a/gpu4pyscf/pbc/df/grad/uhf.py b/gpu4pyscf/pbc/df/grad/uhf.py index 281dfcd40..bd6b1f61b 100644 --- a/gpu4pyscf/pbc/df/grad/uhf.py +++ b/gpu4pyscf/pbc/df/grad/uhf.py @@ -398,9 +398,9 @@ def _jk_energy_per_atom(int3c2e_opt, dm, hermi=0, j_factor=1., k_factor=1., # cell.omega is not 0, which can lead to incorrect correction for # full-range ewald probe charge correction. if with_long_range: - weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=0) + weighted_coulG_at_G0 = madelung(cell, omega=0) else: - weighted_coulG_at_G0 = madelung(cell, np.zeros((1, 3)), omega=-omega) + weighted_coulG_at_G0 = madelung(cell, omega=-omega) # Note the additional minus sign for nabla_A ovlp = -nabla ovlp ejk_ewald *= k_factor * weighted_coulG_at_G0 ejk += ejk_ewald diff --git a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py index 02c1b4d3e..e3b466752 100644 --- a/gpu4pyscf/pbc/df/tests/test_pbc_aft.py +++ b/gpu4pyscf/pbc/df/tests/test_pbc_aft.py @@ -355,7 +355,7 @@ def test_ek_ip1_gamma_point(self): for i in range(cell.natm): p0, p1 = aoslices[i, 2:] ref[i] = np.einsum('xnpq,nqp->x', vk[:,:,p0:p1], dm[:,:,p0:p1]) - assert abs(ek_ewald - ref).max() < 1e-8 + assert abs(ek_ewald - ref).max() < 3e-8 @unittest.skipIf(num_devices > 1, '') def test_ek_ip1_kpts(self): diff --git a/gpu4pyscf/pbc/scf/hf.py b/gpu4pyscf/pbc/scf/hf.py index f10767224..5cfed5a6c 100644 --- a/gpu4pyscf/pbc/scf/hf.py +++ b/gpu4pyscf/pbc/scf/hf.py @@ -193,7 +193,7 @@ def dump_flags(self, verbose=None): cell = self.cell if ((cell.dimension >= 2 and cell.low_dim_ft_type != 'inf_vacuum') and isinstance(self.exxdiv, str) and self.exxdiv.lower() == 'ewald'): - madelung = tools.pbc.madelung(cell, self.kpt[None]) + madelung = tools.pbc.madelung(cell) log.info(' madelung (= occupied orbital energy shift) = %s', madelung) log.info(' Total energy shift due to Ewald probe charge' ' = -1/2 * Nelec*madelung = %.12g', diff --git a/gpu4pyscf/pbc/scf/j_engine.py b/gpu4pyscf/pbc/scf/j_engine.py index 26b85cc54..0b027867d 100644 --- a/gpu4pyscf/pbc/scf/j_engine.py +++ b/gpu4pyscf/pbc/scf/j_engine.py @@ -410,7 +410,7 @@ def get_j(self, dm, hermi=0, kpts=None, kpts_band=None, verbose=None): vj += self._get_j_lr(dm, hermi, kpts, kpts_band, verbose=verbose) return vj - def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, kpts=None): + def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, **kwargs): '''weighted LR Coulomb kernel. Mimic AFTDF.weighted_coulG''' if mesh is None: mesh = self.mesh diff --git a/gpu4pyscf/pbc/scf/rsjk.py b/gpu4pyscf/pbc/scf/rsjk.py index cea4110a0..8775eecd4 100644 --- a/gpu4pyscf/pbc/scf/rsjk.py +++ b/gpu4pyscf/pbc/scf/rsjk.py @@ -41,7 +41,8 @@ from gpu4pyscf.pbc.df.fft import _check_kpts from gpu4pyscf.pbc.df.fft_jk import _format_dms from gpu4pyscf.pbc.dft.multigrid_v2 import _unique_image_pair -from gpu4pyscf.pbc.tools.pbc import get_coulG, probe_charge_sr_coulomb +from gpu4pyscf.pbc.tools.pbc import get_coulG, probe_charge_sr_coulomb, madelung +from gpu4pyscf.pbc.tools.k2gamma import kpts_to_kmesh from gpu4pyscf.grad.rhf import _ejk_quartets_scheme from gpu4pyscf.pbc.gto import int1e @@ -227,8 +228,10 @@ def _get_k_sr(self, dm, hermi, kpts=None, kpts_band=None, exxdiv=None, verbose=N if kpts is None: kpts = np.zeros((1, 3)) + kmesh = [1] * 3 else: kpts = kpts.reshape(-1, 3) + kmesh = kpts_to_kmesh(cell, kpts, bound_by_supmol=True) is_gamma_point = is_zero(kpts) if is_gamma_point: assert dms.dtype == np.float64 @@ -406,7 +409,7 @@ def proc(dms, dm_cond): # This term rapidly decays to 0 for large k-mesh. In the # FFTDF.get_jk based implementation, this contribution is # included in the short-range part. - wcoulG_SR_at_G0 = probe_charge_sr_coulomb(cell, omega, kpts) + wcoulG_SR_at_G0 = probe_charge_sr_coulomb(cell, omega, kmesh) else: # Remove the G=0 contribution to match the output of FFTDF.get_jk(). wcoulG_SR_at_G0 = np.pi / omega**2 / cell.vol @@ -435,7 +438,8 @@ def _get_k_lr(self, dm, hermi, kpts=None, kpts_band=None, exxdiv=None, kpts = kpts[0] return get_k_kpts(self, dm, hermi, kpts, kpts_band, exxdiv=exxdiv) - def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, kpts=None): + def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, + kmesh=None): '''weighted LR Coulomb kernel. Mimic AFTDF.weighted_coulG''' if mesh is None: mesh = self.mesh @@ -443,25 +447,25 @@ def weighted_coulG(self, kpt=None, exx=None, mesh=None, omega=None, kpts=None): omega = self.omega Gv, Gvbase, kws = cell.get_Gv_weights(mesh) coulG = get_coulG(cell, kpt, exx=None, mesh=mesh, Gv=Gv, - wrap_around=True, omega=omega, kpts=kpts) + wrap_around=True, omega=omega, kmesh=kmesh) coulG *= kws if kpt is None or not is_zero(kpt): return coulG if exx == 'ewald': - Nk = len(kpts) + Nk = np.prod(kmesh) # In the full-range Coulomb, the ewald correction for get_k_lr is - # +Nk*pbctools.madelung(cell, kpts) - np.pi / omega**2 * kws - probe_charge_sr_coulomb + # +Nk*madelung(cell, kmesh) - np.pi / omega**2 * kws - probe_charge_sr_coulomb # The last two terms are included in the get_k_sr. The second term # (np.pi/omega**2) removes the contribution of the SR integrals at G=0. # - # pbctools.madelung(cell, kpts) includes three terms: -2*ewovrl, -2*ewself and -2*ewg. + # madelung(cell, kmesh) includes three terms: -2*ewovrl, -2*ewself and -2*ewg. # ewself is the sum of ewself_lr_point_charge and ewself_sr_at_G0. - # This correction is identical to madelung(cell, kpts, omega=omega), + # This correction is identical to madelung(cell, omega, kmesh), # which gives -2*(ewself_lr_point_charges + ewg) . # ewself_sr_at_G0 in ewovrl cancels out the second term (np.pi/omega**2); # -2*ewovrl cancels out the last term (probe_charge_sr_coulomb). - coulG[0] += Nk*pbctools.madelung(cell, kpts, omega=omega) + coulG[0] += Nk*madelung(cell, omega=omega, kmesh=kmesh) return coulG def _get_ejk_sr_ip1(self, dm, kpts=None, exxdiv=None, @@ -716,8 +720,10 @@ def _get_ejk_sr_strain_deriv(self, dm, kpts=None, exxdiv=None, if kpts is None: kpts = np.zeros((1, 3)) + kmesh = [1] * 3 else: kpts = kpts.reshape(-1, 3) + kmesh = kpts_to_kmesh(cell, kpts, bound_by_supmol=True) is_gamma_point = is_zero(kpts) if is_gamma_point: assert dms.dtype == np.float64 @@ -906,7 +912,6 @@ def proc(dms, dm_cond): sigma += ej_G0 * np.eye(3) sigma -= wcoulG_SR_at_G0 * ek_G0 * np.eye(3) if exxdiv == 'ewald': - from pyscf.pbc.tools.pbc import madelung from gpu4pyscf.pbc.grad.rks_stress import _finite_diff_cells scaled_kpts = kpts.dot(cell.lattice_vectors().T) ewald_G0_response = np.empty((3,3)) @@ -914,10 +919,8 @@ def proc(dms, dm_cond): for i in range(3): for j in range(i+1): cell1, cell2 = _finite_diff_cells(cell, i, j, disp) - kpts1 = scaled_kpts.dot(cell1.reciprocal_vectors(norm_to=1)) - kpts2 = scaled_kpts.dot(cell2.reciprocal_vectors(norm_to=1)) - e1 = nkpts * madelung(cell1, kpts1, omega=-omega) - e2 = nkpts * madelung(cell2, kpts2, omega=-omega) + e1 = nkpts * madelung(cell1, omega=-omega, kmesh=kmesh) + e2 = nkpts * madelung(cell2, omega=-omega, kmesh=kmesh) ewald_G0_response[j,i] = ewald_G0_response[i,j] = (e1-e2)/(2*disp) ewald_G0_response *= ek_G0 sigma -= ewald_G0_response diff --git a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py index c78d86332..4ff16256f 100644 --- a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py +++ b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_jk.py @@ -396,7 +396,7 @@ def test_ejk_ip1_per_atom_gamma_point(): for i in range(cell.natm): p0, p1 = aoslices[i, 2:] ref[i] = np.einsum('xnpq,nqp->x', vhf[:,:,p0:p1], dm[:,:,p0:p1]) - assert abs(ejk - ref).max() < 1e-8 + assert abs(ejk - ref).max() < 3e-8 else: ejk = with_rsjk._get_ejk_sr_ip1(dm, kpts=kpt, exxdiv=None) ejk += with_rsjk._get_ejk_lr_ip1(dm, kpts=kpt, exxdiv=None) diff --git a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_smearing.py b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_smearing.py index e0f8580a0..deeaab2b3 100644 --- a/gpu4pyscf/pbc/scf/tests/test_pbc_scf_smearing.py +++ b/gpu4pyscf/pbc/scf/tests/test_pbc_scf_smearing.py @@ -17,7 +17,6 @@ import cupy as cp from packaging.version import Version import pyscf -from pyscf.pbc.scf import addons as cpu_addons from gpu4pyscf.pbc.scf import smearing @@ -99,16 +98,17 @@ def test_uhf_smearing(self): mf.get_occ(mo_energy) self.assertAlmostEqual(mf.entropy, 0.42189309944541731, 9) - @unittest.skipIf(Version(pyscf.__version__) < Version('2.12'), - 'Require new interface developed in pyscf-2.12') + @unittest.skipIf(Version(pyscf.__version__) < Version('2.13'), + 'Require new interface developed in pyscf-2.13') def test_to_gpu(self): - mf = cpu_addons.smearing(cell.RHF(), sigma=0.1) + from pyscf.pbc.scf import smearing as cpu_smearing + mf = cell.RHF().smearing(sigma=0.1) gpu_mf = mf.to_gpu() assert isinstance(gpu_mf, smearing._SmearingKSCF) assert gpu_mf.sigma == 0.1 mf = gpu_mf.to_cpu() - assert isinstance(mf, cpu_addons._SmearingKSCF) + assert isinstance(mf, cpu_smearing._SmearingKSCF) assert mf.sigma == 0.1 if __name__ == "__main__": diff --git a/gpu4pyscf/pbc/tools/pbc.py b/gpu4pyscf/pbc/tools/pbc.py index 1be9da0a1..e117c7026 100644 --- a/gpu4pyscf/pbc/tools/pbc.py +++ b/gpu4pyscf/pbc/tools/pbc.py @@ -17,7 +17,7 @@ from scipy.special import erfc from pyscf import lib from pyscf.pbc.gto.cell import Cell -from pyscf.pbc.tools.pbc import madelung, get_monkhorst_pack_size +from pyscf.pbc.tools.pbc import get_monkhorst_pack_size from gpu4pyscf.lib.cupy_helper import asarray def fft(f, mesh): @@ -147,7 +147,7 @@ def _Gv_wrap_around(cell, Gv, k, mesh): return kG def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None, - wrap_around=True, omega=None, kpts=None, **kwargs): + wrap_around=True, omega=None, kmesh=None, **kwargs): '''Calculate the Coulomb kernel for all G-vectors, handling G=0 and exchange. Args: @@ -181,7 +181,8 @@ def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None, elif exx and mf is not None: exxdiv = mf.exxdiv if exxdiv == 'vcut_sph' or exxdiv == 'vcut_ws': - return asarray(get_coulG(cell, k, exx, mf, mesh, Gv, wrap_around, omega, **kwargs)) + return asarray(get_coulG(cell, k, exx, mf, mesh, Gv, wrap_around, omega, + kmesh=kmesh, **kwargs)) if mesh is None: mesh = cell.mesh @@ -286,29 +287,70 @@ def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None, # cancelled out by Coulomb integrals. Its leading term is calculated # using Ewald probe charge (the function madelung below) if cell.dimension > 0 and exxdiv == 'ewald' and G0_idx is not None: - if kpts is None: - kpts = np.zeros((1, 3)) + if kmesh is None: + Nk = 1 if mf is not None: raise DeprecationWarning( 'Accessing kpts via mf.kpts is deprecated. ' 'kpts should be passed to get_coulG explicitly.') else: - assert kpts.ndim == 2 - Nk = len(kpts) + Nk = np.prod(kmesh) if omega is None or omega == 0: - coulG[G0_idx] += Nk*cell.vol*madelung(cell, kpts) + coulG[G0_idx] += Nk*cell.vol*madelung(cell, kmesh=kmesh) else: # G=0 term should be handled separately in RSGDF and RSJK raise NotImplementedError(f'exx=ewald for omega={omega}') return coulG -def probe_charge_sr_coulomb(cell, omega, kpts=None): - if kpts is None: - kmesh = np.array([1, 1, 1]) - else: - kmesh = get_monkhorst_pack_size(cell, kpts) +def probe_charge_sr_coulomb(cell, omega, kmesh=None): + if kmesh is None: + kmesh = np.ones(3, dtype=int) rcut = (-np.log(cell.precision*1e-3)/omega**2)**.5 Ls = cell.get_lattice_Ls(rcut=rcut) * kmesh r = np.linalg.norm(Ls, axis=1) r = r[(r > 1e-10) & (omega * r < 7)] ewovrl = .5 * (erfc(omega * r) / r).sum() return 2 * ewovrl * np.prod(kmesh) + +def madelung(cell, kpts=None, omega=None, kmesh=None): + if kmesh is None: + if kpts is None: + Nk = np.ones(3) + else: + Nk = get_monkhorst_pack_size(cell, kpts) + else: + Nk = kmesh + ecell = cell.copy(deep=False) + ecell._atm = np.array([[1, cell._env.size, 0, 0, 0, 0]]) + ecell._env = np.append(cell._env, [0., 0., 0.]) + ecell.unit = 'B' + #ecell.verbose = 0 + ecell.a = a = np.einsum('xi,x->xi', cell.lattice_vectors(), Nk) + + if omega is None: + omega = cell.omega + + if omega == 0: + return -2*ecell.ewald() + + else: + # cell.ewald function does not use the Coulomb kernel function + # get_coulG. When computing the nuclear interactions with attenuated + # Coulomb operator, the Ewald summation technique is not needed + # because the Coulomb kernel 4pi/G^2*exp(-G^2/4/omega**2) decays + # quickly. + precision = cell.precision + Ecut = 10. + Ecut = np.log(16*np.pi**2/(2*omega**2*(2*Ecut)**.5) / precision + 1.) * 2*omega**2 + Ecut = np.log(16*np.pi**2/(2*omega**2*(2*Ecut)**.5) / precision + 1.) * 2*omega**2 + mesh = cutoff_to_mesh(a, Ecut) + Gv, Gvbase, weights = ecell.get_Gv_weights(mesh) + wcoulG = get_coulG(ecell, Gv=Gv, omega=abs(omega), exxdiv=None) * weights + SI = ecell.get_SI(mesh=mesh) + ZSI = SI[0] + e_lr = (2*abs(omega)/np.pi**0.5 - + np.einsum('i,i,i->', ZSI.conj(), ZSI, wcoulG).real) + if omega > 0: + return e_lr + else: + e_fr = -2*ecell.ewald() # The full-range Coulomb + return e_fr - e_lr diff --git a/gpu4pyscf/scf/tests/test_fermi_smearing.py b/gpu4pyscf/scf/tests/test_fermi_smearing.py index 8ee81c5f2..521f4002f 100644 --- a/gpu4pyscf/scf/tests/test_fermi_smearing.py +++ b/gpu4pyscf/scf/tests/test_fermi_smearing.py @@ -61,16 +61,17 @@ def test_df_uhf_gradient(self): assert np.allclose(gpu_mf.e_tot, cpu_mf.e_tot, atol=1e-9) assert np.allclose(gpu_gradient, cpu_gradient, atol=1e-7) - @unittest.skipIf(Version(pyscf.__version__) < Version('2.12'), - 'Require new interface developed in pyscf-2.12') + @unittest.skipIf(Version(pyscf.__version__) < Version('2.13'), + 'Require new interface developed in pyscf-2.13') def test_to_gpu(self): - mf = cpu_addons.smearing(mol.RHF(), sigma=0.1) + from pyscf.scf import smearing as cpu_smearing + mf = mol.RHF().smearing(sigma=0.1) gpu_mf = mf.to_gpu() assert isinstance(gpu_mf, smearing._SmearingSCF) assert gpu_mf.sigma == 0.1 mf = gpu_mf.to_cpu() - assert isinstance(mf, cpu_addons._SmearingSCF) + assert isinstance(mf, cpu_smearing._SmearingSCF) assert mf.sigma == 0.1 if __name__ == "__main__": From 4302b746f5a9cca7bf3ce61489ae9d60a7533ca9 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Thu, 26 Mar 2026 14:58:08 -0700 Subject: [PATCH 5/5] Fix error caused by missing function import --- gpu4pyscf/pbc/df/aft_jk.py | 1 - gpu4pyscf/pbc/tools/pbc.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/gpu4pyscf/pbc/df/aft_jk.py b/gpu4pyscf/pbc/df/aft_jk.py index 241abf6a5..9505a4e3a 100644 --- a/gpu4pyscf/pbc/df/aft_jk.py +++ b/gpu4pyscf/pbc/df/aft_jk.py @@ -837,7 +837,6 @@ def get_ek_strain_deriv(mydf, dm, kpts=None, exxdiv=None, omega=None): k_dm = contract('nkpr,nkrs->kps', k_dm, dm0) ek_G0 = .5 * cp.einsum('kij,kji->', s0, k_dm).real.get() / nkpts**2 - scaled_kpts = kpts.dot(cell.lattice_vectors().T) ewald_G0 = np.empty((3,3)) disp = max(1e-5, (cell.precision*.1)**.5) for i in range(3): diff --git a/gpu4pyscf/pbc/tools/pbc.py b/gpu4pyscf/pbc/tools/pbc.py index e117c7026..3f10962d6 100644 --- a/gpu4pyscf/pbc/tools/pbc.py +++ b/gpu4pyscf/pbc/tools/pbc.py @@ -17,7 +17,7 @@ from scipy.special import erfc from pyscf import lib from pyscf.pbc.gto.cell import Cell -from pyscf.pbc.tools.pbc import get_monkhorst_pack_size +from pyscf.pbc.tools.pbc import get_monkhorst_pack_size, cutoff_to_mesh from gpu4pyscf.lib.cupy_helper import asarray def fft(f, mesh):