From 611b1d6d76fa7f7fcedf2356bd03ab99cb1de910 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 12 May 2026 14:43:46 +0800 Subject: [PATCH 01/30] add the first version --- gpu4pyscf/dmet/__init__.py | 25 ++ gpu4pyscf/dmet/dmet.py | 658 ++++++++++++++++++++++++++++++ gpu4pyscf/dmet/tests/test_dmet.py | 121 ++++++ 3 files changed, 804 insertions(+) create mode 100644 gpu4pyscf/dmet/__init__.py create mode 100644 gpu4pyscf/dmet/dmet.py create mode 100644 gpu4pyscf/dmet/tests/test_dmet.py diff --git a/gpu4pyscf/dmet/__init__.py b/gpu4pyscf/dmet/__init__.py new file mode 100644 index 000000000..883b3e735 --- /dev/null +++ b/gpu4pyscf/dmet/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .dmet import ( + DMET, + get_fragment_ao_indices, + schmidt_decompose, + build_embedding_basis, + build_core_dm, + transform_h1, + transform_eri, + lowdin_orth, +) diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py new file mode 100644 index 000000000..7c7a893d9 --- /dev/null +++ b/gpu4pyscf/dmet/dmet.py @@ -0,0 +1,658 @@ +# Copyright 2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import copy +import numpy as np +import cupy as cp +import pyscf +from pyscf import gto, ao2mo +import gpu4pyscf +from gpu4pyscf.scf import hf as gpu_hf + + +def _as_cupy(x): + if isinstance(x, cp.ndarray): + return x + return cp.asarray(x) + + +# TODO: use already implemented lowdin_orth +def lowdin_orth(s): + """ + Loewdin symmetric orthogonalization. + + Given an AO overlap matrix ``S``, return ``X = S^{-1/2}`` and + ``X_inv = S^{1/2}``. Eigenvalues of ``S`` smaller than 1e-12 are + treated as linearly dependent and dropped. + + Returns + ------- + X : cp.ndarray, shape (nao, nao_orth) + AO -> orthonormal AO transformation. Columns of ``X`` are the + coefficients of the orthonormal AOs in the AO basis. + X_inv : cp.ndarray, shape (nao_orth, nao) + Inverse transformation: ``X_inv = X^T S``. + """ + s = _as_cupy(s) + s = 0.5 * (s + s.T) + eigvals, eigvecs = cp.linalg.eigh(s) + keep = eigvals > 1e-12 + if not cp.all(keep): + eigvals = eigvals[keep] + eigvecs = eigvecs[:, keep] + inv_sqrt = 1.0 / cp.sqrt(eigvals) + sqrt = cp.sqrt(eigvals) + X = (eigvecs * inv_sqrt) @ eigvecs.T # S^{-1/2} + X_inv = (eigvecs * sqrt) @ eigvecs.T # S^{+1/2} + return X, X_inv + + +def get_fragment_ao_indices(mol, frag_atoms): + """ + Return the atomic-orbital indices that belong to the listed atoms. + + Parameters + ---------- + mol : pyscf.gto.Mole + The full system molecule. + frag_atoms : sequence of int + Atom indices that constitute the fragment. 
+ + Returns + ------- + ao_indices : cp.ndarray of int + Sorted AO indices (in the AO ordering of ``mol``) that belong + to ``frag_atoms``. + """ + aoslice = mol.aoslice_by_atom() + indices = [] + for ia in frag_atoms: + ia = int(ia) + if ia < 0 or ia >= mol.natm: + raise ValueError( + f"Atom index {ia} is out of range [0, {mol.natm})." + ) + p0, p1 = int(aoslice[ia, 2]), int(aoslice[ia, 3]) + indices.extend(range(p0, p1)) + indices = cp.asarray(sorted(indices), dtype=cp.int32) + if indices.size == 0: + raise ValueError( + "Fragment is empty: no atomic orbitals were selected." + ) + return indices + + +def schmidt_decompose(dm_full, frag_idx, env_idx, threshold=1e-5): + """ + Schmidt decomposition. + + Parameters + ---------- + dm_full : array_like, shape (nao, nao) + Spin-summed 1-RDM in the full AO basis. The trace equals the + number of electrons. + frag_idx, env_idx : cp.ndarray + AO indices of fragment and environment, respectively. + ``frag_idx`` and ``env_idx`` together must form a partition of + ``range(nao)``. + threshold : float + Eigenvalue cutoff used to classify the environment orbitals. + + Returns + ------- + bath_orb : cp.ndarray, shape (n_env, n_bath) + Eigenvectors of D^E whose eigenvalues are within + (threshold, 2 - threshold). + core_orb : cp.ndarray, shape (n_env, n_core) + Eigenvectors of D^E whose eigenvalues exceed 2 - threshold. + These define the unentangled occupied (core) orbitals. + info : dict + Dictionary with eigenvalue arrays for each category and the + electron count of the core space. + """ + dm = _as_cupy(dm_full) + if dm.ndim != 2 or dm.shape[0] != dm.shape[1]: + raise ValueError("dm_full must be a square 2D matrix.") + + env_idx = _as_cupy(env_idx) + if env_idx.size == 0: + # Pure fragment, no environment to entangle with. 
+ return (cp.zeros((0, 0)), + cp.zeros((0, 0)), + {'core': cp.zeros(0), + 'bath': cp.zeros(0), + 'virtual': cp.zeros(0), + 'n_core_electrons': 0}) + + # Symmetrize to suppress numerical asymmetry from the SCF solver + D_env = dm[env_idx[:, None], env_idx[None, :]] + D_env = 0.5 * (D_env + D_env.T) + + eigvals, eigvecs = cp.linalg.eigh(D_env) + + is_core = eigvals > (2.0 - threshold) + is_virt = eigvals < threshold + is_bath = ~(is_core | is_virt) + + bath_orb = eigvecs[:, is_bath] + core_orb = eigvecs[:, is_core] + + info = { + 'core': eigvals[is_core], + 'bath': eigvals[is_bath], + 'virtual': eigvals[is_virt], + # Each unentangled-occupied orbital is doubly occupied in the + # spin-restricted formulation. + 'n_core_electrons': 2 * int(is_core.sum()), + } + return bath_orb, core_orb, info + + +def build_embedding_basis(nao, frag_idx, env_idx, bath_orb): + """ + Construct the AO -> embedded transformation matrix B. + + Columns of B are arranged as + [ fragment-AO basis (identity columns), + bath orbitals (eigenvectors lifted into the env block) ]. + + Parameters + ---------- + nao : int + Number of atomic orbitals in the full system. + frag_idx : cp.ndarray of int + AO indices of the fragment. + env_idx : cp.ndarray of int + AO indices of the environment. + bath_orb : cp.ndarray, shape (n_env, n_bath) + Bath orbitals expressed in the environment AO subspace. + + Returns + ------- + B : cp.ndarray, shape (nao, n_frag + n_bath) + Transformation matrix whose columns span the embedded space A. 
+ """ + frag_idx = _as_cupy(frag_idx) + env_idx = _as_cupy(env_idx) + n_frag = frag_idx.size + n_bath = bath_orb.shape[1] if bath_orb.size else 0 + + B = cp.zeros((nao, n_frag + n_bath), dtype=float) + # Fragment columns: identity on fragment AOs + B[frag_idx, cp.arange(n_frag)] = 1.0 + # Bath columns: embed env eigenvectors into the env rows + if n_bath > 0: + B[env_idx[:, None], cp.arange(n_bath)[None, :] + n_frag] = bath_orb + return B + + +def build_core_dm(env_idx, core_orb, nao): + """ + Build the spin-summed core 1-RDM in the full AO basis. + + Each unentangled-occupied orbital is doubly occupied: + + D_core = 2 * C_core C_core^T, + + where C_core is the matrix of core orbitals lifted into the full + AO basis (the rows corresponding to fragment AOs are zero). + """ + env_idx = _as_cupy(env_idx) + if core_orb.size == 0: + return cp.zeros((nao, nao), dtype=float) + C_core = cp.zeros((nao, core_orb.shape[1]), dtype=float) + C_core[env_idx, :] = core_orb + return 2.0 * (C_core @ C_core.T) + + +# --------------------------------------------------------------------------- +# Hamiltonian transformations +# --------------------------------------------------------------------------- +def transform_h1(h_ao, B): + """ + Project a 1-electron operator from the full AO basis to the + embedded basis: ``h_emb = B^T h_ao B``. + """ + h_emb = B.T @ h_ao @ B + return h_emb + + +def transform_eri(mol, B): + """ + Transform the four-index two-electron repulsion integrals from the + full AO basis to the embedded basis using ``pyscf.ao2mo``: + + V^A_{xy,zw} = sum_{rstu} B^r_x B^s_y V^{rs}_{tu} B^t_z B^u_w. + + The result is returned in 4-fold symmetric packed form, suitable + for assignment to ``mf._eri`` of an SCF object. + + Parameters + ---------- + mol : pyscf.gto.Mole + Full-system molecule providing the AO integrals. + B : cp.ndarray, shape (nao, nemb) + AO -> embedded transformation matrix. 
+ + Returns + ------- + eri_emb : cp.ndarray + ERIs in the embedded basis (4-fold symmetric, packed). + """ + nemb = B.shape[1] + # pyscf.ao2mo requires CPU numpy arrays + B_cpu = cp.asnumpy(B) + eri_emb = ao2mo.kernel(mol, B_cpu, compact=True) + # ``ao2mo.kernel`` already returns the 4-fold packed form for + # real, equal-MOs inputs; ensure consistent shape. + eri_emb = ao2mo.restore(4, eri_emb, nemb) + return cp.asarray(eri_emb) + + +# --------------------------------------------------------------------------- +# Embedded Mole helper +# --------------------------------------------------------------------------- +def _build_embedded_mole(nemb, n_emb_electrons, spin=0, + verbose=0, max_memory=4000): + """ + Build a placeholder ``pyscf.gto.Mole`` whose only role is to carry + the bookkeeping needed by a PySCF SCF driver: the number of + electrons, the number of orbitals, and the ``incore_anyway`` flag + (so that the driver consumes ``mf._eri`` directly instead of + rebuilding integrals from atomic basis functions). + """ + if n_emb_electrons < 0: + raise ValueError( + f"Embedded electron count {n_emb_electrons} is negative; " + "check the fragment definition and the Schmidt threshold." + ) + if n_emb_electrons > 2 * nemb: + raise ValueError( + f"Embedded electron count {n_emb_electrons} exceeds " + f"2 * nemb = {2 * nemb}; the embedded space is too small." + ) + + mol = gto.Mole() + mol.verbose = verbose + mol.max_memory = max_memory + mol.atom = [] + mol.basis = {} + mol.unit = 'Bohr' + mol.spin = spin + mol.nelectron = int(n_emb_electrons) + mol.charge = 0 + mol.incore_anyway = True + mol.build(parse_arg=False, dump_input=False) + + # Override the basis-counting helpers so PySCF treats the molecule + # as having exactly nemb orbitals. 
+ nemb_int = int(nemb) + + def _nao_nr(self=mol, _n=nemb_int): + return _n + + mol.nao_nr = _nao_nr + mol.nao = nemb_int + return mol + + +def _instantiate_inner_mf(mf_template, embedded_mol): + """ + Create an SCF/DFT object on ``embedded_mol`` that mirrors + the type/configuration of ``mf_template``. + """ + cls = type(mf_template) + try: + new_mf = cls(embedded_mol) + except TypeError: + new_mf = copy.copy(mf_template) + new_mf.mol = embedded_mol + new_mf.mo_coeff = None + new_mf.mo_energy = None + new_mf.mo_occ = None + new_mf.converged = False + + # Propagate selected configuration parameters + for attr in ('xc', 'conv_tol', 'conv_tol_grad', 'max_cycle', + 'level_shift', 'damp', 'diis', 'verbose'): + if hasattr(mf_template, attr): + try: + setattr(new_mf, attr, getattr(mf_template, attr)) + except Exception: + pass + + if hasattr(mf_template, 'grids') and hasattr(new_mf, 'grids'): + for g_attr in ('level', 'prune', 'atom_grid'): + if hasattr(mf_template.grids, g_attr): + try: + setattr(new_mf.grids, g_attr, + getattr(mf_template.grids, g_attr)) + except Exception: + pass + + return new_mf + + +# --------------------------------------------------------------------------- +# Main driver +# --------------------------------------------------------------------------- +class DMET: + """ + Single-shot Density Matrix Embedding Theory driver. + + Parameters + ---------- + mf_outer : SCF object (gpu4pyscf) + Low-level mean-field on the full system. Must be (or be made) + converged before its 1-RDM is consumed. If ``mf_outer`` does + not yet hold a converged MO set, ``kernel()`` will run it. + mf_inner : SCF/DFT object (gpu4pyscf) + High-level mean-field template applied to the embedded cluster. + A fresh PySCF object of the same class is instantiated on + the embedded "mole" and patched with the embedded Hamiltonian + (h^A, V^A). The user-supplied object is left untouched. + frag_atoms : sequence of int, optional + Atom indices that define the fragment region A. 
Mutually + exclusive with ``frag_orbs``. + frag_orbs : sequence of int, optional + Explicit AO indices defining the fragment region. + threshold : float + Eigenvalue cutoff used to classify environment orbitals into + core / bath / virtual. Defaults to 1e-5. + """ + + def __init__(self, mf_outer, mf_inner, + frag_atoms=None, frag_orbs=None, + threshold=1e-5): + if mf_outer is None or mf_inner is None: + raise ValueError("mf_outer and mf_inner are both required.") + if frag_atoms is None and frag_orbs is None: + raise ValueError( + "Provide either 'frag_atoms' or 'frag_orbs' to define " + "the DMET fragment." + ) + if frag_atoms is not None and frag_orbs is not None: + raise ValueError( + "Specify only one of 'frag_atoms' or 'frag_orbs'." + ) + if not (0.0 < threshold < 1.0): + raise ValueError( + f"threshold must lie in (0, 1); got {threshold}." + ) + + self.mf_outer = mf_outer + self.mf_inner_template = mf_inner + self.full_mol = mf_outer.mol + self.threshold = float(threshold) + + nao = int(self.full_mol.nao_nr()) + if frag_atoms is not None: + self.frag_atoms = list(int(a) for a in frag_atoms) + self.frag_idx = get_fragment_ao_indices( + self.full_mol, self.frag_atoms) + else: + self.frag_atoms = None + self.frag_idx = cp.asarray(sorted(int(i) for i in frag_orbs), + dtype=cp.int32) + + all_idx = cp.arange(nao, dtype=cp.int32) + env_mask = cp.ones(nao, dtype=bool) + env_mask[self.frag_idx] = False + self.env_idx = all_idx[env_mask] + + # ---- intermediate / output caches ---- + self.bath_orb = None # (n_env, n_bath) + self.core_orb = None # (n_env, n_core) + self.eig_info = None # dict from schmidt_decompose + self.B = None # AO -> embedded basis transform + self.dm_core = None # full-AO core density matrix + self.h_emb = None # embedded 1e Hamiltonian (cupy) + self.eri_emb = None # embedded 2e Hamiltonian (cupy) + self.e_core = None # core energy contribution + self.e_nuc = None # nuclear repulsion energy + self.mf_inner = None # patched inner SCF object + 
self.dm_emb_init = None # initial embedded density matrix + self.e_inner = None # inner SCF total energy w/ overrides + self.e_tot = None # final DMET total energy + + # ------------------------------------------------------------------ + # Step 1: ensure low-level mean-field is converged + # ------------------------------------------------------------------ + def _ensure_outer_converged(self): + if getattr(self.mf_outer, 'mo_coeff', None) is None or not getattr(self.mf_outer, 'converged', True): + self.mf_outer.kernel() + + # ------------------------------------------------------------------ + # Step 2: bath construction + # ------------------------------------------------------------------ + def build_bath(self): + """ + Run the Schmidt decomposition on the environment block of the + outer-SCF density matrix expressed in the Loewdin orthonormal + AO (OAO) basis. Populates ``self.bath_orb``, ``self.core_orb``, + ``self.eig_info``, ``self.B_oao``, ``self.X``, and ``self.B`` + (the AO coefficients of the embedded orbitals). + """ + self._ensure_outer_converged() + dm_full_ao = _as_cupy(self.mf_outer.make_rdm1()) + + # Loewdin orthogonalization of the AO basis + s_ao = _as_cupy(self.mf_outer.get_ovlp()) + X, X_inv = lowdin_orth(s_ao) + # 1-RDM in the OAO basis: D' = S^{1/2} D S^{1/2} + dm_full_oao = X_inv @ dm_full_ao @ X_inv + + bath_orb, core_orb, info = schmidt_decompose( + dm_full_oao, self.frag_idx, self.env_idx, self.threshold) + + nao_oao = X.shape[1] + # OAO -> embedded transformation + B_oao = build_embedding_basis(nao_oao, self.frag_idx, self.env_idx, + bath_orb) + # AO coefficients of the embedded orbitals: C_emb = X B' + B_ao = X @ B_oao + + # Core orbitals lifted from OAO env subspace into the AO basis. 
+ if core_orb.size > 0: + C_core_oao = cp.zeros((nao_oao, core_orb.shape[1]), dtype=float) + C_core_oao[self.env_idx, :] = core_orb + C_core_ao = X @ C_core_oao + dm_core_ao = 2.0 * (C_core_ao @ C_core_ao.T) + else: + dm_core_ao = cp.zeros_like(dm_full_ao) + + self.X = X + self.X_inv = X_inv + self.bath_orb = bath_orb + self.core_orb = core_orb + self.eig_info = info + self.B_oao = B_oao # OAO -> embedded + self.B = B_ao # AO -> embedded (orthonormal columns) + self.dm_core = dm_core_ao + return self + + # ------------------------------------------------------------------ + # Step 3: build the embedded Hamiltonian + # ------------------------------------------------------------------ + def build_embedded_hamiltonian(self): + """ + Construct h^A and V^A in the embedded basis A and the + constant core energy. + """ + if self.B is None: + self.build_bath() + + mol = self.full_mol + # Bare 1e Hamiltonian on the full AO basis. Use the outer-mf + # implementation to inherit any custom modifications (ECPs, + # external charges, etc.). + h_ao = _as_cupy(self.mf_outer.get_hcore()) + + # Mean-field potential generated by the unentangled-occupied + # core orbitals in the full AO basis. 
+ if self.eig_info['n_core_electrons'] > 0: + vj_core, vk_core = self.mf_outer.get_jk(mol, self.dm_core) + v_core_ao = _as_cupy(vj_core) - 0.5 * _as_cupy(vk_core) + else: + v_core_ao = cp.zeros_like(h_ao) + + # 1-electron Hamiltonian in the embedded basis + h_emb = transform_h1(h_ao + v_core_ao, self.B) + + # 2-electron Hamiltonian in the embedded basis + eri_emb = transform_eri(mol, self.B) + + # Constant core energy: 1/2 Tr[D_core (h + (h + v_core))] + # = Tr[D_core h] + 1/2 Tr[D_core v_core] + if self.eig_info['n_core_electrons'] > 0: + e_core = (cp.einsum('ij,ji->', self.dm_core, h_ao) + + 0.5 * cp.einsum('ij,ji->', self.dm_core, v_core_ao)) + else: + e_core = 0.0 + + self.h_emb = h_emb + self.eri_emb = eri_emb + self.e_core = float(e_core) + self.e_nuc = float(mol.energy_nuc()) + return self + + # ------------------------------------------------------------------ + # Step 4: build / patch the inner SCF object and solve + # ------------------------------------------------------------------ + def _build_inner_mf(self): + """Instantiate the inner SCF on the embedded mole.""" + if self.h_emb is None: + self.build_embedded_hamiltonian() + + nemb = self.B.shape[1] + n_total_electrons = int(self.full_mol.nelectron) + n_emb_electrons = n_total_electrons \ + - int(self.eig_info['n_core_electrons']) + + emb_mol = _build_embedded_mole( + nemb=nemb, + n_emb_electrons=n_emb_electrons, + spin=int(getattr(self.full_mol, 'spin', 0)), + verbose=int(getattr(self.full_mol, 'verbose', 0)), + max_memory=int(getattr(self.full_mol, 'max_memory', 4000)), + ) + + mf_inner = _instantiate_inner_mf(self.mf_inner_template, emb_mol) + + # ----- Patch the underlying Hamiltonian ----- + h_emb = self.h_emb + ovlp = cp.eye(nemb) + + mf_inner.get_hcore = lambda *args, **kwargs: h_emb + mf_inner.get_ovlp = lambda *args, **kwargs: ovlp + mf_inner.energy_nuc = lambda *args, **kwargs: self.e_nuc + self.e_core + + # Use ao2mo's 8-fold packed format for the in-core ERIs so + # PySCF's optimized JK 
routines can be reused. + eri_emb_cpu = cp.asnumpy(self.eri_emb) + eri_8fold = ao2mo.restore(8, eri_emb_cpu, nemb) + mf_inner._eri = cp.asarray(eri_8fold) + + # Initial guess: project the outer 1-RDM into the embedded + # basis. With C_emb expressed in AO coefficients, the projector + # is C_emb^T S D_AO S C_emb (which equals B_oao^T D_OAO B_oao). + s_ao = _as_cupy(self.mf_outer.get_ovlp()) + dm_full_ao = _as_cupy(self.mf_outer.make_rdm1()) + sB = s_ao @ self.B + dm_emb_init = sB.T @ dm_full_ao @ sB + + # Ensure exact electron count consistency + trace = float(cp.trace(dm_emb_init)) + if trace > 0: + dm_emb_init = dm_emb_init * (n_emb_electrons / trace) + self.dm_emb_init = dm_emb_init + + self.mf_inner = mf_inner + return mf_inner + + def solve_embedded(self): + """Run the high-level embedded SCF and return its total energy.""" + if self.mf_inner is None: + self._build_inner_mf() + + e_inner = self.mf_inner.kernel(dm0=self.dm_emb_init) + if isinstance(e_inner, tuple): + e_inner = float(self.mf_inner.e_tot) + else: + e_inner = float(e_inner) + self.e_inner = e_inner + return e_inner + + # ------------------------------------------------------------------ + # Public entry point + # ------------------------------------------------------------------ + def kernel(self): + """ + Drive the full single-shot DMET workflow and return the total + energy. + + E_DMET = E_inner_total + + Note: the inner SCF's ``energy_nuc`` is set to (E_nuc + E_core), + so the energy returned by the inner solver already accounts for + the nuclear repulsion of the full system and the mean-field + contribution of the unentangled-occupied core orbitals. 
+ """ + self.build_bath() + self.build_embedded_hamiltonian() + self._build_inner_mf() + e_inner = self.solve_embedded() + self.e_tot = float(e_inner) + return self.e_tot + + # ------------------------------------------------------------------ + # Diagnostics + # ------------------------------------------------------------------ + def energy_decomposition(self): + """ + Return a dictionary describing the various energy contributions + gathered during the DMET calculation. + """ + if self.e_tot is None: + self.kernel() + return { + 'E_nuc': self.e_nuc, + 'E_core': self.e_core, + 'E_inner': self.e_inner, + 'E_DMET': self.e_tot, + } + + def bath_summary(self): + """ + Return a brief description of the Schmidt decomposition + outcome: the sizes of the fragment, bath, core and virtual + spaces, and the eigenvalue arrays of each environment block. + """ + if self.eig_info is None: + self.build_bath() + return { + 'n_fragment_aos': int(self.frag_idx.size), + 'n_bath': int(self.bath_orb.shape[1]), + 'n_core': int(self.core_orb.shape[1]), + 'n_virtual': int(self.eig_info['virtual'].size), + 'core_eigvals': self.eig_info['core'], + 'bath_eigvals': self.eig_info['bath'], + 'virt_eigvals': self.eig_info['virtual'], + 'n_core_electrons': int(self.eig_info['n_core_electrons']), + } + + def __call__(self): + """Allow ``DMET(...)()`` invocation in the PySCF mf style.""" + return self.kernel() diff --git a/gpu4pyscf/dmet/tests/test_dmet.py b/gpu4pyscf/dmet/tests/test_dmet.py new file mode 100644 index 000000000..a3c8194ea --- /dev/null +++ b/gpu4pyscf/dmet/tests/test_dmet.py @@ -0,0 +1,121 @@ +# Copyright 2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Basic correctness tests for the single-shot DMET driver. + +The cancellation property used here: + + For a closed-shell system computed at the SAME mean-field level + (i.e. ``mf_inner`` and ``mf_outer`` share the same method and the + same orbital basis), the single-shot DMET total energy must + reproduce the full-system mean-field total energy exactly. +""" + +import unittest +import numpy as np +from pyscf import gto, scf + +from gpu4pyscf.dmet import DMET + + +class KnownValues(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.mol = gto.M( + atom=''' + H 0.0 0.0 0.00 + H 0.0 0.0 0.74 + H 0.0 0.0 2.20 + H 0.0 0.0 2.94 + ''', + basis='sto-3g', + verbose=0, + ) + cls.mf_ref = scf.RHF(cls.mol) + cls.e_ref = cls.mf_ref.kernel() + + def test_self_consistency_two_atom_fragment(self): + # A single-shot DMET with the same low- and high-level method + # must reproduce the full-system mean-field energy. 
+ mf_outer = scf.RHF(self.mol) + mf_outer.kernel() + + mf_inner_template = scf.RHF(self.mol) + + dmet = DMET( + mf_outer=mf_outer, + mf_inner=mf_inner_template, + frag_atoms=[0, 1], + threshold=1e-8, + ) + e_dmet = dmet.kernel() + + self.assertAlmostEqual(e_dmet, self.e_ref, places=7) + + def test_self_consistency_single_atom_fragment(self): + mf_outer = scf.RHF(self.mol) + mf_outer.kernel() + + mf_inner_template = scf.RHF(self.mol) + + dmet = DMET( + mf_outer=mf_outer, + mf_inner=mf_inner_template, + frag_atoms=[0], + threshold=1e-8, + ) + e_dmet = dmet.kernel() + self.assertAlmostEqual(e_dmet, self.e_ref, places=7) + + def test_bath_summary(self): + mf_outer = scf.RHF(self.mol) + mf_outer.kernel() + + dmet = DMET( + mf_outer=mf_outer, + mf_inner=scf.RHF(self.mol), + frag_atoms=[0, 1], + threshold=1e-6, + ) + dmet.build_bath() + info = dmet.bath_summary() + # Two H atoms in STO-3G means 2 fragment AOs. + self.assertEqual(info['n_fragment_aos'], 2) + # Number of (bath + core + virtual) eigenvalues equals the + # environment AO count. 
+ self.assertEqual( + info['n_bath'] + info['n_core'] + info['n_virtual'], + self.mol.nao_nr() - info['n_fragment_aos'], + ) + + def test_decomposition_keys(self): + mf_outer = scf.RHF(self.mol) + mf_outer.kernel() + + dmet = DMET( + mf_outer=mf_outer, + mf_inner=scf.RHF(self.mol), + frag_atoms=[0, 1], + threshold=1e-8, + ) + dmet.kernel() + decomp = dmet.energy_decomposition() + for key in ('E_nuc', 'E_core', 'E_inner', 'E_DMET'): + self.assertIn(key, decomp) + + +if __name__ == '__main__': + print("Tests for single-shot DMET") + unittest.main() From 2385803cfc507ad1457953a9659de952977ba9c0 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 12 May 2026 15:05:03 +0800 Subject: [PATCH 02/30] debug --- gpu4pyscf/dmet/dmet.py | 553 +++++++++++++---------------------------- 1 file changed, 172 insertions(+), 381 deletions(-) diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py index 7c7a893d9..6c67408b5 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/dmet/dmet.py @@ -28,22 +28,9 @@ def _as_cupy(x): return cp.asarray(x) -# TODO: use already implemented lowdin_orth def lowdin_orth(s): """ Loewdin symmetric orthogonalization. - - Given an AO overlap matrix ``S``, return ``X = S^{-1/2}`` and - ``X_inv = S^{1/2}``. Eigenvalues of ``S`` smaller than 1e-12 are - treated as linearly dependent and dropped. - - Returns - ------- - X : cp.ndarray, shape (nao, nao_orth) - AO -> orthonormal AO transformation. Columns of ``X`` are the - coefficients of the orthonormal AOs in the AO basis. - X_inv : cp.ndarray, shape (nao_orth, nao) - Inverse transformation: ``X_inv = X^T S``. """ s = _as_cupy(s) s = 0.5 * (s + s.T) @@ -62,81 +49,32 @@ def lowdin_orth(s): def get_fragment_ao_indices(mol, frag_atoms): """ Return the atomic-orbital indices that belong to the listed atoms. - - Parameters - ---------- - mol : pyscf.gto.Mole - The full system molecule. - frag_atoms : sequence of int - Atom indices that constitute the fragment. 
- - Returns - ------- - ao_indices : cp.ndarray of int - Sorted AO indices (in the AO ordering of ``mol``) that belong - to ``frag_atoms``. """ aoslice = mol.aoslice_by_atom() indices = [] for ia in frag_atoms: ia = int(ia) if ia < 0 or ia >= mol.natm: - raise ValueError( - f"Atom index {ia} is out of range [0, {mol.natm})." - ) + raise ValueError(f"Atom index {ia} is out of range [0, {mol.natm}).") p0, p1 = int(aoslice[ia, 2]), int(aoslice[ia, 3]) indices.extend(range(p0, p1)) indices = cp.asarray(sorted(indices), dtype=cp.int32) if indices.size == 0: - raise ValueError( - "Fragment is empty: no atomic orbitals were selected." - ) + raise ValueError("Fragment is empty: no atomic orbitals were selected.") return indices -def schmidt_decompose(dm_full, frag_idx, env_idx, threshold=1e-5): +def schmidt_decompose(dm_full, env_idx, threshold=1e-5): """ Schmidt decomposition. - - Parameters - ---------- - dm_full : array_like, shape (nao, nao) - Spin-summed 1-RDM in the full AO basis. The trace equals the - number of electrons. - frag_idx, env_idx : cp.ndarray - AO indices of fragment and environment, respectively. - ``frag_idx`` and ``env_idx`` together must form a partition of - ``range(nao)``. - threshold : float - Eigenvalue cutoff used to classify the environment orbitals. - - Returns - ------- - bath_orb : cp.ndarray, shape (n_env, n_bath) - Eigenvectors of D^E whose eigenvalues are within - (threshold, 2 - threshold). - core_orb : cp.ndarray, shape (n_env, n_core) - Eigenvectors of D^E whose eigenvalues exceed 2 - threshold. - These define the unentangled occupied (core) orbitals. - info : dict - Dictionary with eigenvalue arrays for each category and the - electron count of the core space. """ dm = _as_cupy(dm_full) - if dm.ndim != 2 or dm.shape[0] != dm.shape[1]: - raise ValueError("dm_full must be a square 2D matrix.") - env_idx = _as_cupy(env_idx) if env_idx.size == 0: - # Pure fragment, no environment to entangle with. 
return (cp.zeros((0, 0)), cp.zeros((0, 0)), - {'core': cp.zeros(0), - 'bath': cp.zeros(0), - 'virtual': cp.zeros(0), - 'n_core_electrons': 0}) + {'core': cp.zeros(0), 'bath': cp.zeros(0), 'virtual': cp.zeros(0), 'n_core_electrons': 0}) - # Symmetrize to suppress numerical asymmetry from the SCF solver D_env = dm[env_idx[:, None], env_idx[None, :]] D_env = 0.5 * (D_env + D_env.T) @@ -153,8 +91,6 @@ def schmidt_decompose(dm_full, frag_idx, env_idx, threshold=1e-5): 'core': eigvals[is_core], 'bath': eigvals[is_bath], 'virtual': eigvals[is_virt], - # Each unentangled-occupied orbital is doubly occupied in the - # spin-restricted formulation. 'n_core_electrons': 2 * int(is_core.sum()), } return bath_orb, core_orb, info @@ -163,26 +99,6 @@ def schmidt_decompose(dm_full, frag_idx, env_idx, threshold=1e-5): def build_embedding_basis(nao, frag_idx, env_idx, bath_orb): """ Construct the AO -> embedded transformation matrix B. - - Columns of B are arranged as - [ fragment-AO basis (identity columns), - bath orbitals (eigenvectors lifted into the env block) ]. - - Parameters - ---------- - nao : int - Number of atomic orbitals in the full system. - frag_idx : cp.ndarray of int - AO indices of the fragment. - env_idx : cp.ndarray of int - AO indices of the environment. - bath_orb : cp.ndarray, shape (n_env, n_bath) - Bath orbitals expressed in the environment AO subspace. - - Returns - ------- - B : cp.ndarray, shape (nao, n_frag + n_bath) - Transformation matrix whose columns span the embedded space A. 
""" frag_idx = _as_cupy(frag_idx) env_idx = _as_cupy(env_idx) @@ -190,9 +106,7 @@ def build_embedding_basis(nao, frag_idx, env_idx, bath_orb): n_bath = bath_orb.shape[1] if bath_orb.size else 0 B = cp.zeros((nao, n_frag + n_bath), dtype=float) - # Fragment columns: identity on fragment AOs B[frag_idx, cp.arange(n_frag)] = 1.0 - # Bath columns: embed env eigenvectors into the env rows if n_bath > 0: B[env_idx[:, None], cp.arange(n_bath)[None, :] + n_frag] = bath_orb return B @@ -200,14 +114,7 @@ def build_embedding_basis(nao, frag_idx, env_idx, bath_orb): def build_core_dm(env_idx, core_orb, nao): """ - Build the spin-summed core 1-RDM in the full AO basis. - - Each unentangled-occupied orbital is doubly occupied: - - D_core = 2 * C_core C_core^T, - - where C_core is the matrix of core orbitals lifted into the full - AO basis (the rows corresponding to fragment AOs are zero). + Build the core 1-RDM in the full AO basis. """ env_idx = _as_cupy(env_idx) if core_orb.size == 0: @@ -217,72 +124,27 @@ def build_core_dm(env_idx, core_orb, nao): return 2.0 * (C_core @ C_core.T) -# --------------------------------------------------------------------------- -# Hamiltonian transformations -# --------------------------------------------------------------------------- def transform_h1(h_ao, B): """ - Project a 1-electron operator from the full AO basis to the - embedded basis: ``h_emb = B^T h_ao B``. + Project a 1-electron operator from the full AO basis to the embedded basis. """ - h_emb = B.T @ h_ao @ B - return h_emb + return B.T @ h_ao @ B def transform_eri(mol, B): """ - Transform the four-index two-electron repulsion integrals from the - full AO basis to the embedded basis using ``pyscf.ao2mo``: - - V^A_{xy,zw} = sum_{rstu} B^r_x B^s_y V^{rs}_{tu} B^t_z B^u_w. - - The result is returned in 4-fold symmetric packed form, suitable - for assignment to ``mf._eri`` of an SCF object. 
- - Parameters - ---------- - mol : pyscf.gto.Mole - Full-system molecule providing the AO integrals. - B : cp.ndarray, shape (nao, nemb) - AO -> embedded transformation matrix. - - Returns - ------- - eri_emb : cp.ndarray - ERIs in the embedded basis (4-fold symmetric, packed). + Transform the four-index two-electron repulsion integrals from the full AO basis. """ nemb = B.shape[1] - # pyscf.ao2mo requires CPU numpy arrays B_cpu = cp.asnumpy(B) eri_emb = ao2mo.kernel(mol, B_cpu, compact=True) - # ``ao2mo.kernel`` already returns the 4-fold packed form for - # real, equal-MOs inputs; ensure consistent shape. eri_emb = ao2mo.restore(4, eri_emb, nemb) return cp.asarray(eri_emb) -# --------------------------------------------------------------------------- -# Embedded Mole helper -# --------------------------------------------------------------------------- -def _build_embedded_mole(nemb, n_emb_electrons, spin=0, - verbose=0, max_memory=4000): - """ - Build a placeholder ``pyscf.gto.Mole`` whose only role is to carry - the bookkeeping needed by a PySCF SCF driver: the number of - electrons, the number of orbitals, and the ``incore_anyway`` flag - (so that the driver consumes ``mf._eri`` directly instead of - rebuilding integrals from atomic basis functions). - """ - if n_emb_electrons < 0: - raise ValueError( - f"Embedded electron count {n_emb_electrons} is negative; " - "check the fragment definition and the Schmidt threshold." - ) - if n_emb_electrons > 2 * nemb: - raise ValueError( - f"Embedded electron count {n_emb_electrons} exceeds " - f"2 * nemb = {2 * nemb}; the embedded space is too small." 
- ) +def _build_embedded_mole(nemb, n_emb_electrons, spin=0, verbose=0, max_memory=4000): + if n_emb_electrons < 0 or n_emb_electrons > 2 * nemb: + raise ValueError(f"Invalid embedded electron count: {n_emb_electrons}") mol = gto.Mole() mol.verbose = verbose @@ -296,10 +158,7 @@ def _build_embedded_mole(nemb, n_emb_electrons, spin=0, mol.incore_anyway = True mol.build(parse_arg=False, dump_input=False) - # Override the basis-counting helpers so PySCF treats the molecule - # as having exactly nemb orbitals. nemb_int = int(nemb) - def _nao_nr(self=mol, _n=nemb_int): return _n @@ -309,10 +168,6 @@ def _nao_nr(self=mol, _n=nemb_int): def _instantiate_inner_mf(mf_template, embedded_mol): - """ - Create an SCF/DFT object on ``embedded_mol`` that mirrors - the type/configuration of ``mf_template``. - """ cls = type(mf_template) try: new_mf = cls(embedded_mol) @@ -324,7 +179,6 @@ def _instantiate_inner_mf(mf_template, embedded_mol): new_mf.mo_occ = None new_mf.converged = False - # Propagate selected configuration parameters for attr in ('xc', 'conv_tol', 'conv_tol_grad', 'max_cycle', 'level_shift', 'damp', 'diis', 'verbose'): if hasattr(mf_template, attr): @@ -337,322 +191,259 @@ def _instantiate_inner_mf(mf_template, embedded_mol): for g_attr in ('level', 'prune', 'atom_grid'): if hasattr(mf_template.grids, g_attr): try: - setattr(new_mf.grids, g_attr, - getattr(mf_template.grids, g_attr)) + setattr(new_mf.grids, g_attr, getattr(mf_template.grids, g_attr)) except Exception: pass return new_mf -# --------------------------------------------------------------------------- -# Main driver -# --------------------------------------------------------------------------- class DMET: """ - Single-shot Density Matrix Embedding Theory driver. + Density Matrix Embedding Theory driver with macroscopic iteration. Parameters ---------- mf_outer : SCF object (gpu4pyscf) - Low-level mean-field on the full system. Must be (or be made) - converged before its 1-RDM is consumed. 
If ``mf_outer`` does - not yet hold a converged MO set, ``kernel()`` will run it. + Low-level mean-field on the full system. mf_inner : SCF/DFT object (gpu4pyscf) High-level mean-field template applied to the embedded cluster. - A fresh PySCF object of the same class is instantiated on - the embedded "mole" and patched with the embedded Hamiltonian - (h^A, V^A). The user-supplied object is left untouched. - frag_atoms : sequence of int, optional - Atom indices that define the fragment region A. Mutually - exclusive with ``frag_orbs``. - frag_orbs : sequence of int, optional - Explicit AO indices defining the fragment region. + fragments : list of lists of int + List of fragments, where each fragment is a list of atom indices. threshold : float - Eigenvalue cutoff used to classify environment orbitals into - core / bath / virtual. Defaults to 1e-5. + Eigenvalue cutoff used to classify environment orbitals. + max_macro_iter : int + Maximum number of macroscopic iterations for correlation potential (u). + macro_tol : float + Convergence tolerance for the difference in fragment 1-RDMs. """ - def __init__(self, mf_outer, mf_inner, - frag_atoms=None, frag_orbs=None, - threshold=1e-5): + def __init__(self, mf_outer, mf_inner, fragments, + threshold=1e-5, max_macro_iter=20, macro_tol=1e-4): if mf_outer is None or mf_inner is None: raise ValueError("mf_outer and mf_inner are both required.") - if frag_atoms is None and frag_orbs is None: - raise ValueError( - "Provide either 'frag_atoms' or 'frag_orbs' to define " - "the DMET fragment." - ) - if frag_atoms is not None and frag_orbs is not None: - raise ValueError( - "Specify only one of 'frag_atoms' or 'frag_orbs'." - ) - if not (0.0 < threshold < 1.0): - raise ValueError( - f"threshold must lie in (0, 1); got {threshold}." 
- ) + if not fragments: + raise ValueError("Provide a list of fragments to define the DMET regions.") self.mf_outer = mf_outer self.mf_inner_template = mf_inner self.full_mol = mf_outer.mol self.threshold = float(threshold) + self.max_macro_iter = max_macro_iter + self.macro_tol = macro_tol + self.fragments = [list(int(a) for a in frag) for frag in fragments] + self.nfrags = len(self.fragments) + nao = int(self.full_mol.nao_nr()) - if frag_atoms is not None: - self.frag_atoms = list(int(a) for a in frag_atoms) - self.frag_idx = get_fragment_ao_indices( - self.full_mol, self.frag_atoms) - else: - self.frag_atoms = None - self.frag_idx = cp.asarray(sorted(int(i) for i in frag_orbs), - dtype=cp.int32) - all_idx = cp.arange(nao, dtype=cp.int32) - env_mask = cp.ones(nao, dtype=bool) - env_mask[self.frag_idx] = False - self.env_idx = all_idx[env_mask] - - # ---- intermediate / output caches ---- - self.bath_orb = None # (n_env, n_bath) - self.core_orb = None # (n_env, n_core) - self.eig_info = None # dict from schmidt_decompose - self.B = None # AO -> embedded basis transform - self.dm_core = None # full-AO core density matrix - self.h_emb = None # embedded 1e Hamiltonian (cupy) - self.eri_emb = None # embedded 2e Hamiltonian (cupy) - self.e_core = None # core energy contribution - self.e_nuc = None # nuclear repulsion energy - self.mf_inner = None # patched inner SCF object - self.dm_emb_init = None # initial embedded density matrix - self.e_inner = None # inner SCF total energy w/ overrides - self.e_tot = None # final DMET total energy - - # ------------------------------------------------------------------ - # Step 1: ensure low-level mean-field is converged - # ------------------------------------------------------------------ - def _ensure_outer_converged(self): - if getattr(self.mf_outer, 'mo_coeff', None) is None or not getattr(self.mf_outer, 'converged', True): - self.mf_outer.kernel() - - # ------------------------------------------------------------------ - # 
Step 2: bath construction - # ------------------------------------------------------------------ - def build_bath(self): + + self.frag_idx = [] + self.env_idx = [] + for frag_atoms in self.fragments: + f_idx = get_fragment_ao_indices(self.full_mol, frag_atoms) + self.frag_idx.append(f_idx) + env_mask = cp.ones(nao, dtype=bool) + env_mask[f_idx] = False + self.env_idx.append(all_idx[env_mask]) + + # ---- intermediate / output caches (lists for multiple fragments) ---- + self.bath_orb = [None] * self.nfrags + self.core_orb = [None] * self.nfrags + self.eig_info = [None] * self.nfrags + self.B_oao = [None] * self.nfrags + self.B = [None] * self.nfrags + self.dm_core = [None] * self.nfrags + self.h_emb = [None] * self.nfrags + self.eri_emb = [None] * self.nfrags + self.e_core = [None] * self.nfrags + self.mf_inner = [None] * self.nfrags + self.dm_emb_init = [None] * self.nfrags + self.e_inner = [None] * self.nfrags + self.e_tot = None + self.u = cp.zeros((nao, nao)) # Global correlation potential + + def build_bath(self, ifrag, dm_full_oao, X): """ - Run the Schmidt decomposition on the environment block of the - outer-SCF density matrix expressed in the Loewdin orthonormal - AO (OAO) basis. Populates ``self.bath_orb``, ``self.core_orb``, - ``self.eig_info``, ``self.B_oao``, ``self.X``, and ``self.B`` - (the AO coefficients of the embedded orbitals). + Run the Schmidt decomposition for a specific fragment. 
""" - self._ensure_outer_converged() - dm_full_ao = _as_cupy(self.mf_outer.make_rdm1()) - - # Loewdin orthogonalization of the AO basis - s_ao = _as_cupy(self.mf_outer.get_ovlp()) - X, X_inv = lowdin_orth(s_ao) - # 1-RDM in the OAO basis: D' = S^{1/2} D S^{1/2} - dm_full_oao = X_inv @ dm_full_ao @ X_inv - bath_orb, core_orb, info = schmidt_decompose( - dm_full_oao, self.frag_idx, self.env_idx, self.threshold) + dm_full_oao, self.frag_idx[ifrag], self.env_idx[ifrag], self.threshold) nao_oao = X.shape[1] - # OAO -> embedded transformation - B_oao = build_embedding_basis(nao_oao, self.frag_idx, self.env_idx, - bath_orb) - # AO coefficients of the embedded orbitals: C_emb = X B' + B_oao = build_embedding_basis(nao_oao, self.frag_idx[ifrag], self.env_idx[ifrag], bath_orb) B_ao = X @ B_oao - # Core orbitals lifted from OAO env subspace into the AO basis. if core_orb.size > 0: C_core_oao = cp.zeros((nao_oao, core_orb.shape[1]), dtype=float) - C_core_oao[self.env_idx, :] = core_orb + C_core_oao[self.env_idx[ifrag], :] = core_orb C_core_ao = X @ C_core_oao dm_core_ao = 2.0 * (C_core_ao @ C_core_ao.T) else: - dm_core_ao = cp.zeros_like(dm_full_ao) - - self.X = X - self.X_inv = X_inv - self.bath_orb = bath_orb - self.core_orb = core_orb - self.eig_info = info - self.B_oao = B_oao # OAO -> embedded - self.B = B_ao # AO -> embedded (orthonormal columns) - self.dm_core = dm_core_ao + dm_core_ao = cp.zeros((X.shape[0], X.shape[0]), dtype=float) + + self.bath_orb[ifrag] = bath_orb + self.core_orb[ifrag] = core_orb + self.eig_info[ifrag] = info + self.B_oao[ifrag] = B_oao + self.B[ifrag] = B_ao + self.dm_core[ifrag] = dm_core_ao return self - # ------------------------------------------------------------------ - # Step 3: build the embedded Hamiltonian - # ------------------------------------------------------------------ - def build_embedded_hamiltonian(self): + def build_embedded_hamiltonian(self, ifrag, hcore_orig): """ - Construct h^A and V^A in the embedded basis A and the - 
constant core energy. + Construct h^A and V^A in the embedded basis A. + Uses bare hcore_orig (without the correlation potential 'u'). """ - if self.B is None: - self.build_bath() - mol = self.full_mol - # Bare 1e Hamiltonian on the full AO basis. Use the outer-mf - # implementation to inherit any custom modifications (ECPs, - # external charges, etc.). - h_ao = _as_cupy(self.mf_outer.get_hcore()) - - # Mean-field potential generated by the unentangled-occupied - # core orbitals in the full AO basis. - if self.eig_info['n_core_electrons'] > 0: - vj_core, vk_core = self.mf_outer.get_jk(mol, self.dm_core) + h_ao = _as_cupy(hcore_orig) + + if self.eig_info[ifrag]['n_core_electrons'] > 0: + vj_core, vk_core = self.mf_outer.get_jk(mol, self.dm_core[ifrag]) v_core_ao = _as_cupy(vj_core) - 0.5 * _as_cupy(vk_core) else: v_core_ao = cp.zeros_like(h_ao) - # 1-electron Hamiltonian in the embedded basis - h_emb = transform_h1(h_ao + v_core_ao, self.B) + h_emb = transform_h1(h_ao + v_core_ao, self.B[ifrag]) + eri_emb = transform_eri(mol, self.B[ifrag]) - # 2-electron Hamiltonian in the embedded basis - eri_emb = transform_eri(mol, self.B) - - # Constant core energy: 1/2 Tr[D_core (h + (h + v_core))] - # = Tr[D_core h] + 1/2 Tr[D_core v_core] - if self.eig_info['n_core_electrons'] > 0: - e_core = (cp.einsum('ij,ji->', self.dm_core, h_ao) - + 0.5 * cp.einsum('ij,ji->', self.dm_core, v_core_ao)) + if self.eig_info[ifrag]['n_core_electrons'] > 0: + e_core = (cp.einsum('ij,ji->', self.dm_core[ifrag], h_ao) + + 0.5 * cp.einsum('ij,ji->', self.dm_core[ifrag], v_core_ao)) else: e_core = 0.0 - self.h_emb = h_emb - self.eri_emb = eri_emb - self.e_core = float(e_core) - self.e_nuc = float(mol.energy_nuc()) + self.h_emb[ifrag] = h_emb + self.eri_emb[ifrag] = eri_emb + self.e_core[ifrag] = float(e_core) return self - # ------------------------------------------------------------------ - # Step 4: build / patch the inner SCF object and solve - # 
------------------------------------------------------------------ - def _build_inner_mf(self): + def _build_inner_mf(self, ifrag, dm_full_ao): """Instantiate the inner SCF on the embedded mole.""" - if self.h_emb is None: - self.build_embedded_hamiltonian() - - nemb = self.B.shape[1] + nemb = self.B[ifrag].shape[1] n_total_electrons = int(self.full_mol.nelectron) - n_emb_electrons = n_total_electrons \ - - int(self.eig_info['n_core_electrons']) + n_emb_electrons = n_total_electrons - int(self.eig_info[ifrag]['n_core_electrons']) emb_mol = _build_embedded_mole( nemb=nemb, n_emb_electrons=n_emb_electrons, spin=int(getattr(self.full_mol, 'spin', 0)), - verbose=int(getattr(self.full_mol, 'verbose', 0)), + verbose=0, max_memory=int(getattr(self.full_mol, 'max_memory', 4000)), ) mf_inner = _instantiate_inner_mf(self.mf_inner_template, emb_mol) - # ----- Patch the underlying Hamiltonian ----- - h_emb = self.h_emb + h_emb = self.h_emb[ifrag] ovlp = cp.eye(nemb) + # Base energy offset for debugging per fragment + e_nuc = float(self.full_mol.energy_nuc()) mf_inner.get_hcore = lambda *args, **kwargs: h_emb mf_inner.get_ovlp = lambda *args, **kwargs: ovlp - mf_inner.energy_nuc = lambda *args, **kwargs: self.e_nuc + self.e_core + mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc + self.e_core[ifrag] - # Use ao2mo's 8-fold packed format for the in-core ERIs so - # PySCF's optimized JK routines can be reused. - eri_emb_cpu = cp.asnumpy(self.eri_emb) + eri_emb_cpu = cp.asnumpy(self.eri_emb[ifrag]) eri_8fold = ao2mo.restore(8, eri_emb_cpu, nemb) mf_inner._eri = cp.asarray(eri_8fold) - # Initial guess: project the outer 1-RDM into the embedded - # basis. With C_emb expressed in AO coefficients, the projector - # is C_emb^T S D_AO S C_emb (which equals B_oao^T D_OAO B_oao). 
s_ao = _as_cupy(self.mf_outer.get_ovlp()) - dm_full_ao = _as_cupy(self.mf_outer.make_rdm1()) - sB = s_ao @ self.B + sB = s_ao @ self.B[ifrag] dm_emb_init = sB.T @ dm_full_ao @ sB - # Ensure exact electron count consistency trace = float(cp.trace(dm_emb_init)) if trace > 0: dm_emb_init = dm_emb_init * (n_emb_electrons / trace) - self.dm_emb_init = dm_emb_init + self.dm_emb_init[ifrag] = dm_emb_init - self.mf_inner = mf_inner + self.mf_inner[ifrag] = mf_inner return mf_inner - def solve_embedded(self): - """Run the high-level embedded SCF and return its total energy.""" - if self.mf_inner is None: - self._build_inner_mf() - - e_inner = self.mf_inner.kernel(dm0=self.dm_emb_init) + def solve_embedded(self, ifrag): + """Run the high-level embedded SCF for a specific fragment.""" + e_inner = self.mf_inner[ifrag].kernel(dm0=self.dm_emb_init[ifrag]) if isinstance(e_inner, tuple): - e_inner = float(self.mf_inner.e_tot) + e_inner = float(self.mf_inner[ifrag].e_tot) else: e_inner = float(e_inner) - self.e_inner = e_inner + self.e_inner[ifrag] = e_inner return e_inner - # ------------------------------------------------------------------ - # Public entry point - # ------------------------------------------------------------------ def kernel(self): """ - Drive the full single-shot DMET workflow and return the total - energy. + Drive the macroscopic-iterating DMET workflow. + Returns the DMET total energy. + """ + hcore_orig = _as_cupy(self.mf_outer.get_hcore()) + s_ao = _as_cupy(self.mf_outer.get_ovlp()) + X, X_inv = lowdin_orth(s_ao) - E_DMET = E_inner_total + for macro_iter in range(self.max_macro_iter): + # 1. Run low-level SCF with current correlation potential 'u' + self.mf_outer.get_hcore = lambda *args, **kwargs: cp.asnumpy(hcore_orig + self.u) + self.mf_outer.mo_coeff = None # Force re-run + self.mf_outer.kernel() + + dm_full_ao = _as_cupy(self.mf_outer.make_rdm1()) + dm_full_oao = X_inv @ dm_full_ao @ X_inv + + e_tot = 0.0 + dm_inners = [] + + # 2. 
Loop over all fragments + for ifrag in range(self.nfrags): + self.build_bath(ifrag, dm_full_oao, X) + self.build_embedded_hamiltonian(ifrag, hcore_orig) + mf_inner = self._build_inner_mf(ifrag, dm_full_ao) + self.solve_embedded(ifrag) + + dm_emb = _as_cupy(mf_inner.make_rdm1()) + fock_emb = _as_cupy(mf_inner.get_fock(dm=mf_inner.make_rdm1())) + + # Transform inner DM back to full AO basis for D-matching + B = self.B[ifrag] + dm_inner_ao = B @ dm_emb @ B.T + dm_inners.append(dm_inner_ao) + + # Extract Fragment Energy: 1/2 Tr_x [ D (h + F) ] + n_frag = self.frag_idx[ifrag].size + e_frag_elec = 0.5 * cp.sum( + dm_emb[:n_frag, :] * (self.h_emb[ifrag][:n_frag, :] + fock_emb[:n_frag, :]) + ) + + # Extract Fragment Nuclear Energy + e_frag_nuc = 0.0 + coords = self.full_mol.atom_coords() + charges = self.full_mol.atom_charges() + frag_atoms = self.fragments[ifrag] + for i in frag_atoms: + for j in range(self.full_mol.natm): + if i == j: continue + r = np.linalg.norm(coords[i] - coords[j]) + factor = 0.5 if j in frag_atoms else 1.0 + e_frag_nuc += factor * charges[i] * charges[j] / r + + e_tot += float(e_frag_elec) + e_frag_nuc + + # 3. 
Macroscopic iteration: update correlation potential 'u' + error = 0.0 + for ifrag in range(self.nfrags): + idx = self.frag_idx[ifrag] + idx_mesh = cp.ix_(idx, idx) + # Cost function: \Delta D = D_inner - D_outer over fragment blocks + diff = dm_inners[ifrag][idx_mesh] - dm_full_ao[idx_mesh] + error += float(cp.linalg.norm(diff)) + + # Simple gradient descent step with damping factor + self.u[idx_mesh] -= 0.5 * diff + + print(f"Macro Iter {macro_iter + 1:2d} | E_DMET = {e_tot:.8f} | max(dD) = {error:.6e}") + self.e_tot = e_tot + if error < self.macro_tol: + print("DMET macroscopic iterations converged.") + break - Note: the inner SCF's ``energy_nuc`` is set to (E_nuc + E_core), - so the energy returned by the inner solver already accounts for - the nuclear repulsion of the full system and the mean-field - contribution of the unentangled-occupied core orbitals. - """ - self.build_bath() - self.build_embedded_hamiltonian() - self._build_inner_mf() - e_inner = self.solve_embedded() - self.e_tot = float(e_inner) return self.e_tot - # ------------------------------------------------------------------ - # Diagnostics - # ------------------------------------------------------------------ - def energy_decomposition(self): - """ - Return a dictionary describing the various energy contributions - gathered during the DMET calculation. - """ - if self.e_tot is None: - self.kernel() - return { - 'E_nuc': self.e_nuc, - 'E_core': self.e_core, - 'E_inner': self.e_inner, - 'E_DMET': self.e_tot, - } - - def bath_summary(self): - """ - Return a brief description of the Schmidt decomposition - outcome: the sizes of the fragment, bath, core and virtual - spaces, and the eigenvalue arrays of each environment block. 
- """ - if self.eig_info is None: - self.build_bath() - return { - 'n_fragment_aos': int(self.frag_idx.size), - 'n_bath': int(self.bath_orb.shape[1]), - 'n_core': int(self.core_orb.shape[1]), - 'n_virtual': int(self.eig_info['virtual'].size), - 'core_eigvals': self.eig_info['core'], - 'bath_eigvals': self.eig_info['bath'], - 'virt_eigvals': self.eig_info['virtual'], - 'n_core_electrons': int(self.eig_info['n_core_electrons']), - } - def __call__(self): - """Allow ``DMET(...)()`` invocation in the PySCF mf style.""" - return self.kernel() + return self.kernel() \ No newline at end of file From e551598d6880f5600d0a9734d37bf0eb8b7551f8 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 12 May 2026 15:48:07 +0800 Subject: [PATCH 03/30] debug --- gpu4pyscf/dmet/dmet.py | 89 +++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 32 deletions(-) diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py index 6c67408b5..c0281c352 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/dmet/dmet.py @@ -16,10 +16,7 @@ import copy import numpy as np import cupy as cp -import pyscf -from pyscf import gto, ao2mo -import gpu4pyscf -from gpu4pyscf.scf import hf as gpu_hf +from pyscf import gto def _as_cupy(x): @@ -131,17 +128,6 @@ def transform_h1(h_ao, B): return B.T @ h_ao @ B -def transform_eri(mol, B): - """ - Transform the four-index two-electron repulsion integrals from the full AO basis. 
- """ - nemb = B.shape[1] - B_cpu = cp.asnumpy(B) - eri_emb = ao2mo.kernel(mol, B_cpu, compact=True) - eri_emb = ao2mo.restore(4, eri_emb, nemb) - return cp.asarray(eri_emb) - - def _build_embedded_mole(nemb, n_emb_electrons, spin=0, verbose=0, max_memory=4000): if n_emb_electrons < 0 or n_emb_electrons > 2 * nemb: raise ValueError(f"Invalid embedded electron count: {n_emb_electrons}") @@ -155,7 +141,6 @@ def _build_embedded_mole(nemb, n_emb_electrons, spin=0, verbose=0, max_memory=40 mol.spin = spin mol.nelectron = int(n_emb_electrons) mol.charge = 0 - mol.incore_anyway = True mol.build(parse_arg=False, dump_input=False) nemb_int = int(nemb) @@ -255,7 +240,6 @@ def __init__(self, mf_outer, mf_inner, fragments, self.B = [None] * self.nfrags self.dm_core = [None] * self.nfrags self.h_emb = [None] * self.nfrags - self.eri_emb = [None] * self.nfrags self.e_core = [None] * self.nfrags self.mf_inner = [None] * self.nfrags self.dm_emb_init = [None] * self.nfrags @@ -292,7 +276,7 @@ def build_bath(self, ifrag, dm_full_oao, X): def build_embedded_hamiltonian(self, ifrag, hcore_orig): """ - Construct h^A and V^A in the embedded basis A. + Construct h^A in the embedded basis A. Uses bare hcore_orig (without the correlation potential 'u'). 
""" mol = self.full_mol @@ -305,7 +289,6 @@ def build_embedded_hamiltonian(self, ifrag, hcore_orig): v_core_ao = cp.zeros_like(h_ao) h_emb = transform_h1(h_ao + v_core_ao, self.B[ifrag]) - eri_emb = transform_eri(mol, self.B[ifrag]) if self.eig_info[ifrag]['n_core_electrons'] > 0: e_core = (cp.einsum('ij,ji->', self.dm_core[ifrag], h_ao) @@ -314,7 +297,6 @@ def build_embedded_hamiltonian(self, ifrag, hcore_orig): e_core = 0.0 self.h_emb[ifrag] = h_emb - self.eri_emb[ifrag] = eri_emb self.e_core[ifrag] = float(e_core) return self @@ -343,9 +325,39 @@ def _build_inner_mf(self, ifrag, dm_full_ao): mf_inner.get_ovlp = lambda *args, **kwargs: ovlp mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc + self.e_core[ifrag] - eri_emb_cpu = cp.asnumpy(self.eri_emb[ifrag]) - eri_8fold = ao2mo.restore(8, eri_emb_cpu, nemb) - mf_inner._eri = cp.asarray(eri_8fold) + # Overwrite get_jk to compute J and K on-the-fly using the outer MF + # without computing or storing 4-index ERIs. + def _get_jk(mol=None, dm=None, hermi=1, with_j=True, with_k=True, omega=None): + if dm is None: + dm = mf_inner.make_rdm1() + dm_cp = _as_cupy(dm) + B_mat = self.B[ifrag] + + # Project embedded dm to full AO basis + if dm_cp.ndim == 2: + dm_ao = B_mat @ dm_cp @ B_mat.T + else: + dm_ao = cp.einsum('pi,xij,qj->xpq', B_mat, dm_cp, B_mat) + + # Compute J and K in full AO basis using outer SCF's optimized routine + vj_ao, vk_ao = self.mf_outer.get_jk(self.full_mol, dm_ao, hermi, with_j, with_k, omega) + + # Project J and K back to embedded basis + vj_emb = vk_emb = None + if vj_ao is not None: + if dm_cp.ndim == 2: + vj_emb = B_mat.T @ vj_ao @ B_mat + else: + vj_emb = cp.einsum('pi,xpq,qj->xij', B_mat, vj_ao, B_mat) + if vk_ao is not None: + if dm_cp.ndim == 2: + vk_emb = B_mat.T @ vk_ao @ B_mat + else: + vk_emb = cp.einsum('pi,xpq,qj->xij', B_mat, vk_ao, B_mat) + + return vj_emb, vk_emb + + mf_inner.get_jk = _get_jk s_ao = _as_cupy(self.mf_outer.get_ovlp()) sB = s_ao @ self.B[ifrag] @@ -398,17 +410,28 @@ 
def kernel(self): self.solve_embedded(ifrag) dm_emb = _as_cupy(mf_inner.make_rdm1()) - fock_emb = _as_cupy(mf_inner.get_fock(dm=mf_inner.make_rdm1())) - # Transform inner DM back to full AO basis for D-matching + # Transform inner DM back to full AO basis B = self.B[ifrag] - dm_inner_ao = B @ dm_emb @ B.T - dm_inners.append(dm_inner_ao) + dm_inner_active_ao = B @ dm_emb @ B.T + + dm_inner_full_ao = self.dm_core[ifrag] + dm_inner_active_ao + dm_inners.append(dm_inner_full_ao) - # Extract Fragment Energy: 1/2 Tr_x [ D (h + F) ] - n_frag = self.frag_idx[ifrag].size + # 2.1 Reconstruct the full effective Fock matrix for the embedded system in AO + vj, vk = self.mf_outer.get_jk(self.full_mol, dm_inner_full_ao) + fock_full_ao = hcore_orig + _as_cupy(vj) - 0.5 * _as_cupy(vk) + + # 2.2 Transform D, H, and F to the Lowdin orthogonalized (OAO) basis + dm_full_oao_inner = X_inv @ dm_inner_full_ao @ X_inv + hcore_oao = X.T @ hcore_orig @ X + fock_oao = X.T @ fock_full_ao @ X + + # 2.3 Extract Fragment Energy: 1/2 \sum_{i \in A, j} D_{ij}^{OAO} (H_{ij}^{OAO} + F_{ij}^{OAO}) + # In symmetric orthogonalization, AO index mapping is perfectly preserved in OAO. + idx = self.frag_idx[ifrag] e_frag_elec = 0.5 * cp.sum( - dm_emb[:n_frag, :] * (self.h_emb[ifrag][:n_frag, :] + fock_emb[:n_frag, :]) + dm_full_oao_inner[idx, :] * (hcore_oao[idx, :] + fock_oao[idx, :]) ) # Extract Fragment Nuclear Energy @@ -430,11 +453,13 @@ def kernel(self): for ifrag in range(self.nfrags): idx = self.frag_idx[ifrag] idx_mesh = cp.ix_(idx, idx) - # Cost function: \Delta D = D_inner - D_outer over fragment blocks + + # Cost function: \Delta D = D_inner_full - D_outer_full over fragment blocks diff = dm_inners[ifrag][idx_mesh] - dm_full_ao[idx_mesh] error += float(cp.linalg.norm(diff)) - # Simple gradient descent step with damping factor + # Simple gradient descent step + # Note: 0.5 is a hyperparameter. If it oscillates, reduce it (e.g. to 0.1). 
self.u[idx_mesh] -= 0.5 * diff print(f"Macro Iter {macro_iter + 1:2d} | E_DMET = {e_tot:.8f} | max(dD) = {error:.6e}") From 94ed8e4e2295fa39a3bf9ea0fb5c41d2f7c8e17f Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 13 May 2026 11:12:51 +0800 Subject: [PATCH 04/30] runable, but needs debug --- gpu4pyscf/dmet/__init__.py | 11 +-- gpu4pyscf/dmet/dmet.py | 108 ++++++++++++--------- gpu4pyscf/dmet/tests/test_dmet.py | 150 ++++++++++++------------------ 3 files changed, 128 insertions(+), 141 deletions(-) diff --git a/gpu4pyscf/dmet/__init__.py b/gpu4pyscf/dmet/__init__.py index 883b3e735..3b9c8ea05 100644 --- a/gpu4pyscf/dmet/__init__.py +++ b/gpu4pyscf/dmet/__init__.py @@ -13,13 +13,4 @@ # limitations under the License. -from .dmet import ( - DMET, - get_fragment_ao_indices, - schmidt_decompose, - build_embedding_basis, - build_core_dm, - transform_h1, - transform_eri, - lowdin_orth, -) +from .dmet import DMET diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py index c0281c352..0ccb8ca23 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/dmet/dmet.py @@ -61,34 +61,50 @@ def get_fragment_ao_indices(mol, frag_atoms): return indices -def schmidt_decompose(dm_full, env_idx, threshold=1e-5): +def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): """ - Schmidt decomposition. + Schmidt decomposition via SVD of the occupied orbital coefficients on the fragment. + Strictly follows the original 2012 DMET formulation. 
""" - dm = _as_cupy(dm_full) + mo_coeff_oao = _as_cupy(mo_coeff_oao) + mo_occ = _as_cupy(mo_occ) env_idx = _as_cupy(env_idx) - if env_idx.size == 0: - return (cp.zeros((0, 0)), - cp.zeros((0, 0)), - {'core': cp.zeros(0), 'bath': cp.zeros(0), 'virtual': cp.zeros(0), 'n_core_electrons': 0}) - - D_env = dm[env_idx[:, None], env_idx[None, :]] - D_env = 0.5 * (D_env + D_env.T) - - eigvals, eigvecs = cp.linalg.eigh(D_env) - - is_core = eigvals > (2.0 - threshold) - is_virt = eigvals < threshold - is_bath = ~(is_core | is_virt) - - bath_orb = eigvecs[:, is_bath] - core_orb = eigvecs[:, is_core] - + frag_idx = _as_cupy(frag_idx) + + # Filter strictly occupied orbitals + occ_mask = mo_occ > 1e-8 + C_occ = mo_coeff_oao[:, occ_mask] + + if env_idx.size == 0 or C_occ.shape[1] == 0: + return (cp.zeros((0, 0)), cp.zeros((0, 0)), + {'n_core_electrons': 0}) + + # Fragment block of occupied orbitals + C_A = C_occ[frag_idx, :] + + # SVD of C_A: C_A = U * S * Vh + U, S, Vh = cp.linalg.svd(C_A, full_matrices=True) + + # Rotate all occupied orbitals according to Vh + C_rot = C_occ @ Vh.T + + is_bath = S > threshold + is_core_small = S <= threshold + n_sv = len(S) + + # Entangled bath orbitals (environment part) + bath_orb = C_rot[env_idx, :n_sv][:, is_bath] + norms = cp.linalg.norm(bath_orb, axis=0) + norms[norms < 1e-12] = 1.0 # Safe division + bath_orb = bath_orb / norms + + # Pure environment core orbitals come from null space + small singular values + core_orb_small = C_rot[env_idx, :n_sv][:, is_core_small] + core_orb_null = C_rot[env_idx, n_sv:] + core_orb = cp.hstack([core_orb_small, core_orb_null]) + info = { - 'core': eigvals[is_core], - 'bath': eigvals[is_bath], - 'virtual': eigvals[is_virt], - 'n_core_electrons': 2 * int(is_core.sum()), + 'n_core_electrons': 2 * core_orb.shape[1] } return bath_orb, core_orb, info @@ -245,14 +261,15 @@ def __init__(self, mf_outer, mf_inner, fragments, self.dm_emb_init = [None] * self.nfrags self.e_inner = [None] * self.nfrags self.e_tot = None 
- self.u = cp.zeros((nao, nao)) # Global correlation potential + self.u_oao = cp.zeros((nao, nao)) # Global correlation potential - def build_bath(self, ifrag, dm_full_oao, X): + def build_bath(self, ifrag, mo_coeff, mo_occ, X_inv, X): """ Run the Schmidt decomposition for a specific fragment. """ + mo_coeff_oao = X_inv @ _as_cupy(mo_coeff) bath_orb, core_orb, info = schmidt_decompose( - dm_full_oao, self.frag_idx[ifrag], self.env_idx[ifrag], self.threshold) + mo_coeff_oao, mo_occ, self.frag_idx[ifrag], self.env_idx[ifrag], self.threshold) nao_oao = X.shape[1] B_oao = build_embedding_basis(nao_oao, self.frag_idx[ifrag], self.env_idx[ifrag], bath_orb) @@ -301,7 +318,6 @@ def build_embedded_hamiltonian(self, ifrag, hcore_orig): return self def _build_inner_mf(self, ifrag, dm_full_ao): - """Instantiate the inner SCF on the embedded mole.""" nemb = self.B[ifrag].shape[1] n_total_electrons = int(self.full_mol.nelectron) n_emb_electrons = n_total_electrons - int(self.eig_info[ifrag]['n_core_electrons']) @@ -359,6 +375,14 @@ def _get_jk(mol=None, dm=None, hermi=1, with_j=True, with_k=True, omega=None): mf_inner.get_jk = _get_jk + def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): + if dm is None: + dm = mf_inner.make_rdm1() + vj, vk = _get_jk(mol, dm, hermi=hermi) + return vj - 0.5 * vk + + mf_inner.get_veff = _get_veff + s_ao = _as_cupy(self.mf_outer.get_ovlp()) sB = s_ao @ self.B[ifrag] dm_emb_init = sB.T @ dm_full_ao @ sB @@ -391,20 +415,22 @@ def kernel(self): X, X_inv = lowdin_orth(s_ao) for macro_iter in range(self.max_macro_iter): - # 1. 
Run low-level SCF with current correlation potential 'u' - self.mf_outer.get_hcore = lambda *args, **kwargs: cp.asnumpy(hcore_orig + self.u) + u_ao = X_inv @ self.u_oao @ X_inv + + # Run low-level SCF with current correlation potential 'u' + self.mf_outer.get_hcore = lambda *args, **kwargs: cp.asnumpy(hcore_orig + u_ao) self.mf_outer.mo_coeff = None # Force re-run self.mf_outer.kernel() + mo_coeff = _as_cupy(self.mf_outer.mo_coeff) + mo_occ = _as_cupy(self.mf_outer.mo_occ) dm_full_ao = _as_cupy(self.mf_outer.make_rdm1()) - dm_full_oao = X_inv @ dm_full_ao @ X_inv e_tot = 0.0 dm_inners = [] - # 2. Loop over all fragments for ifrag in range(self.nfrags): - self.build_bath(ifrag, dm_full_oao, X) + self.build_bath(ifrag, mo_coeff, mo_occ, X_inv, X) self.build_embedded_hamiltonian(ifrag, hcore_orig) mf_inner = self._build_inner_mf(ifrag, dm_full_ao) self.solve_embedded(ifrag) @@ -418,16 +444,16 @@ def kernel(self): dm_inner_full_ao = self.dm_core[ifrag] + dm_inner_active_ao dm_inners.append(dm_inner_full_ao) - # 2.1 Reconstruct the full effective Fock matrix for the embedded system in AO + # Reconstruct the full effective Fock matrix for the embedded system in AO vj, vk = self.mf_outer.get_jk(self.full_mol, dm_inner_full_ao) fock_full_ao = hcore_orig + _as_cupy(vj) - 0.5 * _as_cupy(vk) - # 2.2 Transform D, H, and F to the Lowdin orthogonalized (OAO) basis + # Transform D, H, and F to the Lowdin orthogonalized (OAO) basis dm_full_oao_inner = X_inv @ dm_inner_full_ao @ X_inv hcore_oao = X.T @ hcore_orig @ X fock_oao = X.T @ fock_full_ao @ X - # 2.3 Extract Fragment Energy: 1/2 \sum_{i \in A, j} D_{ij}^{OAO} (H_{ij}^{OAO} + F_{ij}^{OAO}) + # Extract Fragment Energy: 1/2 \sum_{i \in A, j} D_{ij}^{OAO} (H_{ij}^{OAO} + F_{ij}^{OAO}) # In symmetric orthogonalization, AO index mapping is perfectly preserved in OAO. 
idx = self.frag_idx[ifrag] e_frag_elec = 0.5 * cp.sum( @@ -443,24 +469,22 @@ def kernel(self): for j in range(self.full_mol.natm): if i == j: continue r = np.linalg.norm(coords[i] - coords[j]) - factor = 0.5 if j in frag_atoms else 1.0 - e_frag_nuc += factor * charges[i] * charges[j] / r + e_frag_nuc += 0.5 * charges[i] * charges[j] / r e_tot += float(e_frag_elec) + e_frag_nuc - # 3. Macroscopic iteration: update correlation potential 'u' + # Mupdate correlation potential 'u' error = 0.0 for ifrag in range(self.nfrags): idx = self.frag_idx[ifrag] idx_mesh = cp.ix_(idx, idx) - # Cost function: \Delta D = D_inner_full - D_outer_full over fragment blocks diff = dm_inners[ifrag][idx_mesh] - dm_full_ao[idx_mesh] error += float(cp.linalg.norm(diff)) # Simple gradient descent step - # Note: 0.5 is a hyperparameter. If it oscillates, reduce it (e.g. to 0.1). - self.u[idx_mesh] -= 0.5 * diff + # TODO: 0.5 is a hyperparameter. If it oscillates, reduce it (e.g. to 0.1). + self.u_oao[idx_mesh] -= 0.5 * diff print(f"Macro Iter {macro_iter + 1:2d} | E_DMET = {e_tot:.8f} | max(dD) = {error:.6e}") self.e_tot = e_tot diff --git a/gpu4pyscf/dmet/tests/test_dmet.py b/gpu4pyscf/dmet/tests/test_dmet.py index a3c8194ea..93312df3d 100644 --- a/gpu4pyscf/dmet/tests/test_dmet.py +++ b/gpu4pyscf/dmet/tests/test_dmet.py @@ -12,110 +12,82 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Basic correctness tests for the single-shot DMET driver. - -The cancellation property used here: - - For a closed-shell system computed at the SAME mean-field level - (i.e. ``mf_inner`` and ``mf_outer`` share the same method and the - same orbital basis), the single-shot DMET total energy must - reproduce the full-system mean-field total energy exactly. 
-""" import unittest -import numpy as np -from pyscf import gto, scf - -from gpu4pyscf.dmet import DMET +import cupy as cp +from pyscf import gto +from gpu4pyscf.scf import hf as gpu_hf +from gpu4pyscf.dmet import DMET class KnownValues(unittest.TestCase): @classmethod def setUpClass(cls): - cls.mol = gto.M( - atom=''' - H 0.0 0.0 0.00 - H 0.0 0.0 0.74 - H 0.0 0.0 2.20 - H 0.0 0.0 2.94 - ''', - basis='sto-3g', - verbose=0, - ) - cls.mf_ref = scf.RHF(cls.mol) - cls.e_ref = cls.mf_ref.kernel() - - def test_self_consistency_two_atom_fragment(self): - # A single-shot DMET with the same low- and high-level method - # must reproduce the full-system mean-field energy. - mf_outer = scf.RHF(self.mol) - mf_outer.kernel() - - mf_inner_template = scf.RHF(self.mol) - - dmet = DMET( - mf_outer=mf_outer, - mf_inner=mf_inner_template, - frag_atoms=[0, 1], - threshold=1e-8, - ) - e_dmet = dmet.kernel() - self.assertAlmostEqual(e_dmet, self.e_ref, places=7) + cls.mol = gto.Mole() + cls.mol.atom = ''' + H 0.0 0.0 0.0 + H 0.0 0.0 1.0 + H 0.0 0.0 2.0 + H 0.0 0.0 3.0 + ''' + cls.mol.basis = 'sto-3g' + cls.mol.spin = 0 + cls.mol.charge = 0 + cls.mol.verbose = 0 + cls.mol.build() - def test_self_consistency_single_atom_fragment(self): - mf_outer = scf.RHF(self.mol) - mf_outer.kernel() + cls.fragments = [[0, 1], [2, 3]] - mf_inner_template = scf.RHF(self.mol) + cls.mf_outer = gpu_hf.RHF(cls.mol) + cls.mf_inner_template = gpu_hf.RHF(cls.mol) - dmet = DMET( - mf_outer=mf_outer, - mf_inner=mf_inner_template, - frag_atoms=[0], - threshold=1e-8, - ) - e_dmet = dmet.kernel() - self.assertAlmostEqual(e_dmet, self.e_ref, places=7) - - def test_bath_summary(self): - mf_outer = scf.RHF(self.mol) - mf_outer.kernel() - - dmet = DMET( - mf_outer=mf_outer, - mf_inner=scf.RHF(self.mol), - frag_atoms=[0, 1], - threshold=1e-6, + @classmethod + def tearDownClass(cls): + del cls.mol + del cls.mf_outer + del cls.mf_inner_template + cp.get_default_memory_pool().free_all_blocks() + + def 
test_dmet_initialization(self): + dmet_solver = DMET( + mf_outer=self.mf_outer, + mf_inner=self.mf_inner_template, + fragments=self.fragments, + threshold=1e-5 ) - dmet.build_bath() - info = dmet.bath_summary() - # Two H atoms in STO-3G means 2 fragment AOs. - self.assertEqual(info['n_fragment_aos'], 2) - # Number of (bath + core + virtual) eigenvalues equals the - # environment AO count. - self.assertEqual( - info['n_bath'] + info['n_core'] + info['n_virtual'], - self.mol.nao_nr() - info['n_fragment_aos'], + + nao = self.mol.nao_nr() + + self.assertEqual(dmet_solver.nfrags, 2, "Number of fragments should be 2.") + self.assertEqual(len(dmet_solver.frag_idx), 2, "Fragment indices list should have length 2.") + + self.assertEqual(dmet_solver.u_oao.shape, (nao, nao), "Correlation potential u_oao should be of shape (nao, nao).") + self.assertTrue(isinstance(dmet_solver.u_oao, cp.ndarray), "Correlation potential should be a CuPy array.") + + def test_dmet_execution_and_convergence(self): + dmet_solver = DMET( + mf_outer=self.mf_outer, + mf_inner=self.mf_inner_template, + fragments=self.fragments, + threshold=1e-5, + max_macro_iter=20, + macro_tol=1e-3 ) - def test_decomposition_keys(self): - mf_outer = scf.RHF(self.mol) - mf_outer.kernel() + e_tot = dmet_solver.kernel() - dmet = DMET( - mf_outer=mf_outer, - mf_inner=scf.RHF(self.mol), - frag_atoms=[0, 1], - threshold=1e-8, - ) - dmet.kernel() - decomp = dmet.energy_decomposition() - for key in ('E_nuc', 'E_core', 'E_inner', 'E_DMET'): - self.assertIn(key, decomp) + self.assertIsNotNone(e_tot, "DMET kernel should return a valid energy value, not None.") + self.assertIsInstance(e_tot, float, "The returned total energy must be a float.") + + self.assertLess(e_tot, 0.0, "Total energy of H4 molecule should be negative.") + + self.assertIsNotNone(dmet_solver.bath_orb[0], "Bath orbitals for fragment 0 should be generated.") + self.assertIsNotNone(dmet_solver.h_emb[0], "Embedded Hamiltonian for fragment 0 should be generated.") 
+ + self.assertTrue(isinstance(dmet_solver.dm_core[0], cp.ndarray), "Core density matrix should be a CuPy array.") if __name__ == '__main__': - print("Tests for single-shot DMET") - unittest.main() + print("Full Tests for DMET") + unittest.main() \ No newline at end of file From bc48b6dc4883536088b92de62c03493a6e72663c Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 14 May 2026 16:36:08 +0800 Subject: [PATCH 05/30] H4 passed, C4 has bug, needs debug --- gpu4pyscf/dmet/dmet.py | 86 ++++++++++++++++++------------- gpu4pyscf/dmet/tests/test_dmet.py | 27 ++++++---- gpu4pyscf/scf/hf.py | 33 ++++++++++++ 3 files changed, 100 insertions(+), 46 deletions(-) diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py index 0ccb8ca23..a53b6a289 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/dmet/dmet.py @@ -17,6 +17,7 @@ import numpy as np import cupy as cp from pyscf import gto +import pyscf.ao2mo # Added for exact 4-index ERI transformation def _as_cupy(x): @@ -26,9 +27,6 @@ def _as_cupy(x): def lowdin_orth(s): - """ - Loewdin symmetric orthogonalization. - """ s = _as_cupy(s) s = 0.5 * (s + s.T) eigvals, eigvecs = cp.linalg.eigh(s) @@ -38,8 +36,8 @@ def lowdin_orth(s): eigvecs = eigvecs[:, keep] inv_sqrt = 1.0 / cp.sqrt(eigvals) sqrt = cp.sqrt(eigvals) - X = (eigvecs * inv_sqrt) @ eigvecs.T # S^{-1/2} - X_inv = (eigvecs * sqrt) @ eigvecs.T # S^{+1/2} + X = (eigvecs * inv_sqrt) @ eigvecs.T # S^{-1/2} + X_inv = (eigvecs * sqrt) @ eigvecs.T # S^{+1/2} return X, X_inv @@ -63,7 +61,6 @@ def get_fragment_ao_indices(mol, frag_atoms): def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): """ - Schmidt decomposition via SVD of the occupied orbital coefficients on the fragment. Strictly follows the original 2012 DMET formulation. 
""" mo_coeff_oao = _as_cupy(mo_coeff_oao) @@ -71,7 +68,6 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): env_idx = _as_cupy(env_idx) frag_idx = _as_cupy(frag_idx) - # Filter strictly occupied orbitals occ_mask = mo_occ > 1e-8 C_occ = mo_coeff_oao[:, occ_mask] @@ -79,13 +75,10 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): return (cp.zeros((0, 0)), cp.zeros((0, 0)), {'n_core_electrons': 0}) - # Fragment block of occupied orbitals C_A = C_occ[frag_idx, :] - # SVD of C_A: C_A = U * S * Vh U, S, Vh = cp.linalg.svd(C_A, full_matrices=True) - # Rotate all occupied orbitals according to Vh C_rot = C_occ @ Vh.T is_bath = S > threshold @@ -95,7 +88,7 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): # Entangled bath orbitals (environment part) bath_orb = C_rot[env_idx, :n_sv][:, is_bath] norms = cp.linalg.norm(bath_orb, axis=0) - norms[norms < 1e-12] = 1.0 # Safe division + norms[norms < 1e-12] = 1.0 # This should not happen bath_orb = bath_orb / norms # Pure environment core orbitals come from null space + small singular values @@ -113,6 +106,9 @@ def build_embedding_basis(nao, frag_idx, env_idx, bath_orb): """ Construct the AO -> embedded transformation matrix B. """ + # Due to the Carlson-Keller theorem, the lowdin OAO basis + # and the AO basis is 1-to-1 match. + # Therefore, we can use the fragment indices to construct the embedding matrix. frag_idx = _as_cupy(frag_idx) env_idx = _as_cupy(env_idx) n_frag = frag_idx.size @@ -207,8 +203,8 @@ class DMET: ---------- mf_outer : SCF object (gpu4pyscf) Low-level mean-field on the full system. - mf_inner : SCF/DFT object (gpu4pyscf) - High-level mean-field template applied to the embedded cluster. + mf_inner : SCF/DFT/post-HF object (gpu4pyscf) + High-level mean-field or post-HF template applied to the embedded cluster. fragments : list of lists of int List of fragments, where each fragment is a list of atom indices. 
threshold : float @@ -248,7 +244,6 @@ def __init__(self, mf_outer, mf_inner, fragments, env_mask[f_idx] = False self.env_idx.append(all_idx[env_mask]) - # ---- intermediate / output caches (lists for multiple fragments) ---- self.bath_orb = [None] * self.nfrags self.core_orb = [None] * self.nfrags self.eig_info = [None] * self.nfrags @@ -313,11 +308,12 @@ def build_embedded_hamiltonian(self, ifrag, hcore_orig): else: e_core = 0.0 - self.h_emb[ifrag] = h_emb + self.h_emb[ifrag] = h_emb # embeding basis self.e_core[ifrag] = float(e_core) return self def _build_inner_mf(self, ifrag, dm_full_ao): + # TODO: Handle post-HF case! nemb = self.B[ifrag].shape[1] n_total_electrons = int(self.full_mol.nelectron) n_emb_electrons = n_total_electrons - int(self.eig_info[ifrag]['n_core_electrons']) @@ -375,6 +371,7 @@ def _get_jk(mol=None, dm=None, hermi=1, with_j=True, with_k=True, omega=None): mf_inner.get_jk = _get_jk + # TODO: this is only works for SCF, even not for DFT or post-HF! def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): if dm is None: dm = mf_inner.make_rdm1() @@ -382,7 +379,8 @@ def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): return vj - 0.5 * vk mf_inner.get_veff = _get_veff - + + # using s to make the upper index to the lower index s_ao = _as_cupy(self.mf_outer.get_ovlp()) sB = s_ao @ self.B[ifrag] dm_emb_init = sB.T @ dm_full_ao @ sB @@ -396,7 +394,6 @@ def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): return mf_inner def solve_embedded(self, ifrag): - """Run the high-level embedded SCF for a specific fragment.""" e_inner = self.mf_inner[ifrag].kernel(dm0=self.dm_emb_init[ifrag]) if isinstance(e_inner, tuple): e_inner = float(self.mf_inner[ifrag].e_tot) @@ -406,10 +403,6 @@ def solve_embedded(self, ifrag): return e_inner def kernel(self): - """ - Drive the macroscopic-iterating DMET workflow. - Returns the DMET total energy. 
- """ hcore_orig = _as_cupy(self.mf_outer.get_hcore()) s_ao = _as_cupy(self.mf_outer.get_ovlp()) X, X_inv = lowdin_orth(s_ao) @@ -444,21 +437,38 @@ def kernel(self): dm_inner_full_ao = self.dm_core[ifrag] + dm_inner_active_ao dm_inners.append(dm_inner_full_ao) - # Reconstruct the full effective Fock matrix for the embedded system in AO - vj, vk = self.mf_outer.get_jk(self.full_mol, dm_inner_full_ao) - fock_full_ao = hcore_orig + _as_cupy(vj) - 0.5 * _as_cupy(vk) + # Compute Embedded 4-index ERI for Exact Correlation Energy + nemb = B.shape[1] + # TODO: this can be replaced by a more efficient routine + B_cpu = cp.asnumpy(B) + eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) + eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) # Restore to 4D array + eri_emb = _as_cupy(eri_emb_cpu) - # Transform D, H, and F to the Lowdin orthogonalized (OAO) basis - dm_full_oao_inner = X_inv @ dm_inner_full_ao @ X_inv - hcore_oao = X.T @ hcore_orig @ X - fock_oao = X.T @ fock_full_ao @ X + # Extract 1-RDM and 2-RDM + dm1_emb = dm_emb + if hasattr(mf_inner, 'make_rdm2'): + dm2_emb = _as_cupy(mf_inner.make_rdm2()) + else: + # using the HF 2-RDM formulation + dm2_emb = (cp.einsum('ij,kl->ijkl', dm1_emb, dm1_emb) + - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb, dm1_emb)) + + # By construction, fragment orbitals are precisely the first n_frag indices + n_frag = len(self.fragments[ifrag]) - # Extract Fragment Energy: 1/2 \sum_{i \in A, j} D_{ij}^{OAO} (H_{ij}^{OAO} + F_{ij}^{OAO}) - # In symmetric orthogonalization, AO index mapping is perfectly preserved in OAO. + # Extract Fragment Electronic Energy + e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * self.h_emb[ifrag][:n_frag, :]) + e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) + + # Extract Fragment Core Energy Partition in AO basis + # TODO: this is only works for SCF, even not for DFT or post-HF! 
+ vj_core, vk_core = self.mf_outer.get_jk(self.full_mol, self.dm_core[ifrag]) + v_core_ao = _as_cupy(vj_core) - 0.5 * _as_cupy(vk_core) idx = self.frag_idx[ifrag] - e_frag_elec = 0.5 * cp.sum( - dm_full_oao_inner[idx, :] * (hcore_oao[idx, :] + fock_oao[idx, :]) - ) + + e_frag_core = cp.sum(self.dm_core[ifrag][idx, :] * hcore_orig[idx, :]) + \ + 0.5 * cp.sum(self.dm_core[ifrag][idx, :] * v_core_ao[idx, :]) # Extract Fragment Nuclear Energy e_frag_nuc = 0.0 @@ -471,15 +481,19 @@ def kernel(self): r = np.linalg.norm(coords[i] - coords[j]) e_frag_nuc += 0.5 * charges[i] * charges[j] / r - e_tot += float(e_frag_elec) + e_frag_nuc + e_tot += float(e_frag_elec) + float(e_frag_core) + e_frag_nuc - # Mupdate correlation potential 'u' + # Strictly use OAO basis to evaluate density differences + dm_low_oao = X_inv @ dm_full_ao @ X_inv + error = 0.0 for ifrag in range(self.nfrags): idx = self.frag_idx[ifrag] idx_mesh = cp.ix_(idx, idx) - diff = dm_inners[ifrag][idx_mesh] - dm_full_ao[idx_mesh] + dm_high_oao = X_inv @ dm_inners[ifrag] @ X_inv + + diff = dm_high_oao[idx_mesh] - dm_low_oao[idx_mesh] error += float(cp.linalg.norm(diff)) # Simple gradient descent step diff --git a/gpu4pyscf/dmet/tests/test_dmet.py b/gpu4pyscf/dmet/tests/test_dmet.py index 93312df3d..ee0ba7f6f 100644 --- a/gpu4pyscf/dmet/tests/test_dmet.py +++ b/gpu4pyscf/dmet/tests/test_dmet.py @@ -14,10 +14,12 @@ import unittest +import numpy as np import cupy as cp from pyscf import gto from gpu4pyscf.scf import hf as gpu_hf from gpu4pyscf.dmet import DMET +from gpu4pyscf import dmet class KnownValues(unittest.TestCase): @@ -34,7 +36,7 @@ def setUpClass(cls): cls.mol.basis = 'sto-3g' cls.mol.spin = 0 cls.mol.charge = 0 - cls.mol.verbose = 0 + # cls.mol.verbose = 0 cls.mol.build() cls.fragments = [[0, 1], [2, 3]] @@ -47,7 +49,6 @@ def tearDownClass(cls): del cls.mol del cls.mf_outer del cls.mf_inner_template - cp.get_default_memory_pool().free_all_blocks() def test_dmet_initialization(self): dmet_solver = DMET( @@ 
-65,6 +66,18 @@ def test_dmet_initialization(self): self.assertEqual(dmet_solver.u_oao.shape, (nao, nao), "Correlation potential u_oao should be of shape (nao, nao).") self.assertTrue(isinstance(dmet_solver.u_oao, cp.ndarray), "Correlation potential should be a CuPy array.") + def test_lowdin(self): + ovlp = self.mf_outer.get_ovlp() + X, _ = dmet.dmet.lowdin_orth(ovlp) + X_ref = cp.array([[ 1.1214051976, -0.3278815514, 0.0611473762, -0.0095874461], + [-0.3278815514, 1.2643824327, -0.3597401082, 0.0611473762], + [ 0.0611473762, -0.3597401082, 1.2643824327, -0.3278815514], + [-0.0095874461, 0.0611473762, -0.3278815514, 1.1214051976]]) + assert np.abs(X - X_ref).max() < 1e-8, "Lowdin orthogonalization should yield a close-to-identity matrix." + + def test_schmidt(self): + pass + def test_dmet_execution_and_convergence(self): dmet_solver = DMET( mf_outer=self.mf_outer, @@ -77,15 +90,9 @@ def test_dmet_execution_and_convergence(self): e_tot = dmet_solver.kernel() - self.assertIsNotNone(e_tot, "DMET kernel should return a valid energy value, not None.") - self.assertIsInstance(e_tot, float, "The returned total energy must be a float.") - - self.assertLess(e_tot, 0.0, "Total energy of H4 molecule should be negative.") - - self.assertIsNotNone(dmet_solver.bath_orb[0], "Bath orbitals for fragment 0 should be generated.") - self.assertIsNotNone(dmet_solver.h_emb[0], "Embedded Hamiltonian for fragment 0 should be generated.") + e_tot_ref = self.mf_outer.kernel() - self.assertTrue(isinstance(dmet_solver.dm_core[0], cp.ndarray), "Core density matrix should be a CuPy array.") + assert np.abs(e_tot - e_tot_ref) < 1e-8, "DMET energy should be close to the reference energy." 
if __name__ == '__main__': diff --git a/gpu4pyscf/scf/hf.py b/gpu4pyscf/scf/hf.py index 56c1d30eb..f18a84cac 100644 --- a/gpu4pyscf/scf/hf.py +++ b/gpu4pyscf/scf/hf.py @@ -662,6 +662,34 @@ def __call__(self, mol_or_geom, **kwargs): self._last_mol_fp = mol.ao_loc return e_tot + +def make_rdm2(mo_coeff, mo_occ, **kwargs): + '''Two-particle density matrix in AO representation + + NOTE the indices of the two-particle density matrix is ordered to + + dm2[p,q,r,s] = . + + HF energy can be computed + E = einsum('pq,qp', hcore, 1pdm) + einsum('pqrs,pqrs', eri, 2pdm) / 2 + where h1[p,q] = and eri[p,q,r,s] = (pq|rs) +to make the density matrix consistent with the density matrix obtained + from post-HF methods, + + Args: + mo_coeff : 2D ndarray + Orbital coefficients. Each column is one orbital. + mo_occ : 1D ndarray + Occupancy + Returns: + Two-particle density matrix, 4D ndarray + ''' + dm1 = make_rdm1(mo_coeff, mo_occ, **kwargs) + dm2 = (cupy.einsum('ij,kl->ijkl', dm1, dm1) + - cupy.einsum('ij,kl->iklj', dm1, dm1)/2) + return dm2 + + class SCF(pyscf_lib.StreamObject): # attributes @@ -869,6 +897,11 @@ def make_rdm1(self, mo_coeff=None, mo_occ=None, **kwargs): if mo_coeff is None: mo_coeff = self.mo_coeff return make_rdm1(mo_coeff, mo_occ) + def make_rdm2(self, mo_coeff=None, mo_occ=None, **kwargs): + if mo_occ is None: mo_occ = self.mo_occ + if mo_coeff is None: mo_coeff = self.mo_coeff + return make_rdm2(mo_coeff, mo_occ) + def dip_moment(self, mol=None, dm=None, unit='Debye', origin=None, verbose=logger.NOTE): if mol is None: mol = self.mol From fa41b1268cc6764ada087fdfcfe96fb6ae27017c Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 15 May 2026 10:32:32 +0800 Subject: [PATCH 06/30] runable codes, just for HF embeding HF --- gpu4pyscf/dmet/dmet.py | 40 +++++++++++++++------------- gpu4pyscf/dmet/tests/test_dmet.py | 44 ++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py 
index a53b6a289..6cf499f00 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/dmet/dmet.py @@ -17,6 +17,8 @@ import numpy as np import cupy as cp from pyscf import gto +from pyscf import lib +from gpu4pyscf.lib import logger import pyscf.ao2mo # Added for exact 4-index ERI transformation @@ -195,7 +197,7 @@ def _instantiate_inner_mf(mf_template, embedded_mol): return new_mf -class DMET: +class DMET(lib.StreamObject): """ Density Matrix Embedding Theory driver with macroscopic iteration. @@ -216,12 +218,17 @@ class DMET: """ def __init__(self, mf_outer, mf_inner, fragments, - threshold=1e-5, max_macro_iter=20, macro_tol=1e-4): + threshold=1e-5, max_macro_iter=20, macro_tol=1e-4, verbose=None): if mf_outer is None or mf_inner is None: raise ValueError("mf_outer and mf_inner are both required.") if not fragments: raise ValueError("Provide a list of fragments to define the DMET regions.") - + + if verbose is None: + verbose = mf_outer.verbose + else: + verbose = int(verbose) + self.log = logger.new_logger(mf_outer, verbose) self.mf_outer = mf_outer self.mf_inner_template = mf_inner self.full_mol = mf_outer.mol @@ -408,6 +415,7 @@ def kernel(self): X, X_inv = lowdin_orth(s_ao) for macro_iter in range(self.max_macro_iter): + self.log.info(f"Macro Iter {macro_iter}") u_ao = X_inv @ self.u_oao @ X_inv # Run low-level SCF with current correlation potential 'u' @@ -455,22 +463,18 @@ def kernel(self): - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb, dm1_emb)) # By construction, fragment orbitals are precisely the first n_frag indices - n_frag = len(self.fragments[ifrag]) + n_frag = self.frag_idx[ifrag].size - # Extract Fragment Electronic Energy - e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * self.h_emb[ifrag][:n_frag, :]) - e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) - - # Extract Fragment Core Energy Partition in AO basis # TODO: this is only works for SCF, even not for DFT or post-HF! 
vj_core, vk_core = self.mf_outer.get_jk(self.full_mol, self.dm_core[ifrag]) v_core_ao = _as_cupy(vj_core) - 0.5 * _as_cupy(vk_core) - idx = self.frag_idx[ifrag] + v_core_emb = B.T @ v_core_ao @ B - e_frag_core = cp.sum(self.dm_core[ifrag][idx, :] * hcore_orig[idx, :]) + \ - 0.5 * cp.sum(self.dm_core[ifrag][idx, :] * v_core_ao[idx, :]) + # Apply 0.5 factor to core potential to avoid double counting across fragments + h_eval = self.h_emb[ifrag] - 0.5 * v_core_emb + e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * h_eval[:n_frag, :]) + e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) - # Extract Fragment Nuclear Energy e_frag_nuc = 0.0 coords = self.full_mol.atom_coords() charges = self.full_mol.atom_charges() @@ -480,10 +484,10 @@ def kernel(self): if i == j: continue r = np.linalg.norm(coords[i] - coords[j]) e_frag_nuc += 0.5 * charges[i] * charges[j] / r - - e_tot += float(e_frag_elec) + float(e_frag_core) + e_frag_nuc + + self.log.info(f"Fragment {ifrag} Electronic Energy: {float(e_frag_elec):.8f} | Nuclear Energy: {e_frag_nuc:.8f}") + e_tot += float(e_frag_elec) + e_frag_nuc - # Strictly use OAO basis to evaluate density differences dm_low_oao = X_inv @ dm_full_ao @ X_inv error = 0.0 @@ -500,10 +504,10 @@ def kernel(self): # TODO: 0.5 is a hyperparameter. If it oscillates, reduce it (e.g. to 0.1). 
self.u_oao[idx_mesh] -= 0.5 * diff - print(f"Macro Iter {macro_iter + 1:2d} | E_DMET = {e_tot:.8f} | max(dD) = {error:.6e}") + self.log.info(f"Macro Iter {macro_iter + 1:2d} | E_DMET = {e_tot:.8f} | max(dD) = {error:.6e}") self.e_tot = e_tot if error < self.macro_tol: - print("DMET macroscopic iterations converged.") + self.log.info("DMET macroscopic iterations converged.") break return self.e_tot diff --git a/gpu4pyscf/dmet/tests/test_dmet.py b/gpu4pyscf/dmet/tests/test_dmet.py index ee0ba7f6f..81a7b3a0f 100644 --- a/gpu4pyscf/dmet/tests/test_dmet.py +++ b/gpu4pyscf/dmet/tests/test_dmet.py @@ -36,7 +36,7 @@ def setUpClass(cls): cls.mol.basis = 'sto-3g' cls.mol.spin = 0 cls.mol.charge = 0 - # cls.mol.verbose = 0 + cls.mol.verbose = 0 cls.mol.build() cls.fragments = [[0, 1], [2, 3]] @@ -44,11 +44,38 @@ def setUpClass(cls): cls.mf_outer = gpu_hf.RHF(cls.mol) cls.mf_inner_template = gpu_hf.RHF(cls.mol) + cls.mol2 = gto.Mole() + cls.mol2.atom = ''' + C -0.76091 -0.00000 0.00000 + C 0.76091 -0.00000 0.00000 + H -1.16001 1.02029 0.00000 + H -1.16001 -0.51014 -0.88357 + H -1.16001 -0.51014 0.88357 + H 1.16001 -1.02029 0.00000 + H 1.16001 0.51014 0.88357 + H 1.16001 0.51014 -0.88357 + ''' + cls.mol2.basis = '6-31g' + cls.mol2.spin = 0 + cls.mol2.charge = 0 + cls.mol2.verbose = 0 + cls.mol2.build() + + cls.fragments2 = [[0, 2, 3, 4], [1, 5, 6, 7]] + + cls.mf_outer2 = gpu_hf.RHF(cls.mol2) + cls.mf_outer2.conv_tol = 1e-12 + cls.mf_inner_template2 = gpu_hf.RHF(cls.mol2) + cls.mf_inner_template2.conv_tol = 1e-12 + @classmethod def tearDownClass(cls): del cls.mol del cls.mf_outer del cls.mf_inner_template + del cls.mol2 + del cls.mf_outer2 + del cls.mf_inner_template2 def test_dmet_initialization(self): dmet_solver = DMET( @@ -94,6 +121,21 @@ def test_dmet_execution_and_convergence(self): assert np.abs(e_tot - e_tot_ref) < 1e-8, "DMET energy should be close to the reference energy." 
+ dmet_solver2 = DMET( + mf_outer=self.mf_outer2, + mf_inner=self.mf_inner_template2, + fragments=self.fragments2, + threshold=1e-5, + max_macro_iter=20, + macro_tol=1e-3 + ) + + e_tot = dmet_solver2.kernel() + self.mf_outer2.mo_coeff = None + e_tot_ref = self.mf_outer2.kernel() + + assert np.abs(e_tot - e_tot_ref) < 1e-8, "DMET energy should be close to the reference energy." + if __name__ == '__main__': print("Full Tests for DMET") From a219258bd6982c7e27991aa647d3638490c5b896 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 15 May 2026 11:22:25 +0800 Subject: [PATCH 07/30] add a new routine for calculating fragment energies for mean field methods --- gpu4pyscf/dmet/dmet.py | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py index 6cf499f00..7b53aa4f3 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/dmet/dmet.py @@ -445,24 +445,8 @@ def kernel(self): dm_inner_full_ao = self.dm_core[ifrag] + dm_inner_active_ao dm_inners.append(dm_inner_full_ao) - # Compute Embedded 4-index ERI for Exact Correlation Energy - nemb = B.shape[1] - # TODO: this can be replaced by a more efficient routine - B_cpu = cp.asnumpy(B) - eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) - eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) # Restore to 4D array - eri_emb = _as_cupy(eri_emb_cpu) - - # Extract 1-RDM and 2-RDM dm1_emb = dm_emb - if hasattr(mf_inner, 'make_rdm2'): - dm2_emb = _as_cupy(mf_inner.make_rdm2()) - else: - # using the HF 2-RDM formulation - dm2_emb = (cp.einsum('ij,kl->ijkl', dm1_emb, dm1_emb) - - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb, dm1_emb)) - # By construction, fragment orbitals are precisely the first n_frag indices n_frag = self.frag_idx[ifrag].size # TODO: this is only works for SCF, even not for DFT or post-HF! 
@@ -473,7 +457,31 @@ def kernel(self): # Apply 0.5 factor to core potential to avoid double counting across fragments h_eval = self.h_emb[ifrag] - 0.5 * v_core_emb e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * h_eval[:n_frag, :]) - e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) + + # Check if the inner solver is a mean-field template by looking for 'get_veff' + is_mean_field = hasattr(self.mf_inner_template, 'get_veff') + + if not is_mean_field: + self.log.info("using non-mean-field solver") + nemb = B.shape[1] + # TODO: this can be replaced by a more efficient routine + B_cpu = cp.asnumpy(B) + eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) + eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) # Restore to 4D array + eri_emb = _as_cupy(eri_emb_cpu) + + if hasattr(mf_inner, 'make_rdm2'): + dm2_emb = _as_cupy(mf_inner.make_rdm2()) + else: + # Fallback using the HF 2-RDM formulation for post-HF methods lacking make_rdm2 + dm2_emb = (cp.einsum('ij,kl->ijkl', dm1_emb, dm1_emb) + - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb, dm1_emb)) + + e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) + else: + self.log.info("using mean-field solver") + vj_emb, vk_emb = mf_inner.get_jk(dm=dm1_emb) + e_frag_elec += 0.5 * cp.sum(dm1_emb[:n_frag, :] * (_as_cupy(vj_emb) - 0.5 * _as_cupy(vk_emb))[:n_frag, :]) e_frag_nuc = 0.0 coords = self.full_mol.atom_coords() @@ -504,10 +512,10 @@ def kernel(self): # TODO: 0.5 is a hyperparameter. If it oscillates, reduce it (e.g. to 0.1). 
self.u_oao[idx_mesh] -= 0.5 * diff - self.log.info(f"Macro Iter {macro_iter + 1:2d} | E_DMET = {e_tot:.8f} | max(dD) = {error:.6e}") + self.log.note(f"Macro Iter {macro_iter + 1:2d} | E_DMET = {e_tot:.8f} | max(dD) = {error:.6e}") self.e_tot = e_tot if error < self.macro_tol: - self.log.info("DMET macroscopic iterations converged.") + self.log.note("DMET macroscopic iterations converged.") break return self.e_tot From 0437ddeff73e1f1076ceb08f8df25b887b6a93b0 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 15 May 2026 14:36:42 +0800 Subject: [PATCH 08/30] WIP: adding unit test for schmidt and DFT --- gpu4pyscf/dmet/dmet.py | 61 +++++++++++++------------------ gpu4pyscf/dmet/tests/test_dmet.py | 29 ++++++++++++++- 2 files changed, 52 insertions(+), 38 deletions(-) diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py index 7b53aa4f3..7e8d75df7 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/dmet/dmet.py @@ -19,7 +19,7 @@ from pyscf import gto from pyscf import lib from gpu4pyscf.lib import logger -import pyscf.ao2mo # Added for exact 4-index ERI transformation +import pyscf.ao2mo def _as_cupy(x): @@ -257,6 +257,7 @@ def __init__(self, mf_outer, mf_inner, fragments, self.B_oao = [None] * self.nfrags self.B = [None] * self.nfrags self.dm_core = [None] * self.nfrags + self.v_core_ao = [None] * self.nfrags self.h_emb = [None] * self.nfrags self.e_core = [None] * self.nfrags self.mf_inner = [None] * self.nfrags @@ -302,10 +303,11 @@ def build_embedded_hamiltonian(self, ifrag, hcore_orig): h_ao = _as_cupy(hcore_orig) if self.eig_info[ifrag]['n_core_electrons'] > 0: - vj_core, vk_core = self.mf_outer.get_jk(mol, self.dm_core[ifrag]) - v_core_ao = _as_cupy(vj_core) - 0.5 * _as_cupy(vk_core) + v_core_ao = _as_cupy(self.mf_outer.get_veff(mol, self.dm_core[ifrag])) else: v_core_ao = cp.zeros_like(h_ao) + + self.v_core_ao[ifrag] = v_core_ao h_emb = transform_h1(h_ao + v_core_ao, self.B[ifrag]) @@ -344,9 +346,8 @@ def _build_inner_mf(self, ifrag, dm_full_ao): 
mf_inner.get_ovlp = lambda *args, **kwargs: ovlp mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc + self.e_core[ifrag] - # Overwrite get_jk to compute J and K on-the-fly using the outer MF - # without computing or storing 4-index ERIs. - def _get_jk(mol=None, dm=None, hermi=1, with_j=True, with_k=True, omega=None): + # Overwrite get_veff to compute on-the-fly using the outer MF + def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): if dm is None: dm = mf_inner.make_rdm1() dm_cp = _as_cupy(dm) @@ -358,33 +359,21 @@ def _get_jk(mol=None, dm=None, hermi=1, with_j=True, with_k=True, omega=None): else: dm_ao = cp.einsum('pi,xij,qj->xpq', B_mat, dm_cp, B_mat) - # Compute J and K in full AO basis using outer SCF's optimized routine - vj_ao, vk_ao = self.mf_outer.get_jk(self.full_mol, dm_ao, hermi, with_j, with_k, omega) + dm_full_ao = self.dm_core[ifrag] + dm_ao - # Project J and K back to embedded basis - vj_emb = vk_emb = None - if vj_ao is not None: - if dm_cp.ndim == 2: - vj_emb = B_mat.T @ vj_ao @ B_mat - else: - vj_emb = cp.einsum('pi,xpq,qj->xij', B_mat, vj_ao, B_mat) - if vk_ao is not None: - if dm_cp.ndim == 2: - vk_emb = B_mat.T @ vk_ao @ B_mat - else: - vk_emb = cp.einsum('pi,xpq,qj->xij', B_mat, vk_ao, B_mat) + # Compute Veff in full AO basis using outer SCF's optimized routine + v_eff_full = self.mf_outer.get_veff(self.full_mol, dm_full_ao, hermi=hermi) + v_eff_active = _as_cupy(v_eff_full) - self.v_core_ao[ifrag] + + # Project Veff back to embedded basis + if dm_cp.ndim == 2: + v_eff_emb = B_mat.T @ v_eff_active @ B_mat + else: + v_eff_emb = cp.einsum('pi,xpq,qj->xij', B_mat, v_eff_active, B_mat) - return vj_emb, vk_emb - - mf_inner.get_jk = _get_jk + return v_eff_emb - # TODO: this is only works for SCF, even not for DFT or post-HF! 
- def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): - if dm is None: - dm = mf_inner.make_rdm1() - vj, vk = _get_jk(mol, dm, hermi=hermi) - return vj - 0.5 * vk - + # TODO: this is only works for HF/DFT, not for post-HF! mf_inner.get_veff = _get_veff # using s to make the upper index to the lower index @@ -449,19 +438,19 @@ def kernel(self): n_frag = self.frag_idx[ifrag].size - # TODO: this is only works for SCF, even not for DFT or post-HF! - vj_core, vk_core = self.mf_outer.get_jk(self.full_mol, self.dm_core[ifrag]) - v_core_ao = _as_cupy(vj_core) - 0.5 * _as_cupy(vk_core) + # TODO: this is only works for HF/DFT, not for post-HF! + v_core_ao = self.v_core_ao[ifrag] v_core_emb = B.T @ v_core_ao @ B # Apply 0.5 factor to core potential to avoid double counting across fragments h_eval = self.h_emb[ifrag] - 0.5 * v_core_emb e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * h_eval[:n_frag, :]) - # Check if the inner solver is a mean-field template by looking for 'get_veff' + # Check if the inner solver is a mean-field template is_mean_field = hasattr(self.mf_inner_template, 'get_veff') if not is_mean_field: + raise NotImplementedError("Only mean-field solver is supported for DMET.") self.log.info("using non-mean-field solver") nemb = B.shape[1] # TODO: this can be replaced by a more efficient routine @@ -480,8 +469,8 @@ def kernel(self): e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) else: self.log.info("using mean-field solver") - vj_emb, vk_emb = mf_inner.get_jk(dm=dm1_emb) - e_frag_elec += 0.5 * cp.sum(dm1_emb[:n_frag, :] * (_as_cupy(vj_emb) - 0.5 * _as_cupy(vk_emb))[:n_frag, :]) + v_eff_emb = mf_inner.get_veff(dm=dm1_emb) + e_frag_elec += 0.5 * cp.sum(dm1_emb[:n_frag, :] * _as_cupy(v_eff_emb)[:n_frag, :]) e_frag_nuc = 0.0 coords = self.full_mol.atom_coords() diff --git a/gpu4pyscf/dmet/tests/test_dmet.py b/gpu4pyscf/dmet/tests/test_dmet.py index 81a7b3a0f..646d07044 100644 --- a/gpu4pyscf/dmet/tests/test_dmet.py +++ 
b/gpu4pyscf/dmet/tests/test_dmet.py @@ -18,6 +18,7 @@ import cupy as cp from pyscf import gto from gpu4pyscf.scf import hf as gpu_hf +from gpu4pyscf.dft import rks from gpu4pyscf.dmet import DMET from gpu4pyscf import dmet @@ -68,6 +69,11 @@ def setUpClass(cls): cls.mf_inner_template2 = gpu_hf.RHF(cls.mol2) cls.mf_inner_template2.conv_tol = 1e-12 + cls.mf_outer3 = rks.RKS(cls.mol2) + cls.mf_outer3.conv_tol = 1e-12 + cls.mf_inner_template3 = rks.RKS(cls.mol2) + cls.mf_inner_template3.conv_tol = 1e-12 + @classmethod def tearDownClass(cls): del cls.mol @@ -95,15 +101,34 @@ def test_dmet_initialization(self): def test_lowdin(self): ovlp = self.mf_outer.get_ovlp() - X, _ = dmet.dmet.lowdin_orth(ovlp) + X, X_inv = dmet.dmet.lowdin_orth(ovlp) X_ref = cp.array([[ 1.1214051976, -0.3278815514, 0.0611473762, -0.0095874461], [-0.3278815514, 1.2643824327, -0.3597401082, 0.0611473762], [ 0.0611473762, -0.3597401082, 1.2643824327, -0.3278815514], [-0.0095874461, 0.0611473762, -0.3278815514, 1.1214051976]]) + identity = cp.eye(4) + assert np.abs(X@X_inv - identity).max() < 1e-8, "Lowdin orthogonalization should yield an identity matrix." assert np.abs(X - X_ref).max() < 1e-8, "Lowdin orthogonalization should yield a close-to-identity matrix." 
def test_schmidt(self): - pass + mol = gto.Mole() + mol.atom = ''' + H 0.0 0.0 0.0 + H 0.0 0.0 1.0 + H 0.0 0.0 2.0 + H 0.0 0.0 3.0 + ''' + mol.basis = '6-31g' + mol.spin = 0 + mol.charge = 0 + mol.verbose = 0 + mol.build() + + mf = gpu_hf.RHF(mol) + mf.kernel() + + s = + def test_dmet_execution_and_convergence(self): dmet_solver = DMET( From 7e3f8461f8def6af5d773ea611a9f7fa0c637484 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 18 May 2026 09:13:07 +0800 Subject: [PATCH 09/30] add unit tests --- gpu4pyscf/dmet/tests/test_dmet.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/gpu4pyscf/dmet/tests/test_dmet.py b/gpu4pyscf/dmet/tests/test_dmet.py index 646d07044..d559f13e7 100644 --- a/gpu4pyscf/dmet/tests/test_dmet.py +++ b/gpu4pyscf/dmet/tests/test_dmet.py @@ -127,8 +127,19 @@ def test_schmidt(self): mf = gpu_hf.RHF(mol) mf.kernel() - s = - + s = mf.get_ovlp() + mo_coeff = mf.mo_coeff + X, X_inv = dmet.dmet.lowdin_orth(s) + mo_coeff_oao = X@mo_coeff + C_occ = mo_coeff_oao[:, :2] + C_A = mo_coeff_oao[:4, :2] + U, S, Vh = cp.linalg.svd(C_A, full_matrices=True) + C_rot = C_occ @ Vh.T + bath_orb_ref = C_rot[4:] + norms = cp.linalg.norm(bath_orb_ref, axis=0) + bath_orb_ref /= norms + bath_orb = dmet.dmet.schmidt_decompose(mo_coeff_oao, mf.mo_occ, [0,1,2,3], [4,5,6,7])[0] + assert np.abs(bath_orb.get() - bath_orb_ref.get()).max() < 1e-8, "Schmidt decomposition should yield close-to-identity matrices." 
def test_dmet_execution_and_convergence(self): dmet_solver = DMET( From fc6767a3877c334785af212d00bedc3fddca5acb Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 18 May 2026 10:27:59 +0800 Subject: [PATCH 10/30] DFT runable, needs debug --- gpu4pyscf/dmet/dmet.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/dmet/dmet.py index 7e8d75df7..689733419 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/dmet/dmet.py @@ -19,6 +19,7 @@ from pyscf import gto from pyscf import lib from gpu4pyscf.lib import logger +from gpu4pyscf.lib.cupy_helper import tag_array import pyscf.ao2mo @@ -370,6 +371,19 @@ def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): v_eff_emb = B_mat.T @ v_eff_active @ B_mat else: v_eff_emb = cp.einsum('pi,xpq,qj->xij', B_mat, v_eff_active, B_mat) + + ecoul = getattr(v_eff_full, 'ecoul', 0.0) + exc = getattr(v_eff_full, 'exc', 0.0) + if hasattr(v_eff_full, 'vj'): + vj = getattr(v_eff_full, 'vj') + else: + vj = cp.zeros_like(v_eff_emb) + if hasattr(v_eff_full, 'vk'): + vk = getattr(v_eff_full, 'vk') + else: + vk = cp.zeros_like(v_eff_emb) + + v_eff_emb = tag_array(v_eff_emb, ecoul=ecoul, exc=exc, vj=vj, vk=vk) return v_eff_emb @@ -399,6 +413,7 @@ def solve_embedded(self, ifrag): return e_inner def kernel(self): + orig_outer_get_hcore = self.mf_outer.get_hcore hcore_orig = _as_cupy(self.mf_outer.get_hcore()) s_ao = _as_cupy(self.mf_outer.get_ovlp()) X, X_inv = lowdin_orth(s_ao) @@ -506,6 +521,19 @@ def kernel(self): if error < self.macro_tol: self.log.note("DMET macroscopic iterations converged.") break + + # Restore outer mean-field to its original unpolluted state + self.mf_outer.get_hcore = orig_outer_get_hcore + self.mf_outer.mo_coeff = None + self.mf_outer.mo_energy = None + self.mf_outer.mo_occ = None + + # Free up memory and break closures in inner mean-fields + for ifrag in range(self.nfrags): + if self.mf_inner[ifrag] is not None: + 
self.mf_inner[ifrag].mo_coeff = None + self.mf_inner[ifrag].mo_occ = None + self.mf_inner[ifrag].mo_energy = None return self.e_tot From d71f94b228962c4e545d2dad996fa5ea77bc9ed9 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 20 May 2026 12:34:53 +0800 Subject: [PATCH 11/30] move the codes --- .../{dmet => qmmm/embedding}/__init__.py | 2 +- .../dmet.py => qmmm/embedding/embedding.py} | 194 ++++++++++++------ .../embedding/tests/test_dmet_embeding.py} | 35 +++- 3 files changed, 163 insertions(+), 68 deletions(-) rename gpu4pyscf/{dmet => qmmm/embedding}/__init__.py (95%) rename gpu4pyscf/{dmet/dmet.py => qmmm/embedding/embedding.py} (67%) rename gpu4pyscf/{dmet/tests/test_dmet.py => qmmm/embedding/tests/test_dmet_embeding.py} (80%) diff --git a/gpu4pyscf/dmet/__init__.py b/gpu4pyscf/qmmm/embedding/__init__.py similarity index 95% rename from gpu4pyscf/dmet/__init__.py rename to gpu4pyscf/qmmm/embedding/__init__.py index 3b9c8ea05..e829ae4bb 100644 --- a/gpu4pyscf/dmet/__init__.py +++ b/gpu4pyscf/qmmm/embedding/__init__.py @@ -13,4 +13,4 @@ # limitations under the License. -from .dmet import DMET +from .embedding import DMET diff --git a/gpu4pyscf/dmet/dmet.py b/gpu4pyscf/qmmm/embedding/embedding.py similarity index 67% rename from gpu4pyscf/dmet/dmet.py rename to gpu4pyscf/qmmm/embedding/embedding.py index 689733419..e439ecd25 100644 --- a/gpu4pyscf/dmet/dmet.py +++ b/gpu4pyscf/qmmm/embedding/embedding.py @@ -63,9 +63,6 @@ def get_fragment_ao_indices(mol, frag_atoms): def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): - """ - Strictly follows the original 2012 DMET formulation. - """ mo_coeff_oao = _as_cupy(mo_coeff_oao) mo_occ = _as_cupy(mo_occ) env_idx = _as_cupy(env_idx) @@ -216,10 +213,13 @@ class DMET(lib.StreamObject): Maximum number of macroscopic iterations for correlation potential (u). macro_tol : float Convergence tolerance for the difference in fragment 1-RDMs. 
+ energy_method : str + Method for calculating the total energy: 'direct' or 'delta'. """ def __init__(self, mf_outer, mf_inner, fragments, - threshold=1e-5, max_macro_iter=20, macro_tol=1e-4, verbose=None): + threshold=1e-5, max_macro_iter=20, macro_tol=1e-4, + energy_method='direct', verbose=None): if mf_outer is None or mf_inner is None: raise ValueError("mf_outer and mf_inner are both required.") if not fragments: @@ -236,6 +236,10 @@ def __init__(self, mf_outer, mf_inner, fragments, self.threshold = float(threshold) self.max_macro_iter = max_macro_iter self.macro_tol = macro_tol + + self.energy_method = energy_method.lower() + if self.energy_method not in ['direct', 'delta']: + raise ValueError("energy_method must be 'direct' or 'delta'") self.fragments = [list(int(a) for a in frag) for frag in fragments] self.nfrags = len(self.fragments) @@ -323,7 +327,6 @@ def build_embedded_hamiltonian(self, ifrag, hcore_orig): return self def _build_inner_mf(self, ifrag, dm_full_ao): - # TODO: Handle post-HF case! 
nemb = self.B[ifrag].shape[1] n_total_electrons = int(self.full_mol.nelectron) n_emb_electrons = n_total_electrons - int(self.eig_info[ifrag]['n_core_electrons']) @@ -338,21 +341,33 @@ def _build_inner_mf(self, ifrag, dm_full_ao): mf_inner = _instantiate_inner_mf(self.mf_inner_template, emb_mol) - h_emb = self.h_emb[ifrag] + B_mat = self.B[ifrag] + + if hasattr(self.mf_inner_template, 'get_veff'): + v_core_inner_ao = _as_cupy(self.mf_inner_template.get_veff(self.full_mol, self.dm_core[ifrag])) + else: + v_core_inner_ao = cp.zeros_like(self.dm_core[ifrag]) + + h_ao = _as_cupy(self.mf_outer.get_hcore()) + # The inner Hamiltonian gets the strict high-level background potential + h_emb_inner = B_mat.T @ (h_ao + v_core_inner_ao) @ B_mat ovlp = cp.eye(nemb) - # Base energy offset for debugging per fragment e_nuc = float(self.full_mol.energy_nuc()) - mf_inner.get_hcore = lambda *args, **kwargs: h_emb + mf_inner.get_hcore = lambda *args, **kwargs: h_emb_inner mf_inner.get_ovlp = lambda *args, **kwargs: ovlp - mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc + self.e_core[ifrag] + + # Energy offset for inner solver debugging aligns with inner core potential + # This 0.5 will be removed for 1-fragment systmes. 
+ e_core_inner = float(cp.einsum('ij,ji->', self.dm_core[ifrag], h_ao) + + 0.5 * cp.einsum('ij,ji->', self.dm_core[ifrag], v_core_inner_ao)) + mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc + e_core_inner - # Overwrite get_veff to compute on-the-fly using the outer MF + # Overwrite get_veff to compute on-the-fly using the inner template def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): if dm is None: dm = mf_inner.make_rdm1() dm_cp = _as_cupy(dm) - B_mat = self.B[ifrag] # Project embedded dm to full AO basis if dm_cp.ndim == 2: @@ -360,11 +375,11 @@ def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): else: dm_ao = cp.einsum('pi,xij,qj->xpq', B_mat, dm_cp, B_mat) - dm_full_ao = self.dm_core[ifrag] + dm_ao + dm_full_ao_inner = self.dm_core[ifrag] + dm_ao - # Compute Veff in full AO basis using outer SCF's optimized routine - v_eff_full = self.mf_outer.get_veff(self.full_mol, dm_full_ao, hermi=hermi) - v_eff_active = _as_cupy(v_eff_full) - self.v_core_ao[ifrag] + # [FIXED] Compute Veff in full AO basis using inner template strictly + v_eff_full = self.mf_inner_template.get_veff(self.full_mol, dm_full_ao_inner, hermi=hermi) + v_eff_active = _as_cupy(v_eff_full) - v_core_inner_ao # Project Veff back to embedded basis if dm_cp.ndim == 2: @@ -387,7 +402,6 @@ def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): return v_eff_emb - # TODO: this is only works for HF/DFT, not for post-HF! 
mf_inner.get_veff = _get_veff # using s to make the upper index to the lower index @@ -430,8 +444,13 @@ def kernel(self): mo_coeff = _as_cupy(self.mf_outer.mo_coeff) mo_occ = _as_cupy(self.mf_outer.mo_occ) dm_full_ao = _as_cupy(self.mf_outer.make_rdm1()) - - e_tot = 0.0 + + if self.energy_method == 'delta': + # Remove the correlation potential penalty from the total energy to get the physical base energy + e_tot = self.mf_outer.e_tot - float(cp.sum(dm_full_ao * u_ao)) + else: + e_tot = 0.0 + dm_inners = [] for ifrag in range(self.nfrags): @@ -453,52 +472,112 @@ def kernel(self): n_frag = self.frag_idx[ifrag].size - # TODO: this is only works for HF/DFT, not for post-HF! + # Outer (Low-level) environment embedding v_core_ao = self.v_core_ao[ifrag] v_core_emb = B.T @ v_core_ao @ B # Apply 0.5 factor to core potential to avoid double counting across fragments + # TODO: The 0.5 factor should be removed for ONIOM energy of just 1 fragment. h_eval = self.h_emb[ifrag] - 0.5 * v_core_emb - e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * h_eval[:n_frag, :]) - - # Check if the inner solver is a mean-field template + is_mean_field = hasattr(self.mf_inner_template, 'get_veff') - if not is_mean_field: - raise NotImplementedError("Only mean-field solver is supported for DMET.") - self.log.info("using non-mean-field solver") - nemb = B.shape[1] - # TODO: this can be replaced by a more efficient routine - B_cpu = cp.asnumpy(B) - eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) - eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) # Restore to 4D array - eri_emb = _as_cupy(eri_emb_cpu) - - if hasattr(mf_inner, 'make_rdm2'): - dm2_emb = _as_cupy(mf_inner.make_rdm2()) + # [FIXED] Inner (High-level) evaluation uses its own core functional to prevent cross-talk + if is_mean_field: + v_core_inner_ao = _as_cupy(self.mf_inner_template.get_veff(self.full_mol, self.dm_core[ifrag])) + v_core_inner_emb = B.T @ v_core_inner_ao @ B + h_ao = _as_cupy(hcore_orig) + h_emb_inner = B.T @ (h_ao 
+ v_core_inner_ao) @ B + h_eval_high = h_emb_inner - 0.5 * v_core_inner_emb + else: + h_eval_high = h_eval + + if self.energy_method == 'direct': + e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * h_eval_high[:n_frag, :]) + if not is_mean_field: + raise NotImplementedError("Only mean-field solver is supported for DMET.") + self.log.info("using non-mean-field solver") + nemb = B.shape[1] + # TODO: this can be replaced by a more efficient routine + B_cpu = cp.asnumpy(B) + eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) + eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) # Restore to 4D array + eri_emb = _as_cupy(eri_emb_cpu) + + if hasattr(mf_inner, 'make_rdm2'): + dm2_emb = _as_cupy(mf_inner.make_rdm2()) + else: + # Fallback using the HF 2-RDM formulation for post-HF methods lacking make_rdm2 + dm2_emb = (cp.einsum('ij,kl->ijkl', dm1_emb, dm1_emb) + - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb, dm1_emb)) + + e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) else: - # Fallback using the HF 2-RDM formulation for post-HF methods lacking make_rdm2 - dm2_emb = (cp.einsum('ij,kl->ijkl', dm1_emb, dm1_emb) - - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb, dm1_emb)) + self.log.info("using mean-field solver") + v_eff_emb = mf_inner.get_veff(dm=dm1_emb) + e_frag_elec += 0.5 * cp.sum(dm1_emb[:n_frag, :] * _as_cupy(v_eff_emb)[:n_frag, :]) - e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) - else: - self.log.info("using mean-field solver") - v_eff_emb = mf_inner.get_veff(dm=dm1_emb) - e_frag_elec += 0.5 * cp.sum(dm1_emb[:n_frag, :] * _as_cupy(v_eff_emb)[:n_frag, :]) - - e_frag_nuc = 0.0 - coords = self.full_mol.atom_coords() - charges = self.full_mol.atom_charges() - frag_atoms = self.fragments[ifrag] - for i in frag_atoms: - for j in range(self.full_mol.natm): - if i == j: continue - r = np.linalg.norm(coords[i] - coords[j]) - e_frag_nuc += 0.5 * charges[i] * charges[j] / r + e_frag_nuc = 0.0 + coords = 
self.full_mol.atom_coords() + charges = self.full_mol.atom_charges() + frag_atoms = self.fragments[ifrag] + for i in frag_atoms: + for j in range(self.full_mol.natm): + if i == j: continue + r = np.linalg.norm(coords[i] - coords[j]) + e_frag_nuc += 0.5 * charges[i] * charges[j] / r + + self.log.info(f"Fragment {ifrag} Electronic Energy: {float(e_frag_elec):.8f} | Nuclear Energy: {e_frag_nuc:.8f}") + e_tot += float(e_frag_elec) + e_frag_nuc + + elif self.energy_method == 'delta': + dm1_emb_high = dm1_emb + dm1_emb_low = self.dm_emb_init[ifrag] + + # Compute High-Level pseudo energy (using strictly high-level core potential evaluation) + e_high = cp.sum(dm1_emb_high[:n_frag, :] * h_eval_high[:n_frag, :]) + + # Compute Low-Level pseudo energy (using strictly low-level core potential evaluation) + e_low = cp.sum(dm1_emb_low[:n_frag, :] * h_eval[:n_frag, :]) + + if not is_mean_field: + raise NotImplementedError("Only mean-field solver is supported for DMET.") + self.log.info("using non-mean-field solver") + nemb = B.shape[1] + B_cpu = cp.asnumpy(B) + eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) + eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) + eri_emb = _as_cupy(eri_emb_cpu) - self.log.info(f"Fragment {ifrag} Electronic Energy: {float(e_frag_elec):.8f} | Nuclear Energy: {e_frag_nuc:.8f}") - e_tot += float(e_frag_elec) + e_frag_nuc + if hasattr(mf_inner, 'make_rdm2'): + dm2_emb_high = _as_cupy(mf_inner.make_rdm2()) + else: + dm2_emb_high = (cp.einsum('ij,kl->ijkl', dm1_emb_high, dm1_emb_high) + - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb_high, dm1_emb_high)) + e_high += 0.5 * cp.sum(dm2_emb_high[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) + + # Low-level is always un-correlated 2-RDM + dm2_emb_low = (cp.einsum('ij,kl->ijkl', dm1_emb_low, dm1_emb_low) + - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb_low, dm1_emb_low)) + e_low += 0.5 * cp.sum(dm2_emb_low[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) + else: + self.log.info("using mean-field solver") + 
v_eff_emb_high = mf_inner.get_veff(dm=dm1_emb_high) + e_high += 0.5 * cp.sum(dm1_emb_high[:n_frag, :] * _as_cupy(v_eff_emb_high)[:n_frag, :]) + + # [FIXED] Compute Veff for the low-level density explicitly using the outer functional + dm_ao_low = B @ dm1_emb_low @ B.T + dm_full_ao_low = self.dm_core[ifrag] + dm_ao_low + + v_eff_full_low = self.mf_outer.get_veff(self.full_mol, dm_full_ao_low) + v_eff_active_low = _as_cupy(v_eff_full_low) - self.v_core_ao[ifrag] + v_eff_emb_low = B.T @ v_eff_active_low @ B + + e_low += 0.5 * cp.sum(dm1_emb_low[:n_frag, :] * v_eff_emb_low[:n_frag, :]) + + delta_e = float(e_high - e_low) + self.log.info(f"Fragment {ifrag} Delta E (Correlation Improvement): {delta_e:.8f}") + e_tot += delta_e dm_low_oao = X_inv @ dm_full_ao @ X_inv @@ -527,13 +606,6 @@ def kernel(self): self.mf_outer.mo_coeff = None self.mf_outer.mo_energy = None self.mf_outer.mo_occ = None - - # Free up memory and break closures in inner mean-fields - for ifrag in range(self.nfrags): - if self.mf_inner[ifrag] is not None: - self.mf_inner[ifrag].mo_coeff = None - self.mf_inner[ifrag].mo_occ = None - self.mf_inner[ifrag].mo_energy = None return self.e_tot diff --git a/gpu4pyscf/dmet/tests/test_dmet.py b/gpu4pyscf/qmmm/embedding/tests/test_dmet_embeding.py similarity index 80% rename from gpu4pyscf/dmet/tests/test_dmet.py rename to gpu4pyscf/qmmm/embedding/tests/test_dmet_embeding.py index d559f13e7..8f55b69f0 100644 --- a/gpu4pyscf/dmet/tests/test_dmet.py +++ b/gpu4pyscf/qmmm/embedding/tests/test_dmet_embeding.py @@ -19,8 +19,8 @@ from pyscf import gto from gpu4pyscf.scf import hf as gpu_hf from gpu4pyscf.dft import rks -from gpu4pyscf.dmet import DMET -from gpu4pyscf import dmet +from gpu4pyscf.qmmm.embedding.embedding import DMET +from gpu4pyscf.qmmm.embedding import embedding class KnownValues(unittest.TestCase): @@ -43,7 +43,9 @@ def setUpClass(cls): cls.fragments = [[0, 1], [2, 3]] cls.mf_outer = gpu_hf.RHF(cls.mol) + cls.mf_outer.conv_tol = 1e-14 
cls.mf_inner_template = gpu_hf.RHF(cls.mol) + cls.mf_inner_template.conv_tol = 1e-14 cls.mol2 = gto.Mole() cls.mol2.atom = ''' @@ -101,7 +103,7 @@ def test_dmet_initialization(self): def test_lowdin(self): ovlp = self.mf_outer.get_ovlp() - X, X_inv = dmet.dmet.lowdin_orth(ovlp) + X, X_inv = embedding.lowdin_orth(ovlp) X_ref = cp.array([[ 1.1214051976, -0.3278815514, 0.0611473762, -0.0095874461], [-0.3278815514, 1.2643824327, -0.3597401082, 0.0611473762], [ 0.0611473762, -0.3597401082, 1.2643824327, -0.3278815514], @@ -129,7 +131,7 @@ def test_schmidt(self): s = mf.get_ovlp() mo_coeff = mf.mo_coeff - X, X_inv = dmet.dmet.lowdin_orth(s) + X, X_inv = embedding.lowdin_orth(s) mo_coeff_oao = X@mo_coeff C_occ = mo_coeff_oao[:, :2] C_A = mo_coeff_oao[:4, :2] @@ -138,7 +140,7 @@ def test_schmidt(self): bath_orb_ref = C_rot[4:] norms = cp.linalg.norm(bath_orb_ref, axis=0) bath_orb_ref /= norms - bath_orb = dmet.dmet.schmidt_decompose(mo_coeff_oao, mf.mo_occ, [0,1,2,3], [4,5,6,7])[0] + bath_orb = embedding.schmidt_decompose(mo_coeff_oao, mf.mo_occ, [0,1,2,3], [4,5,6,7])[0] assert np.abs(bath_orb.get() - bath_orb_ref.get()).max() < 1e-8, "Schmidt decomposition should yield close-to-identity matrices." def test_dmet_execution_and_convergence(self): @@ -156,7 +158,8 @@ def test_dmet_execution_and_convergence(self): e_tot_ref = self.mf_outer.kernel() assert np.abs(e_tot - e_tot_ref) < 1e-8, "DMET energy should be close to the reference energy." - + assert np.abs(dmet_solver.u_oao).sum() < 1e-8, "Correlation potential should be close to zero." 
+ dmet_solver2 = DMET( mf_outer=self.mf_outer2, mf_inner=self.mf_inner_template2, @@ -169,8 +172,28 @@ def test_dmet_execution_and_convergence(self): e_tot = dmet_solver2.kernel() self.mf_outer2.mo_coeff = None e_tot_ref = self.mf_outer2.kernel() + + dmet_solver2_iter1 = DMET( + mf_outer=self.mf_outer2, + mf_inner=self.mf_inner_template2, + fragments=self.fragments2, + threshold=1e-5, + max_macro_iter=1, + macro_tol=1e-3 + ) + e_tot_iter1 = dmet_solver2_iter1.kernel() + + total_elec_dmet = 0.0 + for ifrag in range(dmet_solver2.nfrags): + dm_high = cp.asnumpy(dmet_solver2.mf_inner[ifrag].make_rdm1()) + n_frag_orbs = len(dmet_solver2.frag_idx[ifrag]) + total_elec_dmet += np.trace(dm_high[:n_frag_orbs, :n_frag_orbs]) + assert np.abs(total_elec_dmet - (self.mol2.nelec[0] + self.mol2.nelec[1])) < 1e-8, \ + "Sum of numbers of electrons from fragments should be close to the total number." assert np.abs(e_tot - e_tot_ref) < 1e-8, "DMET energy should be close to the reference energy." + assert np.abs(e_tot_iter1 - e_tot) < 1e-8, "DMET energy should be converged in 1 macro iteration." + assert np.abs(dmet_solver2.u_oao).sum() < 1e-8, "Correlation potential should be close to zero." if __name__ == '__main__': From 1c602261d6f940d055cbf2b0632288705c8cd1eb Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 20 May 2026 13:46:18 +0800 Subject: [PATCH 12/30] add embedding for 1-fragment DFT --- gpu4pyscf/qmmm/embedding/embeding_dft.py | 152 +++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 gpu4pyscf/qmmm/embedding/embeding_dft.py diff --git a/gpu4pyscf/qmmm/embedding/embeding_dft.py b/gpu4pyscf/qmmm/embedding/embeding_dft.py new file mode 100644 index 000000000..443dbca26 --- /dev/null +++ b/gpu4pyscf/qmmm/embedding/embeding_dft.py @@ -0,0 +1,152 @@ +# Copyright 2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cupy as cp +import numpy as np +import pyscf.ao2mo +from gpu4pyscf.lib.cupy_helper import tag_array + +# Import your original DMET base class and helper functions +# from dmet import DMET, lowdin_orth, _as_cupy +from .dmet import DMET, lowdin_orth, _as_cupy + + +class SingleFragmentEmbedding(DMET): + """ + Single-Fragment ONIOM-like Embedding driver inheriting from the DMET base class. + + This class overrides the initialization and kernel to perform a single-shot, + single-fragment delta-method energy evaluation without macroscopic iterations. + It rigorously traces over the entire active space (Fragment + Bath) to capture + full polarization correlation, eliminating the 0.5 double-counting factor. + """ + + def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): + """ + Parameters + ---------- + mf_outer : SCF object + Low-level mean-field on the full system (e.g., PBE). + mf_inner : SCF/DFT/post-HF object + High-level template applied to the embedded cluster (e.g., B3LYP). + fragment : list of int + A single list of atom indices defining the QM region. + threshold : float + Eigenvalue cutoff used to classify environment orbitals. + """ + # Wrap the single fragment into a list of lists to satisfy parent DMET __init__ + fragments = [fragment] + + # Initialize parent class. 
+ # Force max_macro_iter=1 and energy_method='delta' strictly + super().__init__(mf_outer, mf_inner, fragments, + threshold=threshold, max_macro_iter=1, + energy_method='delta', verbose=verbose) + + # Expose the single fragment directly for user convenience + self.fragment = self.fragments[0] + + def kernel(self): + """ + Executes the single-shot embedding workflow. + """ + # 1. Run Outer Mean-Field (if not already converged) + if not self.mf_outer.converged: + self.mf_outer.kernel() + + e_global_low = self.mf_outer.e_tot + mo_coeff = _as_cupy(self.mf_outer.mo_coeff) + mo_occ = _as_cupy(self.mf_outer.mo_occ) + dm_full_ao_low = _as_cupy(self.mf_outer.make_rdm1()) + + hcore_orig = _as_cupy(self.mf_outer.get_hcore()) + s_ao = _as_cupy(self.mf_outer.get_ovlp()) + X, X_inv = lowdin_orth(s_ao) + + ifrag = 0 # Strictly single fragment at index 0 + + # 2. Schmidt Decomposition & Bath Construction using parent methods + self.build_bath(ifrag, mo_coeff, mo_occ, X_inv, X) + self.build_embedded_hamiltonian(ifrag, hcore_orig) + + # 3. Build and Run Inner embedded solver + # _build_inner_mf already encapsulates the rigorous dual-functional core potential logic + mf_inner = self._build_inner_mf(ifrag, dm_full_ao_low) + self.log.info("Running high-level inner solver...") + self.solve_embedded(ifrag) + + dm_emb_high = _as_cupy(mf_inner.make_rdm1()) + dm_emb_low = self.dm_emb_init[ifrag] + + B = self.B[ifrag] + nemb = B.shape[1] + is_mean_field = hasattr(self.mf_inner_template, 'get_veff') + + # 4. Evaluate Energy using strict Delta Method + + # --- Evaluate High-Level trace --- + # Note: Trace is implicitly over the FULL active space (dm_emb_high * h_eval_high). + # No 0.5 reduction factor is applied for the core potential since there are no other fragments. 
+ if is_mean_field: + v_core_inner_ao = _as_cupy(self.mf_inner_template.get_veff(self.full_mol, self.dm_core[ifrag])) + h_eval_high = B.T @ (hcore_orig + v_core_inner_ao) @ B + else: + h_eval_high = self.h_emb[ifrag] + + e_high = cp.sum(dm_emb_high * h_eval_high) + + if is_mean_field: + v_eff_emb_high = mf_inner.get_veff(dm=dm_emb_high) + e_high += 0.5 * cp.sum(dm_emb_high * _as_cupy(v_eff_emb_high)) + else: + # WFT evaluation over full active space (kept for future CCSD/MP2 extensions) + B_cpu = cp.asnumpy(B) + eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) + eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) + eri_emb = _as_cupy(eri_emb_cpu) + + if hasattr(mf_inner, 'make_rdm2'): + dm2_emb_high = _as_cupy(mf_inner.make_rdm2()) + else: + dm2_emb_high = (cp.einsum('ij,kl->ijkl', dm_emb_high, dm_emb_high) + - 0.5 * cp.einsum('il,jk->ijkl', dm_emb_high, dm_emb_high)) + e_high += 0.5 * cp.sum(dm2_emb_high * eri_emb) + + # --- Evaluate Low-Level trace --- + # self.h_emb strictly contains 1.0 * v_core_outer_ao natively + h_eval_low = self.h_emb[ifrag] + e_low = cp.sum(dm_emb_low * h_eval_low) + + if is_mean_field: + # Reconstruct full low-level density strictly from embedded projection + dm_full_ao_low_reconstructed = self.dm_core[ifrag] + B @ dm_emb_low @ B.T + v_eff_full_low = self.mf_outer.get_veff(self.full_mol, dm_full_ao_low_reconstructed) + v_eff_active_low = _as_cupy(v_eff_full_low) - self.v_core_ao[ifrag] + v_eff_emb_low = B.T @ v_eff_active_low @ B + + e_low += 0.5 * cp.sum(dm_emb_low * v_eff_emb_low) + else: + dm2_emb_low = (cp.einsum('ij,kl->ijkl', dm_emb_low, dm_emb_low) + - 0.5 * cp.einsum('il,jk->ijkl', dm_emb_low, dm_emb_low)) + e_low += 0.5 * cp.sum(dm2_emb_low * eri_emb) + + # --- Assembly --- + delta_e = float(e_high - e_low) + self.log.note(f"Global Low-Level E : {e_global_low:.8f}") + self.log.note(f"Active Space dE : {delta_e:.8f}") + + self.e_tot = e_global_low + delta_e + self.log.note(f"Total Embedded E : {self.e_tot:.8f}") + + 
return self.e_tot \ No newline at end of file From 0ed250150de5d107d2189c9948ea430e44a6552b Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 20 May 2026 16:28:47 +0800 Subject: [PATCH 13/30] in debugging --- gpu4pyscf/qmmm/embedding/embeding_dft.py | 145 ++++++++++++++--------- 1 file changed, 87 insertions(+), 58 deletions(-) diff --git a/gpu4pyscf/qmmm/embedding/embeding_dft.py b/gpu4pyscf/qmmm/embedding/embeding_dft.py index 443dbca26..87b17940f 100644 --- a/gpu4pyscf/qmmm/embedding/embeding_dft.py +++ b/gpu4pyscf/qmmm/embedding/embeding_dft.py @@ -16,18 +16,15 @@ import numpy as np import pyscf.ao2mo from gpu4pyscf.lib.cupy_helper import tag_array - -# Import your original DMET base class and helper functions -# from dmet import DMET, lowdin_orth, _as_cupy -from .dmet import DMET, lowdin_orth, _as_cupy +from gpu4pyscf.qmmm.embedding.embedding import DMET, lowdin_orth, _as_cupy class SingleFragmentEmbedding(DMET): """ - Single-Fragment ONIOM-like Embedding driver inheriting from the DMET base class. + Single-Fragment ONIOM-like embedding. - This class overrides the initialization and kernel to perform a single-shot, - single-fragment delta-method energy evaluation without macroscopic iterations. + This class performs a single-shot, + single-fragment delta-method energy evaluation WITHOUT macroscopic iterations. It rigorously traces over the entire active space (Fragment + Bath) to capture full polarization correlation, eliminating the 0.5 double-counting factor. """ @@ -35,7 +32,7 @@ class SingleFragmentEmbedding(DMET): def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): """ Parameters - ---------- + ------- mf_outer : SCF object Low-level mean-field on the full system (e.g., PBE). mf_inner : SCF/DFT/post-HF object @@ -45,11 +42,8 @@ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): threshold : float Eigenvalue cutoff used to classify environment orbitals. 
""" - # Wrap the single fragment into a list of lists to satisfy parent DMET __init__ fragments = [fragment] - # Initialize parent class. - # Force max_macro_iter=1 and energy_method='delta' strictly super().__init__(mf_outer, mf_inner, fragments, threshold=threshold, max_macro_iter=1, energy_method='delta', verbose=verbose) @@ -58,10 +52,7 @@ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): self.fragment = self.fragments[0] def kernel(self): - """ - Executes the single-shot embedding workflow. - """ - # 1. Run Outer Mean-Field (if not already converged) + if not self.mf_outer.converged: self.mf_outer.kernel() @@ -74,14 +65,12 @@ def kernel(self): s_ao = _as_cupy(self.mf_outer.get_ovlp()) X, X_inv = lowdin_orth(s_ao) - ifrag = 0 # Strictly single fragment at index 0 + ifrag = 0 - # 2. Schmidt Decomposition & Bath Construction using parent methods self.build_bath(ifrag, mo_coeff, mo_occ, X_inv, X) self.build_embedded_hamiltonian(ifrag, hcore_orig) - # 3. Build and Run Inner embedded solver - # _build_inner_mf already encapsulates the rigorous dual-functional core potential logic + # Build and Run Inner embedded solver mf_inner = self._build_inner_mf(ifrag, dm_full_ao_low) self.log.info("Running high-level inner solver...") self.solve_embedded(ifrag) @@ -90,58 +79,98 @@ def kernel(self): dm_emb_low = self.dm_emb_init[ifrag] B = self.B[ifrag] - nemb = B.shape[1] is_mean_field = hasattr(self.mf_inner_template, 'get_veff') - # 4. Evaluate Energy using strict Delta Method - - # --- Evaluate High-Level trace --- - # Note: Trace is implicitly over the FULL active space (dm_emb_high * h_eval_high). - # No 0.5 reduction factor is applied for the core potential since there are no other fragments. 
+ # Evaluate High-Level trace if is_mean_field: - v_core_inner_ao = _as_cupy(self.mf_inner_template.get_veff(self.full_mol, self.dm_core[ifrag])) - h_eval_high = B.T @ (hcore_orig + v_core_inner_ao) @ B - else: - h_eval_high = self.h_emb[ifrag] + # Bare one-electron Hamiltonian trace + h_eval_bare = B.T @ hcore_orig @ B + e_high_h = cp.sum(dm_emb_high * h_eval_bare) - e_high = cp.sum(dm_emb_high * h_eval_high) - - if is_mean_field: - v_eff_emb_high = mf_inner.get_veff(dm=dm_emb_high) - e_high += 0.5 * cp.sum(dm_emb_high * _as_cupy(v_eff_emb_high)) - else: - # WFT evaluation over full active space (kept for future CCSD/MP2 extensions) - B_cpu = cp.asnumpy(B) - eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) - eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) - eri_emb = _as_cupy(eri_emb_cpu) + # Full density reconstruction + dm_full_ao_high = self.dm_core[ifrag] + B @ dm_emb_high @ B.T + v_eff_full_high = self.mf_inner_template.get_veff(self.full_mol, dm_full_ao_high) - if hasattr(mf_inner, 'make_rdm2'): - dm2_emb_high = _as_cupy(mf_inner.make_rdm2()) - else: - dm2_emb_high = (cp.einsum('ij,kl->ijkl', dm_emb_high, dm_emb_high) - - 0.5 * cp.einsum('il,jk->ijkl', dm_emb_high, dm_emb_high)) - e_high += 0.5 * cp.sum(dm2_emb_high * eri_emb) + # Coulomb J interaction traced over active space + vj_full_high = getattr(v_eff_full_high, 'vj', None) + vj_emb_high = B.T @ _as_cupy(vj_full_high) @ B + e_high_J = 0.5 * cp.sum(dm_emb_high * vj_emb_high) - # --- Evaluate Low-Level trace --- - # self.h_emb strictly contains 1.0 * v_core_outer_ao natively - h_eval_low = self.h_emb[ifrag] - e_low = cp.sum(dm_emb_low * h_eval_low) - + # Exact Exchange interaction traced over active space + Grid XC extraction + exc_tot_high = getattr(v_eff_full_high, 'exc', 0.0) + vk_full_high = getattr(v_eff_full_high, 'vk', None) + + e_high_K = 0.0 + grid_exc_tot_high = exc_tot_high + if vk_full_high is not None: + vk_full_high = _as_cupy(vk_full_high) + vk_emb_high = B.T @ vk_full_high @ 
B + e_high_K = -0.5 * cp.sum(dm_emb_high * vk_emb_high) + e_K_global_high = -0.5 * cp.sum(dm_full_ao_high * vk_full_high) + # Isolate the pure non-linear grid integration part + grid_exc_tot_high = exc_tot_high - e_K_global_high + + # Core evaluation for pure Grid XC subtraction + v_eff_core_high = self.mf_inner_template.get_veff(self.full_mol, self.dm_core[ifrag]) + exc_core_high = getattr(v_eff_core_high, 'exc', 0.0) + vk_core_high = getattr(v_eff_core_high, 'vk', None) + + grid_exc_core_high = exc_core_high + if vk_core_high is not None: + vk_core_high = _as_cupy(vk_core_high) + e_K_global_core_high = -0.25 * cp.sum(self.dm_core[ifrag] * vk_core_high) + grid_exc_core_high = exc_core_high - e_K_global_core_high + + e_high = e_high_h + e_high_J + e_high_K + grid_exc_tot_high - grid_exc_core_high + else: + raise NotImplementedError("WFT evaluation is not implemented for this class.") + + # Evaluate Low-Level trace (Exact Real-Space XC Integration) if is_mean_field: + # 1. Bare one-electron Hamiltonian trace + e_low_h = cp.sum(dm_emb_low * h_eval_bare) + # Reconstruct full low-level density strictly from embedded projection dm_full_ao_low_reconstructed = self.dm_core[ifrag] + B @ dm_emb_low @ B.T v_eff_full_low = self.mf_outer.get_veff(self.full_mol, dm_full_ao_low_reconstructed) - v_eff_active_low = _as_cupy(v_eff_full_low) - self.v_core_ao[ifrag] - v_eff_emb_low = B.T @ v_eff_active_low @ B - e_low += 0.5 * cp.sum(dm_emb_low * v_eff_emb_low) + # 2. Coulomb (J) interaction traced over active space + vj_full_low = getattr(v_eff_full_low, 'vj', None) + if vj_full_low is None: + vj_full_low = self.mf_outer.get_j(self.full_mol, dm_full_ao_low_reconstructed) + vj_emb_low = B.T @ _as_cupy(vj_full_low) @ B + e_low_J = 0.5 * cp.sum(dm_emb_low * vj_emb_low) + + # 3. 
Exact Exchange (K) interaction traced over active space + Grid XC extraction + exc_tot_low = getattr(v_eff_full_low, 'exc', 0.0) + vk_full_low = getattr(v_eff_full_low, 'vk', None) + + e_low_K = 0.0 + grid_exc_tot_low = exc_tot_low + if vk_full_low is not None: + vk_full_low = _as_cupy(vk_full_low) + vk_emb_low = B.T @ vk_full_low @ B + e_low_K = -0.5 * cp.sum(dm_emb_low * vk_emb_low) + e_K_global_low = -0.5 * cp.sum(dm_full_ao_low_reconstructed * vk_full_low) + # Isolate the pure non-linear grid integration part + grid_exc_tot_low = exc_tot_low - e_K_global_low + + # Core evaluation for pure Grid XC subtraction + v_eff_core_low = self.mf_outer.get_veff(self.full_mol, self.dm_core[ifrag]) + exc_core_low = getattr(v_eff_core_low, 'exc', 0.0) + vk_core_low = getattr(v_eff_core_low, 'vk', None) + + grid_exc_core_low = exc_core_low + if vk_core_low is not None: + vk_core_low = _as_cupy(vk_core_low) + e_K_global_core_low = -0.25 * cp.sum(self.dm_core[ifrag] * vk_core_low) + grid_exc_core_low = exc_core_low - e_K_global_core_low + + e_low = e_low_h + e_low_J + e_low_K + grid_exc_tot_low - grid_exc_core_low else: - dm2_emb_low = (cp.einsum('ij,kl->ijkl', dm_emb_low, dm_emb_low) - - 0.5 * cp.einsum('il,jk->ijkl', dm_emb_low, dm_emb_low)) - e_low += 0.5 * cp.sum(dm2_emb_low * eri_emb) + raise NotImplementedError("WFT evaluation is not implemented for this class.") - # --- Assembly --- + # Assembly delta_e = float(e_high - e_low) self.log.note(f"Global Low-Level E : {e_global_low:.8f}") self.log.note(f"Active Space dE : {delta_e:.8f}") From 945fb6aabc5c951f861f1cc14ed6770d9e9b2c26 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 20 May 2026 17:28:01 +0800 Subject: [PATCH 14/30] finish writing, needs debug --- gpu4pyscf/qmmm/embedding/__init__.py | 1 + gpu4pyscf/qmmm/embedding/embeding_dft.py | 144 +++++++----------- .../embedding/tests/test_dft_embedding.py | 0 ...met_embeding.py => test_dmet_embedding.py} | 0 4 files changed, 58 insertions(+), 87 deletions(-) 
create mode 100644 gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py rename gpu4pyscf/qmmm/embedding/tests/{test_dmet_embeding.py => test_dmet_embedding.py} (100%) diff --git a/gpu4pyscf/qmmm/embedding/__init__.py b/gpu4pyscf/qmmm/embedding/__init__.py index e829ae4bb..01eaa5903 100644 --- a/gpu4pyscf/qmmm/embedding/__init__.py +++ b/gpu4pyscf/qmmm/embedding/__init__.py @@ -14,3 +14,4 @@ from .embedding import DMET +from .embeding_dft import SingleFragmentEmbedding diff --git a/gpu4pyscf/qmmm/embedding/embeding_dft.py b/gpu4pyscf/qmmm/embedding/embeding_dft.py index 87b17940f..9c8d206ce 100644 --- a/gpu4pyscf/qmmm/embedding/embeding_dft.py +++ b/gpu4pyscf/qmmm/embedding/embeding_dft.py @@ -45,14 +45,53 @@ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): fragments = [fragment] super().__init__(mf_outer, mf_inner, fragments, - threshold=threshold, max_macro_iter=1, - energy_method='delta', verbose=verbose) + threshold=threshold, max_macro_iter=1, verbose=verbose) - # Expose the single fragment directly for user convenience self.fragment = self.fragments[0] - def kernel(self): + def _evaluate_embedded_energy(self, mf_obj, dm_emb, h_eval_bare, B, dm_core): + # Bare one-electron Hamiltonian trace + e_h = cp.sum(dm_emb * h_eval_bare) + + # Full density reconstruction + dm_full_ao = dm_core + B @ dm_emb @ B.T + v_eff_full = mf_obj.get_veff(self.full_mol, dm_full_ao) + + # Coulomb J interaction traced over active space + vj_full = getattr(v_eff_full, 'vj', None) + if vj_full is None: + vj_full = mf_obj.get_j(self.full_mol, dm_full_ao) + vj_emb = B.T @ _as_cupy(vj_full) @ B + e_J = 0.5 * cp.sum(dm_emb * vj_emb) + + # Exact Exchange interaction traced over active space + Grid XC extraction + exc_tot = getattr(v_eff_full, 'exc', 0.0) + vk_full = getattr(v_eff_full, 'vk', None) + + e_K = 0.0 + grid_exc_tot = exc_tot + if vk_full is not None: + vk_full = _as_cupy(vk_full) + vk_emb = B.T @ vk_full @ B + e_K = -0.5 * cp.sum(dm_emb * vk_emb) + 
e_K_global = -0.5 * cp.sum(dm_full_ao * vk_full) + # Isolate the pure non-linear grid integration part + grid_exc_tot = exc_tot - e_K_global + + # Core evaluation for pure Grid XC subtraction + v_eff_core = mf_obj.get_veff(self.full_mol, dm_core) + exc_core = getattr(v_eff_core, 'exc', 0.0) + vk_core = getattr(v_eff_core, 'vk', None) + + grid_exc_core = exc_core + if vk_core is not None: + vk_core = _as_cupy(vk_core) + e_K_global_core = -0.5 * cp.sum(dm_core * vk_core) + grid_exc_core = exc_core - e_K_global_core + + return e_h + e_J + e_K + grid_exc_tot - grid_exc_core + def kernel(self): if not self.mf_outer.converged: self.mf_outer.kernel() @@ -79,98 +118,25 @@ def kernel(self): dm_emb_low = self.dm_emb_init[ifrag] B = self.B[ifrag] + dm_core = self.dm_core[ifrag] is_mean_field = hasattr(self.mf_inner_template, 'get_veff') - # Evaluate High-Level trace if is_mean_field: - # Bare one-electron Hamiltonian trace + # Bare one-electron Hamiltonian evaluated in active space h_eval_bare = B.T @ hcore_orig @ B - e_high_h = cp.sum(dm_emb_high * h_eval_bare) - - # Full density reconstruction - dm_full_ao_high = self.dm_core[ifrag] + B @ dm_emb_high @ B.T - v_eff_full_high = self.mf_inner_template.get_veff(self.full_mol, dm_full_ao_high) - - # Coulomb J interaction traced over active space - vj_full_high = getattr(v_eff_full_high, 'vj', None) - vj_emb_high = B.T @ _as_cupy(vj_full_high) @ B - e_high_J = 0.5 * cp.sum(dm_emb_high * vj_emb_high) - - # Exact Exchange interaction traced over active space + Grid XC extraction - exc_tot_high = getattr(v_eff_full_high, 'exc', 0.0) - vk_full_high = getattr(v_eff_full_high, 'vk', None) - e_high_K = 0.0 - grid_exc_tot_high = exc_tot_high - if vk_full_high is not None: - vk_full_high = _as_cupy(vk_full_high) - vk_emb_high = B.T @ vk_full_high @ B - e_high_K = -0.5 * cp.sum(dm_emb_high * vk_emb_high) - e_K_global_high = -0.5 * cp.sum(dm_full_ao_high * vk_full_high) - # Isolate the pure non-linear grid integration part - 
grid_exc_tot_high = exc_tot_high - e_K_global_high - - # Core evaluation for pure Grid XC subtraction - v_eff_core_high = self.mf_inner_template.get_veff(self.full_mol, self.dm_core[ifrag]) - exc_core_high = getattr(v_eff_core_high, 'exc', 0.0) - vk_core_high = getattr(v_eff_core_high, 'vk', None) + # Evaluate High-Level trace + e_high = self._evaluate_embedded_energy( + self.mf_inner_template, dm_emb_high, h_eval_bare, B, dm_core + ) - grid_exc_core_high = exc_core_high - if vk_core_high is not None: - vk_core_high = _as_cupy(vk_core_high) - e_K_global_core_high = -0.25 * cp.sum(self.dm_core[ifrag] * vk_core_high) - grid_exc_core_high = exc_core_high - e_K_global_core_high - - e_high = e_high_h + e_high_J + e_high_K + grid_exc_tot_high - grid_exc_core_high - else: - raise NotImplementedError("WFT evaluation is not implemented for this class.") - - # Evaluate Low-Level trace (Exact Real-Space XC Integration) - if is_mean_field: - # 1. Bare one-electron Hamiltonian trace - e_low_h = cp.sum(dm_emb_low * h_eval_bare) - - # Reconstruct full low-level density strictly from embedded projection - dm_full_ao_low_reconstructed = self.dm_core[ifrag] + B @ dm_emb_low @ B.T - v_eff_full_low = self.mf_outer.get_veff(self.full_mol, dm_full_ao_low_reconstructed) - - # 2. Coulomb (J) interaction traced over active space - vj_full_low = getattr(v_eff_full_low, 'vj', None) - if vj_full_low is None: - vj_full_low = self.mf_outer.get_j(self.full_mol, dm_full_ao_low_reconstructed) - vj_emb_low = B.T @ _as_cupy(vj_full_low) @ B - e_low_J = 0.5 * cp.sum(dm_emb_low * vj_emb_low) - - # 3. 
Exact Exchange (K) interaction traced over active space + Grid XC extraction - exc_tot_low = getattr(v_eff_full_low, 'exc', 0.0) - vk_full_low = getattr(v_eff_full_low, 'vk', None) - - e_low_K = 0.0 - grid_exc_tot_low = exc_tot_low - if vk_full_low is not None: - vk_full_low = _as_cupy(vk_full_low) - vk_emb_low = B.T @ vk_full_low @ B - e_low_K = -0.5 * cp.sum(dm_emb_low * vk_emb_low) - e_K_global_low = -0.5 * cp.sum(dm_full_ao_low_reconstructed * vk_full_low) - # Isolate the pure non-linear grid integration part - grid_exc_tot_low = exc_tot_low - e_K_global_low - - # Core evaluation for pure Grid XC subtraction - v_eff_core_low = self.mf_outer.get_veff(self.full_mol, self.dm_core[ifrag]) - exc_core_low = getattr(v_eff_core_low, 'exc', 0.0) - vk_core_low = getattr(v_eff_core_low, 'vk', None) - - grid_exc_core_low = exc_core_low - if vk_core_low is not None: - vk_core_low = _as_cupy(vk_core_low) - e_K_global_core_low = -0.25 * cp.sum(self.dm_core[ifrag] * vk_core_low) - grid_exc_core_low = exc_core_low - e_K_global_core_low - - e_low = e_low_h + e_low_J + e_low_K + grid_exc_tot_low - grid_exc_core_low + # Evaluate Low-Level trace + e_low = self._evaluate_embedded_energy( + self.mf_outer, dm_emb_low, h_eval_bare, B, dm_core + ) else: raise NotImplementedError("WFT evaluation is not implemented for this class.") - # Assembly delta_e = float(e_high - e_low) self.log.note(f"Global Low-Level E : {e_global_low:.8f}") self.log.note(f"Active Space dE : {delta_e:.8f}") @@ -178,4 +144,8 @@ def kernel(self): self.e_tot = e_global_low + delta_e self.log.note(f"Total Embedded E : {self.e_tot:.8f}") + self.mf_outer.mo_coeff = None + self.mf_outer.mo_energy = None + self.mf_outer.mo_occ = None + return self.e_tot \ No newline at end of file diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py new file mode 100644 index 000000000..e69de29bb diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dmet_embeding.py 
b/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py similarity index 100% rename from gpu4pyscf/qmmm/embedding/tests/test_dmet_embeding.py rename to gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py From f6877de3c934575432733ca7af23d6ed7e831586 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 21 May 2026 15:26:44 +0800 Subject: [PATCH 15/30] finish debug the embedding DFT --- gpu4pyscf/qmmm/embedding/embedding.py | 208 +++++++---------------- gpu4pyscf/qmmm/embedding/embeding_dft.py | 10 +- 2 files changed, 66 insertions(+), 152 deletions(-) diff --git a/gpu4pyscf/qmmm/embedding/embedding.py b/gpu4pyscf/qmmm/embedding/embedding.py index e439ecd25..40f3ae7da 100644 --- a/gpu4pyscf/qmmm/embedding/embedding.py +++ b/gpu4pyscf/qmmm/embedding/embedding.py @@ -122,9 +122,6 @@ def build_embedding_basis(nao, frag_idx, env_idx, bath_orb): def build_core_dm(env_idx, core_orb, nao): - """ - Build the core 1-RDM in the full AO basis. - """ env_idx = _as_cupy(env_idx) if core_orb.size == 0: return cp.zeros((nao, nao), dtype=float) @@ -201,9 +198,9 @@ class DMET(lib.StreamObject): Parameters ---------- - mf_outer : SCF object (gpu4pyscf) + mf_outer : SCF object Low-level mean-field on the full system. - mf_inner : SCF/DFT/post-HF object (gpu4pyscf) + mf_inner : SCF/post-HF object High-level mean-field or post-HF template applied to the embedded cluster. fragments : list of lists of int List of fragments, where each fragment is a list of atom indices. @@ -213,13 +210,11 @@ class DMET(lib.StreamObject): Maximum number of macroscopic iterations for correlation potential (u). macro_tol : float Convergence tolerance for the difference in fragment 1-RDMs. - energy_method : str - Method for calculating the total energy: 'direct' or 'delta'. 
""" def __init__(self, mf_outer, mf_inner, fragments, threshold=1e-5, max_macro_iter=20, macro_tol=1e-4, - energy_method='direct', verbose=None): + verbose=None): if mf_outer is None or mf_inner is None: raise ValueError("mf_outer and mf_inner are both required.") if not fragments: @@ -230,16 +225,12 @@ def __init__(self, mf_outer, mf_inner, fragments, else: verbose = int(verbose) self.log = logger.new_logger(mf_outer, verbose) - self.mf_outer = mf_outer - self.mf_inner_template = mf_inner + self.mf_outer = mf_outer.copy() + self.mf_inner_template = mf_inner.copy() self.full_mol = mf_outer.mol self.threshold = float(threshold) self.max_macro_iter = max_macro_iter self.macro_tol = macro_tol - - self.energy_method = energy_method.lower() - if self.energy_method not in ['direct', 'delta']: - raise ValueError("energy_method must be 'direct' or 'delta'") self.fragments = [list(int(a) for a in frag) for frag in fragments] self.nfrags = len(self.fragments) @@ -341,29 +332,18 @@ def _build_inner_mf(self, ifrag, dm_full_ao): mf_inner = _instantiate_inner_mf(self.mf_inner_template, emb_mol) - B_mat = self.B[ifrag] - - if hasattr(self.mf_inner_template, 'get_veff'): - v_core_inner_ao = _as_cupy(self.mf_inner_template.get_veff(self.full_mol, self.dm_core[ifrag])) - else: - v_core_inner_ao = cp.zeros_like(self.dm_core[ifrag]) - - h_ao = _as_cupy(self.mf_outer.get_hcore()) - # The inner Hamiltonian gets the strict high-level background potential - h_emb_inner = B_mat.T @ (h_ao + v_core_inner_ao) @ B_mat + h_emb = self.h_emb[ifrag] ovlp = cp.eye(nemb) e_nuc = float(self.full_mol.energy_nuc()) - mf_inner.get_hcore = lambda *args, **kwargs: h_emb_inner + mf_inner.get_hcore = lambda *args, **kwargs: h_emb mf_inner.get_ovlp = lambda *args, **kwargs: ovlp - - # Energy offset for inner solver debugging aligns with inner core potential - # This 0.5 will be removed for 1-fragment systmes. 
- e_core_inner = float(cp.einsum('ij,ji->', self.dm_core[ifrag], h_ao) + - 0.5 * cp.einsum('ij,ji->', self.dm_core[ifrag], v_core_inner_ao)) - mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc + e_core_inner + mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc + self.e_core[ifrag] + + B_mat = self.B[ifrag] + v_core_ao = self.v_core_ao[ifrag] - # Overwrite get_veff to compute on-the-fly using the inner template + # Overwrite get_veff to compute on-the-fly using the outer HF def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): if dm is None: dm = mf_inner.make_rdm1() @@ -377,36 +357,41 @@ def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): dm_full_ao_inner = self.dm_core[ifrag] + dm_ao - # [FIXED] Compute Veff in full AO basis using inner template strictly + # For pure HF, this may be redundant, cause J, K are linear, + # but this is also used for DFT based method, so, we use the delta method. v_eff_full = self.mf_inner_template.get_veff(self.full_mol, dm_full_ao_inner, hermi=hermi) - v_eff_active = _as_cupy(v_eff_full) - v_core_inner_ao + v_eff_active = _as_cupy(v_eff_full) - v_core_ao # Project Veff back to embedded basis if dm_cp.ndim == 2: v_eff_emb = B_mat.T @ v_eff_active @ B_mat else: v_eff_emb = cp.einsum('pi,xpq,qj->xij', B_mat, v_eff_active, B_mat) - + ecoul = getattr(v_eff_full, 'ecoul', 0.0) exc = getattr(v_eff_full, 'exc', 0.0) - if hasattr(v_eff_full, 'vj'): - vj = getattr(v_eff_full, 'vj') + vj_full = getattr(v_eff_full, 'vj', None) + if vj_full is not None: + vj_emb = B_mat.T @ _as_cupy(vj_full) @ B_mat else: - vj = cp.zeros_like(v_eff_emb) - if hasattr(v_eff_full, 'vk'): - vk = getattr(v_eff_full, 'vk') + vj_emb = cp.zeros_like(v_eff_emb) + + vk_full = getattr(v_eff_full, 'vk', None) + if vk_full is not None: + vk_emb = B_mat.T @ _as_cupy(vk_full) @ B_mat else: - vk = cp.zeros_like(v_eff_emb) + vk_emb = cp.zeros_like(v_eff_emb) + + v_eff_emb = tag_array(v_eff_emb, ecoul=ecoul, exc=exc, vj=vj_emb, vk=vk_emb) - v_eff_emb = 
tag_array(v_eff_emb, ecoul=ecoul, exc=exc, vj=vj, vk=vk) - return v_eff_emb mf_inner.get_veff = _get_veff - + # using s to make the upper index to the lower index s_ao = _as_cupy(self.mf_outer.get_ovlp()) sB = s_ao @ self.B[ifrag] + # Due to BSC_core = 0, the following is equivalent to using dm_full_ao - dm_core_ao dm_emb_init = sB.T @ dm_full_ao @ sB trace = float(cp.trace(dm_emb_init)) @@ -445,12 +430,7 @@ def kernel(self): mo_occ = _as_cupy(self.mf_outer.mo_occ) dm_full_ao = _as_cupy(self.mf_outer.make_rdm1()) - if self.energy_method == 'delta': - # Remove the correlation potential penalty from the total energy to get the physical base energy - e_tot = self.mf_outer.e_tot - float(cp.sum(dm_full_ao * u_ao)) - else: - e_tot = 0.0 - + e_tot = 0.0 dm_inners = [] for ifrag in range(self.nfrags): @@ -469,7 +449,6 @@ def kernel(self): dm_inners.append(dm_inner_full_ao) dm1_emb = dm_emb - n_frag = self.frag_idx[ifrag].size # Outer (Low-level) environment embedding @@ -477,107 +456,46 @@ def kernel(self): v_core_emb = B.T @ v_core_ao @ B # Apply 0.5 factor to core potential to avoid double counting across fragments - # TODO: The 0.5 factor should be removed for ONIOM energy of just 1 fragment. + # The 0.5 factor should be removed for ONIOM energy of just 1 fragment. 
h_eval = self.h_emb[ifrag] - 0.5 * v_core_emb is_mean_field = hasattr(self.mf_inner_template, 'get_veff') - # [FIXED] Inner (High-level) evaluation uses its own core functional to prevent cross-talk - if is_mean_field: - v_core_inner_ao = _as_cupy(self.mf_inner_template.get_veff(self.full_mol, self.dm_core[ifrag])) - v_core_inner_emb = B.T @ v_core_inner_ao @ B - h_ao = _as_cupy(hcore_orig) - h_emb_inner = B.T @ (h_ao + v_core_inner_ao) @ B - h_eval_high = h_emb_inner - 0.5 * v_core_inner_emb - else: - h_eval_high = h_eval - - if self.energy_method == 'direct': - e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * h_eval_high[:n_frag, :]) - if not is_mean_field: - raise NotImplementedError("Only mean-field solver is supported for DMET.") - self.log.info("using non-mean-field solver") - nemb = B.shape[1] - # TODO: this can be replaced by a more efficient routine - B_cpu = cp.asnumpy(B) - eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) - eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) # Restore to 4D array - eri_emb = _as_cupy(eri_emb_cpu) - - if hasattr(mf_inner, 'make_rdm2'): - dm2_emb = _as_cupy(mf_inner.make_rdm2()) - else: - # Fallback using the HF 2-RDM formulation for post-HF methods lacking make_rdm2 - dm2_emb = (cp.einsum('ij,kl->ijkl', dm1_emb, dm1_emb) - - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb, dm1_emb)) - - e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) - else: - self.log.info("using mean-field solver") - v_eff_emb = mf_inner.get_veff(dm=dm1_emb) - e_frag_elec += 0.5 * cp.sum(dm1_emb[:n_frag, :] * _as_cupy(v_eff_emb)[:n_frag, :]) - - e_frag_nuc = 0.0 - coords = self.full_mol.atom_coords() - charges = self.full_mol.atom_charges() - frag_atoms = self.fragments[ifrag] - for i in frag_atoms: - for j in range(self.full_mol.natm): - if i == j: continue - r = np.linalg.norm(coords[i] - coords[j]) - e_frag_nuc += 0.5 * charges[i] * charges[j] / r - - self.log.info(f"Fragment {ifrag} Electronic Energy: 
{float(e_frag_elec):.8f} | Nuclear Energy: {e_frag_nuc:.8f}") - e_tot += float(e_frag_elec) + e_frag_nuc - - elif self.energy_method == 'delta': - dm1_emb_high = dm1_emb - dm1_emb_low = self.dm_emb_init[ifrag] - - # Compute High-Level pseudo energy (using strictly high-level core potential evaluation) - e_high = cp.sum(dm1_emb_high[:n_frag, :] * h_eval_high[:n_frag, :]) - - # Compute Low-Level pseudo energy (using strictly low-level core potential evaluation) - e_low = cp.sum(dm1_emb_low[:n_frag, :] * h_eval[:n_frag, :]) + e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * h_eval[:n_frag, :]) + if not is_mean_field: + self.log.info("using non-mean-field solver") + nemb = B.shape[1] + # TODO: this can be replaced by a more efficient routine + B_cpu = cp.asnumpy(B) + eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) + eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) # Restore to 4D array + eri_emb = _as_cupy(eri_emb_cpu) - if not is_mean_field: - raise NotImplementedError("Only mean-field solver is supported for DMET.") - self.log.info("using non-mean-field solver") - nemb = B.shape[1] - B_cpu = cp.asnumpy(B) - eri_emb_cpu = pyscf.ao2mo.kernel(self.full_mol, B_cpu) - eri_emb_cpu = pyscf.ao2mo.restore(1, eri_emb_cpu, nemb) - eri_emb = _as_cupy(eri_emb_cpu) - - if hasattr(mf_inner, 'make_rdm2'): - dm2_emb_high = _as_cupy(mf_inner.make_rdm2()) - else: - dm2_emb_high = (cp.einsum('ij,kl->ijkl', dm1_emb_high, dm1_emb_high) - - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb_high, dm1_emb_high)) - e_high += 0.5 * cp.sum(dm2_emb_high[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) - - # Low-level is always un-correlated 2-RDM - dm2_emb_low = (cp.einsum('ij,kl->ijkl', dm1_emb_low, dm1_emb_low) - - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb_low, dm1_emb_low)) - e_low += 0.5 * cp.sum(dm2_emb_low[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) + if hasattr(mf_inner, 'make_rdm2'): + dm2_emb = _as_cupy(mf_inner.make_rdm2()) else: - self.log.info("using mean-field solver") - v_eff_emb_high = 
mf_inner.get_veff(dm=dm1_emb_high) - e_high += 0.5 * cp.sum(dm1_emb_high[:n_frag, :] * _as_cupy(v_eff_emb_high)[:n_frag, :]) - - # [FIXED] Compute Veff for the low-level density explicitly using the outer functional - dm_ao_low = B @ dm1_emb_low @ B.T - dm_full_ao_low = self.dm_core[ifrag] + dm_ao_low - - v_eff_full_low = self.mf_outer.get_veff(self.full_mol, dm_full_ao_low) - v_eff_active_low = _as_cupy(v_eff_full_low) - self.v_core_ao[ifrag] - v_eff_emb_low = B.T @ v_eff_active_low @ B - - e_low += 0.5 * cp.sum(dm1_emb_low[:n_frag, :] * v_eff_emb_low[:n_frag, :]) + # Fallback using the HF 2-RDM formulation for post-HF methods lacking make_rdm2 + dm2_emb = (cp.einsum('ij,kl->ijkl', dm1_emb, dm1_emb) + - 0.5 * cp.einsum('il,jk->ijkl', dm1_emb, dm1_emb)) - delta_e = float(e_high - e_low) - self.log.info(f"Fragment {ifrag} Delta E (Correlation Improvement): {delta_e:.8f}") - e_tot += delta_e + e_frag_elec += 0.5 * cp.sum(dm2_emb[:n_frag, :, :, :] * eri_emb[:n_frag, :, :, :]) + else: + self.log.info("using mean-field solver") + v_eff_emb = mf_inner.get_veff(dm=dm1_emb) + e_frag_elec += 0.5 * cp.sum(dm1_emb[:n_frag, :] * _as_cupy(v_eff_emb)[:n_frag, :]) + + e_frag_nuc = 0.0 + coords = self.full_mol.atom_coords() + charges = self.full_mol.atom_charges() + frag_atoms = self.fragments[ifrag] + for i in frag_atoms: + for j in range(self.full_mol.natm): + if i == j: continue + r = np.linalg.norm(coords[i] - coords[j]) + e_frag_nuc += 0.5 * charges[i] * charges[j] / r + + self.log.info(f"Fragment {ifrag} Electronic Energy: {float(e_frag_elec):.8f} | Nuclear Energy: {e_frag_nuc:.8f}") + e_tot += float(e_frag_elec) + e_frag_nuc dm_low_oao = X_inv @ dm_full_ao @ X_inv diff --git a/gpu4pyscf/qmmm/embedding/embeding_dft.py b/gpu4pyscf/qmmm/embedding/embeding_dft.py index 9c8d206ce..418b357ae 100644 --- a/gpu4pyscf/qmmm/embedding/embeding_dft.py +++ b/gpu4pyscf/qmmm/embedding/embeding_dft.py @@ -25,8 +25,8 @@ class SingleFragmentEmbedding(DMET): This class performs a single-shot, 
single-fragment delta-method energy evaluation WITHOUT macroscopic iterations. - It rigorously traces over the entire active space (Fragment + Bath) to capture - full polarization correlation, eliminating the 0.5 double-counting factor. + It rigorously traces over the entire active space (fragment + bath) to capture + full polarization correlation. """ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): @@ -143,9 +143,5 @@ def kernel(self): self.e_tot = e_global_low + delta_e self.log.note(f"Total Embedded E : {self.e_tot:.8f}") - - self.mf_outer.mo_coeff = None - self.mf_outer.mo_energy = None - self.mf_outer.mo_occ = None - + return self.e_tot \ No newline at end of file From 41fdbc2ce282a6906c9562a8bfe45d27aee0f9d8 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 22 May 2026 11:28:07 +0800 Subject: [PATCH 16/30] - fix some bugs, which generating some unphysical bath orbitals - add more unit test covering this issue - add two examples --- examples/embedding/48-dmet-embedding.py | 86 +++++++++++ examples/embedding/49-dft-dmet-embedding.py | 84 +++++++++++ gpu4pyscf/qmmm/embedding/embedding.py | 24 ++- .../embedding/tests/test_dft_embedding.py | 137 ++++++++++++++++++ .../embedding/tests/test_dmet_embedding.py | 104 +++++++++++-- 5 files changed, 420 insertions(+), 15 deletions(-) create mode 100644 examples/embedding/48-dmet-embedding.py create mode 100644 examples/embedding/49-dft-dmet-embedding.py diff --git a/examples/embedding/48-dmet-embedding.py b/examples/embedding/48-dmet-embedding.py new file mode 100644 index 000000000..f2adc8b1b --- /dev/null +++ b/examples/embedding/48-dmet-embedding.py @@ -0,0 +1,86 @@ +# Copyright 2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Example 48: Standard Multi-Fragment Self-Consistent DMET Calculation. + +This script demonstrates how to partition a molecule into multiple fragments +and optimize the global correlation potential (u_oao) to match high-level and +low-level local 1-RDM density matrices self-consistently. +""" + +from pyscf import gto +from gpu4pyscf.scf import hf as gpu_hf +from gpu4pyscf.qmmm.embedding.embedding import DMET + +def run_dmet_example(): + # 1. Define the system (Ethane molecule with 6-31G basis) + mol = gto.Mole() + mol.atom = ''' + C -0.76091 -0.00000 0.00000 + C 0.76091 -0.00000 0.00000 + H -1.16001 1.02029 0.00000 + H -1.16001 -0.51014 -0.88357 + H -1.16001 -0.51014 0.88357 + H 1.16001 -1.02029 0.00000 + H 1.16001 0.51014 0.88357 + H 1.16001 0.51014 -0.88357 + ''' + mol.basis = '6-31g' + mol.verbose = 4 # Set verbose to see detailed DMET iteration logs + mol.build() + + print("--- Step 1: Initialize Low-Level and High-Level Solver Templates ---") + # In this classic exact-back-to-exact test case, we nest RHF within RHF. + # DMET should converge the correlation potential to exactly zero. 
+ mf_outer = gpu_hf.RHF(mol) + mf_outer.conv_tol = 1e-12 + + mf_inner_template = gpu_hf.RHF(mol) + mf_inner_template.conv_tol = 1e-12 + + print("\n--- Step 2: Define Molecular Fragments ---") + # Partition the Ethane molecule into two methyl fragments based on atom indices: + # Fragment 0: First Methyl group [C1, H1, H2, H3] + # Fragment 1: Second Methyl group [C2, H4, H5, H6] + fragments = [ + [0, 2, 3, 4], + [1, 5, 6, 7] + ] + print(f"Fragment 0 atom indices: {fragments[0]}") + print(f"Fragment 1 atom indices: {fragments[1]}") + + print("\n--- Step 3: Setup and Execute the Self-Consistent DMET Solver ---") + dmet_solver = DMET( + mf_outer=mf_outer, + mf_inner=mf_inner_template, + fragments=fragments, + threshold=1e-5, # SVD eigenvalue threshold for bath selection + max_macro_iter=20, # Max macro loops for correlation potential fitting + macro_tol=1e-4 # Convergence tolerance for the density matching cost + ) + + # Trigger the DMET macroscopic self-consistent optimization + e_dmet = dmet_solver.kernel() + + print("\n--- Final Results Summary ---") + # Run the raw full system RHF as an exact reference + e_hf_ref = mf_outer.kernel() + + print(f"Global Reference RHF Energy : {e_hf_ref:.8f} Hartree") # -79.19706462 + print(f"Macroscopic DMET Total Energy: {e_dmet:.8f} Hartree") # -79.19706462 + print(f"Absolute Energy Deviation : {abs(e_dmet - e_hf_ref):.2e} Hartree") # 9.15e-11 Hartree + +if __name__ == '__main__': + run_dmet_example() \ No newline at end of file diff --git a/examples/embedding/49-dft-dmet-embedding.py b/examples/embedding/49-dft-dmet-embedding.py new file mode 100644 index 000000000..575e44da2 --- /dev/null +++ b/examples/embedding/49-dft-dmet-embedding.py @@ -0,0 +1,84 @@ +# Copyright 2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Example 49: Single-Fragment Delta-Method DFT-in-DFT Embedding. + +This script demonstrates how to embed a high-level hybrid DFT functional (B3LYP) +into a localized region of a low-level GGA DFT environment (PBE) using a +highly-optimized projection basis without macroscopic iterations. +""" + +from pyscf import gto +from gpu4pyscf.dft import rks +from gpu4pyscf.qmmm.embedding.embeding_dft import SingleFragmentEmbedding + +def run_dft_embedding_example(): + # 1. Define the system (Ethane molecule with 6-31G basis) + mol = gto.Mole() + mol.atom = ''' + C -0.76091 -0.00000 0.00000 + C 0.76091 -0.00000 0.00000 + H -1.16001 1.02029 0.00000 + H -1.16001 -0.51014 -0.88357 + H -1.16001 -0.51014 0.88357 + H 1.16001 -1.02029 0.00000 + H 1.16001 0.51014 0.88357 + H 1.16001 0.51014 -0.88357 + ''' + mol.basis = '6-31g' + mol.verbose = 4 # Enable to monitor localized cluster basis dimensions and logs + mol.build() + + print("--- Step 1: Prepare Environment (PBE) and Active Region (B3LYP) Solvers ---") + # Low-level full system solver (Environment description) + mf_outer = rks.RKS(mol, xc='PBE') + mf_outer.conv_tol = 1e-10 + + # High-level solver template (Active cluster description) + mf_inner_template = rks.RKS(mol, xc='B3LYP') + mf_inner_template.conv_tol = 1e-10 + + print("\n--- Step 2: Define Single Target Active Fragment ---") + # Select only one methyl group as the active QM region. + # The other half will automatically serve as the embedding environment. 
+ active_fragment = [0, 2, 3, 4] + print(f"Target QM Active Region atom indices: {active_fragment}") + + print("\n--- Step 3: Initialize and Run Single Fragment Embedding ---") + # Construct the single-shot embedding object. Notice that mf_inner_template + # will be cloned internally via .copy() to completely avoid cache poisoning. + emb_obj = SingleFragmentEmbedding( + mf_outer=mf_outer, + mf_inner=mf_inner_template, + fragment=active_fragment, + threshold=1e-5 # Filters out pure fragment states and numerical noise + ) + + # Compute the final multi-scale total energy via the delta method: + # E_tot = E_PBE(Full) + [E_B3LYP(Active) - E_PBE(Active)] + e_embedded_tot = emb_obj.kernel() + + print("\n--- Step 4: Verification of Template Isolation ---") + # Verify that our protection armor works seamlessly: + # Executing the template after embedding must converge successfully without any side effects. + print("Verifying inner template isolation status...") + mf_inner_template.kernel() + if mf_inner_template.converged: + print("Template isolation check passed successfully! 
No cache poisoning detected.") + else: + print("Warning: Template convergence failed, check cache isolation leaks.") + +if __name__ == '__main__': + run_dft_embedding_example() \ No newline at end of file diff --git a/gpu4pyscf/qmmm/embedding/embedding.py b/gpu4pyscf/qmmm/embedding/embedding.py index 40f3ae7da..4df7ca5e7 100644 --- a/gpu4pyscf/qmmm/embedding/embedding.py +++ b/gpu4pyscf/qmmm/embedding/embedding.py @@ -72,8 +72,9 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): C_occ = mo_coeff_oao[:, occ_mask] if env_idx.size == 0 or C_occ.shape[1] == 0: + s_dummy = cp.ones(C_occ.shape[1]) if env_idx.size == 0 else cp.zeros(0) return (cp.zeros((0, 0)), cp.zeros((0, 0)), - {'n_core_electrons': 0}) + {'n_core_electrons': 0, 'singular_values': s_dummy}) C_A = C_occ[frag_idx, :] @@ -81,14 +82,14 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): C_rot = C_occ @ Vh.T - is_bath = S > threshold + is_bath = (S > threshold) & (S < 1.0 - threshold) # Exclude singular values close to 1.0 is_core_small = S <= threshold n_sv = len(S) # Entangled bath orbitals (environment part) bath_orb = C_rot[env_idx, :n_sv][:, is_bath] norms = cp.linalg.norm(bath_orb, axis=0) - norms[norms < 1e-12] = 1.0 # This should not happen + norms[norms < 1e-12] = 1.0 # This may happen, if s=1.0, which will add a new null vector to B! 
bath_orb = bath_orb / norms # Pure environment core orbitals come from null space + small singular values @@ -97,7 +98,8 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): core_orb = cp.hstack([core_orb_small, core_orb_null]) info = { - 'n_core_electrons': 2 * core_orb.shape[1] + 'n_core_electrons': 2 * core_orb.shape[1], + 'singular_values': S } return bath_orb, core_orb, info @@ -288,6 +290,20 @@ def build_bath(self, ifrag, mo_coeff, mo_occ, X_inv, X): self.B_oao[ifrag] = B_oao self.B[ifrag] = B_ao self.dm_core[ifrag] = dm_core_ao + + n_frag = int(self.frag_idx[ifrag].size) + n_bath = int(bath_orb.shape[1] if bath_orb.size else 0) + n_core = int(core_orb.shape[1] if core_orb.size else 0) + + self.log.info(f"Fragment {ifrag} Schmidt decomposition singular values:") + self.log.info(f" {info['singular_values']}") + + self.log.info(f"Fragment {ifrag} embedding basis partition:") + self.log.info(f" Number of Fragment AOs : {n_frag}") + self.log.info(f" Number of Bath Orbitals: {n_bath}") + self.log.info(f" Number of Core Orbitals: {n_core} ({info['n_core_electrons']} electrons)") + self.log.info(f" Total Embedded Space : {n_frag + n_bath} / {nao_oao} (full AO)") + return self def build_embedded_hamiltonian(self, ifrag, hcore_orig): diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py index e69de29bb..47400776f 100644 --- a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py +++ b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py @@ -0,0 +1,137 @@ +# Copyright 2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import unittest +import numpy as np +import cupy as cp +from pyscf import gto +from gpu4pyscf.scf import hf as gpu_hf +from gpu4pyscf.dft import rks +from gpu4pyscf.qmmm.embedding import embedding +from gpu4pyscf.qmmm.embedding.embeding_dft import SingleFragmentEmbedding + + +class KnownValues(unittest.TestCase): + @classmethod + def setUpClass(cls): + + cls.mol = gto.Mole() + cls.mol.atom = ''' + C -0.76091 -0.00000 0.00000 + C 0.76091 -0.00000 0.00000 + H -1.16001 1.02029 0.00000 + H -1.16001 -0.51014 -0.88357 + H -1.16001 -0.51014 0.88357 + H 1.16001 -1.02029 0.00000 + H 1.16001 0.51014 0.88357 + H 1.16001 0.51014 -0.88357 + ''' + cls.mol.basis = '6-31g' + cls.mol.spin = 0 + cls.mol.charge = 0 + cls.mol.verbose = 0 + cls.mol.build() + + cls.fragments = [[0, 1], [2, 3]] + + @classmethod + def tearDownClass(cls): + del cls.mol + + def test_b3lyp_in_b3lyp(self): + + mf_outer = rks.RKS(self.mol, xc='B3LYP') + mf_inner_template = rks.RKS(self.mol, xc='B3LYP') + + emb_obj = SingleFragmentEmbedding(mf_outer, mf_inner_template, [0, 2, 3, 4]) + emb_obj.kernel() + + e_ref = mf_outer.kernel() + + assert np.abs(e_ref - emb_obj.e_tot) < 1e-8, f"Reference energy {e_ref} != Embedding energy {emb_obj.energy}" + + def test_b3lyp_in_pbe(self): + mf_outer = rks.RKS(self.mol, xc='PBE') + mf_inner_template = rks.RKS(self.mol, xc='B3LYP') + + emb_obj = SingleFragmentEmbedding(mf_outer, mf_inner_template, [i for i in range(8)]) + emb_obj.kernel() + + e_ref = mf_inner_template.kernel() + + assert np.abs(e_ref - emb_obj.e_tot) < 1e-8, f"Reference energy {e_ref} != Embedding 
energy {emb_obj.energy}" + + def test_algebraic_properties(self): + mf_outer = rks.RKS(self.mol, xc='PBE') + mf_inner = rks.RKS(self.mol, xc='PBE') + + emb_obj = SingleFragmentEmbedding(mf_outer, mf_inner, [0, 1, 2]) + emb_obj.kernel() + + S_ao = cp.asarray(mf_outer.get_ovlp()) + B = emb_obj.B[0] + D_core = emb_obj.dm_core[0] + + # Check B^T * S * B == I (Orthonormality of embedding basis) + ortho_check = B.T @ S_ao @ B + identity = cp.eye(B.shape[1]) + max_ortho_err = float(cp.abs(ortho_check - identity).max()) + self.assertTrue(max_ortho_err < 1e-10, + f"Basis B is not orthogonal, max error: {max_ortho_err}") + + # Check Spatial Isolation (Core DM projected onto the active space must be zero) + core_overlap = B.T @ S_ao @ D_core @ S_ao @ B + max_overlap_err = float(cp.abs(core_overlap).max()) + self.assertTrue(max_overlap_err < 1e-10, + f"Core DM leaks into Active Space, max error: {max_overlap_err}") + + def test_electron_conservation(self): + mf_outer = rks.RKS(self.mol, xc='PBE') + mf_inner = rks.RKS(self.mol, xc='B3LYP') + emb_obj = SingleFragmentEmbedding(mf_outer, mf_inner, [0, 1]) + emb_obj.kernel() + + S_ao = cp.asarray(mf_outer.get_ovlp()) + D_emb_high = cp.asarray(emb_obj.mf_inner[0].make_rdm1()) + D_core = emb_obj.dm_core[0] + B = emb_obj.B[0] + + # Project local active density back to full AO basis + D_emb_ao = B @ D_emb_high @ B.T # Identity S ignored + D_total_ao = D_core + D_emb_ao + + n_elec_calc = float(cp.trace(D_total_ao @ S_ao)) + n_elec_exact = float(self.mol.nelectron) + + self.assertAlmostEqual(n_elec_calc, n_elec_exact, places=8, + msg=f"Electron loss: {n_elec_calc} != {n_elec_exact}") + + def test_template_isolation_and_convergence(self): + mf_outer = rks.RKS(self.mol, xc='PBE') + mf_inner_template = rks.RKS(self.mol, xc='PBE') + + emb_obj = SingleFragmentEmbedding(mf_outer, mf_inner_template, [0, 2, 3, 4], threshold=-1.0) + emb_obj.kernel() + + mf_inner_template.kernel() + + # Assert the template is still clean and converges properly + 
self.assertTrue(mf_inner_template.converged, + "Template object was poisoned and failed to converge!") + + +if __name__ == '__main__': + print("Full Tests for ONIOM-like DFT embedding.") + unittest.main() diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py b/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py index 8f55b69f0..2d3353ae8 100644 --- a/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py +++ b/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py @@ -71,11 +71,6 @@ def setUpClass(cls): cls.mf_inner_template2 = gpu_hf.RHF(cls.mol2) cls.mf_inner_template2.conv_tol = 1e-12 - cls.mf_outer3 = rks.RKS(cls.mol2) - cls.mf_outer3.conv_tol = 1e-12 - cls.mf_inner_template3 = rks.RKS(cls.mol2) - cls.mf_inner_template3.conv_tol = 1e-12 - @classmethod def tearDownClass(cls): del cls.mol @@ -132,16 +127,30 @@ def test_schmidt(self): s = mf.get_ovlp() mo_coeff = mf.mo_coeff X, X_inv = embedding.lowdin_orth(s) - mo_coeff_oao = X@mo_coeff + mo_coeff_oao = X @ mo_coeff C_occ = mo_coeff_oao[:, :2] C_A = mo_coeff_oao[:4, :2] + U, S, Vh = cp.linalg.svd(C_A, full_matrices=True) C_rot = C_occ @ Vh.T - bath_orb_ref = C_rot[4:] - norms = cp.linalg.norm(bath_orb_ref, axis=0) - bath_orb_ref /= norms - bath_orb = embedding.schmidt_decompose(mo_coeff_oao, mf.mo_occ, [0,1,2,3], [4,5,6,7])[0] - assert np.abs(bath_orb.get() - bath_orb_ref.get()).max() < 1e-8, "Schmidt decomposition should yield close-to-identity matrices." 
+ + threshold = 1e-5 + is_bath = (S > threshold) & (S < 1.0 - threshold) + n_sv = len(S) + + bath_orb_ref = C_rot[4:, :n_sv][:, is_bath] + if bath_orb_ref.size > 0: + norms = cp.linalg.norm(bath_orb_ref, axis=0) + norms[norms < 1e-12] = 1.0 + bath_orb_ref /= norms + + bath_orb = embedding.schmidt_decompose(mo_coeff_oao, mf.mo_occ, [0,1,2,3], [4,5,6,7], threshold=threshold)[0] + + self.assertEqual(bath_orb.shape, bath_orb_ref.shape, + "Matrix shapes must match after filtering pure fragment orbitals.") + if bath_orb.size > 0: + assert np.abs(bath_orb.get() - bath_orb_ref.get()).max() < 1e-8, \ + "Schmidt decomposition should yield close-to-identity matrices." def test_dmet_execution_and_convergence(self): dmet_solver = DMET( @@ -195,6 +204,79 @@ def test_dmet_execution_and_convergence(self): assert np.abs(e_tot_iter1 - e_tot) < 1e-8, "DMET energy should be converged in 1 macro iteration." assert np.abs(dmet_solver2.u_oao).sum() < 1e-8, "Correlation potential should be close to zero." + def test_multifragment_algebraic_and_conservation(self): + dmet_solver = DMET( + mf_outer=self.mf_outer2, + mf_inner=self.mf_inner_template2, + fragments=self.fragments2, + threshold=1e-5, + max_macro_iter=1 + ) + dmet_solver.kernel() + + S_ao = cp.asarray(self.mf_outer2.get_ovlp()) + n_total_elec = float(self.mol2.nelectron) + + for ifrag in range(dmet_solver.nfrags): + B = dmet_solver.B[ifrag] + D_core = dmet_solver.dm_core[ifrag] + D_emb_high = cp.asarray(dmet_solver.mf_inner[ifrag].make_rdm1()) + + # Check B^T * S * B == I for each fragment + ortho_check = B.T @ S_ao @ B + identity = cp.eye(B.shape[1]) + max_ortho_err = float(cp.abs(ortho_check - identity).max()) + self.assertTrue(max_ortho_err < 1e-10, + f"Fragment {ifrag}: Basis B is not orthonormal. 
Max err: {max_ortho_err}") + + # Check Core DM spatial isolation from the active space + core_overlap = B.T @ S_ao @ D_core @ S_ao @ B + max_overlap_err = float(cp.abs(core_overlap).max()) + self.assertTrue(max_overlap_err < 1e-10, + f"Fragment {ifrag}: Core DM leaks into Active Space. Max err: {max_overlap_err}") + + # Check total electron conservation for this fragment representation + D_emb_ao = B @ D_emb_high @ B.T + D_total_ao = D_core + D_emb_ao + n_elec_calc = float(cp.trace(D_total_ao @ S_ao)) + self.assertAlmostEqual(n_elec_calc, n_total_elec, places=8, + msg=f"Fragment {ifrag}: Electron loss detected. {n_elec_calc} != {n_total_elec}") + + def test_dmet_template_isolation(self): + dmet_solver = DMET( + mf_outer=self.mf_outer2, + mf_inner=self.mf_inner_template2, + fragments=self.fragments2, + threshold=1e-5, + max_macro_iter=3, + macro_tol=1e-3 + ) + dmet_solver.kernel() + + self.mf_inner_template2.mo_coeff = None + self.mf_inner_template2.kernel() + + self.assertTrue(self.mf_inner_template2.converged, + "The inner template was poisoned by DMET macro-loops and failed to converge!") + + def test_correlation_potential_symmetry(self): + dmet_solver = DMET( + mf_outer=self.mf_outer, + mf_inner=self.mf_inner_template, + fragments=self.fragments, + threshold=1e-5, + max_macro_iter=2 + ) + dmet_solver.kernel() + + u = dmet_solver.u_oao + + sym_err = float(cp.abs(u - u.T).max()) + self.assertTrue(sym_err < 1e-12, f"Correlation potential u_oao is not symmetric. 
Max err: {sym_err}") + + max_u_val = float(cp.abs(u).max()) + self.assertTrue(max_u_val < 1e-7, f"Trivial correlation potential should be zero, but got max: {max_u_val}") + if __name__ == '__main__': print("Full Tests for DMET") From fde13a1aabcb481a7ccca0980fb013a765f57c78 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 22 May 2026 13:08:41 +0800 Subject: [PATCH 17/30] fix some typos --- gpu4pyscf/qmmm/embedding/__init__.py | 2 +- gpu4pyscf/qmmm/embedding/{embeding_dft.py => embedding_dft.py} | 0 gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename gpu4pyscf/qmmm/embedding/{embeding_dft.py => embedding_dft.py} (100%) diff --git a/gpu4pyscf/qmmm/embedding/__init__.py b/gpu4pyscf/qmmm/embedding/__init__.py index 01eaa5903..6884d9f8e 100644 --- a/gpu4pyscf/qmmm/embedding/__init__.py +++ b/gpu4pyscf/qmmm/embedding/__init__.py @@ -14,4 +14,4 @@ from .embedding import DMET -from .embeding_dft import SingleFragmentEmbedding +from .embedding_dft import SingleFragmentEmbedding diff --git a/gpu4pyscf/qmmm/embedding/embeding_dft.py b/gpu4pyscf/qmmm/embedding/embedding_dft.py similarity index 100% rename from gpu4pyscf/qmmm/embedding/embeding_dft.py rename to gpu4pyscf/qmmm/embedding/embedding_dft.py diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py index 47400776f..6e2d4243f 100644 --- a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py +++ b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py @@ -20,7 +20,7 @@ from gpu4pyscf.scf import hf as gpu_hf from gpu4pyscf.dft import rks from gpu4pyscf.qmmm.embedding import embedding -from gpu4pyscf.qmmm.embedding.embeding_dft import SingleFragmentEmbedding +from gpu4pyscf.qmmm.embedding.embedding_dft import SingleFragmentEmbedding class KnownValues(unittest.TestCase): From f8445c62b30427b4f23c04fe05c1b3c11994261b Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 22 May 2026 
14:18:02 +0800 Subject: [PATCH 18/30] fix some typos --- examples/embedding/49-dft-dmet-embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/embedding/49-dft-dmet-embedding.py b/examples/embedding/49-dft-dmet-embedding.py index 575e44da2..a3fe49afc 100644 --- a/examples/embedding/49-dft-dmet-embedding.py +++ b/examples/embedding/49-dft-dmet-embedding.py @@ -22,7 +22,7 @@ from pyscf import gto from gpu4pyscf.dft import rks -from gpu4pyscf.qmmm.embedding.embeding_dft import SingleFragmentEmbedding +from gpu4pyscf.qmmm.embedding.embedding_dft import SingleFragmentEmbedding def run_dft_embedding_example(): # 1. Define the system (Ethane molecule with 6-31G basis) From 4071c501d8583b9c0037c82bdb70f8dab9833142 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 22 May 2026 14:35:30 +0800 Subject: [PATCH 19/30] add some comments --- gpu4pyscf/qmmm/embedding/embedding.py | 14 +++++--------- gpu4pyscf/qmmm/embedding/embedding_dft.py | 10 +++------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/gpu4pyscf/qmmm/embedding/embedding.py b/gpu4pyscf/qmmm/embedding/embedding.py index 4df7ca5e7..9ab8fd587 100644 --- a/gpu4pyscf/qmmm/embedding/embedding.py +++ b/gpu4pyscf/qmmm/embedding/embedding.py @@ -45,9 +45,6 @@ def lowdin_orth(s): def get_fragment_ao_indices(mol, frag_atoms): - """ - Return the atomic-orbital indices that belong to the listed atoms. 
- """ aoslice = mol.aoslice_by_atom() indices = [] for ia in frag_atoms: @@ -82,7 +79,8 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): C_rot = C_occ @ Vh.T - is_bath = (S > threshold) & (S < 1.0 - threshold) # Exclude singular values close to 1.0 + # Exclude singular values close to 1.0, which are fragment orbitals + is_bath = (S > threshold) & (S < 1.0 - threshold) is_core_small = S <= threshold n_sv = len(S) @@ -106,7 +104,7 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): def build_embedding_basis(nao, frag_idx, env_idx, bath_orb): """ - Construct the AO -> embedded transformation matrix B. + Construct the AO -> embedded transformation matrix B^{mu}_{k} """ # Due to the Carlson-Keller theorem, the lowdin OAO basis # and the AO basis is 1-to-1 match. @@ -366,10 +364,7 @@ def _get_veff(mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): dm_cp = _as_cupy(dm) # Project embedded dm to full AO basis - if dm_cp.ndim == 2: - dm_ao = B_mat @ dm_cp @ B_mat.T - else: - dm_ao = cp.einsum('pi,xij,qj->xpq', B_mat, dm_cp, B_mat) + dm_ao = B_mat @ dm_cp @ B_mat.T dm_full_ao_inner = self.dm_core[ifrag] + dm_ao @@ -479,6 +474,7 @@ def kernel(self): e_frag_elec = cp.sum(dm1_emb[:n_frag, :] * h_eval[:n_frag, :]) if not is_mean_field: + raise NotImplementedError("Non-mean-field solver not implemented, needs thorough testing...") self.log.info("using non-mean-field solver") nemb = B.shape[1] # TODO: this can be replaced by a more efficient routine diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft.py b/gpu4pyscf/qmmm/embedding/embedding_dft.py index 418b357ae..019f60934 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft.py @@ -21,12 +21,10 @@ class SingleFragmentEmbedding(DMET): """ - Single-Fragment ONIOM-like embedding. + Single-Fragment ONIOM-like embedding for DFT. 
This class performs a single-shot, single-fragment delta-method energy evaluation WITHOUT macroscopic iterations. - It rigorously traces over the entire active space (ffagment + bath) to capture - full polarization correlation,. """ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): @@ -50,7 +48,6 @@ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): self.fragment = self.fragments[0] def _evaluate_embedded_energy(self, mf_obj, dm_emb, h_eval_bare, B, dm_core): - # Bare one-electron Hamiltonian trace e_h = cp.sum(dm_emb * h_eval_bare) # Full density reconstruction @@ -122,15 +119,14 @@ def kernel(self): is_mean_field = hasattr(self.mf_inner_template, 'get_veff') if is_mean_field: - # Bare one-electron Hamiltonian evaluated in active space h_eval_bare = B.T @ hcore_orig @ B - # Evaluate High-Level trace + # Evaluate High-Level energy e_high = self._evaluate_embedded_energy( self.mf_inner_template, dm_emb_high, h_eval_bare, B, dm_core ) - # Evaluate Low-Level trace + # Evaluate Low-Level energy e_low = self._evaluate_embedded_energy( self.mf_outer, dm_emb_low, h_eval_bare, B, dm_core ) From 9c2b47e4c159e35b2f1e8f097e47cc3eb033be0d Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 26 May 2026 07:55:49 +0800 Subject: [PATCH 20/30] - debug the error in evaluating energies; - debug the schmidt procedure - add more tests --- gpu4pyscf/qmmm/embedding/embedding.py | 26 +++- gpu4pyscf/qmmm/embedding/embedding_dft.py | 40 +---- .../embedding/tests/test_dft_embedding.py | 58 +++++++ .../embedding/tests/test_dmet_embedding.py | 145 ++++++++++++------ 4 files changed, 182 insertions(+), 87 deletions(-) diff --git a/gpu4pyscf/qmmm/embedding/embedding.py b/gpu4pyscf/qmmm/embedding/embedding.py index 9ab8fd587..462ac95b3 100644 --- a/gpu4pyscf/qmmm/embedding/embedding.py +++ b/gpu4pyscf/qmmm/embedding/embedding.py @@ -79,16 +79,28 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): C_rot 
= C_occ @ Vh.T - # Exclude singular values close to 1.0, which are fragment orbitals - is_bath = (S > threshold) & (S < 1.0 - threshold) + # Broadly select all potential bath orbitals (including pure fragment ones S ~ 1.0) + is_bath_candidate = S > threshold is_core_small = S <= threshold n_sv = len(S) - # Entangled bath orbitals (environment part) - bath_orb = C_rot[env_idx, :n_sv][:, is_bath] - norms = cp.linalg.norm(bath_orb, axis=0) - norms[norms < 1e-12] = 1.0 # This may happen, if s=1.0, which will add a new null vector to B! - bath_orb = bath_orb / norms + # Extract the environment part for these candidates + raw_bath_orb = C_rot[env_idx, :n_sv][:, is_bath_candidate] + + # Calculate their true physical norms in the environment space + norms = cp.linalg.norm(raw_bath_orb, axis=0) + + # Keep only those with a mathematically meaningful environment tail. + # This automatically drops pure fragment orbitals (norm ~ 0) preventing null vectors, + # while safely preserving orbitals with legitimate tiny tails (like in STO-3G). 
+ valid_mask = norms > 1e-10 + + # Apply the mask to both the orbitals and their norms + bath_orb = raw_bath_orb[:, valid_mask] + valid_norms = norms[valid_mask] + + # Safely normalize the surviving valid bath orbitals + bath_orb = bath_orb / valid_norms # Pure environment core orbitals come from null space + small singular values core_orb_small = C_rot[env_idx, :n_sv][:, is_core_small] diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft.py b/gpu4pyscf/qmmm/embedding/embedding_dft.py index 019f60934..8639576a8 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft.py @@ -48,45 +48,17 @@ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): self.fragment = self.fragments[0] def _evaluate_embedded_energy(self, mf_obj, dm_emb, h_eval_bare, B, dm_core): - e_h = cp.sum(dm_emb * h_eval_bare) + e_h_active = cp.sum(dm_emb * h_eval_bare) - # Full density reconstruction dm_full_ao = dm_core + B @ dm_emb @ B.T - v_eff_full = mf_obj.get_veff(self.full_mol, dm_full_ao) - - # Coulomb J interaction traced over active space - vj_full = getattr(v_eff_full, 'vj', None) - if vj_full is None: - vj_full = mf_obj.get_j(self.full_mol, dm_full_ao) - vj_emb = B.T @ _as_cupy(vj_full) @ B - e_J = 0.5 * cp.sum(dm_emb * vj_emb) - - # Exact Exchange interaction traced over active space + Grid XC extraction - exc_tot = getattr(v_eff_full, 'exc', 0.0) - vk_full = getattr(v_eff_full, 'vk', None) - e_K = 0.0 - grid_exc_tot = exc_tot - if vk_full is not None: - vk_full = _as_cupy(vk_full) - vk_emb = B.T @ vk_full @ B - e_K = -0.5 * cp.sum(dm_emb * vk_emb) - e_K_global = -0.5 * cp.sum(dm_full_ao * vk_full) - # Isolate the pure non-linear grid integration part - grid_exc_tot = exc_tot - e_K_global - - # Core evaluation for pure Grid XC subtraction + v_eff_full = mf_obj.get_veff(self.full_mol, dm_full_ao) v_eff_core = mf_obj.get_veff(self.full_mol, dm_core) - exc_core = getattr(v_eff_core, 'exc', 0.0) - vk_core = getattr(v_eff_core, 
'vk', None) - - grid_exc_core = exc_core - if vk_core is not None: - vk_core = _as_cupy(vk_core) - e_K_global_core = -0.5 * cp.sum(dm_core * vk_core) - grid_exc_core = exc_core - e_K_global_core - return e_h + e_J + e_K + grid_exc_tot - grid_exc_core + e_2e_full = getattr(v_eff_full, 'ecoul', 0.0) + getattr(v_eff_full, 'exc', 0.0) + e_2e_core = getattr(v_eff_core, 'ecoul', 0.0) + getattr(v_eff_core, 'exc', 0.0) + # E_active = E_1e(Active) + [E_2e(Full) - E_2e(Core)] + return e_h_active + e_2e_full - e_2e_core def kernel(self): if not self.mf_outer.converged: diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py index 6e2d4243f..4b643724d 100644 --- a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py +++ b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding.py @@ -131,6 +131,64 @@ def test_template_isolation_and_convergence(self): self.assertTrue(mf_inner_template.converged, "Template object was poisoned and failed to converge!") + def test_hexane_core_isolation_and_exactness(self): + mol = gto.Mole() + mol.atom = ''' + C 1.4522500000 -2.8230000000 0.0000000000 + C 1.4522500000 -1.2830000000 0.0000000000 + C 0.0002500000 -0.7700000000 0.0000000000 + C 0.0002500000 0.7700000000 0.0000000000 + C -1.4517500000 1.2830000000 0.0000000000 + C -1.4517500000 2.8230000000 0.0000000000 + H 2.4792500000 -3.1870000000 0.0000000000 + H 0.9382500000 -3.1870000000 0.8900000000 + H 0.9382500000 -3.1870000000 -0.8900000000 + H 1.9652500000 -0.9200000000 0.8900000000 + H 1.9652500000 -0.9200000000 -0.8900000000 + H -0.5137500000 -1.1330000000 -0.8900000000 + H -0.5137500000 -1.1330000000 0.8900000000 + H 0.5132500000 1.1330000000 0.8900000000 + H 0.5132500000 1.1330000000 -0.8900000000 + H -1.9657500000 0.9200000000 -0.8900000000 + H -1.9657500000 0.9200000000 0.8900000000 + H -2.4797500000 3.1870000000 0.0000000000 + H -0.9377500000 3.1870000000 0.8900000000 + H -0.9377500000 3.1870000000 -0.8900000000 + ''' + 
mol.basis = 'sto3g' + mol.spin = 0 + mol.verbose = 0 + mol.build() + + mf_outer = rks.RKS(mol, xc='PBE') + mf_inner = rks.RKS(mol, xc='PBE') + + methyl_fragment = [0, 6, 7, 8] + emb_obj = SingleFragmentEmbedding(mf_outer, mf_inner, methyl_fragment, threshold=1e-5) + emb_obj.kernel() + + mf_outer.kernel() + e_global = mf_outer.e_tot + e_embedded = emb_obj.e_tot + self.assertTrue(np.abs(e_global - e_embedded) < 1e-6, + f"PBE-in-PBE Exactness failed! Error: {np.abs(e_global - e_embedded)}") + + dm_core_sum = float(cp.sum(emb_obj.dm_core[0])) + self.assertTrue(dm_core_sum > 1.0, + "Hexane test did not generate a non-trivial Core DM. SVD truncation might be failing.") + + def test_pure_dft_vk_bypass(self): + mf_outer = rks.RKS(self.mol, xc='PBE') + mf_inner = rks.RKS(self.mol, xc='PBE') + + emb_obj = SingleFragmentEmbedding(mf_outer, mf_inner, self.fragments[0]) + try: + emb_obj.kernel() + except AttributeError as e: + self.fail(f"Embedding failed for Pure DFT due to missing vk attribute: {e}") + + self.assertTrue(emb_obj.e_tot is not None, "Pure DFT embedding failed to return an energy.") + if __name__ == '__main__': print("Full Tests for ONIOM-like DFT embedding.") diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py b/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py index 2d3353ae8..ca0c1b6ab 100644 --- a/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py +++ b/gpu4pyscf/qmmm/embedding/tests/test_dmet_embedding.py @@ -71,6 +71,47 @@ def setUpClass(cls): cls.mf_inner_template2 = gpu_hf.RHF(cls.mol2) cls.mf_inner_template2.conv_tol = 1e-12 + cls.mol3 = gto.Mole() + cls.mol3.atom = ''' + C 1.4522500000 -2.8230000000 0.0000000000 + C 1.4522500000 -1.2830000000 0.0000000000 + C 0.0002500000 -0.7700000000 0.0000000000 + C 0.0002500000 0.7700000000 0.0000000000 + C -1.4517500000 1.2830000000 0.0000000000 + C -1.4517500000 2.8230000000 0.0000000000 + H 2.4792500000 -3.1870000000 0.0000000000 + H 0.9382500000 -3.1870000000 0.8900000000 + H 0.9382500000 
-3.1870000000 -0.8900000000 + H 1.9652500000 -0.9200000000 0.8900000000 + H 1.9652500000 -0.9200000000 -0.8900000000 + H -0.5137500000 -1.1330000000 -0.8900000000 + H -0.5137500000 -1.1330000000 0.8900000000 + H 0.5132500000 1.1330000000 0.8900000000 + H 0.5132500000 1.1330000000 -0.8900000000 + H -1.9657500000 0.9200000000 -0.8900000000 + H -1.9657500000 0.9200000000 0.8900000000 + H -2.4797500000 3.1870000000 0.0000000000 + H -0.9377500000 3.1870000000 0.8900000000 + H -0.9377500000 3.1870000000 -0.8900000000 + ''' + cls.mol3.basis = '6-31g' + cls.mol3.spin = 0 + cls.mol3.charge = 0 + cls.mol3.verbose = 0 + cls.mol3.build() + + cls.fragments3 = [[0, 6, 7, 8], + [1, 9, 10], + [2, 11, 12], + [3, 13, 14], + [4, 15, 16], + [5, 17, 18, 19]] + + cls.mf_outer3 = gpu_hf.RHF(cls.mol3) + cls.mf_outer3.conv_tol = 1e-12 + cls.mf_inner_template3 = gpu_hf.RHF(cls.mol3) + cls.mf_inner_template3.conv_tol = 1e-12 + @classmethod def tearDownClass(cls): del cls.mol @@ -108,6 +149,10 @@ def test_lowdin(self): assert np.abs(X - X_ref).max() < 1e-8, "Lowdin orthogonalization should yield a close-to-identity matrix." def test_schmidt(self): + """ + Test Schmidt decomposition with the rigorous norm-based filtering logic + to prevent null vectors and preserve legitimate physical tails. 
+ """ mol = gto.Mole() mol.atom = ''' H 0.0 0.0 0.0 @@ -135,22 +180,27 @@ def test_schmidt(self): C_rot = C_occ @ Vh.T threshold = 1e-5 - is_bath = (S > threshold) & (S < 1.0 - threshold) + is_bath_candidate = S > threshold n_sv = len(S) - bath_orb_ref = C_rot[4:, :n_sv][:, is_bath] - if bath_orb_ref.size > 0: - norms = cp.linalg.norm(bath_orb_ref, axis=0) - norms[norms < 1e-12] = 1.0 - bath_orb_ref /= norms + raw_bath_orb_ref = C_rot[4:, :n_sv][:, is_bath_candidate] + if raw_bath_orb_ref.size > 0: + norms = cp.linalg.norm(raw_bath_orb_ref, axis=0) + valid_mask = norms > 1e-10 + bath_orb_ref = raw_bath_orb_ref[:, valid_mask] + valid_norms = norms[valid_mask] + if bath_orb_ref.size > 0: + bath_orb_ref /= valid_norms + else: + bath_orb_ref = raw_bath_orb_ref bath_orb = embedding.schmidt_decompose(mo_coeff_oao, mf.mo_occ, [0,1,2,3], [4,5,6,7], threshold=threshold)[0] self.assertEqual(bath_orb.shape, bath_orb_ref.shape, - "Matrix shapes must match after filtering pure fragment orbitals.") + "Matrix shapes must match after norm-based filtering.") if bath_orb.size > 0: assert np.abs(bath_orb.get() - bath_orb_ref.get()).max() < 1e-8, \ - "Schmidt decomposition should yield close-to-identity matrices." + "Schmidt decomposition should yield highly accurate normalized basis vectors." def test_dmet_execution_and_convergence(self): dmet_solver = DMET( @@ -204,44 +254,6 @@ def test_dmet_execution_and_convergence(self): assert np.abs(e_tot_iter1 - e_tot) < 1e-8, "DMET energy should be converged in 1 macro iteration." assert np.abs(dmet_solver2.u_oao).sum() < 1e-8, "Correlation potential should be close to zero." 
- def test_multifragment_algebraic_and_conservation(self): - dmet_solver = DMET( - mf_outer=self.mf_outer2, - mf_inner=self.mf_inner_template2, - fragments=self.fragments2, - threshold=1e-5, - max_macro_iter=1 - ) - dmet_solver.kernel() - - S_ao = cp.asarray(self.mf_outer2.get_ovlp()) - n_total_elec = float(self.mol2.nelectron) - - for ifrag in range(dmet_solver.nfrags): - B = dmet_solver.B[ifrag] - D_core = dmet_solver.dm_core[ifrag] - D_emb_high = cp.asarray(dmet_solver.mf_inner[ifrag].make_rdm1()) - - # Check B^T * S * B == I for each fragment - ortho_check = B.T @ S_ao @ B - identity = cp.eye(B.shape[1]) - max_ortho_err = float(cp.abs(ortho_check - identity).max()) - self.assertTrue(max_ortho_err < 1e-10, - f"Fragment {ifrag}: Basis B is not orthonormal. Max err: {max_ortho_err}") - - # Check Core DM spatial isolation from the active space - core_overlap = B.T @ S_ao @ D_core @ S_ao @ B - max_overlap_err = float(cp.abs(core_overlap).max()) - self.assertTrue(max_overlap_err < 1e-10, - f"Fragment {ifrag}: Core DM leaks into Active Space. Max err: {max_overlap_err}") - - # Check total electron conservation for this fragment representation - D_emb_ao = B @ D_emb_high @ B.T - D_total_ao = D_core + D_emb_ao - n_elec_calc = float(cp.trace(D_total_ao @ S_ao)) - self.assertAlmostEqual(n_elec_calc, n_total_elec, places=8, - msg=f"Fragment {ifrag}: Electron loss detected. 
{n_elec_calc} != {n_total_elec}") - def test_dmet_template_isolation(self): dmet_solver = DMET( mf_outer=self.mf_outer2, @@ -277,6 +289,47 @@ def test_correlation_potential_symmetry(self): max_u_val = float(cp.abs(u).max()) self.assertTrue(max_u_val < 1e-7, f"Trivial correlation potential should be zero, but got max: {max_u_val}") + def test_multifragment_algebraic_and_conservation(self): + dmet_solver = DMET( + mf_outer=self.mf_outer3, + mf_inner=self.mf_inner_template3, + fragments=self.fragments3, + threshold=1e-5, + max_macro_iter=1 + ) + dmet_solver.kernel() + + S_ao = cp.asarray(self.mf_outer3.get_ovlp()) + n_total_elec = float(self.mol3.nelectron) + + e_ref = self.mf_outer3.kernel() + assert np.abs(e_ref - dmet_solver.e_tot) < 1e-8, f"Reference energy {e_ref} != Embedding energy {dmet_solver.e_tot}" + + for ifrag in range(dmet_solver.nfrags): + B = dmet_solver.B[ifrag] + D_core = dmet_solver.dm_core[ifrag] + D_emb_high = cp.asarray(dmet_solver.mf_inner[ifrag].make_rdm1()) + + # Check B^T * S * B == I for each fragment + ortho_check = B.T @ S_ao @ B + identity = cp.eye(B.shape[1]) + max_ortho_err = float(cp.abs(ortho_check - identity).max()) + self.assertTrue(max_ortho_err < 1e-10, + f"Fragment {ifrag}: Basis B is not orthonormal. Max err: {max_ortho_err}") + + # Check Core DM spatial isolation from the active space + core_overlap = B.T @ S_ao @ D_core @ S_ao @ B + max_overlap_err = float(cp.abs(core_overlap).max()) + self.assertTrue(max_overlap_err < 1e-10, + f"Fragment {ifrag}: Core DM leaks into Active Space. Max err: {max_overlap_err}") + + # Check total electron conservation for this fragment representation + D_emb_ao = B @ D_emb_high @ B.T + D_total_ao = D_core + D_emb_ao + n_elec_calc = float(cp.trace(D_total_ao @ S_ao)) + self.assertAlmostEqual(n_elec_calc, n_total_elec, places=8, + msg=f"Fragment {ifrag}: Electron loss detected. 
{n_elec_calc} != {n_total_elec}") + if __name__ == '__main__': print("Full Tests for DMET") From 80a23f8fbf5fcc16d7457081f6c8e0282074eb0c Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 26 May 2026 14:10:21 +0800 Subject: [PATCH 21/30] rebase master --- gpu4pyscf/scf/hf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gpu4pyscf/scf/hf.py b/gpu4pyscf/scf/hf.py index f18a84cac..a46b1dbb1 100644 --- a/gpu4pyscf/scf/hf.py +++ b/gpu4pyscf/scf/hf.py @@ -841,7 +841,6 @@ def dump_flags(self, verbose=None): init_guess_by_chkfile = return_cupy_array(hf_cpu.SCF.init_guess_by_chkfile) from_chk = return_cupy_array(hf_cpu.SCF.from_chk) get_init_guess = hf_cpu.SCF.get_init_guess - make_rdm2 = NotImplemented energy_elec = NotImplemented energy_tot = energy_tot energy_nuc = hf_cpu.SCF.energy_nuc From afc35a6795f3d266257d68e6ba703b1e6f9ed6cc Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 27 May 2026 10:31:42 +0800 Subject: [PATCH 22/30] begin to write the codes --- gpu4pyscf/qmmm/embedding/embedding_dft_harris.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 gpu4pyscf/qmmm/embedding/embedding_dft_harris.py diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py new file mode 100644 index 000000000..3787aed7e --- /dev/null +++ b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py @@ -0,0 +1,13 @@ +# Copyright 2021-2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file From a0ca103b69865eefbc5f3134f60099a41f840dff Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 28 May 2026 15:08:25 +0800 Subject: [PATCH 23/30] begin to write --- .../qmmm/embedding/embedding_dft_harris.py | 247 +++++++++++++++++- 1 file changed, 246 insertions(+), 1 deletion(-) diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py index 3787aed7e..ed1bc983b 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py @@ -10,4 +10,249 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. + +import numpy as np +import cupy as cp + +from pyscf import lib +from gpu4pyscf.dft import rks +from gpu4pyscf.lib.cupy_helper import _as_cupy +from gpu4pyscf.qmmm.embedding.embedding import DMET, lowdin_orth + +class HarrisRKS(rks.RKS): + """ + Harris RKS class based on machine learning (ML) predicted density. + + This class bypasses traditional SCF iterations. Instead, it relies entirely + on an external ML density evaluation function to construct the global effective + potential and calculate the double counting energy. + """ + def __init__(self, mol, eval_density_func, xc='LDA,VWN'): + super().__init__(mol) + self.xc = xc + self.max_cycle = 1 + + # eval_density_func is the external ML interface. + # Signature: def func(mol, grids, atomic_weights=None) + # Returns 7 elements: + # 1. vj: Coulomb potential matrix (AO basis) + # 2. vk: Exact exchange potential matrix (AO basis, can be None for pure DFT) + # 3. vxc: Exchange-correlation potential matrix (AO basis) + # 4. 
e_j: Coulomb energy (scalar) + # 5. e_k: Exact exchange energy (scalar, can be 0.0 for pure DFT) + # 6. e_xc: Exchange-correlation energy (scalar) + # 7. int_rho_vxc: Integral of rho * V_xc (scalar) + self.eval_density_func = eval_density_func + + # Cache for global evaluation results to avoid redundant ML inferences + self._v_eff_global = None + self._e_dc_global = None + + def get_veff(self, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): + + if mol is None: + mol = self.mol + + if self._v_eff_global is not None: + return self._v_eff_global + + if self.grids.coords is None: + self.grids.build() + + vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( + mol, self.xc, self.grids, atomic_weights=None + ) + + v_eff_ao = _as_cupy(vj) + _as_cupy(vxc) + if vk is not None: + v_eff_ao -= _as_cupy(vk) + e_k = float(e_k) + else: + e_k = 0.0 + + # Assemble double counting energy + e_dc = float(e_j) - e_k + float(int_rho_vxc) - float(e_xc) + + self._v_eff_global = v_eff_ao + self._e_dc_global = e_dc + return self._v_eff_global + + def energy_elec(self, dm=None, h1e=None, vhf=None): + """ + Overrides electronic energy evaluation using the Harris energy formula: + E_elec = Tr[D * (h + Veff)] - E_DC + """ + if dm is None: dm = self.make_rdm1() + if h1e is None: h1e = self.get_hcore() + if vhf is None: vhf = self.get_veff(self.mol, dm) + + dm_cp = _as_cupy(dm) + h1e_cp = _as_cupy(h1e) + vhf_cp = _as_cupy(vhf) + + fock = h1e_cp + vhf_cp + e_band = float(cp.sum(dm_cp * fock)) + + e_elec = e_band - self._e_dc_global + return e_elec, self._e_dc_global + + def get_local_veff_and_dc(self, atomic_weights): + + if self.grids.coords is None: + self.grids.build() + + vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( + self.mol, self.xc, self.grids, atomic_weights=atomic_weights + ) + + v_eff_ao_local = _as_cupy(vj) + _as_cupy(vxc) + if vk is not None: + v_eff_ao_local -= _as_cupy(vk) + e_k = float(e_k) + else: + e_k = 0.0 + + e_dc_local = float(e_j) - e_k 
+ float(int_rho_vxc) - float(e_xc) + + return v_eff_ao_local, e_dc_local + + +class SingleFragmentEmbedding_ML(DMET): + """ + Single-Fragment ONIOM-like embedding utilizing ML density scaling. + + This class performs DMET bond-breaking via SVD, maps the DMET orbital + population to atomic weights, extracts a perfectly matched local ML density, + and evaluates the total energy using ONIOM error cancellation. + """ + def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): + """ + Parameters + ---------- + mf_outer : HarrisRKS object + The global low-level solver driven by ML density. + mf_inner : SCF/DFT/post-HF object + The high-level solver applied to the embedded fragment+bath cluster. + fragment : list of int + List of atom indices defining the core QM region. + threshold : float + Eigenvalue cutoff for the Schmidt decomposition to classify bath orbitals. + """ + fragments = [fragment] + super().__init__(mf_outer, mf_inner, fragments, + threshold=threshold, max_macro_iter=1, verbose=verbose) + self.fragment = self.fragments[0] + + def _get_atomic_weights(self, dm_active_ao, dm_full_ao, s_ao, mol): + """ + Calculate the projection weight (w_A) for each atom + using Mulliken population analysis of the fragment+bath (FB) orbitals. 
+ """ + pop_active = cp.einsum('ij,ji->i', dm_active_ao, s_ao) + pop_full = cp.einsum('ij,ji->i', dm_full_ao, s_ao) + + aoslice = mol.aoslice_by_atom() + weights = np.zeros(mol.natm) + + for ia in range(mol.natm): + p0, p1 = aoslice[ia, 2], aoslice[ia, 3] + if p1 > p0: + n_active = float(cp.sum(pop_active[p0:p1])) + n_full = float(cp.sum(pop_full[p0:p1])) + + if n_full > 1e-12: + w = n_active / n_full + weights[ia] = max(0.0, min(1.0, w)) + else: + weights[ia] = 0.0 + + return weights + + def _get_scaled_nuclear_energy(self, mol, weights): + coords = mol.atom_coords() + charges = mol.atom_charges() + e_nuc_local = 0.0 + + for i in range(mol.natm): + if weights[i] < 1e-8: + continue + for j in range(i + 1, mol.natm): + if weights[j] < 1e-8: + continue + r = np.linalg.norm(coords[i] - coords[j]) + # Scale repulsion by the product of atomic inclusion weights + e_nuc_local += weights[i] * weights[j] * charges[i] * charges[j] / r + + return e_nuc_local + + def kernel(self): + + if not self.mf_outer.converged: + self.mf_outer.kernel() + + e_global_low = self.mf_outer.e_tot + self.log.note(f"Step 1: Global Low-Level E (Harris) = {e_global_low:.8f}") + + mo_coeff = _as_cupy(self.mf_outer.mo_coeff) + mo_occ = _as_cupy(self.mf_outer.mo_occ) + dm_full_ao_low = _as_cupy(self.mf_outer.make_rdm1()) + hcore_orig = _as_cupy(self.mf_outer.get_hcore()) + s_ao = _as_cupy(self.mf_outer.get_ovlp()) + X, X_inv = lowdin_orth(s_ao) + + # DMET Schmidt decomposition to extract bath orbitals + ifrag = 0 + self.build_bath(ifrag, mo_coeff, mo_occ, X_inv, X) + B = self.B[ifrag] + + # Project density to active space and back to AO for population analysis + dm_emb_low = B.T @ dm_full_ao_low @ B + dm_active_ao = B @ dm_emb_low @ B.T + + # Calculate mapping weights w_A + self.log.info("Step 2 & 3: DMET SVD and calculating Atomic Weights...") + w_A = self._get_atomic_weights(dm_active_ao, dm_full_ao_low, s_ao, self.full_mol) + + # Retrieve local ML effective potential and double counting energy + 
self.log.info("Step 4: Extracting matched local ML density components...") + v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(atomic_weights=w_A) + e_nn_local = self._get_scaled_nuclear_energy(self.full_mol, w_A) + + # Calculate strictly matched local low-level energy (E_L^local) + fock_ao_local = hcore_orig + v_eff_ao_local + fock_fb_local = B.T @ fock_ao_local @ B + e_band_local = float(cp.sum(dm_emb_low * fock_fb_local)) + e_local_low = e_band_local - e_dc_local + e_nn_local + self.log.note(f"Step 5: Matched Local Low-Level E = {e_local_low:.8f}") + + # Construct pure environment core Hamiltonian and run high-level SCF + fock_ao_global = hcore_orig + self.mf_outer.get_veff() + fock_fb_global = B.T @ fock_ao_global @ B + + v_eff_fb_local = B.T @ v_eff_ao_local @ B + + # Effective core Hamiltonian isolates the environment potential + h_core_fb_eff = fock_fb_global - v_eff_fb_local + + self.h_emb[ifrag] = h_core_fb_eff + self.e_core[ifrag] = 0.0 # ONIOM framework implies E_core shift is 0 + + self.log.info("Step 6: Running high-level inner SCF in embedding space...") + # Build the inner solver (automatically stored in self.mf_inner[ifrag]) + self._build_inner_mf(ifrag, dm_full_ao_low) + + # Solve the embedded cluster problem + self.solve_embedded(ifrag) + + e_local_high = self.e_inner[ifrag] + e_nn_local + self.log.note(f"Step 6: Local High-Level E (SCF) = {e_local_high:.8f}") + + # Exact ONIOM energy assembly + self.e_tot = e_global_low - e_local_low + e_local_high + + self.log.note("="*50) + self.log.note(f"FINAL ONIOM TOTAL ENERGY = {self.e_tot:.8f}") + self.log.note("="*50) + + return self.e_tot \ No newline at end of file From 4b596ed6419d452faf4aa861c98dcfa43b6611f8 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 28 May 2026 15:28:56 +0800 Subject: [PATCH 24/30] add the density-dependent weight partition --- .../qmmm/embedding/embedding_dft_harris.py | 83 ++++++++++--------- 1 file changed, 45 insertions(+), 38 deletions(-) diff 
--git a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py index ed1bc983b..916fa2e84 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py @@ -34,7 +34,7 @@ def __init__(self, mol, eval_density_func, xc='LDA,VWN'): self.max_cycle = 1 # eval_density_func is the external ML interface. - # Signature: def func(mol, grids, atomic_weights=None) + # Signature: def func(mol, xc, grids, atomic_weights=None, grid_weights=None) # Returns 7 elements: # 1. vj: Coulomb potential matrix (AO basis) # 2. vk: Exact exchange potential matrix (AO basis, can be None for pure DFT) @@ -60,8 +60,9 @@ def get_veff(self, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): if self.grids.coords is None: self.grids.build() + # Global evaluation uses no weights vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( - mol, self.xc, self.grids, atomic_weights=None + mol, self.xc, self.grids, atomic_weights=None, grid_weights=None ) v_eff_ao = _as_cupy(vj) + _as_cupy(vxc) @@ -97,13 +98,16 @@ def energy_elec(self, dm=None, h1e=None, vhf=None): e_elec = e_band - self._e_dc_global return e_elec, self._e_dc_global - def get_local_veff_and_dc(self, atomic_weights): - + def get_local_veff_and_dc(self, atomic_weights=None, grid_weights=None): + # Pass both weight options to the external ML interface. + # The ML function should apply the provided one appropriately. if self.grids.coords is None: self.grids.build() vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( - self.mol, self.xc, self.grids, atomic_weights=atomic_weights + self.mol, self.xc, self.grids, + atomic_weights=atomic_weights, + grid_weights=grid_weights ) v_eff_ao_local = _as_cupy(vj) + _as_cupy(vxc) @@ -126,7 +130,7 @@ class SingleFragmentEmbedding_ML(DMET): population to atomic weights, extracts a perfectly matched local ML density, and evaluates the total energy using ONIOM error cancellation. 
""" - def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): + def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, partition_type='atom', verbose=None): """ Parameters ---------- @@ -138,17 +142,17 @@ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): List of atom indices defining the core QM region. threshold : float Eigenvalue cutoff for the Schmidt decomposition to classify bath orbitals. + partition_type : str + 'atom' for Mulliken population-based atomic weights. + 'grid' for real-space density-based grid weights w(r) = rho_local(r) / rho_global(r). """ fragments = [fragment] super().__init__(mf_outer, mf_inner, fragments, threshold=threshold, max_macro_iter=1, verbose=verbose) self.fragment = self.fragments[0] + self.partition_type = partition_type def _get_atomic_weights(self, dm_active_ao, dm_full_ao, s_ao, mol): - """ - Calculate the projection weight (w_A) for each atom - using Mulliken population analysis of the fragment+bath (FB) orbitals. 
- """ pop_active = cp.einsum('ij,ji->i', dm_active_ao, s_ao) pop_full = cp.einsum('ij,ji->i', dm_full_ao, s_ao) @@ -169,22 +173,18 @@ def _get_atomic_weights(self, dm_active_ao, dm_full_ao, s_ao, mol): return weights - def _get_scaled_nuclear_energy(self, mol, weights): - coords = mol.atom_coords() - charges = mol.atom_charges() - e_nuc_local = 0.0 + def _get_grid_weights(self, dm_active_ao, dm_full_ao, mol, grids): + + ni = self.mf_outer._numint - for i in range(mol.natm): - if weights[i] < 1e-8: - continue - for j in range(i + 1, mol.natm): - if weights[j] < 1e-8: - continue - r = np.linalg.norm(coords[i] - coords[j]) - # Scale repulsion by the product of atomic inclusion weights - e_nuc_local += weights[i] * weights[j] * charges[i] * charges[j] / r - - return e_nuc_local + rho_active = ni.get_rho(mol, dm_active_ao, grids) + rho_full = ni.get_rho(mol, dm_full_ao, grids) + + weights = rho_active / cp.maximum(rho_full, 1e-12) + + weights = cp.clip(weights, 0.0, 1.0) + + return weights def kernel(self): @@ -210,20 +210,31 @@ def kernel(self): dm_emb_low = B.T @ dm_full_ao_low @ B dm_active_ao = B @ dm_emb_low @ B.T - # Calculate mapping weights w_A - self.log.info("Step 2 & 3: DMET SVD and calculating Atomic Weights...") - w_A = self._get_atomic_weights(dm_active_ao, dm_full_ao_low, s_ao, self.full_mol) + # Calculate mapping weights and extract local ML components based on partition_type + if self.partition_type == 'atom': + self.log.info("Step 2 & 3: DMET SVD and calculating Atomic Weights...") + w_A = self._get_atomic_weights(dm_active_ao, dm_full_ao_low, s_ao, self.full_mol) + self.log.info("Step 4: Extracting matched local ML density components (Atom-based)...") + v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(atomic_weights=w_A) + + elif self.partition_type == 'grid': + self.log.info("Step 2 & 3: DMET SVD and calculating Grid Weights w(r)...") + if self.mf_outer.grids.coords is None: + self.mf_outer.grids.build() + w_grid = 
self._get_grid_weights(dm_active_ao, dm_full_ao_low, self.full_mol, self.mf_outer.grids) + self.log.info("Step 4: Extracting matched local ML density components (Grid-based)...") + v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(grid_weights=w_grid) + + else: + raise ValueError(f"Unknown partition_type: {self.partition_type}. Use 'atom' or 'grid'.") - # Retrieve local ML effective potential and double counting energy - self.log.info("Step 4: Extracting matched local ML density components...") - v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(atomic_weights=w_A) - e_nn_local = self._get_scaled_nuclear_energy(self.full_mol, w_A) + e_nuc_constant = self.full_mol.energy_nuc() # Calculate strictly matched local low-level energy (E_L^local) fock_ao_local = hcore_orig + v_eff_ao_local fock_fb_local = B.T @ fock_ao_local @ B e_band_local = float(cp.sum(dm_emb_low * fock_fb_local)) - e_local_low = e_band_local - e_dc_local + e_nn_local + e_local_low = e_band_local - e_dc_local + e_nuc_constant self.log.note(f"Step 5: Matched Local Low-Level E = {e_local_low:.8f}") # Construct pure environment core Hamiltonian and run high-level SCF @@ -239,16 +250,12 @@ def kernel(self): self.e_core[ifrag] = 0.0 # ONIOM framework implies E_core shift is 0 self.log.info("Step 6: Running high-level inner SCF in embedding space...") - # Build the inner solver (automatically stored in self.mf_inner[ifrag]) self._build_inner_mf(ifrag, dm_full_ao_low) - - # Solve the embedded cluster problem self.solve_embedded(ifrag) - e_local_high = self.e_inner[ifrag] + e_nn_local + e_local_high = self.e_inner[ifrag] self.log.note(f"Step 6: Local High-Level E (SCF) = {e_local_high:.8f}") - # Exact ONIOM energy assembly self.e_tot = e_global_low - e_local_low + e_local_high self.log.note("="*50) From e36c253985ee1462c12c04338edee5c3e316efed Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 28 May 2026 16:56:32 +0800 Subject: [PATCH 25/30] fix the incorrect non-linear 
treatment for v --- .../qmmm/embedding/embedding_dft_harris.py | 51 ++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py index 916fa2e84..c89c54d6b 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py @@ -17,8 +17,7 @@ from pyscf import lib from gpu4pyscf.dft import rks -from gpu4pyscf.lib.cupy_helper import _as_cupy -from gpu4pyscf.qmmm.embedding.embedding import DMET, lowdin_orth +from gpu4pyscf.qmmm.embedding.embedding import DMET, lowdin_orth, _as_cupy class HarrisRKS(rks.RKS): """ @@ -201,8 +200,8 @@ def kernel(self): s_ao = _as_cupy(self.mf_outer.get_ovlp()) X, X_inv = lowdin_orth(s_ao) - # DMET Schmidt decomposition to extract bath orbitals ifrag = 0 + self.build_bath(ifrag, mo_coeff, mo_occ, X_inv, X) B = self.B[ifrag] @@ -213,41 +212,47 @@ def kernel(self): # Calculate mapping weights and extract local ML components based on partition_type if self.partition_type == 'atom': self.log.info("Step 2 & 3: DMET SVD and calculating Atomic Weights...") - w_A = self._get_atomic_weights(dm_active_ao, dm_full_ao_low, s_ao, self.full_mol) - self.log.info("Step 4: Extracting matched local ML density components (Atom-based)...") - v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(atomic_weights=w_A) + w_active = self._get_atomic_weights(dm_active_ao, dm_full_ao_low, s_ao, self.full_mol) + w_core = 1.0 - w_active + + self.log.info("Step 4a: Extracting pure CORE potential using (1-w)...") + v_core_ao, _ = self.mf_outer.get_local_veff_and_dc(atomic_weights=w_core) + + self.log.info("Step 4b: Extracting ACTIVE components for Double Counting...") + v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(atomic_weights=w_active) elif self.partition_type == 'grid': self.log.info("Step 2 & 3: DMET SVD and calculating Grid Weights w(r)...") if 
self.mf_outer.grids.coords is None: self.mf_outer.grids.build() - w_grid = self._get_grid_weights(dm_active_ao, dm_full_ao_low, self.full_mol, self.mf_outer.grids) - self.log.info("Step 4: Extracting matched local ML density components (Grid-based)...") - v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(grid_weights=w_grid) + w_active = self._get_grid_weights(dm_active_ao, dm_full_ao_low, self.full_mol, self.mf_outer.grids) + w_core = 1.0 - w_active + + self.log.info("Step 4a: Extracting pure CORE potential using (1-w)...") + v_core_ao, _ = self.mf_outer.get_local_veff_and_dc(grid_weights=w_core) + + self.log.info("Step 4b: Extracting ACTIVE components for Double Counting...") + v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(grid_weights=w_active) else: raise ValueError(f"Unknown partition_type: {self.partition_type}. Use 'atom' or 'grid'.") e_nuc_constant = self.full_mol.energy_nuc() - # Calculate strictly matched local low-level energy (E_L^local) - fock_ao_local = hcore_orig + v_eff_ao_local - fock_fb_local = B.T @ fock_ao_local @ B + # Construct exact embedded Hamiltonian: h_emb = B^T (h_core^AO + V_core) B + fock_core_ao = hcore_orig + v_core_ao + h_core_fb_eff = B.T @ fock_core_ao @ B + + self.h_emb[ifrag] = h_core_fb_eff + self.e_core[ifrag] = 0.0 # ONIOM framework implies E_core shift is 0 + + fock_fb_local = h_core_fb_eff + (B.T @ v_eff_ao_local @ B) e_band_local = float(cp.sum(dm_emb_low * fock_fb_local)) e_local_low = e_band_local - e_dc_local + e_nuc_constant self.log.note(f"Step 5: Matched Local Low-Level E = {e_local_low:.8f}") - # Construct pure environment core Hamiltonian and run high-level SCF - fock_ao_global = hcore_orig + self.mf_outer.get_veff() - fock_fb_global = B.T @ fock_ao_global @ B - - v_eff_fb_local = B.T @ v_eff_ao_local @ B - - # Effective core Hamiltonian isolates the environment potential - h_core_fb_eff = fock_fb_global - v_eff_fb_local - - self.h_emb[ifrag] = h_core_fb_eff - self.e_core[ifrag] = 
0.0 # ONIOM framework implies E_core shift is 0 + self.dm_core[ifrag] = cp.zeros_like(dm_full_ao_low) + self.v_core_ao[ifrag] = cp.zeros_like(dm_full_ao_low) self.log.info("Step 6: Running high-level inner SCF in embedding space...") self._build_inner_mf(ifrag, dm_full_ao_low) From bca0dae0aba24344345780388a7ce485aa6207cb Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 28 May 2026 17:49:11 +0800 Subject: [PATCH 26/30] fix some bugs --- .../qmmm/embedding/embedding_dft_harris.py | 135 +++++++++++++----- 1 file changed, 96 insertions(+), 39 deletions(-) diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py index c89c54d6b..42e38ad3f 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py @@ -17,6 +17,7 @@ from pyscf import lib from gpu4pyscf.dft import rks +from gpu4pyscf.lib.cupy_helper import tag_array from gpu4pyscf.qmmm.embedding.embedding import DMET, lowdin_orth, _as_cupy class HarrisRKS(rks.RKS): @@ -48,7 +49,7 @@ def __init__(self, mol, eval_density_func, xc='LDA,VWN'): self._v_eff_global = None self._e_dc_global = None - def get_veff(self, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): + def _get_harris_veff(self, mol=None): if mol is None: mol = self.mol @@ -78,6 +79,51 @@ def get_veff(self, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): self._e_dc_global = e_dc return self._v_eff_global + def get_veff(self, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): + if mol is None: mol = self.mol + if dm is None: dm = self.make_rdm1() + + dm_cp = _as_cupy(dm) + s_ao = _as_cupy(self.get_ovlp()) + + # Calculate the actual number of electrons represented by the density matrix in AO basis + nelec_dm = float(cp.sum(dm_cp * s_ao)) + + # Handle zero density matrix under full-system inclusion limit safely + if nelec_dm < 1e-4: + v_eff_ao = cp.zeros_like(dm_cp) + return tag_array(v_eff_ao, ecoul=0.0, exc=0.0, vj=cp.zeros_like(dm_cp), 
vk=cp.zeros_like(dm_cp)) + + # Rigorous electron count inspection instead of the non-orthogonal matrix trace + if nelec_dm > self.mol.nelectron - 0.5: + v_eff_ao = self._get_harris_veff(mol) + e_2e = float(cp.sum(dm_cp * v_eff_ao)) - self._e_dc_global + return tag_array(v_eff_ao, ecoul=e_2e, exc=0.0, vj=v_eff_ao, vk=cp.zeros_like(v_eff_ao)) + else: + # Core evaluation using the pre-stored complementary weights + if self.grids.coords is None: + self.grids.build() + if isinstance(self.current_w_core, cp.ndarray) and self.current_w_core.ndim == 1: + vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( + mol, self.xc, self.grids, atomic_weights=None, grid_weights=self.current_w_core + ) + else: + vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( + mol, self.xc, self.grids, atomic_weights=self.current_w_core, grid_weights=None + ) + v_eff_ao = _as_cupy(vj) + _as_cupy(vxc) + if vk is not None: v_eff_ao -= _as_cupy(vk) + e_k = float(e_k) if vk is not None else 0.0 + e_dc = float(e_j) - e_k + float(int_rho_vxc) - float(e_xc) + e_2e = float(cp.sum(dm_cp * v_eff_ao)) - e_dc + return tag_array(v_eff_ao, ecoul=e_2e, exc=0.0, vj=_as_cupy(vj), vk=_as_cupy(vk) if vk is not None else cp.zeros_like(v_eff_ao)) + + def kernel(self, dm0=None, **kwargs): + # Pass through to the standard solver, get_veff handles everything natively via electron counting + e_tot = rks.RKS.kernel(self, dm0=dm0, **kwargs) + self.converged = True + return e_tot + def energy_elec(self, dm=None, h1e=None, vhf=None): """ Overrides electronic energy evaluation using the Harris energy formula: @@ -85,7 +131,7 @@ def energy_elec(self, dm=None, h1e=None, vhf=None): """ if dm is None: dm = self.make_rdm1() if h1e is None: h1e = self.get_hcore() - if vhf is None: vhf = self.get_veff(self.mol, dm) + if vhf is None: vhf = self._get_harris_veff(self.mol) dm_cp = _as_cupy(dm) h1e_cp = _as_cupy(h1e) @@ -185,6 +231,19 @@ def _get_grid_weights(self, dm_active_ao, dm_full_ao, mol, grids): 
return weights + def _evaluate_embedded_energy(self, mf_obj, dm_emb, h_eval_bare, B, dm_core): + e_h_active = cp.sum(dm_emb * h_eval_bare) + + dm_full_ao = dm_core + B @ dm_emb @ B.T + + v_eff_full = mf_obj.get_veff(self.full_mol, dm_full_ao) + v_eff_core = mf_obj.get_veff(self.full_mol, dm_core) + + e_2e_full = getattr(v_eff_full, 'ecoul', 0.0) + getattr(v_eff_full, 'exc', 0.0) + e_2e_core = getattr(v_eff_core, 'ecoul', 0.0) + getattr(v_eff_core, 'exc', 0.0) + # E_active = E_1e(Active) + [E_2e(Full) - E_2e(Core)] + return e_h_active + e_2e_full - e_2e_core + def kernel(self): if not self.mf_outer.converged: @@ -205,8 +264,8 @@ def kernel(self): self.build_bath(ifrag, mo_coeff, mo_occ, X_inv, X) B = self.B[ifrag] - # Project density to active space and back to AO for population analysis - dm_emb_low = B.T @ dm_full_ao_low @ B + # Rigorous density matrix projection incorporating the non-orthogonal overlap metric S + dm_emb_low = B.T @ s_ao @ dm_full_ao_low @ s_ao @ B dm_active_ao = B @ dm_emb_low @ B.T # Calculate mapping weights and extract local ML components based on partition_type @@ -215,12 +274,6 @@ def kernel(self): w_active = self._get_atomic_weights(dm_active_ao, dm_full_ao_low, s_ao, self.full_mol) w_core = 1.0 - w_active - self.log.info("Step 4a: Extracting pure CORE potential using (1-w)...") - v_core_ao, _ = self.mf_outer.get_local_veff_and_dc(atomic_weights=w_core) - - self.log.info("Step 4b: Extracting ACTIVE components for Double Counting...") - v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(atomic_weights=w_active) - elif self.partition_type == 'grid': self.log.info("Step 2 & 3: DMET SVD and calculating Grid Weights w(r)...") if self.mf_outer.grids.coords is None: @@ -228,43 +281,47 @@ def kernel(self): w_active = self._get_grid_weights(dm_active_ao, dm_full_ao_low, self.full_mol, self.mf_outer.grids) w_core = 1.0 - w_active - self.log.info("Step 4a: Extracting pure CORE potential using (1-w)...") - v_core_ao, _ = 
self.mf_outer.get_local_veff_and_dc(grid_weights=w_core) - - self.log.info("Step 4b: Extracting ACTIVE components for Double Counting...") - v_eff_ao_local, e_dc_local = self.mf_outer.get_local_veff_and_dc(grid_weights=w_active) - else: raise ValueError(f"Unknown partition_type: {self.partition_type}. Use 'atom' or 'grid'.") + print("debug w_core:", w_core) - e_nuc_constant = self.full_mol.energy_nuc() + # Store w_core into mf_outer for automated core potential evaluation via trace inspection + self.mf_outer.current_w_core = w_core - # Construct exact embedded Hamiltonian: h_emb = B^T (h_core^AO + V_core) B - fock_core_ao = hcore_orig + v_core_ao - h_core_fb_eff = B.T @ fock_core_ao @ B - - self.h_emb[ifrag] = h_core_fb_eff - self.e_core[ifrag] = 0.0 # ONIOM framework implies E_core shift is 0 - - fock_fb_local = h_core_fb_eff + (B.T @ v_eff_ao_local @ B) - e_band_local = float(cp.sum(dm_emb_low * fock_fb_local)) - e_local_low = e_band_local - e_dc_local + e_nuc_constant - self.log.note(f"Step 5: Matched Local Low-Level E = {e_local_low:.8f}") - - self.dm_core[ifrag] = cp.zeros_like(dm_full_ao_low) - self.v_core_ao[ifrag] = cp.zeros_like(dm_full_ao_low) + # Standard DMET embedded Hamiltonian and core potentials construction + self.build_embedded_hamiltonian(ifrag, hcore_orig) self.log.info("Step 6: Running high-level inner SCF in embedding space...") - self._build_inner_mf(ifrag, dm_full_ao_low) + mf_inner = self._build_inner_mf(ifrag, dm_full_ao_low) self.solve_embedded(ifrag) - e_local_high = self.e_inner[ifrag] - self.log.note(f"Step 6: Local High-Level E (SCF) = {e_local_high:.8f}") - - self.e_tot = e_global_low - e_local_low + e_local_high + dm_emb_high = _as_cupy(mf_inner.make_rdm1()) + dm_emb_low = self.dm_emb_init[ifrag] + + B = self.B[ifrag] + dm_core = self.dm_core[ifrag] + is_mean_field = hasattr(self.mf_inner_template, 'get_veff') + + if is_mean_field: + h_eval_bare = B.T @ hcore_orig @ B + + # Evaluate High-Level energy + e_high = 
self._evaluate_embedded_energy( + self.mf_inner_template, dm_emb_high, h_eval_bare, B, dm_core + ) + + # Evaluate Low-Level energy + e_low = self._evaluate_embedded_energy( + self.mf_outer, dm_emb_low, h_eval_bare, B, dm_core + ) + else: + raise NotImplementedError("WFT evaluation is not implemented for this class.") + + delta_e = float(e_high - e_low) + self.log.note(f"Global Low-Level E : {e_global_low:.8f}") + self.log.note(f"Active Space dE : {delta_e:.8f}") - self.log.note("="*50) - self.log.note(f"FINAL ONIOM TOTAL ENERGY = {self.e_tot:.8f}") - self.log.note("="*50) + self.e_tot = e_global_low + delta_e + self.log.note(f"Total Embedded E : {self.e_tot:.8f}") return self.e_tot \ No newline at end of file From 6669fa636096a25f7ba1e59ba9316ade7a89f29f Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 29 May 2026 16:56:06 +0800 Subject: [PATCH 27/30] Use the ML-density for global energy and density creation only. The low level energy is performed from SCF --- .../qmmm/embedding/embedding_dft_harris.py | 222 +++++------------- 1 file changed, 53 insertions(+), 169 deletions(-) diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py index 42e38ad3f..aee5a1060 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py @@ -14,11 +14,12 @@ import numpy as np import cupy as cp - from pyscf import lib from gpu4pyscf.dft import rks from gpu4pyscf.lib.cupy_helper import tag_array from gpu4pyscf.qmmm.embedding.embedding import DMET, lowdin_orth, _as_cupy +from gpu4pyscf.qmmm.embedding.embedding_dft import SingleFragmentEmbedding + class HarrisRKS(rks.RKS): """ @@ -45,12 +46,11 @@ def __init__(self, mol, eval_density_func, xc='LDA,VWN'): # 7. 
int_rho_vxc: Integral of rho * V_xc (scalar) self.eval_density_func = eval_density_func - # Cache for global evaluation results to avoid redundant ML inferences self._v_eff_global = None self._e_dc_global = None + self._use_harris_veff = False def _get_harris_veff(self, mol=None): - if mol is None: mol = self.mol @@ -72,110 +72,72 @@ def _get_harris_veff(self, mol=None): else: e_k = 0.0 - # Assemble double counting energy + # double counting energy e_dc = float(e_j) - e_k + float(int_rho_vxc) - float(e_xc) + vk_array = _as_cupy(vk) if vk is not None else cp.zeros_like(v_eff_ao) + v_eff_ao = tag_array(v_eff_ao, ecoul=float(e_j) - e_k, exc=float(e_xc), vj=_as_cupy(vj), vk=vk_array) + self._v_eff_global = v_eff_ao self._e_dc_global = e_dc return self._v_eff_global def get_veff(self, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): - if mol is None: mol = self.mol - if dm is None: dm = self.make_rdm1() - - dm_cp = _as_cupy(dm) - s_ao = _as_cupy(self.get_ovlp()) - - # Calculate the actual number of electrons represented by the density matrix in AO basis - nelec_dm = float(cp.sum(dm_cp * s_ao)) - - # Handle zero density matrix under full-system inclusion limit safely - if nelec_dm < 1e-4: - v_eff_ao = cp.zeros_like(dm_cp) - return tag_array(v_eff_ao, ecoul=0.0, exc=0.0, vj=cp.zeros_like(dm_cp), vk=cp.zeros_like(dm_cp)) - - # Rigorous electron count inspection instead of the non-orthogonal matrix trace - if nelec_dm > self.mol.nelectron - 0.5: - v_eff_ao = self._get_harris_veff(mol) - e_2e = float(cp.sum(dm_cp * v_eff_ao)) - self._e_dc_global - return tag_array(v_eff_ao, ecoul=e_2e, exc=0.0, vj=v_eff_ao, vk=cp.zeros_like(v_eff_ao)) - else: - # Core evaluation using the pre-stored complementary weights - if self.grids.coords is None: - self.grids.build() - if isinstance(self.current_w_core, cp.ndarray) and self.current_w_core.ndim == 1: - vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( - mol, self.xc, self.grids, atomic_weights=None, 
grid_weights=self.current_w_core - ) - else: - vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( - mol, self.xc, self.grids, atomic_weights=self.current_w_core, grid_weights=None - ) - v_eff_ao = _as_cupy(vj) + _as_cupy(vxc) - if vk is not None: v_eff_ao -= _as_cupy(vk) - e_k = float(e_k) if vk is not None else 0.0 - e_dc = float(e_j) - e_k + float(int_rho_vxc) - float(e_xc) - e_2e = float(cp.sum(dm_cp * v_eff_ao)) - e_dc - return tag_array(v_eff_ao, ecoul=e_2e, exc=0.0, vj=_as_cupy(vj), vk=_as_cupy(vk) if vk is not None else cp.zeros_like(v_eff_ao)) + # Use ML evaluation ONLY during the global SCF step. + # For standard embedding steps, fallback to the native exact DFT evaluation. + if getattr(self, '_use_harris_veff', False): + return self._get_harris_veff(mol) + return rks.RKS.get_veff(self, mol, dm, dm_last, vhf_last, hermi) def kernel(self, dm0=None, **kwargs): - # Pass through to the standard solver, get_veff handles everything natively via electron counting - e_tot = rks.RKS.kernel(self, dm0=dm0, **kwargs) + + if self.max_cycle != 1: + lib.logger.warn(self, "HarrisRKS is a non-iterative method. 
" + f"Overriding max_cycle from {self.max_cycle} to 1.") + self.max_cycle = 1 + + # Temporarily enable Harris ML potential for the global 1-step evaluation + self._use_harris_veff = True + try: + e_tot = rks.RKS.kernel(self, dm0=dm0, **kwargs) + finally: + self._use_harris_veff = False + self.converged = True return e_tot def energy_elec(self, dm=None, h1e=None, vhf=None): """ - Overrides electronic energy evaluation using the Harris energy formula: E_elec = Tr[D * (h + Veff)] - E_DC """ - if dm is None: dm = self.make_rdm1() - if h1e is None: h1e = self.get_hcore() - if vhf is None: vhf = self._get_harris_veff(self.mol) - - dm_cp = _as_cupy(dm) - h1e_cp = _as_cupy(h1e) - vhf_cp = _as_cupy(vhf) - - fock = h1e_cp + vhf_cp - e_band = float(cp.sum(dm_cp * fock)) - - e_elec = e_band - self._e_dc_global - return e_elec, self._e_dc_global - - def get_local_veff_and_dc(self, atomic_weights=None, grid_weights=None): - # Pass both weight options to the external ML interface. - # The ML function should apply the provided one appropriately. 
- if self.grids.coords is None: - self.grids.build() + if getattr(self, '_use_harris_veff', False): + if dm is None: dm = self.make_rdm1() + if h1e is None: h1e = self.get_hcore() + if vhf is None: vhf = self._get_harris_veff(self.mol) - vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( - self.mol, self.xc, self.grids, - atomic_weights=atomic_weights, - grid_weights=grid_weights - ) - - v_eff_ao_local = _as_cupy(vj) + _as_cupy(vxc) - if vk is not None: - v_eff_ao_local -= _as_cupy(vk) - e_k = float(e_k) - else: - e_k = 0.0 + dm_cp = _as_cupy(dm) + h1e_cp = _as_cupy(h1e) + vhf_cp = _as_cupy(vhf) - e_dc_local = float(e_j) - e_k + float(int_rho_vxc) - float(e_xc) - - return v_eff_ao_local, e_dc_local + fock = h1e_cp + vhf_cp + e_band = float(cp.sum(dm_cp * fock)) + + e_elec = e_band - self._e_dc_global + return e_elec, self._e_dc_global + else: + # Fallback to standard energy evaluation during embedding steps + return rks.RKS.energy_elec(self, dm, h1e, vhf) -class SingleFragmentEmbedding_ML(DMET): +class SingleFragmentEmbedding_ML(SingleFragmentEmbedding): """ - Single-Fragment ONIOM-like embedding utilizing ML density scaling. + Single-Fragment ONIOM-like embedding utilizing ML density for the global low-level. - This class performs DMET bond-breaking via SVD, maps the DMET orbital - population to atomic weights, extracts a perfectly matched local ML density, - and evaluates the total energy using ONIOM error cancellation. + This class performs DMET bond-breaking via SVD, and evaluates the local embedded + energies using rigorous standard SCF evaluations to guarantee exact error cancellation + between the high-level and low-level local calculations. 
""" - def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, partition_type='atom', verbose=None): + def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): """ Parameters ---------- @@ -187,62 +149,10 @@ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, partition_type= List of atom indices defining the core QM region. threshold : float Eigenvalue cutoff for the Schmidt decomposition to classify bath orbitals. - partition_type : str - 'atom' for Mulliken population-based atomic weights. - 'grid' for real-space density-based grid weights w(r) = rho_local(r) / rho_global(r). """ - fragments = [fragment] - super().__init__(mf_outer, mf_inner, fragments, - threshold=threshold, max_macro_iter=1, verbose=verbose) + super().__init__(mf_outer, mf_inner, fragment, + threshold=threshold, verbose=verbose) self.fragment = self.fragments[0] - self.partition_type = partition_type - - def _get_atomic_weights(self, dm_active_ao, dm_full_ao, s_ao, mol): - pop_active = cp.einsum('ij,ji->i', dm_active_ao, s_ao) - pop_full = cp.einsum('ij,ji->i', dm_full_ao, s_ao) - - aoslice = mol.aoslice_by_atom() - weights = np.zeros(mol.natm) - - for ia in range(mol.natm): - p0, p1 = aoslice[ia, 2], aoslice[ia, 3] - if p1 > p0: - n_active = float(cp.sum(pop_active[p0:p1])) - n_full = float(cp.sum(pop_full[p0:p1])) - - if n_full > 1e-12: - w = n_active / n_full - weights[ia] = max(0.0, min(1.0, w)) - else: - weights[ia] = 0.0 - - return weights - - def _get_grid_weights(self, dm_active_ao, dm_full_ao, mol, grids): - - ni = self.mf_outer._numint - - rho_active = ni.get_rho(mol, dm_active_ao, grids) - rho_full = ni.get_rho(mol, dm_full_ao, grids) - - weights = rho_active / cp.maximum(rho_full, 1e-12) - - weights = cp.clip(weights, 0.0, 1.0) - - return weights - - def _evaluate_embedded_energy(self, mf_obj, dm_emb, h_eval_bare, B, dm_core): - e_h_active = cp.sum(dm_emb * h_eval_bare) - - dm_full_ao = dm_core + B @ dm_emb @ B.T - - v_eff_full = 
mf_obj.get_veff(self.full_mol, dm_full_ao) - v_eff_core = mf_obj.get_veff(self.full_mol, dm_core) - - e_2e_full = getattr(v_eff_full, 'ecoul', 0.0) + getattr(v_eff_full, 'exc', 0.0) - e_2e_core = getattr(v_eff_core, 'ecoul', 0.0) + getattr(v_eff_core, 'exc', 0.0) - # E_active = E_1e(Active) + [E_2e(Full) - E_2e(Core)] - return e_h_active + e_2e_full - e_2e_core def kernel(self): @@ -250,7 +160,7 @@ def kernel(self): self.mf_outer.kernel() e_global_low = self.mf_outer.e_tot - self.log.note(f"Step 1: Global Low-Level E (Harris) = {e_global_low:.8f}") + self.log.note(f"Global Low-Level E (Harris) = {e_global_low:.8f}") mo_coeff = _as_cupy(self.mf_outer.mo_coeff) mo_occ = _as_cupy(self.mf_outer.mo_occ) @@ -262,36 +172,9 @@ def kernel(self): ifrag = 0 self.build_bath(ifrag, mo_coeff, mo_occ, X_inv, X) - B = self.B[ifrag] - - # Rigorous density matrix projection incorporating the non-orthogonal overlap metric S - dm_emb_low = B.T @ s_ao @ dm_full_ao_low @ s_ao @ B - dm_active_ao = B @ dm_emb_low @ B.T - - # Calculate mapping weights and extract local ML components based on partition_type - if self.partition_type == 'atom': - self.log.info("Step 2 & 3: DMET SVD and calculating Atomic Weights...") - w_active = self._get_atomic_weights(dm_active_ao, dm_full_ao_low, s_ao, self.full_mol) - w_core = 1.0 - w_active - - elif self.partition_type == 'grid': - self.log.info("Step 2 & 3: DMET SVD and calculating Grid Weights w(r)...") - if self.mf_outer.grids.coords is None: - self.mf_outer.grids.build() - w_active = self._get_grid_weights(dm_active_ao, dm_full_ao_low, self.full_mol, self.mf_outer.grids) - w_core = 1.0 - w_active - - else: - raise ValueError(f"Unknown partition_type: {self.partition_type}. 
Use 'atom' or 'grid'.") - print("debug w_core:", w_core) - - # Store w_core into mf_outer for automated core potential evaluation via trace inspection - self.mf_outer.current_w_core = w_core - - # Standard DMET embedded Hamiltonian and core potentials construction self.build_embedded_hamiltonian(ifrag, hcore_orig) - self.log.info("Step 6: Running high-level inner SCF in embedding space...") + self.log.info("Running high-level inner SCF in embedding space...") mf_inner = self._build_inner_mf(ifrag, dm_full_ao_low) self.solve_embedded(ifrag) @@ -305,15 +188,16 @@ def kernel(self): if is_mean_field: h_eval_bare = B.T @ hcore_orig @ B - # Evaluate High-Level energy e_high = self._evaluate_embedded_energy( self.mf_inner_template, dm_emb_high, h_eval_bare, B, dm_core ) + self.log.note(f"High-Level E : {e_high:.8f}") - # Evaluate Low-Level energy + # Evaluate Low-Level energy (mf_outer will automatically use exact get_veff for xc here) e_low = self._evaluate_embedded_energy( self.mf_outer, dm_emb_low, h_eval_bare, B, dm_core ) + self.log.note(f"Low-Level E : {e_low:.8f}") else: raise NotImplementedError("WFT evaluation is not implemented for this class.") From 6972d2346a3a6006aa095c89898f3bcb1aa919bf Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 1 Jun 2026 08:58:03 +0800 Subject: [PATCH 28/30] add the unit test for the ML-density oniom embedding. --- .../tests/test_dft_embedding_harris.py | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 gpu4pyscf/qmmm/embedding/tests/test_dft_embedding_harris.py diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding_harris.py b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding_harris.py new file mode 100644 index 000000000..631bb9314 --- /dev/null +++ b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding_harris.py @@ -0,0 +1,135 @@ +# Copyright 2021-2025 The PySCF Developers. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import cupy as cp +from pyscf import gto +from gpu4pyscf.dft import rks +from gpu4pyscf.qmmm.embedding.embedding_dft import SingleFragmentEmbedding +from gpu4pyscf.qmmm.embedding.embedding_dft_harris import HarrisRKS, SingleFragmentEmbedding_ML + + +def dummy_eval_density_func(mol, xc, grids, atomic_weights=None, grid_weights=None): + mf = rks.RKS(mol) + mf.xc = xc + mf.grids = grids + mf.verbose = 0 + mf.kernel() + + dm = cp.asarray(mf.make_rdm1()) + + # Calculate exact J and K matrices + vj, vk = mf.get_jk(mol, dm) + e_j = 0.5 * float(cp.sum(dm * vj)) + + is_hybrid = mf._numint.libxc.is_hybrid_xc(xc) + if is_hybrid: + hyb = mf._numint.libxc.hybrid_coeff(xc, spin=mol.spin) + vk = vk * hyb + e_k = 0.5 * float(cp.sum(dm * vk)) + else: + vk = None + e_k = 0.0 + + # Calculate exact Vxc and Exc + _, e_xc, vxc = mf._numint.nr_rks(mol, grids, xc, dm) + int_rho_vxc = float(cp.sum(dm * vxc)) + + return vj, vk, vxc, e_j, e_k, float(e_xc), int_rho_vxc + + +class TestMLEmbedding(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.mol = gto.Mole() + cls.mol.atom = ''' + C -0.76091 -0.00000 0.00000 + C 0.76091 -0.00000 0.00000 + H -1.16001 1.02029 0.00000 + H -1.16001 -0.51014 -0.88357 + H -1.16001 -0.51014 0.88357 + H 1.16001 -1.02029 0.00000 + H 1.16001 0.51014 0.88357 + H 1.16001 0.51014 -0.88357 + ''' + cls.mol.basis = '6-31g' + cls.mol.spin = 0 + cls.mol.charge = 0 + 
cls.mol.verbose = 0 + cls.mol.build() + + cls.methyl_fragment = [0, 2, 3, 4] + cls.full_fragment = [i for i in range(cls.mol.natm)] + + @classmethod + def tearDownClass(cls): + del cls.mol + + def test_harris_rks_exactness(self): + mf_ref = rks.RKS(self.mol, xc='PBE') + mf_ref.verbose = 0 + e_ref = mf_ref.kernel() + + mf_harris = HarrisRKS(self.mol, dummy_eval_density_func, xc='PBE') + mf_harris.verbose = 0 + e_harris = mf_harris.kernel() + + self.assertAlmostEqual(e_ref, e_harris, places=8, + msg=f"HarrisRKS energy {e_harris} differs from exact RKS {e_ref}") + + def test_full_system_pbe_in_pbe(self): + mf_outer = HarrisRKS(self.mol, dummy_eval_density_func, xc='PBE') + mf_inner = rks.RKS(self.mol, xc='PBE') + + emb_obj = SingleFragmentEmbedding_ML(mf_outer, mf_inner, self.full_fragment, verbose=0) + emb_obj.kernel() + + mf_outer.kernel() + e_global = mf_outer.e_tot + e_emb = emb_obj.e_tot + + self.assertAlmostEqual(e_global, e_emb, places=8, + msg="Full-system PBE-in-PBE failed exact cancellation.") + + def test_equivalence_to_standard_embedding(self): + + mf_outer_std = rks.RKS(self.mol, xc='PBE') + mf_inner_std = rks.RKS(self.mol, xc='B3LYP') + emb_std = SingleFragmentEmbedding(mf_outer_std, mf_inner_std, self.methyl_fragment, verbose=0) + e_std = emb_std.kernel() + + mf_outer_ml = HarrisRKS(self.mol, dummy_eval_density_func, xc='PBE') + mf_inner_ml = rks.RKS(self.mol, xc='B3LYP') + emb_ml = SingleFragmentEmbedding_ML(mf_outer_ml, mf_inner_ml, self.methyl_fragment, verbose=0) + e_ml = emb_ml.kernel() + + self.assertAlmostEqual(e_std, e_ml, places=8, + msg=f"ML Embedding {e_ml} diverged from Standard Embedding {e_std}!") + + def test_harris_max_cycle_override(self): + + mf_harris = HarrisRKS(self.mol, dummy_eval_density_func, xc='PBE') + mf_harris.max_cycle = 100 + mf_harris.verbose = 0 + + mf_harris.kernel() + + self.assertEqual(mf_harris.max_cycle, 1, + "HarrisRKS failed to override malicious max_cycle setting!") + +if __name__ == '__main__': + print("Full 
Tests for ML-Driven ONIOM-like Embedding...") + unittest.main() + From 59fe2ec63e51372dbd5980e31d180cbf3a088327 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 1 Jun 2026 09:52:37 +0800 Subject: [PATCH 29/30] add the example and fix some typos --- .../50-example_ml_density_embedding.py | 124 ++++++++++++++++++ .../qmmm/embedding/embedding_dft_harris.py | 6 +- .../tests/test_dft_embedding_harris.py | 2 +- 3 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 examples/embedding/50-example_ml_density_embedding.py diff --git a/examples/embedding/50-example_ml_density_embedding.py b/examples/embedding/50-example_ml_density_embedding.py new file mode 100644 index 000000000..b3f408d73 --- /dev/null +++ b/examples/embedding/50-example_ml_density_embedding.py @@ -0,0 +1,124 @@ +# Copyright 2021-2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Example: ML-Driven DFT Embedding (ONIOM-like scheme) + +This example demonstrates how to use the `HarrisRKS` and `SingleFragmentEmbedding_ML` +classes to perform a multi-scale quantum chemistry calculation (QM/QM). +It uses a dummy ML density evaluator to simulate an ultra-fast global PBE calculation, +and then performs a rigorous B3LYP high-level calculation only on the active fragment. 
+""" + +import numpy as np +import cupy as cp +from pyscf import gto +from gpu4pyscf.dft import rks +from gpu4pyscf.qmmm.embedding.embedding_dft_harris import HarrisRKS, SingleFragmentEmbedding_ML + + +def dummy_eval_density_func(mol, xc, grids): + """ + A pure DFT surrogate that mimics the behavior of an ML density predictor. + It performs a standard SCF to convergence and returns the exact potentials + and energies, acting as the "Ground Truth" ML model. + """ + print("\n[ML Surrogate] Generating density and effective potentials...") + mf = rks.RKS(mol) + mf.xc = xc + mf.grids = grids + mf.verbose = 0 + mf.kernel() + + dm = cp.asarray(mf.make_rdm1()) + vj, vk = mf.get_jk(mol, dm) + e_j = 0.5 * float(cp.sum(dm * vj)) + + is_hybrid = mf._numint.libxc.is_hybrid_xc(xc) + if is_hybrid: + hyb = mf._numint.libxc.hybrid_coeff(xc, spin=mol.spin) + vk = vk * hyb + e_k = 0.5 * float(cp.sum(dm * vk)) + else: + vk = None + e_k = 0.0 + + _, e_xc, vxc = mf._numint.nr_rks(mol, grids, xc, dm) + int_rho_vxc = float(cp.sum(dm * vxc)) + + print("[ML Surrogate] Potential generation completed.\n") + return vj, vk, vxc, e_j, e_k, float(e_xc), int_rho_vxc + + +def main(): + # 1. 
Build a target molecule (e.g., Hexane) + mol = gto.Mole() + mol.atom = ''' + C 1.4522500000 -2.8230000000 0.0000000000 + C 1.4522500000 -1.2830000000 0.0000000000 + C 0.0002500000 -0.7700000000 0.0000000000 + C 0.0002500000 0.7700000000 0.0000000000 + C -1.4517500000 1.2830000000 0.0000000000 + C -1.4517500000 2.8230000000 0.0000000000 + H 2.4792500000 -3.1870000000 0.0000000000 + H 0.9382500000 -3.1870000000 0.8900000000 + H 0.9382500000 -3.1870000000 -0.8900000000 + H 1.9652500000 -0.9200000000 0.8900000000 + H 1.9652500000 -0.9200000000 -0.8900000000 + H -0.5137500000 -1.1330000000 -0.8900000000 + H -0.5137500000 -1.1330000000 0.8900000000 + H 0.5132500000 1.1330000000 0.8900000000 + H 0.5132500000 1.1330000000 -0.8900000000 + H -1.9657500000 0.9200000000 -0.8900000000 + H -1.9657500000 0.9200000000 0.8900000000 + H -2.4797500000 3.1870000000 0.0000000000 + H -0.9377500000 3.1870000000 0.8900000000 + H -0.9377500000 3.1870000000 -0.8900000000 + ''' + mol.basis = 'sto3g' # Use a small basis set for quick demonstration + mol.spin = 0 + mol.verbose = 4 + mol.build() + + # 2. Define the active region (e.g., the terminal methyl group: C + 3xH) + methyl_fragment = [0, 6, 7, 8] + + print("==================================================") + print(" Starting ML-Driven DFT Embedding Calculation ") + print("==================================================") + + # 3. Setup the Global Low-Level Solver (driven by ML) + # This evaluates the full system using the Harris functional approach in 1 step. + mf_outer = HarrisRKS(mol, dummy_eval_density_func, xc='PBE') + + # 4. Setup the Local High-Level Solver (Standard rigorous DFT) + # This will only be executed within the embedded active space. + mf_inner = rks.RKS(mol, xc='B3LYP') + + # 5. 
Initialize and execute the ML Embedding framework + emb_obj = SingleFragmentEmbedding_ML(mf_outer, mf_inner, methyl_fragment) + e_tot = emb_obj.kernel() + + print("\n==================================================") + print(" Summary of Results ") + print("==================================================") + print(f"Global Low-Level E (ML-PBE) : {mf_outer.e_tot:.8f} Hartree") + print(f"High-Level Local E (B3LYP) : {emb_obj.e_inner[0]:.8f} Hartree") + print(f"Low-Level Local E (PBE) : {emb_obj.e_inner[0] - emb_obj.e_tot + mf_outer.e_tot:.8f} Hartree") # Reverse engineered for display + print(f"--------------------------------------------------") + print(f"FINAL ONIOM TOTAL ENERGY : {e_tot:.8f} Hartree") + print("==================================================") + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py index aee5a1060..01e342d15 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py @@ -35,7 +35,7 @@ def __init__(self, mol, eval_density_func, xc='LDA,VWN'): self.max_cycle = 1 # eval_density_func is the external ML interface. - # Signature: def func(mol, xc, grids, atomic_weights=None, grid_weights=None) + # Signature: def func(mol, xc, grids) # Returns 7 elements: # 1. vj: Coulomb potential matrix (AO basis) # 2. 
vk: Exact exchange potential matrix (AO basis, can be None for pure DFT) @@ -60,10 +60,8 @@ def _get_harris_veff(self, mol=None): if self.grids.coords is None: self.grids.build() - # Global evaluation uses no weights vj, vk, vxc, e_j, e_k, e_xc, int_rho_vxc = self.eval_density_func( - mol, self.xc, self.grids, atomic_weights=None, grid_weights=None - ) + mol, self.xc, self.grids) v_eff_ao = _as_cupy(vj) + _as_cupy(vxc) if vk is not None: diff --git a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding_harris.py b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding_harris.py index 631bb9314..3a2db6773 100644 --- a/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding_harris.py +++ b/gpu4pyscf/qmmm/embedding/tests/test_dft_embedding_harris.py @@ -21,7 +21,7 @@ from gpu4pyscf.qmmm.embedding.embedding_dft_harris import HarrisRKS, SingleFragmentEmbedding_ML -def dummy_eval_density_func(mol, xc, grids, atomic_weights=None, grid_weights=None): +def dummy_eval_density_func(mol, xc, grids): mf = rks.RKS(mol) mf.xc = xc mf.grids = grids From accdebf34fcd2ae5fa733b2b07bf9421474fb687 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 4 Jun 2026 14:54:14 +0800 Subject: [PATCH 30/30] print the true energy in dmet oniom --- gpu4pyscf/qmmm/embedding/embedding.py | 7 ++- gpu4pyscf/qmmm/embedding/embedding_dft.py | 61 ++++++++++++++++--- .../qmmm/embedding/embedding_dft_harris.py | 46 ++++++++++++++ 3 files changed, 106 insertions(+), 8 deletions(-) diff --git a/gpu4pyscf/qmmm/embedding/embedding.py b/gpu4pyscf/qmmm/embedding/embedding.py index 462ac95b3..d3ff8385c 100644 --- a/gpu4pyscf/qmmm/embedding/embedding.py +++ b/gpu4pyscf/qmmm/embedding/embedding.py @@ -80,7 +80,7 @@ def schmidt_decompose(mo_coeff_oao, mo_occ, frag_idx, env_idx, threshold=1e-5): C_rot = C_occ @ Vh.T # Broadly select all potential bath orbitals (including pure fragment ones S ~ 1.0) - is_bath_candidate = S > threshold + is_bath_candidate = (S > threshold) #& (S < 1.0 - threshold) is_core_small = S <= 
threshold n_sv = len(S) @@ -461,6 +461,11 @@ def kernel(self): self.build_embedded_hamiltonian(ifrag, hcore_orig) mf_inner = self._build_inner_mf(ifrag, dm_full_ao) self.solve_embedded(ifrag) + if not self.mf_inner[ifrag].converged: + raise RuntimeError( + f"Embedded high-level SCF did not converge for fragment {ifrag}; " + "do not use this density for delta energy." + ) dm_emb = _as_cupy(mf_inner.make_rdm1()) diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft.py b/gpu4pyscf/qmmm/embedding/embedding_dft.py index 8639576a8..b405598a9 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft.py @@ -46,19 +46,20 @@ def __init__(self, mf_outer, mf_inner, fragment, threshold=1e-5, verbose=None): threshold=threshold, max_macro_iter=1, verbose=verbose) self.fragment = self.fragments[0] - + def _evaluate_embedded_energy(self, mf_obj, dm_emb, h_eval_bare, B, dm_core): - e_h_active = cp.sum(dm_emb * h_eval_bare) + e_h_active = float(cp.sum(dm_emb * h_eval_bare)) dm_full_ao = dm_core + B @ dm_emb @ B.T v_eff_full = mf_obj.get_veff(self.full_mol, dm_full_ao) - v_eff_core = mf_obj.get_veff(self.full_mol, dm_core) + e_2e_full = float(getattr(v_eff_full, 'ecoul', 0.0) + getattr(v_eff_full, 'exc', 0.0)) - e_2e_full = getattr(v_eff_full, 'ecoul', 0.0) + getattr(v_eff_full, 'exc', 0.0) - e_2e_core = getattr(v_eff_core, 'ecoul', 0.0) + getattr(v_eff_core, 'exc', 0.0) - # E_active = E_1e(Active) + [E_2e(Full) - E_2e(Core)] - return e_h_active + e_2e_full - e_2e_core + hcore_orig = _as_cupy(self.mf_outer.get_hcore()) + e_1e_core = float(cp.sum(dm_core * hcore_orig)) + + e_nuc = float(self.full_mol.energy_nuc()) + return e_nuc + e_1e_core + e_h_active + e_2e_full def kernel(self): if not self.mf_outer.converged: @@ -80,8 +81,54 @@ def kernel(self): # Build and Run Inner embedded solver mf_inner = self._build_inner_mf(ifrag, dm_full_ao_low) + + B_mat = self.B[ifrag] + dm_core_mat = self.dm_core[ifrag] + h_eval_bare_mat = B_mat.T @ hcore_orig @ 
B_mat + + # Add the missing core 1-electron energy (kinetic + nuclear attraction from the frozen core) + e1_core = float(cp.sum(dm_core_mat * hcore_orig)) + + # Precompute the frozen core's 2-electron energy (constant during inner SCF) + v_eff_core_high = self.mf_inner_template.get_veff(self.full_mol, dm_core_mat) + e_coul_core = float(getattr(v_eff_core_high, 'ecoul', 0.0)) + e_xc_core = float(getattr(v_eff_core_high, 'exc', 0.0)) + + e_nuc_full = float(self.full_mol.energy_nuc()) + mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc_full + + # Override energy_elec to print the true ONIOM energy difference + def custom_energy_elec(dm=None, h1e=None, vhf=None): + if dm is None: dm = mf_inner.make_rdm1() + if vhf is None: vhf = mf_inner.get_veff(mf_inner.mol, dm) + + dm_cp = _as_cupy(dm) + + # e1: Active space single-electron energy + Core single-electron energy + e1_active = float(cp.sum(dm_cp * h_eval_bare_mat)) + e1 = e1_active + e1_core + + # e2: Full system 2e energy minus core 2e energy + ecoul_full = float(getattr(vhf, 'ecoul', 0.0)) + exc_full = float(getattr(vhf, 'exc', 0.0)) + e2 = ecoul_full + exc_full + + # Update scf_summary for meaningful PySCF debugging output + mf_inner.scf_summary['e1'] = e1 + mf_inner.scf_summary['coul'] = ecoul_full - e_coul_core + mf_inner.scf_summary['exc'] = exc_full - e_xc_core + + return e1 + e2, e2 + + mf_inner.energy_elec = custom_energy_elec + self.log.info("Running high-level inner solver...") self.solve_embedded(ifrag) + if not self.mf_inner[ifrag].converged: + raise RuntimeError( + f"Embedded high-level SCF did not converge for fragment {ifrag}; " + "do not use this density for delta energy." 
+ ) dm_emb_high = _as_cupy(mf_inner.make_rdm1()) dm_emb_low = self.dm_emb_init[ifrag] diff --git a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py index 01e342d15..2c8db5586 100644 --- a/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py +++ b/gpu4pyscf/qmmm/embedding/embedding_dft_harris.py @@ -174,7 +174,53 @@ def kernel(self): self.log.info("Running high-level inner SCF in embedding space...") mf_inner = self._build_inner_mf(ifrag, dm_full_ao_low) + + B_mat = self.B[ifrag] + dm_core_mat = self.dm_core[ifrag] + h_eval_bare_mat = B_mat.T @ hcore_orig @ B_mat + + # Add the missing core 1-electron energy (kinetic + nuclear attraction from the frozen core) + e1_core = float(cp.sum(dm_core_mat * hcore_orig)) + + # Precompute the frozen core's 2-electron energy (constant during inner SCF) + v_eff_core_high = self.mf_inner_template.get_veff(self.full_mol, dm_core_mat) + e_coul_core = float(getattr(v_eff_core_high, 'ecoul', 0.0)) + e_xc_core = float(getattr(v_eff_core_high, 'exc', 0.0)) + + e_nuc_full = float(self.full_mol.energy_nuc()) + mf_inner.energy_nuc = lambda *args, **kwargs: e_nuc_full + + # Override energy_elec to print the true full system energy + def custom_energy_elec(dm=None, h1e=None, vhf=None): + if dm is None: dm = mf_inner.make_rdm1() + if vhf is None: vhf = mf_inner.get_veff(mf_inner.mol, dm) + + dm_cp = _as_cupy(dm) + + # e1: Active space single-electron energy + Core single-electron energy + e1_active = float(cp.sum(dm_cp * h_eval_bare_mat)) + e1 = e1_active + e1_core + + # e2: Full system 2e energy minus core 2e energy + ecoul_full = float(getattr(vhf, 'ecoul', 0.0)) + exc_full = float(getattr(vhf, 'exc', 0.0)) + e2 = ecoul_full + exc_full + + # Update scf_summary for meaningful debugging output + mf_inner.scf_summary['e1'] = e1 + mf_inner.scf_summary['coul'] = ecoul_full - e_coul_core + mf_inner.scf_summary['exc'] = exc_full - e_xc_core + + return e1 + e2, e2 + + mf_inner.energy_elec = 
custom_energy_elec + self.solve_embedded(ifrag) + if not self.mf_inner[ifrag].converged: + raise RuntimeError( + f"Embedded high-level SCF did not converge for fragment {ifrag}; " + "do not use this density for delta energy." + ) dm_emb_high = _as_cupy(mf_inner.make_rdm1()) dm_emb_low = self.dm_emb_init[ifrag]