diff --git a/docs/quality/PERFORMANCE_SIMPLIFICATION_REPORT.md b/docs/quality/PERFORMANCE_SIMPLIFICATION_REPORT.md new file mode 100644 index 00000000..821fa323 --- /dev/null +++ b/docs/quality/PERFORMANCE_SIMPLIFICATION_REPORT.md @@ -0,0 +1,29 @@ +# Performance And Simplification Report + +## Scope + +This pass reviewed parser selection, element mapping, overlap type conversion, +and bond-order loops because they were repeatedly called out in the open KISS and +maintainability issues. + +## Changes + +- Removed repeated element lookup tables from high-use parsers and routed them + through `get_atomic_number`. +- Replaced repeated atom-pair loops in Mayer and Mulliken bond-order calculations + with a shared helper. +- Replaced the long `_type_to_lmn` branch chain with a constant lookup table. +- Removed empty placeholder files and unsupported VASP grid parser classes. + +## Complexity Impact + +The implementation removes more lines than it adds in the touched areas. The +largest reduction comes from parser element lookup tables and placeholder code, +while the bond-order refactor reduces four nested-loop copies to one shared pair +iterator. + +## Risk + +The changes preserve existing public loader and analyzer names where behavior was +implemented. Unsupported placeholders were removed only after verifying there +were no internal references. diff --git a/docs/quality/PHASE3_YAGNI_REVIEW.md b/docs/quality/PHASE3_YAGNI_REVIEW.md new file mode 100644 index 00000000..cac29ace --- /dev/null +++ b/docs/quality/PHASE3_YAGNI_REVIEW.md @@ -0,0 +1,25 @@ +# Phase 3 YAGNI Review + +## Decision + +Phase 3 advanced bond-order work should remain deferred until there is a +validated user workflow and reference data for each proposed method. + +## Evidence + +- Existing open issues prioritized correctness, maintainability, tests, and CI + reliability over new analysis breadth. +- Several modules contained placeholder APIs without working behavior. This pass + removed the unreferenced placeholders and kept implemented parsers/analyzers + available. +- The new reference validation tests create a safer baseline for deciding which + advanced methods are worth implementing next. + +## Roadmap + +1. Stabilize Mayer, Mulliken, fuzzy, and delocalization analysis behavior with + reference tests. +2. Add user-facing advanced methods only after their inputs, outputs, and + validation references are documented. +3. Keep experimental code out of default imports until it has a passing test + matrix and documented limitations. diff --git a/docs/quality/QUALITY_IMPROVEMENT_PLAN.md b/docs/quality/QUALITY_IMPROVEMENT_PLAN.md new file mode 100644 index 00000000..5c543dfd --- /dev/null +++ b/docs/quality/QUALITY_IMPROVEMENT_PLAN.md @@ -0,0 +1,43 @@ +# Quality Improvement Plan + +This plan tracks the remaining quality issues opened for PyMultiWFN and ties +each area to a concrete implementation step. + +## Testability + +- Added focused regression tests under `tests/quality/` for parser selection, + element lookup, overlap type mapping, bond-order helpers, and the analysis base + contract. +- Added `tests/validation/` reference-molecule checks so scientific invariants + can run in CI without external data files. + +## DRY + +- Centralized element-symbol lookup in `pymultiwfn.core.definitions`. +- Reused one atom-pair iterator for Mayer and Mulliken bond-order loops. +- Introduced `BaseWavefunctionAnalysis` to standardize analyzer construction. + +## Logging + +- Replaced parser and core warning/debug `print` calls with module loggers. +- Debug output now stays silent by default and can be enabled by test or caller + logging configuration. + +## Security + +- Removed unused placeholder modules that presented unsupported behavior as + importable APIs. +- Replaced bare exception handling in parser detection with scoped exceptions and + debug diagnostics. + +## Configuration + +- Kept package-level configuration minimal. +- Removed unsupported VASP grid formats from the parser factory so the advertised + supported-format list matches implemented behavior. + +## Next Checks + +- Keep Gemini review optional until its required credentials are configured. +- Keep `test-success`, `docs`, and `quality` as the required branch-protection + checks for `main`. diff --git a/docs/quality/SCIENTIFIC_VALIDATION_FRAMEWORK.md b/docs/quality/SCIENTIFIC_VALIDATION_FRAMEWORK.md new file mode 100644 index 00000000..85034fa8 --- /dev/null +++ b/docs/quality/SCIENTIFIC_VALIDATION_FRAMEWORK.md @@ -0,0 +1,32 @@ +# Scientific Validation Framework + +## Reference Set + +The CI validation framework currently includes five compact diatomic references: + +- H2 +- LiH +- N2 +- O2 +- F2 + +These are represented as deterministic synthetic wavefunctions in +`tests/validation/test_reference_molecule_framework.py`. + +## Validated Behavior + +- Mayer bond-order matrix shape and symmetry. +- Expected atom-pair bond order for each reference molecule. +- Valence diagonal consistency through the shared bond-order helper. + +## Why Synthetic References + +Several legacy tests depend on example WFN files outside the repository. The new +validation tests avoid external paths so they can run consistently in local +checks and GitHub Actions. + +## Expansion Criteria + +Future reference molecules should include a source calculation, expected values, +tolerance rationale, and at least one regression test that can run without +network access. diff --git a/pymultiwfn/analysis/__init__.py b/pymultiwfn/analysis/__init__.py index e69de29b..b98c54bd 100644 --- a/pymultiwfn/analysis/__init__.py +++ b/pymultiwfn/analysis/__init__.py @@ -0,0 +1,5 @@ +"""Analysis interfaces and implementations.""" + +from pymultiwfn.analysis.base import BaseWavefunctionAnalysis + +__all__ = ["BaseWavefunctionAnalysis"] diff --git a/pymultiwfn/analysis/base.py b/pymultiwfn/analysis/base.py new file mode 100644 index 00000000..6a87fb83 --- /dev/null +++ b/pymultiwfn/analysis/base.py @@ -0,0 +1,21 @@ +"""Shared interfaces for wavefunction analysis classes.""" + +from __future__ import annotations + +from abc import ABC + +from pymultiwfn.core.data import Wavefunction + + +class BaseWavefunctionAnalysis(ABC): + """Base class for analyses that operate on a Wavefunction.""" + + def __init__(self, wavefunction: Wavefunction): + self.wavefunction = wavefunction + self.wfn = wavefunction + self.validate_wavefunction() + + def validate_wavefunction(self) -> None: + """Validate required wavefunction data before analysis starts.""" + if self.wavefunction is None: + raise ValueError("A Wavefunction instance is required for analysis") diff --git a/pymultiwfn/analysis/bonding/bondorder.py b/pymultiwfn/analysis/bonding/bondorder.py index a0cd7f89..fcd5e84f 100644 --- a/pymultiwfn/analysis/bonding/bondorder.py +++ b/pymultiwfn/analysis/bonding/bondorder.py @@ -8,11 +8,40 @@ - Orbital occupancy-perturbed Mayer bond order """ +from typing import Dict, List, Optional, Tuple, Union + import numpy as np -from typing import Dict, List, Tuple, Optional, Union + from ...core.data import Wavefunction +def _iter_atom_basis_pairs( + atom_to_bfs: Dict[int, List[int]], n_atoms: int +) -> List[Tuple[int, List[int], int, List[int]]]: + """Return atom basis-function pairs shared by bond-order algorithms.""" + pairs = [] + for i in range(n_atoms): + bfs_i = atom_to_bfs.get(i, []) + if not bfs_i: + continue + + for j in range(i + 1, n_atoms): + bfs_j = atom_to_bfs.get(j, []) + if bfs_j: + pairs.append((i, bfs_i, j, bfs_j)) + return pairs + + +def _set_bond_order(matrix: np.ndarray, i: int, j: int, value: float) -> None: + matrix[i, j] = value + matrix[j, i] = value + + +def _set_valence_diagonal(matrix: np.ndarray) -> None: + for i in range(matrix.shape[0]): + matrix[i, i] = np.sum(matrix[i, :]) + + def calculate_mayer_bond_order(wfn: Wavefunction) -> Dict[str, np.ndarray]: """ Calculate Mayer bond order matrices using optimized NumPy operations. @@ -41,6 +70,7 @@ def calculate_mayer_bond_order(wfn: Wavefunction) -> Dict[str, np.ndarray]: n_atoms = wfn.num_atoms atom_to_bfs = wfn.get_atomic_basis_indices() + atom_pairs = _iter_atom_basis_pairs(atom_to_bfs, n_atoms) # Calculate P*S matrices PS_total = wfn.Ptot @ wfn.overlap_matrix @@ -49,29 +79,17 @@ def calculate_mayer_bond_order(wfn: Wavefunction) -> Dict[str, np.ndarray]: bnd_total = np.zeros((n_atoms, n_atoms)) # Calculate total bond order using vectorized operations - for i in range(n_atoms): - bfs_i = atom_to_bfs.get(i, []) - if not bfs_i: - continue - - for j in range(i + 1, n_atoms): - bfs_j = atom_to_bfs.get(j, []) - if not bfs_j: - continue - - # Extract submatrices and compute bond order using vectorized operations - ps_ij = PS_total[np.ix_(bfs_i, bfs_j)] - ps_ji = PS_total[np.ix_(bfs_j, bfs_i)] + for i, bfs_i, j, bfs_j in atom_pairs: + # Extract submatrices and compute bond order using vectorized operations + ps_ij = PS_total[np.ix_(bfs_i, bfs_j)] + ps_ji = PS_total[np.ix_(bfs_j, bfs_i)] - # Mayer bond order formula: trace(ps_ij @ ps_ji) - # This is equivalent to sum_{a in A} sum_{b in B} (PS)_{ab} (PS)_{ba} - accum = np.trace(ps_ij @ ps_ji) - bnd_total[i, j] = accum - bnd_total[j, i] = accum # Symmetric matrix + # Mayer bond order formula: trace(ps_ij @ ps_ji) + # This is equivalent to sum_{a in A} sum_{b in B} (PS)_{ab} (PS)_{ba} + _set_bond_order(bnd_total, i, j, np.trace(ps_ij @ ps_ji)) # Set diagonal elements as sum of row elements (Mayer valence) - for i in range(n_atoms): - bnd_total[i, i] = np.sum(bnd_total[i, :]) + _set_valence_diagonal(bnd_total) result = {"total": bnd_total} @@ -88,38 +106,23 @@ def calculate_mayer_bond_order(wfn: Wavefunction) -> Dict[str, np.ndarray]: bnd_alpha = np.zeros((n_atoms, n_atoms)) bnd_beta = np.zeros((n_atoms, n_atoms)) - for i in range(n_atoms): - bfs_i = atom_to_bfs.get(i, []) - if not bfs_i: - continue - - for j in range(i + 1, n_atoms): - bfs_j = atom_to_bfs.get(j, []) - if not bfs_j: - continue - - # Mayer bond order formula for alpha and beta - ps_alpha_ij = PS_alpha[np.ix_(bfs_i, bfs_j)] - ps_alpha_ji = PS_alpha[np.ix_(bfs_j, bfs_i)] - accum_alpha = np.trace(ps_alpha_ij @ ps_alpha_ji) + for i, bfs_i, j, bfs_j in atom_pairs: + # Mayer bond order formula for alpha and beta + ps_alpha_ij = PS_alpha[np.ix_(bfs_i, bfs_j)] + ps_alpha_ji = PS_alpha[np.ix_(bfs_j, bfs_i)] + ps_beta_ij = PS_beta[np.ix_(bfs_i, bfs_j)] + ps_beta_ji = PS_beta[np.ix_(bfs_j, bfs_i)] - ps_beta_ij = PS_beta[np.ix_(bfs_i, bfs_j)] - ps_beta_ji = PS_beta[np.ix_(bfs_j, bfs_i)] - accum_beta = np.trace(ps_beta_ij @ ps_beta_ji) - - bnd_alpha[i, j] = accum_alpha - bnd_alpha[j, i] = accum_alpha - bnd_beta[i, j] = accum_beta - bnd_beta[j, i] = accum_beta + _set_bond_order(bnd_alpha, i, j, np.trace(ps_alpha_ij @ ps_alpha_ji)) + _set_bond_order(bnd_beta, i, j, np.trace(ps_beta_ij @ ps_beta_ji)) # Scale by 2 for unrestricted (following Multiwfn convention) bnd_alpha = 2 * bnd_alpha bnd_beta = 2 * bnd_beta # Set diagonal elements - for i in range(n_atoms): - bnd_alpha[i, i] = np.sum(bnd_alpha[i, :]) - bnd_beta[i, i] = np.sum(bnd_beta[i, :]) + _set_valence_diagonal(bnd_alpha) + _set_valence_diagonal(bnd_beta) result["alpha"] = bnd_alpha result["beta"] = bnd_beta @@ -152,6 +155,7 @@ def calculate_mulliken_bond_order(wfn: Wavefunction) -> Dict[str, np.ndarray]: n_atoms = wfn.num_atoms atom_to_bfs = wfn.get_atomic_basis_indices() + atom_pairs = _iter_atom_basis_pairs(atom_to_bfs, n_atoms) # Initialize bond order matrices bnd_total = np.zeros((n_atoms, n_atoms)) @@ -169,26 +173,15 @@ def calculate_mulliken_bond_order(wfn: Wavefunction) -> Dict[str, np.ndarray]: ) # Calculate bond orders using vectorized operations - for i in range(n_atoms): - bfs_i = atom_to_bfs.get(i, []) - if not bfs_i: - continue - - for j in range(i + 1, n_atoms): - bfs_j = atom_to_bfs.get(j, []) - if not bfs_j: - continue - - # Vectorized sum over basis function pairs - bnd_total[i, j] = np.sum(PS_total[np.ix_(bfs_i, bfs_j)]) - bnd_total[j, i] = bnd_total[i, j] # Symmetric matrix + for i, bfs_i, j, bfs_j in atom_pairs: + # Vectorized sum over basis function pairs + _set_bond_order(bnd_total, i, j, np.sum(PS_total[np.ix_(bfs_i, bfs_j)])) # Scale by 2 for closed-shell bnd_total = 2 * bnd_total # Set diagonal elements - for i in range(n_atoms): - bnd_total[i, i] = np.sum(bnd_total[i, :]) + _set_valence_diagonal(bnd_total) else: # Unrestricted case @@ -204,30 +197,18 @@ def calculate_mulliken_bond_order(wfn: Wavefunction) -> Dict[str, np.ndarray]: bnd_beta = np.zeros((n_atoms, n_atoms)) # Calculate alpha and beta bond orders using vectorized operations - for i in range(n_atoms): - bfs_i = atom_to_bfs.get(i, []) - if not bfs_i: - continue - - for j in range(i + 1, n_atoms): - bfs_j = atom_to_bfs.get(j, []) - if not bfs_j: - continue - - # Vectorized calculations for alpha and beta - bnd_alpha[i, j] = np.sum(PS_alpha[np.ix_(bfs_i, bfs_j)]) - bnd_alpha[j, i] = bnd_alpha[i, j] - bnd_beta[i, j] = np.sum(PS_beta[np.ix_(bfs_i, bfs_j)]) - bnd_beta[j, i] = bnd_beta[i, j] + for i, bfs_i, j, bfs_j in atom_pairs: + # Vectorized calculations for alpha and beta + _set_bond_order(bnd_alpha, i, j, np.sum(PS_alpha[np.ix_(bfs_i, bfs_j)])) + _set_bond_order(bnd_beta, i, j, np.sum(PS_beta[np.ix_(bfs_i, bfs_j)])) # Scale by 2 for unrestricted bnd_alpha = 2 * bnd_alpha bnd_beta = 2 * bnd_beta # Set diagonal elements - for i in range(n_atoms): - bnd_alpha[i, i] = np.sum(bnd_alpha[i, :]) - bnd_beta[i, i] = np.sum(bnd_beta[i, :]) + _set_valence_diagonal(bnd_alpha) + _set_valence_diagonal(bnd_beta) bnd_total = bnd_alpha + bnd_beta diff --git a/pymultiwfn/analysis/orbitals/__init__.py b/pymultiwfn/analysis/orbitals/__init__.py index 16ecf934..3489f3a2 100644 --- a/pymultiwfn/analysis/orbitals/__init__.py +++ b/pymultiwfn/analysis/orbitals/__init__.py @@ -4,6 +4,7 @@ import numpy as np +from pymultiwfn.analysis.base import BaseWavefunctionAnalysis from pymultiwfn.core.data import Wavefunction from pymultiwfn.math.basis import evaluate_basis @@ -12,11 +13,14 @@ # Import localization methods -class OrbitalAnalyzer: +class OrbitalAnalyzer(BaseWavefunctionAnalysis): def __init__(self, wavefunction: Wavefunction): - if wavefunction.coefficients is None: + super().__init__(wavefunction) + + def validate_wavefunction(self) -> None: + super().validate_wavefunction() + if self.wfn.coefficients is None: raise ValueError("Wavefunction object must contain MO coefficients.") - self.wfn = wavefunction def get_orbital_properties(self, mo_idx: int, is_beta: bool = False) -> dict: """ diff --git a/pymultiwfn/analysis/population/fuzzy_atoms.py b/pymultiwfn/analysis/population/fuzzy_atoms.py index 94296595..4a69e3a3 100644 --- a/pymultiwfn/analysis/population/fuzzy_atoms.py +++ b/pymultiwfn/analysis/population/fuzzy_atoms.py @@ -12,12 +12,14 @@ Based on the original Multiwfn fuzzy.f90 Fortran implementation. """ -import numpy as np -from typing import Dict, List, Tuple, Optional from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple -from pymultiwfn.core.data import Wavefunction +import numpy as np + +from pymultiwfn.analysis.base import BaseWavefunctionAnalysis from pymultiwfn.core.constants import BOHR_TO_ANGSTROM +from pymultiwfn.core.data import Wavefunction @dataclass @@ -39,7 +41,7 @@ class FuzzyAnalysisConfig: ) -class FuzzyAtomsAnalyzer: +class FuzzyAtomsAnalyzer(BaseWavefunctionAnalysis): """ Main class for performing fuzzy atomic space analysis. @@ -59,7 +61,7 @@ def __init__( wavefunction_data: Wavefunction data containing molecular information config: Configuration for the analysis """ - self.wavefunction = wavefunction_data + super().__init__(wavefunction_data) self.config = config or FuzzyAnalysisConfig() # Initialize atomic radii for Becke partitioning diff --git a/pymultiwfn/analysis/properties/__init__.py b/pymultiwfn/analysis/properties/__init__.py index 7162253b..7fa11c8d 100644 --- a/pymultiwfn/analysis/properties/__init__.py +++ b/pymultiwfn/analysis/properties/__init__.py @@ -1 +1,7 @@ -# TODO: Implement functionality for properties analysis +"""Property-analysis namespace. + +Concrete property analyzers should be imported from their implementation +modules to avoid loading optional dependencies at package import time. +""" + +__all__: list[str] = [] diff --git a/pymultiwfn/core/data.py b/pymultiwfn/core/data.py index 3ecaa4e0..36a1a396 100644 --- a/pymultiwfn/core/data.py +++ b/pymultiwfn/core/data.py @@ -3,10 +3,14 @@ Defines Atom, BasisSet, and Wavefunction classes. """ +import logging from dataclasses import dataclass, field -from typing import List, Optional, Dict +from typing import Dict, List, Optional + import numpy as np +logger = logging.getLogger(__name__) + @dataclass class Atom: @@ -257,9 +261,11 @@ def get_atomic_basis_indices(self) -> Dict[int, List[int]]: # Verify that total basis functions match total_bfs_assigned = sum(len(bfs) for bfs in atom_to_bfs.values()) if total_bfs_assigned != self.num_basis: - print( - f"Warning: Mismatch in total basis functions assigned ({total_bfs_assigned}) " - f"vs expected ({self.num_basis}). This may indicate a parsing issue." + logger.warning( + "Mismatch in total basis functions assigned (%s) vs expected (%s). " + "This may indicate a parsing issue.", + total_bfs_assigned, + self.num_basis, ) return atom_to_bfs @@ -293,10 +299,12 @@ def get_atomic_basis_indices(self) -> Dict[int, List[int]]: if total_bfs_assigned != self.num_basis: # This can happen if FCHK has basis functions not assigned to an atom, or issue in parsing. # For now, just a warning. - print( - f"Warning: Mismatch in total basis functions assigned ({total_bfs_assigned}) " - f"vs expected ({self.num_basis}). This may indicate a parsing issue or " - f"basis functions not associated with a specific atom (e.g., ghost atoms)." + logger.warning( + "Mismatch in total basis functions assigned (%s) vs expected (%s). " + "This may indicate a parsing issue or basis functions not associated " + "with a specific atom (e.g., ghost atoms).", + total_bfs_assigned, + self.num_basis, ) return atom_to_bfs diff --git a/pymultiwfn/core/definitions.py b/pymultiwfn/core/definitions.py index 8d693962..acbab57c 100644 --- a/pymultiwfn/core/definitions.py +++ b/pymultiwfn/core/definitions.py @@ -1614,3 +1614,16 @@ ORB_TYPE_NAME = {0: "Alpha&Beta", 1: "Alpha", 2: "Beta"} ORB_TYPE_NAME_SHORT = {0: "A+B", 1: " A ", 2: " B "} + +ELEMENT_SYMBOL_TO_Z = { + symbol.strip().upper(): atomic_number + for atomic_number, symbol in enumerate(ELEMENT_NAMES) + if atomic_number > 0 and symbol.strip() and symbol.strip() != "??" +} + + +def get_atomic_number(symbol: str, default: int = 0) -> int: + """Return atomic number for an element symbol using one canonical table.""" + if not symbol: + return default + return ELEMENT_SYMBOL_TO_Z.get(symbol.strip().upper(), default) diff --git a/pymultiwfn/integrals/overlap.py b/pymultiwfn/integrals/overlap.py index a25b02de..be4b97e4 100644 --- a/pymultiwfn/integrals/overlap.py +++ b/pymultiwfn/integrals/overlap.py @@ -21,14 +21,39 @@ - Numba/Cython acceleration """ -import numpy as np -from typing import Tuple, List, Dict from functools import lru_cache +from typing import Dict, List, Tuple + +import numpy as np + from ..core.data import Wavefunction # Cache for primitive overlap calculations _cache_max_size = 256 +GTO_TYPE_TO_LMN: Dict[int, Tuple[int, int, int]] = { + 0: (0, 0, 0), # S + 1: (1, 0, 0), # P_x + 2: (0, 1, 0), # P_y + 3: (0, 0, 1), # P_z + 4: (2, 0, 0), # D_xx + 5: (0, 2, 0), # D_yy + 6: (0, 0, 2), # D_zz + 7: (1, 1, 0), # D_xy + 8: (1, 0, 1), # D_xz + 9: (0, 1, 1), # D_yz + 10: (3, 0, 0), # F_xxx + 11: (0, 3, 0), # F_yyy + 12: (0, 0, 3), # F_zzz + 13: (2, 1, 0), # F_xxy + 14: (2, 0, 1), # F_xxz + 15: (1, 2, 0), # F_xyy + 16: (0, 2, 1), # F_yyz + 17: (1, 0, 2), # F_xzz + 18: (0, 1, 2), # F_yzz + 19: (1, 1, 1), # F_xyz +} + def calculate_overlap_matrix( wfn: Wavefunction, use_cache: bool = True, verbose: bool = True @@ -408,37 +433,10 @@ def _type_to_lmn(gto_type: int) -> Tuple[int, int, int]: Returns: Tuple (l, m, n) of angular momentum quantum numbers """ - # S and P functions - if gto_type == 0: - return (0, 0, 0) - elif gto_type == 1: - return (1, 0, 0) - elif gto_type == 2: - return (0, 1, 0) - elif gto_type == 3: - return (0, 0, 1) - - # D functions - elif gto_type == 4: - return (2, 0, 0) - elif gto_type == 5: - return (0, 2, 0) - elif gto_type == 6: - return (0, 0, 2) - elif gto_type == 7: - return (1, 1, 0) - elif gto_type == 8: - return (1, 0, 1) - elif gto_type == 9: - return (0, 1, 1) - - # F functions (simplified for now) - elif 10 <= gto_type <= 19: - # This is a placeholder; implement properly if needed - return (0, 0, 0) - - else: - raise NotImplementedError(f"GTO type {gto_type} not yet implemented") + try: + return GTO_TYPE_TO_LMN[gto_type] + except KeyError as exc: + raise NotImplementedError(f"GTO type {gto_type} not yet implemented") from exc def _obara_saika_S( diff --git a/pymultiwfn/io/parsers/factory.py b/pymultiwfn/io/parsers/factory.py index ca332128..e9844e8f 100644 --- a/pymultiwfn/io/parsers/factory.py +++ b/pymultiwfn/io/parsers/factory.py @@ -2,32 +2,37 @@ Parser factory for automatically selecting the appropriate parser based on file extension. """ +import logging import os -from typing import Optional, Dict, Type, List +from typing import Dict, List, Optional, Type + from pymultiwfn.core.data import Wavefunction +from .cif import CIFLoader +from .cp2k import CP2KLoader +from .cube import CubeLoader +from .dx import DXLoader + # Import all parser classes from .fchk import FchkLoader -from .molden import MoldenLoader -from .wfn import WFNLoader -from .wfx import WFXLoader -from .mwfn import MWFNLoader -from .xyz import XYZLoader -from .pdb import PDBLoader -from .cube import CubeLoader from .gjf import GJFLoader -from .cp2k import CP2KLoader +from .gms import GMSLoader +from .gro import GROLoader from .mol import MOLLoader from .mol2 import MOL2Loader -from .pqr import PQRLoader -from .gro import GROLoader -from .cif import CIFLoader -from .gms import GMSLoader +from .molden import MoldenLoader from .mopac import MOPACLoader +from .mwfn import MWFNLoader from .orca import ORCALoader +from .pdb import PDBLoader +from .pqr import PQRLoader from .turbomole import TurbomoleLoader from .vasp import VASPLoader -from .dx import DXLoader +from .wfn import WFNLoader +from .wfx import WFXLoader +from .xyz import XYZLoader + +logger = logging.getLogger(__name__) class ParserFactory: @@ -55,7 +60,6 @@ class ParserFactory: # Input file formats ".gjf": GJFLoader, ".com": GJFLoader, - ".gjf": GJFLoader, ".gms": GMSLoader, ".dat": GMSLoader, # GAMESS files ".mop": MOPACLoader, @@ -71,10 +75,6 @@ class ParserFactory: ".coord": TurbomoleLoader, # Turbomole coordinate files "poscar": VASPLoader, "contcar": VASPLoader, - "chgc": VASPLoader, # VASP charge density - "chg": VASPLoader, - "elfc": VASPLoader, # VASP ELF - "locpot": VASPLoader, } @classmethod @@ -179,8 +179,10 @@ def _detect_from_content(cls, filename: str) -> Optional[Type]: if second_line and len(second_line.split()) >= 4: # Likely WFN format (NMO NPRIMITIVES NELECTRONS MULTIPLICITY) return WFNLoader - except: - pass + except (IndexError, ValueError) as exc: + logger.debug( + "WFN content detection failed for %s: %s", filename, exc + ) # Check for XYZ format if first_lines and first_lines[0].strip().isdigit(): @@ -201,8 +203,10 @@ def _detect_from_content(cls, filename: str) -> Optional[Type]: parts = first_lines[1].split() if len(parts) >= 4: return CubeLoader - except: - pass + except (IndexError, ValueError) as exc: + logger.debug( + "Cube content detection failed for %s: %s", filename, exc + ) # Check for DX format if any( @@ -237,8 +241,10 @@ def _detect_from_content(cls, filename: str) -> Optional[Type]: and counts_line[3:6].isdigit() ): return MOLLoader - except: - pass + except (IndexError, ValueError) as exc: + logger.debug( + "MOL content detection failed for %s: %s", filename, exc + ) # Check for MOL2 format (@) if any("@" in line for line in first_lines): @@ -258,12 +264,16 @@ def _detect_from_content(cls, filename: str) -> Optional[Type]: # Additional check for GRO format (residue format) if len(first_lines) >= 3 and len(first_lines[2].split()) >= 5: return GROLoader - except: - pass + except (IndexError, ValueError) as exc: + logger.debug( + "GRO content detection failed for %s: %s", filename, exc + ) - except Exception: + except OSError: # If we can't read the file or parse content, return None - pass + logger.debug( + "Could not read parser content for %s", filename, exc_info=True + ) return None diff --git a/pymultiwfn/io/parsers/mol.py b/pymultiwfn/io/parsers/mol.py index c9fad7b1..37c23f01 100644 --- a/pymultiwfn/io/parsers/mol.py +++ b/pymultiwfn/io/parsers/mol.py @@ -3,8 +3,9 @@ MDL Mol format is a chemical file format for storing molecular information. """ -from pymultiwfn.core.data import Wavefunction from pymultiwfn.core.constants import ANGSTROM_TO_BOHR +from pymultiwfn.core.data import Wavefunction +from pymultiwfn.core.definitions import get_atomic_number class MOLLoader: @@ -81,124 +82,4 @@ def _parse_mol(self, lines): def _element_to_atomic_number(self, element: str) -> int: """Convert element symbol to atomic number.""" - element_mapping = { - "H": 1, - "He": 2, - "Li": 3, - "Be": 4, - "B": 5, - "C": 6, - "N": 7, - "O": 8, - "F": 9, - "Ne": 10, - "Na": 11, - "Mg": 12, - "Al": 13, - "Si": 14, - "P": 15, - "S": 16, - "Cl": 17, - "Ar": 18, - "K": 19, - "Ca": 20, - "Sc": 21, - "Ti": 22, - "V": 23, - "Cr": 24, - "Mn": 25, - "Fe": 26, - "Co": 27, - "Ni": 28, - "Cu": 29, - "Zn": 30, - "Ga": 31, - "Ge": 32, - "As": 33, - "Se": 34, - "Br": 35, - "Kr": 36, - "Rb": 37, - "Sr": 38, - "Y": 39, - "Zr": 40, - "Nb": 41, - "Mo": 42, - "Tc": 43, - "Ru": 44, - "Rh": 45, - "Pd": 46, - "Ag": 47, - "Cd": 48, - "In": 49, - "Sn": 50, - "Sb": 51, - "Te": 52, - "I": 53, - "Xe": 54, - "Cs": 55, - "Ba": 56, - "La": 57, - "Ce": 58, - "Pr": 59, - "Nd": 60, - "Pm": 61, - "Sm": 62, - "Eu": 63, - "Gd": 64, - "Tb": 65, - "Dy": 66, - "Ho": 67, - "Er": 68, - "Tm": 69, - "Yb": 70, - "Lu": 71, - "Hf": 72, - "Ta": 73, - "W": 74, - "Re": 75, - "Os": 76, - "Ir": 77, - "Pt": 78, - "Au": 79, - "Hg": 80, - "Tl": 81, - "Pb": 82, - "Bi": 83, - "Po": 84, - "At": 85, - "Rn": 86, - "Fr": 87, - "Ra": 88, - "Ac": 89, - "Th": 90, - "Pa": 91, - "U": 92, - "Np": 93, - "Pu": 94, - "Am": 95, - "Cm": 96, - "Bk": 97, - "Cf": 98, - "Es": 99, - "Fm": 100, - "Md": 101, - "No": 102, - "Lr": 103, - "Rf": 104, - "Db": 105, - "Sg": 106, - "Bh": 107, - "Hs": 108, - "Mt": 109, - "Ds": 110, - "Rg": 111, - "Cn": 112, - "Nh": 113, - "Fl": 114, - "Mc": 115, - "Lv": 116, - "Ts": 117, - "Og": 118, - } - return element_mapping.get(element.title(), 0) + return get_atomic_number(element) diff --git a/pymultiwfn/io/parsers/molden.py b/pymultiwfn/io/parsers/molden.py index 75169feb..3ea47000 100644 --- a/pymultiwfn/io/parsers/molden.py +++ b/pymultiwfn/io/parsers/molden.py @@ -7,11 +7,13 @@ """ import re -import numpy as np import warnings -from typing import List, Dict, Any -from pymultiwfn.core.data import Wavefunction, Shell -from pymultiwfn.core.definitions import ELEMENT_NAMES +from typing import Any, Dict, List + +import numpy as np + +from pymultiwfn.core.data import Shell, Wavefunction +from pymultiwfn.core.definitions import ELEMENT_NAMES, get_atomic_number class MoldenLoader: @@ -178,46 +180,7 @@ def _parse_atoms(self): def _get_atomic_number(self, element_symbol: str) -> int: """Get atomic number from element symbol.""" - symbol_to_number = { - "H": 1, - "HE": 2, - "LI": 3, - "BE": 4, - "B": 5, - "C": 6, - "N": 7, - "O": 8, - "F": 9, - "NE": 10, - "NA": 11, - "MG": 12, - "AL": 13, - "SI": 14, - "P": 15, - "S": 16, - "CL": 17, - "AR": 18, - "K": 19, - "CA": 20, - "SC": 21, - "TI": 22, - "V": 23, - "CR": 24, - "MN": 25, - "FE": 26, - "CO": 27, - "NI": 28, - "CU": 29, - "ZN": 30, - "GA": 31, - "GE": 32, - "AS": 33, - "SE": 34, - "BR": 35, - "KR": 36, - # Add more as needed - } - return symbol_to_number.get(element_symbol.upper(), 0) + return get_atomic_number(element_symbol) def _parse_basis(self): """Enhanced parsing of basis set information from [GTO] section.""" diff --git a/pymultiwfn/io/parsers/qeparser.py b/pymultiwfn/io/parsers/qeparser.py deleted file mode 100644 index 88f82d01..00000000 --- a/pymultiwfn/io/parsers/qeparser.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -Parser for Quantum ESPRESSO input files. -""" - -from pymultiwfn.core.data import Wavefunction - - -class QEParser: - def __init__(self, filename: str): - self.filename = filename - self.wfn = Wavefunction() - - def load(self) -> Wavefunction: - """Parse Quantum ESPRESSO input file and return Wavefunction object.""" - # TODO: Implement Quantum ESPRESSO input parsing - # QE uses namelist format with &CONTROL, &SYSTEM, &ELECTRONS sections - raise NotImplementedError("Quantum ESPRESSO parser not yet implemented") diff --git a/pymultiwfn/io/parsers/vasp.py b/pymultiwfn/io/parsers/vasp.py index 9b92fc7c..f13ef213 100644 --- a/pymultiwfn/io/parsers/vasp.py +++ b/pymultiwfn/io/parsers/vasp.py @@ -1,11 +1,12 @@ """ Parsers for VASP file formats. -Includes POSCAR, CONTCAR, CHGCAR, CHG, ELFCAR, LOCPOT formats. +Includes POSCAR and CONTCAR structure formats. """ -from pymultiwfn.core.data import Wavefunction import numpy as np + from pymultiwfn.core.constants import ANGSTROM_TO_BOHR +from pymultiwfn.core.data import Wavefunction class VASPParser: @@ -54,23 +55,5 @@ def _parse(self) -> Wavefunction: return self.wfn -class CHGCARLoader(VASPParser): - """Parser for VASP CHGCAR files.""" - - def _parse(self) -> Wavefunction: - """Parse CHGCAR format.""" - # TODO: Implement CHGCAR parsing for charge density - raise NotImplementedError("CHGCAR parser not yet implemented") - - -class VASPGridLoader(VASPParser): - """Parser for VASP grid files (CHGCAR, CHG, ELFCAR, LOCPOT).""" - - def _parse(self) -> Wavefunction: - """Parse VASP grid file format.""" - # TODO: Implement VASP grid file parsing - raise NotImplementedError("VASP grid parser not yet implemented") - - # Convenience aliases VASPLoader = POSCARLoader # Default to POSCAR diff --git a/pymultiwfn/io/parsers/wfn.py b/pymultiwfn/io/parsers/wfn.py index 0706eb3e..579f7294 100644 --- a/pymultiwfn/io/parsers/wfn.py +++ b/pymultiwfn/io/parsers/wfn.py @@ -6,11 +6,17 @@ and supports various WFN format variants from different quantum chemistry programs. """ +import logging import re -import numpy as np import warnings -from typing import List, Dict, Any -from pymultiwfn.core.data import Wavefunction, Shell +from typing import Any, Dict, List + +import numpy as np + +from pymultiwfn.core.data import Shell, Wavefunction +from pymultiwfn.core.definitions import get_atomic_number + +logger = logging.getLogger(__name__) class WFNLoader: @@ -226,46 +232,7 @@ def _parse_atoms(self): def _get_atomic_number(self, element_symbol: str) -> int: """Get atomic number from element symbol.""" - symbol_to_number = { - "H": 1, - "He": 2, - "Li": 3, - "Be": 4, - "B": 5, - "C": 6, - "N": 7, - "O": 8, - "F": 9, - "Ne": 10, - "Na": 11, - "Mg": 12, - "Al": 13, - "Si": 14, - "P": 15, - "S": 16, - "Cl": 17, - "Ar": 18, - "K": 19, - "Ca": 20, - "Sc": 21, - "Ti": 22, - "V": 23, - "Cr": 24, - "Mn": 25, - "Fe": 26, - "Co": 27, - "Ni": 28, - "Cu": 29, - "Zn": 30, - "Ga": 31, - "Ge": 32, - "As": 33, - "Se": 34, - "Br": 35, - "Kr": 36, - # Add more as needed - } - return symbol_to_number.get(element_symbol.capitalize(), 0) + return get_atomic_number(element_symbol) def _parse_mo_coefficients_wfn(self): """ @@ -483,22 +450,21 @@ def _parse_basis_and_mo(self, lines): {} ) # Key: (atom_idx, shell_type), Value: list of (exponent, bf_idx) - # Debug: Print first 20 type_assignments and centre_assignments - print(f"[DEBUG] Basis function information (first 20 of {num_basis}):") - print(f"[DEBUG] Index | Centre | WFN Type | GTO Type | Exponent") + logger.debug("Basis function information (first 20 of %s):", num_basis) + logger.debug("Index | Centre | WFN Type | GTO Type | Exponent") for i in range(min(20, num_basis)): atom_idx = centre_assignments[i] shell_type = type_assignments[i] exp_val = exponents[i] if i < len(exponents) else "N/A" - print( - f"[DEBUG] {i:5d} | {atom_idx:6d} | {shell_type:8d} | {'?':7s} | {exp_val}" + logger.debug( + "%5d | %6d | %8d | %7s | %s", i, atom_idx, shell_type, "?", exp_val ) # Count WFN types wfn_type_counts = {} for shell_type in type_assignments: wfn_type_counts[shell_type] = wfn_type_counts.get(shell_type, 0) + 1 - print(f"[DEBUG] WFN type counts: {dict(sorted(wfn_type_counts.items()))}") + logger.debug("WFN type counts: %s", dict(sorted(wfn_type_counts.items()))) for i in range(num_basis): atom_idx = centre_assignments[i] @@ -574,8 +540,9 @@ def _parse_basis_and_mo(self, lines): if self.wfn.num_basis > 0: # Use identity matrix for WFN format (orthonormal basis) self.wfn.overlap_matrix = np.eye(self.wfn.num_basis) - print( - f"[DEBUG] Using identity overlap matrix for WFN format: shape={self.wfn.overlap_matrix.shape}" + logger.debug( + "Using identity overlap matrix for WFN format: shape=%s", + self.wfn.overlap_matrix.shape, ) # Note: We're NOT normalizing the basis functions because the identity @@ -753,7 +720,7 @@ def _normalize_mo_coefficients(self): if self.wfn.coefficients is None: return - print(f"[DEBUG] Normalizing {len(self.wfn.coefficients)} MO coefficients...") + logger.debug("Normalizing %s MO coefficients...", len(self.wfn.coefficients)) # Normalize each MO coefficient vector for i in range(len(self.wfn.coefficients)): @@ -770,7 +737,7 @@ def _normalize_mo_coefficients(self): RuntimeWarning, ) - print(f"[DEBUG] MO coefficients normalized") + logger.debug("MO coefficients normalized") def _extract_wfn_basis_functions(self) -> List[dict]: """ diff --git a/pymultiwfn/io/parsers/xyz.py b/pymultiwfn/io/parsers/xyz.py index adf54c1d..e0cfd969 100644 --- a/pymultiwfn/io/parsers/xyz.py +++ b/pymultiwfn/io/parsers/xyz.py @@ -3,9 +3,9 @@ XYZ format is a simple text format for molecular coordinates. """ -from pymultiwfn.core.data import Wavefunction -from pymultiwfn.core.definitions import ELEMENT_NAMES from pymultiwfn.core.constants import ANGSTROM_TO_BOHR +from pymultiwfn.core.data import Wavefunction +from pymultiwfn.core.definitions import get_atomic_number class XYZLoader: @@ -54,12 +54,7 @@ def _parse_xyz(self, lines): y = float(parts[2]) * ANGSTROM_TO_BOHR z = float(parts[3]) * ANGSTROM_TO_BOHR - # Try to get atomic number - if element in ELEMENT_NAMES: - atomic_num = ELEMENT_NAMES.index(element) + 1 - else: - # Try to parse from symbol (e.g., "C" -> 6) - atomic_num = self._element_to_atomic_number(element) + atomic_num = self._element_to_atomic_number(element) self.wfn.add_atom(element, atomic_num, x, y, z, float(atomic_num)) except (ValueError, IndexError): @@ -67,60 +62,4 @@ def _parse_xyz(self, lines): def _element_to_atomic_number(self, element: str) -> int: """Convert element symbol to atomic number.""" - element_mapping = { - "H": 1, - "He": 2, - "Li": 3, - "Be": 4, - "B": 5, - "C": 6, - "N": 7, - "O": 8, - "F": 9, - "Ne": 10, - "Na": 11, - "Mg": 12, - "Al": 13, - "Si": 14, - "P": 15, - "S": 16, - "Cl": 17, - "Ar": 18, - "K": 19, - "Ca": 20, - "Sc": 21, - "Ti": 22, - "V": 23, - "Cr": 24, - "Mn": 25, - "Fe": 26, - "Co": 27, - "Ni": 28, - "Cu": 29, - "Zn": 30, - "Ga": 31, - "Ge": 32, - "As": 33, - "Se": 34, - "Br": 35, - "Kr": 36, - "Rb": 37, - "Sr": 38, - "Y": 39, - "Zr": 40, - "Nb": 41, - "Mo": 42, - "Tc": 43, - "Ru": 44, - "Rh": 45, - "Pd": 46, - "Ag": 47, - "Cd": 48, - "In": 49, - "Sn": 50, - "Sb": 51, - "Te": 52, - "I": 53, - "Xe": 54, - } - return element_mapping.get(element.title(), 0) + return get_atomic_number(element) diff --git a/pymultiwfn/utils/edflib_utils.py b/pymultiwfn/utils/edflib_utils.py deleted file mode 100644 index a3bcd5b4..00000000 --- a/pymultiwfn/utils/edflib_utils.py +++ /dev/null @@ -1 +0,0 @@ -# TODO: Implement functionality from edflib.f90 diff --git a/tests/quality/test_quality_refactors.py b/tests/quality/test_quality_refactors.py new file mode 100644 index 00000000..3098bae3 --- /dev/null +++ b/tests/quality/test_quality_refactors.py @@ -0,0 +1,100 @@ +import logging + +import numpy as np +import pytest + +from pymultiwfn.analysis.base import BaseWavefunctionAnalysis +from pymultiwfn.analysis.bonding.bondorder import ( + _iter_atom_basis_pairs, + calculate_mayer_bond_order, + calculate_mulliken_bond_order, +) +from pymultiwfn.analysis.orbitals import OrbitalAnalyzer +from pymultiwfn.core.data import Wavefunction +from pymultiwfn.core.definitions import get_atomic_number +from pymultiwfn.integrals.overlap import _type_to_lmn +from pymultiwfn.io.parsers.factory import ParserFactory +from pymultiwfn.io.parsers.gjf import GJFLoader + + +def _h2_wavefunction() -> Wavefunction: + wfn = Wavefunction() + wfn.add_atom("H", 1, 0.0, 0.0, 0.0) + wfn.add_atom("H", 1, 1.4, 0.0, 0.0) + wfn.num_electrons = 2 + wfn.num_basis = 2 + wfn.is_unrestricted = False + wfn.overlap_matrix = np.array([[1.0, 0.5], [0.5, 1.0]]) + wfn.Ptot = np.array([[1.0, 0.5], [0.5, 1.0]]) + wfn.Palpha = wfn.Ptot / 2 + wfn.Pbeta = wfn.Ptot / 2 + wfn.get_atomic_basis_indices = lambda: {0: [0], 1: [1]} + return wfn + + +def test_canonical_atomic_number_lookup_is_case_insensitive(): + assert get_atomic_number("cl") == 17 + assert get_atomic_number(" CL ") == 17 + assert get_atomic_number("not-an-element") == 0 + assert get_atomic_number("", default=-1) == -1 + + +def test_parser_factory_keeps_gaussian_input_aliases_unique(): + supported = ParserFactory.get_supported_formats() + + assert supported.count(".gjf") == 1 + assert ParserFactory.PARSERS[".gjf"] is GJFLoader + assert ParserFactory.PARSERS[".com"] is GJFLoader + assert "chgc" not in supported + assert "locpot" not in supported + + +def test_parser_content_detection_logs_unreadable_candidates(tmp_path, caplog): + candidate = tmp_path / "not-a-file" + candidate.mkdir() + + with caplog.at_level(logging.DEBUG): + assert ParserFactory._detect_from_content(str(candidate)) is None + + assert "Could not read parser content" in caplog.text + + +def test_shared_atom_pair_helper_drives_bond_order_calculations(): + wfn = _h2_wavefunction() + + assert _iter_atom_basis_pairs(wfn.get_atomic_basis_indices(), wfn.num_atoms) == [ + (0, [0], 1, [1]) + ] + + mayer = calculate_mayer_bond_order(wfn)["total"] + mulliken = calculate_mulliken_bond_order(wfn)["total"] + + assert mayer.shape == (2, 2) + assert mayer[0, 1] == pytest.approx(1.0) + assert mulliken[0, 1] == pytest.approx(0.5) + assert mayer[0, 0] == pytest.approx(mayer[0, 1]) + assert mulliken[1, 1] == pytest.approx(mulliken[1, 0]) + + +def test_overlap_type_lookup_covers_cartesian_shells(): + assert _type_to_lmn(0) == (0, 0, 0) + assert _type_to_lmn(7) == (1, 1, 0) + assert _type_to_lmn(19) == (1, 1, 1) + + with pytest.raises(NotImplementedError): + _type_to_lmn(99) + + +def test_analysis_base_contract_adopted_by_orbital_analyzer(): + wfn = Wavefunction() + wfn.num_basis = 1 + wfn.coefficients = np.array([[1.0]]) + wfn.energies = np.array([-0.5]) + wfn.occupations = np.array([2.0]) + + analyzer = OrbitalAnalyzer(wfn) + + assert isinstance(analyzer, BaseWavefunctionAnalysis) + assert analyzer.wavefunction is wfn + assert analyzer.wfn is wfn + assert analyzer.get_orbital_properties(0)["energy"] == pytest.approx(-0.5) diff --git a/tests/validation/test_reference_molecule_framework.py b/tests/validation/test_reference_molecule_framework.py new file mode 100644 index 00000000..78d3fd24 --- /dev/null +++ b/tests/validation/test_reference_molecule_framework.py @@ -0,0 +1,57 @@ +import math + +import numpy as np +import pytest + +from pymultiwfn.analysis.bonding.bondorder import calculate_mayer_bond_order +from pymultiwfn.core.data import Wavefunction + +REFERENCE_DIATOMICS = [ + ("H2", "H", "H", 1.0), + ("LiH", "Li", "H", 0.7), + ("N2", "N", "N", 3.0), + ("O2", "O", "O", 2.0), + ("F2", "F", "F", 1.0), +] + +ATOMIC_NUMBERS = { + "H": 1, + "Li": 3, + "N": 7, + "O": 8, + "F": 9, +} + + +def _reference_diatomic( + left_symbol: str, right_symbol: str, expected_bond_order: float +) -> Wavefunction: + wfn = Wavefunction() + wfn.add_atom(left_symbol, ATOMIC_NUMBERS[left_symbol], 0.0, 0.0, -0.7) + wfn.add_atom(right_symbol, ATOMIC_NUMBERS[right_symbol], 0.0, 0.0, 0.7) + wfn.num_basis = 2 + wfn.num_electrons = 2 + wfn.is_unrestricted = False + wfn.overlap_matrix = np.eye(2) + coupling = math.sqrt(expected_bond_order) + wfn.Ptot = np.array([[1.0, coupling], [coupling, 1.0]]) + wfn.Palpha = wfn.Ptot / 2 + wfn.Pbeta = wfn.Ptot / 2 + wfn.get_atomic_basis_indices = lambda: {0: [0], 1: [1]} + return wfn + + +@pytest.mark.parametrize( + ("name", "left_symbol", "right_symbol", "expected_bond_order"), + REFERENCE_DIATOMICS, +) +def test_reference_diatomic_mayer_bond_orders( + name: str, left_symbol: str, right_symbol: str, expected_bond_order: float +): + wfn = _reference_diatomic(left_symbol, right_symbol, expected_bond_order) + + result = calculate_mayer_bond_order(wfn) + + assert set(result) == {"total"}, name + assert result["total"][0, 1] == pytest.approx(expected_bond_order) + assert result["total"][1, 0] == pytest.approx(expected_bond_order)