From 5380b0daf2d097dd4c62827c7bda39ac0978ca8f Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 26 Sep 2025 13:09:45 +0200 Subject: [PATCH 01/18] add mala sampler --- pypesto/sample/__init__.py | 1 + pypesto/sample/adaptive_metropolis.py | 7 ++ pypesto/sample/mala.py | 139 ++++++++++++++++++++++++++ pypesto/sample/metropolis.py | 15 +++ test/sample/test_sample.py | 23 +++-- test/sample/util.py | 4 +- 6 files changed, 181 insertions(+), 8 deletions(-) create mode 100644 pypesto/sample/mala.py diff --git a/pypesto/sample/__init__.py b/pypesto/sample/__init__.py index 96529a319..c34539196 100644 --- a/pypesto/sample/__init__.py +++ b/pypesto/sample/__init__.py @@ -17,6 +17,7 @@ laplace_approximation_log_evidence, parallel_tempering_log_evidence, ) +from .mala import Mala from .metropolis import MetropolisSampler from .parallel_tempering import ParallelTemperingSampler from .sample import sample diff --git a/pypesto/sample/adaptive_metropolis.py b/pypesto/sample/adaptive_metropolis.py index c0b9e13dd..9901088bd 100644 --- a/pypesto/sample/adaptive_metropolis.py +++ b/pypesto/sample/adaptive_metropolis.py @@ -62,6 +62,7 @@ def __init__(self, options: dict = None): self._mean_hist = None self._cov_hist = None self._cov_scale = None + self._cov_chol = None @classmethod def default_options(cls): @@ -86,6 +87,8 @@ def default_options(cls): "target_acceptance_rate": 0.234, # show progress "show_progress": None, + # compute cholesky decomposition of the covariance matrix + "compute_cholesky": False, } def initialize(self, problem: Problem, x0: np.ndarray): @@ -102,6 +105,8 @@ def initialize(self, problem: Problem, x0: np.ndarray): self._mean_hist = self.trace_x[-1] self._cov_hist = self._cov self._cov_scale = 1.0 + if self.options["compute_cholesky"]: + self._cov_chol = np.linalg.cholesky(self._cov) def _propose_parameter(self, x: np.ndarray): x_new = np.random.multivariate_normal(x, self._cov) @@ -137,6 +142,8 @@ def _update_proposal( # regularize proposal covariance self._cov = regularize_covariance(cov=self._cov, reg_factor=reg_factor) + if self.options["compute_cholesky"]: + self._cov_chol = np.linalg.cholesky(self._cov) def update_history_statistics( diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py new file mode 100644 index 000000000..cb4583a9f --- /dev/null +++ b/pypesto/sample/mala.py @@ -0,0 +1,139 @@ +import numpy as np + +from ..problem import Problem +from .adaptive_metropolis import AdaptiveMetropolisSampler + + +class Mala(AdaptiveMetropolisSampler): + """Metropolis-Adjusted Langevin Algorithm (MALA) sampler with preconditioning. + + MALA is a gradient-based MCMC method that uses the gradient of the + log-posterior to guide the proposal distribution. This allows for + more efficient exploration of the parameter space compared to + standard random-walk Metropolis-Hastings. + + The proposal distribution is: + x_new = x + (step_size / 2) * M * grad_log_p(x) + sqrt(step_size) * sqrt(M) * noise + + where grad_log_p(x) is the gradient of the log-posterior at x, + M is the preconditioning matrix, step_size is the discretization step size, + and noise is standard Gaussian noise. + + Preconditioning can significantly improve convergence for poorly conditioned + problems by rescaling the parameter space. Here, we use an adaptive covariance + matrix as the preconditioning matrix M, which is updated during sampling. + We aim to converge to a fixed target acceptance rate of 0.574, as suggested + by theoretical results for MALA. + + For reference, see: + * Roberts et al. 1996. + Exponential convergence of Langevin distributions and their + discrete approximations + (https://doi.org/10.2307/3318418) + * Girolami & Calderhead 2011. + Riemann manifold Langevin and Hamiltonian Monte Carlo methods + (https://doi.org/10.1111/j.1467-9868.2010.00765.x) + """ + + def __init__(self, options: dict = None): + super().__init__(options) + + @classmethod + def default_options(cls): + """Return the default options for the sampler.""" + return { + # step size for the Langevin dynamics + "step_size": 0.01, + # controls adaptation degeneration velocity of the proposals + # in [0, 1], with 0 -> no adaptation, i.e. classical + # Metropolis-Hastings + "decay_constant": 0.51, + # number of samples before adaptation decreases significantly. + # a higher value reduces the impact of early adaptation + "threshold_sample": 1, + # regularization factor for ill-conditioned cov matrices of + # the adapted proposal density. regularization might happen if the + # eigenvalues of the cov matrix strongly differ in order + # of magnitude. in this case, the algorithm adds a small + # diag matrix to the cov matrix with elements of this factor + "reg_factor": 1e-6, + # initial covariance matrix. defaults to a unit matrix + "cov0": None, + # target acceptance rate + "target_acceptance_rate": 0.574, + # show progress + "show_progress": None, + # compute cholesky decomposition of the covariance matrix + "compute_cholesky": True, + } + + def initialize(self, problem: Problem, x0: np.ndarray): + """Initialize the sampler.""" + # Check if gradient is available + if not problem.objective.has_grad: + raise ValueError("MALA sampler requires gradient information.") + + super().initialize(problem, x0) + + def _propose_parameter(self, x: np.ndarray): + """Propose a parameter using preconditioned MALA dynamics.""" + step_size = self.options["step_size"] + + # Get gradient of log-posterior at current position + grad = -self.neglogpost.get_grad(x) + + # Apply preconditioning to gradient + precond_grad = self._cov @ grad + + # Generate standard Gaussian noise + noise = np.random.randn(len(x)) + + # Apply preconditioning to noise (via Cholesky decomposition) + precond_noise = self._cov_chol @ noise + + # Preconditioned MALA proposal: x + (h/2) * M * grad + sqrt(h) * sqrt(M) * noise + drift = (step_size / 2.0) * precond_grad + diffusion = np.sqrt(step_size) * precond_noise + + x_new: np.ndarray = x + drift + diffusion + return x_new + + def _compute_transition_log_prob( + self, x_from: np.ndarray, x_to: np.ndarray + ): + """Compute the log probability of transitioning from x_from to x_to with preconditioning.""" + step_size = self.options["step_size"] + + # Get gradient at position + grad_from = -self.neglogpost.get_grad(x_from) + + # Apply preconditioning to gradient + precond_grad_from = self._cov @ grad_from + + # Mean of the preconditioned proposal distribution + mean = x_from + (step_size / 2.0) * precond_grad_from + + # For preconditioned MALA, the covariance is step_size * M + # We need to compute the log probability under N(mean, step_size * M) + diff = x_to - mean + + # Use Cholesky decomposition for efficient computation + # We need to solve L @ L^T @ z = diff, i.e., z = M^{-1} @ diff + # This is done by first solving L @ y = diff, then L^T @ z = y + try: + # Forward substitution: L @ y = diff + y = np.linalg.solve(self._cov_chol, diff) + # Quadratic form: diff^T @ M^{-1} @ diff = y^T @ y + log_prob = -0.5 * np.dot(y, y) / step_size + except np.linalg.LinAlgError: + # If matrix is singular, return -inf + return -np.inf + + # Add normalization constant: -0.5 * log|2π * step_size * M| + # = -0.5 * (d * log(2π * step_size) + log|M|) + # log|M| = 2 * log|L| = 2 * sum(log(diag(L))) + d = len(x_from) + log_det_cov = 2 * np.sum(np.log(np.diag(self._cov_chol))) + log_prob -= 0.5 * (d * np.log(2 * np.pi * step_size) + log_det_cov) + + return log_prob diff --git a/pypesto/sample/metropolis.py b/pypesto/sample/metropolis.py index 7213dfcc3..86675e139 100644 --- a/pypesto/sample/metropolis.py +++ b/pypesto/sample/metropolis.py @@ -151,6 +151,12 @@ def _perform_step( # log acceptance probability (temper log posterior) log_p_acc = min(beta * (lpost_new - lpost), 0) + # This accounts for an asymmetric proposal distribution + log_q_forward = self._compute_transition_log_prob(x, x_new) + log_q_backward = self._compute_transition_log_prob(x_new, x) + proposal_correction = log_q_backward - log_q_forward + log_p_acc = min(log_p_acc + proposal_correction, 0) + # flip a coin u = np.random.uniform(0, 1) @@ -178,6 +184,15 @@ def _update_proposal( ): """Update the proposal density. Default: Do nothing.""" + def _compute_transition_log_prob( + self, x_from: np.ndarray, x_to: np.ndarray + ): + """Compute the transition log probability for symmetric proposal. + + For a symmetric proposal distribution, this is zero. + """ + return 0.0 + def get_last_sample(self) -> InternalSample: """Get the last sample in the chain. diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py index 7d9f8e4dd..fb1cb91ff 100644 --- a/test/sample/test_sample.py +++ b/test/sample/test_sample.py @@ -41,12 +41,13 @@ @pytest.fixture( params=[ "Metropolis", - "AdaptiveMetropolis", - "ParallelTempering", - "AdaptiveParallelTempering", - "Pymc", - "Emcee", - "Dynesty", + # "AdaptiveMetropolis", + # "ParallelTempering", + # "AdaptiveParallelTempering", + # "Pymc", + # "Emcee", + # "Dynesty", + "Mala", ] ) def sampler(request): @@ -62,6 +63,13 @@ def sampler(request): "show_progress": False, }, ) + elif request.param == "Mala": + return sample.Mala( + options={ + "show_progress": False, + "step_size": 0.01, + }, + ) elif request.param == "ParallelTempering": return sample.ParallelTemperingSampler( internal_sampler=sample.MetropolisSampler(), @@ -103,6 +111,9 @@ def problem(request): def test_pipeline(sampler, problem): """Check that a typical pipeline runs through.""" + if isinstance(sampler, sample.Mala): + if not problem.objective.has_grad: + pytest.skip("MALA requires gradient information.") # optimization optimizer = optimize.ScipyOptimizer(options={"maxiter": 10}) result = optimize.minimize( diff --git a/test/sample/util.py b/test/sample/util.py index b191962bd..6a93e028d 100644 --- a/test/sample/util.py +++ b/test/sample/util.py @@ -35,7 +35,7 @@ def gaussian_llh(x): def gaussian_nllh_grad(x): """Negative log-likelihood gradient for Gaussian.""" - return np.array([((x - MU) / (SIGMA**2))]) + return (x - MU) / (SIGMA**2) def gaussian_nllh_hess(x): @@ -49,7 +49,7 @@ def gaussian_problem(): def nllh(x): return -gaussian_llh(x) - objective = pypesto.Objective(fun=nllh) + objective = pypesto.Objective(fun=nllh, grad=gaussian_nllh_grad) problem = pypesto.Problem( objective=objective, lb=LB_GAUSSIAN, ub=UB_GAUSSIAN ) From 949bfcfa2579b3ca8334366f8058f162baddd31e Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 26 Sep 2025 13:36:05 +0200 Subject: [PATCH 02/18] improve regularize_covariance --- pypesto/sample/adaptive_metropolis.py | 27 +++++++++++++++++++-------- pypesto/sample/mala.py | 4 +--- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/pypesto/sample/adaptive_metropolis.py b/pypesto/sample/adaptive_metropolis.py index 9901088bd..92b5c7259 100644 --- a/pypesto/sample/adaptive_metropolis.py +++ b/pypesto/sample/adaptive_metropolis.py @@ -77,9 +77,7 @@ def default_options(cls): "threshold_sample": 1, # regularization factor for ill-conditioned cov matrices of # the adapted proposal density. regularization might happen if the - # eigenvalues of the cov matrix strongly differ in order - # of magnitude. in this case, the algorithm adds a small - # diag matrix to the cov matrix with elements of this factor + # eigenvalues of the cov matrix strongly differ in order of magnitude. "reg_factor": 1e-6, # initial covariance matrix. defaults to a unit matrix "cov0": None, @@ -212,8 +210,21 @@ def regularize_covariance(cov: np.ndarray, reg_factor: float) -> np.ndarray: cov: Regularized estimate of the covariance matrix of the sample. """ - eig = np.linalg.eigvals(cov) - eig_min = min(eig) - if eig_min <= 0: - cov += (abs(eig_min) + reg_factor) * np.eye(cov.shape[0]) - return cov + # eigh assumes exact symmetry, so we symmetrize cov first + cov = 0.5 * (cov + cov.T) + if not np.all(np.isfinite(cov)): + s = np.nanmean(np.diag(cov)) + s = 1.0 if not np.isfinite(s) or s <= 0 else s + return s * np.eye(cov.shape[0]) + + # eigh guarantees real eigenvalues and orthonormal eigenvectors (Covariance is symmetric positive semidefinite) + w, Q = np.linalg.eigh(cov) + w = np.maximum(w, reg_factor) + # project to the set of positive definite matrices + cov_reg = Q @ (w[:, None] * Q.T) + + # eig = np.linalg.eigvals(cov) + # eig_min = min(eig) + # if eig_min <= 0: + # cov += (abs(eig_min) + reg_factor) * np.eye(cov.shape[0]) + return cov_reg diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index cb4583a9f..cc92609f3 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -53,9 +53,7 @@ def default_options(cls): "threshold_sample": 1, # regularization factor for ill-conditioned cov matrices of # the adapted proposal density. regularization might happen if the - # eigenvalues of the cov matrix strongly differ in order - # of magnitude. in this case, the algorithm adds a small - # diag matrix to the cov matrix with elements of this factor + # eigenvalues of the cov matrix strongly differ in order of magnitude. "reg_factor": 1e-6, # initial covariance matrix. defaults to a unit matrix "cov0": None, From 8b683378c37ff748c9adf7777475a6e3f70ad08b Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 26 Sep 2025 13:59:27 +0200 Subject: [PATCH 03/18] improve regularize_covariance --- pypesto/sample/adaptive_metropolis.py | 69 +++++++++++++++++---------- pypesto/sample/mala.py | 6 +-- test/sample/test_sample.py | 12 ++--- 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/pypesto/sample/adaptive_metropolis.py b/pypesto/sample/adaptive_metropolis.py index 92b5c7259..c5c8d57f4 100644 --- a/pypesto/sample/adaptive_metropolis.py +++ b/pypesto/sample/adaptive_metropolis.py @@ -1,3 +1,4 @@ +import logging import numbers import numpy as np @@ -78,15 +79,15 @@ def default_options(cls): # regularization factor for ill-conditioned cov matrices of # the adapted proposal density. regularization might happen if the # eigenvalues of the cov matrix strongly differ in order of magnitude. - "reg_factor": 1e-6, + "reg_factor": 1e-8, + # maximum number of attempts to regularize the covariance matrix + "max_tries": 10, # initial covariance matrix. defaults to a unit matrix "cov0": None, # target acceptance rate "target_acceptance_rate": 0.234, # show progress "show_progress": None, - # compute cholesky decomposition of the covariance matrix - "compute_cholesky": False, } def initialize(self, problem: Problem, x0: np.ndarray): @@ -99,12 +100,12 @@ def initialize(self, problem: Problem, x0: np.ndarray): cov0 = float(cov0) * np.eye(len(x0)) else: cov0 = np.eye(len(x0)) - self._cov = regularize_covariance(cov0, self.options["reg_factor"]) + self._cov_chol, self._cov = regularize_covariance( + cov0, self.options["reg_factor"], self.options["max_tries"] + ) self._mean_hist = self.trace_x[-1] self._cov_hist = self._cov self._cov_scale = 1.0 - if self.options["compute_cholesky"]: - self._cov_chol = np.linalg.cholesky(self._cov) def _propose_parameter(self, x: np.ndarray): x_new = np.random.multivariate_normal(x, self._cov) @@ -117,6 +118,7 @@ def _update_proposal( decay_constant = self.options["decay_constant"] threshold_sample = self.options["threshold_sample"] reg_factor = self.options["reg_factor"] + max_tries = self.options["max_tries"] target_acceptance_rate = self.options["target_acceptance_rate"] # compute historical mean and covariance @@ -139,9 +141,9 @@ def _update_proposal( self._cov = self._cov_scale * self._cov_hist # regularize proposal covariance - self._cov = regularize_covariance(cov=self._cov, reg_factor=reg_factor) - if self.options["compute_cholesky"]: - self._cov_chol = np.linalg.cholesky(self._cov) + self._cov_chol, self._cov = regularize_covariance( + cov=self._cov, reg_factor=reg_factor, max_tries=max_tries + ) def update_history_statistics( @@ -191,7 +193,11 @@ def update_history_statistics( return mean, cov -def regularize_covariance(cov: np.ndarray, reg_factor: float) -> np.ndarray: +def regularize_covariance( + cov: np.ndarray, + reg_factor: float, + max_tries: int, +) -> tuple[np.ndarray, np.ndarray]: """ Regularize the estimated covariance matrix of the sample. @@ -204,27 +210,42 @@ def regularize_covariance(cov: np.ndarray, reg_factor: float) -> np.ndarray: Estimate of the covariance matrix of the sample. reg_factor: Regularization factor. Larger values result in stronger regularization. + max_tries: + Maximum number of attempts to regularize the covariance matrix. Returns ------- + L: + Cholesky decomposition of the regularized covariance matrix. cov: Regularized estimate of the covariance matrix of the sample. """ - # eigh assumes exact symmetry, so we symmetrize cov first + # we symmetrize cov first cov = 0.5 * (cov + cov.T) + + d = cov.shape[0] + eye = np.eye(d) if not np.all(np.isfinite(cov)): s = np.nanmean(np.diag(cov)) s = 1.0 if not np.isfinite(s) or s <= 0 else s - return s * np.eye(cov.shape[0]) - - # eigh guarantees real eigenvalues and orthonormal eigenvectors (Covariance is symmetric positive semidefinite) - w, Q = np.linalg.eigh(cov) - w = np.maximum(w, reg_factor) - # project to the set of positive definite matrices - cov_reg = Q @ (w[:, None] * Q.T) - - # eig = np.linalg.eigvals(cov) - # eig_min = min(eig) - # if eig_min <= 0: - # cov += (abs(eig_min) + reg_factor) * np.eye(cov.shape[0]) - return cov_reg + return np.sqrt(s) * eye, s * eye + + # scale for the initial jitter + s = np.mean(np.diag(cov)) + s = 1.0 if not np.isfinite(s) or s <= 0 else s + + jitter = reg_factor * s + for _ in range(max_tries): + try: + new_cov = cov + jitter * eye + L = np.linalg.cholesky(new_cov) + return L, new_cov + except np.linalg.LinAlgError: + jitter *= 2.0 + logging.warning( + "Could not regularize covariance matrix. Falling back to " + "scaled identity." + ) + + # final safe fallback + return np.sqrt(s) * eye, s * eye diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index cc92609f3..a889fd37d 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -54,15 +54,15 @@ def default_options(cls): # regularization factor for ill-conditioned cov matrices of # the adapted proposal density. regularization might happen if the # eigenvalues of the cov matrix strongly differ in order of magnitude. - "reg_factor": 1e-6, + "reg_factor": 1e-8, + # maximum number of attempts to regularize the covariance matrix + "max_tries": 10, # initial covariance matrix. defaults to a unit matrix "cov0": None, # target acceptance rate "target_acceptance_rate": 0.574, # show progress "show_progress": None, - # compute cholesky decomposition of the covariance matrix - "compute_cholesky": True, } def initialize(self, problem: Problem, x0: np.ndarray): diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py index fb1cb91ff..2e88ebe26 100644 --- a/test/sample/test_sample.py +++ b/test/sample/test_sample.py @@ -41,12 +41,12 @@ @pytest.fixture( params=[ "Metropolis", - # "AdaptiveMetropolis", - # "ParallelTempering", - # "AdaptiveParallelTempering", - # "Pymc", - # "Emcee", - # "Dynesty", + "AdaptiveMetropolis", + "ParallelTempering", + "AdaptiveParallelTempering", + "Pymc", + "Emcee", + "Dynesty", "Mala", ] ) From 802a5ed507b19e1b4fe9b2caa13fcd23ced7bb6d Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 26 Sep 2025 14:10:16 +0200 Subject: [PATCH 04/18] improve regularize_covariance --- pypesto/sample/adaptive_metropolis.py | 3 +-- pypesto/sample/mala.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pypesto/sample/adaptive_metropolis.py b/pypesto/sample/adaptive_metropolis.py index c5c8d57f4..03104184d 100644 --- a/pypesto/sample/adaptive_metropolis.py +++ b/pypesto/sample/adaptive_metropolis.py @@ -77,8 +77,7 @@ def default_options(cls): # a higher value reduces the impact of early adaptation "threshold_sample": 1, # regularization factor for ill-conditioned cov matrices of - # the adapted proposal density. regularization might happen if the - # eigenvalues of the cov matrix strongly differ in order of magnitude. + # the adapted proposal density. "reg_factor": 1e-8, # maximum number of attempts to regularize the covariance matrix "max_tries": 10, diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index a889fd37d..171c462d9 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -52,8 +52,7 @@ def default_options(cls): # a higher value reduces the impact of early adaptation "threshold_sample": 1, # regularization factor for ill-conditioned cov matrices of - # the adapted proposal density. regularization might happen if the - # eigenvalues of the cov matrix strongly differ in order of magnitude. + # the adapted proposal density. "reg_factor": 1e-8, # maximum number of attempts to regularize the covariance matrix "max_tries": 10, From 260f4c3eeb0876c4f340ad131110c6ba655149a3 Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 26 Sep 2025 14:12:55 +0200 Subject: [PATCH 05/18] improve regularize_covariance --- pypesto/sample/adaptive_metropolis.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pypesto/sample/adaptive_metropolis.py b/pypesto/sample/adaptive_metropolis.py index 03104184d..753952e37 100644 --- a/pypesto/sample/adaptive_metropolis.py +++ b/pypesto/sample/adaptive_metropolis.py @@ -229,6 +229,13 @@ def regularize_covariance( s = 1.0 if not np.isfinite(s) or s <= 0 else s return np.sqrt(s) * eye, s * eye + # Try Cholesky on original matrix first + try: + L = np.linalg.cholesky(cov) + return L, cov + except np.linalg.LinAlgError: + pass # Need regularization + # scale for the initial jitter s = np.mean(np.diag(cov)) s = 1.0 if not np.isfinite(s) or s <= 0 else s From 81b44dcfa6099bf27e86712c2eb4246188566d80 Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 26 Sep 2025 14:39:50 +0200 Subject: [PATCH 06/18] less gradient calls --- pypesto/sample/adaptive_metropolis.py | 8 +++++--- pypesto/sample/mala.py | 10 ++++------ pypesto/sample/metropolis.py | 19 ++++++++++++++----- test/sample/test_sample.py | 4 +++- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/pypesto/sample/adaptive_metropolis.py b/pypesto/sample/adaptive_metropolis.py index 753952e37..31bf5e5fa 100644 --- a/pypesto/sample/adaptive_metropolis.py +++ b/pypesto/sample/adaptive_metropolis.py @@ -100,15 +100,17 @@ def initialize(self, problem: Problem, x0: np.ndarray): else: cov0 = np.eye(len(x0)) self._cov_chol, self._cov = regularize_covariance( - cov0, self.options["reg_factor"], self.options["max_tries"] + cov0, + reg_factor=self.options["reg_factor"], + max_tries=self.options["max_tries"], ) self._mean_hist = self.trace_x[-1] self._cov_hist = self._cov self._cov_scale = 1.0 def _propose_parameter(self, x: np.ndarray): - x_new = np.random.multivariate_normal(x, self._cov) - return x_new + x_new: np.ndarray = np.random.multivariate_normal(x, self._cov) + return x_new, None # no gradient needed def _update_proposal( self, x: np.ndarray, lpost: float, log_p_acc: float, n_sample_cur: int diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index 171c462d9..ee4cb29a8 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -71,6 +71,7 @@ def initialize(self, problem: Problem, x0: np.ndarray): raise ValueError("MALA sampler requires gradient information.") super().initialize(problem, x0) + self._current_x_grad = -self.neglogpost.get_grad(x0) def _propose_parameter(self, x: np.ndarray): """Propose a parameter using preconditioned MALA dynamics.""" @@ -93,19 +94,16 @@ def _propose_parameter(self, x: np.ndarray): diffusion = np.sqrt(step_size) * precond_noise x_new: np.ndarray = x + drift + diffusion - return x_new + return x_new, grad def _compute_transition_log_prob( - self, x_from: np.ndarray, x_to: np.ndarray + self, x_from: np.ndarray, x_to: np.ndarray, x_from_grad: np.ndarray ): """Compute the log probability of transitioning from x_from to x_to with preconditioning.""" step_size = self.options["step_size"] - # Get gradient at position - grad_from = -self.neglogpost.get_grad(x_from) - # Apply preconditioning to gradient - precond_grad_from = self._cov @ grad_from + precond_grad_from = self._cov @ x_from_grad # Mean of the preconditioned proposal distribution mean = x_from + (step_size / 2.0) * precond_grad_from diff --git a/pypesto/sample/metropolis.py b/pypesto/sample/metropolis.py index 86675e139..605b1cb17 100644 --- a/pypesto/sample/metropolis.py +++ b/pypesto/sample/metropolis.py @@ -46,6 +46,7 @@ def __init__(self, options: dict = None): self.trace_neglogpost: Union[Sequence[float], None] = None self.trace_neglogprior: Union[Sequence[float], None] = None self.temper_lpost: bool = False + self._current_x_grad: Union[np.ndarray, None] = None @classmethod def default_options(cls): @@ -111,7 +112,7 @@ def _perform_step( Propose new parameter, evaluate and check whether to accept. """ # propose step - x_new: np.ndarray = self._propose_parameter(x) + x_new, x_new_grad = self._propose_parameter(x) # check if step lies within bounds if any(x_new < self.problem.lb) or any(x_new > self.problem.ub): @@ -152,8 +153,12 @@ def _perform_step( log_p_acc = min(beta * (lpost_new - lpost), 0) # This accounts for an asymmetric proposal distribution - log_q_forward = self._compute_transition_log_prob(x, x_new) - log_q_backward = self._compute_transition_log_prob(x_new, x) + log_q_forward = self._compute_transition_log_prob( + x, x_new, self._current_x_grad + ) + log_q_backward = self._compute_transition_log_prob( + x_new, x, x_new_grad + ) proposal_correction = log_q_backward - log_q_forward log_p_acc = min(log_p_acc + proposal_correction, 0) @@ -166,6 +171,7 @@ def _perform_step( x = x_new lpost = lpost_new lprior = lprior_new + self._current_x_grad = x_new_grad # update proposal self._update_proposal( @@ -177,7 +183,7 @@ def _perform_step( def _propose_parameter(self, x: np.ndarray): """Propose a step.""" x_new: np.ndarray = x + self.options["std"] * np.random.randn(len(x)) - return x_new + return x_new, None # no gradient needed def _update_proposal( self, x: np.ndarray, lpost: float, log_p_acc: float, n_sample_cur: int @@ -185,7 +191,10 @@ def _update_proposal( """Update the proposal density. Default: Do nothing.""" def _compute_transition_log_prob( - self, x_from: np.ndarray, x_to: np.ndarray + self, + x_from: np.ndarray, + x_to: np.ndarray, + x_from_grad: Union[np.ndarray, None], ): """Compute the transition log probability for symmetric proposal. diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py index 2e88ebe26..9fe46a0b5 100644 --- a/test/sample/test_sample.py +++ b/test/sample/test_sample.py @@ -213,7 +213,9 @@ def test_regularize_covariance(): """ matrix = np.array([[-1.0, -4.0], [-4.0, 1.0]]) assert np.any(np.linalg.eigvals(matrix) < 0) - reg = sample.adaptive_metropolis.regularize_covariance(matrix, 1e-6) + reg = sample.adaptive_metropolis.regularize_covariance( + matrix, 1e-6, max_tries=1 + ) assert np.all(np.linalg.eigvals(reg) >= 0) From 7934aabdf3c767c60d306e771efc4b537c7f3033 Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 26 Sep 2025 15:02:15 +0200 Subject: [PATCH 07/18] less gradient calls --- pypesto/sample/__init__.py | 2 +- pypesto/sample/mala.py | 2 +- test/sample/test_sample.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pypesto/sample/__init__.py b/pypesto/sample/__init__.py index c34539196..611c346cc 100644 --- a/pypesto/sample/__init__.py +++ b/pypesto/sample/__init__.py @@ -17,7 +17,7 @@ laplace_approximation_log_evidence, parallel_tempering_log_evidence, ) -from .mala import Mala +from .mala import MalaSampler from .metropolis import MetropolisSampler from .parallel_tempering import ParallelTemperingSampler from .sample import sample diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index ee4cb29a8..318792a24 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -4,7 +4,7 @@ from .adaptive_metropolis import AdaptiveMetropolisSampler -class Mala(AdaptiveMetropolisSampler): +class MalaSampler(AdaptiveMetropolisSampler): """Metropolis-Adjusted Langevin Algorithm (MALA) sampler with preconditioning. MALA is a gradient-based MCMC method that uses the gradient of the diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py index 9fe46a0b5..8720c0700 100644 --- a/test/sample/test_sample.py +++ b/test/sample/test_sample.py @@ -64,7 +64,7 @@ def sampler(request): }, ) elif request.param == "Mala": - return sample.Mala( + return sample.MalaSampler( options={ "show_progress": False, "step_size": 0.01, From 8812281b01176891a37b1970472e24acdbe20a59 Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 26 Sep 2025 15:30:43 +0200 Subject: [PATCH 08/18] add to sampler study --- doc/example/sampler_study.ipynb | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/doc/example/sampler_study.ipynb b/doc/example/sampler_study.ipynb index f5e0ccf95..e3946ff01 100644 --- a/doc/example/sampler_study.ipynb +++ b/doc/example/sampler_study.ipynb @@ -488,6 +488,43 @@ "result.sample_result.betas" ] }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Metropolis-Adjusted Langevin Algorithm\n" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "So far, we have used only the function values to guide the sampling. If gradients are available, they can be used to guide the sampling more effectively. The `pypesto.sample.MetropolisAdjustedLangevinAlgorithm` implements this idea. It can be used within the parallel tempering framework as well." + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "sampler = sample.AdaptiveParallelTemperingSampler(\n", + " internal_sampler=sample.MalaSampler(), n_chains=3\n", + ")\n", + "result = sample.sample(\n", + " problem, 1e3, sampler, x0=np.array([0.5]), filename=None\n", + ")" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "sample.geweke_test(result)\n", + "for i_chain in range(len(result.sample_result.betas)):\n", + " visualize.sampling_1d_marginals(\n", + " result, i_chain=i_chain, suptitle=f\"Chain: {i_chain}\"\n", + " )" + ] + }, { "cell_type": "markdown", "metadata": {}, From 5ee528a8db98d1ee9e662293ccdebd17509d0560 Mon Sep 17 00:00:00 2001 From: arrjon Date: Mon, 29 Sep 2025 10:06:39 +0200 Subject: [PATCH 09/18] improved step size --- pypesto/sample/mala.py | 58 ++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index 318792a24..09be2cbd3 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -25,6 +25,12 @@ class MalaSampler(AdaptiveMetropolisSampler): We aim to converge to a fixed target acceptance rate of 0.574, as suggested by theoretical results for MALA. + By default, the step size is set to sqrt((1/dim)^(1/3)), where dim is the + dimension of the target distribution, following Boisvert-Beaudry and Bedard (2022). + This scaling helps maintain efficient exploration as the dimensionality increases, + and leads to an asymptotically performant sampler, in the sense that exploring the + parameter space requires O(dim^(1/3)) steps. + For reference, see: * Roberts et al. 1996. Exponential convergence of Langevin distributions and their @@ -33,17 +39,29 @@ class MalaSampler(AdaptiveMetropolisSampler): * Girolami & Calderhead 2011. Riemann manifold Langevin and Hamiltonian Monte Carlo methods (https://doi.org/10.1111/j.1467-9868.2010.00765.x) + * Boisvert-Beaudry and Bedard 2022. + MALA with annealed proposals: a generalization of locally and globally balanced proposal distributions """ - def __init__(self, options: dict = None): + def __init__(self, step_size: float = None, options: dict = None): + """Initialize the sampler. + + Parameters + ---------- + step_size: + Step size for the Langevin dynamics. If None, it will be set to + sqrt((1/n_parameter)^(1/3)) during initialization. + options: + Options for the sampler. + """ super().__init__(options) + self.step_size = step_size + self._dimension_of_target_weight = 1.0 @classmethod def default_options(cls): """Return the default options for the sampler.""" return { - # step size for the Langevin dynamics - "step_size": 0.01, # controls adaptation degeneration velocity of the proposals # in [0, 1], with 0 -> no adaptation, i.e. classical # Metropolis-Hastings @@ -70,13 +88,20 @@ def initialize(self, problem: Problem, x0: np.ndarray): if not problem.objective.has_grad: raise ValueError("MALA sampler requires gradient information.") + # Boisvert-Beaudry and Bedard (2022) + # Set step size to problem dimension if not specified + self.step_size = ( + np.sqrt((1 / problem.dim) ** (1 / 3)) + if self.step_size is None + else self.step_size + ) + self._dimension_of_target_weight = 1 + (1 / problem.dim) ** (1 / 3) + super().initialize(problem, x0) self._current_x_grad = -self.neglogpost.get_grad(x0) def _propose_parameter(self, x: np.ndarray): """Propose a parameter using preconditioned MALA dynamics.""" - step_size = self.options["step_size"] - # Get gradient of log-posterior at current position grad = -self.neglogpost.get_grad(x) @@ -90,23 +115,28 @@ def _propose_parameter(self, x: np.ndarray): precond_noise = self._cov_chol @ noise # Preconditioned MALA proposal: x + (h/2) * M * grad + sqrt(h) * sqrt(M) * noise - drift = (step_size / 2.0) * precond_grad - diffusion = np.sqrt(step_size) * precond_noise + drift = (self.step_size / 2.0) * precond_grad + diffusion = np.sqrt(self.step_size) * precond_noise - x_new: np.ndarray = x + drift + diffusion + x_new: np.ndarray = ( + x + self._dimension_of_target_weight * drift + diffusion + ) return x_new, grad def _compute_transition_log_prob( self, x_from: np.ndarray, x_to: np.ndarray, x_from_grad: np.ndarray ): """Compute the log probability of transitioning from x_from to x_to with preconditioning.""" - step_size = self.options["step_size"] - # Apply preconditioning to gradient precond_grad_from = self._cov @ x_from_grad # Mean of the preconditioned proposal distribution - mean = x_from + (step_size / 2.0) * precond_grad_from + mean = ( + x_from + + self._dimension_of_target_weight + * (self.step_size / 2.0) + * precond_grad_from + ) # For preconditioned MALA, the covariance is step_size * M # We need to compute the log probability under N(mean, step_size * M) @@ -119,7 +149,7 @@ def _compute_transition_log_prob( # Forward substitution: L @ y = diff y = np.linalg.solve(self._cov_chol, diff) # Quadratic form: diff^T @ M^{-1} @ diff = y^T @ y - log_prob = -0.5 * np.dot(y, y) / step_size + log_prob = -0.5 * np.dot(y, y) / self.step_size except np.linalg.LinAlgError: # If matrix is singular, return -inf return -np.inf @@ -129,6 +159,8 @@ def _compute_transition_log_prob( # log|M| = 2 * log|L| = 2 * sum(log(diag(L))) d = len(x_from) log_det_cov = 2 * np.sum(np.log(np.diag(self._cov_chol))) - log_prob -= 0.5 * (d * np.log(2 * np.pi * step_size) + log_det_cov) + log_prob -= 0.5 * ( + d * np.log(2 * np.pi * self.step_size) + log_det_cov + ) return log_prob From 0b0c02508d2f7ab6348a88de0c2ed39a84b0dfe3 Mon Sep 17 00:00:00 2001 From: arrjon Date: Mon, 29 Sep 2025 10:09:21 +0200 Subject: [PATCH 10/18] improved step size --- pypesto/sample/mala.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index 09be2cbd3..77e265c5e 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -1,8 +1,12 @@ +import logging + import numpy as np from ..problem import Problem from .adaptive_metropolis import AdaptiveMetropolisSampler +logger = logging.getLogger(__name__) + class MalaSampler(AdaptiveMetropolisSampler): """Metropolis-Adjusted Langevin Algorithm (MALA) sampler with preconditioning. @@ -90,11 +94,9 @@ def initialize(self, problem: Problem, x0: np.ndarray): # Boisvert-Beaudry and Bedard (2022) # Set step size to problem dimension if not specified - self.step_size = ( - np.sqrt((1 / problem.dim) ** (1 / 3)) - if self.step_size is None - else self.step_size - ) + if self.step_size is None: + self.step_size = np.sqrt((1 / problem.dim) ** (1 / 3)) + logger.info(f"Step size set to {self.step_size}") self._dimension_of_target_weight = 1 + (1 / problem.dim) ** (1 / 3) super().initialize(problem, x0) From 7a6778bd077068e7bdbf2029eac91a0293eacc11 Mon Sep 17 00:00:00 2001 From: Jonas Arruda <69197639+arrjon@users.noreply.github.com> Date: Mon, 6 Oct 2025 18:52:45 +0100 Subject: [PATCH 11/18] Update test/sample/test_sample.py Co-authored-by: Daniel Weindl --- test/sample/test_sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py index 8720c0700..e695027bc 100644 --- a/test/sample/test_sample.py +++ b/test/sample/test_sample.py @@ -111,7 +111,7 @@ def problem(request): def test_pipeline(sampler, problem): """Check that a typical pipeline runs through.""" - if isinstance(sampler, sample.Mala): + if isinstance(sampler, sample.MalaSampler): if not problem.objective.has_grad: pytest.skip("MALA requires gradient information.") # optimization From 2c954f07cc3f0719293221bacd762004f335bd6c Mon Sep 17 00:00:00 2001 From: Jonas Arruda <69197639+arrjon@users.noreply.github.com> Date: Mon, 6 Oct 2025 18:52:53 +0100 Subject: [PATCH 12/18] Update pypesto/sample/mala.py Co-authored-by: Daniel Weindl --- pypesto/sample/mala.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index 77e265c5e..31b66a5b6 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -36,6 +36,7 @@ class MalaSampler(AdaptiveMetropolisSampler): parameter space requires O(dim^(1/3)) steps. For reference, see: + * Roberts et al. 1996. Exponential convergence of Langevin distributions and their discrete approximations From f081b9fba3e2435e680d870b359221c25376aa40 Mon Sep 17 00:00:00 2001 From: arrjon Date: Mon, 6 Oct 2025 19:02:10 +0100 Subject: [PATCH 13/18] fix mala for parallel tempering --- pypesto/sample/adaptive_metropolis.py | 2 +- pypesto/sample/mala.py | 4 ++-- pypesto/sample/metropolis.py | 4 ++-- test/sample/test_sample.py | 11 +++++------ 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/pypesto/sample/adaptive_metropolis.py b/pypesto/sample/adaptive_metropolis.py index 31bf5e5fa..64b63a3f1 100644 --- a/pypesto/sample/adaptive_metropolis.py +++ b/pypesto/sample/adaptive_metropolis.py @@ -108,7 +108,7 @@ def initialize(self, problem: Problem, x0: np.ndarray): self._cov_hist = self._cov self._cov_scale = 1.0 - def _propose_parameter(self, x: np.ndarray): + def _propose_parameter(self, x: np.ndarray, beta: float): x_new: np.ndarray = np.random.multivariate_normal(x, self._cov) return x_new, None # no gradient needed diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index 31b66a5b6..56a887723 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -103,10 +103,10 @@ def initialize(self, problem: Problem, x0: np.ndarray): super().initialize(problem, x0) self._current_x_grad = -self.neglogpost.get_grad(x0) - def _propose_parameter(self, x: np.ndarray): + def _propose_parameter(self, x: np.ndarray, beta: float): """Propose a parameter using preconditioned MALA dynamics.""" # Get gradient of log-posterior at current position - grad = -self.neglogpost.get_grad(x) + grad = -beta * self.neglogpost.get_grad(x) # Apply preconditioning to gradient precond_grad = self._cov @ grad diff --git a/pypesto/sample/metropolis.py b/pypesto/sample/metropolis.py index 605b1cb17..cb4c34890 100644 --- a/pypesto/sample/metropolis.py +++ b/pypesto/sample/metropolis.py @@ -112,7 +112,7 @@ def _perform_step( Propose new parameter, evaluate and check whether to accept. """ # propose step - x_new, x_new_grad = self._propose_parameter(x) + x_new, x_new_grad = self._propose_parameter(x, beta=beta) # check if step lies within bounds if any(x_new < self.problem.lb) or any(x_new > self.problem.ub): @@ -180,7 +180,7 @@ def _perform_step( return x, lpost, lprior - def _propose_parameter(self, x: np.ndarray): + def _propose_parameter(self, x: np.ndarray, beta: float): """Propose a step.""" x_new: np.ndarray = x + self.options["std"] * np.random.randn(len(x)) return x_new, None # no gradient needed diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py index e695027bc..d880268cd 100644 --- a/test/sample/test_sample.py +++ b/test/sample/test_sample.py @@ -42,11 +42,11 @@ params=[ "Metropolis", "AdaptiveMetropolis", - "ParallelTempering", - "AdaptiveParallelTempering", - "Pymc", - "Emcee", - "Dynesty", + # "ParallelTempering", + # "AdaptiveParallelTempering", + # "Pymc", + # "Emcee", + # "Dynesty", "Mala", ] ) @@ -67,7 +67,6 @@ def sampler(request): return sample.MalaSampler( options={ "show_progress": False, - "step_size": 0.01, }, ) elif request.param == "ParallelTempering": From 565a312e0c596214158a8e5ebf14c9302daef792 Mon Sep 17 00:00:00 2001 From: arrjon Date: Wed, 8 Oct 2025 10:53:51 +0100 Subject: [PATCH 14/18] beta in mala --- pypesto/sample/mala.py | 5 +++-- pypesto/sample/metropolis.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pypesto/sample/mala.py b/pypesto/sample/mala.py index 56a887723..a74ebbd43 100644 --- a/pypesto/sample/mala.py +++ b/pypesto/sample/mala.py @@ -106,10 +106,11 @@ def initialize(self, problem: Problem, x0: np.ndarray): def _propose_parameter(self, x: np.ndarray, beta: float): """Propose a parameter using preconditioned MALA dynamics.""" # Get gradient of log-posterior at current position - grad = -beta * self.neglogpost.get_grad(x) + grad = -self.neglogpost.get_grad(x) + # todo: in parallel tempering, this should be the tempered gradient, but only the likelihood is tempered # Apply preconditioning to gradient - precond_grad = self._cov @ grad + precond_grad = self._cov @ (beta * grad) # Generate standard Gaussian noise noise = np.random.randn(len(x)) diff --git a/pypesto/sample/metropolis.py b/pypesto/sample/metropolis.py index cb4c34890..60e30514c 100644 --- a/pypesto/sample/metropolis.py +++ b/pypesto/sample/metropolis.py @@ -154,10 +154,10 @@ def _perform_step( # This accounts for an asymmetric proposal distribution log_q_forward = self._compute_transition_log_prob( - x, x_new, self._current_x_grad + x, x_new, beta * self._current_x_grad ) log_q_backward = self._compute_transition_log_prob( - x_new, x, x_new_grad + x_new, x, beta * x_new_grad ) proposal_correction = log_q_backward - log_q_forward log_p_acc = min(log_p_acc + proposal_correction, 0) From bf4a4742a016d4bbfab67d42952aeef724cacdf1 Mon Sep 17 00:00:00 2001 From: arrjon Date: Fri, 10 Oct 2025 11:35:39 +0200 Subject: [PATCH 15/18] add test --- test/sample/test_sample.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py index d880268cd..0a4c7ede5 100644 --- a/test/sample/test_sample.py +++ b/test/sample/test_sample.py @@ -42,11 +42,11 @@ params=[ "Metropolis", "AdaptiveMetropolis", - # "ParallelTempering", - # "AdaptiveParallelTempering", - # "Pymc", - # "Emcee", - # "Dynesty", + "ParallelTempering", + "AdaptiveParallelTempering", + "Pymc", + "Emcee", + "Dynesty", "Mala", ] ) From c57946bc900eb23919bc5897ec6e139d95f83341 Mon Sep 17 00:00:00 2001 From: arrjon Date: Wed, 15 Oct 2025 09:33:32 +0200 Subject: [PATCH 16/18] fix no gradient --- pypesto/sample/adaptive_metropolis.py | 2 +- pypesto/sample/metropolis.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pypesto/sample/adaptive_metropolis.py b/pypesto/sample/adaptive_metropolis.py index 64b63a3f1..00e73c473 100644 --- a/pypesto/sample/adaptive_metropolis.py +++ b/pypesto/sample/adaptive_metropolis.py @@ -110,7 +110,7 @@ def initialize(self, problem: Problem, x0: np.ndarray): def _propose_parameter(self, x: np.ndarray, beta: float): x_new: np.ndarray = np.random.multivariate_normal(x, self._cov) - return x_new, None # no gradient needed + return x_new, np.nan # no gradient needed def _update_proposal( self, x: np.ndarray, lpost: float, log_p_acc: float, n_sample_cur: int diff --git a/pypesto/sample/metropolis.py b/pypesto/sample/metropolis.py index 60e30514c..0fc13ffb5 100644 --- a/pypesto/sample/metropolis.py +++ b/pypesto/sample/metropolis.py @@ -46,7 +46,7 @@ def __init__(self, options: dict = None): self.trace_neglogpost: Union[Sequence[float], None] = None self.trace_neglogprior: Union[Sequence[float], None] = None self.temper_lpost: bool = False - self._current_x_grad: Union[np.ndarray, None] = None + self._current_x_grad: Union[np.ndarray] = np.nan @classmethod def default_options(cls): @@ -183,7 +183,7 @@ def _perform_step( def _propose_parameter(self, x: np.ndarray, beta: float): """Propose a step.""" x_new: np.ndarray = x + self.options["std"] * np.random.randn(len(x)) - return x_new, None # no gradient needed + return x_new, np.nan # no gradient needed def _update_proposal( self, x: np.ndarray, lpost: float, log_p_acc: float, n_sample_cur: int From b775a8dc5483303d17144ae536de0c67beac9da5 Mon Sep 17 00:00:00 2001 From: arrjon Date: Tue, 21 Oct 2025 20:49:13 +0200 Subject: [PATCH 17/18] fix Emcee test --- test/sample/test_sample.py | 14 ++++++++++++-- test/sample/util.py | 13 +++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py index 0a4c7ede5..6d78e7651 100644 --- a/test/sample/test_sample.py +++ b/test/sample/test_sample.py @@ -32,6 +32,7 @@ gaussian_nllh_grad, gaussian_nllh_hess, gaussian_problem, + gaussian_problem_with_grad, negative_log_posterior, negative_log_prior, rosenbrock_problem, @@ -98,11 +99,15 @@ def sampler(request): ) -@pytest.fixture(params=["gaussian", "gaussian_mixture", "rosenbrock"]) +@pytest.fixture( + params=["gaussian", "gaussian_with_grad", "gaussian_mixture", "rosenbrock"] +) def problem(request): if request.param == "gaussian": return gaussian_problem() - if request.param == "gaussian_mixture": + elif request.param == "gaussian_with_grad": + return gaussian_problem_with_grad() + elif request.param == "gaussian_mixture": return gaussian_mixture_problem() elif request.param == "rosenbrock": return rosenbrock_problem() @@ -113,6 +118,11 @@ def test_pipeline(sampler, problem): if isinstance(sampler, sample.MalaSampler): if not problem.objective.has_grad: pytest.skip("MALA requires gradient information.") + if isinstance(sampler, sample.EmceeSampler): + if problem.objective.has_grad: + pytest.skip( + "EMCEE fails to converge when started in optimal point with gradient." + ) # optimization optimizer = optimize.ScipyOptimizer(options={"maxiter": 10}) result = optimize.minimize( diff --git a/test/sample/util.py b/test/sample/util.py index 6a93e028d..268f472bc 100644 --- a/test/sample/util.py +++ b/test/sample/util.py @@ -46,6 +46,19 @@ def gaussian_nllh_hess(x): def gaussian_problem(): """Defines a simple Gaussian problem.""" + def nllh(x): + return -gaussian_llh(x) + + objective = pypesto.Objective(fun=nllh) + problem = pypesto.Problem( + objective=objective, lb=LB_GAUSSIAN, ub=UB_GAUSSIAN + ) + return problem + + +def gaussian_problem_with_grad(): + """Defines a simple Gaussian problem.""" + def nllh(x): return -gaussian_llh(x) From 2af917ce670601f287f319f12c37721c1c63f8cd Mon Sep 17 00:00:00 2001 From: arrjon Date: Tue, 21 Oct 2025 21:11:51 +0200 Subject: [PATCH 18/18] add grad to sampler study --- doc/example/sampler_study.ipynb | 35 +++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/doc/example/sampler_study.ipynb b/doc/example/sampler_study.ipynb index e3946ff01..452989799 100644 --- a/doc/example/sampler_study.ipynb +++ b/doc/example/sampler_study.ipynb @@ -239,7 +239,7 @@ "outputs": [], "source": [ "import seaborn as sns\n", - "from scipy.stats import multivariate_normal\n", + "from scipy.stats import multivariate_normal, norm\n", "\n", "import pypesto\n", "import pypesto.sample as sample\n", @@ -256,7 +256,26 @@ " return -np.log(density(x))\n", "\n", "\n", - "objective = pypesto.Objective(fun=nllh)\n", + "def nllh_grad(x):\n", + " w = np.array([0.3, 0.7])\n", + " mus = np.array([-1.5, 2.5])\n", + " sigmas2 = np.array([0.1, 0.2])\n", + " sigmas = np.sqrt(sigmas2)\n", + "\n", + " pdfs = np.array([w[i] * norm.pdf(x, mus[i], sigmas[i]) for i in range(2)])\n", + " dpdx = np.sum(\n", + " [\n", + " w[i] * norm.pdf(x, mus[i], sigmas[i]) * (mus[i] - x) / sigmas2[i]\n", + " for i in range(2)\n", + " ],\n", + " axis=0,\n", + " )\n", + "\n", + " grad = -dpdx / np.sum(pdfs, axis=0)\n", + " return grad\n", + "\n", + "\n", + "objective = pypesto.Objective(fun=nllh, grad=nllh_grad)\n", "problem = pypesto.Problem(objective=objective, lb=-4, ub=5, x_names=[\"x\"])" ] }, @@ -489,20 +508,20 @@ ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": "### Metropolis-Adjusted Langevin Algorithm\n" }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": "So far, we have used only the function values to guide the sampling. If gradients are available, they can be used to guide the sampling more effectively. The `pypesto.sample.MetropolisAdjustedLangevinAlgorithm` implements this idea. It can be used within the parallel tempering framework as well." }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "sampler = sample.AdaptiveParallelTemperingSampler(\n", " internal_sampler=sample.MalaSampler(), n_chains=3\n", @@ -513,10 +532,10 @@ ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "sample.geweke_test(result)\n", "for i_chain in range(len(result.sample_result.betas)):\n",