From e680439fc5288327786a1eb33528118858e2e0ef Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Thu, 28 May 2026 16:48:55 -0500 Subject: [PATCH 1/8] Add clone_replace_data mechanism and re-root Gaussian sigma --- ptgp/gp/svgp.py | 2 +- ptgp/gp/unapproximated.py | 3 +- ptgp/gp/vfe.py | 3 +- ptgp/likelihoods/base.py | 71 ++++++++++++++++++++++++ ptgp/likelihoods/gaussian.py | 30 ++-------- tests/likelihoods/test_base.py | 79 ++++++++++++++++++++++++++- tests/test_heteroskedastic_predict.py | 48 ++++++++-------- 7 files changed, 182 insertions(+), 54 deletions(-) diff --git a/ptgp/gp/svgp.py b/ptgp/gp/svgp.py index cdfa52b..a4f34b0 100644 --- a/ptgp/gp/svgp.py +++ b/ptgp/gp/svgp.py @@ -230,7 +230,7 @@ def predict_marginal(self, X, incl_lik=False): fmean, fvar = conditional_unwhitened(A, Kmn, Knn_diag, self.q_mu, self.q_sqrt) fmean = fmean + self.mean(X) if incl_lik: - return self.likelihood.predict_mean_and_var(fmean, fvar) + return self.likelihood.clone_replace_data(X).predict_mean_and_var(fmean, fvar) return fmean, fvar def predict_joint(self, X): diff --git a/ptgp/gp/unapproximated.py b/ptgp/gp/unapproximated.py index 0b37af0..795998f 100644 --- a/ptgp/gp/unapproximated.py +++ b/ptgp/gp/unapproximated.py @@ -64,6 +64,5 @@ def predict_marginal(self, X_new, X_train, y_train, incl_lik=False): fvar = Kss_diag - pt.sum(Kns * (Knn_inv @ Kns), axis=0) if incl_lik: - sigma_new = self.likelihood.sigma_at(X_train, X_new) - return fmean, fvar + sigma_new**2 + return self.likelihood.clone_replace_data(X_new).predict_mean_and_var(fmean, fvar) return fmean, fvar diff --git a/ptgp/gp/vfe.py b/ptgp/gp/vfe.py index 3c2550a..11c5fa6 100644 --- a/ptgp/gp/vfe.py +++ b/ptgp/gp/vfe.py @@ -87,6 +87,5 @@ def predict_marginal(self, X_new, X_train, y_train, incl_lik=False): fvar = Kss_diag - pt.sum(Kus * ((Kuu_inv - Sigma_inv) @ Kus), axis=0) if incl_lik: - sigma_new = self.likelihood.sigma_at(X_train, X_new) - return fmean, fvar + sigma_new**2 + return self.likelihood.clone_replace_data(X_new).predict_mean_and_var(fmean, fvar) return fmean, fvar diff --git a/ptgp/likelihoods/base.py b/ptgp/likelihoods/base.py index 24771d6..6669438 100644 --- a/ptgp/likelihoods/base.py +++ b/ptgp/likelihoods/base.py @@ -1,6 +1,63 @@ +import copy + import numpy as np import pytensor.tensor as pt +from pytensor.graph.replace import clone_replace +from pytensor.graph.traversal import ancestors +from pytensor.tensor.type import TensorType + + +def _data_inputs(value, X): + """Leaf nodes in ``value``'s graph that look like the design matrix ``X``. + + A leaf (no owner) with the same ndim as ``X`` and matching feature + dimensions is taken to be a copy of the model input — whether it is a + symbolic placeholder, a ``pm.Data`` / shared variable, or a constant array + baked in via ``pt.as_tensor_variable``. Matching is by type, so ``pm.Data`` + (a SharedVariable whose type is ``TensorType``) is caught. PyMC random + variables and other computed nodes carry an owner and are excluded; scalars + never match, so literal constants like ``0.1`` are left alone. + """ + feat = X.type.shape[1:] + return [ + v + for v in ancestors([value]) + if v.owner is None + and isinstance(v.type, TensorType) + and v.type.ndim == X.type.ndim + and v.type.ndim > 0 + and all(a is None or b is None or a == b for a, b in zip(v.type.shape[1:], feat)) + ] + + +def _reroot(value, X): + """Re-root ``value`` onto ``X`` by replacing design-matrix-shaped leaves. + + The input(s) to replace are discovered from the graph, so the caller only + supplies the new data. ``rebuild_strict=False`` lets a baked constant array + (whose row dimension is statically frozen) be replaced by ``X`` without + coercing ``X`` back to that frozen shape. A data-independent ``value`` + (scalar or PyMC RV) is returned unchanged. + + ``X`` must be a 2D design matrix ``(N, D)``. The discovery in + :func:`_data_inputs` keys on dimensionality: parameters are per-observation + 1D vectors, so a 2D ``X`` is what distinguishes the data from the parameter + itself. A 1D ``X`` would collapse that gap and could match the parameter, so + it is rejected. + """ + if X.type.ndim < 2: + raise ValueError( + "Re-rooting a likelihood parameter requires a 2D design matrix (N, D); " + f"got X with ndim={X.type.ndim}. Parameters are per-observation 1D " + "vectors, so a 1D X cannot be distinguished from the parameter itself." + ) + value = pt.as_tensor_variable(value) + inputs = _data_inputs(value, X) + if not inputs: + return value + return clone_replace(value, {v: X for v in inputs}, rebuild_strict=False) + class Likelihood: """Base class for all PTGP likelihoods. @@ -23,6 +80,20 @@ class Likelihood: n_points: int = 20 invlink = None + param_names: tuple = () + + def clone_replace_data(self, X): + """Return a copy with every data-dependent parameter re-rooted onto X. + + Each parameter named in ``param_names`` whose graph depends on a free + data input is re-expressed over ``X``; data-independent parameters are + left untouched. Used at predict time to evaluate parameters at the test + inputs rather than the training inputs they were built against. + """ + new = copy.copy(self) + for name in self.param_names: + setattr(new, name, _reroot(getattr(self, name), X)) + return new def _log_prob(self, f, y): """Symbolic log p(y|f). Subclasses must implement.""" diff --git a/ptgp/likelihoods/gaussian.py b/ptgp/likelihoods/gaussian.py index ef2cb8a..9b1964a 100644 --- a/ptgp/likelihoods/gaussian.py +++ b/ptgp/likelihoods/gaussian.py @@ -1,10 +1,5 @@ import pytensor.tensor as pt -from pytensor.graph.basic import Constant -from pytensor.graph.replace import graph_replace -from pytensor.graph.traversal import ancestors -from pytensor.tensor.type import TensorType - from ptgp.likelihoods.base import Likelihood LOG2PI = pt.log(2.0 * pt.pi) @@ -20,32 +15,15 @@ class Gaussian(Likelihood): sigma : tensor Observation noise standard deviation. May be a scalar (homoskedastic) or a vector built against a symbolic ``X`` (heteroskedastic). At - predict time, :meth:`sigma_at` substitutes the training ``X`` for - ``X_new`` in sigma's graph; mismatches surface as a strict-mode - ``graph_replace`` error. + predict time, :meth:`Likelihood.clone_replace_data` re-roots sigma's graph onto + the test inputs. """ + param_names = ("sigma",) + def __init__(self, sigma): self.sigma = pt.as_tensor_variable(sigma) - def sigma_at(self, X_train, X_new): - """Return ``sigma`` with ``X_train`` substituted by ``X_new``. - - Sigma is treated as data-dependent iff it has any free symbolic - tensor input in its graph (covers both raw ``pt.matrix`` X and - ``pm.Data`` X — the latter is a SharedVariable but its type is - ``TensorType``). For data-independent sigma the call is a no-op. - For data-dependent sigma, strict-mode ``graph_replace`` raises a - clear error if ``X_train`` is not in sigma's graph. - """ - has_data_dep = any( - v.owner is None and not isinstance(v, Constant) and isinstance(v.type, TensorType) - for v in ancestors([self.sigma]) - ) - if not has_data_dep: - return self.sigma - return graph_replace(self.sigma, {X_train: X_new}) - def _log_prob(self, f, y): return -0.5 * (LOG2PI + pt.log(self.sigma**2) + (y - f) ** 2 / self.sigma**2) diff --git a/tests/likelihoods/test_base.py b/tests/likelihoods/test_base.py index d87edea..9d2ab4f 100644 --- a/tests/likelihoods/test_base.py +++ b/tests/likelihoods/test_base.py @@ -1,10 +1,12 @@ """Tests for the likelihood base class — configurable inverse-link behavior.""" import numpy as np +import pymc as pm import pytensor import pytensor.tensor as pt +import pytest -from ptgp.likelihoods import Bernoulli, Poisson +from ptgp.likelihoods import Bernoulli, Gaussian, Poisson def _eval(*tensors): @@ -12,6 +14,18 @@ def _eval(*tensors): return f() +def _hetero(X): + return 0.5 + 0.1 * X[:, 0] ** 2 + + +def _provider(arr, kind): + if kind == "symbolic": + return pt.matrix("Xtr", shape=(None, arr.shape[1])) + if kind == "baked": + return pt.as_tensor_variable(arr) + return pm.Data("Xtr", arr) + + class TestConfigurableLink: def test_bernoulli_logit_link(self): """Bernoulli with logit link should differ from probit but still be valid.""" @@ -46,3 +60,66 @@ def softplus(f): ) ) assert np.isfinite(ve).all() + + +class TestCloneReplaceData: + @pytest.mark.parametrize("kind", ["symbolic", "baked", "pm_data"]) + def test_reroot_matches_direct_build(self, kind): + """Re-rooting yields the parameter evaluated at the new inputs, however + the training data was supplied. ``train`` and ``new`` differ in length so + a baked array's statically frozen row dimension cannot be re-imposed.""" + train = np.linspace(-1.0, 1.0, 5)[:, None] + new = np.array([[0.0], [1.0], [2.0]]) + X_new = pt.matrix("X_new", shape=(None, 1)) + with pm.Model(): + rerooted = Gaussian(_hetero(_provider(train, kind))).clone_replace_data(X_new).sigma + got = pytensor.function([X_new], rerooted)(new) + np.testing.assert_allclose(got, _hetero(new)) + + def test_reroot_matches_multidim_input(self): + new = np.array([[0.0, 1.0], [2.0, 3.0]]) + X = pt.matrix("X", shape=(None, 2)) + X_new = pt.matrix("X_new", shape=(None, 2)) + rerooted = Gaussian(0.1 + pt.sum(X**2, axis=1)).clone_replace_data(X_new).sigma + got = pytensor.function([X_new], rerooted)(new) + np.testing.assert_allclose(got, 0.1 + np.sum(new**2, axis=1)) + + def test_returns_copy_leaving_original_untouched(self): + X = pt.matrix("X", shape=(None, 1)) + lik = Gaussian(_hetero(X)) + original = lik.sigma + out = lik.clone_replace_data(pt.matrix("X_new", shape=(None, 1))) + assert lik.sigma is original + assert out.sigma is not original + + def test_scalar_hyperparam_survives_reroot(self): + """A parameter mixing data with a scalar placeholder re-roots only the + data; the scalar still flows through to the result.""" + X = pt.matrix("X", shape=(None, 1)) + hyp = pt.scalar("hyp") + new = np.array([[0.0], [1.0], [2.0]]) + X_new = pt.matrix("X_new", shape=(None, 1)) + out = Gaussian(hyp + 0.05 * X[:, 0] ** 2).clone_replace_data(X_new) + got = pytensor.function([X_new, hyp], out.sigma)(new, 2.0) + np.testing.assert_allclose(got, 2.0 + 0.05 * new[:, 0] ** 2) + + def test_eager_numpy_parameter_is_not_rerooted(self): + """Pure-numpy arithmetic collapses the data dependence to a fixed vector + before pytensor sees it, leaving nothing to re-root.""" + arr = np.linspace(-1.0, 1.0, 5)[:, None] + sigma = pt.as_tensor_variable(0.1 + 0.05 * arr[:, 0] ** 2) + out = Gaussian(sigma).clone_replace_data(pt.matrix("X_new", shape=(None, 1))) + assert out.sigma is sigma + + def test_mismatched_feature_count_is_not_rerooted(self): + sigma = 0.1 + pt.sum(pt.matrix("Xtr", shape=(None, 3)) ** 2, axis=1) + out = Gaussian(sigma).clone_replace_data(pt.matrix("X_new", shape=(None, 1))) + assert out.sigma is sigma + + def test_one_dimensional_X_is_rejected(self): + """A 1D X is indistinguishable from a per-observation parameter, so + re-rooting against it would risk replacing the parameter itself.""" + X = pt.matrix("X", shape=(None, 1)) + lik = Gaussian(0.1 + 0.05 * X[:, 0] ** 2) + with pytest.raises(ValueError, match=r"requires a 2D design matrix"): + lik.clone_replace_data(pt.vector("X_new", shape=(None,))) diff --git a/tests/test_heteroskedastic_predict.py b/tests/test_heteroskedastic_predict.py index d6d407b..75be4aa 100644 --- a/tests/test_heteroskedastic_predict.py +++ b/tests/test_heteroskedastic_predict.py @@ -2,11 +2,11 @@ import pymc as pm import pytensor import pytensor.tensor as pt -import pytest -from ptgp.gp import VFE, Unapproximated +from ptgp.gp import SVGP, VFE, Unapproximated, init_variational_params from ptgp.inducing import Points from ptgp.kernels import ExpQuad +from ptgp.likelihoods import Gaussian from ptgp.mean import Zero from ptgp.objectives import collapsed_elbo @@ -47,18 +47,6 @@ def test_incl_lik_adds_pointwise_noise(self): expected_sigma_new = 0.1 + 0.05 * X_new[:, 0] ** 2 np.testing.assert_allclose(yv, v + expected_sigma_new**2, atol=1e-10) - def test_wrong_X_raises(self): - """sigma built against X_a, predict called with X_b — ancestor check fires.""" - X_a = pt.matrix("X_a", shape=(None, 1)) - X_b = pt.matrix("X_b", shape=(None, 1)) - gp = Unapproximated( - kernel=ExpQuad(input_dim=1, ls=1.0), mean=Zero(), sigma=_hetero_sigma(X_a) - ) - X_new_t = pt.matrix("X_new", shape=(None, 1)) - y_t = pt.vector("y", shape=(None,)) - with pytest.raises(ValueError, match=r"Some replacements were not used"): - gp.predict_marginal(X_new_t, X_b, y_t, incl_lik=True) - class TestVFEHeteroskedastic: def _build(self, X): @@ -86,14 +74,30 @@ def test_incl_lik_adds_pointwise_noise(self): expected_sigma_new = 0.1 + 0.05 * X_new[:, 0] ** 2 np.testing.assert_allclose(yv, v + expected_sigma_new**2, atol=1e-10) - def test_wrong_X_raises(self): - X_a = pt.matrix("X_a", shape=(None, 1)) - X_b = pt.matrix("X_b", shape=(None, 1)) - vfe = self._build(X_a) + +class TestSVGPHeteroskedastic: + def test_incl_lik_adds_pointwise_noise(self): + X_train, y_train, X_new = _data() + M = 6 + Z = np.linspace(-1.5, 1.5, M)[:, None].astype(np.float64) + vp = init_variational_params(M) + X = pt.matrix("X", shape=(None, 1)) + svgp = SVGP( + kernel=ExpQuad(input_dim=1, ls=1.0), + mean=Zero(), + likelihood=Gaussian(_hetero_sigma(X)), + inducing_variable=Points(pt.as_tensor_variable(Z)), + variational_params=vp, + ) X_new_t = pt.matrix("X_new", shape=(None, 1)) - y_t = pt.vector("y", shape=(None,)) - with pytest.raises(ValueError, match=r"Some replacements were not used"): - vfe.predict_marginal(X_new_t, X_b, y_t, incl_lik=True) + + _, fvar = svgp.predict_marginal(X_new_t) + _, yvar = svgp.predict_marginal(X_new_t, incl_lik=True) + fn = pytensor.function([X_new_t, *vp.extra_vars], [fvar, yvar], on_unused_input="ignore") + v, yv = fn(X_new, *vp.extra_init) + + expected_sigma_new = 0.1 + 0.05 * X_new[:, 0] ** 2 + np.testing.assert_allclose(yv - v, expected_sigma_new**2, atol=1e-10) class TestScalarSigmaUnaffected: @@ -148,7 +152,7 @@ def test_pm_data_sigma_is_detected_as_data_dependent(): """sigma built against a pm.Data SharedVariable must still be treated as data-dependent. The heuristic filters by type (TensorType), not by class (SharedVariable) — without this, pm.Data is silently invisible - to sigma_at and predictions use the wrong noise. + to clone_replace_data and predictions use the wrong noise. """ X_train_arr = np.linspace(-1.5, 1.5, 10)[:, None].astype(np.float64) X_new_arr = np.linspace(-2.0, 2.0, 5)[:, None].astype(np.float64) From cce4c9272ab3b4ad6c52ab98504fc24aa54a1223 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Thu, 28 May 2026 16:49:34 -0500 Subject: [PATCH 2/8] Extend parameter re-rooting to StudentT and NegativeBinomial --- ptgp/likelihoods/negative_binomial.py | 4 +++- ptgp/likelihoods/student_t.py | 6 ++++-- tests/likelihoods/test_base.py | 29 ++++++++++++++++++++++++++- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/ptgp/likelihoods/negative_binomial.py b/ptgp/likelihoods/negative_binomial.py index 788e3d5..4c0a537 100644 --- a/ptgp/likelihoods/negative_binomial.py +++ b/ptgp/likelihoods/negative_binomial.py @@ -20,8 +20,10 @@ class NegativeBinomial(Likelihood): Number of Gauss-Hermite quadrature points (default 20). """ + param_names = ("alpha",) + def __init__(self, alpha, invlink=None, n_points=20): - self.alpha = alpha + self.alpha = pt.as_tensor_variable(alpha) self.invlink = invlink or pt.exp self.n_points = n_points diff --git a/ptgp/likelihoods/student_t.py b/ptgp/likelihoods/student_t.py index 247dd19..60a4329 100644 --- a/ptgp/likelihoods/student_t.py +++ b/ptgp/likelihoods/student_t.py @@ -18,9 +18,11 @@ class StudentT(Likelihood): Number of Gauss-Hermite quadrature points (default 20). """ + param_names = ("nu", "sigma") + def __init__(self, nu, sigma, n_points=20): - self.nu = nu - self.sigma = sigma + self.nu = pt.as_tensor_variable(nu) + self.sigma = pt.as_tensor_variable(sigma) self.n_points = n_points def _log_prob(self, f, y): diff --git a/tests/likelihoods/test_base.py b/tests/likelihoods/test_base.py index 9d2ab4f..71b9690 100644 --- a/tests/likelihoods/test_base.py +++ b/tests/likelihoods/test_base.py @@ -6,7 +6,7 @@ import pytensor.tensor as pt import pytest -from ptgp.likelihoods import Bernoulli, Gaussian, Poisson +from ptgp.likelihoods import Bernoulli, Gaussian, NegativeBinomial, Poisson, StudentT def _eval(*tensors): @@ -84,6 +84,28 @@ def test_reroot_matches_multidim_input(self): got = pytensor.function([X_new], rerooted)(new) np.testing.assert_allclose(got, 0.1 + np.sum(new**2, axis=1)) + def test_reroots_all_tensor_params(self): + X = pt.matrix("X", shape=(None, 1)) + new = np.array([[1.0], [2.0], [3.0]]) + out = StudentT(nu=3.0 + X[:, 0] ** 2, sigma=_hetero(X)).clone_replace_data( + pt.as_tensor_variable(new) + ) + nu_val, sigma_val = _eval(out.nu, out.sigma) + np.testing.assert_allclose(nu_val, 3.0 + new[:, 0] ** 2) + np.testing.assert_allclose(sigma_val, _hetero(new)) + + def test_negative_binomial_alpha(self): + X = pt.matrix("X", shape=(None, 1)) + new = np.array([[0.0], [2.0]]) + out = NegativeBinomial(alpha=_hetero(X)).clone_replace_data(pt.as_tensor_variable(new)) + np.testing.assert_allclose(_eval(out.alpha), _hetero(new)) + + def test_leaves_scalar_params_untouched(self): + X = pt.matrix("X", shape=(None, 1)) + lik = StudentT(nu=4.0, sigma=_hetero(X)) + out = lik.clone_replace_data(pt.matrix("X_new", shape=(None, 1))) + assert out.nu is lik.nu + def test_returns_copy_leaving_original_untouched(self): X = pt.matrix("X", shape=(None, 1)) lik = Gaussian(_hetero(X)) @@ -92,6 +114,11 @@ def test_returns_copy_leaving_original_untouched(self): assert lik.sigma is original assert out.sigma is not original + def test_preserves_non_parameter_attributes(self): + X = pt.matrix("X", shape=(None, 1)) + lik = StudentT(nu=_hetero(X), sigma=0.5, n_points=33) + assert lik.clone_replace_data(pt.matrix("X_new", shape=(None, 1))).n_points == 33 + def test_scalar_hyperparam_survives_reroot(self): """A parameter mixing data with a scalar placeholder re-roots only the data; the scalar still flows through to the result.""" From e29178ffcbcc79cd4bdcb045135e15137fe04a0a Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Fri, 29 May 2026 15:50:46 +0200 Subject: [PATCH 3/8] More functional design for Likelihood --- ptgp/gp/svgp.py | 2 +- ptgp/gp/unapproximated.py | 10 +- ptgp/gp/vfe.py | 10 +- ptgp/likelihoods/__init__.py | 26 +- ptgp/likelihoods/base.py | 285 ++++++++++++-------- ptgp/likelihoods/bernoulli.py | 48 ++-- ptgp/likelihoods/gaussian.py | 61 +++-- ptgp/likelihoods/negative_binomial.py | 52 ++-- ptgp/likelihoods/poisson.py | 46 ++-- ptgp/likelihoods/student_t.py | 49 ++-- tests/likelihoods/test_base.py | 125 ++++++--- tests/likelihoods/test_functional_api.py | 104 +++++++ tests/likelihoods/test_negative_binomial.py | 4 +- tests/likelihoods/test_poisson.py | 6 +- tests/test_heteroskedastic_predict.py | 19 +- 15 files changed, 567 insertions(+), 280 deletions(-) create mode 100644 tests/likelihoods/test_functional_api.py diff --git a/ptgp/gp/svgp.py b/ptgp/gp/svgp.py index a4f34b0..67eabca 100644 --- a/ptgp/gp/svgp.py +++ b/ptgp/gp/svgp.py @@ -230,7 +230,7 @@ def predict_marginal(self, X, incl_lik=False): fmean, fvar = conditional_unwhitened(A, Kmn, Knn_diag, self.q_mu, self.q_sqrt) fmean = fmean + self.mean(X) if incl_lik: - return self.likelihood.clone_replace_data(X).predict_mean_and_var(fmean, fvar) + return self.likelihood.at(X).predict_mean_and_var(fmean, fvar) return fmean, fvar def predict_joint(self, X): diff --git a/ptgp/gp/unapproximated.py b/ptgp/gp/unapproximated.py index 795998f..a2e3f9f 100644 --- a/ptgp/gp/unapproximated.py +++ b/ptgp/gp/unapproximated.py @@ -18,6 +18,10 @@ class Unapproximated: Mean function (default: ``Zero()``). sigma : tensor or PyMC random variable Observation noise standard deviation. + x : tensor, optional + The design matrix ``sigma`` was built against, for heteroskedastic + noise. Pass it so sigma can be re-rooted onto the test inputs at predict + time. """ extra_vars = () @@ -25,11 +29,11 @@ class Unapproximated: default_objective = staticmethod(marginal_log_likelihood) predict_needs_data = True - def __init__(self, kernel, mean=None, sigma=None): + def __init__(self, kernel, mean=None, sigma=None, x=None): """Store the kernel and mean; build a Gaussian likelihood from sigma.""" self.kernel = kernel self.mean = mean if mean is not None else Zero() - self.likelihood = Gaussian(sigma) + self.likelihood = Gaussian(sigma, x=x) def predict_marginal(self, X_new, X_train, y_train, incl_lik=False): """Posterior marginal mean and variance at each point in X_new. @@ -64,5 +68,5 @@ def predict_marginal(self, X_new, X_train, y_train, incl_lik=False): fvar = Kss_diag - pt.sum(Kns * (Knn_inv @ Kns), axis=0) if incl_lik: - return self.likelihood.clone_replace_data(X_new).predict_mean_and_var(fmean, fvar) + return self.likelihood.at(X_new).predict_mean_and_var(fmean, fvar) return fmean, fvar diff --git a/ptgp/gp/vfe.py b/ptgp/gp/vfe.py index 11c5fa6..a3a5f44 100644 --- a/ptgp/gp/vfe.py +++ b/ptgp/gp/vfe.py @@ -22,12 +22,16 @@ class VFE: Observation noise standard deviation. inducing_variable : InducingVariables Inducing point locations. + x : tensor, optional + The design matrix ``sigma`` was built against, for heteroskedastic + noise. Pass it so sigma can be re-rooted onto the test inputs at predict + time. """ default_objective = staticmethod(collapsed_elbo) predict_needs_data = True - def __init__(self, kernel, mean=None, sigma=None, inducing_variable=None): + def __init__(self, kernel, mean=None, sigma=None, inducing_variable=None, x=None): """Store the kernel, mean, and inducing variable; build a Gaussian likelihood from sigma.""" if not hasattr(inducing_variable, "Z"): raise TypeError( @@ -37,7 +41,7 @@ def __init__(self, kernel, mean=None, sigma=None, inducing_variable=None): ) self.kernel = kernel self.mean = mean if mean is not None else Zero() - self.likelihood = Gaussian(sigma) + self.likelihood = Gaussian(sigma, x=x) self.inducing_variable = inducing_variable @property @@ -87,5 +91,5 @@ def predict_marginal(self, X_new, X_train, y_train, incl_lik=False): fvar = Kss_diag - pt.sum(Kus * ((Kuu_inv - Sigma_inv) @ Kus), axis=0) if incl_lik: - return self.likelihood.clone_replace_data(X_new).predict_mean_and_var(fmean, fvar) + return self.likelihood.at(X_new).predict_mean_and_var(fmean, fvar) return fmean, fvar diff --git a/ptgp/likelihoods/__init__.py b/ptgp/likelihoods/__init__.py index f51989b..a14b3c8 100644 --- a/ptgp/likelihoods/__init__.py +++ b/ptgp/likelihoods/__init__.py @@ -1,8 +1,30 @@ -from ptgp.likelihoods.base import Likelihood +from ptgp.likelihoods.base import ( + Likelihood, + op_of, + param, + predict_log_density, + predict_mean_and_var, + variational_expectation, + at, +) from ptgp.likelihoods.bernoulli import Bernoulli from ptgp.likelihoods.gaussian import Gaussian from ptgp.likelihoods.negative_binomial import NegativeBinomial from ptgp.likelihoods.poisson import Poisson from ptgp.likelihoods.student_t import StudentT -__all__ = ["Likelihood", "Gaussian", "Bernoulli", "StudentT", "Poisson", "NegativeBinomial"] +__all__ = [ + "Likelihood", + "Gaussian", + "Bernoulli", + "StudentT", + "Poisson", + "NegativeBinomial", + # Purely functional API — operate on a likelihood node via ``owner.op``. + "op_of", + "param", + "at", + "variational_expectation", + "predict_mean_and_var", + "predict_log_density", +] diff --git a/ptgp/likelihoods/base.py b/ptgp/likelihoods/base.py index 6669438..8d7e4e1 100644 --- a/ptgp/likelihoods/base.py +++ b/ptgp/likelihoods/base.py @@ -3,139 +3,97 @@ import numpy as np import pytensor.tensor as pt -from pytensor.graph.replace import clone_replace -from pytensor.graph.traversal import ancestors -from pytensor.tensor.type import TensorType +from pytensor.graph.basic import Apply +from pytensor.graph.null_type import NullType +from pytensor.graph.op import Op +from pytensor.graph.replace import graph_replace +_NULL = NullType() -def _data_inputs(value, X): - """Leaf nodes in ``value``'s graph that look like the design matrix ``X``. - A leaf (no owner) with the same ndim as ``X`` and matching feature - dimensions is taken to be a copy of the model input — whether it is a - symbolic placeholder, a ``pm.Data`` / shared variable, or a constant array - baked in via ``pt.as_tensor_variable``. Matching is by type, so ``pm.Data`` - (a SharedVariable whose type is ``TensorType``) is caught. PyMC random - variables and other computed nodes carry an owner and are excluded; scalars - never match, so literal constants like ``0.1`` are left alone. - """ - feat = X.type.shape[1:] - return [ - v - for v in ancestors([value]) - if v.owner is None - and isinstance(v.type, TensorType) - and v.type.ndim == X.type.ndim - and v.type.ndim > 0 - and all(a is None or b is None or a == b for a, b in zip(v.type.shape[1:], feat)) - ] - - -def _reroot(value, X): - """Re-root ``value`` onto ``X`` by replacing design-matrix-shaped leaves. - - The input(s) to replace are discovered from the graph, so the caller only - supplies the new data. ``rebuild_strict=False`` lets a baked constant array - (whose row dimension is statically frozen) be replaced by ``X`` without - coercing ``X`` back to that frozen shape. A data-independent ``value`` - (scalar or PyMC RV) is returned unchanged. - - ``X`` must be a 2D design matrix ``(N, D)``. The discovery in - :func:`_data_inputs` keys on dimensionality: parameters are per-observation - 1D vectors, so a 2D ``X`` is what distinguishes the data from the parameter - itself. A 1D ``X`` would collapse that gap and could match the parameter, so - it is rejected. - """ - if X.type.ndim < 2: - raise ValueError( - "Re-rooting a likelihood parameter requires a 2D design matrix (N, D); " - f"got X with ndim={X.type.ndim}. Parameters are per-observation 1D " - "vectors, so a 1D X cannot be distinguished from the parameter itself." - ) - value = pt.as_tensor_variable(value) - inputs = _data_inputs(value, X) - if not inputs: - return value - return clone_replace(value, {v: X for v in inputs}, rebuild_strict=False) +class LikelihoodOp(Op): + """Structural marker keeping a likelihood inside the PyTensor graph. + The Apply node consumes the design matrix (optional; input 0 when present) + and the parameters as its *inputs*, and emits a single ``NoneType`` handle as + output. It computes nothing — its sole purpose is to be a real graph node, so + ``graph_replace`` / ``fgraph.clone`` / PyMC conditioning carry the likelihood + (and re-root its parameters) instead of it being Python-side state those + operations would silently drop. The Op *type* (the subclass) encodes the + family, so behaviour is recoverable via ``owner.op``; the parameters are the + node's inputs. -class Likelihood: - """Base class for all PTGP likelihoods. - - Subclasses must implement ``_log_prob(f, y)`` which returns the symbolic - log-likelihood log p(y|f). - - Gaussian likelihoods override ``variational_expectation`` and - ``predict_mean_and_var`` with closed-form expressions. Non-Gaussian - likelihoods inherit the default Gauss-Hermite quadrature implementations. + Because the parameters are *inputs* and the handle output is never consumed + by the loss, the node is never on the differentiation path nor in a compiled + loss graph — so it needs no gradient, view, shape, or strip machinery. To + keep it alive across graph operations, hold its output (or register it as a + PyMC named variable). Parameters ---------- - invlink : callable, optional - Inverse link function mapping latent f to the natural parameter. - Subclasses set a default (e.g. exp for Poisson, sigmoid for Bernoulli). + param_names : tuple of str + Parameter names, in input order (after the optional design matrix). n_points : int - Number of Gauss-Hermite quadrature points (default 20). + Gauss-Hermite quadrature points for the default expectations. + invlink : callable or None + Inverse link function (set by subclasses that need one). + has_data : bool + Whether input 0 is the design matrix ``X`` (i.e. parameters depend on it). """ - n_points: int = 20 - invlink = None - param_names: tuple = () + __props__ = ("param_names", "n_points", "invlink", "has_data") - def clone_replace_data(self, X): - """Return a copy with every data-dependent parameter re-rooted onto X. + def __init__(self, param_names=(), n_points=20, invlink=None, has_data=False): + self.param_names = tuple(param_names) + self.n_points = n_points + self.invlink = invlink + self.has_data = has_data - Each parameter named in ``param_names`` whose graph depends on a free - data input is re-expressed over ``X``; data-independent parameters are - left untouched. Used at predict time to evaluate parameters at the test - inputs rather than the training inputs they were built against. - """ - new = copy.copy(self) - for name in self.param_names: - setattr(new, name, _reroot(getattr(self, name), X)) - return new + def make_node(self, *args): + args = [pt.as_tensor_variable(a) for a in args] + return Apply(self, args, [_NULL()]) + + def perform(self, node, inputs, outputs): + outputs[0][0] = None - def _log_prob(self, f, y): + # --- symbolic behaviour: subclasses implement these three --- + + def _log_prob(self, f, y, *params): """Symbolic log p(y|f). Subclasses must implement.""" raise NotImplementedError - def _conditional_mean(self, f): + def _conditional_mean(self, f, *params): """E[y|f]. Subclasses must implement.""" raise NotImplementedError - def _conditional_variance(self, f): + def _conditional_variance(self, f, *params): """Var[y|f]. Subclasses must implement.""" raise NotImplementedError - def variational_expectation(self, y, mu, var): - """E_{q(f)}[log p(y|f)] where q(f) = N(mu, var). + # --- expectations (defaults via quadrature; subclasses may override) --- - Default: Gauss-Hermite quadrature. - """ - return self._gauss_hermite(lambda f, y: self._log_prob(f, y), y, mu, var) + def variational_expectation(self, params, y, mu, var): + """E_{q(f)}[log p(y|f)] where q(f) = N(mu, var). Default: quadrature.""" + return self._gauss_hermite(lambda f, yy: self._log_prob(f, yy, *params), y, mu, var) - def predict_mean_and_var(self, mu, var): - """Predictive mean and variance: E_{q(f)}[E[y|f]], E_{q(f)}[Var[y|f]] + Var_{q(f)}[E[y|f]]. - - Default: Gauss-Hermite quadrature. - """ + def predict_mean_and_var(self, params, mu, var): + """Predictive mean and variance. Default: Gauss-Hermite quadrature.""" E_mean = self._gauss_hermite( - lambda f, _: self._conditional_mean(f), pt.zeros_like(mu), mu, var + lambda f, _: self._conditional_mean(f, *params), pt.zeros_like(mu), mu, var ) E_mean_sq = self._gauss_hermite( - lambda f, _: self._conditional_mean(f) ** 2, pt.zeros_like(mu), mu, var + lambda f, _: self._conditional_mean(f, *params) ** 2, pt.zeros_like(mu), mu, var ) E_var = self._gauss_hermite( - lambda f, _: self._conditional_variance(f), pt.zeros_like(mu), mu, var + lambda f, _: self._conditional_variance(f, *params), pt.zeros_like(mu), mu, var ) return E_mean, E_var + E_mean_sq - E_mean**2 - def predict_log_density(self, y, mu, var): - """log E_{q(f)}[p(y|f)] — predictive log-density at test points. - - Default: Gauss-Hermite quadrature in log-space for numerical stability. - """ - return self._gauss_hermite_logspace(lambda f, y: self._log_prob(f, y), y, mu, var) + def predict_log_density(self, params, y, mu, var): + """log E_{q(f)}[p(y|f)]. Default: log-space Gauss-Hermite quadrature.""" + return self._gauss_hermite_logspace( + lambda f, yy: self._log_prob(f, yy, *params), y, mu, var + ) def _gauss_hermite(self, func, y, mu, var): """E_{q(f)}[func(f, y)] via Gauss-Hermite quadrature.""" @@ -143,27 +101,134 @@ def _gauss_hermite(self, func, y, mu, var): gh_points = pt.as_tensor_variable(gh_points) gh_weights = pt.as_tensor_variable(gh_weights / np.sqrt(np.pi)) - # f = mu + sqrt(2 * var) * t_j, shape (N, n_points) sd = pt.sqrt(var)[:, None] F = mu[:, None] + pt.sqrt(2.0) * sd * gh_points[None, :] - - # func(f, y) at each quadrature point, shape (N, n_points) vals = func(F, y[:, None]) - return pt.sum(vals * gh_weights[None, :], axis=1) def _gauss_hermite_logspace(self, func, y, mu, var): - """log E_{q(f)}[exp(func(f, y))] via Gauss-Hermite quadrature. - - Uses logsumexp for numerical stability. - """ + """log E_{q(f)}[exp(func(f, y))] via quadrature, using logsumexp.""" gh_points, gh_weights = np.polynomial.hermite.hermgauss(self.n_points) gh_points = pt.as_tensor_variable(gh_points) log_weights = pt.as_tensor_variable(np.log(gh_weights / np.sqrt(np.pi))) sd = pt.sqrt(var)[:, None] F = mu[:, None] + pt.sqrt(2.0) * sd * gh_points[None, :] - log_vals = func(F, y[:, None]) + log_weights[None, :] - return pt.logsumexp(log_vals, axis=1) + + +# --- Purely functional API ------------------------------------------------ +# +# A likelihood is fully described by its node: ``owner.op`` is the family (with +# behaviour), and the node's *inputs* are the optional design matrix followed by +# the parameters. These free functions take such a node (a "lik" — the NoneType +# handle) and dispatch through ``owner.op``, so a likelihood can be used with no +# holder at all. The :class:`Likelihood` holder is a thin view built on these. + + +def _params_of(node): + """The parameter inputs of a likelihood node (after the optional X slot).""" + off = 1 if node.op.has_data else 0 + return list(node.inputs[off:]) + + +def op_of(lik): + """The :class:`LikelihoodOp` behind a likelihood node (``owner.op``).""" + return lik.owner.op + + +def param(lik, name): + """The named parameter input of a likelihood node (e.g. ``"sigma"``).""" + node = lik.owner + off = 1 if node.op.has_data else 0 + return node.inputs[off + node.op.param_names.index(name)] + + +def at(lik, X): + """Re-root a likelihood node onto design matrix ``X``. + + A no-op for a likelihood with no design matrix (``op.has_data`` is False) — + there is no ``X`` input to swap. Otherwise returns a new handle whose + parameter inputs are re-expressed over ``X`` (RV-safe via ``graph_replace``). + """ + node = lik.owner + if not node.op.has_data: + return lik + return graph_replace([lik], {node.inputs[0]: X}, strict=False)[0] + + +def variational_expectation(lik, y, mu, var): + """E_{q(f)}[log p(y|f)] for a likelihood node, dispatched via ``owner.op``.""" + return lik.owner.op.variational_expectation(_params_of(lik.owner), y, mu, var) + + +def predict_mean_and_var(lik, mu, var): + """Predictive mean and variance for a likelihood node.""" + return lik.owner.op.predict_mean_and_var(_params_of(lik.owner), mu, var) + + +def predict_log_density(lik, y, mu, var): + """Predictive log-density for a likelihood node.""" + return lik.owner.op.predict_log_density(_params_of(lik.owner), y, mu, var) + + +def _param_property(name): + """Expose a named parameter as ``self.`` (the parameter input).""" + return property(lambda self: self._params[type(self).param_names.index(name)]) + + +class Likelihood: + """Public likelihood object — a thin view over a :class:`LikelihoodOp` node. + + Every likelihood is a node: construction builds ``op(x?, *params)`` whose + single ``NoneType`` output (``self._lik``) is the handle, and whose inputs + are the optional design matrix followed by the parameter graphs. The handle + keeps the likelihood in the PyTensor graph (carried by ``graph_replace`` / + ``clone``); the methods read the parameters off the node's inputs and + dispatch behaviour through ``owner.op`` — the same calls the free functions + above make. + + The node is never consumed by the loss (the loss uses the parameter graphs + directly, via ``.sigma`` etc.), so it stays off the gradient path and out of + compiled graphs. :meth:`at` re-roots the parameters when there's a design + matrix; it is a no-op otherwise (homoskedastic or parameterless). + + Subclasses set ``op_cls``, ``param_names``, and expose each parameter via + :func:`_param_property`. + """ + + op_cls = LikelihoodOp + param_names = () + n_points = 20 + + def __init__(self, x=None, n_points=20, invlink=None, **params): + self.n_points = n_points + self.invlink = invlink + has_data = bool(self.param_names) and x is not None + self._op = self.op_cls(self.param_names, n_points, invlink, has_data=has_data) + ordered = [pt.as_tensor_variable(params[name]) for name in self.param_names] + args = ([pt.as_tensor_variable(x)] if has_data else []) + ordered + self._lik = self._op(*args) # NoneType handle — every likelihood is a node + + @property + def _params(self): + return _params_of(self._lik.owner) + + def at(self, X): + """Return a copy with parameters re-rooted onto ``X`` (no-op without a design matrix).""" + new = copy.copy(self) + new._lik = at(self._lik, X) + return new + + def variational_expectation(self, y, mu, var): + """E_{q(f)}[log p(y|f)] where q(f) = N(mu, var).""" + return self._op.variational_expectation(self._params, y, mu, var) + + def predict_mean_and_var(self, mu, var): + """Predictive mean and variance under q(f) = N(mu, var).""" + return self._op.predict_mean_and_var(self._params, mu, var) + + def predict_log_density(self, y, mu, var): + """Predictive log-density log E_{q(f)}[p(y|f)] at test points.""" + return self._op.predict_log_density(self._params, y, mu, var) diff --git a/ptgp/likelihoods/bernoulli.py b/ptgp/likelihoods/bernoulli.py index 583f7a4..0c0b6f8 100644 --- a/ptgp/likelihoods/bernoulli.py +++ b/ptgp/likelihoods/bernoulli.py @@ -1,6 +1,6 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood +from ptgp.likelihoods.base import Likelihood, LikelihoodOp def inv_probit(x): @@ -9,22 +9,8 @@ def inv_probit(x): return 0.5 * (1.0 + pt.erf(x / pt.sqrt(2.0))) * (1.0 - 2.0 * jitter) + jitter -class Bernoulli(Likelihood): - """Bernoulli likelihood: p(y=1|f) = invlink(f). - - Default link is probit. Variational expectation via Gauss-Hermite quadrature. - - Parameters - ---------- - invlink : callable, optional - Inverse link function (default: probit). Use ``pt.sigmoid`` for logit link. - n_points : int - Number of Gauss-Hermite quadrature points (default 20). - """ - - def __init__(self, invlink=None, n_points=20): - self.invlink = invlink or inv_probit - self.n_points = n_points +class BernoulliOp(LikelihoodOp): + """Bernoulli likelihood Op. Closed-form predictive for the probit link.""" def _log_prob(self, f, y): p = self.invlink(f) @@ -37,12 +23,28 @@ def _conditional_variance(self, f): p = self.invlink(f) return p * (1.0 - p) - def predict_mean_and_var(self, mu, var): - """Closed-form for probit link: p = Phi(mu / sqrt(1 + var)). - - Falls back to quadrature for other link functions. - """ + def predict_mean_and_var(self, params, mu, var): if self.invlink is inv_probit: p = inv_probit(mu / pt.sqrt(1.0 + var)) return p, p - p**2 - return super().predict_mean_and_var(mu, var) + return super().predict_mean_and_var(params, mu, var) + + +class Bernoulli(Likelihood): + """Bernoulli likelihood: p(y=1|f) = invlink(f). + + Default link is probit. Variational expectation via Gauss-Hermite quadrature. + + Parameters + ---------- + invlink : callable, optional + Inverse link function (default: probit). Use ``pt.sigmoid`` for logit link. + n_points : int + Number of Gauss-Hermite quadrature points (default 20). + """ + + op_cls = BernoulliOp + param_names = () + + def __init__(self, invlink=None, n_points=20): + super().__init__(n_points=n_points, invlink=invlink or inv_probit) diff --git a/ptgp/likelihoods/gaussian.py b/ptgp/likelihoods/gaussian.py index 9b1964a..3e0119f 100644 --- a/ptgp/likelihoods/gaussian.py +++ b/ptgp/likelihoods/gaussian.py @@ -1,10 +1,36 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood +from ptgp.likelihoods.base import Likelihood, LikelihoodOp, _param_property LOG2PI = pt.log(2.0 * pt.pi) +class GaussianOp(LikelihoodOp): + """Gaussian likelihood Op: closed-form expectations (no quadrature).""" + + def _log_prob(self, f, y, sigma): + return -0.5 * (LOG2PI + pt.log(sigma**2) + (y - f) ** 2 / sigma**2) + + def _conditional_mean(self, f, sigma): + return f + + def _conditional_variance(self, f, sigma): + return pt.ones_like(f) * sigma**2 + + def variational_expectation(self, params, y, mu, var): + (sigma,) = params + return -0.5 * (LOG2PI + pt.log(sigma**2) + ((y - mu) ** 2 + var) / sigma**2) + + def predict_mean_and_var(self, params, mu, var): + (sigma,) = params + return mu, var + sigma**2 + + def predict_log_density(self, params, y, mu, var): + (sigma,) = params + total_var = var + sigma**2 + return -0.5 * (LOG2PI + pt.log(total_var) + (y - mu) ** 2 / total_var) + + class Gaussian(Likelihood): """Gaussian likelihood p(y|f) = N(y; f, sigma^2). @@ -14,31 +40,16 @@ class Gaussian(Likelihood): ---------- sigma : tensor Observation noise standard deviation. May be a scalar (homoskedastic) - or a vector built against a symbolic ``X`` (heteroskedastic). At - predict time, :meth:`Likelihood.clone_replace_data` re-roots sigma's graph onto - the test inputs. + or a vector built against a design matrix ``X`` (heteroskedastic). + x : tensor, optional + The design matrix ``sigma`` was built against. Pass it when ``sigma`` is + heteroskedastic so :meth:`Likelihood.at` can re-root + sigma onto the test inputs at predict time. """ + op_cls = GaussianOp param_names = ("sigma",) + sigma = _param_property("sigma") - def __init__(self, sigma): - self.sigma = pt.as_tensor_variable(sigma) - - def _log_prob(self, f, y): - return -0.5 * (LOG2PI + pt.log(self.sigma**2) + (y - f) ** 2 / self.sigma**2) - - def _conditional_mean(self, f): - return f - - def _conditional_variance(self, f): - return pt.ones_like(f) * self.sigma**2 - - def variational_expectation(self, y, mu, var): - return -0.5 * (LOG2PI + pt.log(self.sigma**2) + ((y - mu) ** 2 + var) / self.sigma**2) - - def predict_mean_and_var(self, mu, var): - return mu, var + self.sigma**2 - - def predict_log_density(self, y, mu, var): - total_var = var + self.sigma**2 - return -0.5 * (LOG2PI + pt.log(total_var) + (y - mu) ** 2 / total_var) + def __init__(self, sigma, x=None): + super().__init__(x=x, sigma=sigma) diff --git a/ptgp/likelihoods/negative_binomial.py b/ptgp/likelihoods/negative_binomial.py index 4c0a537..90433fe 100644 --- a/ptgp/likelihoods/negative_binomial.py +++ b/ptgp/likelihoods/negative_binomial.py @@ -1,6 +1,27 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood +from ptgp.likelihoods.base import Likelihood, LikelihoodOp, _param_property + + +class NegativeBinomialOp(LikelihoodOp): + """Negative binomial likelihood Op. Expectations via quadrature.""" + + def _log_prob(self, f, y, alpha): + mu = self.invlink(f) + return ( + pt.gammaln(y + alpha) + - pt.gammaln(alpha) + - pt.gammaln(y + 1.0) + + alpha * pt.log(alpha / (alpha + mu)) + + y * pt.log(mu / (alpha + mu)) + ) + + def _conditional_mean(self, f, alpha): + return self.invlink(f) + + def _conditional_variance(self, f, alpha): + mu = self.invlink(f) + return mu + mu**2 / alpha class NegativeBinomial(Likelihood): @@ -18,29 +39,14 @@ class NegativeBinomial(Likelihood): Inverse link function (default: exp). n_points : int Number of Gauss-Hermite quadrature points (default 20). + x : tensor, optional + The design matrix ``alpha`` was built against, for a heteroskedastic + parameter re-rooted onto the test inputs at predict time. """ + op_cls = NegativeBinomialOp param_names = ("alpha",) + alpha = _param_property("alpha") - def __init__(self, alpha, invlink=None, n_points=20): - self.alpha = pt.as_tensor_variable(alpha) - self.invlink = invlink or pt.exp - self.n_points = n_points - - def _log_prob(self, f, y): - mu = self.invlink(f) - alpha = self.alpha - return ( - pt.gammaln(y + alpha) - - pt.gammaln(alpha) - - pt.gammaln(y + 1.0) - + alpha * pt.log(alpha / (alpha + mu)) - + y * pt.log(mu / (alpha + mu)) - ) - - def _conditional_mean(self, f): - return self.invlink(f) - - def _conditional_variance(self, f): - mu = self.invlink(f) - return mu + mu**2 / self.alpha + def __init__(self, alpha, invlink=None, n_points=20, x=None): + super().__init__(x=x, n_points=n_points, invlink=invlink or pt.exp, alpha=alpha) diff --git a/ptgp/likelihoods/poisson.py b/ptgp/likelihoods/poisson.py index d434ff4..b6d970c 100644 --- a/ptgp/likelihoods/poisson.py +++ b/ptgp/likelihoods/poisson.py @@ -1,6 +1,25 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood +from ptgp.likelihoods.base import Likelihood, LikelihoodOp + + +class PoissonOp(LikelihoodOp): + """Poisson likelihood Op. Closed-form variational expectation for log link.""" + + def _log_prob(self, f, y): + lam = self.invlink(f) + return y * pt.log(lam) - lam - pt.gammaln(y + 1.0) + + def _conditional_mean(self, f): + return self.invlink(f) + + def _conditional_variance(self, f): + return self.invlink(f) + + def variational_expectation(self, params, y, mu, var): + if self.invlink is pt.exp: + return y * mu - pt.exp(mu + var / 2.0) - pt.gammaln(y + 1.0) + return super().variational_expectation(params, y, mu, var) class Poisson(Likelihood): @@ -17,25 +36,8 @@ class Poisson(Likelihood): Number of Gauss-Hermite quadrature points (default 20). """ - def __init__(self, invlink=None, n_points=20): - self.invlink = invlink or pt.exp - self.n_points = n_points - - def _log_prob(self, f, y): - lam = self.invlink(f) - return y * pt.log(lam) - lam - pt.gammaln(y + 1.0) - - def _conditional_mean(self, f): - return self.invlink(f) + op_cls = PoissonOp + param_names = () - def _conditional_variance(self, f): - return self.invlink(f) - - def variational_expectation(self, y, mu, var): - """Closed-form for log link: E_q[y*f - exp(f) - log(y!)]. - - Falls back to quadrature for other link functions. - """ - if self.invlink is pt.exp: - return y * mu - pt.exp(mu + var / 2.0) - pt.gammaln(y + 1.0) - return super().variational_expectation(y, mu, var) + def __init__(self, invlink=None, n_points=20): + super().__init__(n_points=n_points, invlink=invlink or pt.exp) diff --git a/ptgp/likelihoods/student_t.py b/ptgp/likelihoods/student_t.py index 60a4329..29145c1 100644 --- a/ptgp/likelihoods/student_t.py +++ b/ptgp/likelihoods/student_t.py @@ -1,6 +1,25 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood +from ptgp.likelihoods.base import Likelihood, LikelihoodOp, _param_property + + +class StudentTOp(LikelihoodOp): + """Student-T likelihood Op. Expectations via Gauss-Hermite quadrature.""" + + def _log_prob(self, f, y, nu, sigma): + z = (y - f) / sigma + return ( + pt.gammaln((nu + 1.0) / 2.0) + - pt.gammaln(nu / 2.0) + - 0.5 * pt.log(nu * pt.pi * sigma**2) + - 0.5 * (nu + 1.0) * pt.log1p(z**2 / nu) + ) + + def _conditional_mean(self, f, nu, sigma): + return f + + def _conditional_variance(self, f, nu, sigma): + return pt.ones_like(f) * sigma**2 * nu / (nu - 2.0) class StudentT(Likelihood): @@ -16,27 +35,15 @@ class StudentT(Likelihood): Scale parameter. n_points : int Number of Gauss-Hermite quadrature points (default 20). + x : tensor, optional + The design matrix ``nu``/``sigma`` were built against, for + heteroskedastic parameters re-rooted onto the test inputs at predict. """ + op_cls = StudentTOp param_names = ("nu", "sigma") + nu = _param_property("nu") + sigma = _param_property("sigma") - def __init__(self, nu, sigma, n_points=20): - self.nu = pt.as_tensor_variable(nu) - self.sigma = pt.as_tensor_variable(sigma) - self.n_points = n_points - - def _log_prob(self, f, y): - nu, sigma = self.nu, self.sigma - z = (y - f) / sigma - return ( - pt.gammaln((nu + 1.0) / 2.0) - - pt.gammaln(nu / 2.0) - - 0.5 * pt.log(nu * pt.pi * sigma**2) - - 0.5 * (nu + 1.0) * pt.log1p(z**2 / nu) - ) - - def _conditional_mean(self, f): - return f - - def _conditional_variance(self, f): - return pt.ones_like(f) * self.sigma**2 * self.nu / (self.nu - 2.0) + def __init__(self, nu, sigma, n_points=20, x=None): + super().__init__(x=x, n_points=n_points, nu=nu, sigma=sigma) diff --git a/tests/likelihoods/test_base.py b/tests/likelihoods/test_base.py index 71b9690..3bca5cb 100644 --- a/tests/likelihoods/test_base.py +++ b/tests/likelihoods/test_base.py @@ -63,31 +63,47 @@ def softplus(f): class TestCloneReplaceData: - @pytest.mark.parametrize("kind", ["symbolic", "baked", "pm_data"]) + @pytest.mark.parametrize("kind", ["symbolic", "pm_data"]) def test_reroot_matches_direct_build(self, kind): - """Re-rooting yields the parameter evaluated at the new inputs, however - the training data was supplied. ``train`` and ``new`` differ in length so - a baked array's statically frozen row dimension cannot be re-imposed.""" + """Re-rooting yields the parameter evaluated at the new inputs, for the + variable-length data providers used in prediction. ``train`` and ``new`` + differ in length to exercise re-rooting onto a different number of + points (a symbolic placeholder or pm.Data is ``(None, D)``).""" train = np.linspace(-1.0, 1.0, 5)[:, None] new = np.array([[0.0], [1.0], [2.0]]) X_new = pt.matrix("X_new", shape=(None, 1)) with pm.Model(): - rerooted = Gaussian(_hetero(_provider(train, kind))).clone_replace_data(X_new).sigma + Xtr = _provider(train, kind) + rerooted = Gaussian(_hetero(Xtr), x=Xtr).at(X_new).sigma got = pytensor.function([X_new], rerooted)(new) np.testing.assert_allclose(got, _hetero(new)) + def test_baked_constant_x_reroots_at_matching_length(self): + """A frozen numpy constant as ``x`` re-roots cleanly only at its own + length: its static row dimension propagates into the parameter graph at + build time, and re-rooting (which uses ``graph_replace`` to preserve the + identity of any RV hyperparameters) cannot un-specialize it afterward. + Variable-length re-rooting requires a symbolic placeholder or pm.Data.""" + arr = np.linspace(-1.0, 1.0, 5)[:, None] + same_len = np.linspace(2.0, 3.0, 5)[:, None] + X_new = pt.matrix("X_new", shape=(None, 1)) + Xc = pt.as_tensor_variable(arr) # one node, reused as both param input and x= + rerooted = Gaussian(_hetero(Xc), x=Xc).at(X_new).sigma + got = pytensor.function([X_new], rerooted)(same_len) + np.testing.assert_allclose(got, _hetero(same_len)) + def test_reroot_matches_multidim_input(self): new = np.array([[0.0, 1.0], [2.0, 3.0]]) X = pt.matrix("X", shape=(None, 2)) X_new = pt.matrix("X_new", shape=(None, 2)) - rerooted = Gaussian(0.1 + pt.sum(X**2, axis=1)).clone_replace_data(X_new).sigma + rerooted = Gaussian(0.1 + pt.sum(X**2, axis=1), x=X).at(X_new).sigma got = pytensor.function([X_new], rerooted)(new) np.testing.assert_allclose(got, 0.1 + np.sum(new**2, axis=1)) def test_reroots_all_tensor_params(self): X = pt.matrix("X", shape=(None, 1)) new = np.array([[1.0], [2.0], [3.0]]) - out = StudentT(nu=3.0 + X[:, 0] ** 2, sigma=_hetero(X)).clone_replace_data( + out = StudentT(nu=3.0 + X[:, 0] ** 2, sigma=_hetero(X), x=X).at( pt.as_tensor_variable(new) ) nu_val, sigma_val = _eval(out.nu, out.sigma) @@ -97,56 +113,99 @@ def test_reroots_all_tensor_params(self): def test_negative_binomial_alpha(self): X = pt.matrix("X", shape=(None, 1)) new = np.array([[0.0], [2.0]]) - out = NegativeBinomial(alpha=_hetero(X)).clone_replace_data(pt.as_tensor_variable(new)) + out = NegativeBinomial(alpha=_hetero(X), x=X).at( + pt.as_tensor_variable(new) + ) np.testing.assert_allclose(_eval(out.alpha), _hetero(new)) def test_leaves_scalar_params_untouched(self): + """A data-independent parameter keeps its value through re-rooting (it + shares the likelihood node, so its output object is rebuilt, but the + value it computes is unchanged).""" X = pt.matrix("X", shape=(None, 1)) - lik = StudentT(nu=4.0, sigma=_hetero(X)) - out = lik.clone_replace_data(pt.matrix("X_new", shape=(None, 1))) - assert out.nu is lik.nu + lik = StudentT(nu=4.0, sigma=_hetero(X), x=X) + out = lik.at(pt.as_tensor_variable(np.array([[0.0], [1.0], [2.0]]))) + np.testing.assert_allclose(_eval(out.nu), 4.0) def test_returns_copy_leaving_original_untouched(self): X = pt.matrix("X", shape=(None, 1)) - lik = Gaussian(_hetero(X)) + lik = Gaussian(_hetero(X), x=X) original = lik.sigma - out = lik.clone_replace_data(pt.matrix("X_new", shape=(None, 1))) + out = lik.at(pt.matrix("X_new", shape=(None, 1))) assert lik.sigma is original assert out.sigma is not original def test_preserves_non_parameter_attributes(self): X = pt.matrix("X", shape=(None, 1)) - lik = StudentT(nu=_hetero(X), sigma=0.5, n_points=33) - assert lik.clone_replace_data(pt.matrix("X_new", shape=(None, 1))).n_points == 33 + lik = StudentT(nu=_hetero(X), sigma=0.5, n_points=33, x=X) + assert lik.at(pt.matrix("X_new", shape=(None, 1))).n_points == 33 def test_scalar_hyperparam_survives_reroot(self): """A parameter mixing data with a scalar placeholder re-roots only the - data; the scalar still flows through to the result.""" + data; the scalar leaf keeps its identity and flows through to the + result (graph_replace preserves sibling leaves).""" X = pt.matrix("X", shape=(None, 1)) hyp = pt.scalar("hyp") new = np.array([[0.0], [1.0], [2.0]]) X_new = pt.matrix("X_new", shape=(None, 1)) - out = Gaussian(hyp + 0.05 * X[:, 0] ** 2).clone_replace_data(X_new) + out = Gaussian(hyp + 0.05 * X[:, 0] ** 2, x=X).at(X_new) got = pytensor.function([X_new, hyp], out.sigma)(new, 2.0) np.testing.assert_allclose(got, 2.0 + 0.05 * new[:, 0] ** 2) def test_eager_numpy_parameter_is_not_rerooted(self): - """Pure-numpy arithmetic collapses the data dependence to a fixed vector - before pytensor sees it, leaving nothing to re-root.""" + """Pure-numpy arithmetic collapses the data dependence to a fixed vector; + with no ``x=`` the likelihood carries no design matrix, so ``at`` is a + no-op and the value is preserved.""" arr = np.linspace(-1.0, 1.0, 5)[:, None] - sigma = pt.as_tensor_variable(0.1 + 0.05 * arr[:, 0] ** 2) - out = Gaussian(sigma).clone_replace_data(pt.matrix("X_new", shape=(None, 1))) - assert out.sigma is sigma - - def test_mismatched_feature_count_is_not_rerooted(self): + expected = 0.1 + 0.05 * arr[:, 0] ** 2 + lik = Gaussian(pt.as_tensor_variable(expected)) + out = lik.at(pt.matrix("X_new", shape=(None, 1))) + assert out.sigma is lik.sigma # at did nothing (no design matrix) + np.testing.assert_allclose(_eval(out.sigma), expected) + + def test_no_x_means_not_rerooted(self): + """Omitting ``x=`` is an explicit opt-out: the likelihood carries no + design matrix, so ``at`` leaves the parameter untouched.""" sigma = 0.1 + pt.sum(pt.matrix("Xtr", shape=(None, 3)) ** 2, axis=1) - out = Gaussian(sigma).clone_replace_data(pt.matrix("X_new", shape=(None, 1))) - assert out.sigma is sigma - - def test_one_dimensional_X_is_rejected(self): - """A 1D X is indistinguishable from a per-observation parameter, so - re-rooting against it would risk replacing the parameter itself.""" + lik = Gaussian(sigma) + out = lik.at(pt.matrix("X_new", shape=(None, 3))) + assert out.sigma is lik.sigma + + def test_one_dimensional_X_new_is_supported(self): + """With an explicit, identity-keyed handle there is no parameter/data + ambiguity, so the replacement inputs need not be 2D — a 1D X_new + substitutes cleanly.""" + X = pt.vector("X", shape=(None,)) + new = np.array([0.0, 1.0, 2.0]) + X_new = pt.vector("X_new", shape=(None,)) + out = Gaussian(0.1 + 0.05 * X**2, x=X).at(X_new) + got = pytensor.function([X_new], out.sigma)(new) + np.testing.assert_allclose(got, 0.1 + 0.05 * new**2) + + +class TestGradientThroughLikelihoodNode: + """The likelihood node is a pass-through, so gradients must flow through it + to the parameters' hyperparameters — for both homoskedastic (no design + matrix) and heteroskedastic nodes.""" + + def _ve_grad(self, lik, wrt, givens): + y = pt.as_tensor_variable(np.array([0.5, -0.3])) + mu = pt.as_tensor_variable(np.array([0.0, 0.2])) + var = pt.as_tensor_variable(np.array([1.0, 0.5])) + ve = pt.sum(lik.variational_expectation(y, mu, var)) + g = pytensor.grad(ve, wrt) + return pytensor.function(list(givens), g) + + def test_grad_homoskedastic(self): + alpha = pt.scalar("alpha") + lik = Gaussian(alpha) # node, has_data=False + val = self._ve_grad(lik, alpha, [alpha])(0.7) + assert np.isfinite(val) + + def test_grad_heteroskedastic(self): X = pt.matrix("X", shape=(None, 1)) - lik = Gaussian(0.1 + 0.05 * X[:, 0] ** 2) - with pytest.raises(ValueError, match=r"requires a 2D design matrix"): - lik.clone_replace_data(pt.vector("X_new", shape=(None,))) + alpha = pt.scalar("alpha") + lik = Gaussian(alpha + 0.05 * X[:, 0] ** 2, x=X) # node, has_data=True + fn = self._ve_grad(lik, alpha, [X, alpha]) + val = fn(np.array([[0.0], [1.0]]), 0.7) + assert np.isfinite(val) diff --git a/tests/likelihoods/test_functional_api.py b/tests/likelihoods/test_functional_api.py new file mode 100644 index 0000000..9956580 --- /dev/null +++ b/tests/likelihoods/test_functional_api.py @@ -0,0 +1,104 @@ +"""The likelihood works in a purely functional way: a likelihood node carries +its Op, design-matrix input, and parameters, so it can be used without the +stateful holder at all. Everything dispatches through ``owner.op``.""" + +import numpy as np +import pymc as pm +import pytensor +import pytensor.tensor as pt + +from pytensor.graph.fg import FunctionGraph + +from ptgp.likelihoods import ( + Gaussian, + op_of, + param, + predict_mean_and_var, + variational_expectation, + at, +) +from ptgp.likelihoods.gaussian import GaussianOp +from pytensor.graph.replace import clone_replace + + +def _eval(*tensors): + f = pytensor.function([], list(tensors) if len(tensors) > 1 else tensors[0]) + return f() + + +def test_likelihood_survives_clone_and_reroots_on_the_clone(): + """Regression: the likelihood handle lives in the graph as a node, so cloning + a model graph carries it — we recover the likelihood from the *clone* and + re-root it onto new inputs. Only possible because it is a graph node, not + Python-side state on the holder.""" + X = pt.matrix("X", shape=(None, 1)) + lik = Gaussian(0.1 + 0.05 * X[:, 0] ** 2, x=X) # holder; the handle is lik._lik + assert isinstance(op_of(lik._lik), GaussianOp) + X_new = pt.matrix("X_new", shape=(None, 1)) + X_new_test = np.array([[0.0], [1.0], [2.0]]) + mu = pt.as_tensor_variable(np.zeros(3)) + var = pt.as_tensor_variable(np.ones(3)) + + _, orig_var = lik.at(X_new).predict_mean_and_var(mu, var) + orig_eval = orig_var.eval({X_new: X_new_test}) + + [cloned_handle] = clone_replace([lik._lik]) + assert cloned_handle is not lik._lik + assert isinstance(op_of(cloned_handle), GaussianOp) + _, cloned_var = predict_mean_and_var(at(cloned_handle, X_new), mu, var) + cloned_eval = cloned_var.eval({X_new: X_new_test}) + + expected = 1.0 + (0.1 + 0.05 * X_new_test[:, 0] ** 2) ** 2 # var + sigma(X_new)**2 + np.testing.assert_allclose(orig_eval, expected) + np.testing.assert_allclose(cloned_eval, expected) + + +def test_node_built_without_holder_is_fully_usable(): + """Build a likelihood node directly from the Op — no holder — and drive it + through the functional API: param access, re-rooting, and prediction all + come from ``owner.op``.""" + X = pt.matrix("X", shape=(None, 1)) + sigma = 0.1 + 0.05 * X[:, 0] ** 2 + lik = GaussianOp(("sigma",), has_data=True)(X, sigma) # a node, no Likelihood object + + assert isinstance(op_of(lik), GaussianOp) + assert param(lik, "sigma") is sigma # the parameter is the node's input + + new = np.array([[0.0], [1.0], [2.0]]) + rerooted = at(lik, pt.as_tensor_variable(new)) + np.testing.assert_allclose(_eval(param(rerooted, "sigma")), 0.1 + 0.05 * new[:, 0] ** 2) + + +def test_functional_matches_holder(): + """The free functions and the holder methods are the same computation.""" + X = pt.matrix("X", shape=(None, 1)) + mu = pt.as_tensor_variable(np.array([0.0, 1.0])) + var = pt.as_tensor_variable(np.array([0.5, 0.2])) + new = pt.as_tensor_variable(np.array([[0.5], [1.5]])) + + holder = Gaussian(0.1 + 0.05 * X[:, 0] ** 2, x=X) + node = holder._lik # the likelihood handle (NoneType node); .sigma is the param + + hm, hv = holder.at(new).predict_mean_and_var(mu, var) + fm, fv = predict_mean_and_var(at(node, new), mu, var) + np.testing.assert_allclose(_eval(hm), _eval(fm)) + np.testing.assert_allclose(_eval(hv), _eval(fv)) + + +def test_functional_variational_expectation_preserves_rv_identity(): + """The functional path is RV-safe: an RV hyperparameter in the parameter + survives re-rooting, so it can be evaluated inside a model context.""" + with pm.Model(): + alpha = pm.HalfNormal("alpha") + X = pt.matrix("X", shape=(None, 1)) + lik = GaussianOp(("sigma",), has_data=True)(X, alpha + 0.05 * X[:, 0] ** 2) + new = pt.as_tensor_variable(np.array([[0.0], [2.0]])) + ve = variational_expectation( + at(lik, new), + pt.as_tensor_variable(np.array([0.0, 0.0])), + pt.as_tensor_variable(np.array([0.0, 0.0])), + pt.as_tensor_variable(np.array([1.0, 1.0])), + ) + # alpha is still a free input of the re-rooted expectation graph + val = pytensor.function([alpha], ve)(0.3) + assert np.isfinite(val).all() diff --git a/tests/likelihoods/test_negative_binomial.py b/tests/likelihoods/test_negative_binomial.py index 854129e..4245c8b 100644 --- a/tests/likelihoods/test_negative_binomial.py +++ b/tests/likelihoods/test_negative_binomial.py @@ -37,8 +37,8 @@ def test_converges_to_poisson(self): # Use quadrature for both so comparison is apples-to-apples poisson_lik = Poisson(n_points=50) ve_poisson = _eval( - poisson_lik._gauss_hermite( - poisson_lik._log_prob, + poisson_lik._op._gauss_hermite( + poisson_lik._op._log_prob, pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var), diff --git a/tests/likelihoods/test_poisson.py b/tests/likelihoods/test_poisson.py index 6835f0a..22f56e0 100644 --- a/tests/likelihoods/test_poisson.py +++ b/tests/likelihoods/test_poisson.py @@ -23,11 +23,11 @@ def test_closed_form_matches_quadrature(self): pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var) ) ) - # Use base class quadrature via _gauss_hermite directly + # Use base-op quadrature via _gauss_hermite directly lik = Poisson(n_points=50) ve_quad = _eval( - lik._gauss_hermite( - lik._log_prob, + lik._op._gauss_hermite( + lik._op._log_prob, pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var), diff --git a/tests/test_heteroskedastic_predict.py b/tests/test_heteroskedastic_predict.py index 75be4aa..229e586 100644 --- a/tests/test_heteroskedastic_predict.py +++ b/tests/test_heteroskedastic_predict.py @@ -33,7 +33,7 @@ def test_incl_lik_adds_pointwise_noise(self): X_train, y_train, X_new = _data() X = pt.matrix("X", shape=(None, 1)) gp = Unapproximated( - kernel=ExpQuad(input_dim=1, ls=1.0), mean=Zero(), sigma=_hetero_sigma(X) + kernel=ExpQuad(input_dim=1, ls=1.0), mean=Zero(), sigma=_hetero_sigma(X), x=X ) X_new_t = pt.matrix("X_new", shape=(None, 1)) y_t = pt.vector("y", shape=(None,)) @@ -54,7 +54,7 @@ def _build(self, X): return VFE( kernel=ExpQuad(input_dim=1, ls=1.0), mean=Zero(), - sigma=_hetero_sigma(X), + sigma=_hetero_sigma(X), x=X, inducing_variable=Points(pt.as_tensor_variable(Z)), ) @@ -85,7 +85,7 @@ def test_incl_lik_adds_pointwise_noise(self): svgp = SVGP( kernel=ExpQuad(input_dim=1, ls=1.0), mean=Zero(), - likelihood=Gaussian(_hetero_sigma(X)), + likelihood=Gaussian(_hetero_sigma(X), x=X), inducing_variable=Points(pt.as_tensor_variable(Z)), variational_params=vp, ) @@ -149,10 +149,11 @@ def test_vfe_scalar(self): def test_pm_data_sigma_is_detected_as_data_dependent(): - """sigma built against a pm.Data SharedVariable must still be treated - as data-dependent. The heuristic filters by type (TensorType), not by - class (SharedVariable) — without this, pm.Data is silently invisible - to clone_replace_data and predictions use the wrong noise. + """sigma built against a pm.Data SharedVariable must still be re-rooted. + + The design matrix handle is keyed by identity (the ``x=`` argument), so a + pm.Data SharedVariable works exactly like a symbolic placeholder — passing + it as ``x=`` lets at swap it for the test inputs. """ X_train_arr = np.linspace(-1.5, 1.5, 10)[:, None].astype(np.float64) X_new_arr = np.linspace(-2.0, 2.0, 5)[:, None].astype(np.float64) @@ -162,7 +163,7 @@ class (SharedVariable) — without this, pm.Data is silently invisible gp = Unapproximated( kernel=ExpQuad(input_dim=1, ls=1.0), mean=Zero(), - sigma=_hetero_sigma(X), + sigma=_hetero_sigma(X), x=X, ) X_new_t = pt.matrix("X_new", shape=(None, 1)) y_t = pt.vector("y", shape=(None,)) @@ -186,7 +187,7 @@ def test_collapsed_elbo_with_heteroskedastic_sigma(): vfe = VFE( kernel=ExpQuad(input_dim=1, ls=1.0), mean=Zero(), - sigma=_hetero_sigma(X), + sigma=_hetero_sigma(X), x=X, inducing_variable=Points(pt.as_tensor_variable(Z)), ) elbo = collapsed_elbo(vfe, X, y).elbo From 3c07b831c98efbc0f6a612f0614baef4250acaac Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Fri, 29 May 2026 18:13:17 +0200 Subject: [PATCH 4/8] Now with custom Likelihood variables --- ptgp/likelihoods/__init__.py | 12 +- ptgp/likelihoods/base.py | 206 ++++++++++---------- ptgp/likelihoods/bernoulli.py | 26 +-- ptgp/likelihoods/gaussian.py | 28 ++- ptgp/likelihoods/negative_binomial.py | 25 ++- ptgp/likelihoods/poisson.py | 27 +-- ptgp/likelihoods/student_t.py | 22 +-- tests/likelihoods/test_functional_api.py | 27 ++- tests/likelihoods/test_negative_binomial.py | 5 +- tests/likelihoods/test_poisson.py | 5 +- 10 files changed, 183 insertions(+), 200 deletions(-) diff --git a/ptgp/likelihoods/__init__.py b/ptgp/likelihoods/__init__.py index a14b3c8..0b533f8 100644 --- a/ptgp/likelihoods/__init__.py +++ b/ptgp/likelihoods/__init__.py @@ -1,11 +1,13 @@ from ptgp.likelihoods.base import ( - Likelihood, + LikelihoodOp, + LikelihoodType, + LikelihoodVariable, + at, op_of, param, predict_log_density, predict_mean_and_var, variational_expectation, - at, ) from ptgp.likelihoods.bernoulli import Bernoulli from ptgp.likelihoods.gaussian import Gaussian @@ -14,12 +16,16 @@ from ptgp.likelihoods.student_t import StudentT __all__ = [ - "Likelihood", + # Family helpers — build a LikelihoodVariable. "Gaussian", "Bernoulli", "StudentT", "Poisson", "NegativeBinomial", + # Graph types. + "LikelihoodOp", + "LikelihoodType", + "LikelihoodVariable", # Purely functional API — operate on a likelihood node via ``owner.op``. "op_of", "param", diff --git a/ptgp/likelihoods/base.py b/ptgp/likelihoods/base.py index 8d7e4e1..1d88521 100644 --- a/ptgp/likelihoods/base.py +++ b/ptgp/likelihoods/base.py @@ -1,57 +1,111 @@ -import copy - import numpy as np import pytensor.tensor as pt -from pytensor.graph.basic import Apply -from pytensor.graph.null_type import NullType +from pytensor.graph.basic import Apply, Variable from pytensor.graph.op import Op from pytensor.graph.replace import graph_replace +from pytensor.graph.type import Type + + +class LikelihoodVariable(Variable): + """A likelihood that lives in the PyTensor graph *and* carries the accessor API. + + It is the output of a :class:`LikelihoodOp` node, so ``owner.op`` recovers + the family (with behaviour) and ``clone`` / ``graph_replace`` carry it. It + also exposes the methods we used to put on a holder, each dispatched through + ``owner.op`` (the free functions below): + + - parameter access — ``lik.sigma`` / ``lik.nu`` / ... (the node's inputs), + - :meth:`at` — re-root the parameters onto new design inputs, + - :meth:`variational_expectation` / :meth:`predict_mean_and_var` / + :meth:`predict_log_density`. + """ + + def at(self, X): + """Re-root onto design matrix ``X`` (no-op without a design matrix).""" + return at(self, X) + + def variational_expectation(self, y, mu, var): + """E_{q(f)}[log p(y|f)] where q(f) = N(mu, var).""" + return variational_expectation(self, y, mu, var) + + def predict_mean_and_var(self, mu, var): + """Predictive mean and variance under q(f) = N(mu, var).""" + return predict_mean_and_var(self, mu, var) -_NULL = NullType() + def predict_log_density(self, y, mu, var): + """Predictive log-density log E_{q(f)}[p(y|f)] at test points.""" + return predict_log_density(self, y, mu, var) + + def __getattr__(self, name): + # Only fires for attributes not found normally. Expose the named + # parameters (node inputs) and a little Op config; everything else + # (incl. PyTensor internals) raises AttributeError as usual. + if name.startswith("_") or name in ("owner", "type", "index", "tag", "name", "auto_name"): + raise AttributeError(name) + owner = self.__dict__.get("owner") + if owner is None: + raise AttributeError(name) + op = owner.op + if name in op.param_names: + off = 1 if op.has_data else 0 + return owner.inputs[off + op.param_names.index(name)] + if name in ("n_points", "invlink", "param_names", "has_data"): + return getattr(op, name) + raise AttributeError(name) + + +class LikelihoodType(Type): + """Type of a likelihood handle. Carries no runtime value — the node it comes + from is a structural marker, never consumed by the loss or compiled.""" + + def filter(self, value, strict=False, allow_downcast=None): + return value + + def make_variable(self, name=None): + return LikelihoodVariable(self, None, name=name) + + def __eq__(self, other): + return type(self) is type(other) + + def __hash__(self): + return hash(type(self)) + + +_LIK_TYPE = LikelihoodType() class LikelihoodOp(Op): """Structural marker keeping a likelihood inside the PyTensor graph. The Apply node consumes the design matrix (optional; input 0 when present) - and the parameters as its *inputs*, and emits a single ``NoneType`` handle as - output. It computes nothing — its sole purpose is to be a real graph node, so - ``graph_replace`` / ``fgraph.clone`` / PyMC conditioning carry the likelihood - (and re-root its parameters) instead of it being Python-side state those - operations would silently drop. The Op *type* (the subclass) encodes the - family, so behaviour is recoverable via ``owner.op``; the parameters are the - node's inputs. + and the parameters as its *inputs*, and emits a single :class:`LikelihoodType` + handle (a :class:`LikelihoodVariable`) as output. It computes nothing — it is + a real graph node purely so ``graph_replace`` / ``fgraph.clone`` / PyMC + conditioning carry the likelihood (and re-root its parameters). The Op *type* + (the subclass) encodes the family; behaviour is recoverable via ``owner.op``. Because the parameters are *inputs* and the handle output is never consumed by the loss, the node is never on the differentiation path nor in a compiled - loss graph — so it needs no gradient, view, shape, or strip machinery. To - keep it alive across graph operations, hold its output (or register it as a - PyMC named variable). - - Parameters - ---------- - param_names : tuple of str - Parameter names, in input order (after the optional design matrix). - n_points : int - Gauss-Hermite quadrature points for the default expectations. - invlink : callable or None - Inverse link function (set by subclasses that need one). - has_data : bool - Whether input 0 is the design matrix ``X`` (i.e. parameters depend on it). + loss graph — no gradient, view, shape, or strip machinery is needed. + + Subclasses set the class attributes ``param_names`` and (optionally) + ``default_invlink`` and implement the behaviour methods. """ __props__ = ("param_names", "n_points", "invlink", "has_data") + param_names = () + default_invlink = None - def __init__(self, param_names=(), n_points=20, invlink=None, has_data=False): - self.param_names = tuple(param_names) + def __init__(self, n_points=20, invlink=None, has_data=False): + self.param_names = type(self).param_names self.n_points = n_points - self.invlink = invlink + self.invlink = invlink if invlink is not None else type(self).default_invlink self.has_data = has_data def make_node(self, *args): args = [pt.as_tensor_variable(a) for a in args] - return Apply(self, args, [_NULL()]) + return Apply(self, args, [_LIK_TYPE()]) def perform(self, node, inputs, outputs): outputs[0][0] = None @@ -118,13 +172,28 @@ def _gauss_hermite_logspace(self, func, y, mu, var): return pt.logsumexp(log_vals, axis=1) +def build(op_cls, params, x=None, n_points=20, invlink=None): + """Build a :class:`LikelihoodVariable` from an Op class and ordered params. + + ``params`` is the list of parameter graphs in ``op_cls.param_names`` order + (empty for parameterless likelihoods). ``x`` (the design matrix) is prepended + as input 0 when given, marking the parameters as data-dependent. Used by the + family helpers (:func:`~ptgp.likelihoods.Gaussian`, etc.). + """ + has_data = bool(op_cls.param_names) and x is not None + op = op_cls(n_points=n_points, invlink=invlink, has_data=has_data) + args = [pt.as_tensor_variable(p) for p in params] + if has_data: + args = [pt.as_tensor_variable(x), *args] + return op(*args) + + # --- Purely functional API ------------------------------------------------ # -# A likelihood is fully described by its node: ``owner.op`` is the family (with -# behaviour), and the node's *inputs* are the optional design matrix followed by -# the parameters. These free functions take such a node (a "lik" — the NoneType -# handle) and dispatch through ``owner.op``, so a likelihood can be used with no -# holder at all. The :class:`Likelihood` holder is a thin view built on these. +# Operate on a likelihood node (a :class:`LikelihoodVariable`) and dispatch +# through ``owner.op``. The variable's methods are thin wrappers over these, so +# either style works; the free functions are handy when a dummy node's outputs +# are plain TensorVariables rather than a LikelihoodVariable. def _params_of(node): @@ -148,9 +217,9 @@ def param(lik, name): def at(lik, X): """Re-root a likelihood node onto design matrix ``X``. - A no-op for a likelihood with no design matrix (``op.has_data`` is False) — - there is no ``X`` input to swap. Otherwise returns a new handle whose - parameter inputs are re-expressed over ``X`` (RV-safe via ``graph_replace``). + A no-op for a likelihood with no design matrix (``op.has_data`` is False). + Otherwise returns a new handle whose parameter inputs are re-expressed over + ``X`` (RV-safe via ``graph_replace``). """ node = lik.owner if not node.op.has_data: @@ -171,64 +240,3 @@ def predict_mean_and_var(lik, mu, var): def predict_log_density(lik, y, mu, var): """Predictive log-density for a likelihood node.""" return lik.owner.op.predict_log_density(_params_of(lik.owner), y, mu, var) - - -def _param_property(name): - """Expose a named parameter as ``self.`` (the parameter input).""" - return property(lambda self: self._params[type(self).param_names.index(name)]) - - -class Likelihood: - """Public likelihood object — a thin view over a :class:`LikelihoodOp` node. - - Every likelihood is a node: construction builds ``op(x?, *params)`` whose - single ``NoneType`` output (``self._lik``) is the handle, and whose inputs - are the optional design matrix followed by the parameter graphs. The handle - keeps the likelihood in the PyTensor graph (carried by ``graph_replace`` / - ``clone``); the methods read the parameters off the node's inputs and - dispatch behaviour through ``owner.op`` — the same calls the free functions - above make. - - The node is never consumed by the loss (the loss uses the parameter graphs - directly, via ``.sigma`` etc.), so it stays off the gradient path and out of - compiled graphs. :meth:`at` re-roots the parameters when there's a design - matrix; it is a no-op otherwise (homoskedastic or parameterless). - - Subclasses set ``op_cls``, ``param_names``, and expose each parameter via - :func:`_param_property`. - """ - - op_cls = LikelihoodOp - param_names = () - n_points = 20 - - def __init__(self, x=None, n_points=20, invlink=None, **params): - self.n_points = n_points - self.invlink = invlink - has_data = bool(self.param_names) and x is not None - self._op = self.op_cls(self.param_names, n_points, invlink, has_data=has_data) - ordered = [pt.as_tensor_variable(params[name]) for name in self.param_names] - args = ([pt.as_tensor_variable(x)] if has_data else []) + ordered - self._lik = self._op(*args) # NoneType handle — every likelihood is a node - - @property - def _params(self): - return _params_of(self._lik.owner) - - def at(self, X): - """Return a copy with parameters re-rooted onto ``X`` (no-op without a design matrix).""" - new = copy.copy(self) - new._lik = at(self._lik, X) - return new - - def variational_expectation(self, y, mu, var): - """E_{q(f)}[log p(y|f)] where q(f) = N(mu, var).""" - return self._op.variational_expectation(self._params, y, mu, var) - - def predict_mean_and_var(self, mu, var): - """Predictive mean and variance under q(f) = N(mu, var).""" - return self._op.predict_mean_and_var(self._params, mu, var) - - def predict_log_density(self, y, mu, var): - """Predictive log-density log E_{q(f)}[p(y|f)] at test points.""" - return self._op.predict_log_density(self._params, y, mu, var) diff --git a/ptgp/likelihoods/bernoulli.py b/ptgp/likelihoods/bernoulli.py index 0c0b6f8..0aec3e2 100644 --- a/ptgp/likelihoods/bernoulli.py +++ b/ptgp/likelihoods/bernoulli.py @@ -1,6 +1,6 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood, LikelihoodOp +from ptgp.likelihoods.base import LikelihoodOp, build def inv_probit(x): @@ -12,6 +12,9 @@ def inv_probit(x): class BernoulliOp(LikelihoodOp): """Bernoulli likelihood Op. Closed-form predictive for the probit link.""" + param_names = () + default_invlink = staticmethod(inv_probit) + def _log_prob(self, f, y): p = self.invlink(f) return y * pt.log(p) + (1.0 - y) * pt.log(1.0 - p) @@ -30,21 +33,10 @@ def predict_mean_and_var(self, params, mu, var): return super().predict_mean_and_var(params, mu, var) -class Bernoulli(Likelihood): - """Bernoulli likelihood: p(y=1|f) = invlink(f). - - Default link is probit. Variational expectation via Gauss-Hermite quadrature. +def Bernoulli(invlink=None, n_points=20): + """Build a Bernoulli likelihood p(y=1|f) = invlink(f). - Parameters - ---------- - invlink : callable, optional - Inverse link function (default: probit). Use ``pt.sigmoid`` for logit link. - n_points : int - Number of Gauss-Hermite quadrature points (default 20). + Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. Default link is + probit (closed-form predictive); pass ``invlink=pt.sigmoid`` for logit. """ - - op_cls = BernoulliOp - param_names = () - - def __init__(self, invlink=None, n_points=20): - super().__init__(n_points=n_points, invlink=invlink or inv_probit) + return build(BernoulliOp, [], n_points=n_points, invlink=invlink) diff --git a/ptgp/likelihoods/gaussian.py b/ptgp/likelihoods/gaussian.py index 3e0119f..136f75d 100644 --- a/ptgp/likelihoods/gaussian.py +++ b/ptgp/likelihoods/gaussian.py @@ -1,6 +1,6 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood, LikelihoodOp, _param_property +from ptgp.likelihoods.base import LikelihoodOp, build LOG2PI = pt.log(2.0 * pt.pi) @@ -8,6 +8,8 @@ class GaussianOp(LikelihoodOp): """Gaussian likelihood Op: closed-form expectations (no quadrature).""" + param_names = ("sigma",) + def _log_prob(self, f, y, sigma): return -0.5 * (LOG2PI + pt.log(sigma**2) + (y - f) ** 2 / sigma**2) @@ -31,25 +33,19 @@ def predict_log_density(self, params, y, mu, var): return -0.5 * (LOG2PI + pt.log(total_var) + (y - mu) ** 2 / total_var) -class Gaussian(Likelihood): - """Gaussian likelihood p(y|f) = N(y; f, sigma^2). +def Gaussian(sigma, x=None): + """Build a Gaussian likelihood p(y|f) = N(y; f, sigma^2). - All methods have closed-form expressions (no quadrature needed). + Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable` — a graph node + exposing ``.sigma``, ``.at``, ``.predict_mean_and_var``, etc. Parameters ---------- sigma : tensor - Observation noise standard deviation. May be a scalar (homoskedastic) - or a vector built against a design matrix ``X`` (heteroskedastic). + Observation noise standard deviation. Scalar (homoskedastic) or a vector + built against a design matrix ``X`` (heteroskedastic). x : tensor, optional - The design matrix ``sigma`` was built against. Pass it when ``sigma`` is - heteroskedastic so :meth:`Likelihood.at` can re-root - sigma onto the test inputs at predict time. + The design matrix ``sigma`` was built against; pass it for + heteroskedastic noise so ``.at`` can re-root sigma onto test inputs. """ - - op_cls = GaussianOp - param_names = ("sigma",) - sigma = _param_property("sigma") - - def __init__(self, sigma, x=None): - super().__init__(x=x, sigma=sigma) + return build(GaussianOp, [sigma], x=x) diff --git a/ptgp/likelihoods/negative_binomial.py b/ptgp/likelihoods/negative_binomial.py index 90433fe..2c91c18 100644 --- a/ptgp/likelihoods/negative_binomial.py +++ b/ptgp/likelihoods/negative_binomial.py @@ -1,11 +1,14 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood, LikelihoodOp, _param_property +from ptgp.likelihoods.base import LikelihoodOp, build class NegativeBinomialOp(LikelihoodOp): """Negative binomial likelihood Op. Expectations via quadrature.""" + param_names = ("alpha",) + default_invlink = staticmethod(pt.exp) + def _log_prob(self, f, y, alpha): mu = self.invlink(f) return ( @@ -24,12 +27,12 @@ def _conditional_variance(self, f, alpha): return mu + mu**2 / alpha -class NegativeBinomial(Likelihood): - """Negative binomial likelihood: p(y|f) = NB(y; invlink(f), alpha). +def NegativeBinomial(alpha, invlink=None, n_points=20, x=None): + """Build a negative binomial likelihood NB(y; invlink(f), alpha). - Parameterized as NB(y; mu, alpha) where mu = invlink(f) and alpha is the - overdispersion parameter. Variance is mu + mu^2 / alpha. - Default link is log (invlink=exp). + Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. Variance is + ``mu + mu**2 / alpha`` with ``mu = invlink(f)``; default link is log + (``invlink=exp``). Parameters ---------- @@ -41,12 +44,6 @@ class NegativeBinomial(Likelihood): Number of Gauss-Hermite quadrature points (default 20). x : tensor, optional The design matrix ``alpha`` was built against, for a heteroskedastic - parameter re-rooted onto the test inputs at predict time. + parameter re-rooted onto test inputs via ``.at``. """ - - op_cls = NegativeBinomialOp - param_names = ("alpha",) - alpha = _param_property("alpha") - - def __init__(self, alpha, invlink=None, n_points=20, x=None): - super().__init__(x=x, n_points=n_points, invlink=invlink or pt.exp, alpha=alpha) + return build(NegativeBinomialOp, [alpha], x=x, n_points=n_points, invlink=invlink) diff --git a/ptgp/likelihoods/poisson.py b/ptgp/likelihoods/poisson.py index b6d970c..d898b37 100644 --- a/ptgp/likelihoods/poisson.py +++ b/ptgp/likelihoods/poisson.py @@ -1,11 +1,14 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood, LikelihoodOp +from ptgp.likelihoods.base import LikelihoodOp, build class PoissonOp(LikelihoodOp): """Poisson likelihood Op. Closed-form variational expectation for log link.""" + param_names = () + default_invlink = staticmethod(pt.exp) + def _log_prob(self, f, y): lam = self.invlink(f) return y * pt.log(lam) - lam - pt.gammaln(y + 1.0) @@ -22,22 +25,10 @@ def variational_expectation(self, params, y, mu, var): return super().variational_expectation(params, y, mu, var) -class Poisson(Likelihood): - """Poisson likelihood: p(y|f) = Poisson(y; invlink(f)). - - Default link is log (invlink=exp). Has a closed-form variational - expectation with the log link; falls back to quadrature for other links. +def Poisson(invlink=None, n_points=20): + """Build a Poisson likelihood p(y|f) = Poisson(y; invlink(f)). - Parameters - ---------- - invlink : callable, optional - Inverse link function (default: exp). - n_points : int - Number of Gauss-Hermite quadrature points (default 20). + Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. Default link is + log (closed-form variational expectation); other links fall back to quadrature. """ - - op_cls = PoissonOp - param_names = () - - def __init__(self, invlink=None, n_points=20): - super().__init__(n_points=n_points, invlink=invlink or pt.exp) + return build(PoissonOp, [], n_points=n_points, invlink=invlink) diff --git a/ptgp/likelihoods/student_t.py b/ptgp/likelihoods/student_t.py index 29145c1..e9c05e8 100644 --- a/ptgp/likelihoods/student_t.py +++ b/ptgp/likelihoods/student_t.py @@ -1,11 +1,13 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import Likelihood, LikelihoodOp, _param_property +from ptgp.likelihoods.base import LikelihoodOp, build class StudentTOp(LikelihoodOp): """Student-T likelihood Op. Expectations via Gauss-Hermite quadrature.""" + param_names = ("nu", "sigma") + def _log_prob(self, f, y, nu, sigma): z = (y - f) / sigma return ( @@ -22,10 +24,11 @@ def _conditional_variance(self, f, nu, sigma): return pt.ones_like(f) * sigma**2 * nu / (nu - 2.0) -class StudentT(Likelihood): - """Student-T likelihood p(y|f) = StudentT(y; f, sigma, nu). +def StudentT(nu, sigma, n_points=20, x=None): + """Build a Student-T likelihood p(y|f) = StudentT(y; f, sigma, nu). - Variational expectation via Gauss-Hermite quadrature. + Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. Variational + expectation via Gauss-Hermite quadrature. Parameters ---------- @@ -37,13 +40,6 @@ class StudentT(Likelihood): Number of Gauss-Hermite quadrature points (default 20). x : tensor, optional The design matrix ``nu``/``sigma`` were built against, for - heteroskedastic parameters re-rooted onto the test inputs at predict. + heteroskedastic parameters re-rooted onto test inputs via ``.at``. """ - - op_cls = StudentTOp - param_names = ("nu", "sigma") - nu = _param_property("nu") - sigma = _param_property("sigma") - - def __init__(self, nu, sigma, n_points=20, x=None): - super().__init__(x=x, n_points=n_points, nu=nu, sigma=sigma) + return build(StudentTOp, [nu, sigma], x=x, n_points=n_points) diff --git a/tests/likelihoods/test_functional_api.py b/tests/likelihoods/test_functional_api.py index 9956580..b9d8a6e 100644 --- a/tests/likelihoods/test_functional_api.py +++ b/tests/likelihoods/test_functional_api.py @@ -27,13 +27,9 @@ def _eval(*tensors): def test_likelihood_survives_clone_and_reroots_on_the_clone(): - """Regression: the likelihood handle lives in the graph as a node, so cloning - a model graph carries it — we recover the likelihood from the *clone* and - re-root it onto new inputs. Only possible because it is a graph node, not - Python-side state on the holder.""" X = pt.matrix("X", shape=(None, 1)) - lik = Gaussian(0.1 + 0.05 * X[:, 0] ** 2, x=X) # holder; the handle is lik._lik - assert isinstance(op_of(lik._lik), GaussianOp) + lik = Gaussian(0.1 + 0.05 * X[:, 0] ** 2, x=X) + assert isinstance(lik.owner.op, GaussianOp) X_new = pt.matrix("X_new", shape=(None, 1)) X_new_test = np.array([[0.0], [1.0], [2.0]]) mu = pt.as_tensor_variable(np.zeros(3)) @@ -42,10 +38,10 @@ def test_likelihood_survives_clone_and_reroots_on_the_clone(): _, orig_var = lik.at(X_new).predict_mean_and_var(mu, var) orig_eval = orig_var.eval({X_new: X_new_test}) - [cloned_handle] = clone_replace([lik._lik]) - assert cloned_handle is not lik._lik - assert isinstance(op_of(cloned_handle), GaussianOp) - _, cloned_var = predict_mean_and_var(at(cloned_handle, X_new), mu, var) + [cloned_lik] = clone_replace([lik]) + assert cloned_lik is not lik + assert isinstance(cloned_lik.owner.op, GaussianOp) + _, cloned_var = predict_mean_and_var(at(cloned_lik, X_new), mu, var) cloned_eval = cloned_var.eval({X_new: X_new_test}) expected = 1.0 + (0.1 + 0.05 * X_new_test[:, 0] ** 2) ** 2 # var + sigma(X_new)**2 @@ -59,7 +55,7 @@ def test_node_built_without_holder_is_fully_usable(): come from ``owner.op``.""" X = pt.matrix("X", shape=(None, 1)) sigma = 0.1 + 0.05 * X[:, 0] ** 2 - lik = GaussianOp(("sigma",), has_data=True)(X, sigma) # a node, no Likelihood object + lik = GaussianOp(has_data=True)(X, sigma) # a node, via the Op directly assert isinstance(op_of(lik), GaussianOp) assert param(lik, "sigma") is sigma # the parameter is the node's input @@ -76,11 +72,10 @@ def test_functional_matches_holder(): var = pt.as_tensor_variable(np.array([0.5, 0.2])) new = pt.as_tensor_variable(np.array([[0.5], [1.5]])) - holder = Gaussian(0.1 + 0.05 * X[:, 0] ** 2, x=X) - node = holder._lik # the likelihood handle (NoneType node); .sigma is the param + lik = Gaussian(0.1 + 0.05 * X[:, 0] ** 2, x=X) # a LikelihoodVariable - hm, hv = holder.at(new).predict_mean_and_var(mu, var) - fm, fv = predict_mean_and_var(at(node, new), mu, var) + hm, hv = lik.at(new).predict_mean_and_var(mu, var) # method API + fm, fv = predict_mean_and_var(at(lik, new), mu, var) # functional API np.testing.assert_allclose(_eval(hm), _eval(fm)) np.testing.assert_allclose(_eval(hv), _eval(fv)) @@ -91,7 +86,7 @@ def test_functional_variational_expectation_preserves_rv_identity(): with pm.Model(): alpha = pm.HalfNormal("alpha") X = pt.matrix("X", shape=(None, 1)) - lik = GaussianOp(("sigma",), has_data=True)(X, alpha + 0.05 * X[:, 0] ** 2) + lik = GaussianOp(has_data=True)(X, alpha + 0.05 * X[:, 0] ** 2) new = pt.as_tensor_variable(np.array([[0.0], [2.0]])) ve = variational_expectation( at(lik, new), diff --git a/tests/likelihoods/test_negative_binomial.py b/tests/likelihoods/test_negative_binomial.py index 4245c8b..3bf37ab 100644 --- a/tests/likelihoods/test_negative_binomial.py +++ b/tests/likelihoods/test_negative_binomial.py @@ -36,9 +36,10 @@ def test_converges_to_poisson(self): # Use quadrature for both so comparison is apples-to-apples poisson_lik = Poisson(n_points=50) + pop = poisson_lik.owner.op ve_poisson = _eval( - poisson_lik._op._gauss_hermite( - poisson_lik._op._log_prob, + pop._gauss_hermite( + pop._log_prob, pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var), diff --git a/tests/likelihoods/test_poisson.py b/tests/likelihoods/test_poisson.py index 22f56e0..534a019 100644 --- a/tests/likelihoods/test_poisson.py +++ b/tests/likelihoods/test_poisson.py @@ -25,9 +25,10 @@ def test_closed_form_matches_quadrature(self): ) # Use base-op quadrature via _gauss_hermite directly lik = Poisson(n_points=50) + op = lik.owner.op ve_quad = _eval( - lik._op._gauss_hermite( - lik._op._log_prob, + op._gauss_hermite( + op._log_prob, pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var), From f86f6a0d15aa2cbcc23bb16a1cfe70039045e68e Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Fri, 29 May 2026 19:35:16 +0200 Subject: [PATCH 5/8] GP data barrier --- ptgp/likelihoods/__init__.py | 4 ++ ptgp/likelihoods/base.py | 73 ++++++++++++++++++++++-- tests/likelihoods/test_functional_api.py | 25 ++++++++ 3 files changed, 96 insertions(+), 6 deletions(-) diff --git a/ptgp/likelihoods/__init__.py b/ptgp/likelihoods/__init__.py index 0b533f8..2f0cc9e 100644 --- a/ptgp/likelihoods/__init__.py +++ b/ptgp/likelihoods/__init__.py @@ -1,8 +1,10 @@ from ptgp.likelihoods.base import ( + GPData, LikelihoodOp, LikelihoodType, LikelihoodVariable, at, + gp_data, op_of, param, predict_log_density, @@ -26,6 +28,8 @@ "LikelihoodOp", "LikelihoodType", "LikelihoodVariable", + "GPData", + "gp_data", # Purely functional API — operate on a likelihood node via ``owner.op``. "op_of", "param", diff --git a/ptgp/likelihoods/base.py b/ptgp/likelihoods/base.py index 1d88521..46272db 100644 --- a/ptgp/likelihoods/base.py +++ b/ptgp/likelihoods/base.py @@ -4,7 +4,9 @@ from pytensor.graph.basic import Apply, Variable from pytensor.graph.op import Op from pytensor.graph.replace import graph_replace +from pytensor.graph.rewriting.basic import node_rewriter from pytensor.graph.type import Type +from pytensor.tensor.rewriting.basic import register_specialize class LikelihoodVariable(Variable): @@ -172,20 +174,79 @@ def _gauss_hermite_logspace(self, func, y, mu, var): return pt.logsumexp(log_vals, axis=1) +class GPData(Op): + """Opaque identity barrier marking the design matrix in a parameter graph. + + The design matrix is routed through this Op before it reaches the + parameters (``sigma = f(gp_data(X))``), which keeps a *stable* handle to it: + rewrites cannot see through the barrier, so an algebraic cancellation + (e.g. ``exp(log(x)) -> x``) cannot dissolve the path from the parameter back + to ``X`` and sever the handle. It is also the likelihood node's design-matrix + input (input 0), so it is structurally findable and clone-safe. + + It carries no value of its own (identity passthrough) and is stripped at the + *specialize* stage (:func:`_strip_gp_data`) — i.e. only when building the + final executable, after re-rooting, never during the canonicalization passes + a model graph might go through while the handle still matters. + """ + + __props__ = () + view_map = {0: [0]} + + def make_node(self, x): + x = pt.as_tensor_variable(x) + return Apply(self, [x], [x.type()]) + + def perform(self, node, inputs, outputs): + outputs[0][0] = inputs[0] + + def infer_shape(self, fgraph, node, input_shapes): + return input_shapes + + def pullback(self, inputs, outputs, cotangents): + return list(cotangents) # identity + + +_gp_data_op = GPData() + + +def gp_data(x): + """Wrap the design matrix ``x`` in an opaque :class:`GPData` barrier.""" + return _gp_data_op(pt.as_tensor_variable(x)) + + +@register_specialize +@node_rewriter([GPData]) +def _strip_gp_data(fgraph, node): + """Elide the design-matrix barrier when building the final executable. + + Registered at the *specialize* stage (not canonicalize), so it does not fire + during the canonicalization passes the barrier is meant to survive — it only + drops the identity Op once all graph transforms (incl. re-rooting) are done. + """ + return [node.inputs[0]] + + def build(op_cls, params, x=None, n_points=20, invlink=None): """Build a :class:`LikelihoodVariable` from an Op class and ordered params. ``params`` is the list of parameter graphs in ``op_cls.param_names`` order - (empty for parameterless likelihoods). ``x`` (the design matrix) is prepended - as input 0 when given, marking the parameters as data-dependent. Used by the - family helpers (:func:`~ptgp.likelihoods.Gaussian`, etc.). + (empty for parameterless likelihoods). When a design matrix ``x`` is given, + it is wrapped in a :class:`GPData` barrier, the parameters are re-expressed + over that barrier (so it sits between ``X`` and the parameters), and the + barrier becomes the node's input 0 — the data demarcator. Used by the family + helpers (:func:`~ptgp.likelihoods.Gaussian`, etc.). """ has_data = bool(op_cls.param_names) and x is not None op = op_cls(n_points=n_points, invlink=invlink, has_data=has_data) - args = [pt.as_tensor_variable(p) for p in params] + params = [pt.as_tensor_variable(p) for p in params] if has_data: - args = [pt.as_tensor_variable(x), *args] - return op(*args) + x = pt.as_tensor_variable(x) + xd = gp_data(x) + # Route the parameters through the barrier so it sits between X and them. + params = list(graph_replace(params, {x: xd}, strict=False)) + params = [xd, *params] + return op(*params) # --- Purely functional API ------------------------------------------------ diff --git a/tests/likelihoods/test_functional_api.py b/tests/likelihoods/test_functional_api.py index b9d8a6e..8a364be 100644 --- a/tests/likelihoods/test_functional_api.py +++ b/tests/likelihoods/test_functional_api.py @@ -97,3 +97,28 @@ def test_functional_variational_expectation_preserves_rv_identity(): # alpha is still a free input of the re-rooted expectation graph val = pytensor.function([alpha], ve)(0.3) assert np.isfinite(val).all() + + +def test_design_matrix_handle_survives_rewrite(): + """Regression — the "don't lose the handle" case: when the design matrix is an + intermediate node (X = log(x)) and a parameter exps it (sigma = exp(X)), an + algebraic rewrite would cancel exp(log(x)) -> x and sever our handle to X. The + design matrix is routed through an opaque gp_data barrier, so canonicalization + cannot see through it: X survives the rewrite and re-rooting still reaches it. + """ + from pytensor.graph.rewriting.utils import rewrite_graph + from pytensor.graph.traversal import ancestors + + x = pt.matrix("x", shape=(None, 1)) + X = pt.log(x) # design matrix is an intermediate node, not a leaf + lik = Gaussian(pt.exp(X[:, 0]), x=X) # sigma = exp(log(x)) — would cancel without a barrier + sigma = lik.owner.inputs[1] # the parameter, routed through the gp_data barrier + + rewritten = rewrite_graph(sigma, include=("canonicalize",)) + # The barrier blocked the exp(log(x)) -> x cancellation, so X (=log(x)) survives. + assert X in set(ancestors([rewritten])) + + # And re-rooting the (held) likelihood still reaches the design matrix. + X_new = pt.matrix("X_new", shape=(None, 1)) + got = lik.at(X_new).sigma.eval({X_new: np.array([[2.0], [3.0]])}) + np.testing.assert_allclose(got, np.exp([2.0, 3.0])) From 20c226e5878dba7df207b4816de01af05a7e5572 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Fri, 29 May 2026 16:29:57 -0500 Subject: [PATCH 6/8] Remove section-divider comments from test modules --- tests/test_rewrites.py | 21 --------------------- tests/test_svgp_scipy_ref.py | 9 --------- tests/test_utils.py | 14 -------------- 3 files changed, 44 deletions(-) diff --git a/tests/test_rewrites.py b/tests/test_rewrites.py index 345dcc1..0035258 100644 --- a/tests/test_rewrites.py +++ b/tests/test_rewrites.py @@ -25,10 +25,6 @@ # Install the rewrites under test (side-effect import). import ptgp.rewrites # noqa: F401 -# --------------------------------------------------------------------------- -# Helpers for the structural rewrites below. -# --------------------------------------------------------------------------- - def _has_op(graph, op_type): fg = FunctionGraph(outputs=[graph] if not isinstance(graph, list) else graph, clone=False) @@ -40,11 +36,6 @@ def _has_op(graph, op_type): return False -# --------------------------------------------------------------------------- -# Det(L @ L.T) -> (prod(diag(L)))**2 rewrite (Rule H). -# --------------------------------------------------------------------------- - - def test_det_of_LLT_takes_diag_product_when_L_lower_triangular(): """``Det(L @ L.T)`` lowers to ``(prod(diag(L)))**2`` — no Det Apply remains.""" from pytensor.tensor.linalg.summary import det @@ -70,11 +61,6 @@ def test_det_of_LLT_diag_product_is_numerically_correct(): np.testing.assert_allclose(f(L_val), expected, atol=1e-10) -# --------------------------------------------------------------------------- -# ExtractDiag(A @ A.T) -> sum(A**2, axis=-1) rewrite (Rule I). -# --------------------------------------------------------------------------- - - def test_diag_of_AAT_to_row_norms_squared_eliminates_dot(): """``ExtractDiag(A @ A.T)`` lowers to ``sum(A**2, axis=-1)`` — no ExtractDiag.""" A = pt.dmatrix("A") @@ -111,11 +97,6 @@ def test_diag_of_ATA_is_numerically_correct(): np.testing.assert_allclose(f(A_val), np.diag(A_val.T @ A_val), atol=1e-12) -# --------------------------------------------------------------------------- -# MatrixInverse(PSD A) -> cho_solve(L, eye) rewrite (Rule J). -# --------------------------------------------------------------------------- - - def test_matrix_inverse_of_LTL_uses_solve_triangular(): """``MatrixInverse(L.T @ L)`` lowers to two solve_triangular calls — no fresh Cholesky.""" L = pta.assume(pt.dmatrix("L"), lower_triangular=True) @@ -181,12 +162,10 @@ def test_matrix_inverse_lowering_is_numerically_correct(): np.testing.assert_allclose(f(K_val), np.linalg.inv(K_val), atol=1e-10) -# --------------------------------------------------------------------------- # Joint-graph cubic-op floor pinning (was: Blocker A / merge_composites_*). # These pin the invariant the deleted composite-merge passes were claimed to # enforce; they pass without those passes because pytensor 3.0.3's fusion # pipeline already arrives at the floor. -# --------------------------------------------------------------------------- def _gp_mll_like_joint(): diff --git a/tests/test_svgp_scipy_ref.py b/tests/test_svgp_scipy_ref.py index 3c8cb58..52167c6 100644 --- a/tests/test_svgp_scipy_ref.py +++ b/tests/test_svgp_scipy_ref.py @@ -32,9 +32,6 @@ ATOL = 1e-5 -# ---- Reference ELBO machinery (numpy + scipy, no PTGP internals) ---------- - - def _matern52_numpy(X1, X2, ls, eta): """Matern52 kernel in numpy: k(r) = eta^2 (1 + sqrt(5)r + 5r^2/3) exp(-sqrt(5)r).""" sqd = np.sum(X1**2, axis=-1)[:, None] + np.sum(X2**2, axis=-1)[None, :] - 2.0 * X1 @ X2.T @@ -98,9 +95,6 @@ def _fixed_config(rng, N=40, M=8, x_range=(-2.0, 2.0)): return X, Z, q_mu, L -# ---- StudentT ------------------------------------------------------------- - - def _studentt_logprob(nu, sigma): """Return a numpy log_prob(f, y) for StudentT(y; f, sigma, nu).""" @@ -237,9 +231,6 @@ def test_elbo_match(self): np.testing.assert_allclose(e_ptgp, e_ref, atol=ATOL) -# ---- NegativeBinomial ----------------------------------------------------- - - def _negbinom_logprob(alpha): """Return a numpy log_prob(f, y) for NegativeBinomial(y; exp(f), alpha).""" diff --git a/tests/test_utils.py b/tests/test_utils.py index a549a62..f2696cf 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -8,10 +8,6 @@ from ptgp.utils import _build_index_labels, check_init -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - def _make_simple_model_and_objective(): """1-D GP on synthetic data; returns (fun, theta0, X, y, model).""" @@ -42,11 +38,6 @@ def _make_simple_model_and_objective(): return fun, theta0, X, y, model -# --------------------------------------------------------------------------- -# check_init tests -# --------------------------------------------------------------------------- - - class TestCheckInit: def test_returns_true_on_valid_init(self): fun, theta0, X, y, model = _make_simple_model_and_objective() @@ -119,11 +110,6 @@ def test_top_k_capped_at_theta_size(self, capsys): assert len(table_lines) == theta0.size -# --------------------------------------------------------------------------- -# _build_index_labels unit tests -# --------------------------------------------------------------------------- - - class TestBuildIndexLabels: def test_returns_none_without_model(self): result = _build_index_labels(5, model=None, extra_vars=None, extra_init=None) From 503dabd97017f473796b655af8e04ea63bd7c819 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Fri, 29 May 2026 16:32:08 -0500 Subject: [PATCH 7/8] Rework likelihood inverse links and node construction --- ptgp/likelihoods/base.py | 98 +++++++++++++++------------ ptgp/likelihoods/bernoulli.py | 34 +++++----- ptgp/likelihoods/gaussian.py | 5 +- ptgp/likelihoods/negative_binomial.py | 22 +++--- ptgp/likelihoods/poisson.py | 30 +++++--- ptgp/likelihoods/student_t.py | 7 +- tests/likelihoods/test_base.py | 29 ++++---- 7 files changed, 122 insertions(+), 103 deletions(-) diff --git a/ptgp/likelihoods/base.py b/ptgp/likelihoods/base.py index 46272db..39776de 100644 --- a/ptgp/likelihoods/base.py +++ b/ptgp/likelihoods/base.py @@ -9,6 +9,26 @@ from pytensor.tensor.rewriting.basic import register_specialize +def inv_probit(x): + """Probit inverse link Phi(x), clamped to (jitter, 1 - jitter).""" + jitter = 1e-3 + return 0.5 * (1.0 + pt.erf(x / pt.sqrt(2.0))) * (1.0 - 2.0 * jitter) + jitter + + +def inv_cloglog(x): + """Complementary log-log inverse link, clamped to (jitter, 1 - jitter).""" + jitter = 1e-3 + return (1.0 - pt.exp(-pt.exp(x))) * (1.0 - 2.0 * jitter) + jitter + + +LINKS = { + "log": pt.exp, + "logit": pt.sigmoid, + "probit": inv_probit, + "cloglog": inv_cloglog, +} + + class LikelihoodVariable(Variable): """A likelihood that lives in the PyTensor graph *and* carries the accessor API. @@ -40,9 +60,8 @@ def predict_log_density(self, y, mu, var): return predict_log_density(self, y, mu, var) def __getattr__(self, name): - # Only fires for attributes not found normally. Expose the named - # parameters (node inputs) and a little Op config; everything else - # (incl. PyTensor internals) raises AttributeError as usual. + # The guard keeps __getattr__ from recursing on its own attribute access + # and lets dunders and PyTensor internals fall through as AttributeError. if name.startswith("_") or name in ("owner", "type", "index", "tag", "name", "auto_name"): raise AttributeError(name) owner = self.__dict__.get("owner") @@ -50,9 +69,8 @@ def __getattr__(self, name): raise AttributeError(name) op = owner.op if name in op.param_names: - off = 1 if op.has_data else 0 - return owner.inputs[off + op.param_names.index(name)] - if name in ("n_points", "invlink", "param_names", "has_data"): + return param(self, name) + if name in ("n_points", "link", "param_names", "has_data"): return getattr(op, name) raise AttributeError(name) @@ -91,20 +109,34 @@ class LikelihoodOp(Op): by the loss, the node is never on the differentiation path nor in a compiled loss graph — no gradient, view, shape, or strip machinery is needed. - Subclasses set the class attributes ``param_names`` and (optionally) - ``default_invlink`` and implement the behaviour methods. + Subclasses set the class attributes ``param_names`` and ``allowed_links`` + and implement the behaviour methods. """ - __props__ = ("param_names", "n_points", "invlink", "has_data") + __props__ = ("param_names", "n_points", "link", "has_data") param_names = () - default_invlink = None + allowed_links = () - def __init__(self, n_points=20, invlink=None, has_data=False): + def __init__(self, n_points=20, link=None, has_data=False): self.param_names = type(self).param_names self.n_points = n_points - self.invlink = invlink if invlink is not None else type(self).default_invlink + self.link = link if link is not None else self.default_link + if self.link is not None and self.link not in self.allowed_links: + raise ValueError( + f"{type(self).__name__} does not support link {self.link!r}; " + f"choose from {self.allowed_links}." + ) self.has_data = has_data + @property + def default_link(self): + """Default link name: the first of ``allowed_links`` (``None`` if empty).""" + return self.allowed_links[0] if self.allowed_links else None + + def _invlink(self, f): + """Map the latent ``f`` to the natural parameter via the inverse link.""" + return LINKS[self.link](f) + def make_node(self, *args): args = [pt.as_tensor_variable(a) for a in args] return Apply(self, args, [_LIK_TYPE()]) @@ -112,8 +144,6 @@ def make_node(self, *args): def perform(self, node, inputs, outputs): outputs[0][0] = None - # --- symbolic behaviour: subclasses implement these three --- - def _log_prob(self, f, y, *params): """Symbolic log p(y|f). Subclasses must implement.""" raise NotImplementedError @@ -126,8 +156,6 @@ def _conditional_variance(self, f, *params): """Var[y|f]. Subclasses must implement.""" raise NotImplementedError - # --- expectations (defaults via quadrature; subclasses may override) --- - def variational_expectation(self, params, y, mu, var): """E_{q(f)}[log p(y|f)] where q(f) = N(mu, var). Default: quadrature.""" return self._gauss_hermite(lambda f, yy: self._log_prob(f, yy, *params), y, mu, var) @@ -227,34 +255,19 @@ def _strip_gp_data(fgraph, node): return [node.inputs[0]] -def build(op_cls, params, x=None, n_points=20, invlink=None): - """Build a :class:`LikelihoodVariable` from an Op class and ordered params. +def to_inputs(params, x): + """Node inputs for ``params`` and an optional design matrix ``x``. - ``params`` is the list of parameter graphs in ``op_cls.param_names`` order - (empty for parameterless likelihoods). When a design matrix ``x`` is given, - it is wrapped in a :class:`GPData` barrier, the parameters are re-expressed - over that barrier (so it sits between ``X`` and the parameters), and the - barrier becomes the node's input 0 — the data demarcator. Used by the family - helpers (:func:`~ptgp.likelihoods.Gaussian`, etc.). + Without ``x`` the inputs are just the parameters. With ``x``, it is wrapped in + a :class:`GPData` barrier and the parameters are re-expressed over it, so the + barrier becomes input 0 — the data demarcator that :func:`at` re-roots. """ - has_data = bool(op_cls.param_names) and x is not None - op = op_cls(n_points=n_points, invlink=invlink, has_data=has_data) params = [pt.as_tensor_variable(p) for p in params] - if has_data: - x = pt.as_tensor_variable(x) - xd = gp_data(x) - # Route the parameters through the barrier so it sits between X and them. - params = list(graph_replace(params, {x: xd}, strict=False)) - params = [xd, *params] - return op(*params) - - -# --- Purely functional API ------------------------------------------------ -# -# Operate on a likelihood node (a :class:`LikelihoodVariable`) and dispatch -# through ``owner.op``. The variable's methods are thin wrappers over these, so -# either style works; the free functions are handy when a dummy node's outputs -# are plain TensorVariables rather than a LikelihoodVariable. + if x is None: + return params + x = pt.as_tensor_variable(x) + xd = gp_data(x) + return [xd, *graph_replace(params, {x: xd}, strict=False)] def _params_of(node): @@ -271,8 +284,7 @@ def op_of(lik): def param(lik, name): """The named parameter input of a likelihood node (e.g. ``"sigma"``).""" node = lik.owner - off = 1 if node.op.has_data else 0 - return node.inputs[off + node.op.param_names.index(name)] + return _params_of(node)[node.op.param_names.index(name)] def at(lik, X): diff --git a/ptgp/likelihoods/bernoulli.py b/ptgp/likelihoods/bernoulli.py index 0aec3e2..4a3deb1 100644 --- a/ptgp/likelihoods/bernoulli.py +++ b/ptgp/likelihoods/bernoulli.py @@ -1,42 +1,44 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import LikelihoodOp, build - - -def inv_probit(x): - """Probit link: Phi(x) = 0.5 * (1 + erf(x / sqrt(2))), clamped to (jitter, 1-jitter).""" - jitter = 1e-3 - return 0.5 * (1.0 + pt.erf(x / pt.sqrt(2.0))) * (1.0 - 2.0 * jitter) + jitter +from ptgp.likelihoods.base import LikelihoodOp, inv_probit class BernoulliOp(LikelihoodOp): """Bernoulli likelihood Op. Closed-form predictive for the probit link.""" param_names = () - default_invlink = staticmethod(inv_probit) + allowed_links = ("probit", "logit", "cloglog") def _log_prob(self, f, y): - p = self.invlink(f) + p = self._invlink(f) return y * pt.log(p) + (1.0 - y) * pt.log(1.0 - p) def _conditional_mean(self, f): - return self.invlink(f) + return self._invlink(f) def _conditional_variance(self, f): - p = self.invlink(f) + p = self._invlink(f) return p * (1.0 - p) def predict_mean_and_var(self, params, mu, var): - if self.invlink is inv_probit: + if self.link == "probit": p = inv_probit(mu / pt.sqrt(1.0 + var)) return p, p - p**2 return super().predict_mean_and_var(params, mu, var) -def Bernoulli(invlink=None, n_points=20): +def Bernoulli(link=None, n_points=20): """Build a Bernoulli likelihood p(y=1|f) = invlink(f). - Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. Default link is - probit (closed-form predictive); pass ``invlink=pt.sigmoid`` for logit. + Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. The probit link + (default) has a closed-form predictive; other links fall back to quadrature. + + Parameters + ---------- + link : str, optional + Inverse link name, one of ``"probit"`` (default), ``"logit"``, or + ``"cloglog"``. + n_points : int + Number of Gauss-Hermite quadrature points (default 20). """ - return build(BernoulliOp, [], n_points=n_points, invlink=invlink) + return BernoulliOp(n_points=n_points, link=link)() diff --git a/ptgp/likelihoods/gaussian.py b/ptgp/likelihoods/gaussian.py index 136f75d..6b74818 100644 --- a/ptgp/likelihoods/gaussian.py +++ b/ptgp/likelihoods/gaussian.py @@ -1,6 +1,6 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import LikelihoodOp, build +from ptgp.likelihoods.base import LikelihoodOp, to_inputs LOG2PI = pt.log(2.0 * pt.pi) @@ -48,4 +48,5 @@ def Gaussian(sigma, x=None): The design matrix ``sigma`` was built against; pass it for heteroskedastic noise so ``.at`` can re-root sigma onto test inputs. """ - return build(GaussianOp, [sigma], x=x) + op = GaussianOp(has_data=x is not None) + return op(*to_inputs([sigma], x)) diff --git a/ptgp/likelihoods/negative_binomial.py b/ptgp/likelihoods/negative_binomial.py index 2c91c18..8cfad3e 100644 --- a/ptgp/likelihoods/negative_binomial.py +++ b/ptgp/likelihoods/negative_binomial.py @@ -1,16 +1,16 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import LikelihoodOp, build +from ptgp.likelihoods.base import LikelihoodOp, to_inputs class NegativeBinomialOp(LikelihoodOp): """Negative binomial likelihood Op. Expectations via quadrature.""" param_names = ("alpha",) - default_invlink = staticmethod(pt.exp) + allowed_links = ("log", "cloglog") def _log_prob(self, f, y, alpha): - mu = self.invlink(f) + mu = self._invlink(f) return ( pt.gammaln(y + alpha) - pt.gammaln(alpha) @@ -20,30 +20,30 @@ def _log_prob(self, f, y, alpha): ) def _conditional_mean(self, f, alpha): - return self.invlink(f) + return self._invlink(f) def _conditional_variance(self, f, alpha): - mu = self.invlink(f) + mu = self._invlink(f) return mu + mu**2 / alpha -def NegativeBinomial(alpha, invlink=None, n_points=20, x=None): +def NegativeBinomial(alpha, link=None, n_points=20, x=None): """Build a negative binomial likelihood NB(y; invlink(f), alpha). Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. Variance is - ``mu + mu**2 / alpha`` with ``mu = invlink(f)``; default link is log - (``invlink=exp``). + ``mu + mu**2 / alpha`` with ``mu = invlink(f)``; all links use quadrature. Parameters ---------- alpha : tensor or PyMC random variable Overdispersion parameter. - invlink : callable, optional - Inverse link function (default: exp). + link : str, optional + Inverse link name, one of ``"log"`` (default) or ``"cloglog"``. n_points : int Number of Gauss-Hermite quadrature points (default 20). x : tensor, optional The design matrix ``alpha`` was built against, for a heteroskedastic parameter re-rooted onto test inputs via ``.at``. """ - return build(NegativeBinomialOp, [alpha], x=x, n_points=n_points, invlink=invlink) + op = NegativeBinomialOp(n_points=n_points, link=link, has_data=x is not None) + return op(*to_inputs([alpha], x)) diff --git a/ptgp/likelihoods/poisson.py b/ptgp/likelihoods/poisson.py index d898b37..5466754 100644 --- a/ptgp/likelihoods/poisson.py +++ b/ptgp/likelihoods/poisson.py @@ -1,34 +1,42 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import LikelihoodOp, build +from ptgp.likelihoods.base import LikelihoodOp class PoissonOp(LikelihoodOp): """Poisson likelihood Op. Closed-form variational expectation for log link.""" param_names = () - default_invlink = staticmethod(pt.exp) + allowed_links = ("log",) def _log_prob(self, f, y): - lam = self.invlink(f) + lam = self._invlink(f) return y * pt.log(lam) - lam - pt.gammaln(y + 1.0) def _conditional_mean(self, f): - return self.invlink(f) + return self._invlink(f) def _conditional_variance(self, f): - return self.invlink(f) + return self._invlink(f) def variational_expectation(self, params, y, mu, var): - if self.invlink is pt.exp: + if self.link == "log": return y * mu - pt.exp(mu + var / 2.0) - pt.gammaln(y + 1.0) return super().variational_expectation(params, y, mu, var) -def Poisson(invlink=None, n_points=20): - """Build a Poisson likelihood p(y|f) = Poisson(y; invlink(f)). +def Poisson(link=None, n_points=20): + """Build a Poisson likelihood p(y|f) with rate ``invlink(f)``. - Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. Default link is - log (closed-form variational expectation); other links fall back to quadrature. + Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. The log link + (the default and only supported link) has a closed-form variational + expectation. + + Parameters + ---------- + link : str, optional + Inverse link name; only ``"log"`` is supported (the default). + n_points : int + Number of Gauss-Hermite quadrature points (default 20). """ - return build(PoissonOp, [], n_points=n_points, invlink=invlink) + return PoissonOp(n_points=n_points, link=link)() diff --git a/ptgp/likelihoods/student_t.py b/ptgp/likelihoods/student_t.py index e9c05e8..7f4974c 100644 --- a/ptgp/likelihoods/student_t.py +++ b/ptgp/likelihoods/student_t.py @@ -1,6 +1,6 @@ import pytensor.tensor as pt -from ptgp.likelihoods.base import LikelihoodOp, build +from ptgp.likelihoods.base import LikelihoodOp, to_inputs class StudentTOp(LikelihoodOp): @@ -25,7 +25,7 @@ def _conditional_variance(self, f, nu, sigma): def StudentT(nu, sigma, n_points=20, x=None): - """Build a Student-T likelihood p(y|f) = StudentT(y; f, sigma, nu). + """Build a Student-T likelihood p(y|f) with scale sigma and degrees of freedom nu. Returns a :class:`~ptgp.likelihoods.base.LikelihoodVariable`. Variational expectation via Gauss-Hermite quadrature. @@ -42,4 +42,5 @@ def StudentT(nu, sigma, n_points=20, x=None): The design matrix ``nu``/``sigma`` were built against, for heteroskedastic parameters re-rooted onto test inputs via ``.at``. """ - return build(StudentTOp, [nu, sigma], x=x, n_points=n_points) + op = StudentTOp(n_points=n_points, has_data=x is not None) + return op(*to_inputs([nu, sigma], x)) diff --git a/tests/likelihoods/test_base.py b/tests/likelihoods/test_base.py index 3bca5cb..2d1d7e7 100644 --- a/tests/likelihoods/test_base.py +++ b/tests/likelihoods/test_base.py @@ -38,7 +38,7 @@ def test_bernoulli_logit_link(self): ) ) ve_logit = _eval( - Bernoulli(invlink=pt.sigmoid).variational_expectation( + Bernoulli(link="logit").variational_expectation( pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var) ) ) @@ -46,20 +46,19 @@ def test_bernoulli_logit_link(self): assert np.all(ve_probit < 0) and np.all(ve_logit < 0) assert not np.allclose(ve_probit, ve_logit) - def test_poisson_custom_link_uses_quadrature(self): - """Poisson with non-exp link should fall back to quadrature and still work.""" - mu, var = np.array([1.0]), np.array([0.1]) - y = np.array([2.0]) - - def softplus(f): - return pt.log1p(pt.exp(f)) - + def test_bernoulli_cloglog_link_uses_quadrature(self): + """cloglog has no closed-form predictive, so it routes through quadrature.""" + mu, var, y = np.array([0.0, 1.0]), np.array([0.5, 0.5]), np.array([1.0, 0.0]) ve = _eval( - Poisson(invlink=softplus).variational_expectation( + Bernoulli(link="cloglog").variational_expectation( pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var) ) ) - assert np.isfinite(ve).all() + assert np.all(ve < 0) + + def test_unsupported_link_raises(self): + with pytest.raises(ValueError, match="does not support link"): + Poisson(link="logit") class TestCloneReplaceData: @@ -103,9 +102,7 @@ def test_reroot_matches_multidim_input(self): def test_reroots_all_tensor_params(self): X = pt.matrix("X", shape=(None, 1)) new = np.array([[1.0], [2.0], [3.0]]) - out = StudentT(nu=3.0 + X[:, 0] ** 2, sigma=_hetero(X), x=X).at( - pt.as_tensor_variable(new) - ) + out = StudentT(nu=3.0 + X[:, 0] ** 2, sigma=_hetero(X), x=X).at(pt.as_tensor_variable(new)) nu_val, sigma_val = _eval(out.nu, out.sigma) np.testing.assert_allclose(nu_val, 3.0 + new[:, 0] ** 2) np.testing.assert_allclose(sigma_val, _hetero(new)) @@ -113,9 +110,7 @@ def test_reroots_all_tensor_params(self): def test_negative_binomial_alpha(self): X = pt.matrix("X", shape=(None, 1)) new = np.array([[0.0], [2.0]]) - out = NegativeBinomial(alpha=_hetero(X), x=X).at( - pt.as_tensor_variable(new) - ) + out = NegativeBinomial(alpha=_hetero(X), x=X).at(pt.as_tensor_variable(new)) np.testing.assert_allclose(_eval(out.alpha), _hetero(new)) def test_leaves_scalar_params_untouched(self): From 096cf55ec295de3ef11ba6de0d0d914e5d675c73 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Fri, 29 May 2026 16:32:13 -0500 Subject: [PATCH 8/8] Expand likelihood test coverage --- tests/likelihoods/test_bernoulli.py | 20 +++++++++----- tests/likelihoods/test_gaussian.py | 10 +++---- tests/likelihoods/test_negative_binomial.py | 13 +++++++++ tests/likelihoods/test_poisson.py | 20 ++++++++++++++ tests/likelihoods/test_student_t.py | 30 +++++++++++++++++++++ 5 files changed, 82 insertions(+), 11 deletions(-) diff --git a/tests/likelihoods/test_bernoulli.py b/tests/likelihoods/test_bernoulli.py index ca75fa8..9ffa2e5 100644 --- a/tests/likelihoods/test_bernoulli.py +++ b/tests/likelihoods/test_bernoulli.py @@ -9,6 +9,7 @@ from gpjax.likelihoods import Bernoulli as GPJaxBernoulli from ptgp.likelihoods import Bernoulli +from ptgp.likelihoods.base import LikelihoodOp ATOL = 1e-5 @@ -42,13 +43,20 @@ def test_ve_against_gpjax(self): np.testing.assert_allclose(ve, gpjax_ve, atol=ATOL) - def test_predict_mean_and_var_closed_form(self): - mu, var = np.array([0.0, 2.0, -2.0]), np.array([0.1, 0.5, 1.0]) - pm_val, pv_val = _eval( - *Bernoulli().predict_mean_and_var(pt.as_tensor_variable(mu), pt.as_tensor_variable(var)) + def test_probit_closed_form_predict_matches_quadrature(self): + """The probit closed-form predictive should agree with the base quadrature + path it overrides — a correctness check, not just bounds.""" + mu, var = np.array([0.0, 0.7, -1.2]), np.array([0.2, 0.5, 1.0]) + op = Bernoulli(n_points=64).owner.op + m_closed, v_closed = op.predict_mean_and_var( + [], pt.as_tensor_variable(mu), pt.as_tensor_variable(var) + ) + m_quad, v_quad = LikelihoodOp.predict_mean_and_var( + op, [], pt.as_tensor_variable(mu), pt.as_tensor_variable(var) ) - assert np.all(pm_val >= 0.0) and np.all(pm_val <= 1.0) - assert np.all(pv_val >= 0.0) and np.all(pv_val <= 0.25) + mc, vc, mq, vq = _eval(m_closed, v_closed, m_quad, v_quad) + np.testing.assert_allclose(mc, mq, atol=1e-4) + np.testing.assert_allclose(vc, vq, atol=1e-4) def test_ve_negative(self): mu, var = np.array([0.0, 2.0, -2.0]), np.array([0.1, 0.5, 1.0]) diff --git a/tests/likelihoods/test_gaussian.py b/tests/likelihoods/test_gaussian.py index 3593739..24f4b11 100644 --- a/tests/likelihoods/test_gaussian.py +++ b/tests/likelihoods/test_gaussian.py @@ -43,18 +43,18 @@ def test_ve_against_gpjax(self): np.testing.assert_allclose(ve, gpjax_ve, atol=ATOL) - def test_zero_var_matches_log_prob(self): + def test_zero_var_collapses_to_log_likelihood(self): + """At zero latent variance the VE reduces to the pointwise log N(y; mu, sigma^2).""" mu, y, sigma = np.array([0.0, 1.0]), np.array([0.1, 0.9]), 0.3 - lik = Gaussian(sigma) ve = _eval( - lik.variational_expectation( + Gaussian(sigma).variational_expectation( pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(np.zeros(2)), ) ) - lp = _eval(lik._log_prob(pt.as_tensor_variable(mu), pt.as_tensor_variable(y))) - np.testing.assert_allclose(ve, lp, atol=1e-12) + expected = -0.5 * (np.log(2 * np.pi * sigma**2) + (y - mu) ** 2 / sigma**2) + np.testing.assert_allclose(ve, expected, atol=1e-12) def test_predict_mean_and_var(self): mu, var, sigma = np.array([1.0, 2.0]), np.array([0.5, 1.0]), 0.3 diff --git a/tests/likelihoods/test_negative_binomial.py b/tests/likelihoods/test_negative_binomial.py index 3bf37ab..61d79ca 100644 --- a/tests/likelihoods/test_negative_binomial.py +++ b/tests/likelihoods/test_negative_binomial.py @@ -29,6 +29,19 @@ def test_quadrature_convergence(self): ) np.testing.assert_allclose(ve_20, ve_50, atol=1e-6) + def test_predict_variance_includes_overdispersion(self): + """At zero latent variance the predictive variance is mu + mu**2 / alpha, + exceeding the Poisson mean-equals-variance.""" + mu, var, alpha = np.array([0.5, 1.0]), np.zeros(2), 3.0 + lam = np.exp(mu) + pm, pv = _eval( + *NegativeBinomial(alpha=alpha).predict_mean_and_var( + pt.as_tensor_variable(mu), pt.as_tensor_variable(var) + ) + ) + np.testing.assert_allclose(pm, lam, atol=1e-10) + np.testing.assert_allclose(pv, lam + lam**2 / alpha, atol=1e-10) + def test_converges_to_poisson(self): """NB with large alpha should approach Poisson.""" mu, var = np.array([0.5, 1.0]), np.array([0.1, 0.3]) diff --git a/tests/likelihoods/test_poisson.py b/tests/likelihoods/test_poisson.py index 534a019..1988c38 100644 --- a/tests/likelihoods/test_poisson.py +++ b/tests/likelihoods/test_poisson.py @@ -4,6 +4,8 @@ import pytensor import pytensor.tensor as pt +from scipy.special import gammaln + from ptgp.likelihoods import Poisson @@ -47,3 +49,21 @@ def test_ve_values(self): ) ) np.testing.assert_allclose(ve, np.array([-1.0]), atol=1e-12) + + def test_quadrature_predict_collapses_at_zero_variance(self): + """At zero latent variance the base quadrature predictives reduce to the + conditional moments and the pointwise log-density at f = mu.""" + mu, y, var = np.array([0.5, 1.0]), np.array([1.0, 3.0]), np.zeros(2) + lam = np.exp(mu) + lik = Poisson() + pm, pv = _eval( + *lik.predict_mean_and_var(pt.as_tensor_variable(mu), pt.as_tensor_variable(var)) + ) + pld = _eval( + lik.predict_log_density( + pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var) + ) + ) + np.testing.assert_allclose(pm, lam, atol=1e-10) + np.testing.assert_allclose(pv, lam, atol=1e-10) + np.testing.assert_allclose(pld, y * mu - lam - gammaln(y + 1.0), atol=1e-10) diff --git a/tests/likelihoods/test_student_t.py b/tests/likelihoods/test_student_t.py index 2a47a8e..3417257 100644 --- a/tests/likelihoods/test_student_t.py +++ b/tests/likelihoods/test_student_t.py @@ -43,3 +43,33 @@ def test_quadrature_convergence(self): ) ) np.testing.assert_allclose(ve_20, ve_50, atol=1e-6) + + def test_predict_variance_uses_studentt_formula(self): + """At zero latent variance the predictive variance is the Student-T + variance sigma**2 * nu / (nu - 2).""" + mu, var = np.array([0.5, -1.0]), np.zeros(2) + nu, sigma = 5.0, 0.7 + pm, pv = _eval( + *StudentT(nu=nu, sigma=sigma).predict_mean_and_var( + pt.as_tensor_variable(mu), pt.as_tensor_variable(var) + ) + ) + np.testing.assert_allclose(pm, mu, atol=1e-6) + np.testing.assert_allclose(pv, sigma**2 * nu / (nu - 2.0), atol=1e-6) + + def test_heavier_tails_than_gaussian_for_outlier(self): + """A far-from-mean observation is penalized less under Student-T than + Gaussian at matched scale — the heavy-tail behavior.""" + mu, var, y = np.array([0.0]), np.array([0.1]), np.array([10.0]) + sigma = 1.0 + ve_studentt = _eval( + StudentT(nu=3.0, sigma=sigma).variational_expectation( + pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var) + ) + ) + ve_gaussian = _eval( + Gaussian(sigma).variational_expectation( + pt.as_tensor_variable(y), pt.as_tensor_variable(mu), pt.as_tensor_variable(var) + ) + ) + assert ve_studentt > ve_gaussian