From c7b0b67e3c0c193bd5b8b0989a4be47caf7e274d Mon Sep 17 00:00:00 2001 From: Jing Xie Date: Thu, 30 Sep 2021 07:44:17 -0400 Subject: [PATCH] Add horseshoe distribution and corresponding gibbs sampler and tests --- pymc3_hmm/distributions.py | 31 +++++++ pymc3_hmm/step_methods.py | 156 +++++++++++++++++++++++++++++++++++- tests/test_distributions.py | 15 ++++ tests/test_step_methods.py | 138 ++++++++++++++++++++++++++++++- 4 files changed, 337 insertions(+), 3 deletions(-) diff --git a/pymc3_hmm/distributions.py b/pymc3_hmm/distributions.py index 1d7d855..1c7ed45 100644 --- a/pymc3_hmm/distributions.py +++ b/pymc3_hmm/distributions.py @@ -452,3 +452,34 @@ def logp(self, value): def _distr_parameters_for_repr(self): return ["c"] + + +class HorseShoe(pm.Normal): + """ + A Horseshoe distribution + + """ + + def __init__(self, tau, shape, **kwargs): + self.tau = tau + self.beta_raw = pm.Normal.dist(tau=1, shape=shape) + self.lmbda = pm.HalfCauchy.dist(beta=1, shape=shape) + super().__init__(shape=shape, **kwargs) + + def random(self, point=None, size=None): + beta_raw = self.beta_raw.random(point=point, size=size) + lmbda = self.lmbda.random(point=point, size=size) + return beta_raw * lmbda * self.tau + + def logp(self, values): + """ + XXX: This is only a placeholder for the log-probability. + It is required to get past PyMC3 design restrictions. + Do not use this distribution without the `HSStep` sampler! + + """ + warnings.warn( + "logp of HorseShoe class is not implemented." + "Do not use this distribution without the `HSStep` sampler!" + ) + return 0 diff --git a/pymc3_hmm/step_methods.py b/pymc3_hmm/step_methods.py index 4ddcbbb..6a12391 100644 --- a/pymc3_hmm/step_methods.py +++ b/pymc3_hmm/step_methods.py @@ -11,9 +11,12 @@ from aesara.graph.op import get_test_value as test_value from aesara.graph.opt import OpRemove, pre_greedy_local_optimizer from aesara.graph.optdb import Query + from aesara.scalar.basic import Dot + from aesara.sparse.basic import StructuredDot from aesara.tensor.elemwise import DimShuffle, Elemwise from aesara.tensor.subtensor import AdvancedIncSubtensor1 from aesara.tensor.var import TensorConstant + except ImportError: # pragma: no cover import theano.scalar as aes import theano.tensor as at @@ -26,13 +29,17 @@ from theano.tensor.elemwise import DimShuffle, Elemwise from theano.tensor.subtensor import AdvancedIncSubtensor1 from theano.tensor.var import TensorConstant + from theano.tensor.basic import Dot + from theano.sparse.basic import StructuredDot import pymc3 as pm +import scipy from pymc3.distributions.distribution import draw_values from pymc3.step_methods.arraystep import ArrayStep, BlockedStep, Competence from pymc3.util import get_untransformed_name +from scipy.stats import invgamma -from pymc3_hmm.distributions import DiscreteMarkovChain, SwitchingProcess +from pymc3_hmm.distributions import DiscreteMarkovChain, HorseShoe, SwitchingProcess from pymc3_hmm.utils import compute_trans_freqs big: float = 1e20 @@ -452,3 +459,150 @@ def competence(var): return Competence.COMPATIBLE return Competence.INCOMPATIBLE + + +def large_p_mvnormal_sampler(D_diag, Phi, a): + r"""Efficiently sample from a large multivariate normal. + + This function draws samples from the following distribution: + + .. math:: + \beta \sim \operatorname{N}\left( \mu, \Sigma \right) + + where + + .. math:: + \mu = \Sigma \Phi^\top a, \\ + \Sigma = \left( \Phi^\top \Phi + D^{-1} \right)^{-1} + + and :math:`a \in \mathbb{R}^{n}`, :math:`\Phi \in \mathbb{R}^{n \times p}`. + + This approach is particularly effective when :math:`p \gg n`. + + From "Fast sampling with Gaussian scale-mixture priors in high-dimensional + regression", Bhattacharya, Chakraborty, and Mallick, 2015. + + """ + N = a.shape[0] + u = np.random.normal(0, np.sqrt(D_diag)) + delta = np.random.normal(size=N) + if scipy.sparse.issparse(Phi): + Phi_D = Phi.multiply(D_diag) + v = Phi * u + delta + Z = (Phi_D * Phi.T + scipy.sparse.eye(N)).toarray() + w = scipy.linalg.solve(Z, a - v, assume_a="sym") + beta = u + Phi_D.T * w + else: + Phi_D = Phi * D_diag + v = Phi.dot(u) + delta + Z = Phi_D.dot(Phi.T) + Z.flat[:: N + 1] += 1 + w = scipy.linalg.solve(Z, a - v, assume_a="sym") + beta = u + Phi_D.T @ w + return beta + + +def hs_step( + lambda2: np.ndarray, + tau2: np.ndarray, + vi: np.ndarray, + xi: np.ndarray, + X: np.ndarray, + y: np.ndarray, +): + N, M = X.shape + + D_diag = tau2 * lambda2 + beta = large_p_mvnormal_sampler(D_diag, X, y) + beta2 = beta ** 2 + + lambda2 = invgamma(a=1, scale=1 / vi + beta2 / (2 * tau2)).rvs() + tau2 = invgamma(a=(M + 1) / 2, scale=1 / xi + (beta2 / lambda2).sum() / 2).rvs() + vi = invgamma(a=1, scale=1 + 1 / lambda2).rvs() + xi = invgamma(a=1, scale=1 + 1 / tau2).rvs() + + return beta, lambda2, tau2, vi, xi + + +class HSStep(BlockedStep): + name = "hsgibbs" + + def __init__(self, vars, values=None, model=None): + model = pm.modelcontext(model) + + if len(vars) > 1: + raise ValueError("This sampler only takes one variable.") + + (beta,) = pm.inputvars(vars) + + if not isinstance(beta.distribution, HorseShoe): + raise TypeError("This sampler only samples `HorseShoe`s.") + + non_var_rvs = [ + value for attr, value in model.named_vars.items() if value != beta + ] + y_fn, X_fn = None, None + # loop through all the rvs excpet for the stepping rv + for i in non_var_rvs: + # looking thru all the attributes of the rv and see if any of the + # parameters are consist of multiplication relationship of the horseshoe var + if hasattr(i, "distribution") and isinstance(i.distribution, pm.Normal): + mu = i.distribution.mu + elif isinstance(i, pm.model.DeterministicWrapper): + mu = i.owner.inputs[0] + else: + continue # pragma: no cover + dense_dot = mu.owner and isinstance(mu.owner.op, Dot) + sparse_dot = mu.owner and isinstance(mu.owner.op, StructuredDot) + + dense_inputs = dense_dot and beta in mu.owner.inputs + sparse_inputs = sparse_dot and beta in mu.owner.inputs[1].owner.inputs + + if not (dense_inputs or sparse_inputs): + continue + + y_fn = None + for j in model.observed_RVs: + + if i == j or ( + isinstance(j.distribution, pm.Normal) and i == j.distribution.mu + ): + + def y_fn(x): + return j.observations + + if not y_fn: + y_fn = model.fn(i) + + if dense_inputs: + X_fn = model.fn(mu.owner.inputs[1].T) + else: + X_fn = model.fn(mu.owner.inputs[0]) + + if not (X_fn and y_fn): + raise RuntimeError( + f"Cannot find Design matrix or dependent variable assoicate with {beta}" + ) + + self.vars = [beta] + + M = model.test_point[beta.name].shape[-1] + + self.vi = np.full(M, 1) + self.lambda2 = np.full(M, 1) + self.beta = np.full(M, 1) + self.tau2 = 1 + self.xi = 1 + self.y_fn = y_fn + self.X_fn = X_fn + + def step(self, point): + # breakpoint() + y = self.y_fn(point) + X = self.X_fn(point) + + self.beta, self.lambda2, self.tau2, self.vi, self.xi = hs_step( + self.lambda2, self.tau2, self.vi, self.xi, X, y + ) + point[self.vars[0].name] = self.beta + return point diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 677dc03..fb4c599 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -13,6 +13,7 @@ from pymc3_hmm.distributions import ( Constant, DiscreteMarkovChain, + HorseShoe, PoissonZeroProcess, SwitchingProcess, distribution_subset_args, @@ -562,3 +563,17 @@ def test_subset_args(): res = distribution_subset_args(test_dist, shape=[3], idx=test_idx) assert np.array_equal(res["mu"].eval(), np.r_[0.1, 2.3]) assert np.array_equal(res["alpha"].eval(), np.r_[2.0, 2.0]) + + +def test_HorseShoe(): + with pm.Model(): + test_beta = HorseShoe.dist(tau=1, shape=2) + test_sample = test_beta.random(size=1000).eval() + + assert test_sample.shape == (1000, 2) + assert np.isclose(np.median(test_sample), 0, atol=0.01) + assert np.percentile(test_sample.flatten(), 85) < 2 + assert np.percentile(test_sample.flatten(), 15) > -2 + assert np.percentile(test_sample.flatten(), 99) > 10 + assert np.percentile(test_sample.flatten(), 1) < -10 + assert test_beta.logp(1) == 0 diff --git a/tests/test_step_methods.py b/tests/test_step_methods.py index 66e0f96..8565e2a 100644 --- a/tests/test_step_methods.py +++ b/tests/test_step_methods.py @@ -5,16 +5,25 @@ try: import aesara.tensor as at from aesara.graph.op import get_test_value + from aesara.sparse import structured_dot as sp_dot except ImportError: import theano.tensor as at from theano.graph.op import get_test_value + from theano.sparse import structured_dot as sp_dot import pymc3 as pm import pytest import scipy as sp -from pymc3_hmm.distributions import DiscreteMarkovChain, PoissonZeroProcess -from pymc3_hmm.step_methods import FFBSStep, TransMatConjugateStep, ffbs_step +from pymc3_hmm.distributions import DiscreteMarkovChain, HorseShoe, PoissonZeroProcess +from pymc3_hmm.step_methods import ( + FFBSStep, + HSStep, + TransMatConjugateStep, + ffbs_step, + hs_step, + large_p_mvnormal_sampler, +) from pymc3_hmm.utils import compute_steady_state, compute_trans_freqs from tests.utils import simulate_poiszero_hmm @@ -337,3 +346,128 @@ def test_TransMatConjugateStep_subtensors(): ) transmat = TransMatConjugateStep(P_rv) + + +def test_large_p_mvnormal_sampler(): + + # test case for dense matrix + np.random.seed(2032) + X = np.random.normal(size=250).reshape((50, 5)) + beta_true = np.ones(5) + y = np.random.normal(X.dot(beta_true), 1) + + samples = large_p_mvnormal_sampler(np.ones(5), X, y) + assert samples.shape == (5,) + + sample_sigma = np.linalg.inv(X.T.dot(X) + np.eye(5)) + sample_mu = sample_sigma.dot(X.T).dot(y) + np.testing.assert_allclose(sample_mu.mean(), 1, rtol=0.1, atol=0) + + # test case for sparse matrix + samples_sp = large_p_mvnormal_sampler(np.ones(5), sp.sparse.csr_matrix(X), y) + assert samples_sp.shape == (5,) + np.testing.assert_allclose(samples_sp.mean(), 1, rtol=0.1, atol=0) + + +def test_hs_step(): + # test case for dense matrix + np.random.seed(2032) + M = 5 + X = np.random.normal(size=250).reshape((50, M)) + beta_true = np.random.normal(size=M) + y = np.random.normal(X.dot(beta_true), 1) + + vi = np.full(M, 1) + lambda2 = np.full(M, 1) + tau2 = 1 + xi = 1 + beta, lambda2, tau2, vi, xi = hs_step(lambda2, tau2, vi, xi, X, y) + assert beta.shape == beta_true.shape + assert (np.abs(beta - beta_true) / beta_true).mean() < 0.5 + + # test case for sparse matrix + + vi = np.full(M, 1) + lambda2 = np.full(M, 1) + tau2 = 1 + xi = 1 + beta, lambda2, tau2, vi, xi = hs_step( + lambda2, tau2, vi, xi, sp.sparse.csr_matrix(X), y + ) + assert beta.shape == beta_true.shape + assert (np.abs(beta - beta_true) / beta_true).mean() < 0.5 + + +def test_Hsstep(): + np.random.seed(2032) + M = 5 + N = 50 + X = np.random.normal(size=N * M).reshape((N, M)) + beta_true = np.random.normal(size=M) + y = np.random.normal(X.dot(beta_true), 1) + + M = X.shape[1] + with pm.Model(): + beta = HorseShoe("beta", tau=1, shape=M) + pm.Normal("y", mu=beta.dot(X.T), sigma=1, observed=y) + hsstep = HSStep([beta]) + trace = pm.sample( + draws=50, tune=0, step=hsstep, chains=1, return_inferencedata=True + ) + + beta_samples = trace.posterior["beta"][0].values + + assert beta_samples.shape == (50, M) + np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.3) + + with pm.Model(): + beta = HorseShoe("beta", tau=1, shape=M) + mu = pm.Deterministic("mu", beta.dot(X.T)) + pm.Normal("y", mu=mu, sigma=1, observed=y) + hsstep = HSStep([beta]) + trace = pm.sample( + draws=50, tune=0, step=hsstep, chains=1, return_inferencedata=True + ) + + beta_samples = trace.posterior["beta"][0].values + assert beta_samples.shape == (50, M) + np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.3) + + with pm.Model(): + beta = HorseShoe("beta", tau=1, shape=M) + pm.TruncatedNormal("alpha", mu=beta.dot(X.T), sigma=1, observed=y) + + with pytest.raises(RuntimeError): + hsstep = HSStep([beta]) + + with pm.Model(): + beta = HorseShoe("beta", tau=1, shape=M) + mu = pm.Deterministic("mu", beta.dot(X.T)) + pm.TruncatedNormal("y", mu=mu, sigma=1, observed=y) + hsstep = HSStep([beta]) + trace = pm.sample( + draws=50, tune=0, step=hsstep, chains=1, return_inferencedata=True + ) + + beta_samples = trace.posterior["beta"][0].values + assert beta_samples.shape == (50, M) + + # test case for sparse matrix + X = sp.sparse.csr_matrix(X) + + M = X.shape[1] + with pm.Model(): + beta = HorseShoe("beta", tau=1, shape=M) + pm.Normal("y", mu=sp_dot(X, at.shape_padright(beta)), sigma=1, observed=y) + hsstep = HSStep([beta]) + trace = pm.sample( + draws=50, tune=0, step=hsstep, chains=1, return_inferencedata=True + ) + beta_2 = HorseShoe("beta_2", tau=1, shape=M) + with pytest.raises(ValueError): + HSStep([beta, beta_2]) + + beta_samples = trace.posterior["beta"][0].values + + assert beta_samples.shape == (50, M) + np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.3)