From c7b0b67e3c0c193bd5b8b0989a4be47caf7e274d Mon Sep 17 00:00:00 2001
From: Jing Xie <xjing76@gmail.com>
Date: Thu, 30 Sep 2021 07:44:17 -0400
Subject: [PATCH] Add horseshoe distribution and corresponding gibbs sampler
 and tests

---
 pymc3_hmm/distributions.py  |  31 +++++++
 pymc3_hmm/step_methods.py   | 156 +++++++++++++++++++++++++++++++++++-
 tests/test_distributions.py |  15 ++++
 tests/test_step_methods.py  | 138 ++++++++++++++++++++++++++++++-
 4 files changed, 337 insertions(+), 3 deletions(-)

diff --git a/pymc3_hmm/distributions.py b/pymc3_hmm/distributions.py
index 1d7d855..1c7ed45 100644
--- a/pymc3_hmm/distributions.py
+++ b/pymc3_hmm/distributions.py
@@ -452,3 +452,34 @@ def logp(self, value):
 
     def _distr_parameters_for_repr(self):
         return ["c"]
+
+
+class HorseShoe(pm.Normal):
+    """
+    A Horseshoe distribution
+
+    """
+
+    def __init__(self, tau, shape, **kwargs):
+        self.tau = tau
+        self.beta_raw = pm.Normal.dist(tau=1, shape=shape)
+        self.lmbda = pm.HalfCauchy.dist(beta=1, shape=shape)
+        super().__init__(shape=shape, **kwargs)
+
+    def random(self, point=None, size=None):
+        beta_raw = self.beta_raw.random(point=point, size=size)
+        lmbda = self.lmbda.random(point=point, size=size)
+        return beta_raw * lmbda * self.tau
+
+    def logp(self, values):
+        """
+        XXX: This is only a placeholder for the log-probability.
+        It is required to get past PyMC3 design restrictions.
+        Do not use this distribution without the `HSStep` sampler!
+
+        """
+        warnings.warn(
+            "logp of HorseShoe class is not implemented."
+            "Do not use this distribution without the `HSStep` sampler!"
+        )
+        return 0
diff --git a/pymc3_hmm/step_methods.py b/pymc3_hmm/step_methods.py
index 4ddcbbb..6a12391 100644
--- a/pymc3_hmm/step_methods.py
+++ b/pymc3_hmm/step_methods.py
@@ -11,9 +11,12 @@
     from aesara.graph.op import get_test_value as test_value
     from aesara.graph.opt import OpRemove, pre_greedy_local_optimizer
     from aesara.graph.optdb import Query
+    from aesara.scalar.basic import Dot
+    from aesara.sparse.basic import StructuredDot
     from aesara.tensor.elemwise import DimShuffle, Elemwise
     from aesara.tensor.subtensor import AdvancedIncSubtensor1
     from aesara.tensor.var import TensorConstant
+
 except ImportError:  # pragma: no cover
     import theano.scalar as aes
     import theano.tensor as at
@@ -26,13 +29,17 @@
     from theano.tensor.elemwise import DimShuffle, Elemwise
     from theano.tensor.subtensor import AdvancedIncSubtensor1
     from theano.tensor.var import TensorConstant
+    from theano.tensor.basic import Dot
+    from theano.sparse.basic import StructuredDot
 
 import pymc3 as pm
+import scipy
 from pymc3.distributions.distribution import draw_values
 from pymc3.step_methods.arraystep import ArrayStep, BlockedStep, Competence
 from pymc3.util import get_untransformed_name
+from scipy.stats import invgamma
 
-from pymc3_hmm.distributions import DiscreteMarkovChain, SwitchingProcess
+from pymc3_hmm.distributions import DiscreteMarkovChain, HorseShoe, SwitchingProcess
 from pymc3_hmm.utils import compute_trans_freqs
 
 big: float = 1e20
@@ -452,3 +459,150 @@ def competence(var):
             return Competence.COMPATIBLE
 
         return Competence.INCOMPATIBLE
+
+
+def large_p_mvnormal_sampler(D_diag, Phi, a):
+    r"""Efficiently sample from a large multivariate normal.
+
+    This function draws samples from the following distribution:
+
+    .. math::
+       \beta \sim \operatorname{N}\left( \mu, \Sigma \right)
+
+    where
+
+    .. math::
+       \mu = \Sigma \Phi^\top a, \\
+       \Sigma = \left( \Phi^\top \Phi + D^{-1} \right)^{-1}
+
+    and :math:`a \in \mathbb{R}^{n}`, :math:`\Phi \in \mathbb{R}^{n \times p}`.
+
+    This approach is particularly effective when :math:`p \gg n`.
+
+    From "Fast sampling with Gaussian scale-mixture priors in high-dimensional
+    regression", Bhattacharya, Chakraborty, and Mallick, 2015.
+
+    """
+    N = a.shape[0]
+    u = np.random.normal(0, np.sqrt(D_diag))
+    delta = np.random.normal(size=N)
+    if scipy.sparse.issparse(Phi):
+        Phi_D = Phi.multiply(D_diag)
+        v = Phi * u + delta
+        Z = (Phi_D * Phi.T + scipy.sparse.eye(N)).toarray()
+        w = scipy.linalg.solve(Z, a - v, assume_a="sym")
+        beta = u + Phi_D.T * w
+    else:
+        Phi_D = Phi * D_diag
+        v = Phi.dot(u) + delta
+        Z = Phi_D.dot(Phi.T)
+        Z.flat[:: N + 1] += 1
+        w = scipy.linalg.solve(Z, a - v, assume_a="sym")
+        beta = u + Phi_D.T @ w
+    return beta
+
+
+def hs_step(
+    lambda2: np.ndarray,
+    tau2: np.ndarray,
+    vi: np.ndarray,
+    xi: np.ndarray,
+    X: np.ndarray,
+    y: np.ndarray,
+):
+    N, M = X.shape
+
+    D_diag = tau2 * lambda2
+    beta = large_p_mvnormal_sampler(D_diag, X, y)
+    beta2 = beta ** 2
+
+    lambda2 = invgamma(a=1, scale=1 / vi + beta2 / (2 * tau2)).rvs()
+    tau2 = invgamma(a=(M + 1) / 2, scale=1 / xi + (beta2 / lambda2).sum() / 2).rvs()
+    vi = invgamma(a=1, scale=1 + 1 / lambda2).rvs()
+    xi = invgamma(a=1, scale=1 + 1 / tau2).rvs()
+
+    return beta, lambda2, tau2, vi, xi
+
+
+class HSStep(BlockedStep):
+    name = "hsgibbs"
+
+    def __init__(self, vars, values=None, model=None):
+        model = pm.modelcontext(model)
+
+        if len(vars) > 1:
+            raise ValueError("This sampler only takes one variable.")
+
+        (beta,) = pm.inputvars(vars)
+
+        if not isinstance(beta.distribution, HorseShoe):
+            raise TypeError("This sampler only samples `HorseShoe`s.")
+
+        non_var_rvs = [
+            value for attr, value in model.named_vars.items() if value != beta
+        ]
+        y_fn, X_fn = None, None
+        # loop through all the rvs excpet for the stepping rv
+        for i in non_var_rvs:
+            # looking thru all the attributes of the rv and see if any of the
+            # parameters are consist of multiplication relationship of the horseshoe var
+            if hasattr(i, "distribution") and isinstance(i.distribution, pm.Normal):
+                mu = i.distribution.mu
+            elif isinstance(i, pm.model.DeterministicWrapper):
+                mu = i.owner.inputs[0]
+            else:
+                continue  # pragma: no cover
+            dense_dot = mu.owner and isinstance(mu.owner.op, Dot)
+            sparse_dot = mu.owner and isinstance(mu.owner.op, StructuredDot)
+
+            dense_inputs = dense_dot and beta in mu.owner.inputs
+            sparse_inputs = sparse_dot and beta in mu.owner.inputs[1].owner.inputs
+
+            if not (dense_inputs or sparse_inputs):
+                continue
+
+            y_fn = None
+            for j in model.observed_RVs:
+
+                if i == j or (
+                    isinstance(j.distribution, pm.Normal) and i == j.distribution.mu
+                ):
+
+                    def y_fn(x):
+                        return j.observations
+
+            if not y_fn:
+                y_fn = model.fn(i)
+
+            if dense_inputs:
+                X_fn = model.fn(mu.owner.inputs[1].T)
+            else:
+                X_fn = model.fn(mu.owner.inputs[0])
+
+        if not (X_fn and y_fn):
+            raise RuntimeError(
+                f"Cannot find Design matrix or dependent variable assoicate with {beta}"
+            )
+
+        self.vars = [beta]
+
+        M = model.test_point[beta.name].shape[-1]
+
+        self.vi = np.full(M, 1)
+        self.lambda2 = np.full(M, 1)
+        self.beta = np.full(M, 1)
+        self.tau2 = 1
+        self.xi = 1
+        self.y_fn = y_fn
+        self.X_fn = X_fn
+
+    def step(self, point):
+        # breakpoint()
+        y = self.y_fn(point)
+        X = self.X_fn(point)
+
+        self.beta, self.lambda2, self.tau2, self.vi, self.xi = hs_step(
+            self.lambda2, self.tau2, self.vi, self.xi, X, y
+        )
+        point[self.vars[0].name] = self.beta
+        return point
diff --git a/tests/test_distributions.py b/tests/test_distributions.py
index 677dc03..fb4c599 100644
--- a/tests/test_distributions.py
+++ b/tests/test_distributions.py
@@ -13,6 +13,7 @@
 from pymc3_hmm.distributions import (
     Constant,
     DiscreteMarkovChain,
+    HorseShoe,
     PoissonZeroProcess,
     SwitchingProcess,
     distribution_subset_args,
@@ -562,3 +563,17 @@ def test_subset_args():
     res = distribution_subset_args(test_dist, shape=[3], idx=test_idx)
     assert np.array_equal(res["mu"].eval(), np.r_[0.1, 2.3])
     assert np.array_equal(res["alpha"].eval(), np.r_[2.0, 2.0])
+
+
+def test_HorseShoe():
+    with pm.Model():
+        test_beta = HorseShoe.dist(tau=1, shape=2)
+        test_sample = test_beta.random(size=1000).eval()
+
+    assert test_sample.shape == (1000, 2)
+    assert np.isclose(np.median(test_sample), 0, atol=0.01)
+    assert np.percentile(test_sample.flatten(), 85) < 2
+    assert np.percentile(test_sample.flatten(), 15) > -2
+    assert np.percentile(test_sample.flatten(), 99) > 10
+    assert np.percentile(test_sample.flatten(), 1) < -10
+    assert test_beta.logp(1) == 0
diff --git a/tests/test_step_methods.py b/tests/test_step_methods.py
index 66e0f96..8565e2a 100644
--- a/tests/test_step_methods.py
+++ b/tests/test_step_methods.py
@@ -5,16 +5,25 @@
 try:
     import aesara.tensor as at
     from aesara.graph.op import get_test_value
+    from aesara.sparse import structured_dot as sp_dot
 except ImportError:
     import theano.tensor as at
     from theano.graph.op import get_test_value
+    from theano.sparse import structured_dot as sp_dot
 
 import pymc3 as pm
 import pytest
 import scipy as sp
 
-from pymc3_hmm.distributions import DiscreteMarkovChain, PoissonZeroProcess
-from pymc3_hmm.step_methods import FFBSStep, TransMatConjugateStep, ffbs_step
+from pymc3_hmm.distributions import DiscreteMarkovChain, HorseShoe, PoissonZeroProcess
+from pymc3_hmm.step_methods import (
+    FFBSStep,
+    HSStep,
+    TransMatConjugateStep,
+    ffbs_step,
+    hs_step,
+    large_p_mvnormal_sampler,
+)
 from pymc3_hmm.utils import compute_steady_state, compute_trans_freqs
 from tests.utils import simulate_poiszero_hmm
 
@@ -337,3 +346,128 @@ def test_TransMatConjugateStep_subtensors():
         )
 
         transmat = TransMatConjugateStep(P_rv)
+
+
+def test_large_p_mvnormal_sampler():
+
+    # test case for dense matrix
+    np.random.seed(2032)
+    X = np.random.normal(size=250).reshape((50, 5))
+    beta_true = np.ones(5)
+    y = np.random.normal(X.dot(beta_true), 1)
+
+    samples = large_p_mvnormal_sampler(np.ones(5), X, y)
+    assert samples.shape == (5,)
+
+    sample_sigma = np.linalg.inv(X.T.dot(X) + np.eye(5))
+    sample_mu = sample_sigma.dot(X.T).dot(y)
+    np.testing.assert_allclose(sample_mu.mean(), 1, rtol=0.1, atol=0)
+
+    # test case for sparse matrix
+    samples_sp = large_p_mvnormal_sampler(np.ones(5), sp.sparse.csr_matrix(X), y)
+    assert samples_sp.shape == (5,)
+    np.testing.assert_allclose(samples_sp.mean(), 1, rtol=0.1, atol=0)
+
+
+def test_hs_step():
+    # test case for dense matrix
+    np.random.seed(2032)
+    M = 5
+    X = np.random.normal(size=250).reshape((50, M))
+    beta_true = np.random.normal(size=M)
+    y = np.random.normal(X.dot(beta_true), 1)
+
+    vi = np.full(M, 1)
+    lambda2 = np.full(M, 1)
+    tau2 = 1
+    xi = 1
+    beta, lambda2, tau2, vi, xi = hs_step(lambda2, tau2, vi, xi, X, y)
+    assert beta.shape == beta_true.shape
+    assert (np.abs(beta - beta_true) / beta_true).mean() < 0.5
+
+    # test case for sparse matrix
+
+    vi = np.full(M, 1)
+    lambda2 = np.full(M, 1)
+    tau2 = 1
+    xi = 1
+    beta, lambda2, tau2, vi, xi = hs_step(
+        lambda2, tau2, vi, xi, sp.sparse.csr_matrix(X), y
+    )
+    assert beta.shape == beta_true.shape
+    assert (np.abs(beta - beta_true) / beta_true).mean() < 0.5
+
+
+def test_Hsstep():
+    np.random.seed(2032)
+    M = 5
+    N = 50
+    X = np.random.normal(size=N * M).reshape((N, M))
+    beta_true = np.random.normal(size=M)
+    y = np.random.normal(X.dot(beta_true), 1)
+
+    M = X.shape[1]
+    with pm.Model():
+        beta = HorseShoe("beta", tau=1, shape=M)
+        pm.Normal("y", mu=beta.dot(X.T), sigma=1, observed=y)
+        hsstep = HSStep([beta])
+        trace = pm.sample(
+            draws=50, tune=0, step=hsstep, chains=1, return_inferencedata=True
+        )
+
+    beta_samples = trace.posterior["beta"][0].values
+
+    assert beta_samples.shape == (50, M)
+    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.3)
+
+    with pm.Model():
+        beta = HorseShoe("beta", tau=1, shape=M)
+        mu = pm.Deterministic("mu", beta.dot(X.T))
+        pm.Normal("y", mu=mu, sigma=1, observed=y)
+        hsstep = HSStep([beta])
+        trace = pm.sample(
+            draws=50, tune=0, step=hsstep, chains=1, return_inferencedata=True
+        )
+
+    beta_samples = trace.posterior["beta"][0].values
+    assert beta_samples.shape == (50, M)
+    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.3)
+
+    with pm.Model():
+        beta = HorseShoe("beta", tau=1, shape=M)
+        pm.TruncatedNormal("alpha", mu=beta.dot(X.T), sigma=1, observed=y)
+
+        with pytest.raises(RuntimeError):
+            hsstep = HSStep([beta])
+
+    with pm.Model():
+        beta = HorseShoe("beta", tau=1, shape=M)
+        mu = pm.Deterministic("mu", beta.dot(X.T))
+        pm.TruncatedNormal("y", mu=mu, sigma=1, observed=y)
+        hsstep = HSStep([beta])
+        trace = pm.sample(
+            draws=50, tune=0, step=hsstep, chains=1, return_inferencedata=True
+        )
+
+    beta_samples = trace.posterior["beta"][0].values
+    assert beta_samples.shape == (50, M)
+
+    # test case for sparse matrix
+    X = sp.sparse.csr_matrix(X)
+
+    M = X.shape[1]
+    with pm.Model():
+        beta = HorseShoe("beta", tau=1, shape=M)
+        pm.Normal("y", mu=sp_dot(X, at.shape_padright(beta)), sigma=1, observed=y)
+        hsstep = HSStep([beta])
+        trace = pm.sample(
+            draws=50, tune=0, step=hsstep, chains=1, return_inferencedata=True
+        )
+        beta_2 = HorseShoe("beta_2", tau=1, shape=M)
+        with pytest.raises(ValueError):
+            HSStep([beta, beta_2])
+
+    beta_samples = trace.posterior["beta"][0].values
+
+    assert beta_samples.shape == (50, M)
+    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.3)