add _draw_auto_regression_np (#161)

* add _predict_auto_regression_np * Update mesmer/create_emulations/create_emus_lv.py * Apply suggestions from code review * linting * restructure, rename, docstring * add tests * add to docs * add test intercept cells * fix docstring * CHANGELOG * Update mesmer/create_emulations/create_emus_gv.py * Apply suggestions from code review Co-authored-by: Zeb Nicholls <zebedee.nicholls@climate-energy-college.org> * rename function, clean names Co-authored-by: Zeb Nicholls <zebedee.nicholls@climate-energy-college.org>
MESMER-group · Jun 21, 2022 · 0c5b265 · 0c5b265
1 parent 7b3c535
commit 0c5b265
Show file tree

Hide file tree

Showing 6 changed files with 213 additions and 50 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,7 @@ v0.9.0 - unreleased
 
 New Features
 ^^^^^^^^^^^^
+
 - Create :py:class:`mesmer.core.linear_regression.LinearRegression` which encapsulates
   ``fit``, ``predict``, etc. methods around linear regression
   (`#134 <https://github.com/MESMER-group/mesmer/pull/134>`_).
@@ -19,6 +20,9 @@ New Features
 - Add ``mesmer.core.auto_regression._fit_auto_regression_xr``: xarray wrapper to fit an
   auto regression model (`#139 <https://github.com/MESMER-group/mesmer/pull/139>`_).
   By `Mathias Hauser <https://github.com/mathause>`_.
+- Add ``mesmer.core.auto_regression._draw_auto_regression_correlated_np``: to draw samples of an
+  auto regression model (`#161 <https://github.com/MESMER-group/mesmer/pull/161>`_).
+  By `Mathias Hauser <https://github.com/mathause>`_.
 
 Breaking changes
 ^^^^^^^^^^^^^^^^

diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -22,6 +22,8 @@ Statistical core functions
    ~core.linear_regression.LinearRegression.residuals
    ~core.linear_regression.LinearRegression.to_netcdf
    ~core.linear_regression.LinearRegression.from_netcdf
+   ~core.auto_regression._fit_auto_regression_xr
+   ~core.auto_regression._draw_auto_regression_correlated_np
 
 Train mesmer
 ------------

diff --git a/mesmer/core/auto_regression.py b/mesmer/core/auto_regression.py
@@ -2,6 +2,80 @@
 import xarray as xr
 
 
+def _draw_auto_regression_correlated_np(
+    *, intercept, coefs, covariance, n_samples, n_ts, seed, buffer
+):
+    """
+    Draw time series of an auto regression process with possibly spatially-correlated
+    innovations
+
+    Creates `n_samples` auto-correlated time series of order `ar_order` and length
+    `n_ts` for each set of `n_coefs` coefficients (typically one set for each grid
+    point), the resulting array has shape n_samples x n_ts x n_coefs. The innovations
+    can be spatially correlated.
+
+    Parameters
+    ----------
+    intercept : float or ndarray of length n_coefs
+        Intercept of the model.
+    coefs : ndarray of shape ar_order x n_coefs
+        The coefficients of the autoregressive process. Must be a 2D array with the
+        autoregressive coefficients along axis=0, while axis=1 contains all idependent
+        coefficients.
+    covariance : float or ndarray of shape n_coefs x n_coefs
+        The (co-)variance array. Must be symmetric and positive-semidefinite.
+    n_samples : int
+        Number of samples to draw for each set of coefficients.
+    n_ts : int
+        Number of time steps to draw.
+    seed : int
+        Seed used to initialize the pseudo-random number generator.
+    buffer : int
+        Buffer to initialize the autoregressive process (ensures that start at 0 does
+        not influence overall result).
+
+    Returns
+    -------
+    out : ndarray
+        Drawn realizations of the specified autoregressive process. The array has shape
+        n_samples x n_ts x n_coefs.
+
+    Notes
+    -----
+    The 'innovations' is the error or noise term.
+
+    As this is not a deterministic function it is not called `predict`. "Predicting"
+    an autoregressive process does not include the innovations and therefore asymptotes
+    towards a certain value (in contrast to this function).
+    """
+    intercept = np.array(intercept)
+    covariance = np.atleast_2d(covariance)
+
+    # coeffs assumed to be ar_order x n_coefs
+    ar_order, n_coefs = coefs.shape
+
+    # TODO: allow arbitrary lags? (e.g., [1, 12]) -> need to pass `ar_lags` (see #164)
+    ar_lags = np.arange(1, ar_order + 1, dtype=int)
+
+    # ensure reproducibility (TODO: clarify approach to this, see #35)
+    np.random.seed(seed)
+
+    innovations = np.random.multivariate_normal(
+        mean=np.zeros(n_coefs),
+        cov=covariance,
+        size=[n_samples, n_ts + buffer],
+    )
+
+    out = np.zeros([n_samples, n_ts + buffer, n_coefs])
+    for t in range(ar_order + 1, n_ts + buffer):
+
+        ar = np.sum(coefs * out[:, t - ar_lags, :], axis=1)
+
+        out[:, t, :] = intercept + ar + innovations[:, t, :]
+
+    return out[:, buffer:, :]
+
+
 def _fit_auto_regression_xr(data, dim, lags):
     """
     fit an auto regression - xarray wrapper

diff --git a/mesmer/create_emulations/create_emus_gv.py b/mesmer/create_emulations/create_emus_gv.py
@@ -12,6 +12,8 @@
 import joblib
 import numpy as np
 
+from mesmer.core.auto_regression import _draw_auto_regression_correlated_np
+
 
 def create_emus_gv(params_gv, preds_gv, cfg, save_emus=True):
     """Create global variablity emulations for specified method.
@@ -157,33 +159,24 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed):
     AR_order_sel = params_gv["AR_order_sel"]
     AR_std_innovs = params_gv["AR_std_innovs"]
 
-    ar_lags = np.arange(1, AR_order_sel + 1, dtype=int)
-    shape = (nr_emus_v, nr_ts_emus_v + buffer)
-
     # if AR(0) process chosen, no AR_coefs are available -> to have code run
     # nevertheless ar_coefs and ar_lags are set to 0 (-> emus are created with
     # ar_int + innovs)
     if len(ar_coefs) == 0:
         ar_coefs = [0]
-        ar_lags = [0]
-
-    innovs_emus_gv = np.random.normal(loc=0, scale=AR_std_innovs, size=shape)
-
-    # initialize global variability emulations (dim array: nr_emus x nr_ts)
-    emus_gv = np.zeros(shape, dtype=float)
 
     # only use the selected coeffs
     ar_coefs = ar_coefs[:AR_order_sel]
 
-    # simulate from AR process (all realizations)
-    for t in range(AR_order_sel + 1, nr_ts_emus_v + buffer):
-        emus_gv[:, t] = (
-            ar_int
-            + emus_gv[:, t - ar_lags] @ ar_coefs  # sum of products
-            + innovs_emus_gv[:, t]
-        )
-
-    # remove buffer
-    emus_gv = emus_gv[:, buffer:]
-
-    return emus_gv
+    emus_gv = _draw_auto_regression_correlated_np(
+        intercept=ar_int,
+        # reshape to n_coefs x n_cells
+        coefs=ar_coefs[:, np.newaxis],
+        covariance=AR_std_innovs**2,  # pass the (co-)variance!
+        n_samples=nr_emus_v,
+        n_ts=nr_ts_emus_v,
+        seed=seed,
+        buffer=buffer,
+    )
+
+    return emus_gv.squeeze()
diff --git a/mesmer/create_emulations/create_emus_lv.py b/mesmer/create_emulations/create_emus_lv.py
@@ -12,6 +12,8 @@
 import joblib
 import numpy as np
 
+from mesmer.core.auto_regression import _draw_auto_regression_correlated_np
+
 
 def create_emus_lv(params_lv, preds_lv, cfg, save_emus=True, submethod=""):
     """Create local variablity emulations.
@@ -180,39 +182,20 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
             # in case no emus_lv[scen] exist yet, initialize it. Otherwise build up on
             # existing one
             if len(emus_lv[scen]) == 0:
-
                 emus_lv[scen][targ] = np.zeros(nr_emus_v, nr_ts_emus_stoch_v, nr_gps)
 
-            # ensure reproducibility
-            np.random.seed(seed)
-
-            # buffer so that initial start at 0 does not influence overall result
-            buffer = 20
-
-            print("Draw the innovations")
-            # draw the innovations
-            innovs = np.random.multivariate_normal(
-                np.zeros(nr_gps),
-                params_lv["loc_ecov_AR1_innovs"][targ],
-                size=[nr_emus_v, nr_ts_emus_stoch_v + buffer],
-            )
-
-            print(
-                "Compute the contribution to emus_lv by the AR(1) process with the "
-                "spatially correlated innovations"
+            emus_ar = _draw_auto_regression_correlated_np(
+                intercept=params_lv["AR1_int"][targ],
+                # reshape to n_coefs x n_cells
+                coefs=params_lv["AR1_coef"][targ][np.newaxis, :],
+                covariance=params_lv["loc_ecov_AR1_innovs"][targ],
+                n_samples=nr_emus_v,
+                n_ts=nr_ts_emus_stoch_v,
+                seed=seed,
+                buffer=20,
             )
-            emus_lv_tmp = np.zeros([nr_emus_v, nr_ts_emus_stoch_v + buffer, nr_gps])
-            for t in range(1, nr_ts_emus_stoch_v + buffer):
-                emus_lv_tmp[:, t, :] = (
-                    params_lv["AR1_int"][targ]
-                    + params_lv["AR1_coef"][targ] * emus_lv_tmp[:, t - 1, :]
-                    + innovs[:, t, :]
-                )
-            emus_lv_tmp = emus_lv_tmp[:, buffer:, :]
-
-            print("Create the full local variability emulations")
-            emus_lv[scen][targ] += emus_lv_tmp
 
+            emus_lv[scen][targ] += emus_ar.squeeze()
     return emus_lv
 
 

diff --git a/tests/integration/test_auto_regression.py b/tests/integration/test_auto_regression.py
@@ -10,6 +10,113 @@
 from .utils import trend_data_1D, trend_data_2D
 
 
+@pytest.mark.parametrize("ar_order", [1, 8])
+@pytest.mark.parametrize("n_cells", [1, 10])
+@pytest.mark.parametrize("n_samples", [2, 5])
+@pytest.mark.parametrize("n_ts", [3, 7])
+def test_draw_auto_regression_correlated_np_shape(ar_order, n_cells, n_samples, n_ts):
+
+    intercept = np.zeros(n_cells)
+    coefs = np.ones((ar_order, n_cells))
+    covariance = np.ones((n_cells, n_cells))
+
+    result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
+        intercept=intercept,
+        coefs=coefs,
+        covariance=covariance,
+        n_samples=n_samples,
+        n_ts=n_ts,
+        seed=0,
+        buffer=10,
+    )
+
+    expected_shape = (n_samples, n_ts, n_cells)
+
+    assert result.shape == expected_shape
+
+
+@pytest.mark.parametrize("intercept", [0, 1, 3.14])
+def test_draw_auto_regression_deterministic_intercept(intercept):
+
+    result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
+        intercept=intercept,
+        coefs=np.array([[0]]),
+        covariance=[0],
+        n_samples=1,
+        n_ts=3,
+        seed=0,
+        buffer=10,
+    )
+
+    expected = np.full((1, 3, 1), intercept)
+
+    np.testing.assert_equal(result, expected)
+
+    result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
+        intercept=np.array([[0, intercept]]),
+        coefs=np.array([[0, 0]]),
+        covariance=np.zeros((2, 2)),
+        n_samples=1,
+        n_ts=1,
+        seed=0,
+        buffer=10,
+    )
+
+    expected = np.array([0, intercept]).reshape(1, 1, 2)
+
+    np.testing.assert_equal(result, expected)
+
+
+def test_draw_auto_regression_deterministic_coefs_buffer():
+
+    result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
+        intercept=1,
+        coefs=np.array([[1]]),
+        covariance=[0],
+        n_samples=1,
+        n_ts=4,
+        seed=0,
+        buffer=1,
+    )
+
+    expected = np.arange(4).reshape(1, -4, 1)
+
+    np.testing.assert_equal(result, expected)
+
+    expected = np.array([0, 1, 1.5, 1.75, 1.875]).reshape(1, -1, 1)
+
+    for i, buffer in enumerate([1, 2]):
+        result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
+            intercept=1,
+            coefs=np.array([[0.5]]),
+            covariance=[0],
+            n_samples=1,
+            n_ts=4,
+            seed=0,
+            buffer=buffer,
+        )
+
+        np.testing.assert_allclose(result, expected[:, i : i + 4])
+
+
+def test_draw_auto_regression_random():
+
+    result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
+        intercept=1,
+        coefs=np.array([[0.375], [0.125]]),
+        covariance=0.5,
+        n_samples=1,
+        n_ts=4,
+        seed=0,
+        buffer=3,
+    )
+
+    expected = np.array([2.58455078, 3.28976946, 1.86569258, 2.78266986])
+    expected = expected.reshape(1, 4, 1)
+
+    np.testing.assert_allclose(result, expected)
+
+
 @pytest.mark.parametrize("obj", [xr.Dataset(), None])
 def test_fit_auto_regression_xr_errors(obj):