Skip to content

Commit

Permalink
add _draw_auto_regression_np (#161)
Browse files Browse the repository at this point in the history
* add _predict_auto_regression_np

* Update mesmer/create_emulations/create_emus_lv.py

* Apply suggestions from code review

* linting

* restructure, rename, docstring

* add tests

* add to docs

* add test intercept cells

* fix docstring

* CHANGELOG

* Update mesmer/create_emulations/create_emus_gv.py

* Apply suggestions from code review

Co-authored-by: Zeb Nicholls <zebedee.nicholls@climate-energy-college.org>

* rename function, clean names

Co-authored-by: Zeb Nicholls <zebedee.nicholls@climate-energy-college.org>
  • Loading branch information
mathause and znicholls authored Jun 21, 2022
1 parent 7b3c535 commit 0c5b265
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 50 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ v0.9.0 - unreleased

New Features
^^^^^^^^^^^^

- Create :py:class:`mesmer.core.linear_regression.LinearRegression` which encapsulates
``fit``, ``predict``, etc. methods around linear regression
(`#134 <https://github.com/MESMER-group/mesmer/pull/134>`_).
Expand All @@ -19,6 +20,9 @@ New Features
- Add ``mesmer.core.auto_regression._fit_auto_regression_xr``: xarray wrapper to fit an
auto regression model (`#139 <https://github.com/MESMER-group/mesmer/pull/139>`_).
By `Mathias Hauser <https://github.com/mathause>`_.
- Add ``mesmer.core.auto_regression._draw_auto_regression_correlated_np``: to draw samples of an
auto regression model (`#161 <https://github.com/MESMER-group/mesmer/pull/161>`_).
By `Mathias Hauser <https://github.com/mathause>`_.

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
2 changes: 2 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Statistical core functions
~core.linear_regression.LinearRegression.residuals
~core.linear_regression.LinearRegression.to_netcdf
~core.linear_regression.LinearRegression.from_netcdf
~core.auto_regression._fit_auto_regression_xr
~core.auto_regression._draw_auto_regression_correlated_np

Train mesmer
------------
Expand Down
74 changes: 74 additions & 0 deletions mesmer/core/auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,80 @@
import xarray as xr


def _draw_auto_regression_correlated_np(
*, intercept, coefs, covariance, n_samples, n_ts, seed, buffer
):
"""
Draw time series of an auto regression process with possibly spatially-correlated
innovations
Creates `n_samples` auto-correlated time series of order `ar_order` and length
`n_ts` for each set of `n_coefs` coefficients (typically one set for each grid
point), the resulting array has shape n_samples x n_ts x n_coefs. The innovations
can be spatially correlated.
Parameters
----------
intercept : float or ndarray of length n_coefs
Intercept of the model.
coefs : ndarray of shape ar_order x n_coefs
The coefficients of the autoregressive process. Must be a 2D array with the
autoregressive coefficients along axis=0, while axis=1 contains all idependent
coefficients.
covariance : float or ndarray of shape n_coefs x n_coefs
The (co-)variance array. Must be symmetric and positive-semidefinite.
n_samples : int
Number of samples to draw for each set of coefficients.
n_ts : int
Number of time steps to draw.
seed : int
Seed used to initialize the pseudo-random number generator.
buffer : int
Buffer to initialize the autoregressive process (ensures that start at 0 does
not influence overall result).
Returns
-------
out : ndarray
Drawn realizations of the specified autoregressive process. The array has shape
n_samples x n_ts x n_coefs.
Notes
-----
The 'innovations' is the error or noise term.
As this is not a deterministic function it is not called `predict`. "Predicting"
an autoregressive process does not include the innovations and therefore asymptotes
towards a certain value (in contrast to this function).
"""
intercept = np.array(intercept)
covariance = np.atleast_2d(covariance)

# coeffs assumed to be ar_order x n_coefs
ar_order, n_coefs = coefs.shape

# TODO: allow arbitrary lags? (e.g., [1, 12]) -> need to pass `ar_lags` (see #164)
ar_lags = np.arange(1, ar_order + 1, dtype=int)

# ensure reproducibility (TODO: clarify approach to this, see #35)
np.random.seed(seed)

innovations = np.random.multivariate_normal(
mean=np.zeros(n_coefs),
cov=covariance,
size=[n_samples, n_ts + buffer],
)

out = np.zeros([n_samples, n_ts + buffer, n_coefs])
for t in range(ar_order + 1, n_ts + buffer):

ar = np.sum(coefs * out[:, t - ar_lags, :], axis=1)

out[:, t, :] = intercept + ar + innovations[:, t, :]

return out[:, buffer:, :]


def _fit_auto_regression_xr(data, dim, lags):
"""
fit an auto regression - xarray wrapper
Expand Down
35 changes: 14 additions & 21 deletions mesmer/create_emulations/create_emus_gv.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import joblib
import numpy as np

from mesmer.core.auto_regression import _draw_auto_regression_correlated_np


def create_emus_gv(params_gv, preds_gv, cfg, save_emus=True):
"""Create global variablity emulations for specified method.
Expand Down Expand Up @@ -157,33 +159,24 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed):
AR_order_sel = params_gv["AR_order_sel"]
AR_std_innovs = params_gv["AR_std_innovs"]

ar_lags = np.arange(1, AR_order_sel + 1, dtype=int)
shape = (nr_emus_v, nr_ts_emus_v + buffer)

# if AR(0) process chosen, no AR_coefs are available -> to have code run
# nevertheless ar_coefs and ar_lags are set to 0 (-> emus are created with
# ar_int + innovs)
if len(ar_coefs) == 0:
ar_coefs = [0]
ar_lags = [0]

innovs_emus_gv = np.random.normal(loc=0, scale=AR_std_innovs, size=shape)

# initialize global variability emulations (dim array: nr_emus x nr_ts)
emus_gv = np.zeros(shape, dtype=float)

# only use the selected coeffs
ar_coefs = ar_coefs[:AR_order_sel]

# simulate from AR process (all realizations)
for t in range(AR_order_sel + 1, nr_ts_emus_v + buffer):
emus_gv[:, t] = (
ar_int
+ emus_gv[:, t - ar_lags] @ ar_coefs # sum of products
+ innovs_emus_gv[:, t]
)

# remove buffer
emus_gv = emus_gv[:, buffer:]

return emus_gv
emus_gv = _draw_auto_regression_correlated_np(
intercept=ar_int,
# reshape to n_coefs x n_cells
coefs=ar_coefs[:, np.newaxis],
covariance=AR_std_innovs**2, # pass the (co-)variance!
n_samples=nr_emus_v,
n_ts=nr_ts_emus_v,
seed=seed,
buffer=buffer,
)

return emus_gv.squeeze()
41 changes: 12 additions & 29 deletions mesmer/create_emulations/create_emus_lv.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import joblib
import numpy as np

from mesmer.core.auto_regression import _draw_auto_regression_correlated_np


def create_emus_lv(params_lv, preds_lv, cfg, save_emus=True, submethod=""):
"""Create local variablity emulations.
Expand Down Expand Up @@ -180,39 +182,20 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
# in case no emus_lv[scen] exist yet, initialize it. Otherwise build up on
# existing one
if len(emus_lv[scen]) == 0:

emus_lv[scen][targ] = np.zeros(nr_emus_v, nr_ts_emus_stoch_v, nr_gps)

# ensure reproducibility
np.random.seed(seed)

# buffer so that initial start at 0 does not influence overall result
buffer = 20

print("Draw the innovations")
# draw the innovations
innovs = np.random.multivariate_normal(
np.zeros(nr_gps),
params_lv["loc_ecov_AR1_innovs"][targ],
size=[nr_emus_v, nr_ts_emus_stoch_v + buffer],
)

print(
"Compute the contribution to emus_lv by the AR(1) process with the "
"spatially correlated innovations"
emus_ar = _draw_auto_regression_correlated_np(
intercept=params_lv["AR1_int"][targ],
# reshape to n_coefs x n_cells
coefs=params_lv["AR1_coef"][targ][np.newaxis, :],
covariance=params_lv["loc_ecov_AR1_innovs"][targ],
n_samples=nr_emus_v,
n_ts=nr_ts_emus_stoch_v,
seed=seed,
buffer=20,
)
emus_lv_tmp = np.zeros([nr_emus_v, nr_ts_emus_stoch_v + buffer, nr_gps])
for t in range(1, nr_ts_emus_stoch_v + buffer):
emus_lv_tmp[:, t, :] = (
params_lv["AR1_int"][targ]
+ params_lv["AR1_coef"][targ] * emus_lv_tmp[:, t - 1, :]
+ innovs[:, t, :]
)
emus_lv_tmp = emus_lv_tmp[:, buffer:, :]

print("Create the full local variability emulations")
emus_lv[scen][targ] += emus_lv_tmp

emus_lv[scen][targ] += emus_ar.squeeze()
return emus_lv


Expand Down
107 changes: 107 additions & 0 deletions tests/integration/test_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,113 @@
from .utils import trend_data_1D, trend_data_2D


@pytest.mark.parametrize("ar_order", [1, 8])
@pytest.mark.parametrize("n_cells", [1, 10])
@pytest.mark.parametrize("n_samples", [2, 5])
@pytest.mark.parametrize("n_ts", [3, 7])
def test_draw_auto_regression_correlated_np_shape(ar_order, n_cells, n_samples, n_ts):

intercept = np.zeros(n_cells)
coefs = np.ones((ar_order, n_cells))
covariance = np.ones((n_cells, n_cells))

result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
intercept=intercept,
coefs=coefs,
covariance=covariance,
n_samples=n_samples,
n_ts=n_ts,
seed=0,
buffer=10,
)

expected_shape = (n_samples, n_ts, n_cells)

assert result.shape == expected_shape


@pytest.mark.parametrize("intercept", [0, 1, 3.14])
def test_draw_auto_regression_deterministic_intercept(intercept):

result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
intercept=intercept,
coefs=np.array([[0]]),
covariance=[0],
n_samples=1,
n_ts=3,
seed=0,
buffer=10,
)

expected = np.full((1, 3, 1), intercept)

np.testing.assert_equal(result, expected)

result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
intercept=np.array([[0, intercept]]),
coefs=np.array([[0, 0]]),
covariance=np.zeros((2, 2)),
n_samples=1,
n_ts=1,
seed=0,
buffer=10,
)

expected = np.array([0, intercept]).reshape(1, 1, 2)

np.testing.assert_equal(result, expected)


def test_draw_auto_regression_deterministic_coefs_buffer():

result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
intercept=1,
coefs=np.array([[1]]),
covariance=[0],
n_samples=1,
n_ts=4,
seed=0,
buffer=1,
)

expected = np.arange(4).reshape(1, -4, 1)

np.testing.assert_equal(result, expected)

expected = np.array([0, 1, 1.5, 1.75, 1.875]).reshape(1, -1, 1)

for i, buffer in enumerate([1, 2]):
result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
intercept=1,
coefs=np.array([[0.5]]),
covariance=[0],
n_samples=1,
n_ts=4,
seed=0,
buffer=buffer,
)

np.testing.assert_allclose(result, expected[:, i : i + 4])


def test_draw_auto_regression_random():

result = mesmer.core.auto_regression._draw_auto_regression_correlated_np(
intercept=1,
coefs=np.array([[0.375], [0.125]]),
covariance=0.5,
n_samples=1,
n_ts=4,
seed=0,
buffer=3,
)

expected = np.array([2.58455078, 3.28976946, 1.86569258, 2.78266986])
expected = expected.reshape(1, 4, 1)

np.testing.assert_allclose(result, expected)


@pytest.mark.parametrize("obj", [xr.Dataset(), None])
def test_fit_auto_regression_xr_errors(obj):

Expand Down

0 comments on commit 0c5b265

Please sign in to comment.