Skip to content

Commit

Permalink
Switch to numpy rng for seeding (MESMER-group#495)
Browse files Browse the repository at this point in the history
* switch to rng

* comment

* changelog

* test reproducibility and different months

* linting

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
1 parent a244f72 commit 6e937ff
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 8 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ New Features

Breaking changes
^^^^^^^^^^^^^^^^
- Switch random number generation for drawing emulations from np.random.seed() to np.random.default_rng()
(`#495 <https://github.com/MESMER-group/mesmer/pull/495>`_). By `Victoria Bauer`_.
- Using Cholesky decomposition for finding covariance localization radius and drawing from the multivariate normal distribution (`#408 <https://github.com/MESMER-group/mesmer/pull/408>`_)
By `Victoria Bauer`_.
- The supported versions of some dependencies were changed (`#399 <https://github.com/MESMER-group/mesmer/pull/399>`_, `#405 <https://github.com/MESMER-group/mesmer/pull/405>`_):
Expand Down
17 changes: 10 additions & 7 deletions mesmer/stats/_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,11 +475,11 @@ def _draw_auto_regression_correlated_np(
# arbitrary lags? no, see: https://github.com/MESMER-group/mesmer/issues/164
ar_lags = np.arange(1, ar_order + 1, dtype=int)

# ensure reproducibility (TODO: https://github.com/MESMER-group/mesmer/issues/35)
np.random.seed(seed)
# ensure reproducibility
rng = np.random.default_rng(seed)

innovations = _draw_innovations_correlated_np(
covariance, n_coeffs, n_samples, n_ts, buffer
covariance, rng, n_coeffs, n_samples, n_ts, buffer
)

out = np.zeros([n_samples, n_ts + buffer, n_coeffs])
Expand All @@ -492,7 +492,9 @@ def _draw_auto_regression_correlated_np(
return out[:, buffer:, :]


def _draw_innovations_correlated_np(covariance, n_gridcells, n_samples, n_ts, buffer):
def _draw_innovations_correlated_np(
covariance, rng, n_gridcells, n_samples, n_ts, buffer
):
# NOTE: 'innovations' is the error or noise term.
# innovations has shape (n_samples, n_ts + buffer, n_coeffs)
try:
Expand All @@ -510,6 +512,7 @@ def _draw_innovations_correlated_np(covariance, n_gridcells, n_samples, n_ts, bu
mean=np.zeros(n_gridcells),
cov=cov,
size=[n_samples, n_ts + buffer],
random_state=rng,
).reshape(n_samples, n_ts + buffer, n_gridcells)

return innovations
Expand Down Expand Up @@ -879,16 +882,16 @@ def _draw_auto_regression_monthly_np(

_, n_gridcells = intercept.shape

# ensure reproducibility (TODO: https://github.com/MESMER-group/mesmer/issues/35)
np.random.seed(seed)
# ensure reproducibility
rng = np.random.default_rng(seed)

# draw innovations for each month
innovations = np.zeros([n_samples, n_ts // 12 + buffer, 12, n_gridcells])
if covariance is not None:
for month in range(12):
cov_month = covariance[month, :, :]
innovations[:, :, month, :] = _draw_innovations_correlated_np(
cov_month, n_gridcells, n_samples, n_ts // 12, buffer
cov_month, rng, n_gridcells, n_samples, n_ts // 12, buffer
)
# reshape innovations into continuous time series
innovations = innovations.reshape(n_samples, n_ts + buffer * 12, n_gridcells)
Expand Down
Binary file not shown.
Binary file not shown.
35 changes: 34 additions & 1 deletion tests/unit/test_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def test_draw_auto_regression_random():
buffer=3,
)

expected = np.array([2.58455078, 3.28976946, 1.86569258, 2.78266986])
expected = np.array([1.07417558, 1.0240404, 1.77397341, 2.71531235])
expected = expected.reshape(1, 4, 1)

np.testing.assert_allclose(result, expected)
Expand Down Expand Up @@ -694,6 +694,39 @@ def test_draw_auto_regression_monthly_np_buffer(buffer):
)


def test_draw_autoregression_monthly_np_rng():
n_realisations = 1
n_gridcells = 10
n_ts = 120

seed = 0

# zero slope and intercept to only get innovations
slope = np.zeros((12, n_gridcells))
intercept = np.zeros((12, n_gridcells))

covariance = np.tile(np.eye(n_gridcells), [12, 1, 1])

# ensure reproducibility, i.e. reinitializing yields the same results
res = mesmer.stats._auto_regression._draw_auto_regression_monthly_np(
intercept, slope, covariance, n_realisations, n_ts, seed, buffer=1
)

res2 = mesmer.stats._auto_regression._draw_auto_regression_monthly_np(
intercept, slope, covariance, n_realisations, n_ts, seed, buffer=1
)

np.testing.assert_equal(res, res2)

# ensure that innovations for each month are different though
# (even with the same covariance matrix)
jan = res[:, 0::12, :]
feb = res[:, 1::12, :]

# any because some values might be equal by chance
assert np.not_equal(jan, feb).any()


def test_draw_auto_regression_monthly():
n_gridcells = 10
n_realisations = 5
Expand Down

0 comments on commit 6e937ff

Please sign in to comment.