Skip to content

Commit

Permalink
Remove bounds on cyclo-stationary AR(1) (MESMER-group#480)
Browse files Browse the repository at this point in the history
* remove bounds to monthly AR fit

* let fit also return residuals

* delete predict function

* adjust tests

* adjust init

* changelog

* unwrapping
---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>
  • Loading branch information
3 people authored Jul 30, 2024
1 parent 6cf3716 commit e24c135
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 177 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ that this led to some numerical changes compared to the MESMER-M publication
- Implement functions performing the monthly (cyclo-stationary) auto-regression and adapt these functions to
work with xarray. This includes extracting the drawing of spatially correlated innovations to a
stand-alone function. (`#473 <https://github.com/MESMER-group/mesmer/pull/473>`_)
- Remove the bounds of -1 and 1 on the slope of the cyclo-stationary AR(1) process. This bound is not necessary
since cyclo-stationarity is also given if the slopes of a few months are (slightly) larger than one. We
now return the residuals of the cyclo-stationary AR(1) process to fit the covariance matrix on these residuals.
As a consequence, adjustment of the covariance matrix with the AR slopes is no longer necessary.
After this, no adjustment is necessary anymore. (`#480 <https://github.com/MESMER-group/mesmer/pull/480>`_)
Compare discussion in `#472 <https://github.com/MESMER-group/mesmer/issues/472>`_.
- Implement function to localize the empirical covarince matrix for each month individually to use in drawing
of spatially correlated noise in the AR process. (`#479 <https://github.com/MESMER-group/mesmer/pull/479>`_)

Expand Down
2 changes: 0 additions & 2 deletions mesmer/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
draw_auto_regression_uncorrelated,
fit_auto_regression,
fit_auto_regression_monthly,
predict_auto_regression_monthly,
select_ar_order,
)
from mesmer.stats._gaspari_cohn import gaspari_cohn, gaspari_cohn_correlation_matrices
Expand All @@ -27,7 +26,6 @@
"fit_auto_regression",
"select_ar_order",
"fit_auto_regression_monthly",
"predict_auto_regression_monthly",
"draw_auto_regression_monthly",
# gaspari cohn
"gaspari_cohn_correlation_matrices",
Expand Down
86 changes: 13 additions & 73 deletions mesmer/stats/_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,13 +633,14 @@ def fit_auto_regression_monthly(monthly_data, time_dim="time"):
Returns
-------
ar_params : ``xr.Dataset``
obj : ``xr.Dataset``
Dataset containing the estimated parameters of the AR(1) process, the ``intercept`` and the
``slope`` for each month and gridpoint.
``slope`` for each month and gridpoint. Additionally, the ``residuals`` are returned.
"""
_check_dataarray_form(monthly_data, "monthly_data", ndim=2, required_dims=time_dim)
monthly_data = monthly_data.groupby(f"{time_dim}.month")
ar_params = []
residuals = []

for month in range(1, 13):
if month == 1:
Expand All @@ -654,21 +655,24 @@ def fit_auto_regression_monthly(monthly_data, time_dim="time"):
prev_month = monthly_data[month - 1]
cur_month = monthly_data[month]

slope, intercept = xr.apply_ufunc(
slope, intercept, resids = xr.apply_ufunc(
_fit_auto_regression_monthly_np,
cur_month,
prev_month,
input_core_dims=[[time_dim], [time_dim]],
output_core_dims=[[], []],
output_core_dims=[[], [], [time_dim]],
exclude_dims={time_dim},
vectorize=True,
)

ar_params.append(xr.Dataset({"slope": slope, "intercept": intercept}))
residuals.append(resids.assign_coords({time_dim: cur_month[time_dim]}))

month = xr.Variable("month", np.arange(1, 13))
ar_params = xr.concat(ar_params, dim=month)
residuals = xr.concat(residuals, dim=time_dim).rename("residuals")

return xr.concat(ar_params, dim=month)
return xr.merge([ar_params, residuals])


def _fit_auto_regression_monthly_np(data_month, data_prev_month):
Expand Down Expand Up @@ -698,72 +702,11 @@ def lin_func(indep_var, slope, intercept):
lin_func,
data_prev_month, # independent variable
data_month, # dependent variable
bounds=([-1, -np.inf], [1, np.inf]),
)[0]

return slope, intercept
residuals = data_month - lin_func(data_prev_month, slope, intercept)


def predict_auto_regression_monthly(ar_params, time, buffer):
"""deterministically predict time series of an auto regression process
with lag one (AR(1)) using individual parameters for each month.
This function is deterministic, i.e. does not produce random noise!
Parameters
----------
ar_params : xr.Dataset
Dataset containing the estimated parameters of the AR1 process. Must contain the
following DataArray objects:
- intercept
- slope
both of shape (12, n_gridpoints).
time : xr.DataArray
The time coordinates that determines the length of the predicted timeseries and
that will be the assigned time dimension of the predictions.
buffer : int
Buffer to initialize the auto-regressive process (ensures that start at 0 does
not influence overall result).
Returns
-------
result : xr.DataArray
Predicted deterministic time series of the specified AR(1). The array has
shape n_timesteps x n_gridpoints.
"""
_check_dataset_form(ar_params, "ar_params", required_vars=("intercept", "slope"))

(month_dim, gridcell_dim) = ar_params.intercept.dims
(n_months, n_gridpoints) = ar_params.intercept.shape

_check_dataarray_form(
ar_params.intercept,
"intercept",
ndim=2,
required_dims=(month_dim, gridcell_dim),
)
_check_dataarray_form(
ar_params.slope,
"slope",
ndim=2,
required_dims=(month_dim, gridcell_dim),
shape=(n_months, n_gridpoints),
)
result = _draw_ar_corr_monthly_xr_internal(
intercept=ar_params.intercept,
slope=ar_params.slope,
covariance=None,
time=time,
realisation=1,
seed=0,
buffer=buffer,
time_dim="time",
realisation_dim="realisation",
).squeeze("realisation", drop=True)

return result
return slope, intercept, residuals


def draw_auto_regression_monthly(
Expand Down Expand Up @@ -872,13 +815,10 @@ def _draw_ar_corr_monthly_xr_internal(
# make sure non-dimension coords are properly caught
gridpoint_coords = dict(slope[gridpoint_dim].coords)

if covariance is not None:
covariance = covariance.values

out = _draw_auto_regression_monthly_np(
intercept=intercept.values,
slope=slope.transpose(..., gridpoint_dim).values,
covariance=covariance,
covariance=covariance.values,
n_samples=n_realisations,
n_ts=n_ts,
seed=seed,
Expand Down Expand Up @@ -926,7 +866,7 @@ def _draw_auto_regression_monthly_np(
Predicted time series of the specified AR(1) including spatially correllated innovations.
"""
intercept = np.asarray(intercept)
# covariance = np.atleast_3d(covariance)
covariance = np.atleast_3d(covariance)

_, n_gridcells = intercept.shape

Expand Down
126 changes: 24 additions & 102 deletions tests/unit/test_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,33 +597,44 @@ def test_fit_auto_regression_np(lags):

@pytest.mark.parametrize("intercept", [1.0, -4.0])
@pytest.mark.parametrize("slope", [0.2, -0.3])
def test_fit_autoregression_monthly_np(slope, intercept):
def test_fit_autoregression_monthly_np_no_noise(slope, intercept):
# test if autoregrerssion can fit using previous month as independent variable
# and current month as dependent variable
# NOTE: the fit for the slope is bounded between -1 and 1
n_ts = 100
np.random.seed(0)
prev_month = np.random.normal(size=100)
prev_month = np.random.normal(size=n_ts)
cur_month = prev_month * slope + intercept

result = mesmer.stats._auto_regression._fit_auto_regression_monthly_np(
cur_month, prev_month
)
expected = (slope, intercept)
np.testing.assert_allclose(result, expected)
slope_fit, intercept_fit, residuals = result
expected_resids = np.zeros_like(cur_month)

np.testing.assert_allclose(slope, slope_fit)
np.testing.assert_allclose(intercept, intercept_fit)
np.testing.assert_allclose(residuals, expected_resids, atol=1e-6)


@pytest.mark.parametrize("slope", [2.0, -2])
def test_fit_autoregression_monthly_np_outside_bounds(slope):
# test that given a slope outside the bounds of -1 and 1, the slope is clipped
@pytest.mark.parametrize("intercept", [1.0, -4.0])
@pytest.mark.parametrize("slope", [0.2, -0.3])
@pytest.mark.parametrize("std", [1, 0.5])
def test_fit_autoregression_monthly_np_with_noise(slope, intercept, std):
# test if autoregrerssion can fit using previous month as independent variable
# and current month as dependent variable
n_ts = 5000
np.random.seed(0)
prev_month = np.random.normal(size=100)
cur_month = prev_month * slope + 3
prev_month = np.random.normal(size=n_ts)
cur_month = prev_month * slope + intercept + np.random.normal(0, std, size=n_ts)

result = mesmer.stats._auto_regression._fit_auto_regression_monthly_np(
cur_month, prev_month
)
expected_slope = 1.0 * np.sign(slope)
np.testing.assert_allclose(result[0], expected_slope)
slope_fit, intercept_fit, residuals = result

np.testing.assert_allclose(slope, slope_fit, atol=1e-1)
np.testing.assert_allclose(intercept, intercept_fit, atol=1e-1)
np.testing.assert_allclose(np.std(residuals), std, atol=1e-1)


def test_fit_auto_regression_monthly():
Expand Down Expand Up @@ -661,103 +672,14 @@ def test_fit_auto_regression_monthly():
mesmer.stats.fit_auto_regression_monthly(data.values)


def test_predict_auto_regression_monthly_intercept():
n_gridcells = 2
slope = np.zeros((12, n_gridcells))
slope = xr.DataArray(
slope,
dims=("month", "gridcell"),
coords={"month": np.arange(1, 13), "gridcell": np.arange(n_gridcells)},
)
intercept = np.tile(np.arange(1, 13), [n_gridcells, 1]).T
intercept = xr.DataArray(
intercept,
dims=("month", "gridcell"),
coords={"month": np.arange(1, 13), "gridcell": np.arange(n_gridcells)},
)
ar_params = xr.Dataset({"intercept": intercept, "slope": slope})

n_years = 10
time = pd.date_range("2000-01-01", periods=12 * n_years, freq="M")
time = xr.DataArray(time, dims="time", coords={"time": time})

result = mesmer.stats.predict_auto_regression_monthly(ar_params, time, buffer=0)

expected = np.tile(np.arange(1, 13), [n_gridcells, n_years]).T
expected[0] = 0
expected = xr.DataArray(
expected,
dims=("time", "gridcell"),
coords={"time": time, "gridcell": np.arange(n_gridcells)},
)
np.testing.assert_allclose(result, expected)


def test_predict_auto_regression_monthly():
n_gridcells = 10
n_years = 10
np.random.seed(0)
slopes = xr.DataArray(
np.random.normal(-0.99, 0.99, size=(12, n_gridcells)),
dims=("month", "gridcell"),
coords={"month": np.arange(1, 13), "gridcell": np.arange(n_gridcells)},
)
intercepts = xr.DataArray(
np.random.normal(-10, 10, size=(12, n_gridcells)),
dims=("month", "gridcell"),
coords={"month": np.arange(1, 13), "gridcell": np.arange(n_gridcells)},
)
ar_params = xr.Dataset({"intercept": intercepts, "slope": slopes})

time = pd.date_range("2000-01-01", periods=n_years * 12, freq="M")
time = xr.DataArray(time, dims="time", coords={"time": time})

result = mesmer.stats.predict_auto_regression_monthly(ar_params, time, buffer=10)

_check_dataarray_form(
result,
"result",
ndim=2,
required_dims={"time", "gridcell"},
shape=(len(time), n_gridcells),
)


def test_fit_predict_autoregression_monthly_roundtrip():
n_gridcells = 10
n_years = 150
buffer = 30 * 12
np.random.seed(0)
slopes = xr.DataArray(
np.random.uniform(-0.99, 0.99, size=(12, n_gridcells)),
dims=("month", "gridcell"),
coords={"month": np.arange(1, 13), "gridcell": np.arange(n_gridcells)},
)
intercepts = xr.DataArray(
np.random.normal(-10, 10, size=(12, n_gridcells)),
dims=("month", "gridcell"),
coords={"month": np.arange(1, 13), "gridcell": np.arange(n_gridcells)},
)
ar_params = xr.Dataset({"intercept": intercepts, "slope": slopes})

time = pd.date_range("2000-01-01", periods=n_years * 12, freq="M")
time = xr.DataArray(time, dims="time", coords={"time": time})

data = mesmer.stats.predict_auto_regression_monthly(ar_params, time, buffer)
AR_fit = mesmer.stats.fit_auto_regression_monthly(data)
predicted = mesmer.stats.predict_auto_regression_monthly(AR_fit, data.time, buffer)

np.testing.assert_allclose(predicted, data)


@pytest.mark.parametrize("buffer", [1, 10, 20])
def test_draw_auto_regression_monthly_np_buffer(buffer):
n_realisations = 1
n_gridcells = 10
seed = 0
slope = np.random.uniform(-1, 1, size=(12, n_gridcells))
intercept = np.ones((12, n_gridcells))
covariance = None
covariance = np.zeros((12, n_gridcells, n_gridcells))
n_ts = 120

res_wo_buffer = mesmer.stats._auto_regression._draw_auto_regression_monthly_np(
Expand Down

0 comments on commit e24c135

Please sign in to comment.