Skip to content

Commit

Permalink
add wrapper for finding monthly localized covariance (MESMER-group#479)
Browse files Browse the repository at this point in the history
* add monthly wrapper for finding localized covariance

* fix init

* changelog

* fixes in tests

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>
  • Loading branch information
3 people authored Jul 27, 2024
1 parent 21543e9 commit 6cf3716
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ that this led to some numerical changes compared to the MESMER-M publication
- Implement functions performing the monthly (cyclo-stationary) auto-regression and adapt these functions to
work with xarray. This includes extracting the drawing of spatially correlated innovations to a
stand-alone function. (`#473 <https://github.com/MESMER-group/mesmer/pull/473>`_)
- Implement function to localize the empirical covarince matrix for each month individually to use in drawing
of spatially correlated noise in the AR process. (`#479 <https://github.com/MESMER-group/mesmer/pull/479>`_)

**Yeo-Johnson power transformer**

Expand Down
2 changes: 2 additions & 0 deletions mesmer/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from mesmer.stats._localized_covariance import (
adjust_covariance_ar1,
find_localized_empirical_covariance,
find_localized_empirical_covariance_monthly,
)
from mesmer.stats._smoothing import lowess

Expand All @@ -36,6 +37,7 @@
# localized covariance
"adjust_covariance_ar1",
"find_localized_empirical_covariance",
"find_localized_empirical_covariance_monthly",
# smoothing
"lowess",
]
60 changes: 59 additions & 1 deletion mesmer/stats/_localized_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def find_localized_empirical_covariance(
Parameters
----------
data : xr.DataArray
2D DataArray with with to calculate the covariance for.
2D DataArray with data to calculate the covariance for.
weights : xr.DataArray
Weights for the individual samples.
localizer : dict of DataArray```
Expand Down Expand Up @@ -144,6 +144,64 @@ def find_localized_empirical_covariance(
return xr.Dataset(data_vars)


def find_localized_empirical_covariance_monthly(
data, weights, localizer, dim, k_folds, equal_dim_suffixes=("_i", "_j")
):
"""determine localized empirical covariance by cross validation for each month
Parameters
----------
data : xr.DataArray
2D DataArray with monthly data to calculate the covariance for.
weights : xr.DataArray
Weights for the individual samples.
localizer : dict of DataArray
Dictionary containing the localization radii as keys and the localization matrix
as values. The localization must be 2D and of shape n_gridpoints x n_gridpoints.
Currently only the Gaspari-Cohn localizer is implemented in MESMER.
dim : str
Dimension along which to calculate the covariance.
k_folds : int
Number of folds to use for cross validation.
equal_dim_suffixes : tuple of str, default: ("_i", "_j")
Suffixes to add to the the name of ``dim`` for the covariance array
(xr.DataArray cannot have two dimensions with the same name).
Returns
-------
localized_empirical_covariance : xr.Dataset
Dataset containing three DataArrays:
localization_radius : float
Selected localization radius.
covariance : xr.DataArray
Empirical covariance matrix.
localized_covariance : xr.DataArray
Localized empirical covariance matrix.
Notes
-----
Runs a k-fold cross validation if ``k_folds`` is smaller than the number of samples
and a leave-one-out cross validation otherwise.
"""
localized_ecov = []
data_grouped = data.groupby(f"{dim}.month")
weights_grouped = weights.groupby(f"{dim}.month")

for mon in range(1, 13):
res = find_localized_empirical_covariance(
data_grouped[mon],
weights_grouped[mon],
localizer,
dim=dim,
k_folds=k_folds,
equal_dim_suffixes=equal_dim_suffixes,
)
localized_ecov.append(res)

month = xr.Variable("month", range(1, 13))
return xr.concat(localized_ecov, dim=month)


def _find_localized_empirical_covariance_np(data, weights, localizer, k_folds):
"""determine localized empirical covariance by cross validation
Expand Down
70 changes: 68 additions & 2 deletions tests/unit/test_localized_covariance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pandas as pd
import pytest
import xarray as xr

Expand Down Expand Up @@ -79,7 +80,9 @@ def test_find_localized_empirical_covariance():

assert result.localization_radius == 1
_check_dataarray_form(result.covariance, "covariance", **required_form)
_check_dataarray_form(result.covariance, "localized_covariance", **required_form)
_check_dataarray_form(
result.localized_covariance, "localized_covariance", **required_form
)

# ensure can pass equal_dim_suffixes
result = mesmer.stats.find_localized_empirical_covariance(
Expand All @@ -95,7 +98,70 @@ def test_find_localized_empirical_covariance():

assert result.localization_radius == 1
_check_dataarray_form(result.covariance, "covariance", **required_form)
_check_dataarray_form(result.covariance, "localized_covariance", **required_form)
_check_dataarray_form(
result.localized_covariance, "localized_covariance", **required_form
)


@pytest.mark.filterwarnings("ignore:First element is local minimum.")
def test_find_localized_empirical_covariance_monthly():

n_samples = 20 * 12
n_gridpoints = 60
time = pd.date_range("2000-01-01", periods=n_samples, freq="MS")

data = get_random_data(n_samples, n_gridpoints)
data = data.assign_coords({"samples": time})

localizer = get_localizer_dict(n_gridpoints, as_dataarray=True)

weights = get_weights(n_samples)
weights = weights.assign_coords({"samples": time})

required_form = {
"ndim": 3,
"required_dims": ("month", "cell_i", "cell_j"),
"shape": (12, n_gridpoints, n_gridpoints),
}

result = mesmer.stats.find_localized_empirical_covariance_monthly(
data, weights, localizer, dim="samples", k_folds=2
)

np.testing.assert_equal(result.localization_radius.values, np.zeros(12))
_check_dataarray_form(result.covariance, "covariance", **required_form)
_check_dataarray_form(
result.localized_covariance, "localized_covariance", **required_form
)

# ensure it works if data is transposed
result = mesmer.stats.find_localized_empirical_covariance_monthly(
data.T, weights, localizer, dim="samples", k_folds=3
)

np.testing.assert_equal(result.localization_radius.values, np.zeros(12))
_check_dataarray_form(result.covariance, "covariance", **required_form)
_check_dataarray_form(
result.localized_covariance, "localized_covariance", **required_form
)

# ensure can pass equal_dim_suffixes
result = mesmer.stats.find_localized_empirical_covariance_monthly(
data,
weights,
localizer,
dim="samples",
k_folds=3,
equal_dim_suffixes=(":j", ":i"),
)

required_form["required_dims"] = ("cell:j", "cell:i")

np.testing.assert_equal(result.localization_radius.values, np.zeros(12))
_check_dataarray_form(result.covariance, "covariance", **required_form)
_check_dataarray_form(
result.localized_covariance, "localized_covariance", **required_form
)


@pytest.mark.filterwarnings("ignore:First element is local minimum.")
Expand Down

0 comments on commit 6cf3716

Please sign in to comment.