Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MESMER-X : Add calibration of local variability to calibration test #550

Merged
3 changes: 2 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ In the release the MESMER-X functionality is integrated into the MESMER Codebase
`#470 <https://github.com/MESMER-group/mesmer/pull/470>`_,
`#502 <https://github.com/MESMER-group/mesmer/pull/502>`_)
- Add tests (`#524 <https://github.com/MESMER-group/mesmer/pull/524>`_,
`#540 <https://github.com/MESMER-group/mesmer/pull/540>`_)
`#540 <https://github.com/MESMER-group/mesmer/pull/540>`_,
`#550 <https://github.com/MESMER-group/mesmer/pull/550>`_)

Integration of MESMER-M
^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
2 changes: 2 additions & 0 deletions examples/example_mesmer_x.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def main():
# probability integral transform: projection of the data on a standard normal distribution
transf_target = mesmer_x_train_utils.probability_integral_transform( # noqa: F841
data=target,
target_name=targ,
expr_start=expr,
coeffs_start=xr_coeffs_distrib,
preds_start=predictors,
Expand Down Expand Up @@ -273,6 +274,7 @@ def main():

# probability integral transform: projection of the transformed data on the known distributions
emus = mesmer_x_train_utils.probability_integral_transform(
target_name=targ,
data=transf_emus,
expr_start="norm(loc=0, scale=1)",
expr_end=expr,
Expand Down
2 changes: 1 addition & 1 deletion mesmer/mesmer_x/train_utils_mesmerx.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def probability_integral_transform(
Parameters
----------
data : not sure yet what data format will be used at the end.
Assumed to be a xarray Dataset with coordinates 'time' and 'gridpoint' and one
Assumed to be a xarray Dataset with coordinates 'time' and 'gridpoint', and one
2D variable with both coordinates
target_name : str
name of the variable to train
Expand Down
117 changes: 105 additions & 12 deletions tests/integration/test_calibrate_mesmer_x.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def mask_and_stack(ds, threshold_land):
target = ((txx_stacked_hist, "hist"), (txx_stacked_ssp585, "ssp585"))

# do the training
result, _ = mesmer.mesmer_x.xr_train_distrib(
transform_params, _ = mesmer.mesmer_x.xr_train_distrib(
predictors=predictor,
target=target,
target_name="tasmax",
Expand All @@ -96,20 +96,113 @@ def mask_and_stack(ds, threshold_land):
)

# probability integral transform: projection of the data on a standard normal distribution
# transf_target = mesmer.mesmer_x.probability_integral_transform( # noqa: F841
# data=target,
# expr_start=expr,
# coeffs_start=result,
# preds_start=predictor,
# expr_end="norm(loc=0, scale=1)",
# target_name="tasmax",
# )
transf_target = mesmer.mesmer_x.probability_integral_transform( # noqa: F841
data=target,
target_name="tasmax",
expr_start=expr,
coeffs_start=transform_params,
preds_start=predictor,
expr_end="norm(loc=0, scale=1)",
)

# make transformed target into DataArrays
transf_target_xr_hist = xr.DataArray(
transf_target[0][0],
dims=["time", "gridpoint"],
coords={"time": txx_stacked_hist.time},
).assign_coords(txx_stacked_hist.gridpoint.coords)
transf_target_xr_ssp585 = xr.DataArray(
transf_target[1][0],
dims=["time", "gridpoint"],
coords={"time": txx_stacked_ssp585.time},
).assign_coords(txx_stacked_hist.gridpoint.coords)

# training of auto-regression with spatially correlated innovations
local_ar_params = mesmer.stats._fit_auto_regression_scen_ens(
transf_target_xr_hist,
transf_target_xr_ssp585,
ens_dim=None,
dim="time",
lags=1,
)

# estimate covariance matrix
# prep distance matrix
geodist = mesmer.geospatial.geodist_exact(
txx_stacked_hist.lon, txx_stacked_hist.lat
)
# prep localizer
phi_gc_localizer = mesmer.stats.gaspari_cohn_correlation_matrices(
geodist, range(4000, 6001, 500)
)

# stack target
transf_target_stacked = xr.concat(
[transf_target_xr_hist, transf_target_xr_ssp585], dim="scenario"
)
transf_target_stacked = transf_target_stacked.assign_coords(
scenario=["hist", "ssp585"]
)
transf_target_stacked = transf_target_stacked.stack(
{"sample": ["time", "scenario"]}, create_index=False
).dropna("sample")

# make weights
weights = xr.ones_like(transf_target_stacked.isel(gridpoint=0))

# find covariance
dim = "sample"
k_folds = 15

localized_ecov = mesmer.stats.find_localized_empirical_covariance(
transf_target_stacked, weights, phi_gc_localizer, dim, k_folds
)

# Adjust regularized covariance matrix # TODO: varify if this is actually done in MESMER-X
localized_ecov["localized_covariance_adjusted"] = (
mesmer.stats.adjust_covariance_ar1(
localized_ecov.localized_covariance, local_ar_params.coeffs
)
)

if update_expected_files:
# save the parameters
result.to_netcdf(TEST_PATH / f"test-mesmer_x-params_{outname}.nc")
transform_params.to_netcdf(
TEST_PATH / f"test-mesmer_x-transf_params_{outname}.nc"
)
local_ar_params.to_netcdf(
TEST_PATH / f"test-mesmer_x-local_ar_params_{outname}.nc"
)
localized_ecov.to_netcdf(
TEST_PATH / f"test-mesmer_x-localized_ecov_{outname}.nc"
)

else:
# load the parameters
expected = xr.open_dataset(TEST_PATH / f"test-mesmer_x-params_{outname}.nc")
xr.testing.assert_allclose(result, expected)
expected_transform_params = xr.open_dataset(
TEST_PATH / f"test-mesmer_x-transf_params_{outname}.nc"
)
xr.testing.assert_allclose(transform_params, expected_transform_params)

expected_local_ar_params = xr.open_dataset(
TEST_PATH / f"test-mesmer_x-local_ar_params_{outname}.nc"
)
xr.testing.assert_allclose(
local_ar_params["intercept"],
expected_local_ar_params["intercept"],
atol=1e-7,
)
xr.testing.assert_allclose(
local_ar_params["coeffs"], expected_local_ar_params["coeffs"]
)
xr.testing.assert_allclose(
local_ar_params["variance"], expected_local_ar_params["variance"]
)
xr.testing.assert_equal(
local_ar_params["nobs"], expected_local_ar_params["nobs"]
)

expected_localized_ecov = xr.open_dataset(
TEST_PATH / f"test-mesmer_x-localized_ecov_{outname}.nc"
)
xr.testing.assert_allclose(localized_ecov, expected_localized_ecov)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading