diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 23a4bd05..9f570214 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -90,7 +90,8 @@ In the release the MESMER-X functionality is integrated into the MESMER Codebase `#470 `_, `#502 `_) - Add tests (`#524 `_, - `#540 `_) + `#540 `_, + `#550 `_) Integration of MESMER-M ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/examples/example_mesmer_x.py b/examples/example_mesmer_x.py index fcd7c513..aaf83b15 100644 --- a/examples/example_mesmer_x.py +++ b/examples/example_mesmer_x.py @@ -246,6 +246,7 @@ def main(): # probability integral transform: projection of the data on a standard normal distribution transf_target = mesmer_x_train_utils.probability_integral_transform( # noqa: F841 data=target, + target_name=targ, expr_start=expr, coeffs_start=xr_coeffs_distrib, preds_start=predictors, @@ -273,6 +274,7 @@ def main(): # probability integral transform: projection of the transformed data on the known distributions emus = mesmer_x_train_utils.probability_integral_transform( + target_name=targ, data=transf_emus, expr_start="norm(loc=0, scale=1)", expr_end=expr, diff --git a/mesmer/mesmer_x/train_utils_mesmerx.py b/mesmer/mesmer_x/train_utils_mesmerx.py index 3d01dbd3..85b91204 100644 --- a/mesmer/mesmer_x/train_utils_mesmerx.py +++ b/mesmer/mesmer_x/train_utils_mesmerx.py @@ -454,7 +454,7 @@ def probability_integral_transform( Parameters ---------- data : not sure yet what data format will be used at the end. - Assumed to be a xarray Dataset with coordinates 'time' and 'gridpoint' and one + Assumed to be a xarray Dataset with coordinates 'time' and 'gridpoint', and one 2D variable with both coordinates target_name : str name of the variable to train diff --git a/tests/integration/test_calibrate_mesmer_x.py b/tests/integration/test_calibrate_mesmer_x.py index cead52c2..8d7affc0 100644 --- a/tests/integration/test_calibrate_mesmer_x.py +++ b/tests/integration/test_calibrate_mesmer_x.py @@ -84,7 +84,7 @@ def mask_and_stack(ds, threshold_land): target = ((txx_stacked_hist, "hist"), (txx_stacked_ssp585, "ssp585")) # do the training - result, _ = mesmer.mesmer_x.xr_train_distrib( + transform_params, _ = mesmer.mesmer_x.xr_train_distrib( predictors=predictor, target=target, target_name="tasmax", @@ -96,20 +96,113 @@ def mask_and_stack(ds, threshold_land): ) # probability integral transform: projection of the data on a standard normal distribution - # transf_target = mesmer.mesmer_x.probability_integral_transform( # noqa: F841 - # data=target, - # expr_start=expr, - # coeffs_start=result, - # preds_start=predictor, - # expr_end="norm(loc=0, scale=1)", - # target_name="tasmax", - # ) + transf_target = mesmer.mesmer_x.probability_integral_transform( # noqa: F841 + data=target, + target_name="tasmax", + expr_start=expr, + coeffs_start=transform_params, + preds_start=predictor, + expr_end="norm(loc=0, scale=1)", + ) + + # make transformed target into DataArrays + transf_target_xr_hist = xr.DataArray( + transf_target[0][0], + dims=["time", "gridpoint"], + coords={"time": txx_stacked_hist.time}, + ).assign_coords(txx_stacked_hist.gridpoint.coords) + transf_target_xr_ssp585 = xr.DataArray( + transf_target[1][0], + dims=["time", "gridpoint"], + coords={"time": txx_stacked_ssp585.time}, + ).assign_coords(txx_stacked_hist.gridpoint.coords) + + # training of auto-regression with spatially correlated innovations + local_ar_params = mesmer.stats._fit_auto_regression_scen_ens( + transf_target_xr_hist, + transf_target_xr_ssp585, + ens_dim=None, + dim="time", + lags=1, + ) + + # estimate covariance matrix + # prep distance matrix + geodist = mesmer.geospatial.geodist_exact( + txx_stacked_hist.lon, txx_stacked_hist.lat + ) + # prep localizer + phi_gc_localizer = mesmer.stats.gaspari_cohn_correlation_matrices( + geodist, range(4000, 6001, 500) + ) + + # stack target + transf_target_stacked = xr.concat( + [transf_target_xr_hist, transf_target_xr_ssp585], dim="scenario" + ) + transf_target_stacked = transf_target_stacked.assign_coords( + scenario=["hist", "ssp585"] + ) + transf_target_stacked = transf_target_stacked.stack( + {"sample": ["time", "scenario"]}, create_index=False + ).dropna("sample") + + # make weights + weights = xr.ones_like(transf_target_stacked.isel(gridpoint=0)) + + # find covariance + dim = "sample" + k_folds = 15 + + localized_ecov = mesmer.stats.find_localized_empirical_covariance( + transf_target_stacked, weights, phi_gc_localizer, dim, k_folds + ) + + # Adjust regularized covariance matrix # TODO: verify if this is actually done in MESMER-X + localized_ecov["localized_covariance_adjusted"] = ( + mesmer.stats.adjust_covariance_ar1( + localized_ecov.localized_covariance, local_ar_params.coeffs + ) + ) if update_expected_files: # save the parameters - result.to_netcdf(TEST_PATH / f"test-mesmer_x-params_{outname}.nc") + transform_params.to_netcdf( + TEST_PATH / f"test-mesmer_x-transf_params_{outname}.nc" + ) + local_ar_params.to_netcdf( + TEST_PATH / f"test-mesmer_x-local_ar_params_{outname}.nc" + ) + localized_ecov.to_netcdf( + TEST_PATH / f"test-mesmer_x-localized_ecov_{outname}.nc" + ) else: # load the parameters - expected = xr.open_dataset(TEST_PATH / f"test-mesmer_x-params_{outname}.nc") - xr.testing.assert_allclose(result, expected) + expected_transform_params = xr.open_dataset( + TEST_PATH / f"test-mesmer_x-transf_params_{outname}.nc" + ) + xr.testing.assert_allclose(transform_params, expected_transform_params) + + expected_local_ar_params = xr.open_dataset( + TEST_PATH / f"test-mesmer_x-local_ar_params_{outname}.nc" + ) + xr.testing.assert_allclose( + local_ar_params["intercept"], + expected_local_ar_params["intercept"], + atol=1e-7, + ) + xr.testing.assert_allclose( + local_ar_params["coeffs"], expected_local_ar_params["coeffs"] + ) + xr.testing.assert_allclose( + local_ar_params["variance"], expected_local_ar_params["variance"] + ) + xr.testing.assert_equal( + local_ar_params["nobs"], expected_local_ar_params["nobs"] + ) + + expected_localized_ecov = xr.open_dataset( + TEST_PATH / f"test-mesmer_x-localized_ecov_{outname}.nc" + ) + xr.testing.assert_allclose(localized_ecov, expected_localized_ecov) diff --git a/tests/test-data/output/txx/test-mesmer_x-local_ar_params_exp1.nc b/tests/test-data/output/txx/test-mesmer_x-local_ar_params_exp1.nc new file mode 100644 index 00000000..69d36237 Binary files /dev/null and b/tests/test-data/output/txx/test-mesmer_x-local_ar_params_exp1.nc differ diff --git a/tests/test-data/output/txx/test-mesmer_x-local_ar_params_exp1_2ndfit.nc b/tests/test-data/output/txx/test-mesmer_x-local_ar_params_exp1_2ndfit.nc new file mode 100644 index 00000000..901001f6 Binary files /dev/null and b/tests/test-data/output/txx/test-mesmer_x-local_ar_params_exp1_2ndfit.nc differ diff --git a/tests/test-data/output/txx/test-mesmer_x-localized_ecov_exp1.nc b/tests/test-data/output/txx/test-mesmer_x-localized_ecov_exp1.nc new file mode 100644 index 00000000..66b500e5 Binary files /dev/null and b/tests/test-data/output/txx/test-mesmer_x-localized_ecov_exp1.nc differ diff --git a/tests/test-data/output/txx/test-mesmer_x-localized_ecov_exp1_2ndfit.nc b/tests/test-data/output/txx/test-mesmer_x-localized_ecov_exp1_2ndfit.nc new file mode 100644 index 00000000..3882be1a Binary files /dev/null and b/tests/test-data/output/txx/test-mesmer_x-localized_ecov_exp1_2ndfit.nc differ diff --git a/tests/test-data/output/txx/test-mesmer_x-params_exp1.nc b/tests/test-data/output/txx/test-mesmer_x-transf_params_exp1.nc similarity index 97% rename from tests/test-data/output/txx/test-mesmer_x-params_exp1.nc rename to tests/test-data/output/txx/test-mesmer_x-transf_params_exp1.nc index 21d7e908..d1fb598c 100644 Binary files a/tests/test-data/output/txx/test-mesmer_x-params_exp1.nc and b/tests/test-data/output/txx/test-mesmer_x-transf_params_exp1.nc differ diff --git a/tests/test-data/output/txx/test-mesmer_x-params_exp1_2ndfit.nc b/tests/test-data/output/txx/test-mesmer_x-transf_params_exp1_2ndfit.nc similarity index 98% rename from tests/test-data/output/txx/test-mesmer_x-params_exp1_2ndfit.nc rename to tests/test-data/output/txx/test-mesmer_x-transf_params_exp1_2ndfit.nc index 1050a43f..3205ee0a 100644 Binary files a/tests/test-data/output/txx/test-mesmer_x-params_exp1_2ndfit.nc and b/tests/test-data/output/txx/test-mesmer_x-transf_params_exp1_2ndfit.nc differ