diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6a33e9e5d..db8694af5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -87,6 +87,7 @@ This release implements using `DataTree` from `xarray-datatree` to handle multip - Add utility functions for `DataTree` (`#556 `_). - Add `xarray-datatree` as dependency (`#554 `_) - Add calibration integration tests for multiple scenarios and change parameter files to netcdfs with new naming structure (`#537 `_) +- Add new integration tests for drawing realisations (`#599 `_) By `Victoria Bauer`_ and `Mathias Hauser`_. diff --git a/setup.cfg b/setup.cfg index 2cbd6a37b..72f657cb5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,6 +33,7 @@ include_package_data = True python_requires = >=3.10 install_requires = dask[array,distributed] >=2023.8 + filefisher >=1.0.1 joblib >=1.3 netcdf4 >=1.6 numpy >=1.24 diff --git a/tests/integration/test_draw_realisations_from_bundle.py b/tests/integration/test_draw_realisations_from_bundle.py index 83a4a972d..3c836e35f 100644 --- a/tests/integration/test_draw_realisations_from_bundle.py +++ b/tests/integration/test_draw_realisations_from_bundle.py @@ -44,7 +44,7 @@ def test_make_realisations( ouput_dir = os.path.join(test_data_root_dir, "output", outname) expected_output_file = os.path.join( - ouput_dir, "test_make_realisations_expected_output.nc" + ouput_dir, "test_make_realisations_expected_output_legacy.nc" ) tseeds = {"IPSL-CM6A-LR": {"all": {"gv": 0, "lv": 1_000_000}}} diff --git a/tests/integration/test_draw_realisations_mesmer_newcodepath.py b/tests/integration/test_draw_realisations_mesmer_newcodepath.py new file mode 100644 index 000000000..b60058585 --- /dev/null +++ b/tests/integration/test_draw_realisations_mesmer_newcodepath.py @@ -0,0 +1,373 @@ +import pathlib + +import datatree +import pytest +import xarray as xr +from datatree import DataTree, map_over_subtree +from filefisher import FileFinder + +import mesmer + + +def create_forcing_data(test_data_root_dir, scenarios, use_hfds, use_tas2): + # define config values + REFERENCE_PERIOD = slice("1850", "1900") + + esm = "IPSL-CM6A-LR" + cmip_generation = 6 + + # define paths and load data + TEST_DATA_PATH = pathlib.Path(test_data_root_dir) + + cmip_data_path = ( + TEST_DATA_PATH / "calibrate-coarse-grid" / f"cmip{cmip_generation}-ng" + ) + + CMIP_FILEFINDER = FileFinder( + path_pattern=cmip_data_path / "{variable}/{time_res}/{resolution}", # type: ignore + file_pattern="{variable}_{time_res}_{model}_{scenario}_{member}_{resolution}.nc", + ) + + fc_scens = CMIP_FILEFINDER.find_files( + variable="tas", scenario=scenarios, model=esm, resolution="g025", time_res="ann" + ) + + # only get the historical members that are also in the future scenarios, but only once + unique_scen_members = fc_scens.df.member.unique() + + fc_hist = CMIP_FILEFINDER.find_files( + variable="tas", + scenario="historical", + model=esm, + resolution="g025", + time_res="ann", + member=unique_scen_members, + ) + + # load data for each scenario + def _get_hist_path(meta, fc_hist): + + meta_hist = meta | {"scenario": "historical"} + + fc = fc_hist.search(**meta_hist) + + if len(fc) == 0: + raise FileNotFoundError("no hist file found") + if len(fc) != 1: + raise ValueError("more than one hist file found") + + return fc.paths.pop() + + def load_hist(meta, fc_hist): + fN = _get_hist_path(meta, fc_hist) + return xr.open_dataset(fN, use_cftime=True) + + def load_hist_scen_continuous(fc_hist, fc_scens): + dt = DataTree() + for scen in fc_scens.df.scenario.unique(): + files = fc_scens.search(scenario=scen) + + members = [] + + for fN, meta in files.items(): + + try: + hist = load_hist(meta, fc_hist) + except FileNotFoundError: + continue + + proj = xr.open_dataset(fN, use_cftime=True) + + ds = xr.combine_by_coords( + [hist, proj], + combine_attrs="override", + data_vars="minimal", + compat="override", + coords="minimal", + ) + + ds = ds.drop_vars( + ("height", "time_bnds", "file_qf", "area"), errors="ignore" + ) + + ds = mesmer.grid.wrap_to_180(ds) + + # assign member-ID as coordinate + ds = ds.assign_coords({"member": meta["member"]}) + + members.append(ds) + + # create a Dataset that holds each member along the member dimension + scen_data = xr.concat(members, dim="member") + # put the scenario dataset into the DataTree + dt[scen] = DataTree(scen_data) + return dt + + tas = load_hist_scen_continuous(fc_hist, fc_scens) + ref = tas.sel(time=REFERENCE_PERIOD).mean("time") + tas_anoms = tas - ref + tas_globmean = map_over_subtree(mesmer.weighted.global_mean)(tas_anoms) + + tas_globmean_ensmean = tas_globmean.mean(dim="member") + tas_globmean_forcing = map_over_subtree(mesmer.stats.lowess)( + tas_globmean_ensmean, dim="time", n_steps=30, use_coords=False + ) + + def _get_hfds(): + fc_hfds = CMIP_FILEFINDER.find_files( + variable="hfds", + scenario=scenarios, + model=esm, + resolution="g025", + time_res="ann", + member=unique_scen_members, + ) + + fc_hfds_hist = CMIP_FILEFINDER.find_files( + variable="hfds", + scenario="historical", + model=esm, + resolution="g025", + time_res="ann", + member=unique_scen_members, + ) + + hfds = load_hist_scen_continuous(fc_hfds_hist, fc_hfds) + hfds_ref = hfds.sel(time=REFERENCE_PERIOD).mean("time") + hfds_anoms = hfds - hfds_ref + hfds_globmean = map_over_subtree(mesmer.weighted.global_mean)(hfds_anoms) + hfds_globmean_ensmean = hfds_globmean.mean(dim="member") + hfds_globmean_smoothed = map_over_subtree(mesmer.stats.lowess)( + hfds_globmean_ensmean, "time", n_steps=50, use_coords=False + ) + return hfds_globmean_smoothed + + if use_hfds: + hfds_globmean_smoothed = _get_hfds() + + else: + hfds_globmean_smoothed = None + + tas2 = tas_globmean_forcing**2 if use_tas2 else None + + return tas_globmean_forcing, hfds_globmean_smoothed, tas2 + + +@pytest.mark.parametrize( + "scenarios, use_tas2, use_hfds, n_realisations, outname", + ( + # tas + pytest.param( + ["ssp126"], + False, + False, + 1, + "tas/one_scen_one_ens", + ), + pytest.param( + ["ssp585"], + False, + False, + 30, + "tas/one_scen_multi_ens", + ), + pytest.param( + ["ssp126", "ssp585"], + False, + False, + 100, + "tas/multi_scen_multi_ens", + ), + # tas and tas**2 + pytest.param( + ["ssp126"], + True, + False, + 1, + "tas_tas2/one_scen_one_ens", + ), + # tas and hfds + pytest.param( + ["ssp126"], + False, + True, + 1, + "tas_hfds/one_scen_one_ens", + ), + # tas, tas**2, and hfds + pytest.param( + ["ssp126"], + True, + True, + 1, + "tas_tas2_hfds/one_scen_one_ens", + ), + pytest.param( + ["ssp585"], + True, + True, + 1, + "tas_tas2_hfds/one_scen_multi_ens", + ), + pytest.param( + ["ssp126", "ssp585"], + True, + True, + 1, + "tas_tas2_hfds/multi_scen_multi_ens", + ), + ), +) +def test_make_realisations( + scenarios, + use_tas2, + use_hfds, + n_realisations, + outname, + test_data_root_dir, + update_expected_files=False, +): + esm = "IPSL-CM6A-LR" + + TEST_DATA_PATH = pathlib.Path(test_data_root_dir) + TEST_PATH = TEST_DATA_PATH / "output" / outname + + PARAM_FILEFINDER = FileFinder( + path_pattern=TEST_PATH / "test-params/{module}/", + file_pattern="params_{module}_{esm}_{scen}.nc", + ) + scen_str = "-".join(scenarios) + + volcanic_file = PARAM_FILEFINDER.find_single_file( + module="volcanic", esm=esm, scen=scen_str + ).paths.pop() + global_ar_file = PARAM_FILEFINDER.find_single_file( + module="global-variability", esm=esm, scen=scen_str + ).paths.pop() + local_forced_file = PARAM_FILEFINDER.find_single_file( + module="local-trends", esm=esm, scen=scen_str + ).paths.pop() + local_ar_file = PARAM_FILEFINDER.find_single_file( + module="local-variability", esm=esm, scen=scen_str + ).paths.pop() + localized_ecov_file = PARAM_FILEFINDER.find_single_file( + module="covariance", esm=esm, scen=scen_str + ).paths.pop() + + expected_output_file = TEST_PATH / f"test_realisations_expected_{scen_str}.nc" + + HIST_PERIOD = slice("1850", "2014") + + # we need a maximum of 4 seeds if there are max 2 scenarios (1 for global and 1 for local) + seed_list = [981, 314, 272, 42] + + seed_global_variability = DataTree() + seed_local_variability = DataTree() + + for scen in scenarios: + seed_global_variability[scen] = DataTree( + xr.Dataset({"seed": xr.DataArray(seed_list.pop())}) + ) + seed_local_variability[scen] = DataTree( + xr.Dataset({"seed": xr.DataArray(seed_list.pop())}) + ) + + tas_forcing, hfds, tas2 = create_forcing_data( + test_data_root_dir, scenarios, use_hfds, use_tas2 + ) + scen0 = scenarios[0] + time = tas_forcing[scen0].time + + buffer_global_variability = 50 + buffer_local_variability = 20 + + # 1.) superimpose volcanic influence + volcanic_params = xr.open_dataset(volcanic_file) + tas_forcing = map_over_subtree(mesmer.volc.superimpose_volcanic_influence)( + tas_forcing, volcanic_params, hist_period=HIST_PERIOD + ) + + # 2.) compute the global variability + global_ar_params = xr.open_dataset(global_ar_file) + global_variability = map_over_subtree( + mesmer.stats.draw_auto_regression_uncorrelated + )( + global_ar_params, + realisation=n_realisations, + time=time, + seed=seed_global_variability, + buffer=buffer_global_variability, + ).rename( + {"samples": "tas"} + ) + + # 3.) compute the local forced response + lr = mesmer.stats.LinearRegression().from_netcdf(local_forced_file) + + predictors = DataTree() + for scen in scenarios: + predictors[scen] = DataTree.from_dict( + {"tas": tas_forcing[scen], "tas_resids": global_variability[scen]} + ) + + if tas2 is not None: + predictors[scen]["tas2"] = tas2[scen] + if hfds is not None: + predictors[scen]["hfds"] = hfds[scen] + + # uses ``exclude`` to split the linear response + local_forced_response = DataTree() + local_variability_from_global_var = DataTree() + + for scen in predictors.children: + local_forced_response[scen] = DataTree( + lr.predict(predictors[scen], exclude={"tas_resids"}).rename("tas") + ) + + # 4.) compute the local variability part driven by global variabilty + exclude = {"tas", "intercept"} + if use_tas2: + exclude.add("tas2") + if use_hfds: + exclude.add("hfds") + + local_variability_from_global_var[scen] = DataTree( + lr.predict(predictors[scen], exclude=exclude).rename("tas") + ) + + # 5.) compute local variability + local_ar = xr.open_dataset(local_ar_file) + localized_covariance = xr.open_dataset(localized_ecov_file) + localized_covariance_adjusted = localized_covariance.localized_covariance_adjusted + + local_variability = map_over_subtree(mesmer.stats.draw_auto_regression_correlated)( + local_ar, + localized_covariance_adjusted, + time=time, + realisation=n_realisations, + seed=seed_local_variability, + buffer=buffer_local_variability, + ).rename({"samples": "tas"}) + + local_variability_total = local_variability_from_global_var + local_variability + + result = local_forced_response + local_variability_total + + # save the emulations + if update_expected_files: + result.to_netcdf(expected_output_file) + pytest.skip("Updated expected output file.") + else: + expected = datatree.open_datatree(expected_output_file, use_cftime=True) + for scen in scenarios: + exp_scen = expected[scen].to_dataset() + res_scen = result[scen].to_dataset() + xr.testing.assert_allclose(res_scen, exp_scen) + + # make sure we can get onto a lat lon grid from what is saved + exp_reshaped = exp_scen.set_index(gridcell=("lat", "lon")).unstack( + "gridcell" + ) + expected_dims = {"realisation", "lon", "lat", "time"} + + assert set(exp_reshaped.dims) == expected_dims diff --git a/tests/test-data/output/tas/multi_scen_multi_ens/test_realisations_expected_ssp126-ssp585.nc b/tests/test-data/output/tas/multi_scen_multi_ens/test_realisations_expected_ssp126-ssp585.nc new file mode 100644 index 000000000..6b5df3566 Binary files /dev/null and b/tests/test-data/output/tas/multi_scen_multi_ens/test_realisations_expected_ssp126-ssp585.nc differ diff --git a/tests/test-data/output/tas/one_scen_multi_ens/test_realisations_expected_ssp585.nc b/tests/test-data/output/tas/one_scen_multi_ens/test_realisations_expected_ssp585.nc new file mode 100644 index 000000000..429b5ab79 Binary files /dev/null and b/tests/test-data/output/tas/one_scen_multi_ens/test_realisations_expected_ssp585.nc differ diff --git a/tests/test-data/output/tas/one_scen_one_ens/test_make_realisations_expected_output.nc b/tests/test-data/output/tas/one_scen_one_ens/test_make_realisations_expected_output.nc deleted file mode 100644 index fea858cf7..000000000 Binary files a/tests/test-data/output/tas/one_scen_one_ens/test_make_realisations_expected_output.nc and /dev/null differ diff --git a/tests/test-data/output/tas/one_scen_one_ens/test_make_realisations_expected_output_legacy.nc b/tests/test-data/output/tas/one_scen_one_ens/test_make_realisations_expected_output_legacy.nc new file mode 100644 index 000000000..012946e4c Binary files /dev/null and b/tests/test-data/output/tas/one_scen_one_ens/test_make_realisations_expected_output_legacy.nc differ diff --git a/tests/test-data/output/tas/one_scen_one_ens/test_realisations_expected_ssp126.nc b/tests/test-data/output/tas/one_scen_one_ens/test_realisations_expected_ssp126.nc new file mode 100644 index 000000000..478d99667 Binary files /dev/null and b/tests/test-data/output/tas/one_scen_one_ens/test_realisations_expected_ssp126.nc differ diff --git a/tests/test-data/output/tas_hfds/one_scen_one_ens/test_realisations_expected_ssp126.nc b/tests/test-data/output/tas_hfds/one_scen_one_ens/test_realisations_expected_ssp126.nc new file mode 100644 index 000000000..747057d04 Binary files /dev/null and b/tests/test-data/output/tas_hfds/one_scen_one_ens/test_realisations_expected_ssp126.nc differ diff --git a/tests/test-data/output/tas_tas2/one_scen_one_ens/test_realisations_expected_ssp126.nc b/tests/test-data/output/tas_tas2/one_scen_one_ens/test_realisations_expected_ssp126.nc new file mode 100644 index 000000000..bb994ef0a Binary files /dev/null and b/tests/test-data/output/tas_tas2/one_scen_one_ens/test_realisations_expected_ssp126.nc differ diff --git a/tests/test-data/output/tas_tas2_hfds/multi_scen_multi_ens/test_realisations_expected_ssp126-ssp585.nc b/tests/test-data/output/tas_tas2_hfds/multi_scen_multi_ens/test_realisations_expected_ssp126-ssp585.nc new file mode 100644 index 000000000..88e54117c Binary files /dev/null and b/tests/test-data/output/tas_tas2_hfds/multi_scen_multi_ens/test_realisations_expected_ssp126-ssp585.nc differ diff --git a/tests/test-data/output/tas_tas2_hfds/one_scen_multi_ens/test_realisations_expected_ssp585.nc b/tests/test-data/output/tas_tas2_hfds/one_scen_multi_ens/test_realisations_expected_ssp585.nc new file mode 100644 index 000000000..bccbe672b Binary files /dev/null and b/tests/test-data/output/tas_tas2_hfds/one_scen_multi_ens/test_realisations_expected_ssp585.nc differ diff --git a/tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_make_realisations_expected_output.nc b/tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_make_realisations_expected_output_legacy.nc similarity index 50% rename from tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_make_realisations_expected_output.nc rename to tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_make_realisations_expected_output_legacy.nc index 549bbd4e3..dab9e13f7 100644 Binary files a/tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_make_realisations_expected_output.nc and b/tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_make_realisations_expected_output_legacy.nc differ diff --git a/tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_realisations_expected_ssp126.nc b/tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_realisations_expected_ssp126.nc new file mode 100644 index 000000000..736152b6b Binary files /dev/null and b/tests/test-data/output/tas_tas2_hfds/one_scen_one_ens/test_realisations_expected_ssp126.nc differ