Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new emulation integration tests #599

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ This release implements using `DataTree` from `xarray-datatree` to handle multip
- Add utility functions for `DataTree` (`#556 <https://github.com/MESMER-group/mesmer/pull/556>`_).
- Add `xarray-datatree` as dependency (`#554 <https://github.com/MESMER-group/mesmer/pull/554>`_)
- Add calibration integration tests for multiple scenarios and change parameter files to netcdfs with new naming structure (`#537 <https://github.com/MESMER-group/mesmer/pull/537>`_)
- Add new integration tests for drawing realisations (`#599 <https://github.com/MESMER-group/mesmer/pull/599>`_)

By `Victoria Bauer`_ and `Mathias Hauser`_.

Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ include_package_data = True
python_requires = >=3.10
install_requires =
dask[array,distributed] >=2023.8
filefisher >=1.0.1
joblib >=1.3
netcdf4 >=1.6
numpy >=1.24
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_draw_realisations_from_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_make_realisations(
ouput_dir = os.path.join(test_data_root_dir, "output", outname)

expected_output_file = os.path.join(
ouput_dir, "test_make_realisations_expected_output.nc"
ouput_dir, "test_make_realisations_expected_output_legacy.nc"
)

tseeds = {"IPSL-CM6A-LR": {"all": {"gv": 0, "lv": 1_000_000}}}
Expand Down
373 changes: 373 additions & 0 deletions tests/integration/test_draw_realisations_mesmer_newcodepath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,373 @@
import pathlib

import datatree
import pytest
import xarray as xr
from datatree import DataTree, map_over_subtree
from filefisher import FileFinder

import mesmer


def create_forcing_data(test_data_root_dir, scenarios, use_hfds, use_tas2):
    """Load CMIP test data and build smoothed global-mean forcing time series.

    For the given ESM ("IPSL-CM6A-LR", CMIP6) this loads annual-mean ``tas``
    (and optionally ``hfds``) for the historical period and each requested
    scenario, stitches historical + scenario into one continuous time series
    per ensemble member, computes anomalies w.r.t. the 1850-1900 reference
    period, takes the area-weighted global mean, averages over members, and
    smoothes the result with LOWESS.

    Parameters
    ----------
    test_data_root_dir : str or path-like
        Root directory of the test data (passed to ``pathlib.Path``).
    scenarios : list of str
        Scenario names to load, e.g. ``["ssp126", "ssp585"]``.
    use_hfds : bool
        If True, additionally load and smooth global-mean ``hfds``.
    use_tas2 : bool
        If True, additionally return the squared smoothed ``tas`` forcing.

    Returns
    -------
    tuple
        ``(tas_globmean_forcing, hfds_globmean_smoothed, tas2)`` — DataTrees
        keyed by scenario; the last two entries are ``None`` unless requested
        via ``use_hfds`` / ``use_tas2``.
    """
    # define config values
    REFERENCE_PERIOD = slice("1850", "1900")

    esm = "IPSL-CM6A-LR"
    cmip_generation = 6

    # define paths and load data
    TEST_DATA_PATH = pathlib.Path(test_data_root_dir)

    cmip_data_path = (
        TEST_DATA_PATH / "calibrate-coarse-grid" / f"cmip{cmip_generation}-ng"
    )

    # FileFinder resolves the {placeholder} patterns against the directory tree
    CMIP_FILEFINDER = FileFinder(
        path_pattern=cmip_data_path / "{variable}/{time_res}/{resolution}",  # type: ignore
        file_pattern="{variable}_{time_res}_{model}_{scenario}_{member}_{resolution}.nc",
    )

    fc_scens = CMIP_FILEFINDER.find_files(
        variable="tas", scenario=scenarios, model=esm, resolution="g025", time_res="ann"
    )

    # only get the historical members that are also in the future scenarios, but only once
    unique_scen_members = fc_scens.df.member.unique()

    fc_hist = CMIP_FILEFINDER.find_files(
        variable="tas",
        scenario="historical",
        model=esm,
        resolution="g025",
        time_res="ann",
        member=unique_scen_members,
    )

    # load data for each scenario
    def _get_hist_path(meta, fc_hist):
        # Find the single historical file matching this scenario file's
        # metadata (same member/variable/etc., scenario swapped to historical).

        meta_hist = meta | {"scenario": "historical"}

        fc = fc_hist.search(**meta_hist)

        if len(fc) == 0:
            raise FileNotFoundError("no hist file found")
        if len(fc) != 1:
            raise ValueError("more than one hist file found")

        return fc.paths.pop()

    def load_hist(meta, fc_hist):
        # open the historical counterpart of a scenario file as a Dataset
        fN = _get_hist_path(meta, fc_hist)
        return xr.open_dataset(fN, use_cftime=True)

    def load_hist_scen_continuous(fc_hist, fc_scens):
        # Build a DataTree with one node per scenario; each node holds the
        # continuous historical+scenario data concatenated over "member".
        dt = DataTree()
        for scen in fc_scens.df.scenario.unique():
            files = fc_scens.search(scenario=scen)

            members = []

            for fN, meta in files.items():

                # members without a matching historical run are skipped
                try:
                    hist = load_hist(meta, fc_hist)
                except FileNotFoundError:
                    continue

                proj = xr.open_dataset(fN, use_cftime=True)

                # join historical and projection along time
                ds = xr.combine_by_coords(
                    [hist, proj],
                    combine_attrs="override",
                    data_vars="minimal",
                    compat="override",
                    coords="minimal",
                )

                # drop auxiliary variables that would conflict on concat
                ds = ds.drop_vars(
                    ("height", "time_bnds", "file_qf", "area"), errors="ignore"
                )

                ds = mesmer.grid.wrap_to_180(ds)

                # assign member-ID as coordinate
                ds = ds.assign_coords({"member": meta["member"]})

                members.append(ds)

            # create a Dataset that holds each member along the member dimension
            scen_data = xr.concat(members, dim="member")
            # put the scenario dataset into the DataTree
            dt[scen] = DataTree(scen_data)
        return dt

    # anomalies w.r.t. the reference period, then area-weighted global mean
    tas = load_hist_scen_continuous(fc_hist, fc_scens)
    ref = tas.sel(time=REFERENCE_PERIOD).mean("time")
    tas_anoms = tas - ref
    tas_globmean = map_over_subtree(mesmer.weighted.global_mean)(tas_anoms)

    # ensemble mean, LOWESS-smoothed to isolate the forced response
    tas_globmean_ensmean = tas_globmean.mean(dim="member")
    tas_globmean_forcing = map_over_subtree(mesmer.stats.lowess)(
        tas_globmean_ensmean, dim="time", n_steps=30, use_coords=False
    )

    def _get_hfds():
        # same pipeline as for tas, but for hfds (ocean heat flux)
        fc_hfds = CMIP_FILEFINDER.find_files(
            variable="hfds",
            scenario=scenarios,
            model=esm,
            resolution="g025",
            time_res="ann",
            member=unique_scen_members,
        )

        fc_hfds_hist = CMIP_FILEFINDER.find_files(
            variable="hfds",
            scenario="historical",
            model=esm,
            resolution="g025",
            time_res="ann",
            member=unique_scen_members,
        )

        hfds = load_hist_scen_continuous(fc_hfds_hist, fc_hfds)
        hfds_ref = hfds.sel(time=REFERENCE_PERIOD).mean("time")
        hfds_anoms = hfds - hfds_ref
        hfds_globmean = map_over_subtree(mesmer.weighted.global_mean)(hfds_anoms)
        hfds_globmean_ensmean = hfds_globmean.mean(dim="member")
        # NOTE(review): n_steps=50 here vs n_steps=30 for tas above —
        # presumably intentional (stronger smoothing for hfds); confirm
        hfds_globmean_smoothed = map_over_subtree(mesmer.stats.lowess)(
            hfds_globmean_ensmean, "time", n_steps=50, use_coords=False
        )
        return hfds_globmean_smoothed

    if use_hfds:
        hfds_globmean_smoothed = _get_hfds()

    else:
        hfds_globmean_smoothed = None

    tas2 = tas_globmean_forcing**2 if use_tas2 else None

    return tas_globmean_forcing, hfds_globmean_smoothed, tas2


@pytest.mark.parametrize(
    "scenarios, use_tas2, use_hfds, n_realisations, outname",
    (
        # tas
        pytest.param(
            ["ssp126"],
            False,
            False,
            1,
            "tas/one_scen_one_ens",
        ),
        pytest.param(
            ["ssp585"],
            False,
            False,
            30,
            "tas/one_scen_multi_ens",
        ),
        pytest.param(
            ["ssp126", "ssp585"],
            False,
            False,
            100,
            "tas/multi_scen_multi_ens",
        ),
        # tas and tas**2
        pytest.param(
            ["ssp126"],
            True,
            False,
            1,
            "tas_tas2/one_scen_one_ens",
        ),
        # tas and hfds
        pytest.param(
            ["ssp126"],
            False,
            True,
            1,
            "tas_hfds/one_scen_one_ens",
        ),
        # tas, tas**2, and hfds
        pytest.param(
            ["ssp126"],
            True,
            True,
            1,
            "tas_tas2_hfds/one_scen_one_ens",
        ),
        pytest.param(
            ["ssp585"],
            True,
            True,
            1,
            "tas_tas2_hfds/one_scen_multi_ens",
        ),
        pytest.param(
            ["ssp126", "ssp585"],
            True,
            True,
            1,
            "tas_tas2_hfds/multi_scen_multi_ens",
        ),
    ),
)
def test_make_realisations(
    scenarios,
    use_tas2,
    use_hfds,
    n_realisations,
    outname,
    test_data_root_dir,
    update_expected_files=False,
):
    """Draw emulator realisations with the new code path and compare to saved output.

    Loads the calibrated parameter files for each MESMER module, rebuilds the
    forcing data, draws ``n_realisations`` emulations (volcanic influence +
    global variability + local forced response + local variability) with fixed
    seeds, and compares the result against the stored expected netCDF.

    Parameters
    ----------
    scenarios : list of str
        Scenarios to emulate (parametrized).
    use_tas2, use_hfds : bool
        Whether tas**2 / hfds are additional predictors (parametrized).
    n_realisations : int
        Number of realisations to draw (parametrized).
    outname : str
        Sub-directory of the test output holding params and expected files.
    test_data_root_dir : fixture
        Root directory of the test data.
    update_expected_files : bool, default False
        If True, overwrite the expected output file and skip the comparison.
    """
    esm = "IPSL-CM6A-LR"

    TEST_DATA_PATH = pathlib.Path(test_data_root_dir)
    TEST_PATH = TEST_DATA_PATH / "output" / outname

    PARAM_FILEFINDER = FileFinder(
        path_pattern=TEST_PATH / "test-params/{module}/",
        file_pattern="params_{module}_{esm}_{scen}.nc",
    )
    scen_str = "-".join(scenarios)

    def _param_file(module):
        # there is exactly one parameter file per module/esm/scenario combination
        return PARAM_FILEFINDER.find_single_file(
            module=module, esm=esm, scen=scen_str
        ).paths.pop()

    volcanic_file = _param_file("volcanic")
    global_ar_file = _param_file("global-variability")
    local_forced_file = _param_file("local-trends")
    local_ar_file = _param_file("local-variability")
    localized_ecov_file = _param_file("covariance")

    expected_output_file = TEST_PATH / f"test_realisations_expected_{scen_str}.nc"

    HIST_PERIOD = slice("1850", "2014")

    # we need a maximum of 4 seeds if there are max 2 scenarios (1 for global and 1 for local)
    # NOTE: seeds are popped from the END of the list — keep list and scenario
    # order stable so the drawn realisations stay reproducible
    seed_list = [981, 314, 272, 42]

    seed_global_variability = DataTree()
    seed_local_variability = DataTree()

    for scen in scenarios:
        seed_global_variability[scen] = DataTree(
            xr.Dataset({"seed": xr.DataArray(seed_list.pop())})
        )
        seed_local_variability[scen] = DataTree(
            xr.Dataset({"seed": xr.DataArray(seed_list.pop())})
        )

    tas_forcing, hfds, tas2 = create_forcing_data(
        test_data_root_dir, scenarios, use_hfds, use_tas2
    )
    # assumes all scenarios share the same time axis — TODO confirm
    scen0 = scenarios[0]
    time = tas_forcing[scen0].time

    # buffer: number of initial AR samples discarded as spin-up
    buffer_global_variability = 50
    buffer_local_variability = 20

    # 1.) superimpose volcanic influence
    volcanic_params = xr.open_dataset(volcanic_file)
    tas_forcing = map_over_subtree(mesmer.volc.superimpose_volcanic_influence)(
        tas_forcing, volcanic_params, hist_period=HIST_PERIOD
    )

    # 2.) compute the global variability
    global_ar_params = xr.open_dataset(global_ar_file)
    global_variability = map_over_subtree(
        mesmer.stats.draw_auto_regression_uncorrelated
    )(
        global_ar_params,
        realisation=n_realisations,
        time=time,
        seed=seed_global_variability,
        buffer=buffer_global_variability,
    ).rename(
        {"samples": "tas"}
    )

    # 3.) compute the local forced response
    lr = mesmer.stats.LinearRegression().from_netcdf(local_forced_file)

    # assemble the predictors per scenario (tas2/hfds only when used)
    predictors = DataTree()
    for scen in scenarios:
        predictors[scen] = DataTree.from_dict(
            {"tas": tas_forcing[scen], "tas_resids": global_variability[scen]}
        )

        if tas2 is not None:
            predictors[scen]["tas2"] = tas2[scen]
        if hfds is not None:
            predictors[scen]["hfds"] = hfds[scen]

    # predictors to exclude when isolating the "tas_resids" contribution in
    # step 4 — invariant over scenarios, so build the set once
    exclude_for_variability = {"tas", "intercept"}
    if use_tas2:
        exclude_for_variability.add("tas2")
    if use_hfds:
        exclude_for_variability.add("hfds")

    # uses ``exclude`` to split the linear response
    local_forced_response = DataTree()
    local_variability_from_global_var = DataTree()

    for scen in predictors.children:
        local_forced_response[scen] = DataTree(
            lr.predict(predictors[scen], exclude={"tas_resids"}).rename("tas")
        )

        # 4.) compute the local variability part driven by global variability
        local_variability_from_global_var[scen] = DataTree(
            lr.predict(predictors[scen], exclude=exclude_for_variability).rename("tas")
        )

    # 5.) compute local variability
    local_ar = xr.open_dataset(local_ar_file)
    localized_covariance = xr.open_dataset(localized_ecov_file)
    localized_covariance_adjusted = localized_covariance.localized_covariance_adjusted

    local_variability = map_over_subtree(mesmer.stats.draw_auto_regression_correlated)(
        local_ar,
        localized_covariance_adjusted,
        time=time,
        realisation=n_realisations,
        seed=seed_local_variability,
        buffer=buffer_local_variability,
    ).rename({"samples": "tas"})

    local_variability_total = local_variability_from_global_var + local_variability

    result = local_forced_response + local_variability_total

    # save the emulations
    if update_expected_files:
        result.to_netcdf(expected_output_file)
        pytest.skip("Updated expected output file.")
    else:
        expected = datatree.open_datatree(expected_output_file, use_cftime=True)
        for scen in scenarios:
            exp_scen = expected[scen].to_dataset()
            res_scen = result[scen].to_dataset()
            xr.testing.assert_allclose(res_scen, exp_scen)

            # make sure we can get onto a lat lon grid from what is saved
            exp_reshaped = exp_scen.set_index(gridcell=("lat", "lon")).unstack(
                "gridcell"
            )
            expected_dims = {"realisation", "lon", "lat", "time"}

            assert set(exp_reshaped.dims) == expected_dims
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading