Skip to content

Commit

Permalink
remove ensemble anomaly type 'first' (#247)
Browse files Browse the repository at this point in the history
* remove ensemble anomaly type 'first'

* CHANGELOG
  • Loading branch information
mathause authored Jan 22, 2023
1 parent f5b36fb commit f158f94
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 57 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ Breaking changes
and the ``reg_dict`` argument to :py:func:`mesmer.utils.select.extract_land`. These arguments
no longer have any effect (`#235 <https://github.com/MESMER-group/mesmer/pull/235>`_).
By `Mathias Hauser <https://github.com/mathause>`_.

- Removed ``ref["type"] == "first"``, i.e., calculating the anomaly w.r.t. the first
ensemble member (`#247 <https://github.com/MESMER-group/mesmer/pull/247>`_).
By `Mathias Hauser <https://github.com/mathause>`_.

Deprecations
^^^^^^^^^^^^
Expand Down
64 changes: 19 additions & 45 deletions mesmer/io/_load_cmipng.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,53 +152,33 @@ def find_files_cmipng(gen, esm, var, scenario, dir_cmipng):
runs_excl = []
print("TO DO: create list of excluded runs / ESMs for this variable")

# print(
# "TO DO: rewrite selection of p2 in way that needs less space for CanESM5 (+ see if other ESMs need p2 too)"
# TODO: how to handle p2?
# )
path = os.path.join(dir_name, f"{var}_ann_{esm}_{scenario}_r*i1p1f*_g025.nc")
path_runs_list_scen = sorted(glob.glob(path))

path = os.path.join(dir_name, f"{var}_ann_{esm}_historical_r*i1p1f*_g025.nc")
path_runs_list_hist = sorted(glob.glob(path))

# check if both scenario and historical run are available for this realization, if yes add to path_runs_list
# check if both scenario and historical run are available for this realization,
# if yes add to path_runs_list
path_runs_list = []
for run in path_runs_list_scen:
run_hist = run.replace(scenario, "historical")
if run_hist in path_runs_list_hist:
path_runs_list.append(run)

# TODO: redecide if I am fine with CanESM5 p2 but not all scenarios or if I prefer the worse p1 which has all scenarios
# code below = old version when used p2 instead
# if esm != "CanESM5":
# path_runs_list = sorted(
# glob.glob(
# dir_name
# + var
# + "_ann_"
# + esm
# + "_"
# + scenario
# + "_"
# + "r*i1p1f*"
# + "_g025.nc"
# )
# )

# TODO: redecide if I am fine with CanESM5 p2 but not all scenarios or if I
# prefer the worse p1 which has all scenarios code below = old version when used
# p2 instead
# if esm == "CanESM5":
# variant = "r*i1p2f*"
# else:
# path_runs_list = sorted(
# glob.glob(
# dir_name
# + var
# + "_ann_"
# + esm
# + "_"
# + scenario
# + "_"
# + "r*i1p2f*"
# + "_g025.nc"
# )
# )
# variant = "r*i1p1f*"

# path = os.path.join(dir_name, f"{var}_ann_{esm}_{scenario}_{variant}_g025.nc")
# path_runs_list = sorted(glob.glob(path))

if len(path_runs_list) == 0: # if no entries found, return the empty list
return path_runs_list

Expand Down Expand Up @@ -266,8 +246,9 @@ def load_cmipng(targ, esm, scen, cfg):
"No version without historical time period is currently implemented."
)

# once start working with other vars, extend dict eg {"pr": load_cmipng_pr,
# "hfds": load_cmipng_hfds, "tas": load_cmipng_tas}
targ_func_mapping = {"hfds": load_cmipng_hfds, "tas": load_cmipng_tas}
# once start working with other vars, extend dict eg {"pr": load_cmipng_pr, "hfds": load_cmipng_hfds, "tas": load_cmipng_tas}

load_targ = targ_func_mapping[targ]

Expand Down Expand Up @@ -420,6 +401,9 @@ def _load_cmipng_var(esm, scen, cfg, varn):
ref = cfg.ref
dir_cmipng = cfg.dir_cmipng

if ref["type"] == "first":
raise ValueError("reference type 'first' is no longer supported")

# find the files which fulfill the specifications
path_runs_list = find_files_cmipng(gen, esm, varn, scen, dir_cmipng)

Expand Down Expand Up @@ -468,17 +452,7 @@ def _load_cmipng_var(esm, scen, cfg, varn):
)
dta[run] -= dta_ref # compute anomalies

# TODO: remove "first" (used for Leas first paper but does not really make sense)
if ref["type"] == "first" and run == "1":
# TO CHECK
dta_ref = (
data[varn]
.sel(time=slice(ref["start"], ref["end"]))
.mean(dim="time")
.values
)

if ref["type"] == "all" or ref["type"] == "first":
if ref["type"] == "all":
for run_path in path_runs_list:
run = run_nrs[run_path]
dta[run] -= dta_ref # compute anomalies
Expand Down
85 changes: 74 additions & 11 deletions tests/integration/test_load_cmipng.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os

import numpy as np
import numpy.testing as npt
import pytest

import mesmer
Expand All @@ -11,7 +10,6 @@

def _get_default_kwargs(
test_data_root_dir,
varn=None,
esm="IPSL-CM6A-LR",
scen="ssp126",
type="all",
Expand Down Expand Up @@ -50,6 +48,14 @@ def test_load_cmipng_var_missing_data():
assert time is None


def test_load_cmipng_var_type_first_error(test_data_root_dir):

with pytest.raises(ValueError, match="reference type 'first'"):
_load_cmipng_var(
varn="tas", **_get_default_kwargs(test_data_root_dir, type="first")
)


@pytest.mark.parametrize("varn", ["tas", "hfds"])
def test_load_cmipng(test_data_root_dir, varn):

Expand All @@ -68,16 +74,73 @@ def test_load_cmipng(test_data_root_dir, varn):
assert dta[1].shape == (251, 20, 20)
assert dta_global[1].shape == (251,)

npt.assert_allclose(lon["c"], lon_expected)
npt.assert_allclose(lat["c"], lat_expected)
npt.assert_allclose(time, time_expected)
np.testing.assert_allclose(lon["c"], lon_expected)
np.testing.assert_allclose(lat["c"], lat_expected)
np.testing.assert_allclose(time, time_expected)


@pytest.mark.parametrize("varn", ["tas", "hfds"])
@pytest.mark.parametrize("start, end", [("1850", "1900"), ("1850", "1860")])
def test_load_cmipng_ref(test_data_root_dir, varn, start, end):
def test_load_cmipng_ref_two_ens_all(test_data_root_dir, varn, start, end):

dta, dta_global, lon, lat, time = _load_cmipng_var(
varn=varn,
**_get_default_kwargs(test_data_root_dir, scen="ssp585", start=start, end=end),
)
time = np.asarray(time)
sel = (time >= int(start)) & (time <= int(end))

# stack ensemble members, new axis at end
dta_arr = np.stack(list(dta.values()), axis=-1)
dta_global_arr = np.stack(list(dta_global.values()), axis=-1)

# test mean over all grid points is 0 (over reference period AND ensembles)
np.testing.assert_allclose(0, np.nanmean(dta_arr[sel, :, :]), atol=1e-6)

# test individual gridpoints are 0 (over reference period)
result = np.mean(dta_arr[sel, :, :], axis=(0, -1))

print(result.shape)
expected = np.zeros_like(result)
expected[np.isnan(result)] = np.NaN
np.testing.assert_allclose(expected, result, atol=1e-6)

# test global mean is 0 (over reference period)
np.testing.assert_allclose(0, np.mean(dta_global_arr[sel]), atol=1e-6)


# TODO: test ref["type"] once there is more than one test dataset
@pytest.mark.parametrize("varn", ["tas", "hfds"])
@pytest.mark.parametrize("start, end", [("1850", "1900"), ("1850", "1860")])
def test_load_cmipng_ref_two_ens_indiv(test_data_root_dir, varn, start, end):

dta, dta_global, lon, lat, time = _load_cmipng_var(
varn=varn,
**_get_default_kwargs(
test_data_root_dir, scen="ssp585", type="individ", start=start, end=end
),
)
time = np.asarray(time)
sel = (time >= int(start)) & (time <= int(end))

# test mean over all grid points is 0 (over reference period)
for arr in dta.values():
np.testing.assert_allclose(0, np.nanmean(arr[sel, :, :]), atol=1e-6)

# test individual gridpoints are 0 (over reference period)
for arr in dta.values():
result = np.mean(arr[sel, :, :], axis=0)
expected = np.zeros_like(result)
expected[np.isnan(result)] = np.NaN
np.testing.assert_allclose(expected, result, atol=1e-6)

# test global mean is 0 (over reference period)
for arr_global in dta_global.values():
np.testing.assert_allclose(0, np.mean(arr_global[sel]), atol=1e-6)


@pytest.mark.parametrize("varn", ["tas", "hfds"])
@pytest.mark.parametrize("start, end", [("1850", "1900"), ("1850", "1860")])
def test_load_cmipng_ref(test_data_root_dir, varn, start, end):

dta, dta_global, lon, lat, time = _load_cmipng_var(
varn=varn, **_get_default_kwargs(test_data_root_dir, start=start, end=end)
Expand All @@ -86,16 +149,16 @@ def test_load_cmipng_ref(test_data_root_dir, varn, start, end):
sel = (time >= int(start)) & (time <= int(end))

# test mean over all grid points is 0 (over reference period)
npt.assert_allclose(0, np.nanmean(dta[1][sel, :, :]), atol=1e-6)
np.testing.assert_allclose(0, np.nanmean(dta[1][sel, :, :]), atol=1e-6)

# test individual gridpoints are 0 (over reference period)
result = np.mean(dta[1][sel, :, :], axis=0)
expected = np.zeros_like(result)
expected[np.isnan(result)] = np.NaN
npt.assert_allclose(expected, result, atol=1e-6)
np.testing.assert_allclose(expected, result, atol=1e-6)

# test global mean is 0 (over reference period)
npt.assert_allclose(0, np.mean(dta_global[1][sel]), atol=1e-6)
np.testing.assert_allclose(0, np.mean(dta_global[1][sel]), atol=1e-6)


@pytest.mark.parametrize("varn", ["tas", "hfds"])
Expand All @@ -118,4 +181,4 @@ def test_load_cmimpng_vs_load_var(test_data_root_dir, varn):
assert_dict_allclose(dta_global_d, dta_global_i, "direct", "indirect")
assert_dict_allclose(lon_d, lon_i, "direct", "indirect")
assert_dict_allclose(lat_d, lat_i, "direct", "indirect")
npt.assert_allclose(time_d, time_i)
np.testing.assert_allclose(time_d, time_i)

0 comments on commit f158f94

Please sign in to comment.