From 993d39182c82b8a210e249578b0dc856ee594664 Mon Sep 17 00:00:00 2001 From: forsyth2 <30700190+forsyth2@users.noreply.github.com> Date: Thu, 30 May 2024 13:58:10 -0700 Subject: [PATCH] Refactor area_mean_time_series and add ccb slice flag feature (#750) Co-authored-by: Tom Vo --- .../662-area-mean-time-series/annual_avgs.py | 31 + .../debug/759_slice_flag_ccb.py | 154 ++ .../debug/759_slice_flag_co.py | 137 ++ .../regression-test-json.ipynb | 370 ++++ .../regression-test-netcdf.ipynb | 1565 +++++++++++++++++ .../662-area-mean-time-series/run.cfg | 7 + .../662-area-mean-time-series/run_script.py | 12 + .../test_non_submonthly.py | 46 + .../cdat_regression_testing/utils.py | 10 +- e3sm_diags/derivations/formulas.py | 26 +- .../driver/area_mean_time_series_driver.py | 230 +-- e3sm_diags/driver/utils/dataset_xr.py | 257 ++- e3sm_diags/driver/utils/regrid.py | 2 +- .../cartopy/area_mean_time_series_plot.py | 210 ++- .../driver/utils/test_dataset_xr.py | 195 +- tests/test_area_mean_time_series.py | 1 + 16 files changed, 2876 insertions(+), 377 deletions(-) create mode 100644 auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/annual_avgs.py create mode 100644 auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/debug/759_slice_flag_ccb.py create mode 100644 auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/debug/759_slice_flag_co.py create mode 100644 auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/regression-test-json.ipynb create mode 100644 auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/regression-test-netcdf.ipynb create mode 100644 auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run.cfg create mode 100644 auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run_script.py create mode 100644 auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/test_non_submonthly.py create mode 100644 tests/test_area_mean_time_series.py diff --git a/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/annual_avgs.py b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/annual_avgs.py new file mode 100644 index 000000000..90d10b71b --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/annual_avgs.py @@ -0,0 +1,31 @@ +# %% +import cdms2 +import cdutil +import xcdat as xc + +# %% +# CDAT +# Time coordinates at the first day of the month (1850-01-01) +fn1 = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1/FSNS_185001_201312.nc" +var_key = "FSNS" + +ds_cdat2 = cdms2.open(fn1) +var = ds_cdat2(var_key) + +var_avg = cdutil.averager(var, axis="xy") +var_avg_year = cdutil.YEAR(var_avg) + +print(var_avg_year.getTime().asComponentTime()[-3:-1]) +# [2011-7-2 12:0:0.0, 2012-7-2 12:0:0.0] + +# %% +# xCDAT +ds_xcdat = xc.open_dataset(fn1, decode_times=True) +ds_xcdat_avg = ds_xcdat.spatial.average(var_key, axis=["X", "Y"]) +ds_xcdat_avg_year = ds_xcdat_avg.temporal.group_average(var_key, freq="year") + +var_avg_year_xc = ds_xcdat_avg_year[var_key] + +print(var_avg_year_xc.time.values[-3:-1]) +# [cftime.DatetimeNoLeap(2012, 1, 1, 0, 0, 0, 0, has_year_zero=True) +# cftime.DatetimeNoLeap(2013, 1, 1, 0, 0, 0, 0, has_year_zero=True)] diff --git a/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/debug/759_slice_flag_ccb.py b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/debug/759_slice_flag_ccb.py new file mode 100644 index 000000000..8ed922033 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/debug/759_slice_flag_ccb.py @@ -0,0 +1,154 @@ +""" +https://github.com/E3SM-Project/e3sm_diags/blob/633b52c314325e605fe7f62687cc4d00e5a0a3d5/e3sm_diags/driver/utils/dataset.py#L665-L672 + + if sub_monthly: + start_time = "{}-01-01".format(start_year) + end_time = "{}-01-01".format(str(int(end_year) + 1)) + slice_flag = "co" + else: + start_time = "{}-01-15".format(start_year) + end_time = "{}-12-15".format(end_year) + slice_flag = "ccb" + + var_time = fin(var, time=(start_time, end_time, slice_flag))(squeeze=1) +""" +# %% +import cdms2 +import xcdat as xc + +# Parameters +# Time coordinates at the first day of the month (1850-01-01) +fn1 = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1/FSNS_185001_201312.nc" +var = "FSNS" +start_time = "2011-01-15" +end_time = "2013-12-15" + +slice_flag = "ccb" + + +ds_cdat2 = cdms2.open(fn1) + + +# "ccb" slice +# RESULT: Adds a coordinate to the end because the end time of the dataset is +# 2013-12-01, and "ccb" only allows the right side to be closed. It will use +# bounds to determine the right bound value to add the coordinate point. + +# Example: +# 1. Actual end time: 2013-12-01 +# 2. Slice end time: 2013-12-15 +# 3. Bound values: [2013-12-01, 2014-1-1] +# 4. Use 2014-1-1 as last coordinate point. +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +var_s = ds_cdat2(var, time=(start_time, end_time, slice_flag))(squeeze=1) + +time_s = var_s.getTime() +time_s_dt = time_s.asComponentTime() + +""" +[2011-2-1 0:0:0.0, + 2011-3-1 0:0:0.0, + 2011-4-1 0:0:0.0, + 2011-5-1 0:0:0.0, + 2011-6-1 0:0:0.0, + 2011-7-1 0:0:0.0, + 2011-8-1 0:0:0.0, + 2011-9-1 0:0:0.0, + 2011-10-1 0:0:0.0, + 2011-11-1 0:0:0.0, + 2011-12-1 0:0:0.0, + 2012-1-1 0:0:0.0, + 2012-2-1 0:0:0.0, + 2012-3-1 0:0:0.0, + 2012-4-1 0:0:0.0, + 2012-5-1 0:0:0.0, + 2012-6-1 0:0:0.0, + 2012-7-1 0:0:0.0, + 2012-8-1 0:0:0.0, + 2012-9-1 0:0:0.0, + 2012-10-1 0:0:0.0, + 2012-11-1 0:0:0.0, + 2012-12-1 0:0:0.0, + 2013-1-1 0:0:0.0, + 2013-2-1 0:0:0.0, + 2013-3-1 0:0:0.0, + 2013-4-1 0:0:0.0, + 2013-5-1 0:0:0.0, + 2013-6-1 0:0:0.0, + 2013-7-1 0:0:0.0, + 2013-8-1 0:0:0.0, + 2013-9-1 0:0:0.0, + 2013-10-1 0:0:0.0, + 2013-11-1 0:0:0.0, + 2013-12-1 0:0:0.0, + 2014-1-1 0:0:0.0] +""" + +# 36 2014-1-1 0:0:0.0 +print(len(time_s), time_s_dt[-1]) +# [59829. 59860.] +print(time_s.getBounds()[-1]) + +# ~~~~~~~ + +# xCDAT +ds_xcdat = xc.open_dataset(fn1, decode_times=True) + +ds_xcdat_s = ds_xcdat.sel(time=slice(start_time, end_time)) + +# 35 2013-12-01 00:00:00 +print(len(ds_xcdat_s.time), ds_xcdat_s.time.values[-1]) + +# [cftime.DatetimeNoLeap(2013, 11, 1, 0, 0, 0, 0, has_year_zero=True) +# cftime.DatetimeNoLeap(2013, 12, 1, 0, 0, 0, 0, has_year_zero=True)] +print(ds_xcdat_s.time_bnds.values[-1]) + + +# No slice +# ~~~~~~~ +var_ns = ds_cdat2(var, time=(start_time, end_time))(squeeze=1) +time_ns = var_ns.getTime() +time_ns_dt = time_ns.asComponentTime() + +""" +[2011-2-1 0:0:0.0, + 2011-3-1 0:0:0.0, + 2011-4-1 0:0:0.0, + 2011-5-1 0:0:0.0, + 2011-6-1 0:0:0.0, + 2011-7-1 0:0:0.0, + 2011-8-1 0:0:0.0, + 2011-9-1 0:0:0.0, + 2011-10-1 0:0:0.0, + 2011-11-1 0:0:0.0, + 2011-12-1 0:0:0.0, + 2012-1-1 0:0:0.0, + 2012-2-1 0:0:0.0, + 2012-3-1 0:0:0.0, + 2012-4-1 0:0:0.0, + 2012-5-1 0:0:0.0, + 2012-6-1 0:0:0.0, + 2012-7-1 0:0:0.0, + 2012-8-1 0:0:0.0, + 2012-9-1 0:0:0.0, + 2012-10-1 0:0:0.0, + 2012-11-1 0:0:0.0, + 2012-12-1 0:0:0.0, + 2013-1-1 0:0:0.0, + 2013-2-1 0:0:0.0, + 2013-3-1 0:0:0.0, + 2013-4-1 0:0:0.0, + 2013-5-1 0:0:0.0, + 2013-6-1 0:0:0.0, + 2013-7-1 0:0:0.0, + 2013-8-1 0:0:0.0, + 2013-9-1 0:0:0.0, + 2013-10-1 0:0:0.0, + 2013-11-1 0:0:0.0, + 2013-12-1 0:0:0.0] +""" + +# 35 2013-12-1 0:0:0.0 +print(len(time_ns), time_ns_dt[-1]) +# [59799. 59829.] +print(time_ns.getBounds()[-1]) diff --git a/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/debug/759_slice_flag_co.py b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/debug/759_slice_flag_co.py new file mode 100644 index 000000000..20c5ca08d --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/debug/759_slice_flag_co.py @@ -0,0 +1,137 @@ +# %% +import cdms2 +import xcdat as xc + +# Parameters +# Time coordinates at the first day of the month (1850-01-01) +fn1 = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1/FSNS_185001_201312.nc" +var = "FSNS" +start_time = "2011-01-01" +end_time = "2014-01-01" + +# "co" - YYYY-01-01 +1 for the end year +slice_flag = "co" + + +ds_cdat = cdms2.open(fn1) + +# With slice -- nothing changes because "co" allows the right side to be open, +# The actual end time (2013-12-01) is within the slice end time (2014-01-01). +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +var_s = ds_cdat(var, time=(start_time, end_time, slice_flag))(squeeze=1) + +time_s = var_s.getTime() +time_s_dt = time_s.asComponentTime() + +""" +[2011-1-1 0:0:0.0, + 2011-2-1 0:0:0.0, + 2011-3-1 0:0:0.0, + 2011-4-1 0:0:0.0, + 2011-5-1 0:0:0.0, + 2011-6-1 0:0:0.0, + 2011-7-1 0:0:0.0, + 2011-8-1 0:0:0.0, + 2011-9-1 0:0:0.0, + 2011-10-1 0:0:0.0, + 2011-11-1 0:0:0.0, + 2011-12-1 0:0:0.0, + 2012-1-1 0:0:0.0, + 2012-2-1 0:0:0.0, + 2012-3-1 0:0:0.0, + 2012-4-1 0:0:0.0, + 2012-5-1 0:0:0.0, + 2012-6-1 0:0:0.0, + 2012-7-1 0:0:0.0, + 2012-8-1 0:0:0.0, + 2012-9-1 0:0:0.0, + 2012-10-1 0:0:0.0, + 2012-11-1 0:0:0.0, + 2012-12-1 0:0:0.0, + 2013-1-1 0:0:0.0, + 2013-2-1 0:0:0.0, + 2013-3-1 0:0:0.0, + 2013-4-1 0:0:0.0, + 2013-5-1 0:0:0.0, + 2013-6-1 0:0:0.0, + 2013-7-1 0:0:0.0, + 2013-8-1 0:0:0.0, + 2013-9-1 0:0:0.0, + 2013-10-1 0:0:0.0, + 2013-11-1 0:0:0.0, + 2013-12-1 0:0:0.0] +""" + + +# 36 2013-12-1 0:0:0.0 +print(len(time_s), time_s_dt[-1]) +# [59799. 59829.] +print(time_s.getBounds()[-1]) + + +# ~~~~~~~ + +# xCDAT +ds_xcdat = xc.open_dataset(fn1, decode_times=True) + +ds_xcdat_s = ds_xcdat.sel(time=slice(start_time, end_time)) + +# 35 2013-12-01 00:00:00 +print(len(ds_xcdat_s.time), ds_xcdat_s.time.values[-1]) + +# [cftime.DatetimeNoLeap(2013, 11, 1, 0, 0, 0, 0, has_year_zero=True) +# cftime.DatetimeNoLeap(2013, 12, 1, 0, 0, 0, 0, has_year_zero=True)] +print(ds_xcdat_s.time_bnds.values[-1]) + + +# No slice +# RESULT: Adds a coordinate to the end ("ccn" is default) +# ~~~~~~~ +var_ns = ds_cdat(var, time=(start_time, end_time))(squeeze=1) +time_ns = var_ns.getTime() +time_ns_dt = time_ns.asComponentTime() + +""" +[2011-1-1 0:0:0.0, + 2011-2-1 0:0:0.0, + 2011-3-1 0:0:0.0, + 2011-4-1 0:0:0.0, + 2011-5-1 0:0:0.0, + 2011-6-1 0:0:0.0, + 2011-7-1 0:0:0.0, + 2011-8-1 0:0:0.0, + 2011-9-1 0:0:0.0, + 2011-10-1 0:0:0.0, + 2011-11-1 0:0:0.0, + 2011-12-1 0:0:0.0, + 2012-1-1 0:0:0.0, + 2012-2-1 0:0:0.0, + 2012-3-1 0:0:0.0, + 2012-4-1 0:0:0.0, + 2012-5-1 0:0:0.0, + 2012-6-1 0:0:0.0, + 2012-7-1 0:0:0.0, + 2012-8-1 0:0:0.0, + 2012-9-1 0:0:0.0, + 2012-10-1 0:0:0.0, + 2012-11-1 0:0:0.0, + 2012-12-1 0:0:0.0, + 2013-1-1 0:0:0.0, + 2013-2-1 0:0:0.0, + 2013-3-1 0:0:0.0, + 2013-4-1 0:0:0.0, + 2013-5-1 0:0:0.0, + 2013-6-1 0:0:0.0, + 2013-7-1 0:0:0.0, + 2013-8-1 0:0:0.0, + 2013-9-1 0:0:0.0, + 2013-10-1 0:0:0.0, + 2013-11-1 0:0:0.0, + 2013-12-1 0:0:0.0, + 2014-1-1 0:0:0.0] +""" + +# 37 2014-1-1 0:0:0.0 +print(len(time_ns), time_ns_dt[-1]) +# [59829. 59860.] +print(time_ns.getBounds()[-1]) diff --git a/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/regression-test-json.ipynb b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/regression-test-json.ipynb new file mode 100644 index 000000000..4025d7012 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/regression-test-json.ipynb @@ -0,0 +1,370 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CDAT Migration Regression Testing Notebook (`.json` metrics)\n", + "\n", + "This notebook is used to perform regression testing between the development and\n", + "production versions of a diagnostic set.\n", + "\n", + "## How it works\n", + "\n", + "It compares the relative differences (%) between two sets of `.json` files in two\n", + "separate directories, one for the refactored code and the other for the `main` branch.\n", + "\n", + "It will display metrics values with relative differences >= 2%. Relative differences are used instead of absolute differences because:\n", + "\n", + "- Relative differences are in percentages, which shows the scale of the differences.\n", + "- Absolute differences are just a raw number that doesn't factor in\n", + " floating point size (e.g., 100.00 vs. 0.0001), which can be misleading.\n", + "\n", + "## How to use\n", + "\n", + "PREREQUISITE: The diagnostic set's metrics stored in `.json` files in two directories\n", + "(dev and `main` branches).\n", + "\n", + "1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/`.\n", + "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray netcdf4 dask pandas matplotlib-base ipykernel`\n", + "3. Run `mamba activate cdat_regression_test`\n", + "4. Update `DEV_PATH` and `MAIN_PATH` in the copy of your notebook.\n", + "5. Run all cells IN ORDER.\n", + "6. Review results for any outstanding differences (>= 2%).\n", + " - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "from typing import List\n", + "\n", + "import pandas as pd\n", + "\n", + "from auxiliary_tools.cdat_regression_testing.utils import (\n", + " get_num_metrics_above_diff_thres,\n", + " get_rel_diffs,\n", + " update_diffs_to_pct,\n", + " highlight_large_diffs,\n", + ")\n", + "\n", + "SET_NAME = \"area_mean_time_series\"\n", + "SET_DIR = \"662-area-mean-time-series\"\n", + "\n", + "DEV_PATH = f\"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/{SET_DIR}/{SET_NAME}/**\"\n", + "MAIN_PATH = f\"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/{SET_NAME}/**\"\n", + "\n", + "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", + "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.json\"))\n", + "\n", + "if len(DEV_GLOB) == 0 or len(MAIN_GLOB) == 0:\n", + " raise IOError(\"No files found at DEV_PATH and/or MAIN_PATH.\")\n", + "\n", + "if len(DEV_GLOB) != len(MAIN_GLOB):\n", + " raise IOError(\"Number of files do not match at DEV_PATH and MAIN_PATH.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def get_metrics(filepaths: List[str]) -> pd.DataFrame:\n", + " \"\"\"Get the metrics using a glob of `.json` metric files in a directory.\n", + "\n", + " Parameters\n", + " ----------\n", + " filepaths : List[str]\n", + " The filepaths for metrics `.json` files.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " The DataFrame containing the metrics for all of the variables in\n", + " the results directory.\n", + " \"\"\"\n", + " metrics = []\n", + "\n", + " for filepath in filepaths:\n", + " df = pd.read_json(filepath)\n", + "\n", + " filename = filepath.split(\"/\")[-1]\n", + " var_key = filename.split(\"-\")[0]\n", + " region = filename.split(\"-\")[-1]\n", + "\n", + " # Add the variable key to the MultiIndex and update the index\n", + " # before stacking to make the DataFrame easier to parse.\n", + " multiindex = pd.MultiIndex.from_product([[var_key], [region], [*df.index]])\n", + " df = df.set_index(multiindex)\n", + " df.stack()\n", + "\n", + " metrics.append(df)\n", + "\n", + " df_final = pd.concat(metrics)\n", + "\n", + " return df_final" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = []\n", + "\n", + "for filepath in DEV_GLOB:\n", + " df = pd.read_json(filepath)\n", + "\n", + " filename = filepath.split(\"/\")[-1]\n", + " var_key = filename.split(\"-\")[0]\n", + "\n", + " # Add the variable key to the MultiIndex and update the index\n", + " # before stacking to make the DataFrame easier to parse.\n", + " multiindex = pd.MultiIndex.from_product([[var_key], [filename], [*df.index]])\n", + " df = df.set_index(multiindex)\n", + " df.stack()\n", + "\n", + " metrics.append(df)\n", + "\n", + "df_final = pd.concat(metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Get the metrics for the development and `main` branches and their differences.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_dev = get_metrics(DEV_GLOB)\n", + "df_metrics_main = get_metrics(MAIN_GLOB)\n", + "df_metrics_diffs = get_rel_diffs(df_metrics_dev, df_metrics_main)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Filter differences to those above maximum threshold (2%).\n", + "\n", + "All values below maximum threshold will be labeled as `NaN`.\n", + "\n", + "- **If all cells in a row are NaN (< 2%)**, the entire row is dropped to make the results easier to parse.\n", + "- Any remaining NaN cells are below < 2% difference and **should be ignored**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_diffs_thres = df_metrics_diffs[df_metrics_diffs >= 0.02]\n", + "df_metrics_diffs_thres = df_metrics_diffs_thres.dropna(\n", + " axis=0, how=\"all\", ignore_index=False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Combine all DataFrames to get the final result.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "df_metrics_all = pd.concat(\n", + " [df_metrics_dev.add_suffix(\"_dev\"), df_metrics_main.add_suffix(\"_main\")],\n", + " axis=1,\n", + " join=\"outer\",\n", + ")\n", + "df_final = df_metrics_diffs_thres.join(df_metrics_all)\n", + "df_final = update_diffs_to_pct(df_final, [\"e3sm_v2 (0051-0060) DIFF (%)\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
e3sm_v2 (0051-0060) DIFF (%)e3sm_v2 (0051-0060)_deve3sm_v2 (0051-0060)_main
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [e3sm_v2 (0051-0060) DIFF (%), e3sm_v2 (0051-0060)_dev, e3sm_v2 (0051-0060)_main]\n", + "Index: []" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Review variables and metrics above difference threshold.\n", + "\n", + "- Red cells are differences >= 2%\n", + "- `nan` cells are differences < 2% and **should be ignored**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Related variables []\n", + "* Number of metrics above 2% max threshold: 0 / 720\n" + ] + } + ], + "source": [ + "df_final_adj = df_final.reset_index(names=[\"var_key\", \"region\", \"metric\"])\n", + "get_num_metrics_above_diff_thres(df_metrics_all, df_final_adj)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 var_keyregionmetrice3sm_v2 (0051-0060) DIFF (%)e3sm_v2 (0051-0060)_deve3sm_v2 (0051-0060)_main
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "highlight_large_diffs(df_final_adj, [\"e3sm_v2 (0051-0060) DIFF (%)\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results\n", + "\n", + "- 792 / 792 metrics are within diff tolerance\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cdat_regression_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/regression-test-netcdf.ipynb b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/regression-test-netcdf.ipynb new file mode 100644 index 000000000..cd7a385f2 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/regression-test-netcdf.ipynb @@ -0,0 +1,1565 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CDAT Migration Regression Testing Notebook (`.nc` files)\n", + "\n", + "This notebook is used to perform regression testing between the development and\n", + "production versions of a diagnostic set.\n", + "\n", + "## How it works\n", + "\n", + "It compares the relative differences (%) between ref and test variables between\n", + "the dev and `main` branches.\n", + "\n", + "## How to use\n", + "\n", + "PREREQUISITE: The diagnostic set's netCDF stored in `.json` files in two directories\n", + "(dev and `main` branches).\n", + "\n", + "1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/`.\n", + "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray netcdf4 dask pandas matplotlib-base ipykernel`\n", + "3. Run `mamba activate cdat_regression_test`\n", + "4. Update `SET_DIR` and `SET_NAME` in the copy of your notebook.\n", + "5. Run all cells IN ORDER.\n", + "6. Review results for any outstanding differences (>=1e-5 relative tolerance).\n", + " - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "import glob\n", + "from typing import Tuple\n", + "\n", + "import numpy as np\n", + "import xarray as xr\n", + "from e3sm_diags.derivations.derivations import DERIVED_VARIABLES\n", + "\n", + "\n", + "# TODO: Update SET_NAME and SET_DIR\n", + "SET_NAME = \"area_mean_time_series\"\n", + "SET_DIR = \"662-area-mean-time-series\"\n", + "\n", + "DEV_PATH = f\"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/{SET_DIR}/{SET_NAME}/**\"\n", + "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.nc\"))\n", + "DEV_NUM_FILES = len(DEV_GLOB)\n", + "\n", + "MAIN_PATH = f\"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/{SET_NAME}/**\"\n", + "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.nc\"))\n", + "MAIN_NUM_FILES = len(MAIN_GLOB)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def _check_if_files_found():\n", + " if DEV_NUM_FILES == 0 or MAIN_NUM_FILES == 0:\n", + " raise IOError(\n", + " \"No files found at DEV_PATH and/or MAIN_PATH. \"\n", + " f\"Please check {DEV_PATH} and {MAIN_PATH}.\"\n", + " )\n", + "\n", + "\n", + "def _check_if_matching_filecount():\n", + " if DEV_NUM_FILES != MAIN_NUM_FILES:\n", + " raise IOError(\n", + " \"Number of files do not match at DEV_PATH and MAIN_PATH \"\n", + " f\"({DEV_NUM_FILES} vs. {MAIN_NUM_FILES}).\"\n", + " )\n", + "\n", + " print(f\"Matching file count ({DEV_NUM_FILES} and {MAIN_NUM_FILES}).\")\n", + "\n", + "\n", + "def _check_if_missing_files():\n", + " missing_count = 0\n", + "\n", + " for fp_main in MAIN_GLOB:\n", + " fp_dev = fp_main.replace(SET_DIR, \"main-area-mean-time-series\")\n", + "\n", + " if fp_dev not in MAIN_GLOB:\n", + " print(f\"No production file found to compare with {fp_dev}!\")\n", + " missing_count += 1\n", + "\n", + " for fp_dev in DEV_GLOB:\n", + " fp_main = fp_main.replace(\"main-area-mean-time-series\", SET_DIR)\n", + "\n", + " if fp_main not in DEV_GLOB:\n", + " print(f\"No development file found to compare with {fp_main}!\")\n", + " missing_count += 1\n", + "\n", + " print(f\"Number of files missing: {missing_count}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def _get_relative_diffs():\n", + " # We are mainly focusing on relative tolerance here (in percentage terms).\n", + " atol = 0\n", + " rtol = 1e-5\n", + "\n", + " for fp_main in MAIN_GLOB:\n", + " if \"test.nc\" in fp_main or \"ref.nc\" in fp_main:\n", + " fp_dev = fp_main.replace(\"main-area-mean-time-series\", SET_DIR)\n", + "\n", + " print(\"Comparing:\")\n", + " print(f\" * {fp_dev}\")\n", + " print(f\" * {fp_main}\")\n", + "\n", + " ds1 = xr.open_dataset(fp_dev)\n", + " ds2 = xr.open_dataset(fp_main)\n", + "\n", + " var_key = fp_main.split(\"-\")[-3]\n", + " # for 3d vars such as T-200\n", + " var_key.isdigit()\n", + " if var_key.isdigit():\n", + " var_key = fp_main.split(\"-\")[-4]\n", + "\n", + " print(f\" * var_key: {var_key}\")\n", + "\n", + " dev_data = _get_var_data(ds1, var_key)\n", + " main_data = _get_var_data(ds2, var_key)\n", + "\n", + " if dev_data is None or main_data is None:\n", + " print(\" * Could not find variable key in the dataset(s)\")\n", + " continue\n", + "\n", + " try:\n", + " np.testing.assert_allclose(\n", + " dev_data,\n", + " main_data,\n", + " atol=atol,\n", + " rtol=rtol,\n", + " )\n", + " except (KeyError, AssertionError) as e:\n", + " print(f\" {e}\")\n", + " else:\n", + " print(f\" * All close and within relative tolerance ({rtol})\")\n", + "\n", + "\n", + "def _get_var_data(ds: xr.Dataset, var_key: str) -> np.ndarray:\n", + " \"\"\"Get the variable data using a list of matching keys.\n", + "\n", + " The `main` branch saves the dataset using the original variable name,\n", + " while the dev branch saves the variable with the derived variable name.\n", + " The dev branch is performing the expected behavior here.\n", + "\n", + " Parameters\n", + " ----------\n", + " ds : xr.Dataset\n", + " _description_\n", + " var_key : str\n", + " _description_\n", + "\n", + " Returns\n", + " -------\n", + " np.ndarray\n", + " _description_\n", + " \"\"\"\n", + "\n", + " data = None\n", + "\n", + " var_keys = DERIVED_VARIABLES[var_key].keys()\n", + " var_keys = [var_key] + list(sum(var_keys, ()))\n", + "\n", + " for key in var_keys:\n", + " if key in ds.data_vars.keys():\n", + " data = ds[key].values\n", + " break\n", + "\n", + " return data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Check for matching and equal number of files\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "_check_if_files_found()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matching file count (72 and 72).\n" + ] + } + ], + "source": [ + "_check_if_matching_filecount()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of files missing: 0\n" + ] + } + ], + "source": [ + "_check_if_missing_files()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2 Compare the netCDF files between branches\n", + "\n", + "- Compare \"ref\" and \"test\" files\n", + "- \"diff\" files are ignored because getting relative diffs for these does not make sense (relative diff will be above tolerance)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-20N50N_test.nc\n", + "var_key FLUT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-20S20N_test.nc\n", + "var_key FLUT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-50N90N_test.nc\n", + "var_key FLUT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-50S20S_test.nc\n", + "var_key FLUT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-90S50S_test.nc\n", + "var_key FLUT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-global_test.nc\n", + "var_key FLUT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-land_test.nc\n", + "var_key FLUT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-ocean_test.nc\n", + "var_key FLUT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-20N50N_test.nc\n", + "var_key FSNTOA\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-20S20N_test.nc\n", + "var_key FSNTOA\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-50N90N_test.nc\n", + "var_key FSNTOA\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-50S20S_test.nc\n", + "var_key FSNTOA\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-90S50S_test.nc\n", + "var_key FSNTOA\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-global_test.nc\n", + "var_key FSNTOA\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-land_test.nc\n", + "var_key FSNTOA\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FSNTOA/FSNTOA-ocean_test.nc\n", + "var_key FSNTOA\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-20N50N_test.nc\n", + "var_key LHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-20S20N_test.nc\n", + "var_key LHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-50N90N_test.nc\n", + "var_key LHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-50S20S_test.nc\n", + "var_key LHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-90S50S_test.nc\n", + "var_key LHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-global_test.nc\n", + "var_key LHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-land_test.nc\n", + "var_key LHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LHFLX/LHFLX-ocean_test.nc\n", + "var_key LHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LWCF/LWCF-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LWCF/LWCF-20N50N_test.nc\n", + "var_key LWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LWCF/LWCF-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LWCF/LWCF-20S20N_test.nc\n", + "var_key LWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LWCF/LWCF-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LWCF/LWCF-50N90N_test.nc\n", + "var_key LWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LWCF/LWCF-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LWCF/LWCF-50S20S_test.nc\n", + "var_key LWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LWCF/LWCF-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LWCF/LWCF-90S50S_test.nc\n", + "var_key LWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LWCF/LWCF-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LWCF/LWCF-global_test.nc\n", + "var_key LWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LWCF/LWCF-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LWCF/LWCF-land_test.nc\n", + "var_key LWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/LWCF/LWCF-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/LWCF/LWCF-ocean_test.nc\n", + "var_key LWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/PRECT/PRECT-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/PRECT/PRECT-20N50N_test.nc\n", + "var_key PRECT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/PRECT/PRECT-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/PRECT/PRECT-20S20N_test.nc\n", + "var_key PRECT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/PRECT/PRECT-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/PRECT/PRECT-50N90N_test.nc\n", + "var_key PRECT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/PRECT/PRECT-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/PRECT/PRECT-50S20S_test.nc\n", + "var_key PRECT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/PRECT/PRECT-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/PRECT/PRECT-90S50S_test.nc\n", + "var_key PRECT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/PRECT/PRECT-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/PRECT/PRECT-global_test.nc\n", + "var_key PRECT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/PRECT/PRECT-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/PRECT/PRECT-land_test.nc\n", + "var_key PRECT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/PRECT/PRECT-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/PRECT/PRECT-ocean_test.nc\n", + "var_key PRECT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/QFLX/QFLX-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/QFLX/QFLX-20N50N_test.nc\n", + "var_key QFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/QFLX/QFLX-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/QFLX/QFLX-20S20N_test.nc\n", + "var_key QFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/QFLX/QFLX-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/QFLX/QFLX-50N90N_test.nc\n", + "var_key QFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/QFLX/QFLX-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/QFLX/QFLX-50S20S_test.nc\n", + "var_key QFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/QFLX/QFLX-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/QFLX/QFLX-90S50S_test.nc\n", + "var_key QFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/QFLX/QFLX-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/QFLX/QFLX-global_test.nc\n", + "var_key QFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/QFLX/QFLX-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/QFLX/QFLX-land_test.nc\n", + "var_key QFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/QFLX/QFLX-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/QFLX/QFLX-ocean_test.nc\n", + "var_key QFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-20N50N_test.nc\n", + "var_key SHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-20S20N_test.nc\n", + "var_key SHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-50N90N_test.nc\n", + "var_key SHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-50S20S_test.nc\n", + "var_key SHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-90S50S_test.nc\n", + "var_key SHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-global_test.nc\n", + "var_key SHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-land_test.nc\n", + "var_key SHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SHFLX/SHFLX-ocean_test.nc\n", + "var_key SHFLX\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SWCF/SWCF-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SWCF/SWCF-20N50N_test.nc\n", + "var_key SWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SWCF/SWCF-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SWCF/SWCF-20S20N_test.nc\n", + "var_key SWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SWCF/SWCF-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SWCF/SWCF-50N90N_test.nc\n", + "var_key SWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SWCF/SWCF-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SWCF/SWCF-50S20S_test.nc\n", + "var_key SWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SWCF/SWCF-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SWCF/SWCF-90S50S_test.nc\n", + "var_key SWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SWCF/SWCF-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SWCF/SWCF-global_test.nc\n", + "var_key SWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SWCF/SWCF-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SWCF/SWCF-land_test.nc\n", + "var_key SWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/SWCF/SWCF-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/SWCF/SWCF-ocean_test.nc\n", + "var_key SWCF\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-20N50N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-20N50N_test.nc\n", + "var_key TREFHT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-20S20N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-20S20N_test.nc\n", + "var_key TREFHT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-50N90N_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-50N90N_test.nc\n", + "var_key TREFHT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-50S20S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-50S20S_test.nc\n", + "var_key TREFHT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-90S50S_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-90S50S_test.nc\n", + "var_key TREFHT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-global_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-global_test.nc\n", + "var_key TREFHT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-land_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-land_test.nc\n", + "var_key TREFHT\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-ocean_test.nc \n", + " /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/TREFHT/TREFHT-ocean_test.nc\n", + "var_key TREFHT\n", + " * All close and within relative tolerance (1e-05)\n" + ] + } + ], + "source": [ + "# We are mainly focusing on relative tolerance here (in percentage terms).\n", + "atol = 0\n", + "rtol = 1e-5\n", + "\n", + "count_mismatch = 0\n", + "\n", + "for filepath_dev in DEV_GLOB:\n", + " if \"test.nc\" in filepath_dev or \"ref.nc\" in filepath_dev:\n", + " filepath_main = filepath_dev.replace(SET_DIR, \"main-area-mean-time-series\")\n", + " print(\"Comparing:\")\n", + " print(filepath_dev, \"\\n\", filepath_main)\n", + " ds1 = xr.open_dataset(filepath_dev)\n", + " ds2 = xr.open_dataset(filepath_main)\n", + "\n", + " var_key = filepath_dev.split(\"/\")[-2]\n", + " # for 3d vars such as T-200\n", + " var_key.isdigit()\n", + " if var_key.isdigit():\n", + " var_key = filepath_dev.split(\"-\")[-4]\n", + "\n", + " print(\"var_key\", var_key)\n", + " try:\n", + " np.testing.assert_allclose(\n", + " ds1[var_key].values,\n", + " ds2[var_key].values,\n", + " atol=atol,\n", + " rtol=rtol,\n", + " )\n", + " except AssertionError as e:\n", + " print(e)\n", + " count_mismatch += 1\n", + " else:\n", + " print(f\" * All close and within relative tolerance ({rtol})\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "72" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "count_mismatch" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "fp1 = \"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series/area_mean_time_series/FLUT/FLUT-land_test.nc\"\n", + "fp2 = \"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/area_mean_time_series/FLUT/FLUT-land_test.nc\"" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "ds_fp1 = xr.open_dataset(fp1)\n", + "ds_fp2 = xr.open_dataset(fp2)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 160B\n",
+       "Dimensions:  (time: 10)\n",
+       "Coordinates:\n",
+       "  * time     (time) object 80B 0051-01-01 00:00:00 ... 0060-01-01 00:00:00\n",
+       "Data variables:\n",
+       "    FLUT     (time) float64 80B ...
" + ], + "text/plain": [ + " Size: 160B\n", + "Dimensions: (time: 10)\n", + "Coordinates:\n", + " * time (time) object 80B 0051-01-01 00:00:00 ... 0060-01-01 00:00:00\n", + "Data variables:\n", + " FLUT (time) float64 80B ..." + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fp1" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 320B\n",
+       "Dimensions:      (time: 10, bound: 2)\n",
+       "Coordinates:\n",
+       "  * time         (time) object 80B 0051-07-02 12:00:00 ... 0060-07-02 12:00:00\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables:\n",
+       "    bounds_time  (time, bound) object 160B ...\n",
+       "    FLUT         (time) float64 80B ...\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.0
" + ], + "text/plain": [ + " Size: 320B\n", + "Dimensions: (time: 10, bound: 2)\n", + "Coordinates:\n", + " * time (time) object 80B 0051-07-02 12:00:00 ... 0060-07-02 12:00:00\n", + "Dimensions without coordinates: bound\n", + "Data variables:\n", + " bounds_time (time, bound) object 160B ...\n", + " FLUT (time) float64 80B ...\n", + "Attributes:\n", + " Conventions: CF-1.0" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_fp2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Results\n", + "\n", + "All files are within rtol 1e-5, so the changes should be good to go.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cdat_regression_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run.cfg b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run.cfg new file mode 100644 index 000000000..a4ad88de4 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run.cfg @@ -0,0 +1,7 @@ +[#] +sets = ["area_mean_time_series"] +case_id = "FSNTOA" +variables = ["FSNTOA"] +ref_names = ["ceres_ebaf_toa_v4.1"] +# regions = ['global', 'land', 'ocean', '90S50S','50S20S','20S20N','20N50N','50N90N'] +regions = ['90S50S'] diff --git a/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run_script.py b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run_script.py new file mode 100644 index 000000000..6ba432f8c --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run_script.py @@ -0,0 +1,12 @@ +# %% +# python -m auxiliary_tools.cdat_regression_testing.662-area-mean-time-series.run_script +# chmod -R o=rx /global/cfs/cdirs/e3sm/www/cdat-migration-fy24/662-area-mean-time-series +from auxiliary_tools.cdat_regression_testing.base_run_script import run_set + +SET_NAME = "area_mean_time_series" +SET_DIR = "662-area-mean-time-series" +CFG_PATH: str | None = None +# CFG_PATH: str | None = "/global/u2/v/vo13/E3SM-Project/e3sm_diags/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/run.cfg" +MULTIPROCESSING = True + +run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING) diff --git a/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/test_non_submonthly.py b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/test_non_submonthly.py new file mode 100644 index 000000000..d5265f116 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/662-area-mean-time-series/test_non_submonthly.py @@ -0,0 +1,46 @@ +# %% +from datetime import datetime + +import pandas as pd +import xarray as xr +import xcdat as xc + +# %% +fn1 = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1/FSNS_185001_201312.nc" +var = "FSNS" +slice_flag = "ccb" + +start_time = "2011-01-15" +end_time = "2013-12-15" +time_slice = slice(start_time, end_time, None) + +# %% +ds = xr.open_dataset(fn1) + + +# %% +def get_new_end_time(ds: xr.Dataset, end_time: str) -> str: + # Get the dimension coordinates and bounds key. + time_coords = xc.get_dim_coords(ds, axis="T") + time_dim = time_coords.name + time_bnds_key = time_coords.attrs["bounds"] + + # Extract the sub-dataset for all data at the last time coordinate. + ds_last_time = ds.isel({time_dim: -1}) + + # Get the delta between time coordinates using the difference between + # bounds values. + time_bnds = ds_last_time[time_bnds_key] + time_delta = time_bnds[-1] - time_bnds[0] + time_delta_py = pd.to_timedelta(time_delta.values).to_pytimedelta() + + old_end_time = datetime.strptime(end_time, "%Y-%m-%d") + new_end_time = old_end_time + time_delta_py + new_end_time_str = new_end_time.strftime("%Y-%m-%d") + + return new_end_time_str + + +new_end_time = get_new_end_time(ds, end_time) + +ds.sel(time=slice(start_time, new_end_time, None)).squeeze() diff --git a/auxiliary_tools/cdat_regression_testing/utils.py b/auxiliary_tools/cdat_regression_testing/utils.py index 2434416dd..b23658ba7 100644 --- a/auxiliary_tools/cdat_regression_testing/utils.py +++ b/auxiliary_tools/cdat_regression_testing/utils.py @@ -115,7 +115,7 @@ def sort_columns(df: pd.DataFrame) -> pd.DataFrame: return df_new -def update_diffs_to_pct(df: pd.DataFrame): +def update_diffs_to_pct(df: pd.DataFrame, cols: List[str] = PERCENTAGE_COLUMNS): """Update relative diff columns from float to string percentage. Parameters @@ -129,14 +129,14 @@ def update_diffs_to_pct(df: pd.DataFrame): The final DataFrame containing metrics and diffs (str percentage). """ df_new = df.copy() - df_new[PERCENTAGE_COLUMNS] = df_new[PERCENTAGE_COLUMNS].map( + df_new[cols] = df_new[cols].map( lambda x: "{0:.2f}%".format(x * 100) if not math.isnan(x) else x ) return df_new -def highlight_large_diffs(df: pd.DataFrame): +def highlight_large_diffs(df: pd.DataFrame, cols: List[str] = PERCENTAGE_COLUMNS): if "var_key" not in df.columns and "metric" not in df.columns: df_new = df.reset_index(names=["var_key", "metric"]) else: @@ -144,7 +144,7 @@ def highlight_large_diffs(df: pd.DataFrame): df_new = df_new.style.map( lambda x: "background-color : red" if isinstance(x, str) else "", - subset=pd.IndexSlice[:, PERCENTAGE_COLUMNS], + subset=pd.IndexSlice[:, cols], ) display(df_new) @@ -169,7 +169,7 @@ def get_image_diffs(actual_path: str, expected_path: str): This function is useful for comparing two datasets that can't be compared directly using `np.testing.assert_allclose()` due to `x and y nan location mismatch` error. This error might happen after using the land-sea mask - after regridding, which can differ slightly between xCDAT/xESMF and + after regridding, which can differ slightly between xCDAT/xESMF and CDAT/ESMF. Parameters diff --git a/e3sm_diags/derivations/formulas.py b/e3sm_diags/derivations/formulas.py index ef0f012e3..a3b7ca0f0 100644 --- a/e3sm_diags/derivations/formulas.py +++ b/e3sm_diags/derivations/formulas.py @@ -41,18 +41,20 @@ def sum_vars(vars: List[xr.DataArray]) -> xr.DataArray: def qflxconvert_units(var: xr.DataArray): - if ( - var.attrs["units"] == "kg/m2/s" - or var.attrs["units"] == "kg m-2 s-1" - or var.attrs["units"] == "mm/s" - ): - # need to find a solution for units not included in udunits - # var = convert_units( var, 'kg/m2/s' ) - var = var * 3600.0 * 24 # convert to mm/day - var.attrs["units"] = "mm/day" - elif var.attrs["units"] == "mm/hr": - var = var * 24.0 - var.attrs["units"] = "mm/day" + with xr.set_options(keep_attrs=True): + if ( + var.attrs["units"] == "kg/m2/s" + or var.attrs["units"] == "kg m-2 s-1" + or var.attrs["units"] == "mm/s" + ): + # need to find a solution for units not included in udunits + # var = convert_units( var, 'kg/m2/s' ) + var = var * 3600.0 * 24 # convert to mm/day + var.attrs["units"] = "mm/day" + elif var.attrs["units"] == "mm/hr": + var = var * 24.0 + var.attrs["units"] = "mm/day" + return var diff --git a/e3sm_diags/driver/area_mean_time_series_driver.py b/e3sm_diags/driver/area_mean_time_series_driver.py index 20c79416c..997cb1599 100644 --- a/e3sm_diags/driver/area_mean_time_series_driver.py +++ b/e3sm_diags/driver/area_mean_time_series_driver.py @@ -1,17 +1,18 @@ from __future__ import annotations -import collections import json import os -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, NamedTuple -import cdms2 -import cdutil +import xarray as xr +import xcdat as xc -import e3sm_diags -from e3sm_diags.driver import utils +from e3sm_diags.driver import LAND_OCEAN_MASK_PATH, utils +from e3sm_diags.driver.utils.dataset_xr import Dataset, squeeze_time_dim +from e3sm_diags.driver.utils.io import _write_to_netcdf +from e3sm_diags.driver.utils.regrid import _apply_land_sea_mask, _subset_on_region from e3sm_diags.logger import custom_logger -from e3sm_diags.metrics import mean +from e3sm_diags.metrics.metrics import spatial_avg from e3sm_diags.plot.cartopy import area_mean_time_series_plot if TYPE_CHECKING: @@ -22,125 +23,87 @@ logger = custom_logger(__name__) -RefsTestMetrics = collections.namedtuple("RefsTestMetrics", ["refs", "test", "metrics"]) - -def create_metrics(ref_domain): - """ - For this plotset, calculate the mean of the - reference data and return a dict of that. - """ - return {"mean": mean(ref_domain)} +class RefsTestMetrics(NamedTuple): + test: xr.Dataset + refs: list + metrics: list def run_diag(parameter: AreaMeanTimeSeriesParameter) -> AreaMeanTimeSeriesParameter: + """Run the diagnostics for area_mean_time_series. + + Parameters + ---------- + parameter : AreaMeanTimeSeriesParameter + The parameter for area_mean_time_series. + + Returns + ------- + AreaMeanTimeSeriesParameter + The parameter for area_mean_time_series with the results of the + diagnostic run. + """ variables = parameter.variables regions = parameter.regions ref_names = parameter.ref_names - # Both input data sets must be time-series files. - # Raising an error will cause this specific set of - # diagnostics with these parameters to be skipped. - # if test_data.is_climo() or ref_data.is_climo(): - # msg = 'Cannot run the plotset regional_mean_time_series ' - # msg += 'because both the test and ref data need to be time-series files.' - # raise RuntimeError(msg) + test_ds = Dataset(parameter, data_type="test") + parameter.test_name_yrs = test_ds.get_name_yrs_attr() + + ds_mask = _get_default_land_sea_mask() for var in variables: - # The data that'll be sent to the plotting function. - # There are eight tuples, each will be plotted like so: - # [ 0 ] [ 1 ] [ 2 ] - # [ ] [ ] [ ] - # - # [ 3 ] [ 4 ] [ 5 ] - # [ ] [ ] [ ] - # - # [ 6 ] [ 7 ] - # [ ] [ ] - regions_to_data = collections.OrderedDict() - save_data = {} logger.info("Variable: {}".format(var)) - # Get land/ocean fraction for masking. - # For now, we're only using the climo data that we saved below. - # So no time-series LANDFRAC or OCNFRAC from the user is used. - mask_path = os.path.join( - e3sm_diags.INSTALL_PATH, "acme_ne30_ocean_land_mask.nc" - ) - with cdms2.open(mask_path) as f: - land_frac = f("LANDFRAC") - ocean_frac = f("OCNFRAC") + metrics_dict = {} + save_data = {} for region in regions: - # The regions that are supported are in e3sm_diags/derivations/default_regions.py - # You can add your own if it's not in there. logger.info("Selected region: {}".format(region)) - test_data = utils.dataset.Dataset(parameter, test=True) - test = test_data.get_timeseries_variable(var) - logger.info( - "Start and end time for selected time slices for test data: " - f"{test.getTime().asComponentTime()[0]} " - f"{test.getTime().asComponentTime()[-1]}", - ) - parameter.viewer_descr[var] = getattr(test, "long_name", var) - # Get the name of the data, appended with the years averaged. - parameter.test_name_yrs = utils.general.get_name_and_yrs( - parameter, test_data - ) - test_domain = utils.general.select_region( - region, test, land_frac, ocean_frac, parameter - ) - - # Average over selected region, and average - # over months to get the yearly mean. - test_domain = cdutil.averager(test_domain, axis="xy") - cdutil.setTimeBoundsMonthly(test_domain) - test_domain_year = cdutil.YEAR(test_domain) - # add back attributes since they got lost after applying cdutil.YEAR - test_domain_year.long_name = test.long_name - test_domain_year.units = test.units + # Test variable metrics. + # ------------------------------------------------------------------ + ds_test = test_ds.get_time_series_dataset(var) + _log_time(ds_test, "test") - save_data[parameter.test_name_yrs] = test_domain_year.asma().tolist() + ds_test_domain_avg = _get_region_yearly_avg( + parameter, ds_test, ds_mask, var, region, data_type="test" + ) + save_data[parameter.test_name_yrs] = ds_test_domain_avg[var].values.tolist() + # Calculate reference metrics (optional, if valid time range). + # ------------------------------------------------------------------ refs = [] for ref_name in ref_names: - setattr(parameter, "ref_name", ref_name) - ref_data = utils.dataset.Dataset(parameter, ref=True) + parameter.ref_name = ref_name - parameter.ref_name_yrs = utils.general.get_name_and_yrs( - parameter, ref_data - ) + ref_ds = Dataset(parameter, data_type="ref") + parameter.ref_name_yrs = ref_ds.get_name_yrs_attr() + ds_ref_domain_avg_yr = None try: - ref = ref_data.get_timeseries_variable(var) - - ref_domain = utils.general.select_region( - region, ref, land_frac, ocean_frac, parameter + ds_ref = ref_ds.get_time_series_dataset(var) + except (IOError, ValueError): + logger.exception( + "No valid value for reference datasets available for the specified time range" ) - - ref_domain = cdutil.averager(ref_domain, axis="xy") - cdutil.setTimeBoundsMonthly(ref_domain) - logger.info( - ( - "Start and end time for selected time slices for ref data: " - f"{ref_domain.getTime().asComponentTime()[0]} " - f"{ref_domain.getTime().asComponentTime()[-1]}" - ) + else: + ds_ref_domain_avg_yr = _get_region_yearly_avg( + parameter, ds_ref, ds_mask, var, region, data_type="ref" ) - ref_domain_year = cdutil.YEAR(ref_domain) - ref_domain_year.ref_name = ref_name - save_data[ref_name] = ref_domain_year.asma().tolist() + if ds_ref_domain_avg_yr is not None: + save_data[ref_name] = ds_ref_domain_avg_yr.asma().tolist() - refs.append(ref_domain_year) - except Exception: - logger.exception( - "No valid value for reference datasets available for the specified time range" - ) + # Set the ref name attribute on the ref variable for the + # plot label. + ds_ref_domain_avg_yr[var].attrs["ref_name"] = ref_name + refs.append(ds_ref_domain_avg_yr) - # save data for potential later use + # I/O and plotting. + # ------------------------------------------------------------------ parameter.output_file = "-".join([var, region]) fnm = os.path.join( utils.general.get_output_dir(parameter.current_set, parameter), @@ -150,10 +113,77 @@ def run_diag(parameter: AreaMeanTimeSeriesParameter) -> AreaMeanTimeSeriesParame with open(fnm, "w") as outfile: json.dump(save_data, outfile) - regions_to_data[region] = RefsTestMetrics( - test=test_domain_year, refs=refs, metrics=[] + metrics_dict[region] = RefsTestMetrics( + test=ds_test_domain_avg, refs=refs, metrics=[] ) - area_mean_time_series_plot.plot(var, regions_to_data, parameter) + if parameter.save_netcdf: + _write_to_netcdf(parameter, ds_test_domain_avg[var], var, "test") + + parameter.viewer_descr[var] = ds_test[var].attrs.get("long_name", var) + area_mean_time_series_plot.plot(var, parameter, metrics_dict) return parameter + + +def _get_default_land_sea_mask() -> xr.Dataset: + """Get the e3sm_diags default land sea mask. + + Returns + ------- + xr.Dataset + The land sea mask dataset object. + """ + ds_mask = xr.open_dataset(LAND_OCEAN_MASK_PATH) + ds_mask = squeeze_time_dim(ds_mask) + + return ds_mask + + +def _get_region_yearly_avg( + parameter: AreaMeanTimeSeriesParameter, + ds: xr.Dataset, + ds_mask: xr.Dataset, + var: str, + region: str, + data_type: Literal["test", "ref"], +) -> xr.Dataset: + ds_new = ds.copy() + + if "land" in region or "ocean" in region: + ds_new = _apply_land_sea_mask( + ds_new, + ds_mask, + var, + region, # type: ignore + parameter.regrid_tool, + parameter.regrid_method, + ) + + if "global" not in region and data_type == "test": + ds_new = _subset_on_region(ds_new, var, region) + + da_region_avg: xr.DataArray = spatial_avg( # type: ignore + ds_new, var, axis=["X", "Y"], as_list=False + ) + + # Convert the DataArray back to a Dataset to add time bounds and + # to calculate the yearly means. + ds_region_avg = da_region_avg.to_dataset() + ds_region_avg = ds_region_avg.bounds.add_time_bounds("freq", freq="month") + if data_type == "ref": + _log_time(ds_region_avg, "ref") + + ds_region_avg_yr = ds_region_avg.temporal.group_average(var, freq="year") + + return ds_region_avg_yr + + +def _log_time(ds: xr.Dataset, data_type: str): + time_coords = xc.get_dim_coords(ds, axis="T").values + + logger.info( + f"Start and end time for selected time slices for {data_type} data: " + f"{time_coords[0]} " + f"{time_coords[1]}", + ) diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 17cc29577..dcb887ac5 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -16,8 +16,10 @@ import glob import os import re +from datetime import datetime from typing import TYPE_CHECKING, Callable, Dict, Literal, Tuple +import pandas as pd import xarray as xr import xcdat as xc @@ -42,6 +44,35 @@ TS_EXT_FILEPATTERN = r"_.{13}.nc" +def squeeze_time_dim(ds: xr.Dataset) -> xr.Dataset: + """Squeeze single coordinate climatology time dimensions. + + For example, "ANN" averages over the year and collapses the time dim. + Time bounds are also dropped if they exist. + + Parameters + ---------- + ds : xr.Dataset + The dataset with a time dimension + + Returns + ------- + xr.Dataset + The dataset with a time dimension. + """ + time_dim = xc.get_dim_coords(ds, axis="T") + + if len(time_dim) == 1: + ds = ds.squeeze(dim=time_dim.name) + ds = ds.drop_vars(time_dim.name) + + bnds_key = time_dim.attrs.get("bounds") + if bnds_key is not None and bnds_key in ds.data_vars.keys(): + ds = ds.drop_vars(bnds_key) + + return ds + + class Dataset: def __init__( self, @@ -408,7 +439,7 @@ def _get_climo_dataset(self, season: str) -> xr.Dataset: "it defined in the derived variables dictionary." ) - ds = self._squeeze_time_dim(ds) + ds = squeeze_time_dim(ds) # slat and slon are lat lon pair for staggered FV grid included in # remapped files. @@ -746,7 +777,7 @@ def get_time_series_dataset( The key of the time series variable to get the dataset for. single_point : bool, optional Single point indicating the data is sub monthly, by default False. - If True, center the time coordinates using time bounds. + If False, center the time coordinates using time bounds. Returns ------- @@ -777,7 +808,10 @@ def get_time_series_dataset( ds = self._get_time_series_dataset_obj(self.var) if single_point: - ds = xc.center_times(ds) + self.is_sub_monthly = True + + if not self.is_sub_monthly: + ds = self._center_time_for_non_submonthly_data(ds) return ds @@ -952,7 +986,7 @@ def _get_dataset_with_source_vars(self, vars_to_get: Tuple[str, ...]) -> xr.Data datasets.append(ds) ds = xr.merge(datasets) - ds = self._squeeze_time_dim(ds) + ds = squeeze_time_dim(ds) return ds @@ -974,9 +1008,11 @@ def _get_time_series_dataset_obj(self, var) -> xr.Dataset: f"No time series `.nc` file was found for '{var}' in '{self.root_path}'" ) - time_slice = self._get_time_slice(filepath) + ds = xc.open_dataset( + filepath, add_bounds=["X", "Y", "T"], decode_times=True, use_cftime=True + ) - ds = xr.open_dataset(filepath, decode_times=True, use_cftime=True) + time_slice = self._get_time_slice(ds, filepath) ds_subset = ds.sel(time=time_slice).squeeze() return ds_subset @@ -1102,11 +1138,13 @@ def _get_matching_time_series_filepath( return None - def _get_time_slice(self, filename: str) -> slice: + def _get_time_slice(self, ds: xr.Dataset, filename: str) -> slice: """Get time slice to subset a dataset. Parameters ---------- + ds : xr.Dataset + The dataset. filename : str The filename. @@ -1120,34 +1158,183 @@ def _get_time_slice(self, filename: str) -> slice: ValueError If invalid date range specified for test/reference time series data. """ - start_year = int(self.start_yr) - end_year = int(self.end_yr) - - if self.is_sub_monthly: - start_time = f"{start_year}-01-01" - end_time = f"{str(int(end_year) + 1)}-01-01" - else: - start_time = f"{start_year}-01-15" - end_time = f"{end_year}-12-15" + start_yr_int = int(self.start_yr) + end_yr_int = int(self.end_yr) # Get the available start and end years from the file name. # Example: {var}_{start_yr}01_{end_yr}12.nc var_start_year = int(filename.split("/")[-1].split("_")[-2][:4]) var_end_year = int(filename.split("/")[-1].split("_")[-1][:4]) - if start_year < var_start_year: + if start_yr_int < var_start_year: raise ValueError( "Invalid year range specified for test/reference time series data: " - f"start_year ({start_year}) < var_start_yr ({var_start_year})." + f"start_year ({start_yr_int}) < var_start_yr ({var_start_year})." ) - elif end_year > var_end_year: + elif end_yr_int > var_end_year: raise ValueError( "Invalid year range specified for test/reference time series data: " - f"end_year ({end_year}) > var_end_yr ({var_end_year})." + f"end_year ({end_yr_int}) > var_end_yr ({var_end_year})." ) + start_yr_str = self._get_year_str(start_yr_int) + end_yr_str = self._get_year_str(end_yr_int) + + if self.is_sub_monthly: + start_time = f"{start_yr_str}-01-01" + end_time = f"{str(int(end_yr_str) + 1)}-01-01" + else: + start_time = f"{start_yr_str}-01-15" + end_time = self._get_end_time_with_bounds(ds, end_yr_str) + return slice(start_time, end_time) + def _get_end_time_with_bounds(self, ds: xr.Dataset, old_end_time_str: str) -> str: + """Get the end time for non-submonthly data using bounds. + + For example, let's say we have non-submonthly time coordinates with a + start time of "2011-01-01" and end time of "2014-01-01". We pre-define + a time slice of ("2011-01-15", "2013-12-15"). However slicing with + an end time of "2013-12-15" will exclude the last coordinate + "2014-01-01". To rectify this situation, we use time bounds to extend + the coordinates like so: + + 1. Get the time delta between bound values ["2013-12-15", + "2014-01-15"], which is one month. + 2. Add the time delta to the old end time of "2013-12-15", which + results in a new end time of "2014-01-15". + 3. Now slice the time coordinates using ("2011-01-15", "2014-01-15"). + This new time slice will correctly subset to include the last + coordinate value of "2014-01-01". + + Parameters + ---------- + ds : xr.Dataset + The dataset. + old_end_time_str : str + The old end time string. + + Returns + ------- + str + The new end time string. + + Notes + ----- + This function replicates the cdms2/cdutil "ccb" slice flag used for + subsetting. "ccb" only allows the right side to be closed. It will get + the difference between bounds values and add it to the last coordinate + point to get a new stopping point to slice on. + """ + time_coords = xc.get_dim_coords(ds, axis="T") + time_dim = time_coords.name + time_bnds_key = time_coords.attrs["bounds"] + + # Extract the sub-dataset for all data at the last time coordinate and + # get the delta between time coordinates using the difference between + # bounds values. + ds_last_time = ds.isel({time_dim: -1}) + time_bnds = ds_last_time[time_bnds_key] + time_delta = time_bnds[-1] - time_bnds[0] + time_delta_py = pd.to_timedelta(time_delta.values).to_pytimedelta() + + # Add the time delta to the old stop point to get a new stop point. + old_stop = datetime.strptime(f"{old_end_time_str}-12-15", "%Y-%m-%d") + new_stop = old_stop + time_delta_py + + # Convert the new stopping point from datetime to an ISO-8061 formatted + # string (e.g., "2012-01-01", "0051-12-01"). + year_str = self._get_year_str(new_stop.year) + month_day_str = self._get_month_day_str(new_stop.month, new_stop.day) + new_stop_str = f"{year_str}-{month_day_str}" + + return new_stop_str + + def _get_year_str(self, year: int) -> str: + """Get the year string in ISO-8601 format from an integer. + + When subsetting with Xarray, Xarray requires time strings to comply + with ISO-8601 (e.g., "2012-01-01"). Otherwise, Xarray will raise + `ValueError: no ISO-8601 or cftime-string-like match for string:` + + This function pads the year string if the year is less than 1000. For + example, year 51 becomes "0051" and year 501 becomes "0501". + + Parameters + ---------- + year : int + The year integer. + + Returns + ------- + str + The year as a string (e.g., "2001", "0001"). + """ + if year >= 0 and year < 1000: + return f"{year:04}" + + return str(year) + + def _get_month_day_str(self, month: int, day: int) -> str: + """Get the month and day string in ISO-8601 format from integers. + + When subsetting with Xarray, Xarray requires time strings to comply + with ISO-8601 (e.g., "2012-01-01"). Otherwise, Xarray will raise + `ValueError: no ISO-8601 or cftime-string-like match for string:` + + This function pads pad the month and/or day string with a "0" if the + value is less than 10. For example, a month of 6 will become "06". + + Parameters + ---------- + month : int + The month integer. + day : int + The day integer. + + Returns + ------- + str + The month day string (e.g., "06-12", "12-05"). + """ + month_str = str(month) + day_str = str(day) + + if month >= 1 and month < 10: + month_str = f"{month:02}" + + if day >= 1 and day < 10: + day_str = f"{day:02}" + + return f"{month_str}-{day_str}" + + def _center_time_for_non_submonthly_data(self, ds: xr.Dataset) -> xr.Dataset: + """Center time coordinates using bounds for non-submonthly data. + + This is important for data where the absolute time doesn't fall in the + middle of the time interval, such as E3SM native format data where + the time was recorded at the end of each time bounds. + + Parameters + ---------- + ds : xr.Dataset + The dataset. + + Returns + ------- + ds : xr.Dataset + The dataset with centered time coordinates. + """ + try: + time_dim = xc.get_dim_keys(ds, axis="T") + except (ValueError, KeyError): + time_dim = None + + if time_dim is not None: + return xc.center_times(ds) + + return ds + def _get_land_sea_mask(self, season: str) -> xr.Dataset: """Get the land sea mask from the dataset or use the default file. @@ -1175,36 +1362,8 @@ def _get_land_sea_mask(self, season: str) -> xr.Dataset: ) ds_mask = xr.open_dataset(LAND_OCEAN_MASK_PATH) - ds_mask = self._squeeze_time_dim(ds_mask) + ds_mask = squeeze_time_dim(ds_mask) else: ds_mask = xr.merge([ds_land_frac, ds_ocean_frac]) return ds_mask - - def _squeeze_time_dim(self, ds: xr.Dataset) -> xr.Dataset: - """Squeeze single coordinate climatology time dimensions. - - For example, "ANN" averages over the year and collapses the time dim. - Time bounds are also dropped if they exist. - - Parameters - ---------- - ds : xr.Dataset - The dataset with a time dimension - - Returns - ------- - xr.Dataset - The dataset with a time dimension. - """ - time_dim = xc.get_dim_coords(ds, axis="T") - - if len(time_dim) == 1: - ds = ds.squeeze(dim=time_dim.name) - ds = ds.drop_vars(time_dim.name) - - bnds_key = time_dim.attrs.get("bounds") - if bnds_key is not None and bnds_key in ds.data_vars.keys(): - ds = ds.drop_vars(bnds_key) - - return ds diff --git a/e3sm_diags/driver/utils/regrid.py b/e3sm_diags/driver/utils/regrid.py index 4c75a46ec..4eef030ae 100644 --- a/e3sm_diags/driver/utils/regrid.py +++ b/e3sm_diags/driver/utils/regrid.py @@ -290,7 +290,7 @@ def _subset_on_region(ds: xr.Dataset, var_key: str, region: str) -> xr.Dataset: Returns ------- xr.Dataset - The dataest with the subsetted variable. + The dataset with the subsetted variable. Notes ----- diff --git a/e3sm_diags/plot/cartopy/area_mean_time_series_plot.py b/e3sm_diags/plot/cartopy/area_mean_time_series_plot.py index eebafc0d4..cd7dc25a0 100644 --- a/e3sm_diags/plot/cartopy/area_mean_time_series_plot.py +++ b/e3sm_diags/plot/cartopy/area_mean_time_series_plot.py @@ -1,93 +1,100 @@ -import os +from __future__ import annotations + +from typing import TYPE_CHECKING, Dict import matplotlib import numpy as np +import xarray as xr -from e3sm_diags.driver.utils.general import get_output_dir from e3sm_diags.logger import custom_logger +from e3sm_diags.parameter.area_mean_time_series_parameter import ( + AreaMeanTimeSeriesParameter, +) +from e3sm_diags.plot.utils import _save_plot + +if TYPE_CHECKING: + from e3sm_diags.driver.area_mean_time_series_driver import RefsTestMetrics matplotlib.use("agg") import matplotlib.pyplot as plt # isort:skip # noqa: E402 logger = custom_logger(__name__) +# Data is a list based on the len of the regions parameter. Each element is a +# tuple (test data, ref data and metrics) for that region. +LINE_COLOR = ["r", "b", "g", "m", "c", "y"] + +# Position and sizes of subplot axes in page coordinates (0 to 1). The +# dimensions [left, bottom, width, height] of the new axes. All quantities are +# in fractions of figure width and height. +PANEL_CFG = [ + (0.1, 0.70, 0.25, 0.225), + (0.4, 0.70, 0.25, 0.225), + (0.7, 0.70, 0.25, 0.225), + (0.1, 0.38, 0.25, 0.225), + (0.4, 0.38, 0.25, 0.225), + (0.7, 0.38, 0.25, 0.225), + (0.1, 0.06, 0.25, 0.225), + (0.4, 0.06, 0.25, 0.225), + (0.7, 0.06, 0.25, 0.225), +] + +# Border padding relative to subplot axes for saving individual panels. +# (left, bottom, right, top) in page coordinates. +BORDER_PADDING = (-0.047, -0.06, 0.006, 0.03) + + +def plot( + var: str, + parameter: AreaMeanTimeSeriesParameter, + metrics_dict: Dict[str, RefsTestMetrics], +): + # Create the figure. + fig = plt.figure(figsize=(17.0, 10.0), dpi=parameter.dpi) -def plot(var, regions_to_data, parameter): - # Data is a list based on the len of the regions parameter. - # Each element is a tuple with ref data, - # test data, and metrics for that region. - - # plot time series - - line_color = ["r", "b", "g", "m", "c", "y"] + test_name_yrs = parameter.test_name_yrs + test_name = test_name_yrs.split("(")[0].replace(" ", "") + if test_name == "": + test_name = "test data" - # Position and sizes of subplot axes in page coordinates (0 to 1) - # The dimensions [left, bottom, width, height] of the new axes. All quantities are in fractions of figure width and height. - panel = [ - (0.1, 0.70, 0.25, 0.225), - (0.4, 0.70, 0.25, 0.225), - (0.7, 0.70, 0.25, 0.225), - (0.1, 0.38, 0.25, 0.225), - (0.4, 0.38, 0.25, 0.225), - (0.7, 0.38, 0.25, 0.225), - (0.1, 0.06, 0.25, 0.225), - (0.4, 0.06, 0.25, 0.225), - (0.7, 0.06, 0.25, 0.225), - ] + for idx_region, ds_region in enumerate(metrics_dict.values()): + # Add the figure axis object by region. + ax = fig.add_axes(PANEL_CFG[idx_region]) - # Border padding relative to subplot axes for saving individual panels - # (left, bottom, right, top) in page coordinates - border = (-0.047, -0.06, 0.006, 0.03) + # Plot the test data. + test_var = ds_region.test[var] + test_label = _get_mean_and_std_label(test_var, test_name) + ax.plot(test_var, "k", linewidth=2, label=test_label) - # Create the figure. - figsize = [17.0, 10] - fig = plt.figure(figsize=figsize, dpi=parameter.dpi) - start_time = int(parameter.start_yr) - end_time = int(parameter.end_yr) - num_year = end_time - start_time + 1 - - s = parameter.test_name_yrs - test_name = s.split("(")[0].replace(" ", "") - if test_name == "": - test_name = "test data" + # Plot the list of reference data (if they exist). + refs = ds_region.refs + for idx_ref, ref_var in enumerate(refs): + ref_label = _get_mean_and_std_label(ref_var, ref_var.attrs["ref_name"]) + ax.plot(ref_var, LINE_COLOR[idx_ref], linewidth=2, label=ref_label) - for i_region, data_set_for_region in enumerate(regions_to_data.values()): - refs = data_set_for_region.refs - test = data_set_for_region.test - ax1 = fig.add_axes(panel[i_region]) - ax1.plot( - test.asma(), - "k", - linewidth=2, - label=test_name - + "(mean: {0:.2f}, std: {1:.3f})".format( - np.mean(test.asma()), np.std(test.asma()) - ), - ) - for i_ref, ref in enumerate(refs): - ax1.plot( - ref.asma(), - line_color[i_ref], - linewidth=2, - label=ref.ref_name - + "(mean: {0:.2f}, std: {1:.3f})".format( - np.mean(ref.asma()), np.std(ref.asma()) - ), - ) + # Perform truncation division to accomodate long time records to get + # the step sizes for the X axis ticks. + start_time = int(parameter.start_yr) # type: ignore + end_time = int(parameter.end_yr) # type: ignore + num_year = end_time - start_time + 1 - x = np.arange(num_year) - # do Truncation Division to accommodating long time records if num_year > 19: stepsize = num_year // 10 else: stepsize = 1 - ax1.set_xticks(x[::stepsize]) + + # Configure X and Y axes. + x = np.arange(num_year) + ax.set_xticks(x[::stepsize]) x_ticks_labels = np.arange(start_time, end_time + 1, stepsize) - ax1.set_xticklabels(x_ticks_labels, rotation=45, fontsize=8) - ax1.set_ylabel(var + " (" + test.units + ")") - ax1.set_xlabel("Year") - ax1.legend(loc="best", prop={"size": 7}) - ax1.set_title(parameter.regions[i_region], fontsize=10) + ax.set_xticklabels(x_ticks_labels, rotation=45, fontsize=8) + ax.set_xlabel("Year") + + ax.set_ylabel(var + " (" + test_var.units + ")") + + # Configure legend and title. + ax.legend(loc="best", prop={"size": 7}) + ax.set_title(parameter.regions[idx_region], fontsize=10) # Figure title. fig.suptitle( @@ -97,48 +104,29 @@ def plot(var, regions_to_data, parameter): fontsize=15, ) - # Save the figure. - output_file_name = var - for f in parameter.output_format: - f = f.lower().split(".")[-1] - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - output_file_name + "." + f, - ) - plt.savefig(fnm) - # Get the filename that the user has passed in and display that. - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - output_file_name + "." + f, - ) - logger.info(f"Plot saved in: {fnm}") - - # Save individual subplots - for f in parameter.output_format_subplot: - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file, - ) - page = fig.get_size_inches() - i = 0 - for p in panel: - # Extent of subplot - subpage = np.array(p).reshape(2, 2) - subpage[1, :] = subpage[0, :] + subpage[1, :] - subpage = subpage + np.array(border).reshape(2, 2) - subpage = list(((subpage) * page).flatten()) # type: ignore - extent = matplotlib.transforms.Bbox.from_extents(*subpage) - # Save subplot - fname = fnm + ".%i." % (i) + f - plt.savefig(fname, bbox_inches=extent) - - orig_fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file, - ) - fname = orig_fnm + ".%i." % (i) + f - logger.info(f"Sub-plot saved in: {fname}") - - i += 1 + parameter.output_file = var + _save_plot(fig, parameter, PANEL_CFG, BORDER_PADDING) plt.close() + + +def _get_mean_and_std_label(var: xr.DataArray, name: str) -> str: + """Get the plot label using the mean and std deviation of the yearly mean. + + Parameters + ---------- + var : xr.DataArray + The test or reference variable. + name : str + The test or reference label name. + + Returns + ------- + str + The plot label. + """ + mean = np.mean(var) + std = np.std(var) + label = name + f"(mean: {mean:.2f}, std: {std:.3f})" + + return label diff --git a/tests/e3sm_diags/driver/utils/test_dataset_xr.py b/tests/e3sm_diags/driver/utils/test_dataset_xr.py index 664b8df76..5aa81e0d7 100644 --- a/tests/e3sm_diags/driver/utils/test_dataset_xr.py +++ b/tests/e3sm_diags/driver/utils/test_dataset_xr.py @@ -942,9 +942,22 @@ def test_returns_climo_dataset_using_climo_of_time_series_files(self): # Since the data is not sub-monthly, the first time coord (2001-01-01) # is dropped when subsetting with the middle of the month (2000-01-15). expected = self.ds_ts.isel(time=slice(1, 4)) + expected["time"].data[:] = np.array( + [ + cftime.DatetimeGregorian(2000, 2, 15, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2000, 3, 16, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2001, 1, 16, 12, 0, 0, 0, has_year_zero=False), + ], + dtype=object, + ) expected["ts"] = xr.DataArray( name="ts", data=np.array([[1.0, 1.0], [1.0, 1.0]]), dims=["lat", "lon"] ) + # Set all of the correct attributes. + expected = expected.assign(**spatial_coords, **spatial_bounds) # type: ignore + expected["lat"].attrs["units"] = "degrees_north" + expected["lat_bnds"].attrs["xcdat_bounds"] = "True" + expected["lon_bnds"].attrs["xcdat_bounds"] = "True" xr.testing.assert_identical(result, expected) @@ -1049,8 +1062,7 @@ def setup(self, tmp_path): self.ts_path = f"{self.data_path}/ts_200001_200112.nc" self.ds_ts = xr.Dataset( coords={ - "lat": [-90, 90], - "lon": [0, 180], + **spatial_coords, "time": xr.DataArray( dims="time", data=np.array( @@ -1079,6 +1091,7 @@ def setup(self, tmp_path): ), }, data_vars={ + **spatial_bounds, "time_bnds": xr.DataArray( name="time_bnds", data=np.array( @@ -1178,6 +1191,14 @@ def test_returns_time_series_dataset_using_file(self): # Since the data is not sub-monthly, the first time coord (2001-01-01) # is dropped when subsetting with the middle of the month (2000-01-15). expected = self.ds_ts.isel(time=slice(1, 4)) + expected["time"].data[:] = np.array( + [ + cftime.DatetimeGregorian(2000, 2, 15, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2000, 3, 16, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2001, 1, 16, 12, 0, 0, 0, has_year_zero=False), + ], + dtype=object, + ) xr.testing.assert_identical(result, expected) @@ -1202,54 +1223,20 @@ def test_returns_time_series_dataset_using_sub_monthly_sets(self): def test_returns_time_series_dataset_using_derived_var(self): # We will derive the "PRECT" variable using the "pr" variable. - ds_pr = xr.Dataset( - coords={ - "lat": [-90, 90], - "lon": [0, 180], - "time": xr.DataArray( - dims="time", - data=np.array( - [ - cftime.DatetimeGregorian( - 2000, 1, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 2, 15, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 3, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 1, 16, 12, 0, 0, 0, has_year_zero=False - ), - ], - dtype=object, - ), - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ), - }, - data_vars={ - "pr": xr.DataArray( - xr.DataArray( - data=np.array( - [ - [[1.0, 1.0], [1.0, 1.0]], - [[1.0, 1.0], [1.0, 1.0]], - [[1.0, 1.0], [1.0, 1.0]], - [[1.0, 1.0], [1.0, 1.0]], - ] - ), - dims=["time", "lat", "lon"], - attrs={"units": "mm/s"}, - ) - ), - }, + ds_pr = self.ds_ts.copy() + ds_pr["pr"] = xr.DataArray( + data=np.array( + [ + [[1.0, 1.0], [1.0, 1.0]], + [[1.0, 1.0], [1.0, 1.0]], + [[1.0, 1.0], [1.0, 1.0]], + [[1.0, 1.0], [1.0, 1.0]], + ] + ), + dims=["time", "lat", "lon"], + attrs={"units": "mm/s"}, ) + ds_pr = ds_pr.drop_vars("ts") ds_pr.to_netcdf(f"{self.data_path}/pr_200001_200112.nc") parameter = _create_parameter_object( @@ -1259,61 +1246,35 @@ def test_returns_time_series_dataset_using_derived_var(self): ds = Dataset(parameter, data_type="ref") result = ds.get_time_series_dataset("PRECT") - expected = ds_pr.copy() + expected = ds_pr.sel(time=slice("2000-02-01", "2001-01-01")) + expected["time"].data[:] = np.array( + [ + cftime.DatetimeGregorian(2000, 2, 15, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2000, 3, 16, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2001, 1, 16, 12, 0, 0, 0, has_year_zero=False), + ], + dtype=object, + ) expected["PRECT"] = expected["pr"] * 3600 * 24 expected["PRECT"].attrs["units"] = "mm/day" xr.testing.assert_identical(result, expected) def test_returns_time_series_dataset_using_derived_var_directly_from_dataset(self): - ds_precst = xr.Dataset( - coords={ - "lat": [-90, 90], - "lon": [0, 180], - "time": xr.DataArray( - dims="time", - data=np.array( - [ - cftime.DatetimeGregorian( - 2000, 1, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 2, 15, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 3, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 1, 16, 12, 0, 0, 0, has_year_zero=False - ), - ], - dtype=object, - ), - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ), - }, - data_vars={ - "PRECST": xr.DataArray( - xr.DataArray( - data=np.array( - [ - [[1.0, 1.0], [1.0, 1.0]], - [[1.0, 1.0], [1.0, 1.0]], - [[1.0, 1.0], [1.0, 1.0]], - [[1.0, 1.0], [1.0, 1.0]], - ] - ), - dims=["time", "lat", "lon"], - attrs={"units": "mm/s"}, - ) - ), - }, + ds_precst = self.ds_ts.copy() + ds_precst["PRECST"] = xr.DataArray( + data=np.array( + [ + [[1.0, 1.0], [1.0, 1.0]], + [[1.0, 1.0], [1.0, 1.0]], + [[1.0, 1.0], [1.0, 1.0]], + [[1.0, 1.0], [1.0, 1.0]], + ] + ), + dims=["time", "lat", "lon"], + attrs={"units": "mm/s"}, ) + ds_precst = ds_precst.drop_vars("ts") ds_precst.to_netcdf(f"{self.data_path}/PRECST_200001_200112.nc") parameter = _create_parameter_object( @@ -1324,6 +1285,16 @@ def test_returns_time_series_dataset_using_derived_var_directly_from_dataset(sel result = ds.get_time_series_dataset("PRECST") expected = ds_precst.copy() + expected = ds_precst.sel(time=slice("2000-02-01", "2001-01-01")) + expected["PRECST"].attrs["units"] = "mm/s" + expected["time"].data[:] = np.array( + [ + cftime.DatetimeGregorian(2000, 2, 15, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2000, 3, 16, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2001, 1, 16, 12, 0, 0, 0, has_year_zero=False), + ], + dtype=object, + ) xr.testing.assert_identical(result, expected) @@ -1338,7 +1309,9 @@ def test_raises_error_if_no_datasets_found_to_derive_variable(self): with pytest.raises(IOError): ds.get_time_series_dataset("PRECT") - def test_returns_time_series_dataset_with_centered_time_if_single_point(self): + def test_returns_time_series_dataset_without_centered_time_if_single_point_data( + self, + ): self.ds_ts.to_netcdf(self.ts_path) parameter = _create_parameter_object( @@ -1350,9 +1323,25 @@ def test_returns_time_series_dataset_with_centered_time_if_single_point(self): result = ds.get_time_series_dataset("ts", single_point=True) expected = self.ds_ts.copy() + + xr.testing.assert_identical(result, expected) + + def test_returns_time_series_dataset_with_centered_time_if_non_sub_monthly_data( + self, + ): + self.ds_ts.to_netcdf(self.ts_path) + + parameter = _create_parameter_object( + "ref", "time_series", self.data_path, "2000", "2001" + ) + + ds = Dataset(parameter, data_type="ref") + ds.is_sub_monthly = False + + result = ds.get_time_series_dataset("ts") + expected = self.ds_ts.isel(time=slice(1, 4)) expected["time"].data[:] = np.array( [ - cftime.DatetimeGregorian(2000, 1, 16, 12, 0, 0, 0, has_year_zero=False), cftime.DatetimeGregorian(2000, 2, 15, 12, 0, 0, 0, has_year_zero=False), cftime.DatetimeGregorian(2000, 3, 16, 12, 0, 0, 0, has_year_zero=False), cftime.DatetimeGregorian(2001, 1, 16, 12, 0, 0, 0, has_year_zero=False), @@ -1379,6 +1368,14 @@ def test_returns_time_series_dataset_using_file_with_ref_name_prepended(self): # Since the data is not sub-monthly, the first time coord (2001-01-01) # is dropped when subsetting with the middle of the month (2000-01-15). expected = ds_ts.isel(time=slice(1, 4)) + expected["time"].data[:] = np.array( + [ + cftime.DatetimeGregorian(2000, 2, 15, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2000, 3, 16, 12, 0, 0, 0, has_year_zero=False), + cftime.DatetimeGregorian(2001, 1, 16, 12, 0, 0, 0, has_year_zero=False), + ], + dtype=object, + ) xr.testing.assert_identical(result, expected) diff --git a/tests/test_area_mean_time_series.py b/tests/test_area_mean_time_series.py new file mode 100644 index 000000000..464090415 --- /dev/null +++ b/tests/test_area_mean_time_series.py @@ -0,0 +1 @@ +# TODO