Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Refactor]: Validate fix in PR #750 for #759 #815

Merged
merged 10 commits into from
Jun 3, 2024
Merged
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ ENV/

# NetCDF files needed
!e3sm_diags/driver/acme_ne30_ocean_land_mask.nc
!auxiliary_tools/cdat_regression_testing/759-slice-flag/debug/*.nc

# Folder for storing quality assurance files and notes
qa/
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[#]
sets = ["lat_lon"]
case_id = "model_vs_model"
variables = ["TREFHT"]
regions = ["land"]
seasons = ["ANN", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "DJF", "MAM", "JJA", "SON"]
contour_levels = [-35, -30, -25, -20, -15, -10, -5, 0, 5, 10, 15, 20, 25, 30, 35, 40]
diff_levels = [-5, -4, -3, -2, -1, -0.5, -0.2, 0.2, 0.5, 1, 2, 3, 4, 5]
regrid_method = "bilinear"
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# %%
import os

import cdms2
import numpy as np
import xarray as xr
import xcdat as xc # noqa: F401

var_key = "TREFHT"
regrid_tool = "esmf"
regrid_method = "bilinear"

dir_path = "/global/u2/v/vo13/E3SM-Project/e3sm_diags/auxiliary_tools/cdat_regression_testing/759-slice-flag/debug/"
fp_a = os.path.join(dir_path, "ds_a.nc")
fp_b = os.path.join(dir_path, "ds_b.nc")
fp_mv1 = os.path.join(dir_path, "mv1.nc")
fp_mv2 = os.path.join(dir_path, "mv2.nc")


# %%
# Regridding with xCDAT
ds_a = xr.open_dataset(fp_a)
ds_b = xr.open_dataset(fp_b)

output_grid = ds_a.regridder.grid
ds_b_regrid = ds_b.regridder.horizontal(
var_key, output_grid, tool=f"x{regrid_tool}", method=regrid_method
)

# Write out to netCDF for visualization
# ds_b_regrid.to_netcdf("ds_b_regrid.nc")

# %%
# Regridding with CDAT
ds_mv1 = cdms2.open(fp_mv1)
ds_mv2 = cdms2.open(fp_mv2)

mv1 = ds_mv1("variable_21")
mv2 = ds_mv2("variable_36")
mv_grid = mv1.getGrid()
mv2_reg = mv2.regrid(mv_grid, regridTool=regrid_tool, regridMethod=regrid_method)

# Write out to netCDF for visualization
# f1 = cdms2.open("mv2_regrid.nc", "w")
# f1.write(mv2)
# f1.close()

# %%
"""
1. Compare CDAT original data with regridded data

Result: There is no difference between the original and regridded reference
data.
"""
# Test closeness (same)
np.testing.assert_allclose(mv2, mv2_reg, atol=0, rtol=0)

mv2_filled = mv2.filled(np.nan)
mv2_reg_filled = mv2_reg.filled(np.nan)

# Count of np.nan: 22170 vs. 22170 (same)
np.count_nonzero(np.isnan(mv2_filled.flatten())) == np.count_nonzero(
np.isnan(mv2_reg_filled.flatten())
)
# Sum: -68837.01023016105 vs. -68837.01023016105 (same)
np.nansum(mv2_filled) == np.nansum(mv2_reg_filled)
# Mean: -6.342086809486 == -6.342086809486 (same)
np.nanmean(mv2_filled) == np.nanmean(mv2_reg_filled)
Comment on lines +48 to +68
Copy link
Collaborator Author

@tomvothecoder tomvothecoder Jun 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that for the "TREFHT" variable, the CDAT code on main reaches the conditional to regrid the reference variable (mv2) to the test variable (mv1).

# use nlat to decide data resolution, higher number means higher data
# resolution. For the difference plot, regrid toward lower resolution
if len(axes1[1]) <= len(axes2[1]):
mv_grid = mv1.getGrid()
mv1_reg = mv1
mv2_reg = mv2.regrid(
mv_grid, regridTool=regrid_tool, regridMethod=regrid_method
)
mv2_reg.units = mv2.units

However, the regridded reference data (mv2_reg) is the same as the original reference data (mv2) as shown in the comparison script above. No regridding seems to be happening. I opened #817 to address this later.


# %%
"""
2. Compare regridded data between xCDAT and CDAT

Result: The regridded reference data produced by xCDAT results in more `np.nan`,
which subsequently results in different np.nan locations, sum, and mean.

In https://github.com/E3SM-Project/e3sm_diags/pull/794, I noted that there
seems tobe a minor difference in how the bilinear regridding works with xCDAT + xESMF vs.
CDAT + ESMF.
"""
# Test closeness (x and y nan location mismatch)
np.testing.assert_allclose(ds_b_regrid[var_key].values, mv2_reg, atol=0, rtol=0)

# Count of np.nan: 23150 vs. 22170 (different)
np.count_nonzero(np.isnan(ds_b_regrid[var_key].values.flatten())) == np.count_nonzero(
np.isnan(mv2_reg_filled.flatten())
)
# Sum: -77674.06388742107 vs. -68837.01023016105 (different)
np.nansum(ds_b_regrid[var_key].values) == np.nansum(mv2_reg_filled.data)
# Mean: -7.866524598685545 vs. -6.342086809486 (different)
np.nanmean(ds_b_regrid[var_key].values) == np.nanmean(mv2_reg_filled.data)

# %%
Comment on lines +71 to +93
Copy link
Collaborator Author

@tomvothecoder tomvothecoder Jun 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noted how xCDAT actually performs the regridding. In debug_TREFHT_diff.png we can see the difference in the plots.

Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""This script compares the xCDAT and CDAT regridded reference data results.
The `debug_TREFHT_diff.png` shows there there is a difference along the coastlines
of the continents. CDAT doesn't seem to actually regrid the data, which is
noted in `trefht_cdat_only_visual.py`.
"""
# %%
import os

import numpy as np
import xarray as xr

from auxiliary_tools.cdat_regression_testing.utils import get_image_diffs

dir_path = "/global/u2/v/vo13/E3SM-Project/e3sm_diags/auxiliary_tools/cdat_regression_testing/759-slice-flag/debug/"
fp_a = os.path.join(dir_path, "ds_b_regrid.nc")
fp_b = os.path.join(dir_path, "mv2_regrid.nc")


# %%
ds1 = xr.open_dataset(fp_a)
ds2 = xr.open_dataset(fp_b)

var_key = "TREFHT"
ds2["TREFHT"] = ds2["variable_36"].copy()
ds2 = ds2.drop_vars("variable_36")

# %%
try:
np.testing.assert_allclose(ds1[var_key], ds2[var_key])
except AssertionError as e:
print(e)

# %%
# Check the sum values -- close
# array(213927.85820162)
np.abs(ds1[var_key]).sum()

# %%
# array(228804.60960445)
np.abs(ds2[var_key]).sum()

# Check the mean values -- close
# array(-7.8665246)
ds1[var_key].mean()

# array(-6.34208681)
ds2[var_key].mean()


# %%
# Check the plots and their diffs
root_dir = "auxiliary_tools/cdat_regression_testing/759-slice-flag"
actual_path = os.path.join(root_dir, "debug_TREFHT_actual.png")
expected_path = os.path.join(root_dir, "debug_TREFHT_expected.png")

ax1 = ds1[var_key].plot()
ax1.figure.savefig(actual_path)

# %%
ax2 = ds2[var_key].plot()
ax2.figure.savefig(expected_path)

# %%
get_image_diffs(actual_path, expected_path)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
64 changes: 64 additions & 0 deletions auxiliary_tools/cdat_regression_testing/759-slice-flag/ex1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# %%
import os
import sys

from e3sm_diags.parameter.core_parameter import CoreParameter
from e3sm_diags.run import runner

param = CoreParameter()

# Location of the data.
param.test_data_path = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1"
param.reference_data_path = "/global/cfs/cdirs/e3sm/e3sm_diags/test_model_data_for_acme_diags/time-series/E3SM_v1"

# Variables
# param.variables = ["PRECT"]

# Set this parameter to True.
# By default, e3sm_diags expects the test data to be climo data.
param.test_timeseries_input = True
# Years to slice the test data, base this off the years in the filenames.
param.test_start_yr = "2011"
param.test_end_yr = "2013"

# Set this parameter to True.
# By default, e3sm_diags expects the ref data to be climo data.
param.ref_timeseries_input = True
# Years to slice the ref data, base this off the years in the filenames
param.ref_start_yr = "1850"
param.ref_end_yr = "1852"

# When running with time-series data, you don't need to specify the name of the data.
# But you should, otherwise nothing is displayed when the test/ref name is needed.
param.short_test_name = "historical_H1"
param.short_ref_name = "historical_H1"

# This parameter modifies the software to accommodate model vs model runs.
# The default setting for run_type is 'model_vs_obs'.
param.run_type = "model_vs_model"
# Name of the folder where the results are stored.
# Change `prefix` to use your directory.
prefix = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24"
param.results_dir = os.path.join(prefix, "759-slice-flag")

# Below are more optional arguments.

# What plotsets to run the diags on.
# If not defined, then all available sets are used.
param.sets = ["lat_lon"]
# What seasons to run the diags on.
# If not defined, diags are run on ['ANN', 'DJF', 'MAM', 'JJA', 'SON'].
param.seasons = ["ANN"]
# Title of the difference plots.
param.diff_title = "Model (2011-2013) - Model (1850-1852)"

# For running with multiprocessing.
param.multiprocessing = True
# param.num_workers = 24

# %%
cfg_path = "/global/u2/v/vo13/E3SM-Project/e3sm_diags/auxiliary_tools/cdat_regression_testing/759-slice-flag/671-diags.cfg"
sys.argv.extend(["--diags", cfg_path])
runner.run_diags([param])

# %%
Loading
Loading