Skip to content

Commit

Permalink
Add comments to metrics comparison script
Browse files Browse the repository at this point in the history
  • Loading branch information
tomvothecoder committed Sep 6, 2023
1 parent a13ef31 commit ab41c9e
Showing 1 changed file with 24 additions and 8 deletions.
32 changes: 24 additions & 8 deletions auxiliary_tools/issue-658-compare-metrics.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %%
"""
This script compares the absolute and relative differences between metrics
generated by the `ex1` script on the `refactor/658-lat-lon-set` and `main`
Expand All @@ -8,14 +9,27 @@
- Relative differences show the scale using a percentage unit.
- Absolute differences are just raw numbers that don't factor in
floating point size (e.g., 100.00 vs. 0.0001), which can be misleading.
Metrics flow
1. Loop over variables
2. Get climatology variable (test and reference (optional))
- If time series, calculate climatology (CONFIRMED SAME)
3. <OPTIONAL> Apply land sea mask if region is land or ocean
- Requires regridding land sea mask to the grid of the variable
4. <OPTIONAL> If region is not global then subset on region
5. <OPTIONAL> Regrid to the lower resolution if one of the variables has a lower resolution
6. Calculate metrics
- min, max, mean (spatial avg), rmse, std, corr
"""
import glob
from typing import List

import pandas as pd

DEV_RESULTS = "/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model"
PROD_RESULTS = "/global/cfs/cdirs/e3sm/www/forsyth/examples/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model"
DEV_RESULTS = "/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years_658/lat_lon/model_vs_model"
PROD_RESULTS = "/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years_main/lat_lon/model_vs_model"

DEV_GLOB = sorted(glob.glob(DEV_RESULTS + "/*.json"))
PROD_GLOB = sorted(glob.glob(PROD_RESULTS + "/*.json"))
Expand Down Expand Up @@ -53,6 +67,10 @@ def get_metrics(filepaths: List[str]) -> pd.DataFrame:

df_final = pd.concat(metrics)

# Reorder columns and drop "unit" column (string dtype breaks Pandas
# arithmetic).
df_final = df_final[["test", "test_regrid", "ref", "ref_regrid", "diff", "misc"]]

return df_final


Expand All @@ -72,10 +90,6 @@ def get_diffs(df_a: pd.DataFrame, df_b: pd.DataFrame) -> pd.DataFrame:
The DataFrame containing absolute and relative differences between
the metrics DataFrames.
"""
# NOTE: Drop the unit attributes since it breaks Pandas arithmetic.
df_a = df_a.drop(columns=["unit"])
df_b = df_b.drop(columns=["unit"])

# Absolute difference: abs(actual - reference)
df_abs = abs(df_a - df_b)
df_abs = df_abs.add_suffix("_abs")
Expand All @@ -101,10 +115,10 @@ def _sort_cols(df: pd.DataFrame) -> pd.DataFrame:
"ref_rel",
"ref_regrid_abs",
"ref_regrid_rel",
"misc_abs",
"misc_rel",
"diff_abs",
"diff_rel",
"misc_abs",
"misc_rel",
]
df = df[columns]

Expand All @@ -115,5 +129,7 @@ def _sort_cols(df: pd.DataFrame) -> pd.DataFrame:
# Load the metrics JSON files matched by each glob into a DataFrame
# (one for the dev results directory, one for the prod results directory).
df_dev = get_metrics(DEV_GLOB)
df_prod = get_metrics(PROD_GLOB)

# %%
# Compute the absolute and relative differences between the two metrics
# DataFrames (dev is the first operand, prod the second).
df_diff = get_diffs(df_dev, df_prod)
# %%
# Save the differences as a spreadsheet; the path is relative, so the file
# lands in the current working directory.
df_diff.to_excel("20230830-issue-658-metrics-diff.xlsx")

0 comments on commit ab41c9e

Please sign in to comment.