Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CDAT Migration Phase 2: Refactor arm_diags set #842

Merged
merged 35 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6d1a434
clean up arm_diags annual-cycle subset
chengzhuzhang Aug 26, 2024
1109188
update climo_xr for supporting annual cycle and performance
chengzhuzhang Aug 26, 2024
39a973d
more updates on climo_xr
chengzhuzhang Aug 28, 2024
e79a9db
finish annual_cycle(2d)
chengzhuzhang Aug 29, 2024
454f004
complete diurnal cycle (2d);enhance diurnal_cycle_xr performance
chengzhuzhang Aug 29, 2024
1f1ff50
update diurnal cycle zt
chengzhuzhang Sep 5, 2024
a43a1a6
update convection onset
chengzhuzhang Sep 5, 2024
5124e0f
update aerosol activation
chengzhuzhang Sep 6, 2024
d303550
Add `.load()` to `_get_dataset_with_source_vars()` to improve perform…
tomvothecoder Sep 9, 2024
b1dfeeb
Clean up run script
tomvothecoder Sep 13, 2024
aad246f
Revert `.load()` in `_get_dataset_with_source_vars()`
tomvothecoder Sep 13, 2024
04f0892
Replace `fastAllGridFT` with `_fft_all_grid`
tomvothecoder Sep 13, 2024
e789222
Code clean up and fix pre-commit issues
tomvothecoder Sep 13, 2024
76ff0d2
Update `fastAllGridFT` to `_fft_all_grid`
tomvothecoder Sep 13, 2024
65a096f
add png regression test script
chengzhuzhang Sep 10, 2024
2ffb6e0
add png regression test script
chengzhuzhang Sep 18, 2024
5818318
fix CI for a missing file
chengzhuzhang Sep 18, 2024
9049725
Remove unused annual_cycle_aerosol diags
tomvothecoder Sep 27, 2024
d8b794a
Remove unused `_select_point()` function
tomvothecoder Sep 27, 2024
fc9a940
Add arm_diags png regression testing
tomvothecoder Sep 27, 2024
54999a7
Update regression testing png notebook
tomvothecoder Sep 27, 2024
420821f
Fix ncycle=1 misaligned dims in `climo_xr.py`
tomvothecoder Sep 27, 2024
640b42a
Address ref FIXME comment
tomvothecoder Sep 30, 2024
0d0671c
Fix `_get_time_slice()` `end_time` format when years <1000
tomvothecoder Sep 30, 2024
311a630
Refactor `arm_diags_plot.py`
tomvothecoder Sep 30, 2024
fb8a320
Update regression testing notebook
tomvothecoder Sep 30, 2024
da6d110
Add logic to exclude last time coordinate for sub-monthly data
tomvothecoder Sep 30, 2024
67de95c
Fix unit tests for submonthly time series data
tomvothecoder Sep 30, 2024
5821a85
Update .get statement for long_name attr when it does not exist
tomvothecoder Sep 30, 2024
c13ad23
Add .get for long_name and other attrs
tomvothecoder Sep 30, 2024
49ee2a5
Apply suggestions from code review
tomvothecoder Sep 30, 2024
8c936be
Fix RegionStats type annotation
tomvothecoder Sep 30, 2024
dd4e7fe
Add `_exclude_sub_monthly_coord_spanning_year` to replicate cdms2 "co…
tomvothecoder Oct 1, 2024
7b53932
Update e3sm_diags/driver/utils/dataset_xr.py
tomvothecoder Oct 1, 2024
858966b
Update e3sm_diags/driver/utils/climo_xr.py
tomvothecoder Oct 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# python -m auxiliary_tools.cdat_regression_testing.667-arm_diags.667-arm_diags_run_script
from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

# Name of the diagnostic set passed to run_set().
SET_NAME = "arm_diags"
# Second positional argument to run_set(); presumably the directory name used
# for this run's results -- confirm against base_run_script.run_set.
SET_DIR = "667-arm_diags-final"
# Optional path to a .cfg file. None is passed as-is to run_set(); how None is
# interpreted is decided there (not visible in this script).
CFG_PATH: str | None = None
# CFG_PATH = (
# "./auxiliary_tools/cdat_regression_testing/667-arm_diags/arm_diags_model_vs_obs.cfg"
# )
# Fourth positional argument to run_set(); presumably toggles multiprocessing
# for the run -- confirm against base_run_script.run_set.
MULTIPROCESSING = True

# Kick off the run with the settings above.
run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[#]
sets = ["arm_diags"]
diags_set = "annual_cycle"
regions = ["sgpc1"]
ref_name = "armdiags"
variables = ["PRECT"]

[#]
sets = ["arm_diags"]
diags_set = "diurnal_cycle"
regions = ["sgpc1"]
ref_name = "armdiags"
variables = ["PRECT"]

[#]
sets = ["arm_diags"]
diags_set = "diurnal_cycle_zt"
regions = ["sgpc1"]
ref_name = "armdiags"
variables = ["CLOUD"]

[#]
sets = ["arm_diags"]
diags_set = "convection_onset"
regions = ["twpc1"]
ref_name = "armdiags"


[#]
sets = ["arm_diags"]
diags_set = "aerosol_activation"
regions = ["sgpc1"]
variables = ["ccn02"]
ref_name = "armdiags"
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# %%
import timeit

# Setup variant 1: open the three input arrays with default options (no
# explicit .load() call and no explicit chunking). Defines a1, a2, a3.
setup_code = """
import xarray as xr

AIR_DENS = 1.225 # standard air density 1.225kg/m3

a1 = xr.open_dataarray("qa/667-arms-diags/a1.nc")
a2 = xr.open_dataarray("qa/667-arms-diags/a2.nc")
a3 = xr.open_dataarray("qa/667-arms-diags/a3.nc")
"""

# Setup variant 2: same as variant 1, but each array is explicitly loaded
# with .load(scheduler="sync") before the timed statement runs.
setup_code2 = """
import xarray as xr

AIR_DENS = 1.225 # standard air density 1.225kg/m3

a1 = xr.open_dataarray("qa/667-arms-diags/a1.nc")
a2 = xr.open_dataarray("qa/667-arms-diags/a2.nc")
a3 = xr.open_dataarray("qa/667-arms-diags/a3.nc")

a1.load(scheduler="sync")
a2.load(scheduler="sync")
a3.load(scheduler="sync")
"""

# Setup variant 3: open the arrays with chunks={"time": "auto"}. Note the
# different variable names (a1_chunked, ...), which code_statement2 expects.
setup_code3 = """
import xarray as xr

AIR_DENS = 1.225 # standard air density 1.225kg/m3

a1_chunked = xr.open_dataarray("qa/667-arms-diags/a1.nc", chunks={"time": "auto"})
a2_chunked = xr.open_dataarray("qa/667-arms-diags/a2.nc", chunks={"time": "auto"})
a3_chunked = xr.open_dataarray("qa/667-arms-diags/a3.nc", chunks={"time": "auto"})
"""

# Timed statement for the a1/a2/a3 arrays: xarray arithmetic with attributes
# kept. The body of the ``with`` block must be indented inside the string
# literal; otherwise timeit cannot compile the statement.
code_statement1 = """
with xr.set_options(keep_attrs=True):
    var = (a1 + a2 + a3) * AIR_DENS / 1e6
"""


# Same computation as code_statement1, but on the ``*_chunked`` arrays that
# setup_code3 defines.
code_statement2 = """
with xr.set_options(keep_attrs=True):
    var = (a1_chunked + a2_chunked + a3_chunked) * AIR_DENS / 1e6
"""

# Timed statement: do the arithmetic on the ``.values`` of each array, then
# wrap the result in a new DataArray that reuses a1's dims and coords and
# carries hard-coded name/units/long_name.
code_statement3 = """
var_data = (a1.values + a2.values + a3.values) * AIR_DENS / 1e6
var_new = xr.DataArray(
var_data,
dims=a1.dims,
coords=a1.coords,
name="a_num",
attrs={"units": "/cm3", "long_name": "aerosol number concentration"},
)
"""

# Timed statement: same idea as code_statement3, but using ``.data`` instead
# of ``.values``, starting from a1's attrs and then updating units/long_name.
code_statement4 = """
var_data2 = (a1.data + a2.data + a3.data) * AIR_DENS / 1e6
var_new2 = xr.DataArray(
name="a_num", data=var_data2, dims=a1.dims, coords=a1.coords, attrs=a1.attrs
)
var_new2.attrs.update(
{"units": "/cm3", "long_name": "aerosol number concentration"}
)
"""


def run_timeit(code_statement: str, setup_code: str) -> float:
    """Return the best wall-clock time of ``code_statement``.

    The statement is executed once per trial (``number=1``) for three trials
    (``repeat=3``); ``setup_code`` runs before each trial. Both snippets are
    executed with this module's globals as their global namespace.

    Parameters
    ----------
    code_statement : str
        Source code of the statement to time.
    setup_code : str
        Source code executed before each timed trial.

    Returns
    -------
    float
        The smallest of the three measured times, in seconds.
    """
    timer = timeit.Timer(stmt=code_statement, setup=setup_code, globals=globals())
    trial_times = timer.repeat(repeat=3, number=1)

    # The minimum is the trial least disturbed by other activity on the machine.
    return min(trial_times)


# Run each benchmark variant in turn and print its best-of-three time.
# 1. xarray arithmetic on arrays opened with default options.
elapsed_time_xarray = run_timeit(code_statement1, setup_code)
print(f"1. Elapsed time (Xarray non-chunked): {elapsed_time_xarray} seconds")

# 2. Same statement, but the arrays are explicitly .load()-ed during setup.
elapsed_time_xarray_load = run_timeit(code_statement1, setup_code2)
print(
    f"2. Elapsed time (Xarray non-chunked with .load()): {elapsed_time_xarray_load} seconds"
)
# 3. xarray arithmetic on arrays opened with chunks={"time": "auto"}.
elapsed_time_xarray_chunked = run_timeit(code_statement2, setup_code3)
print(f"3. Elapsed time (Xarray chunked): {elapsed_time_xarray_chunked} seconds")

# 4. Arithmetic on .values, result re-wrapped in a DataArray.
elapsed_time_numpy_1 = run_timeit(code_statement3, setup_code)
print(f"4. Elapsed time (numpy .values): {elapsed_time_numpy_1} seconds")

# 5. Arithmetic on .data, result re-wrapped in a DataArray.
elapsed_time_numpy_2 = run_timeit(code_statement4, setup_code)
print(f"5. Elapsed time (numpy .data): {elapsed_time_numpy_2} seconds")


"""
Results
----------
1. Elapsed time (Xarray non-chunked): 6.540755605790764 seconds
2. Elapsed time (Xarray non-chunked with .load()): 0.17097265785560012 seconds
3. Elapsed time (Xarray chunked): 0.1452920027077198 seconds
4. Elapsed time (numpy .values): 6.418793010059744 seconds
5. Elapsed time (numpy .data): 7.334999438840896 seconds
"""
36 changes: 36 additions & 0 deletions e3sm_diags/derivations/derivations.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from typing import Callable, Dict, Tuple, Union

from e3sm_diags.derivations.formulas import (
a_num_sum,
aero_burden_fxn,
aero_mass_fxn,
albedo,
Expand Down Expand Up @@ -63,6 +64,7 @@
restom3,
rst,
rstcs,
so4_mass_sum,
sum_vars,
swcf,
swcfsrf,
Expand Down Expand Up @@ -857,6 +859,40 @@
("pom_a?_CLXF",): lambda x: molec_convert_units(x, 12.0),
},
"Mass_pom": {("Mass_pom",): rename},
# total aerosol number concentration (#/CC)
"a_num": {
("cpc",): rename,
# Aerosol concentration from Aitken, Accumu., and Coarse mode
(
"num_a1",
"num_a2",
"num_a3",
): lambda a1, a2, a3: a_num_sum(a1, a2, a3),
},
# total so4 mass concentration (ng/m3)
"so4_mass": {
("sulfate",): rename,
# SO4 mass from the so4_a1 and so4_a2 modes only (no third-mode term, unlike a_num)
(
"so4_a1",
"so4_a2",
): lambda a1, a2: so4_mass_sum(a1, a2),
},
# CCN 0.1%SS concentration (1/CC)
"ccn01": {
("ccn01",): rename,
("CCN3",): rename,
},
# CCN 0.2%SS concentration (1/CC)
"ccn02": {
("ccn02",): rename,
("CCN4",): rename,
},
# CCN 0.5%SS concentration (1/CC)
"ccn05": {
("ccn05",): rename,
("CCN5",): rename,
},
# Land variables
"SOILWATER_10CM": {("mrsos",): rename},
"SOILWATER_SUM": {("mrso",): rename},
Expand Down
22 changes: 22 additions & 0 deletions e3sm_diags/derivations/formulas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from e3sm_diags.derivations.utils import convert_units

AVOGADRO_CONST = 6.022e23
AIR_DENS = 1.225 # standard air density 1.225kg/m3


def sum_vars(vars: List[xr.DataArray]) -> xr.DataArray:
Expand Down Expand Up @@ -126,6 +127,27 @@ def molec_convert_units(vars: List[xr.DataArray], molar_weight: float) -> xr.Dat
return result


def a_num_sum(a1: xr.DataArray, a2: xr.DataArray, a3: xr.DataArray):
    """Calculate the total aerosol number concentration (#/cm3).

    The three inputs are summed, multiplied by ``AIR_DENS`` and divided by
    1e6. Attributes of the inputs are kept through the arithmetic and the
    ``units`` and ``long_name`` attributes are then overwritten.

    Parameters
    ----------
    a1, a2, a3 : xr.DataArray
        Number concentrations to add together.
        NOTE(review): presumably per-kg-of-air values for three aerosol
        modes -- confirm against the callers in derivations.py.

    Returns
    -------
    xr.DataArray
        The summed variable, named ``"a_num"``, with ``units`` set to
        ``"/cm3"`` and ``long_name`` set to
        ``"aerosol number concentration"``.
    """
    with xr.set_options(keep_attrs=True):
        var = (a1 + a2 + a3) * AIR_DENS / 1e6

    var.name = "a_num"
    # Metadata must be written to ``attrs``. Item assignment with a string key
    # (``var["units"] = ...``) would add a coordinate called "units" instead
    # and leave the inherited ``units`` attribute untouched.
    var.attrs["units"] = "/cm3"
    var.attrs["long_name"] = "aerosol number concentration"

    return var


def so4_mass_sum(a1: xr.DataArray, a2: xr.DataArray):
    """Calculate the SO4 mass concentration (< 1um).

    The two inputs are summed and multiplied by ``AIR_DENS`` and 1e9.
    Attributes of the inputs are kept through the arithmetic and the
    ``units`` and ``long_name`` attributes are then overwritten.

    NOTE(review): the previous inline comment stated ng/m3, but the units
    attribute assigned here is micrograms per m3. A 1e9 factor matches
    kg -> microgram if the inputs are kg/kg -- confirm the input units.

    Parameters
    ----------
    a1, a2 : xr.DataArray
        SO4 mass mixing ratios to add together.

    Returns
    -------
    xr.DataArray
        The summed variable, named ``"so4_mass"``, with ``units`` and
        ``long_name`` attributes set.
    """
    with xr.set_options(keep_attrs=True):
        var = (a1 + a2) * AIR_DENS * 1e9

    var.name = "so4_mass"
    # Metadata must be written to ``attrs``. Plain attribute assignment
    # (``var.units = ...``) is rejected by DataArray with an AttributeError.
    var.attrs["units"] = "\u03bcg/m3"
    var.attrs["long_name"] = "SO4 mass conc."

    return var


def qflx_convert_to_lhflx(
qflx: xr.DataArray,
precc: xr.DataArray,
Expand Down
Loading
Loading