Skip to content

Commit

Permalink
CDAT Migration Phase 2: Refactor arm_diags set (#842)
Browse files Browse the repository at this point in the history
  • Loading branch information
chengzhuzhang authored and tomvothecoder committed Oct 29, 2024
1 parent 3fc1b5b commit b48a1ec
Show file tree
Hide file tree
Showing 14 changed files with 2,046 additions and 443 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# python -m auxiliary_tools.cdat_regression_testing.667-arm_diags.667-arm_diags_run_script
#
# Regression-test run script: collects a handful of settings and hands them to
# the shared `run_set` helper. Importing/executing this module triggers the run.
from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

# Diagnostic set name; passed as the first argument to `run_set`.
SET_NAME = "arm_diags"
# Second argument to `run_set`; presumably the directory name used for this
# run's results -- confirm against base_run_script.
SET_DIR = "667-arm_diags-final"
# Optional path to a .cfg file. `None` is what is passed to `run_set` here; the
# commented-out assignment below is an alternative that points at a cfg file.
CFG_PATH: str | None = None
# CFG_PATH = (
# "./auxiliary_tools/cdat_regression_testing/667-arm_diags/arm_diags_model_vs_obs.cfg"
# )
# Fourth argument to `run_set`; presumably toggles multiprocessing -- confirm
# against base_run_script.
MULTIPROCESSING = True

run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[#]
sets = ["arm_diags"]
diags_set = "annual_cycle"
regions = ["sgpc1"]
ref_name = "armdiags"
variables = ["PRECT"]

[#]
sets = ["arm_diags"]
diags_set = "diurnal_cycle"
regions = ["sgpc1"]
ref_name = "armdiags"
variables = ["PRECT"]

[#]
sets = ["arm_diags"]
diags_set = "diurnal_cycle_zt"
regions = ["sgpc1"]
ref_name = "armdiags"
variables = ["CLOUD"]

[#]
sets = ["arm_diags"]
diags_set = "convection_onset"
regions = ["twpc1"]
ref_name = "armdiags"


[#]
sets = ["arm_diags"]
diags_set = "aerosol_activation"
regions = ["sgpc1"]
variables = ["ccn02"]
ref_name = "armdiags"
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# %%
import timeit

# Setup snippets handed to `timeit` as its `setup` argument. Each one imports
# xarray, defines AIR_DENS and opens the three input files; they differ only in
# how the arrays are opened/loaded.

# Variant 1: plain `xr.open_dataarray` with no `.load()` call and no `chunks`
# argument.
setup_code = """
import xarray as xr
AIR_DENS = 1.225 # standard air density 1.225kg/m3
a1 = xr.open_dataarray("qa/667-arms-diags/a1.nc")
a2 = xr.open_dataarray("qa/667-arms-diags/a2.nc")
a3 = xr.open_dataarray("qa/667-arms-diags/a3.nc")
"""

# Variant 2: same as variant 1, then calls `.load(scheduler="sync")` on each
# array inside the setup, so the load happens before the timed statement runs.
setup_code2 = """
import xarray as xr
AIR_DENS = 1.225 # standard air density 1.225kg/m3
a1 = xr.open_dataarray("qa/667-arms-diags/a1.nc")
a2 = xr.open_dataarray("qa/667-arms-diags/a2.nc")
a3 = xr.open_dataarray("qa/667-arms-diags/a3.nc")
a1.load(scheduler="sync")
a2.load(scheduler="sync")
a3.load(scheduler="sync")
"""

# Variant 3: opens each file with `chunks={"time": "auto"}` and binds the
# results to the `*_chunked` names used by `code_statement2`.
setup_code3 = """
import xarray as xr
AIR_DENS = 1.225 # standard air density 1.225kg/m3
a1_chunked = xr.open_dataarray("qa/667-arms-diags/a1.nc", chunks={"time": "auto"})
a2_chunked = xr.open_dataarray("qa/667-arms-diags/a2.nc", chunks={"time": "auto"})
a3_chunked = xr.open_dataarray("qa/667-arms-diags/a3.nc", chunks={"time": "auto"})
"""

# Timed snippets handed to `timeit` as its `stmt` argument. Each computes
# (a1 + a2 + a3) * AIR_DENS / 1e6 in a different way. The names they use
# (a1, a1_chunked, AIR_DENS, xr) are defined by the setup snippets.

# Xarray arithmetic on the `a1`/`a2`/`a3` arrays with attributes kept.
# The body of the `with` block must be indented, otherwise the snippet does not
# compile and `timeit` raises before measuring anything.
code_statement1 = """
with xr.set_options(keep_attrs=True):
    var = (a1 + a2 + a3) * AIR_DENS / 1e6
"""


# Same arithmetic on the `*_chunked` arrays created by `setup_code3`.
code_statement2 = """
with xr.set_options(keep_attrs=True):
    var = (a1_chunked + a2_chunked + a3_chunked) * AIR_DENS / 1e6
"""

# Arithmetic on the `.values` of each array, then a new DataArray is built
# with explicit name, dims, coords and attrs.
code_statement3 = """
var_data = (a1.values + a2.values + a3.values) * AIR_DENS / 1e6
var_new = xr.DataArray(
var_data,
dims=a1.dims,
coords=a1.coords,
name="a_num",
attrs={"units": "/cm3", "long_name": "aerosol number concentration"},
)
"""

# Arithmetic on `.data` of each array; the new DataArray starts from
# `a1.attrs` and then has units/long_name updated.
code_statement4 = """
var_data2 = (a1.data + a2.data + a3.data) * AIR_DENS / 1e6
var_new2 = xr.DataArray(
name="a_num", data=var_data2, dims=a1.dims, coords=a1.coords, attrs=a1.attrs
)
var_new2.attrs.update(
{"units": "/cm3", "long_name": "aerosol number concentration"}
)
"""


def run_timeit(code_statement: str, setup_code: str) -> float:
    """Time a code snippet and return its fastest observed run in seconds.

    The snippet is executed once per trial for three trials through
    ``timeit.repeat``, with this module's globals supplied as the execution
    namespace.

    Parameters
    ----------
    code_statement : str
        Source code of the statement to time.
    setup_code : str
        Source code passed to ``timeit`` as the ``setup`` argument.

    Returns
    -------
    float
        The smallest of the three measured times.
    """
    trial_times = timeit.repeat(
        stmt=code_statement,
        setup=setup_code,
        repeat=3,
        number=1,
        globals=globals(),
    )
    fastest = min(trial_times)

    return fastest


# Run each benchmark case and print its best-of-three time.

# Case 1: xarray arithmetic, arrays opened without .load() or chunks.
elapsed_time_xarray = run_timeit(code_statement1, setup_code)
print(f"1. Elapsed time (Xarray non-chunked): {elapsed_time_xarray} seconds")

# Case 2: same statement, but the setup calls .load() on each array first.
elapsed_time_xarray_load = run_timeit(code_statement1, setup_code2)
print(
f"2. Elapsed time (Xarray non-chunked with .load()): {elapsed_time_xarray_load} seconds"
)
# Case 3: xarray arithmetic on arrays opened with chunks={"time": "auto"}.
elapsed_time_xarray_chunked = run_timeit(code_statement2, setup_code3)
print(f"3. Elapsed time (Xarray chunked): {elapsed_time_xarray_chunked} seconds")

# Case 4: arithmetic on `.values`, using the setup without .load() or chunks.
elapsed_time_numpy_1 = run_timeit(code_statement3, setup_code)
print(f"4. Elapsed time (numpy .values): {elapsed_time_numpy_1} seconds")

# Case 5: arithmetic on `.data`, using the setup without .load() or chunks.
elapsed_time_numpy_2 = run_timeit(code_statement4, setup_code)
print(f"5. Elapsed time (numpy .data): {elapsed_time_numpy_2} seconds")


"""
Results
----------
1. Elapsed time (Xarray non-chunked): 6.540755605790764 seconds
2. Elapsed time (Xarray non-chunked with .load()): 0.17097265785560012 seconds
3. Elapsed time (Xarray chunked): 0.1452920027077198 seconds
4. Elapsed time (numpy .values): 6.418793010059744 seconds
5. Elapsed time (numpy .data): 7.334999438840896 seconds
"""
36 changes: 36 additions & 0 deletions e3sm_diags/derivations/derivations.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from typing import Callable, Dict, Tuple, Union

from e3sm_diags.derivations.formulas import (
a_num_sum,
aero_burden_fxn,
aero_mass_fxn,
albedo,
Expand Down Expand Up @@ -63,6 +64,7 @@
restom3,
rst,
rstcs,
so4_mass_sum,
sum_vars,
swcf,
swcfsrf,
Expand Down Expand Up @@ -857,6 +859,40 @@
("pom_a?_CLXF",): lambda x: molec_convert_units(x, 12.0),
},
"Mass_pom": {("Mass_pom",): rename},
# total aerosol number concentration (#/CC)
"a_num": {
("cpc",): rename,
# Aerosol concentration from Aitken, Accumu., and Coarse mode
(
"num_a1",
"num_a2",
"num_a3",
): lambda a1, a2, a3: a_num_sum(a1, a2, a3),
},
# total so4 mass concentration (ng/m3)
"so4_mass": {
("sulfate",): rename,
# SO4 mass summed from the so4_a1 and so4_a2 variables (only two terms; no so4_a3 term is included)
(
"so4_a1",
"so4_a2",
): lambda a1, a2: so4_mass_sum(a1, a2),
},
# CCN 0.1%SS concentration (1/CC)
"ccn01": {
("ccn01",): rename,
("CCN3",): rename,
},
# CCN 0.2%SS concentration (1/CC)
"ccn02": {
("ccn02",): rename,
("CCN4",): rename,
},
# CCN 0.5%SS concentration (1/CC)
"ccn05": {
("ccn05",): rename,
("CCN5",): rename,
},
# Land variables
"SOILWATER_10CM": {("mrsos",): rename},
"SOILWATER_SUM": {("mrso",): rename},
Expand Down
22 changes: 22 additions & 0 deletions e3sm_diags/derivations/formulas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from e3sm_diags.derivations.utils import convert_units

AVOGADRO_CONST = 6.022e23
AIR_DENS = 1.225 # standard air density 1.225kg/m3


def sum_vars(vars: List[xr.DataArray]) -> xr.DataArray:
Expand Down Expand Up @@ -126,6 +127,27 @@ def molec_convert_units(vars: List[xr.DataArray], molar_weight: float) -> xr.Dat
return result


def a_num_sum(a1: xr.DataArray, a2: xr.DataArray, a3: xr.DataArray) -> xr.DataArray:
    """Calculate the total aerosol number concentration (#/cm3).

    Sums the three inputs, multiplies by ``AIR_DENS`` and divides by 1e6,
    then labels the result with its name, units and long name.

    Parameters
    ----------
    a1, a2, a3 : xr.DataArray
        The three aerosol number variables to add together. Presumably given
        per kg of air, so that multiplying by air density (kg/m3) and dividing
        by 1e6 yields a count per cm3 -- TODO confirm input units.

    Returns
    -------
    xr.DataArray
        The scaled sum, named ``"a_num"`` with ``units`` and ``long_name``
        stored in ``attrs``.
    """
    # keep_attrs=True so the arithmetic result carries attributes forward
    # instead of dropping them.
    with xr.set_options(keep_attrs=True):
        var = (a1 + a2 + a3) * AIR_DENS / 1e6

    var.name = "a_num"
    # Metadata must go through ``attrs``: ``var["units"] = ...`` on a DataArray
    # assigns a coordinate named "units" rather than setting an attribute.
    var.attrs["units"] = "/cm3"
    var.attrs["long_name"] = "aerosol number concentration"
    return var


def so4_mass_sum(a1: xr.DataArray, a2: xr.DataArray) -> xr.DataArray:
    """Calculate the SO4 mass concentration (< 1um).

    Adds the two inputs, multiplies by ``AIR_DENS`` and by 1e9, then labels
    the result with its name, units and long name.

    NOTE(review): the previous header comment said ng/m3 while the units
    string set below is micrograms per m3. A factor of 1e9 matches kg -> ug,
    assuming the inputs are kg/kg mixing ratios -- confirm input units.

    Parameters
    ----------
    a1, a2 : xr.DataArray
        The two SO4 variables to add together.

    Returns
    -------
    xr.DataArray
        The scaled sum, named ``"so4_mass"`` with ``units`` and ``long_name``
        stored in ``attrs``.
    """
    # keep_attrs=True so the arithmetic result carries attributes forward
    # instead of dropping them.
    with xr.set_options(keep_attrs=True):
        var = (a1 + a2) * AIR_DENS * 1e9

    var.name = "so4_mass"
    # Metadata must go through ``attrs``: plain attribute assignment such as
    # ``var.units = ...`` is rejected by DataArray with an AttributeError.
    var.attrs["units"] = "\u03bcg/m3"
    var.attrs["long_name"] = "SO4 mass conc."
    return var


def qflx_convert_to_lhflx(
qflx: xr.DataArray,
precc: xr.DataArray,
Expand Down
Loading

0 comments on commit b48a1ec

Please sign in to comment.