From 0d728af780ed067de23609d61a1956cad1dde93b Mon Sep 17 00:00:00 2001
From: "Chan-hoo.Jeon"
Date: Thu, 23 Jan 2025 13:48:33 +0000
Subject: [PATCH 01/41] initial work for smoke and dust

---
 Externals.cfg                                 |   8 +-
 environment.yml                               |   6 +
 jobs/JSRW_PREPSTART                           | 165 +++++
 jobs/JSRW_SMOKE_DUST                          | 168 ++++++
 parm/FV3.input.yml                            | 161 +++++
 parm/diag_table_smoke_dust.FV3_HRRR_gf        | 523 ++++++++++++++++
 parm/field_table_smoke_dust.FV3_HRRR_gf       |  80 +++
 parm/input.nml.FV3                            |   2 +-
 parm/wflow/smoke_dust.yaml                    |  90 +++
 scripts/exsrw_prepstart.sh                    | 149 +++++
 scripts/exsrw_smoke_dust.sh                   | 148 +++++
 sorc/CMakeLists.txt                           |   2 +-
 ...ust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml |  66 ++
 ush/config.smoke_dust.yaml                    |  66 ++
 ush/config_defaults.yaml                      |  44 +-
 ush/machine/gaea-c6.yaml                      |   9 +
 ush/machine/hera.yaml                         |   7 +
 ush/machine/hercules.yaml                     |   7 +
 ush/machine/orion.yaml                        |   7 +
 ush/smoke_dust_add_smoke.py                   | 105 ++++
 ush/smoke_dust_fire_emiss_tools.py            | 415 +++++++++++++
 ush/smoke_dust_generate_fire_emissions.py     | 230 +++++++
 ush/smoke_dust_hwp_tools.py                   | 276 +++++++++
 ush/smoke_dust_interp_tools.py                | 566 ++++++++++++++++++
 ush/valid_param_vals.yaml                     |   1 +
 25 files changed, 3294 insertions(+), 7 deletions(-)
 create mode 100755 jobs/JSRW_PREPSTART
 create mode 100755 jobs/JSRW_SMOKE_DUST
 create mode 100644 parm/diag_table_smoke_dust.FV3_HRRR_gf
 create mode 100644 parm/field_table_smoke_dust.FV3_HRRR_gf
 create mode 100644 parm/wflow/smoke_dust.yaml
 create mode 100755 scripts/exsrw_prepstart.sh
 create mode 100755 scripts/exsrw_smoke_dust.sh
 create mode 100644 tests/WE2E/test_configs/smoke_dust/config.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml
 create mode 100644 ush/config.smoke_dust.yaml
 create mode 100755 ush/smoke_dust_add_smoke.py
 create mode 100755 ush/smoke_dust_fire_emiss_tools.py
 create mode 100755 ush/smoke_dust_generate_fire_emissions.py
 create mode 100755 ush/smoke_dust_hwp_tools.py
 create mode 100755 ush/smoke_dust_interp_tools.py
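The first two hunks below repoint UFS_UTILS at the NOAA-EPIC fork and advance both component hashes, and the environment.yml hunk pins the Python stack (esmpy, netCDF4, numpy, pandas, scipy, xarray) that the new ush/smoke_dust_*.py tools rely on. A minimal sketch of how the new pins would be picked up and verified, assuming the stock manage_externals workflow that consumes Externals.cfg (the expected short hashes come straight from the hunks below):

    ./manage_externals/checkout_externals
    git -C sorc/UFS_UTILS rev-parse --short HEAD          # expect 1ee0fee
    git -C sorc/ufs-weather-model rev-parse --short HEAD  # expect 3a5e52e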
diff --git a/Externals.cfg b/Externals.cfg
index ec064ce084..5e94961fb4 100644
--- a/Externals.cfg
+++ b/Externals.cfg
@@ -1,9 +1,9 @@
 [ufs_utils]
 protocol = git
-repo_url = https://github.com/ufs-community/UFS_UTILS
+repo_url = https://github.com/NOAA-EPIC/UFS_UTILS
 # Specify either a branch name or a hash but not both.
-#branch = develop
-hash = 57bd832
+#branch = release/srw-v3.0.0
+hash = 1ee0fee
 local_path = sorc/UFS_UTILS
 required = True

 [ufs_weather_model]
 protocol = git
 repo_url = https://github.com/ufs-community/ufs-weather-model
 # Specify either a branch name or a hash but not both.
 #branch = develop
-hash = 5324d64
+hash = 3a5e52e
 local_path = sorc/ufs-weather-model
 required = True

diff --git a/environment.yml b/environment.yml
index a735213198..bef636c2ae 100644
--- a/environment.yml
+++ b/environment.yml
@@ -6,3 +6,9 @@ dependencies:
   - pylint=2.17*
   - pytest=7.2*
   - uwtools=2.3*
+  - esmpy=8.6.*
+  - netcdf4=1.6.*
+  - numpy=1.23.*
+  - pandas=1.5.*
+  - scipy=1.10.*
+  - xarray=2022.11.*
diff --git a/jobs/JSRW_PREPSTART b/jobs/JSRW_PREPSTART
new file mode 100755
index 0000000000..50476d6f45
--- /dev/null
+++ b/jobs/JSRW_PREPSTART
@@ -0,0 +1,165 @@
+#!/usr/bin/env bash
+
+#
+#-----------------------------------------------------------------------
+#
+# This script runs prepstart to update IC files for Smoke/Dust
+#
+#-----------------------------------------------------------------------
+#
+date
+export PS4='+ $SECONDS + '
+set -xue
+#
+#-----------------------------------------------------------------------
+#
+# Set the NCO standard environment variables (Table 1, p. 4)
+#
+#-----------------------------------------------------------------------
+#
+export USHsrw="${HOMEdir}/ush"
+export EXECsrw="${HOMEdir}/exec"
+export PARMsrw="${HOMEdir}/parm"
+export SCRIPTSsrw="${HOMEdir}/scripts"
+#
+#-----------------------------------------------------------------------
+#
+# Source the variable definitions file and the bash utility functions.
+#
+#-----------------------------------------------------------------------
+#
+export USHdir="${USHsrw}" # should be removed later
+. ${USHsrw}/source_util_funcs.sh
+for sect in user nco platform workflow global smoke_dust_parm ; do
+  source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect}
+done
+#
+#-----------------------------------------------------------------------
+#
+# Get the full path to the file in which this script/function is located
+# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in
+# which the file is located (scrfunc_dir).
+#
+#-----------------------------------------------------------------------
+#
+scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" )
+scrfunc_fn=$( basename "${scrfunc_fp}" )
+scrfunc_dir=$( dirname "${scrfunc_fp}" )
+#
+#-----------------------------------------------------------------------
+#
+# Print message indicating entry into script.
+#
+#-----------------------------------------------------------------------
+#
+print_info_msg "
+========================================================================
+Entering script: \"${scrfunc_fn}\"
+In directory: \"${scrfunc_dir}\"
+
+This is the J-job script for the task that updates the initial-condition
+(IC) files for a Smoke/Dust forecast.
+========================================================================"
+#
+#-----------------------------------------------------------------------
+#
+# Define job and jobid by default for rocoto
+#
+#-----------------------------------------------------------------------
+#
+WORKFLOW_MANAGER="${WORKFLOW_MANAGER:-rocoto}"
+if [ "${WORKFLOW_MANAGER}" = "rocoto" ]; then
+  if [ "${SCHED}" = "slurm" ]; then
+    job=${SLURM_JOB_NAME}
+    pid=${SLURM_JOB_ID}
+  elif [ "${SCHED}" = "pbspro" ]; then
+    job=${PBS_JOBNAME}
+    pid=${PBS_JOBID}
+  else
+    job="task"
+    pid=$$
+  fi
+  jobid="${job}.${PDY}${cyc}.${pid}"
+fi
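To make the jobid naming concrete, consider a purely hypothetical Slurm run of this J-job named prepstart with job ID 8675309 in the 18Z cycle of 25 June 2023:

    # job=prepstart  pid=8675309  PDY=20230625  cyc=18
    jobid=prepstart.2023062518.8675309

The working directory created just below then defaults to ${DATAROOT}/prepstart.2023062518.8675309.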
+#
+#-----------------------------------------------------------------------
+#
+# Create a temp working directory (DATA) and cd into it.
+#
+#-----------------------------------------------------------------------
+#
+export DATA="${DATA:-${DATAROOT}/${jobid}}"
+mkdir -p $DATA
+cd $DATA
+#
+#-----------------------------------------------------------------------
+#
+# Define NCO environment variables and set COM type definitions.
+#
+#-----------------------------------------------------------------------
+#
+export NET="${NET:-${NET_default}}"
+export RUN="${RUN:-${RUN_default}}"
+
+[[ "$WORKFLOW_MANAGER" = "rocoto" ]] && export COMROOT=$COMROOT
+if [ "${MACHINE}" = "WCOSS2" ]; then
+  export COMIN="${COMIN:-$(compath.py -o ${NET}/${model_ver})}"
+  export COMOUT="${COMOUT:-$(compath.py -o ${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR})}"
+else
+  export COMIN="${COMIN:-${COMROOT}/${NET}/${model_ver}}"
+  export COMOUT="${COMOUT:-${COMROOT}/${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR}}"
+fi
+
+mkdir -p ${COMOUT}
+
+# Create a temporary share directory
+export DATA_SHARE="${DATA_SHARE:-${DATAROOT}/DATA_SHARE/${PDY}${cyc}}"
+mkdir -p ${DATA_SHARE}
+
+# Run setpdy to initialize PDYm and PDYp variables
+export cycle="${cycle:-t${cyc}z}"
+setpdy.sh
+. ./PDY
+#
+#-----------------------------------------------------------------------
+#
+# Set sub-cycle and ensemble member names in file/directory names
+#
+#-----------------------------------------------------------------------
+#
+if [ ${subcyc} -ne 0 ]; then
+  export cycle="t${cyc}${subcyc}z"
+fi
+if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ] && [ ! -z ${ENSMEM_INDX} ]; then
+  export dot_ensmem=".mem${ENSMEM_INDX}"
+else
+  export dot_ensmem=
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Call the ex-script for this J-job.
+#
+#-----------------------------------------------------------------------
+#
+export pgmout="${DATA}/OUTPUT.$$"
+env
+
+${SCRIPTSsrw}/exsrw_prepstart.sh
+export err=$?; err_chk
+
+if [ -e "$pgmout" ]; then
+  cat $pgmout
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Whether or not working directory DATA should be kept.
+#
+#-----------------------------------------------------------------------
+#
+if [ "${KEEPDATA}" = "NO" ]; then
+  rm -rf ${DATA}
+fi
+date
+
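The companion smoke/dust J-job that follows builds its COM paths with the same ${NET}/${model_ver}/${RUN}.${PDY}/${cyc} convention used above. A worked example of the non-WCOSS2 branch, with hypothetical values NET=srw, model_ver=v1.0.0, RUN=srw, the 2023-06-25 18Z cycle, and an empty SLASH_ENSMEM_SUBDIR:

    COMIN=${COMROOT}/srw/v1.0.0
    COMOUT=${COMROOT}/srw/v1.0.0/srw.20230625/18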
diff --git a/jobs/JSRW_SMOKE_DUST b/jobs/JSRW_SMOKE_DUST
new file mode 100755
index 0000000000..4341fda03c
--- /dev/null
+++ b/jobs/JSRW_SMOKE_DUST
@@ -0,0 +1,168 @@
+#!/usr/bin/env bash
+
+#
+#-----------------------------------------------------------------------
+#
+# This script processes smoke and dust
+#
+#-----------------------------------------------------------------------
+#
+date
+export PS4='+ $SECONDS + '
+set -xue
+#
+#-----------------------------------------------------------------------
+#
+# Set the NCO standard environment variables (Table 1, p. 4)
+#
+#-----------------------------------------------------------------------
+#
+export USHsrw="${HOMEdir}/ush"
+export EXECsrw="${HOMEdir}/exec"
+export PARMsrw="${HOMEdir}/parm"
+export SCRIPTSsrw="${HOMEdir}/scripts"
+#
+#-----------------------------------------------------------------------
+#
+# Source the variable definitions file and the bash utility functions.
+#
+#-----------------------------------------------------------------------
+#
+export USHdir="${USHsrw}" # should be removed later
+. ${USHsrw}/source_util_funcs.sh
+for sect in user nco platform workflow global smoke_dust_parm ; do
+  source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect}
+done
+#
+#-----------------------------------------------------------------------
+#
+# Get the full path to the file in which this script/function is located
+# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in
+# which the file is located (scrfunc_dir).
+#
+#-----------------------------------------------------------------------
+#
+scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" )
+scrfunc_fn=$( basename "${scrfunc_fp}" )
+scrfunc_dir=$( dirname "${scrfunc_fp}" )
+#
+#-----------------------------------------------------------------------
+#
+# Print message indicating entry into script.
+#
+#-----------------------------------------------------------------------
+#
+print_info_msg "
+========================================================================
+Entering script: \"${scrfunc_fn}\"
+In directory: \"${scrfunc_dir}\"
+
+This is the J-job script for the task that copies or fetches RAVE fire
+emission data files from disk or HPSS.
+========================================================================"
+#
+#-----------------------------------------------------------------------
+#
+# Define job and jobid by default for rocoto
+#
+#-----------------------------------------------------------------------
+#
+WORKFLOW_MANAGER="${WORKFLOW_MANAGER:-rocoto}"
+if [ "${WORKFLOW_MANAGER}" = "rocoto" ]; then
+  if [ "${SCHED}" = "slurm" ]; then
+    job=${SLURM_JOB_NAME}
+    pid=${SLURM_JOB_ID}
+  elif [ "${SCHED}" = "pbspro" ]; then
+    job=${PBS_JOBNAME}
+    pid=${PBS_JOBID}
+  else
+    job="task"
+    pid=$$
+  fi
+  jobid="${job}.${PDY}${cyc}.${pid}"
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Create a temp working directory (DATA) and cd into it.
+#
+#-----------------------------------------------------------------------
+#
+export DATA="${DATA:-${DATAROOT}/${jobid}}"
+mkdir -p $DATA
+cd $DATA
+#
+#-----------------------------------------------------------------------
+#
+# Define NCO environment variables and set COM type definitions.
+#
+#-----------------------------------------------------------------------
+#
+export NET="${NET:-${NET_default}}"
+export RUN="${RUN:-${RUN_default}}"
+
+[[ "$WORKFLOW_MANAGER" = "rocoto" ]] && export COMROOT=$COMROOT
+if [ "${MACHINE}" = "WCOSS2" ]; then
+  export COMIN="${COMIN:-$(compath.py -o ${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR})}"
+  export COMOUT="${COMOUT:-$(compath.py -o ${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR})}"
+else
+  export COMIN="${COMIN:-${COMROOT}/${NET}/${model_ver}}"
+  export COMOUT="${COMOUT:-${COMROOT}/${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR}}"
+fi
+
+mkdir -p ${COMOUT}
+
+export COMINsmoke="${COMINsmoke:-${COMINsmoke_default}}"
+export COMINfire="${COMINfire:-${COMINfire_default}}"
+
+# Create a temporary share directory for RAVE interpolated data files
+export DATA_SHARE="${DATA_SHARE:-${DATAROOT}/DATA_SHARE/RAVE_fire_intp}"
+mkdir -p ${DATA_SHARE}
+
+# Run setpdy to initialize PDYm and PDYp variables
+export cycle="${cycle:-t${cyc}z}"
+setpdy.sh
+. ./PDY
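setpdy.sh is NCO's prod_util helper: it writes a ./PDY file, dot-sourced on the line above, defining the cycle date plus day offsets on either side. A sketch of what the generated file typically contains for the hypothetical PDY=20230625 (only a few of the PDYm*/PDYp* entries shown; exact contents depend on the prod_util version):

    export PDYm2=20230623
    export PDYm1=20230624
    export PDY=20230625
    export PDYp1=20230626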
+#
+#-----------------------------------------------------------------------
+#
+# Set sub-cycle and ensemble member names in file/directory names
+#
+#-----------------------------------------------------------------------
+#
+if [ ${subcyc} -ne 0 ]; then
+  export cycle="t${cyc}${subcyc}z"
+fi
+if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ] && [ ! -z ${ENSMEM_INDX} ]; then
+  export dot_ensmem=".mem${ENSMEM_INDX}"
+else
+  export dot_ensmem=
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Call the ex-script for this J-job.
+#
+#-----------------------------------------------------------------------
+#
+export pgmout="${DATA}/OUTPUT.$$"
+env
+
+${SCRIPTSsrw}/exsrw_smoke_dust.sh
+export err=$?; err_chk
+
+if [ -e "$pgmout" ]; then
+  cat $pgmout
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Whether or not working directory DATA should be kept.
+#
+#-----------------------------------------------------------------------
+#
+if [ "${KEEPDATA}" = "NO" ]; then
+  rm -rf ${DATA}
+fi
+date
+
diff --git a/parm/FV3.input.yml b/parm/FV3.input.yml
index efb6c85f5b..bf12a5e73f 100644
--- a/parm/FV3.input.yml
+++ b/parm/FV3.input.yml
@@ -100,6 +100,167 @@ FV3_HRRR:
     nst_anl: null
     nstf_name: null
+FV3_HRRR_gf:
+  atmos_model_nml:
+    avg_max_length: 3600.
+    ignore_rst_cksum: true
+  external_ic_nml:
+    levp: 66
+  fv_core_nml:
+    agrid_vel_rst: true
+    d_con: 0.5
+    d2_bg_k2: 0.04
+    dz_min: 2.0
+    fv_sg_adj: 7200
+    hord_dp: 6
+    hord_mt: 6
+    hord_tm: 6
+    hord_tr: 8
+    hord_vt: 6
+    k_split: 2
+    kord_mt: 9
+    kord_tm: -9
+    kord_tr: 9
+    kord_wz: 9
+    n_split: 5
+    n_sponge: 65
+    nord_tr: 0
+    npz: 65
+    psm_bc: 1
+    range_warn: False
+    regional_bcs_from_gsi: false
+    rf_cutoff: 2000.0
+    sg_cutoff: -1
+    vtdm4: 0.02
+    write_restart_with_bcs: false
+  gfs_physics_nml:
+    addsmoke_flag: 1
+    aero_dir_fdb: true
+    aero_ind_fdb: false
+    bl_mynn_edmf: 1
+    bl_mynn_edmf_mom: 1
+    bl_mynn_tkeadvect: true
+    cal_pre: false
+    cdmbgwd: [3.5, 1.0]
+    clm_debug_print: false
+    clm_lake_debug: false
+    cnvcld: false
+    cnvgwd: false
+    coarsepm_settling: 1
+    debug: false
+    diag_log: true
+    do_deep: true
+    do_gsl_drag_ls_bl: true
+    do_gsl_drag_ss: true
+    do_gsl_drag_tofd: true
+    do_mynnedmf: true
+    do_mynnsfclay: true
+    do_plumerise: true
+    do_sfcperts: null
+    do_smoke_transport: true
+    do_tofd: false
+    do_ugwp: false
+    do_ugwp_v0: false
+    do_ugwp_v0_nst_only: false
+    do_ugwp_v0_orog_only: false
+    drydep_opt: 1
+    dspheat: true
+    dt_inner: 36
+    dust_alpha: 10.0
+    dust_drylimit_factor: 0.5
+    dust_gamma: 1.3
+    dust_moist_correction: 2.0
+    dust_opt: 1
+    effr_in: true
+    enh_mix : false
+    fhcyc: 0.0
+    fhlwr: 900.0
+    fhswr: 900.0
+    fhzero: 1.0
+    frac_ice: true
+    gwd_opt: 3
+    h2o_phys: true
+    hybedmf: false
+    iaer: 1011
+    ialb: 2
+    iau_delthrs: 6
+    iaufhrs: 30
+    iccn: 2
+    icliq_sw: 2
+    iems: 2
+    imfdeepcnv: 3
+    imfshalcnv: -1
+    imp_physics: 8
+    iopt_alb: 2
+    iopt_btr: 1
+    iopt_crs: 1
+    iopt_dveg: 2
+    iopt_frz: 1
+    iopt_inf: 1
+    iopt_lake: 2
+    iopt_rad: 1
+    iopt_run: 1
+    iopt_sfc: 1
+    iopt_snf: 4
+    iopt_stc: 1
+    iopt_tbot: 2
+    iovr: 3
+    isncond_opt: 2
+    isncovr_opt: 3
+    isol: 2
+    isot: 1
+    isubc_lw: 2
+    isubc_sw: 2
+    ivegsrc: 1
+    ldiag3d: false
+    ldiag_ugwp: false
+    lgfdlmprad: false
+    lheatstrg: false
+    lightning_threat: true
+    lkm: 1
+    lradar: true
+    lrefres: true
+    lsm: 3
+    lsoil_lsm: 9
+    ltaerosol: true
+    lwhtr: true
+    min_lakeice: 0.15
+    min_seaice: 0.15
+    mix_chem : true
+    mosaic_lu: 0
+    mosaic_soil: 0
+    nsfullradar_diag: 3600
+    nst_anl: null
+    nstf_name: null
+    oz_phys: false
+
oz_phys_2015: true + pdfcld: false + plume_wind_eff : 1 + plumerisefire_frq : 30 + pre_rad: false + print_diff_pgr: true + prslrd0: 0.0 + random_clds: false + redrag: true + rrfs_sd : false + rrfs_smoke_debug : false + satmedmf: false + sc_factor: 1.0 + seas_opt : 0 + sfclay_compute_flux: true + shal_cnv: false + smoke_conv_wet_coef : [0.5, 0.5, 0.5] + hwp_method: 1 + swhtr: true + thsfc_loc: false + trans_trac: true + ttendlim: -999 + use_ufo: true + wetdep_ls_alpha : 0.5 + wetdep_ls_opt : 1 + fv_diagnostics_nml: + do_hailcast: true + FV3_RAP: fv_core_nml: <<: *HRRR_fv_core diff --git a/parm/diag_table_smoke_dust.FV3_HRRR_gf b/parm/diag_table_smoke_dust.FV3_HRRR_gf new file mode 100644 index 0000000000..d46aa6e831 --- /dev/null +++ b/parm/diag_table_smoke_dust.FV3_HRRR_gf @@ -0,0 +1,523 @@ +{{ starttime.strftime("%Y%m%d.%H") }}Z.{{ cres }}.32bit.non-hydro.regional +{{ starttime.strftime("%Y %m %d %H %M %S") }} + +"grid_spec", -1, "months", 1, "days", "time" +"atmos_static", -1, "hours", 1, "hours", "time" +#"atmos_4xdaily", 1, "hours", 1, "days", "time" +"fv3_history", 0, "hours", 1, "hours", "time" +"fv3_history2d", 0, "hours", 1, "hours", "time" + +# +#======================= +# ATMOSPHERE DIAGNOSTICS +#======================= +### +# grid_spec +### + "dynamics", "grid_lon", "grid_lon", "grid_spec", "all", .false., "none", 2, + "dynamics", "grid_lat", "grid_lat", "grid_spec", "all", .false., "none", 2, + "dynamics", "grid_lont", "grid_lont", "grid_spec", "all", .false., "none", 2, + "dynamics", "grid_latt", "grid_latt", "grid_spec", "all", .false., "none", 2, + "dynamics", "area", "area", "grid_spec", "all", .false., "none", 2, +### +# 4x daily output +### +# "dynamics", "slp", "slp", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "vort850", "vort850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "vort200", "vort200", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "us", "us", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "u1000", "u1000", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "u850", "u850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "u700", "u700", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "u500", "u500", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "u200", "u200", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "u100", "u100", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "u50", "u50", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "u10", "u10", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "vs", "vs", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "v1000", "v1000", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "v850", "v850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "v700", "v700", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "v500", "v500", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "v200", "v200", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "v100", "v100", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "v50", "v50", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "v10", "v10", "atmos_4xdaily", "all", .false., "none", 2 +#### +# "dynamics", "tm", "tm", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "t1000", "t1000", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "t850", "t850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "t700", "t700", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", 
"t500", "t500", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "t200", "t200", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "t100", "t100", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "t50", "t50", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "t10", "t10", "atmos_4xdaily", "all", .false., "none", 2 +#### +# "dynamics", "z1000", "z1000", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "z850", "z850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "z700", "z700", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "z500", "z500", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "z200", "z200", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "z100", "z100", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "z50", "z50", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "z10", "z10", "atmos_4xdaily", "all", .false., "none", 2 +#### +# "dynamics", "w1000", "w1000", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "w850", "w850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "w700", "w700", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "w500", "w500", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "w200", "w200", "atmos_4xdaily", "all", .false., "none", 2 +#### +# "dynamics", "q1000", "q1000", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "q850", "q850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "q700", "q700", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "q500", "q500", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "q200", "q200", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "q100", "q100", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "q50", "q50", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "q10", "q10", "atmos_4xdaily", "all", .false., "none", 2 +#### +# "dynamics", "rh1000", "rh1000", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "rh850", "rh850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "rh700", "rh700", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "rh500", "rh500", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "rh200", "rh200", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "omg1000", "omg1000", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "omg850", "omg850", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "omg700", "omg700", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "omg500", "omg500", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "omg200", "omg200", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "omg100", "omg100", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "omg50", "omg50", "atmos_4xdaily", "all", .false., "none", 2 +# "dynamics", "omg10", "omg10", "atmos_4xdaily", "all", .false., "none", 2 +### +# gfs static data +### + "dynamics", "pk", "pk", "atmos_static", "all", .false., "none", 2 + "dynamics", "bk", "bk", "atmos_static", "all", .false., "none", 2 + "dynamics", "hyam", "hyam", "atmos_static", "all", .false., "none", 2 + "dynamics", "hybm", "hybm", "atmos_static", "all", .false., "none", 2 + "dynamics", "zsurf", "zsurf", "atmos_static", "all", .false., "none", 2 +### +# FV3 variabls needed for NGGPS evaluation +### +"gfs_dyn", "ucomp", "ugrd", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "vcomp", "vgrd", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "sphum", "spfh", "fv3_history", 
"all", .false., "none", 2 +"gfs_dyn", "temp", "tmp", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "liq_wat", "clwmr", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "o3mr", "o3mr", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "delp", "dpres", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "delz", "delz", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "w", "dzdt", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "ice_wat", "icmr", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "rainwat", "rwmr", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "snowwat", "snmr", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "graupel", "grle", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "ps", "pressfc", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "hs", "hgtsfc", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "refl_10cm" "refl_10cm" "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "ice_nc", "nicp", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "rain_nc", "rain_nc", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "water_nc", "water_nc", "fv3_history", "all", .false., "none", 2 + +"gfs_dyn", "wmaxup", "upvvelmax", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "wmaxdn", "dnvvelmax", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "uhmax03", "uhmax03", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "uhmax25", "uhmax25", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "uhmin03", "uhmin03", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "uhmin25", "uhmin25", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "maxvort01", "maxvort01", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "maxvort02", "maxvort02", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "maxvorthy1", "maxvorthy1", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "hailcast_dhail_max", "hailcast_dhail", "fv3_history2d", "all", .false., "none", 2 + +"gfs_phys", "ALBDO_ave", "albdo_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "cnvprcp_ave", "cprat_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "cnvprcpb_ave", "cpratb_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "totprcp_ave", "prate_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "totprcpb_ave", "prateb_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "DLWRF", "dlwrf_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "DLWRFI", "dlwrf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "ULWRF", "ulwrf_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "ULWRFI", "ulwrf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "DSWRF", "dswrf_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "DSWRFI", "dswrf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "DSWRFCI", "dswrf_clr", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "USWRF", "uswrf_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "USWRFI", "uswrf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "DSWRFtoa", "dswrf_avetoa","fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "USWRFtoa", "uswrf_avetoa","fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "ULWRFtoa", "ulwrf_avetoa","fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "ULWRFItoa", "ulwrf_toa", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "gflux_ave", "gflux_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "hpbl", "hpbl", "fv3_history2d", "all", .false., 
"none", 2 +"gfs_phys", "lhtfl_ave", "lhtfl_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "shtfl_ave", "shtfl_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "pwat", "pwatclm", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "soilm", "soilm", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "TCDC_aveclm", "tcdc_aveclm", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "TCDC_avebndcl", "tcdc_avebndcl", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "TCDC_avehcl", "tcdc_avehcl", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "TCDC_avelcl", "tcdc_avelcl", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "TCDC_avemcl", "tcdc_avemcl", "fv3_history2d", "all", .false., "none", 2 +#"gfs_phys", "TCDCcnvcl", "tcdccnvcl", "fv3_history2d", "all", .false., "none", 2 +#"gfs_phys", "PREScnvclt", "prescnvclt", "fv3_history2d", "all", .false., "none", 2 +#"gfs_phys", "PREScnvclb", "prescnvclb", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "PRES_avehct", "pres_avehct", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "PRES_avehcb", "pres_avehcb", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "TEMP_avehct", "tmp_avehct", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "PRES_avemct", "pres_avemct", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "PRES_avemcb", "pres_avemcb", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "TEMP_avemct", "tmp_avemct", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "PRES_avelct", "pres_avelct", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "PRES_avelcb", "pres_avelcb", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "TEMP_avelct", "tmp_avelct", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "u-gwd_ave", "u-gwd_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "v-gwd_ave", "v-gwd_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "dusfc", "uflx_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "dvsfc", "vflx_ave", "fv3_history2d", "all", .false., "none", 2 +#"gfs_phys", "cnvw", "cnvcldwat", "fv3_history2d", "all", .false., "none", 2 + +"gfs_phys", "u10max", "u10max", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "v10max", "v10max", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "spd10max", "spd10max", "fv3_history2d", "all", .false., "none", 2 +"gfs_dyn", "ustm", "ustm", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "vstm", "vstm", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "srh01", "srh01", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "srh03", "srh03", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "pratemax", "pratemax", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "refdmax", "refdmax", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "refdmax263k","refdmax263k","fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "t02max", "t02max", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "t02min", "t02min", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "rh02max", "rh02max", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "rh02min", "rh02min", "fv3_history2d", "all", .false., "none", 2 + +"gfs_phys", "psurf", "pressfc", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "u10m", "ugrd10m", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "v10m", "vgrd10m", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "crain", "crain", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", 
"tprcp", "tprcp", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "hgtsfc", "orog", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "weasd", "weasd", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "f10m", "f10m", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "q2m", "spfh2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "t2m", "tmp2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "dpt2m", "dpt2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "tsfc", "tmpsfc", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "vtype", "vtype", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "stype", "sotyp", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slmsksfc", "land", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "vfracsfc", "veg", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "wetness", "wetness", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "zorlsfc", "sfcr", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "uustar", "fricv", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt1", "soilt1" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt2", "soilt2" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt3", "soilt3" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt4", "soilt4" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt5", "soilt5" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt6", "soilt6" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt7", "soilt7" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt8", "soilt8" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilt9", "soilt9" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw1", "soilw1" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw2", "soilw2" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw3", "soilw3" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw4", "soilw4" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw5", "soilw5" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw6", "soilw6" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw7", "soilw7" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw8", "soilw8" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "soilw9", "soilw9" "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_1", "soill1", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_2", "soill2", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_3", "soill3", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_4", "soill4", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_5", "soill5", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_6", "soill6", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_7", "soill7", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_8", "soill8", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slc_9", "soill9", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "slope", "sltyp", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "alnsf", "alnsf", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "alnwf", "alnwf", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "alvsf", "alvsf", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "alvwf", "alvwf", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "canopy", "cnwat", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "facsf", "facsf", 
"fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "facwf", "facwf", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "ffhh", "ffhh", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "ffmm", "ffmm", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "fice", "icec", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "hice", "icetk", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "snoalb", "snoalb", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "shdmax", "shdmax", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "shdmin", "shdmin", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "snowd", "snod", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "tg3", "tg3", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "tisfc", "tisfc", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "tref", "tref", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "z_c", "zc", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "c_0", "c0", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "c_d", "cd", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "w_0", "w0", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "w_d", "wd", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "xt", "xt", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "xz", "xz", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "dt_cool", "dtcool", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "xs", "xs", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "xu", "xu", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "xv", "xv", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "xtts", "xtts", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "xzts", "xzts", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "d_conv", "dconv", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "qrain", "qrain", "fv3_history2d", "all", .false., "none", 2 + +"gfs_phys", "acond", "acond", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "cduvb_ave", "cduvb_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "cpofp", "cpofp", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "duvb_ave", "duvb_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "csdlf_ave", "csdlf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "csusf_ave", "csusf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "csusf_avetoa", "csusftoa", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "csdsf_ave", "csdsf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "csulf_ave", "csulf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "csulf_avetoa", "csulftoa", "fv3_history2d", "all", .false., "none", 2 +#"gfs_phys", "cwork_ave", "cwork_aveclm", "fv3_history2d", "all", .false., "none", 2 +#"gfs_phys", "fldcp", "fldcp", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "hgt_hyblev1", "hgt_hyblev1", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "spfh_hyblev1", "spfh_hyblev1", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "ugrd_hyblev1", "ugrd_hyblev1", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "vgrd_hyblev1", "vgrd_hyblev1", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "tmp_hyblev1", "tmp_hyblev1", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "gfluxi", "gflux", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "lhtfl", "lhtfl", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "shtfl", "shtfl", "fv3_history2d", "all", .false., 
"none", 2 +"gfs_phys", "pevpr", "pevpr", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "pevpr_ave", "pevpr_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "sbsno_ave", "sbsno_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "sfexc", "sfexc", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "snohf", "snohf", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "snowc", "snowc", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "spfhmax2m", "spfhmax_max2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "spfhmin2m", "spfhmin_min2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "tmpmax2m", "tmax_max2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "tmpmin2m", "tmin_min2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "ssrun_acc", "ssrun_acc", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "sunsd_acc", "sunsd_acc", "fv3_history2d", "all", .false., "none", 2 +# "gfs_phys", "watr_acc", "watr_acc", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "wilt", "wilt", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "nirbmdi", "nirbmdi", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "nirdfdi", "nirdfdi", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "visbmdi", "visbmdi", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "visdfdi", "visdfdi", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "vbdsf_ave", "vbdsf_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "vddsf_ave", "vddsf_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "nbdsf_ave", "nbdsf_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "nddsf_ave", "nddsf_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "trans_ave", "transp_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "evbs_ave", "direvap_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "evcw_ave", "canevap_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "sbsno", "sublim", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "evbs", "direvap", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "evcw", "canevap", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "trans", "transp", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "snowmt_land", "snom_land", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "snowmt_ice", "snom_ice", "fv3_history2d", "all", .false., "none", 2 +# Aerosols (CCN, IN) from Thompson microphysics +"gfs_phys", "nwfa", "nwfa", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "nifa", "nifa", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "nwfa2d", "nwfa2d", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "nifa2d", "nifa2d", "fv3_history2d", "all", .false., "none", 2 +# Cloud effective radii from Thompson and WSM6 microphysics +"gfs_phys", "cleffr", "cleffr", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "cieffr", "cieffr", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "cseffr", "cseffr", "fv3_history", "all", .false., "none", 2 +# Prognostic/diagnostic variables from MYNN +"gfs_phys", "QC_BL", "qc_bl", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "CLDFRA", "cldfra", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "EL_PBL", "el_pbl", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "QKE", "qke", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "maxmf", "maxmf", "fv3_history2d", "all", .false., "none", 2 +#"gfs_sfc", "nupdraft", "nupdrafts", 
"fv3_history2d", "all", .false., "none", 2 +#"gfs_sfc", "ktop_shallow", "ktop_shallow", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "zol", "zol", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "flhc", "flhc", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "flqc", "flqc", "fv3_history2d", "all", .false., "none", 2 +# Prognostic/diagnostic variables from RUC LSM +"gfs_sfc", "rhofr", "rhofr", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "snowfall_acc_land", "snacc_land", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "acsnow_land", "accswe_land", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "snowfall_acc_ice", "snacc_ice", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "acsnow_ice", "accswe_ice", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "xlaixy", "xlaixy", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "sfalb", "sfalb", "fv3_history2d", "all", .false., "none", 2 +# Stochastic physics +#"gfs_phys", "sppt_wts", "sppt_wts", "fv3_history", "all", .false., "none", 2 +#"gfs_phys", "skebu_wts", "skebu_wts", "fv3_history", "all", .false., "none", 2 +#"gfs_phys", "skebv_wts", "skebv_wts", "fv3_history", "all", .false., "none", 2 +#"dynamics", "diss_est", "diss_est", "fv3_history", "all", .false., "none", 2 +#"gfs_phys", "shum_wts", "shum_wts", "fv3_history", "all", .false., "none", 2 +# +"gfs_phys", "frzr", "frzr", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "frzrb", "frzrb", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "frozr", "frozr", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "frozrb", "frozrb", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "tsnowp", "tsnowp", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "tsnowpb", "tsnowpb", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "snow", "snow", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "graupel", "graupel", "fv3_history2d", "all", .false., "none", 2 +# lightning threat indices +"gfs_sfc", "ltg1_max", "ltg1_max", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "ltg2_max", "ltg2_max", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "ltg3_max", "ltg3_max", "fv3_history2d", "all", .false., "none", 2 +# CLM lake model +"gfs_sfc", "lakefrac", "lakefrac", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lakedepth", "lakedepth", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_t2m", "lake_t2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_q2m", "lake_q2m", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_albedo", "lake_albedo", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_h2osno2d", "lake_h2osno2d", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_sndpth2d", "lake_sndpth2d", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_snl2d", "lake_snl2d", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_t_grnd2d", "lake_t_grnd2d", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_savedtke12d", "lake_savedtke12d", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_1", "lake_t_h2osoi_vol3d_1", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_2", "lake_t_h2osoi_vol3d_2", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_3", "lake_t_h2osoi_vol3d_3", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_4", "lake_t_h2osoi_vol3d_4", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", 
"lake_t_h2osoi_vol3d_5", "lake_t_h2osoi_vol3d_5", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_6", "lake_t_h2osoi_vol3d_6", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_7", "lake_t_h2osoi_vol3d_7", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_8", "lake_t_h2osoi_vol3d_8", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_9", "lake_t_h2osoi_vol3d_9", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_10", "lake_t_h2osoi_vol3d_10", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_11", "lake_t_h2osoi_vol3d_11", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_12", "lake_t_h2osoi_vol3d_12", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_13", "lake_t_h2osoi_vol3d_13", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_14", "lake_t_h2osoi_vol3d_14", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_vol3d_15", "lake_t_h2osoi_vol3d_15", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_1", "lake_t_h2osoi_liq3d_1", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_2", "lake_t_h2osoi_liq3d_2", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_3", "lake_t_h2osoi_liq3d_3", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_4", "lake_t_h2osoi_liq3d_4", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_5", "lake_t_h2osoi_liq3d_5", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_6", "lake_t_h2osoi_liq3d_6", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_7", "lake_t_h2osoi_liq3d_7", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_8", "lake_t_h2osoi_liq3d_8", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_9", "lake_t_h2osoi_liq3d_9", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_10", "lake_t_h2osoi_liq3d_10", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_11", "lake_t_h2osoi_liq3d_11", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_12", "lake_t_h2osoi_liq3d_12", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_13", "lake_t_h2osoi_liq3d_13", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_14", "lake_t_h2osoi_liq3d_14", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_liq3d_15", "lake_t_h2osoi_liq3d_15", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_1", "lake_t_h2osoi_ice3d_1", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_2", "lake_t_h2osoi_ice3d_2", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_3", "lake_t_h2osoi_ice3d_3", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_4", "lake_t_h2osoi_ice3d_4", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_5", "lake_t_h2osoi_ice3d_5", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_6", "lake_t_h2osoi_ice3d_6", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_7", "lake_t_h2osoi_ice3d_7", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_8", "lake_t_h2osoi_ice3d_8", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_9", 
"lake_t_h2osoi_ice3d_9", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_10", "lake_t_h2osoi_ice3d_10", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_11", "lake_t_h2osoi_ice3d_11", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_12", "lake_t_h2osoi_ice3d_12", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_13", "lake_t_h2osoi_ice3d_13", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_14", "lake_t_h2osoi_ice3d_14", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_h2osoi_ice3d_15", "lake_t_h2osoi_ice3d_15", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_1", "lake_t_soisno3d_1", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_2", "lake_t_soisno3d_2", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_3", "lake_t_soisno3d_3", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_4", "lake_t_soisno3d_4", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_5", "lake_t_soisno3d_5", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_6", "lake_t_soisno3d_6", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_7", "lake_t_soisno3d_7", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_8", "lake_t_soisno3d_8", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_9", "lake_t_soisno3d_9", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_10", "lake_t_soisno3d_10", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_11", "lake_t_soisno3d_11", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_12", "lake_t_soisno3d_12", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_13", "lake_t_soisno3d_13", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_14", "lake_t_soisno3d_14", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_soisno3d_15", "lake_t_soisno3d_15", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_1", "lake_t_lake3d_1", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_2", "lake_t_lake3d_2", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_3", "lake_t_lake3d_3", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_4", "lake_t_lake3d_4", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_5", "lake_t_lake3d_5", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_6", "lake_t_lake3d_6", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_7", "lake_t_lake3d_7", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_8", "lake_t_lake3d_8", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_9", "lake_t_lake3d_9", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_t_lake3d_10", "lake_t_lake3d_10", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_1", "lake_icefrac3d_1", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_2", "lake_icefrac3d_2", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_3", "lake_icefrac3d_3", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_4", "lake_icefrac3d_4", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_5", "lake_icefrac3d_5", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_6", 
"lake_icefrac3d_6", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_7", "lake_icefrac3d_7", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_8", "lake_icefrac3d_8", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_9", "lake_icefrac3d_9", "fv3_history", "all", .false., "none", 2 +"gfs_sfc", "lake_icefrac3d_10", "lake_icefrac3d_10", "fv3_history", "all", .false., "none", 2 + +"gfs_sfc", "use_lake_model", "use_lake_model", "fv3_history2d","all", .false., "none", 2 +#============================================================================================= +# +#====> This file can be used with diag_manager/v2.0a (or higher) <==== +# +# +# FORMATS FOR FILE ENTRIES (not all input values are used) +# ------------------------ +# +#"file_name", output_freq, "output_units", format, "time_units", "long_name", +# +# +#output_freq: > 0 output frequency in "output_units" +# = 0 output frequency every time step +# =-1 output frequency at end of run +# +#output_units = units used for output frequency +# (years, months, days, minutes, hours, seconds) +# +#time_units = units used to label the time axis +# (days, minutes, hours, seconds) +# +# +# FORMAT FOR FIELD ENTRIES (not all input values are used) +# ------------------------ +# +#"module_name", "field_name", "output_name", "file_name" "time_sampling", time_avg, "other_opts", packing +# +#time_avg = .true. or .false. +# +#packing = 1 double precision +# = 2 float +# = 4 packed 16-bit integers +# = 8 packed 1-byte (not tested?) +# This file contains diag_table entries for the RRFS-SD. +# It should be appended to the end of the diag_table before execution of the test. + +# Tracers +"gfs_dyn", "smoke", "smoke", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "dust", "dust", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "coarsepm", "coarsepm", "fv3_history", "all", .false., "none", 2 +"gfs_dyn", "smoke_ave", "smoke_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_dyn", "dust_ave", "dust_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_dyn", "coarsepm_ave", "coarsepm_ave", "fv3_history2d", "all", .false., "none", 2 + +# Aerosols emission for smoke +"gfs_sfc", "emdust", "emdust", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "coef_bb_dc", "coef_bb_dc", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "min_fplume", "min_fplume", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "max_fplume", "max_fplume", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "hwp", "hwp", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "hwp_ave", "hwp_ave", "fv3_history2d", "all", .false., "none", 2 +"gfs_sfc", "frp_output", "frp_output", "fv3_history2d", "all", .false., "none", 2 +"gfs_phys", "ebu_smoke", "ebu_smoke", "fv3_history", "all", .false., "none", 2 +"gfs_phys", "ext550", "ext550", "fv3_history", "all", .false., "none", 2 diff --git a/parm/field_table_smoke_dust.FV3_HRRR_gf b/parm/field_table_smoke_dust.FV3_HRRR_gf new file mode 100644 index 0000000000..3d4e0afcd4 --- /dev/null +++ b/parm/field_table_smoke_dust.FV3_HRRR_gf @@ -0,0 +1,80 @@ +# added by FRE: sphum must be present in atmos +# specific humidity for moist runs + "TRACER", "atmos_mod", "sphum" + "longname", "specific humidity" + "units", "kg/kg" + "profile_type", "fixed", "surface_value=1.e-6" / +# prognostic cloud water mixing ratio + "TRACER", "atmos_mod", "liq_wat" + "longname", "cloud water mixing ratio" + "units", "kg/kg" + "profile_type", "fixed", "surface_value=1.e30" / +# prognostic 
ice water mixing ratio + "TRACER", "atmos_mod", "ice_wat" + "longname", "cloud ice mixing ratio" + "units", "kg/kg" + "profile_type", "fixed", "surface_value=1.e30" / +# prognostic rain water mixing ratio + "TRACER", "atmos_mod", "rainwat" + "longname", "rain water mixing ratio" + "units", "kg/kg" + "profile_type", "fixed", "surface_value=1.e30" / +# prognostic snow water mixing ratio + "TRACER", "atmos_mod", "snowwat" + "longname", "snow water mixing ratio" + "units", "kg/kg" + "profile_type", "fixed", "surface_value=1.e30" / +# prognostic graupel mixing ratio + "TRACER", "atmos_mod", "graupel" + "longname", "graupel mixing ratio" + "units", "kg/kg" + "profile_type", "fixed", "surface_value=1.e30" / +# prognostic cloud water number concentration + "TRACER", "atmos_mod", "water_nc" + "longname", "cloud liquid water number concentration" + "units", "/kg" + "profile_type", "fixed", "surface_value=0.0" / +# prognostic cloud ice number concentration + "TRACER", "atmos_mod", "ice_nc" + "longname", "cloud ice water number concentration" + "units", "/kg" + "profile_type", "fixed", "surface_value=0.0" / +# prognostic rain number concentration + "TRACER", "atmos_mod", "rain_nc" + "longname", "rain number concentration" + "units", "/kg" + "profile_type", "fixed", "surface_value=0.0" / +# prognostic ozone mixing ratio tracer + "TRACER", "atmos_mod", "o3mr" + "longname", "ozone mixing ratio" + "units", "kg/kg" + "profile_type", "fixed", "surface_value=1.e30" / +# water- and ice-friendly aerosols (Thompson) + "TRACER", "atmos_mod", "liq_aero" + "longname", "water-friendly aerosol number concentration" + "units", "/kg" + "profile_type", "fixed", "surface_value=0.0" / + "TRACER", "atmos_mod", "ice_aero" + "longname", "ice-friendly aerosol number concentration" + "units", "/kg" + "profile_type", "fixed", "surface_value=0.0" / +# prognostic subgrid scale turbulent kinetic energy + "TRACER", "atmos_mod", "sgs_tke" + "longname", "subgrid scale turbulent kinetic energy" + "units", "m2/s2" + "profile_type", "fixed", "surface_value=0.0" / +# prognostic smoke mixing ratio tracer + "TRACER", "atmos_mod", "smoke" + "longname", "smoke mixing ratio" + "units", "ug/kg" + "profile_type", "fixed", "surface_value=1.e-12" / +# prognostic dust mixing ratio tracer + "TRACER", "atmos_mod", "dust" + "longname", "dust mixing ratio" + "units", "ug/kg" + "profile_type", "fixed", "surface_value=1.e-12" / +# prognostic coarsepm mixing ratio tracer + "TRACER", "atmos_mod", "coarsepm" + "longname", "coarsepm mixing ratio" + "units", "ug/kg" + "profile_type", "fixed", "surface_value=1.e-12" / diff --git a/parm/input.nml.FV3 b/parm/input.nml.FV3 index bd6244d2d9..53cd6e5a1d 100644 --- a/parm/input.nml.FV3 +++ b/parm/input.nml.FV3 @@ -28,7 +28,7 @@ / &diag_manager_nml - max_output_fields = 450 + max_output_fields = 500 prepend_date = .false. 
/ diff --git a/parm/wflow/smoke_dust.yaml b/parm/wflow/smoke_dust.yaml new file mode 100644 index 0000000000..18bab50787 --- /dev/null +++ b/parm/wflow/smoke_dust.yaml @@ -0,0 +1,90 @@ +default_smoke_dust_task: &default_smoke_dust + account: '&ACCOUNT;' + attrs: + cycledefs: forecast + maxtries: '2' + envars: &default_vars + GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' + HOMEdir: '&HOMEdir;' + envir: '&envir;' + model_ver: '&model_ver;' + KEEPDATA: '&KEEPDATA;' + SENDCOM: '&SENDCOM;' + COMROOT: '&COMROOT;' + DATAROOT: '&DATAROOT;' + DCOMROOT: '&DCOMROOT;' + LOGDIR: !cycstr "&LOGDIR;" + PDY: !cycstr "@Y@m@d" + cyc: !cycstr "@H" + nprocs: '{{ parent.nnodes * parent.ppn // 1 }}' + subcyc: !cycstr "@M" + SLASH_ENSMEM_SUBDIR: '&SLASH_ENSMEM_SUBDIR;' + ENSMEM_INDX: '#mem#' + native: '{{ platform.SCHED_NATIVE_CMD }}' + nnodes: 1 + nodes: '{{ nnodes }}:ppn={{ ppn }}' + partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}' + ppn: 1 + queue: '&QUEUE_DEFAULT;' + walltime: 00:30:00 + +task_smoke_dust: + <<: *default_smoke_dust + command: '&LOAD_MODULES_RUN_TASK; "smoke_dust" "&HOMEdir;/jobs/JSRW_SMOKE_DUST"' + join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;' + memory: 120G + dependency: + and: + or_grid: + datadep_grid: + attrs: + age: 00:00:00:05 + text: '{{ task_make_grid.GRID_DIR }}/make_grid_task_complete.txt' + streq: + left: staged_grid + right: '{% if not rocoto.get("tasks", {}).get("task_make_grid") %}staged_grid{% endif %}' + or_orog: + datadep_orog: + attrs: + age: 00:00:00:05 + text: '{{ task_make_orog.OROG_DIR }}/make_orog_task_complete.txt' + streq: + left: staged_orog + right: '{% if not rocoto.get("tasks", {}).get("task_make_orog") %}staged_orog{% endif %}' + or_sfc_climo: + datadep_sfc_climo: + attrs: + age: 00:00:00:05 + text: '{{ task_make_sfc_climo.SFC_CLIMO_DIR }}/make_sfc_climo_task_complete.txt' + streq: + left: staged_sfc_climo + right: '{% if not rocoto.get("tasks", {}).get("task_make_sfc_climo") %}staged_sfc_climo{% endif %}' + or: + datadep_coldstart: + attrs: + age: 00:00:00:05 + text: !cycstr '{{ workflow.EXPTDIR }}/task_skip_coldstart_@Y@m@d@H00.txt' + datadep_warmstart_tracer: + attrs: + age: 00:00:00:05 + text: !cycstr '&WARMSTART_CYCLE_DIR;/RESTART/@Y@m@d.@H@M@S.fv_tracer.res.tile1.nc' + datadep_comin_tracer: + attrs: + age: 00:00:00:05 + text: '&COMIN_DIR;/RESTART/@Y@m@d.@H@M@S.fv_tracer.res.tile1.nc' + +task_prepstart: + <<: *default_smoke_dust + command: '&LOAD_MODULES_RUN_TASK; "prepstart" "&HOMEdir;/jobs/JSRW_PREPSTART"' + join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;' + memory: 120G + dependency: + or_smoke_dust: + not: + taskvalid: + attrs: + task: smoke_dust + taskdep: + attrs: + task: smoke_dust + diff --git a/scripts/exsrw_prepstart.sh b/scripts/exsrw_prepstart.sh new file mode 100755 index 0000000000..d1e9d6a4f2 --- /dev/null +++ b/scripts/exsrw_prepstart.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash + +set -xue +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. 
${PARMsrw}/source_util_funcs.sh +task_global_vars=( "COLDSTART" "DATE_FIRST_CYCL" "DO_SMOKE_DUST" \ + "INCR_CYCL_FREQ" "IO_LAYOUT_Y" "PRE_TASK_CMDS" ) +for var in ${task_global_vars[@]}; do + source_config_for_task ${var} ${GLOBAL_VAR_DEFNS_FP} +done +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# +#{ save_shell_opts; set -xue; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# +scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) +scrfunc_fn=$( basename "${scrfunc_fp}" ) +scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Print message indicating entry into script. +# +#----------------------------------------------------------------------- +# +print_info_msg " +======================================================================== +Entering script: \"${scrfunc_fn}\" +In directory: \"${scrfunc_dir}\" + +This is the ex-script for the task that runs prepstart. +========================================================================" +# +#----------------------------------------------------------------------- +# +# update IC files +# +#----------------------------------------------------------------------- +if [ $(boolify "${COLDSTART}") = "TRUE" ] && [ "${PDY}${cyc}" = "${DATE_FIRST_CYCL:0:10}" ]; then + echo "This step is skipped for the first cycle of COLDSTART." +else + eval ${PRE_TASK_CMDS} + if [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then + # IC gfs data file: gfs_data.tile7.halo0.nc + gfs_ic_fn="${NET}.${cycle}${dot_ensmem}.gfs_data.tile7.halo0.nc" + gfs_ic_fp="${DATA_SHARE}/${gfs_ic_fn}" + gfs_ic_mod_fn="gfs_data.tile7.halo0.nc" + cp -p ${gfs_ic_fp} ${gfs_ic_mod_fn} + + # restart tracer file: fv_tracer.res.tile1.nc + bkpath_find="missing" + if [ "${bkpath_find}" = "missing" ]; then + restart_prefix="${PDY}.${cyc}0000." 
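+      # Search backward from the previous cycle, in INCR_CYCL_FREQ-hour steps
+      # (up to 25 hours), for the most recent RESTART directory holding a
+      # readable fv_tracer restart file; the first match is used for cycling.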
+      CDATEprev=$($NDATE -${INCR_CYCL_FREQ} ${PDY}${cyc}) + PDYprev=${CDATEprev:0:8} + cycprev=${CDATEprev:8:2} + path_restart=${COMIN}/${RUN}.${PDYprev}/${cycprev}${SLASH_ENSMEM_SUBDIR}/RESTART + + n=${INCR_CYCL_FREQ} + while [[ $n -le 25 ]] ; do + if [ "${IO_LAYOUT_Y}" = "1" ]; then + checkfile=${path_restart}/${restart_prefix}fv_tracer.res.tile1.nc + else + checkfile=${path_restart}/${restart_prefix}fv_tracer.res.tile1.nc.0000 + fi + if [ -r "${checkfile}" ] && [ "${bkpath_find}" = "missing" ]; then + bkpath_find=${path_restart} + print_info_msg "Found ${checkfile}; using it for the smoke/dust cycle" + break + fi + n=$((n + ${INCR_CYCL_FREQ})) + CDATEprev=$($NDATE -$n ${PDY}${cyc}) + PDYprev=${CDATEprev:0:8} + cycprev=${CDATEprev:8:2} + path_restart=${COMIN}/${RUN}.${PDYprev}/${cycprev}${SLASH_ENSMEM_SUBDIR}/RESTART + print_info_msg "Trying this path: ${path_restart}" + done + fi + + # cycle smoke/dust + if [ "${bkpath_find}" = "missing" ]; then + print_info_msg "WARNING: cannot find smoke/dust files from previous cycle" + else + if [ "${IO_LAYOUT_Y}" = "1" ]; then + checkfile=${bkpath_find}/${restart_prefix}fv_tracer.res.tile1.nc + if [ -r "${checkfile}" ]; then + ncks -A -v smoke,dust,coarsepm ${checkfile} fv_tracer.res.tile1.nc + fi + else + for ii in ${list_iolayout} + do + iii=$(printf %4.4i $ii) + checkfile=${bkpath_find}/${restart_prefix}fv_tracer.res.tile1.nc.${iii} + if [ -r "${checkfile}" ]; then + ncks -A -v smoke,dust,coarsepm ${checkfile} fv_tracer.res.tile1.nc.${iii} + fi + done + fi + echo "${PDY}${cyc}: cycle smoke/dust from ${checkfile} " + fi + + ${USHsrw}/smoke_dust_add_smoke.py + export err=$? + if [ $err -ne 0 ]; then + message_txt="add_smoke.py failed with return code $err" + err_exit "${message_txt}" + print_err_msg_exit "${message_txt}" + fi + # copy output to COMOUT + cp -p ${gfs_ic_mod_fn} ${COMOUT}/${gfs_ic_fn} + fi +fi +# +#----------------------------------------------------------------------- +# +# Print message indicating successful completion of script. +# +#----------------------------------------------------------------------- +# +print_info_msg " +======================================================================== +PREPSTART has completed successfully !!!! + +Exiting script: \"${scrfunc_fn}\" +In directory: \"${scrfunc_dir}\" +========================================================================" +# +#----------------------------------------------------------------------- +# +#{ restore_shell_opts; } > /dev/null 2>&1 diff --git a/scripts/exsrw_smoke_dust.sh b/scripts/exsrw_smoke_dust.sh new file mode 100755 index 0000000000..47888f2739 --- /dev/null +++ b/scripts/exsrw_smoke_dust.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash + +set -xue + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. ${PARMsrw}/source_util_funcs.sh +task_global_vars=( "EBB_DCYCLE" "FIXsmoke" "INCR_CYCL_FREQ" \ + "PERSISTENCE" "PRE_TASK_CMDS" "PREDEF_GRID_NAME" "RESTART_INTERVAL" \ + "SMOKE_DUST_FILE_PREFIX" ) +for var in ${task_global_vars[@]}; do + source_config_for_task ${var} ${GLOBAL_VAR_DEFNS_FP} +done +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function.
+# +#----------------------------------------------------------------------- +# +#{ save_shell_opts; set -xue; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# +scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) +scrfunc_fn=$( basename "${scrfunc_fp}" ) +scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Print message indicating entry into script. +# +#----------------------------------------------------------------------- +# +print_info_msg " +======================================================================== +Entering script: \"${scrfunc_fn}\" +In directory: \"${scrfunc_dir}\" + +This is the ex-script for the task that runs Smoke and Dust. +========================================================================" +# +# Set CDATE used in the fire emission generation python script +# +export CDATE="${PDY}${cyc}" +# +# Check if the fire file exists in the designated directory +# +smokeFile="${SMOKE_DUST_FILE_PREFIX}_${CDATE}00.nc" +if [ -e "${COMINsmoke}/${smokeFile}" ]; then + cp -p "${COMINsmoke}/${smokeFile}" ${COMOUT} +else + eval ${PRE_TASK_CMDS} + # + # Link restart directory of the previous cycle in COMIN/COMOUT + # + CDATEprev=$($NDATE -${INCR_CYCL_FREQ} ${PDY}${cyc}) + PDYprev=${CDATEprev:0:8} + cycprev=${CDATEprev:8:2} + path_restart=${COMIN}/${RUN}.${PDYprev}/${cycprev}${SLASH_ENSMEM_SUBDIR}/RESTART + ln -nsf ${path_restart} . + + # Check whether the RAVE files need to be split into hourly files + if [ "${EBB_DCYCLE}" -eq 1 ]; then + ddhh_to_use="${PDY}${cyc}" + else + ddhh_to_use="${PDYm1}${cyc}" + fi + for hour in {00..23}; do + fire_hr_cdate=$($NDATE +${hour} ${ddhh_to_use}) + fire_hr_pdy="${fire_hr_cdate:0:8}" + fire_hr_fn="Hourly_Emissions_3km_${fire_hr_cdate}00_${fire_hr_cdate}00.nc" + if [ -f "${COMINrave}/${fire_hr_fn}" ]; then + echo "Hourly emission file for $hour was found: ${fire_hr_fn}" + ln -nsf ${COMINrave}/${fire_hr_fn} . + else + # Check various versions of RAVE raw data files (new and old) + rave_raw_fn1="RAVE-HrlyEmiss-3km_v2r0_blend_s${fire_hr_cdate}00000_e${fire_hr_pdy}23*" + rave_raw_fn2="Hourly_Emissions_3km_${fire_hr_cdate}00_${fire_hr_pdy}23*" + # Find files matching the specified patterns + files_found=$(find "${COMINrave}" -type f \( -name "${rave_raw_fn1##*/}" -o -name "${rave_raw_fn2##*/}" \)) + # Split 24-hour RAVE raw data into hourly files (find returns full paths) + for file_to_use in $files_found; do + echo "Using file: $file_to_use" + echo "Splitting data for hour $hour..." + ncks -d time,$hour,$hour "${file_to_use}" "${DATA}/${fire_hr_fn}" + if [ -f "${DATA}/${fire_hr_fn}" ]; then + break + else + echo "WARNING: Hourly emission file for $hour was NOT created from ${file_to_use}." + fi + done + fi + done + # + #----------------------------------------------------------------------- + # + # Call python script to generate fire emission files. + # + #----------------------------------------------------------------------- + # + ${USHsrw}/smoke_dust_generate_fire_emissions.py \ + "${FIXsmoke}/${PREDEF_GRID_NAME}" \ + "${DATA}" \ + "${DATA_SHARE}" \ + "${PREDEF_GRID_NAME}" \ + "${EBB_DCYCLE}" \ + "${RESTART_INTERVAL}" \ + "${PERSISTENCE}" + export err=$?
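+  # Abort the task if the Python generator failed (err_exit covers NCO mode,
+  # print_err_msg_exit covers community mode).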
+ if [ $err -ne 0 ]; then + message_txt="generate_fire_emissions.py failed with return code $err" + err_exit "${message_txt}" + print_err_msg_exit "${message_txt}" + fi + + # Copy Smoke file to COMOUT + cp -p ${DATA}/${smokeFile} ${COMOUT} +fi +# +#----------------------------------------------------------------------- +# +# Print message indicating successful completion of script. +# +#----------------------------------------------------------------------- +# +print_info_msg " +======================================================================== +Smoke and Dust has successfully generated output files !!!! + +Exiting script: \"${scrfunc_fn}\" +In directory: \"${scrfunc_dir}\" +========================================================================" +# +#----------------------------------------------------------------------- +# +#{ restore_shell_opts; } > /dev/null 2>&1 diff --git a/sorc/CMakeLists.txt b/sorc/CMakeLists.txt index e84319ad6e..2e69635d59 100644 --- a/sorc/CMakeLists.txt +++ b/sorc/CMakeLists.txt @@ -30,7 +30,7 @@ if (BUILD_UFS) if(CPL_AQM) set(CCPP_SUITES "FV3_GFS_v15p2,FV3_GFS_v16,FV3_GFS_v17_p8") else() - set(CCPP_SUITES "FV3_GFS_v15p2,FV3_GFS_v16,FV3_GFS_v17_p8,FV3_RRFS_v1beta,FV3_HRRR,FV3_RAP,FV3_GFS_v15_thompson_mynn_lam3km,FV3_WoFS_v0") + set(CCPP_SUITES "FV3_GFS_v15p2,FV3_GFS_v16,FV3_GFS_v17_p8,FV3_RRFS_v1beta,FV3_HRRR,FV3_HRRR_gf,FV3_RAP,FV3_GFS_v15_thompson_mynn_lam3km,FV3_WoFS_v0") endif() endif() diff --git a/tests/WE2E/test_configs/smoke_dust/config.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml b/tests/WE2E/test_configs/smoke_dust/config.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml new file mode 100644 index 0000000000..8bea3cb198 --- /dev/null +++ b/tests/WE2E/test_configs/smoke_dust/config.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml @@ -0,0 +1,66 @@ +metadata: + description: config for Smoke and Dust, RRFS_CONUS_3km +user: + RUN_ENVIR: community +platform: + BUILD_MOD_FN: 'build_{{ user.MACHINE|lower() }}_intel_prod' +workflow: + PREDEF_GRID_NAME: RRFS_CONUS_3km + CCPP_PHYS_SUITE: FV3_HRRR_gf + DATE_FIRST_CYCL: '2019072200' + DATE_LAST_CYCL: '2019072206' + INCR_CYCL_FREQ: 6 + FCST_LEN_HRS: 6 + PREEXISTING_DIR_METHOD: rename + VERBOSE: true + DEBUG: false + COMPILER: intel + DIAG_TABLE_TMPL_FN: diag_table_smoke_dust.FV3_HRRR_gf + FIELD_TABLE_TMPL_FN: field_table_smoke_dust.FV3_HRRR_gf + DO_REAL_TIME: false + COLDSTART: true +nco: + envir_default: we2e_smoke_dust + NET_default: we2e_smoke_dust + RUN_default: we2e_smoke_dust +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/smoke_dust.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml"]|include }}' + metatask_run_ensemble: + task_run_fcst_mem#mem#: + walltime: 02:00:00 +task_get_extrn_ics: + EXTRN_MDL_NAME_ICS: RAP + EXTRN_MDL_ICS_OFFSET_HRS: 0 + USE_USER_STAGED_EXTRN_FILES: true +task_get_extrn_lbcs: + EXTRN_MDL_NAME_LBCS: RAP + LBC_SPEC_INTVL_HRS: 6 + EXTRN_MDL_LBCS_OFFSET_HRS: 0 + USE_USER_STAGED_EXTRN_FILES: true +task_make_ics: + VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" +task_make_lbcs: + VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" +task_run_fcst: + DT_ATMOS: 36 + LAYOUT_X: 15 + LAYOUT_Y: 20 + BLOCKSIZE: 32 + WRTCMP_write_tasks_per_group: 40 + RESTART_INTERVAL: 6 12 18 24 + QUILTING: true + PRINT_ESMF: false + DO_FCST_RESTART: false +task_run_post: + POST_OUTPUT_DOMAIN_NAME: conus3km + USE_CUSTOM_POST_CONFIG_FILE: false +global: + DO_ENSEMBLE: false + NUM_ENS_MEMBERS: 2 + HALO_BLEND: 20 +smoke_dust_parm: + 
DO_SMOKE_DUST: true + EBB_DCYCLE: 1 + SMOKE_DUST_FILE_PREFIX: "SMOKE_RRFS_data" + diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml new file mode 100644 index 0000000000..8bea3cb198 --- /dev/null +++ b/ush/config.smoke_dust.yaml @@ -0,0 +1,66 @@ +metadata: + description: config for Smoke and Dust, RRFS_CONUS_3km +user: + RUN_ENVIR: community +platform: + BUILD_MOD_FN: 'build_{{ user.MACHINE|lower() }}_intel_prod' +workflow: + PREDEF_GRID_NAME: RRFS_CONUS_3km + CCPP_PHYS_SUITE: FV3_HRRR_gf + DATE_FIRST_CYCL: '2019072200' + DATE_LAST_CYCL: '2019072206' + INCR_CYCL_FREQ: 6 + FCST_LEN_HRS: 6 + PREEXISTING_DIR_METHOD: rename + VERBOSE: true + DEBUG: false + COMPILER: intel + DIAG_TABLE_TMPL_FN: diag_table_smoke_dust.FV3_HRRR_gf + FIELD_TABLE_TMPL_FN: field_table_smoke_dust.FV3_HRRR_gf + DO_REAL_TIME: false + COLDSTART: true +nco: + envir_default: we2e_smoke_dust + NET_default: we2e_smoke_dust + RUN_default: we2e_smoke_dust +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/smoke_dust.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml"]|include }}' + metatask_run_ensemble: + task_run_fcst_mem#mem#: + walltime: 02:00:00 +task_get_extrn_ics: + EXTRN_MDL_NAME_ICS: RAP + EXTRN_MDL_ICS_OFFSET_HRS: 0 + USE_USER_STAGED_EXTRN_FILES: true +task_get_extrn_lbcs: + EXTRN_MDL_NAME_LBCS: RAP + LBC_SPEC_INTVL_HRS: 6 + EXTRN_MDL_LBCS_OFFSET_HRS: 0 + USE_USER_STAGED_EXTRN_FILES: true +task_make_ics: + VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" +task_make_lbcs: + VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" +task_run_fcst: + DT_ATMOS: 36 + LAYOUT_X: 15 + LAYOUT_Y: 20 + BLOCKSIZE: 32 + WRTCMP_write_tasks_per_group: 40 + RESTART_INTERVAL: 6 12 18 24 + QUILTING: true + PRINT_ESMF: false + DO_FCST_RESTART: false +task_run_post: + POST_OUTPUT_DOMAIN_NAME: conus3km + USE_CUSTOM_POST_CONFIG_FILE: false +global: + DO_ENSEMBLE: false + NUM_ENS_MEMBERS: 2 + HALO_BLEND: 20 +smoke_dust_parm: + DO_SMOKE_DUST: true + EBB_DCYCLE: 1 + SMOKE_DUST_FILE_PREFIX: "SMOKE_RRFS_data" + diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index b66cf28945..375b2e8799 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -367,6 +367,12 @@ platform: # FIXemis: # System directory where AQM emission data files are located. # + # FIXsmoke: + # System directory where Smoke and Dust data files are located. 
+ # + # FIXupp: + # System directory where UPP fixed files are located + # # FIXcrtm: # System directory where CRTM fixed files are located # @@ -383,6 +389,8 @@ platform: FIXshp: "" FIXaqm: "" FIXemis: "" + FIXsmoke: "" + FIXupp: "" FIXcrtm: "" FIXcrtmupp: "" # @@ -1921,7 +1929,7 @@ task_run_fcst: # #----------------------------------------------------------------------- # - USE_MERRA_CLIMO: '{{ workflow.CCPP_PHYS_SUITE == "FV3_GFS_v15_thompson_mynn_lam3km" or workflow.CCPP_PHYS_SUITE == "FV3_GFS_v17_p8" }}' + USE_MERRA_CLIMO: '{{ workflow.CCPP_PHYS_SUITE == "FV3_GFS_v15_thompson_mynn_lam3km" or workflow.CCPP_PHYS_SUITE == "FV3_HRRR_gf" or workflow.CCPP_PHYS_SUITE == "FV3_GFS_v17_p8" }}' # #----------------------------------------------------------------------- # @@ -2835,6 +2843,40 @@ cpl_aqm_parm: NEXUS_GFS_SFC_DIR: "" NEXUS_GFS_SFC_ARCHV_DIR: "/NCEPPROD/hpssprod/runhistory" +#------------------------------ +# Smoke/Dust config parameters +#------------------------------ +smoke_dust_parm: + # + #----------------------------------------------------------------------- + # + # DO_SMOKE_DUST: + # Flag turning on/off Smoke and Dust + # + # EBB_DCYCLE: + # 1: for retro, 2: for forecast + # + # PERSISTENCE: + # Flag turning on/off emissions persistence method, if off same day FRP is used + # + # COMINsmoke_default: + # Path to the directory containing smoke and dust files + # + # COMINrave_default: + # Path to the directory containing RAVE fire files + # + # SMOKE_DUST_FILE_PREFIX: + # Prefix of Smoke and Dust file name + # + #----------------------------------------------------------------------- + # + DO_SMOKE_DUST: false + EBB_DCYCLE: 1 + PERSISTENCE: true + COMINsmoke_default: "" + COMINrave_default: "" + SMOKE_DUST_FILE_PREFIX: "SMOKE_RRFS_data" + #---------------------------- # UFS FIRE config parameters #----------------------------- diff --git a/ush/machine/gaea-c6.yaml b/ush/machine/gaea-c6.yaml index 5efe4e5f5f..1f293adcb8 100644 --- a/ush/machine/gaea-c6.yaml +++ b/ush/machine/gaea-c6.yaml @@ -35,7 +35,11 @@ platform: FIXorg: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/NaturalEarth + FIXsmoke: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_smoke + FIXupp: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_upp + FIXcrtm: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_crtm EXTRN_MDL_DATA_STORES: aws + data: ics_lbcs: FV3GFS: @@ -46,6 +50,11 @@ data: HRRR: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/HRRR/${yyyymmdd}${hh} RAP: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh} GSMGFS: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/GSMGFS/${yyyymmdd}${hh} + +smoke_dust_parm: + COMINsmoke_default: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/data_smoke_dust/RAVE_fire + rocoto: tasks: metatask_run_ensemble: diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index 5644814e1d..c034471905 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -40,6 +40,9 @@ platform: FIXshp: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/NaturalEarth FIXaqm: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_aqm FIXemis: /scratch1/RDARCH/rda-arl-gpu/Barry.Baker/emissions/nexus + FIXsmoke: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_smoke + FIXupp: 
/scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_upp +  FIXcrtm: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: hpss aws nomads cpl_aqm_parm: @@ -47,6 +50,10 @@ cpl_aqm_parm: COMINgefs_default: /scratch2/NAGAPE/epic/SRW-AQM_DATA/aqm_data/GEFS_DATA NEXUS_GFS_SFC_DIR: /scratch2/NAGAPE/epic/SRW-AQM_DATA/aqm_data/GFS_SFC_DATA +smoke_dust_parm: + COMINsmoke_default: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_fire + rocoto: tasks: metatask_run_ensemble: diff --git a/ush/machine/hercules.yaml b/ush/machine/hercules.yaml index eddf307091..523fa31233 100644 --- a/ush/machine/hercules.yaml +++ b/ush/machine/hercules.yaml @@ -36,6 +36,9 @@ platform: FIXshp: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/NaturalEarth FIXaqm: /work/noaa/epic/SRW-AQM_DATA/fix_aqm FIXemis: /work/noaa/epic/SRW-AQM_DATA/fix_emis +  FIXsmoke: /work/noaa/epic/SRW-AQM_DATA/fix_smoke +  FIXupp: /work/noaa/epic/SRW-AQM_DATA/fix_upp +  FIXcrtm: /work/noaa/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: aws data: ics_lbcs: @@ -53,6 +56,10 @@ cpl_aqm_parm: COMINgefs_default: /work/noaa/epic/SRW-AQM_DATA/aqm_data/GEFS_DATA NEXUS_GFS_SFC_DIR: /work/noaa/epic/SRW-AQM_DATA/aqm_data/GFS_SFC_DATA +smoke_dust_parm: + COMINsmoke_default: /work/noaa/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /work/noaa/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_fire + rocoto: tasks: metatask_run_ensemble: diff --git a/ush/machine/orion.yaml b/ush/machine/orion.yaml index 285eb34ee2..d69ab9e965 100644 --- a/ush/machine/orion.yaml +++ b/ush/machine/orion.yaml @@ -35,6 +35,9 @@ platform: FIXshp: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/NaturalEarth FIXaqm: /work/noaa/epic/SRW-AQM_DATA/fix_aqm FIXemis: /work/noaa/epic/SRW-AQM_DATA/fix_emis +  FIXsmoke: /work/noaa/epic/SRW-AQM_DATA/fix_smoke +  FIXupp: /work/noaa/epic/SRW-AQM_DATA/fix_upp +  FIXcrtm: /work/noaa/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: aws nomads data: ics_lbcs: @@ -52,6 +55,10 @@ cpl_aqm_parm: COMINgefs_default: /work/noaa/epic/SRW-AQM_DATA/aqm_data/GEFS_DATA NEXUS_GFS_SFC_DIR: /work/noaa/epic/SRW-AQM_DATA/aqm_data/GFS_SFC_DATA +smoke_dust_parm: + COMINsmoke_default: /work/noaa/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /work/noaa/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_fire + rocoto: tasks: metatask_run_ensemble: diff --git a/ush/smoke_dust_add_smoke.py b/ush/smoke_dust_add_smoke.py new file mode 100755 index 0000000000..64229d7e36 --- /dev/null +++ b/ush/smoke_dust_add_smoke.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +from typing import Tuple +import xarray as xr +import numpy as np +import os + + +def populate_data(data: np.ndarray, target_shape: Tuple) -> np.ndarray: +    """ +    Extracted variables need to match the target shape, so we first populate the data into a zero-filled array. + +    Args: +        data: The extracted data to be adjusted +        target_shape: The shape of the target data array + +    Returns: +        The adjusted data array +    """ +    populated_data = np.zeros(target_shape) +    populated_data[: data.shape[0], :, :] = data +    return populated_data + + +def main() -> None: +    # File paths +    source_file = "fv_tracer.res.tile1.nc" +    target_file = "gfs_data.tile7.halo0.nc" + +    # Check if the source file exists +    if not os.path.exists(source_file): +        print(f"Source file '{source_file}' does not exist.
Exiting...") + return + + # Open the source file and extract data + data_to_extract = xr.open_dataset(source_file) + print("DATA FILE:", data_to_extract) + + smoke_2_add = data_to_extract["smoke"][0, :, :, :] + dust_2_add = data_to_extract["dust"][0, :, :, :] + coarsepm_2_add = data_to_extract["coarsepm"][0, :, :, :] + + print("Max values in source file:", smoke_2_add.max()) + + # Open the target file and load it into memory + file_input = xr.open_dataset(target_file).load() + file_input.close() # to remove permission error below + print("TARGET FILE:", file_input) + # Drop the 'smoke' variable if it exists in both the source and target files + if "smoke" in file_input.variables and "smoke" in data_to_extract.variables: + file_input = file_input.drop("smoke") + + # Determine the shape of the new variables based on the target file dimensions + lev_dim = file_input.dims["lev"] + lat_dim = file_input.dims["lat"] + lon_dim = file_input.dims["lon"] + + # Populate the extracted data to match the target shape + # smoke_2_add_populated = populate_data(smoke_2_add, (lev_dim, lat_dim, lon_dim)) + # dust_2_add_populated = populate_data(dust_2_add, (lev_dim, lat_dim, lon_dim)) + # coarsepm_2_add_populated = populate_data(coarsepm_2_add, (lev_dim, lat_dim, lon_dim)) + + # print('Max values in populated data:', smoke_2_add_populated.max(), dust_2_add_populated.max(), coarsepm_2_add_populated.max()) + + # Create new data arrays filled with zeros + smoke_zero = xr.DataArray( + np.zeros((lev_dim, lat_dim, lon_dim)), + dims=["lev", "lat", "lon"], + attrs={"units": "ug/kg"}, + ) + dust_zero = xr.DataArray( + np.zeros((lev_dim, lat_dim, lon_dim)), + dims=["lev", "lat", "lon"], + attrs={"units": "ug/kg"}, + ) + coarsepm_zero = xr.DataArray( + np.zeros((lev_dim, lat_dim, lon_dim)), + dims=["lev", "lat", "lon"], + attrs={"units": "ug/kg"}, + ) + + # Assign the data arrays to the dataset, initially with zeros + file_input["smoke"] = smoke_zero + file_input["dust"] = dust_zero + file_input["coarsepm"] = coarsepm_zero + + # Populate the variables with the adjusted data + file_input["smoke"][1:66, :, :] = smoke_2_add + file_input["dust"][1:66, :, :] = dust_2_add + file_input["coarsepm"][1:66, :, :] = coarsepm_2_add + + print("FINAL FILE:", file_input) + # Save the modified dataset back to the file + file_input.to_netcdf(target_file, mode="w") + + # Reopen the target file to check the variables + with xr.open_dataset(target_file) as file_input: + print("Max values in target file after update:") + print("smoke:", file_input["smoke"].max().item()) + print("dust:", file_input["dust"].max().item()) + print("coarsepm:", file_input["coarsepm"].max().item()) + + +if __name__ == "__main__": + main() diff --git a/ush/smoke_dust_fire_emiss_tools.py b/ush/smoke_dust_fire_emiss_tools.py new file mode 100755 index 0000000000..c7637dfab9 --- /dev/null +++ b/ush/smoke_dust_fire_emiss_tools.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python3 + +import os +from typing import Tuple, Any + +import numpy as np +import xarray as xr +from datetime import datetime +from netCDF4 import Dataset +from pandas import Index +from xarray import DataArray + +import smoke_dust_interp_tools as i_tools + + +def averaging_FRP( + ebb_dcycle: int, + fcst_dates: Index, + cols: int, + rows: int, + intp_dir: str, + rave_to_intp: str, + veg_map: str, + tgt_area: DataArray, + beta: float, + fg_to_ug: float, + to_s: int, +) -> Tuple[np.ndarray, np.ndarray]: + """ + Compute average FRP from raw RAVE for the previous 24 hours. 
+ +    Args: +        ebb_dcycle: Valid values are ``1`` or ``2`` +        fcst_dates: Forecast hours to use for averaging +        cols: Number of columns +        rows: Number of rows +        intp_dir: Directory containing the interpolated data +        rave_to_intp: Prefix of the target RAVE files +        veg_map: Path to the vegetation mapping file +        tgt_area: Data array containing the target cell areas +        beta: Scale factor applied to emissions +        fg_to_ug: Unit conversion factor +        to_s: Unit conversion factor + +    Returns: +        A tuple containing: +            * ``0``: Average FRP +            * ``1``: Total EBB +    """ +    base_array = np.zeros((cols * rows)) +    frp_daily = base_array +    ebb_smoke_total = [] +    frp_avg_hr = [] + +    try: +        ef_map = xr.open_dataset(veg_map) +        emiss_factor = ef_map.emiss_factor.values +        target_area = tgt_area.values +    except ( +        FileNotFoundError, +        IOError, +        OSError, +        RuntimeError, +        ValueError, +        TypeError, +        KeyError, +        IndexError, +        MemoryError, +    ) as e: +        print(f"Error loading vegetation map: {e}") +        return np.zeros((cols, rows)), np.zeros((cols, rows)) + +    num_files = 0 +    for cycle in fcst_dates: +        try: +            file_path = os.path.join(intp_dir, f"{rave_to_intp}{cycle}00_{cycle}59.nc") +            if os.path.exists(file_path): +                try: +                    with xr.open_dataset(file_path) as nc: +                        open_fre = nc.FRE[0, :, :].values +                        open_frp = nc.frp_avg_hr[0, :, :].values +                        num_files += 1 +                        if ebb_dcycle == 1: +                            print("Processing emissions for ebb_dcyc 1") +                            print(file_path) +                            frp_avg_hr.append(open_frp) +                            ebb_hourly = (open_fre * emiss_factor * beta * fg_to_ug) / ( +                                target_area * to_s +                            ) +                            ebb_smoke_total.append( +                                np.where(open_frp > 0, ebb_hourly, 0) +                            ) +                        else: +                            print("Processing emissions for ebb_dcyc 2") +                            ebb_hourly = ( +                                open_fre * emiss_factor * beta * fg_to_ug / target_area +                            ) +                            ebb_smoke_total.append( +                                np.where(open_frp > 0, ebb_hourly, 0).ravel() +                            ) +                            frp_daily += np.where(open_frp > 0, open_frp, 0).ravel() +                except ( +                    FileNotFoundError, +                    IOError, +                    OSError, +                    RuntimeError, +                    ValueError, +                    TypeError, +                    KeyError, +                    IndexError, +                    MemoryError, +                ) as e: +                    print(f"Error processing NetCDF file {file_path}: {e}") +                    if ebb_dcycle == 1: +                        frp_avg_hr.append(np.zeros((cols, rows))) +                        ebb_smoke_total.append(np.zeros((cols, rows))) +            else: +                if ebb_dcycle == 1: +                    frp_avg_hr.append(np.zeros((cols, rows))) +                    ebb_smoke_total.append(np.zeros((cols, rows))) +        except Exception as e: +            print(f"Error processing cycle {cycle}: {e}") +            if ebb_dcycle == 1: +                frp_avg_hr.append(np.zeros((cols, rows))) +                ebb_smoke_total.append(np.zeros((cols, rows))) + +    if num_files > 0: +        if ebb_dcycle == 1: +            frp_avg_reshaped = np.stack(frp_avg_hr, axis=0) +            ebb_total_reshaped = np.stack(ebb_smoke_total, axis=0) +        else: +            summed_array = np.sum(np.array(ebb_smoke_total), axis=0) +            num_zeros = len(ebb_smoke_total) - np.sum( +                [arr == 0 for arr in ebb_smoke_total], axis=0 +            ) +            safe_zero_count = np.where(num_zeros == 0, 1, num_zeros) +            result_array = np.array( +                [ +                    ( +                        summed_array[i] / 2 +                        if safe_zero_count[i] == 1 +                        else summed_array[i] / safe_zero_count[i] +                    ) +                    for i in range(len(safe_zero_count)) +                ] +            ) +            result_array[num_zeros == 0] = summed_array[num_zeros == 0] +            ebb_total = result_array.reshape(cols, rows) +            ebb_total_reshaped = ebb_total / 3600 +            temp_frp = np.array( +                [ +                    ( +                        frp_daily[i] / 2 +                        if safe_zero_count[i] == 1 +                        else frp_daily[i] / safe_zero_count[i] +                    ) +                    for i in range(len(safe_zero_count)) +                ] +            ) +            temp_frp[num_zeros == 0] = frp_daily[num_zeros == 0] +            frp_avg_reshaped = temp_frp.reshape(cols, rows) +    else: +        if ebb_dcycle == 1: +            frp_avg_reshaped =
np.zeros((24, cols, rows)) +            ebb_total_reshaped = np.zeros((24, cols, rows)) +        else: +            frp_avg_reshaped = np.zeros((cols, rows)) +            ebb_total_reshaped = np.zeros((cols, rows)) + +    return (frp_avg_reshaped, ebb_total_reshaped) + + +def estimate_fire_duration( +    intp_dir: str, +    fcst_dates: Index, +    current_day: str, +    cols: int, +    rows: int, +    rave_to_intp: str, +) -> np.ndarray: +    """ +    Estimate fire duration potentially using data from previous cycles. + +    There are two cases here. +    1) First-day simulation: no RAVE data from the previous 24 hours are available, so fire age is set to zero. +    2) Previous files are present: estimate fire age as the difference between the date of the current cycle and the date when the fire was last observed within the previous 24 hours. + +    Args: +        intp_dir: Path to interpolated RAVE data +        fcst_dates: Forecast hours used in the current cycle +        current_day: The current forecast day and hour +        cols: Number of columns +        rows: Number of rows +        rave_to_intp: Prefix of the target RAVE files + +    Returns: +        An array of hours since each fire was last observed (zero where no fire was detected) +    """ +    t_fire = np.zeros((cols, rows)) + +    for date_str in fcst_dates: +        try: +            assert isinstance(date_str, str) +            date_file = int(date_str[:10]) +            print("Date processing for fire duration", date_file) +            file_path = os.path.join( +                intp_dir, f"{rave_to_intp}{date_str}00_{date_str}59.nc" +            ) + +            if os.path.exists(file_path): +                try: +                    with xr.open_dataset(file_path) as open_intp: +                        FRP = open_intp.frp_avg_hr[0, :, :].values +                        dates_filtered = np.where(FRP > 0, date_file, 0) +                        t_fire = np.maximum(t_fire, dates_filtered) +                except ( +                    FileNotFoundError, +                    IOError, +                    OSError, +                    RuntimeError, +                    ValueError, +                    TypeError, +                    KeyError, +                    IndexError, +                    MemoryError, +                ) as e: +                    print(f"Error processing NetCDF file {file_path}: {e}") +        except Exception as e: +            print(f"Error processing date {date_str}: {e}") + +    t_fire_flattened = [int(i) if i != 0 else 0 for i in t_fire.flatten()] + +    try: +        fcst_t = datetime.strptime(current_day, "%Y%m%d%H") +        hr_ends = [ +            datetime.strptime(str(hr), "%Y%m%d%H") if hr != 0 else 0 +            for hr in t_fire_flattened +        ] +        te = np.array( +            [(fcst_t - i).total_seconds() / 3600 if i != 0 else 0 for i in hr_ends] +        ) +    except ValueError as e: +        print(f"Error processing forecast time {current_day}: {e}") +        te = np.zeros((rows, cols)) + +    return te + + +def save_fire_dur(cols: int, rows: int, te: np.ndarray) -> np.ndarray: +    """ +    Reshape the fire duration array. + +    Args: +        cols: Number of columns +        rows: Number of rows +        te: Target array to reshape + +    Returns: +        The reshaped fire duration array +    """ +    fire_dur = np.array(te).reshape(cols, rows) +    return fire_dur + + +def produce_emiss_24hr_file( +    frp_reshaped: np.ndarray, +    intp_dir: str, +    current_day: str, +    tgt_latt: DataArray, +    tgt_lont: DataArray, +    ebb_smoke_reshaped: np.ndarray, +    cols: int, +    rows: int, +) -> None: +    """ +    Create a 24-hour emissions file.
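+
+    The file written below, ``SMOKE_RRFS_data_<current_day>00.nc``, holds
+    ``geolat``/``geolon`` plus 24 hourly records of ``frp_avg_hr`` (MW) and
+    ``ebb_smoke_hr`` (ug m-2 s-1) on the ``(t, lat, lon)`` dimensions created
+    by ``create_emiss_file`` (a summary of the calls in this function).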
+ + Args: + frp_reshaped: FRP numpy array + intp_dir: Directory containing interpolated RAVE files + current_day: The current forecast cycle day/hour + tgt_latt: Target grid latitudes + tgt_lont: Target grid longitudes + ebb_smoke_reshaped: EBB smoke array reshaped + cols: Number of columns + rows: Number of rows + """ + file_path = os.path.join(intp_dir, f"SMOKE_RRFS_data_{current_day}00.nc") + with Dataset(file_path, "w") as fout: + i_tools.create_emiss_file(fout, cols, rows) + i_tools.Store_latlon_by_Level( + fout, "geolat", tgt_latt, "cell center latitude", "degrees_north", "-9999.f" + ) + i_tools.Store_latlon_by_Level( + fout, "geolon", tgt_lont, "cell center longitude", "degrees_east", "-9999.f" + ) + + i_tools.Store_by_Level( + fout, "frp_avg_hr", "mean Fire Radiative Power", "MW", "0.f" + ) + fout.variables["frp_avg_hr"][:, :, :] = frp_reshaped + i_tools.Store_by_Level( + fout, "ebb_smoke_hr", "EBB emissions", "ug m-2 s-1", "0.f" + ) + fout.variables["ebb_smoke_hr"][:, :, :] = ebb_smoke_reshaped + + +def produce_emiss_file( + xarr_hwp: DataArray, + frp_avg_reshaped: np.ndarray, + totprcp_ave_arr: Any, + xarr_totprcp: DataArray, + intp_dir: str, + current_day: str, + tgt_latt: DataArray, + tgt_lont: DataArray, + ebb_tot_reshaped: np.ndarray, + fire_age: np.ndarray, + cols: int, + rows: int, +) -> str: + """ + Produce the emissions file. + + Args: + xarr_hwp: Data array containing HWP + frp_avg_reshaped: Average FRP array + totprcp_ave_arr: Average total precipitation array + xarr_totprcp: Average total precipitation as a data array + intp_dir: Directory containing interpolated RAVE data + current_day: The current forecast day/hour + tgt_latt: The target grid latitude + tgt_lont: The target grid longitudes + ebb_tot_reshaped: Total EBB array + fire_age: Estimated fire age array + cols: Number of columns + rows: Number of rows + + Returns: + A string indicating the file was written as expected + """ + # Ensure arrays are not negative or NaN + frp_avg_reshaped = np.clip(frp_avg_reshaped, 0, None) + frp_avg_reshaped = np.nan_to_num(frp_avg_reshaped) + + ebb_tot_reshaped = np.clip(ebb_tot_reshaped, 0, None) + ebb_tot_reshaped = np.nan_to_num(ebb_tot_reshaped) + + fire_age = np.clip(fire_age, 0, None) + fire_age = np.nan_to_num(fire_age) + + # Filter HWP Prcp arrays to be non-negative and replace NaNs + filtered_hwp = xarr_hwp.where(frp_avg_reshaped > 0, 0).fillna(0) + filtered_prcp = xarr_totprcp.where(frp_avg_reshaped > 0, 0).fillna(0) + + # Filter based on ebb_rate + ebb_rate_threshold = 0 # Define an appropriate threshold if needed + mask = ebb_tot_reshaped > ebb_rate_threshold + + filtered_hwp = filtered_hwp.where(mask, 0).fillna(0) + filtered_prcp = filtered_prcp.where(mask, 0).fillna(0) + frp_avg_reshaped = frp_avg_reshaped * mask + ebb_tot_reshaped = ebb_tot_reshaped * mask + fire_age = fire_age * mask + + # Produce emiss file + file_path = os.path.join(intp_dir, f"SMOKE_RRFS_data_{current_day}00.nc") + + try: + with Dataset(file_path, "w") as fout: + i_tools.create_emiss_file(fout, cols, rows) + i_tools.Store_latlon_by_Level( + fout, + "geolat", + tgt_latt, + "cell center latitude", + "degrees_north", + "-9999.f", + ) + i_tools.Store_latlon_by_Level( + fout, + "geolon", + tgt_lont, + "cell center longitude", + "degrees_east", + "-9999.f", + ) + + print("Storing different variables") + i_tools.Store_by_Level( + fout, "frp_davg", "Daily mean Fire Radiative Power", "MW", "0.f" + ) + fout.variables["frp_davg"][0, :, :] = frp_avg_reshaped + i_tools.Store_by_Level( + fout, "ebb_rate", 
"Total EBB emission", "ug m-2 s-1", "0.f" + ) + fout.variables["ebb_rate"][0, :, :] = ebb_tot_reshaped + i_tools.Store_by_Level( + fout, "fire_end_hr", "Hours since fire was last detected", "hrs", "0.f" + ) + fout.variables["fire_end_hr"][0, :, :] = fire_age + i_tools.Store_by_Level( + fout, "hwp_davg", "Daily mean Hourly Wildfire Potential", "none", "0.f" + ) + fout.variables["hwp_davg"][0, :, :] = filtered_hwp + i_tools.Store_by_Level( + fout, "totprcp_24hrs", "Sum of precipitation", "m", "0.f" + ) + fout.variables["totprcp_24hrs"][0, :, :] = filtered_prcp + + print("Emissions file created successfully") + return "Emissions file created successfully" + + except (OSError, IOError) as e: + print(f"Error creating or writing to NetCDF file {file_path}: {e}") + return f"Error creating or writing to NetCDF file {file_path}: {e}" + + return "Emissions file created successfully" diff --git a/ush/smoke_dust_generate_fire_emissions.py b/ush/smoke_dust_generate_fire_emissions.py new file mode 100755 index 0000000000..74a979c9a9 --- /dev/null +++ b/ush/smoke_dust_generate_fire_emissions.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 + +######################################################################### +# # +# Python script for fire emissions preprocessing from RAVE FRP and FRE # +# (Li et al.,2022). # +# johana.romero-alvarez@noaa.gov # +# # +######################################################################### + +import sys +import os +import smoke_dust_fire_emiss_tools as femmi_tools +import smoke_dust_hwp_tools as hwp_tools +import smoke_dust_interp_tools as i_tools + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Workflow +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +def generate_emiss_workflow( + staticdir: str, + ravedir: str, + intp_dir: str, + predef_grid: str, + ebb_dcycle_flag: str, + restart_interval: str, + persistence: str, +) -> None: + """ + Prepares fire-related ICs. This is the main function that handles data movement and interpolation. + + Args: + staticdir: Path to fix files for the smoke and dust component + ravedir: Path to the directory containing RAVE fire data files (hourly). This is typically the working directory (DATA) + intp_dir: Path to interpolated RAVE data files from the previous cycles (DATA_SHARE) + predef_grid: If ``RRFS_NA_3km``, use pre-defined grid dimensions + ebb_dcycle_flag: Select the EBB cycle to run. Valid values are ``"1"`` or ``"2"`` + restart_interval: Indicates if restart files should be copied. The actual interval values are not used + persistence: If ``TRUE``, use satellite observations from the previous day. Otherwise, use observations from the same day. 
+ """ + + # ---------------------------------------------------------------------- + # Import envs from workflow and get the pre-defined grid + # Set variable names, constants and unit conversions + # Set predefined grid + # Set directories + # ---------------------------------------------------------------------- + + beta = 0.3 + fg_to_ug = 1e6 + to_s = 3600 + current_day = os.environ["CDATE"] + # nwges_dir = os.environ.get("NWGES_DIR") + nwges_dir = os.environ["DATA"] + vars_emis = ["FRP_MEAN", "FRE"] + cols, rows = (2700, 3950) if predef_grid == "RRFS_NA_3km" else (1092, 1820) + print("PREDEF GRID", predef_grid, "cols,rows", cols, rows) + # used later when working with ebb_dcyle 1 or 2 + ebb_dcycle = int(ebb_dcycle_flag) + print( + "WARNING, EBB_DCYCLE set to", + ebb_dcycle, + "and persistence=", + persistence, + "if persistence is false, emissions comes from same day satellite obs", + ) + + print("CDATE:", current_day) + print("DATA:", nwges_dir) + + # This is used later when copying the rrfs restart file + restart_interval_list = [float(num) for num in restart_interval.split()] + len_restart_interval = len(restart_interval_list) + + # Setting the directories + veg_map = staticdir + "/veg_map.nc" + RAVE = ravedir + rave_to_intp = predef_grid + "_intp_" + grid_in = staticdir + "/grid_in.nc" + weightfile = staticdir + "/weight_file.nc" + grid_out = staticdir + "/ds_out_base.nc" + hourly_hwpdir = os.path.join(nwges_dir, "RESTART") + + # ---------------------------------------------------------------------- + # Workflow + # ---------------------------------------------------------------------- + + # ---------------------------------------------------------------------- + # Sort raw RAVE, create source and target filelds, and compute emissions + # ---------------------------------------------------------------------- + fcst_dates = i_tools.date_range(current_day, ebb_dcycle, persistence) + intp_avail_hours, intp_non_avail_hours, inp_files_2use = ( + i_tools.check_for_intp_rave(intp_dir, fcst_dates, rave_to_intp) + ) + rave_avail, rave_avail_hours, rave_nonavail_hours_test, first_day = ( + i_tools.check_for_raw_rave(RAVE, intp_non_avail_hours, intp_avail_hours) + ) + srcfield, tgtfield, tgt_latt, tgt_lont, srcgrid, tgtgrid, src_latt, tgt_area = ( + i_tools.creates_st_fields(grid_in, grid_out) + ) + + if not first_day: + regridder, use_dummy_emiss = i_tools.generate_regridder( + rave_avail_hours, srcfield, tgtfield, weightfile, intp_avail_hours + ) + if use_dummy_emiss: + print("RAVE files corrupted, no data to process") + i_tools.create_dummy(intp_dir, current_day, tgt_latt, tgt_lont, cols, rows) + else: + i_tools.interpolate_rave( + RAVE, + rave_avail, + rave_avail_hours, + use_dummy_emiss, + vars_emis, + regridder, + srcgrid, + tgtgrid, + rave_to_intp, + intp_dir, + tgt_latt, + tgt_lont, + cols, + rows, + ) + + if ebb_dcycle == 1: + print("Processing emissions forebb_dcyc 1") + frp_avg_reshaped, ebb_total_reshaped = femmi_tools.averaging_FRP( + ebb_dcycle, + fcst_dates, + cols, + rows, + intp_dir, + rave_to_intp, + veg_map, + tgt_area, + beta, + fg_to_ug, + to_s, + ) + femmi_tools.produce_emiss_24hr_file( + frp_avg_reshaped, + nwges_dir, + current_day, + tgt_latt, + tgt_lont, + ebb_total_reshaped, + cols, + rows, + ) + elif ebb_dcycle == 2: + print("Restart dates to process", fcst_dates) + hwp_avail_hours, hwp_non_avail_hours = hwp_tools.check_restart_files( + hourly_hwpdir, fcst_dates + ) + restart_avail, restart_nonavail_hours_test = ( + hwp_tools.copy_missing_restart( + nwges_dir, + 
hwp_non_avail_hours, +                        hourly_hwpdir, +                        len_restart_interval, +                    ) +                ) +                hwp_ave_arr, xarr_hwp, totprcp_ave_arr, xarr_totprcp = ( +                    hwp_tools.process_hwp( +                        fcst_dates, hourly_hwpdir, cols, rows, intp_dir, rave_to_intp +                    ) +                ) +                frp_avg_reshaped, ebb_total_reshaped = femmi_tools.averaging_FRP( +                    ebb_dcycle, +                    fcst_dates, +                    cols, +                    rows, +                    intp_dir, +                    rave_to_intp, +                    veg_map, +                    tgt_area, +                    beta, +                    fg_to_ug, +                    to_s, +                ) +                # Fire end hours processing +                te = femmi_tools.estimate_fire_duration( +                    intp_dir, fcst_dates, current_day, cols, rows, rave_to_intp +                ) +                fire_age = femmi_tools.save_fire_dur(cols, rows, te) +                # produce emiss file +                femmi_tools.produce_emiss_file( +                    xarr_hwp, +                    frp_avg_reshaped, +                    totprcp_ave_arr, +                    xarr_totprcp, +                    nwges_dir, +                    current_day, +                    tgt_latt, +                    tgt_lont, +                    ebb_total_reshaped, +                    fire_age, +                    cols, +                    rows, +                ) +            else: +                raise NotImplementedError(f"ebb_dcycle={ebb_dcycle}") +    else: +        print("First day true, no RAVE files available. Use dummy emissions file") +        i_tools.create_dummy(intp_dir, current_day, tgt_latt, tgt_lont, cols, rows) + + +if __name__ == "__main__": +    print("") +    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") +    print("Welcome to interpolating RAVE and processing fire emissions!") +    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") +    print("") +    generate_emiss_workflow( +        sys.argv[1], +        sys.argv[2], +        sys.argv[3], +        sys.argv[4], +        sys.argv[5], +        sys.argv[6], +        sys.argv[7], +    ) +    print("") +    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") +    print("Successful Completion. Bye!") +    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") +    print("") diff --git a/ush/smoke_dust_hwp_tools.py b/ush/smoke_dust_hwp_tools.py new file mode 100755 index 0000000000..9c2dbf9ff1 --- /dev/null +++ b/ush/smoke_dust_hwp_tools.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 +from typing import List, Tuple + +import numpy as np +import os +import datetime as dt +from datetime import timedelta +import xarray as xr +import fnmatch + +from pandas import Index +from xarray import DataArray + + +def check_restart_files( +    hourly_hwpdir: str, fcst_dates: Index +) -> Tuple[List[str], List[str]]: +    """ +    Check which forecast cycles already have restart files staged in the hourly HWP directory. + +    Args: +        hourly_hwpdir: The input HWP data directory +        fcst_dates: A list of forecast dates + +    Returns: +        A tuple containing: +            * ``0``: A list of available HWP hours +            * ``1``: A list of unavailable HWP hours +    """ +    hwp_avail_hours = [] +    hwp_non_avail_hours = [] + +    for cycle in fcst_dates: +        assert isinstance(cycle, str) +        restart_file = f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc" +        file_path = os.path.join(hourly_hwpdir, restart_file) + +        if os.path.exists(file_path): +            print(f"Restart file available for: {restart_file}") +            hwp_avail_hours.append(cycle) +        else: +            print(f"Copy restart file for: {restart_file}") +            hwp_non_avail_hours.append(cycle) + +    print( +        f"Available restart at: {hwp_avail_hours}, Non-available restart files at: {hwp_non_avail_hours}" +    ) +    return hwp_avail_hours, hwp_non_avail_hours + + +def copy_missing_restart( +    nwges_dir: str, +    hwp_non_avail_hours: List[str], +    hourly_hwpdir: str, +    len_restart_interval: int, +) -> Tuple[List[str], List[str]]: +    """ +    Args: +        nwges_dir: Root directory for restart files +        hwp_non_avail_hours: List of HWP hours that are not available +        hourly_hwpdir: Directory where hourly HWP restart files are staged +        len_restart_interval: The length of the restart interval + +    Returns: +        A tuple containing: +            * ``0``: List of available restart files +            * ``1``: List of unavailable restart files +    """ +    restart_avail_hours = [] +
restart_nonavail_hours_test = [] + + for cycle in hwp_non_avail_hours: + try: + YYYYMMDDHH = dt.datetime.strptime(cycle, "%Y%m%d%H") + HH = cycle[8:10] + prev_hr = YYYYMMDDHH - timedelta(hours=1) + prev_hr_str = prev_hr.strftime("%Y%m%d%H") + + source_restart_dir = os.path.join( + nwges_dir, prev_hr_str, "fcst_fv3lam", "RESTART" + ) + wildcard_name = "*.phy_data.nc" + + if len_restart_interval > 1: + print("ENTERING LOOP for len_restart_interval > 1") + if os.path.exists(source_restart_dir): + matching_files_found = False + print("PATH EXISTS") + for file in sorted(os.listdir(source_restart_dir)): + if fnmatch.fnmatch(file, wildcard_name): + matching_files_found = True + print("MATCHING FILES FOUND") + source_file_path = os.path.join(source_restart_dir, file) + target_file_path = os.path.join(hourly_hwpdir, file) + var1, var2 = "rrfs_hwp_ave", "totprcp_ave" + if os.path.exists(source_file_path): + with xr.open_dataset(source_file_path) as ds: + try: + if ( + var1 in ds.variables + and var2 in ds.variables + ): + ds = ds[[var1, var2]] + ds.to_netcdf(target_file_path) + restart_avail_hours.append(cycle) + print(f"Restart file copied: {file}") + else: + print( + f"Missing variables {var1} or {var2} in {file}. Skipping file." + ) + except AttributeError as e: + print( + f"AttributeError processing NetCDF file {source_file_path}: {e}" + ) + else: + print(f"Source file not found: {source_file_path}") + if not matching_files_found: + print("No matching files found") + restart_nonavail_hours_test.append(cycle) + else: + print(f"Source directory not found: {source_restart_dir}") + restart_nonavail_hours_test.append(cycle) + else: + if os.path.exists(source_restart_dir): + try: + matching_files = [ + f + for f in os.listdir(source_restart_dir) + if fnmatch.fnmatch(f, wildcard_name) + ] + if not matching_files: + print( + f"No matching files for cycle {cycle} in {source_restart_dir}" + ) + restart_nonavail_hours_test.append(cycle) + continue + + for matching_file in matching_files: + source_file_path = os.path.join( + source_restart_dir, matching_file + ) + target_file_path = os.path.join( + hourly_hwpdir, matching_file + ) + var1, var2 = "rrfs_hwp_ave", "totprcp_ave" + + if os.path.exists(source_file_path): + try: + with xr.open_dataset(source_file_path) as ds: + if ( + var1 in ds.variables + and var2 in ds.variables + ): + ds = ds[[var1, var2]] + ds.to_netcdf(target_file_path) + restart_avail_hours.append(cycle) + print( + f"Restart file copied: {matching_file}" + ) + else: + print( + f"Missing variables {var1} or {var2} in {matching_file}. Skipping file." 
+                                    ) +                            except ( +                                FileNotFoundError, +                                IOError, +                                OSError, +                                RuntimeError, +                                ValueError, +                                TypeError, +                                KeyError, +                                IndexError, +                                MemoryError, +                            ) as e: +                                print( +                                    f"Error processing NetCDF file {source_file_path}: {e}" +                                ) +                                restart_nonavail_hours_test.append(cycle) +                        else: +                            print(f"Source file not found: {source_file_path}") +                            restart_nonavail_hours_test.append(cycle) +                except (FileNotFoundError, IOError, OSError, RuntimeError) as e: +                    print(f"Error accessing directory {source_restart_dir}: {e}") +                    restart_nonavail_hours_test.append(cycle) +                else: +                    print(f"Source directory not found: {source_restart_dir}") +                    restart_nonavail_hours_test.append(cycle) + +        except (ValueError, TypeError) as e: +            print(f"Error processing cycle {cycle}: {e}") +            restart_nonavail_hours_test.append(cycle) + +    return restart_avail_hours, restart_nonavail_hours_test + + +def process_hwp( +    fcst_dates: Index, +    hourly_hwpdir: str, +    cols: int, +    rows: int, +    intp_dir: str, +    rave_to_intp: str, +) -> Tuple[np.ndarray, DataArray, np.ndarray, DataArray]: +    """ +    Process HWP files. + +    Args: +        fcst_dates: List of forecast dates +        hourly_hwpdir: Path to HWP data directory +        cols: Number of output columns +        rows: Number of output rows +        intp_dir: Path to the interpolated RAVE file directory +        rave_to_intp: File prefix indicating which RAVE files to interpolate + +    Returns: +        A tuple containing: +            * ``0``: A numpy array of average HWP +            * ``1``: An xarray data array version of the average HWP +            * ``2``: A numpy array of average total precipitation +            * ``3``: An xarray data array version of average total precipitation +    """ +    hwp_ave = [] +    totprcp = np.zeros((cols * rows)) +    var1, var2 = "rrfs_hwp_ave", "totprcp_ave" + +    for cycle in fcst_dates: +        assert isinstance(cycle, str) +        try: +            print(f"Processing restart file for date: {cycle}") +            file_path = os.path.join( +                hourly_hwpdir, f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc" +            ) +            rave_path = os.path.join(intp_dir, f"{rave_to_intp}{cycle}00_{cycle}59.nc") + +            if os.path.exists(file_path) and os.path.exists(rave_path): +                try: +                    with xr.open_dataset(file_path) as nc: +                        if var1 in nc.variables and var2 in nc.variables: +                            hwp_values = nc.rrfs_hwp_ave.values.ravel() +                            tprcp_values = nc.totprcp_ave.values.ravel() +                            totprcp += np.where(tprcp_values > 0, tprcp_values, 0) +                            hwp_ave.append(hwp_values) +                            print(f"Restart file processed for: {cycle}") +                        else: +                            print( +                                f"Missing variables {var1} or {var2} in file: {file_path}" +                            ) +                except ( +                    FileNotFoundError, +                    IOError, +                    OSError, +                    RuntimeError, +                    ValueError, +                    TypeError, +                    KeyError, +                    IndexError, +                    MemoryError, +                ) as e: +                    print(f"Error processing NetCDF file {file_path}: {e}") +            else: +                print( +                    f"One or more files not available for this cycle: {file_path}, {rave_path}" +                ) +        except (ValueError, TypeError) as e: +            print(f"Error processing cycle {cycle}: {e}") + +    # Calculate the mean HWP values if available +    if hwp_ave: +        hwp_ave_arr = np.nanmean(hwp_ave, axis=0).reshape(cols, rows) +        totprcp_ave_arr = totprcp.reshape(cols, rows) +    else: +        hwp_ave_arr = np.zeros((cols, rows)) +        totprcp_ave_arr = np.zeros((cols, rows)) + +    xarr_hwp = xr.DataArray(hwp_ave_arr) +    xarr_totprcp = xr.DataArray(totprcp_ave_arr) + +    return hwp_ave_arr, xarr_hwp, totprcp_ave_arr, xarr_totprcp diff --git a/ush/smoke_dust_interp_tools.py b/ush/smoke_dust_interp_tools.py new file mode 100755 index 0000000000..6b1a6bfead --- /dev/null +++ b/ush/smoke_dust_interp_tools.py @@ -0,0 +1,566 @@ +#!/usr/bin/env python3 + +import datetime as dt +from typing import
Tuple, List, Any + +import pandas as pd +import os +import fnmatch +import xarray as xr +import numpy as np +from netCDF4 import Dataset +from numpy import ndarray +from pandas import Index +from xarray import DataArray + +try: +    import esmpy as ESMF +except ImportError: +    # esmpy version 8.3.1 is required on Orion/Hercules +    import ESMF + + +def date_range(current_day: str, ebb_dcycle: int, persistence: str) -> Index: +    """ +    Create the date range that is later used to search for RAVE and HWP files from the previous 24 hours. + +    Args: +        current_day: The current forecast day and hour +        ebb_dcycle: Valid options are ``1`` and ``2`` +        persistence: If the string ``"TRUE"`` (case-insensitive), use satellite observations from the previous day + +    Returns: +        A string ``Index`` with values matching the forecast day and hour +    """ +    print(f"Searching for interpolated RAVE for {current_day}") +    print(f"EBB CYCLE: {ebb_dcycle}") +    print(f"Persistence setting received: {persistence}") + +    fcst_datetime = dt.datetime.strptime(current_day, "%Y%m%d%H") +    # persistence determines whether emissions persist from the previous day + +    if ebb_dcycle == 1: +        print("Find RAVE for ebb_dcyc 1") +        # persistence arrives as a string from the shell, so compare its text value +        if str(persistence).lower() == "true": +            # Start date range from one day prior if persistence is True +            print( +                "Creating emissions for persistence method where satellite FRP persist from previous day" +            ) +            start_datetime = fcst_datetime - dt.timedelta(days=1) +        else: +            # Start date range from the current date +            print("Creating emissions using current date satellite FRP") +            start_datetime = fcst_datetime +        # Generate dates for 24 hours from start_datetime +        fcst_dates = pd.date_range(start=start_datetime, periods=24, freq="H").strftime( +            "%Y%m%d%H" +        ) +    else: +        print("Creating emissions for modulated persistence by Wildfire potential") +        start_datetime = fcst_datetime - dt.timedelta(days=1, hours=1) + +        fcst_dates = pd.date_range(start=start_datetime, periods=24, freq="H").strftime( +            "%Y%m%d%H" +        ) + +    print(f"Current cycle: {fcst_datetime}") +    return fcst_dates + + +def check_for_intp_rave( +    intp_dir: str, fcst_dates: Index, rave_to_intp: str +) -> Tuple[List[str], List[str], bool]: +    """ +    Check if interpolated RAVE is available for the previous 24 hours. + +    Args: +        intp_dir: Path to directory containing interpolated RAVE files from previous cycles +        fcst_dates: Forecast days and hours to search ``intp_dir`` for +        rave_to_intp: Filename prefix for the interpolated RAVE files + +    Returns: +        A tuple containing: +            * ``0``: The available forecast days/hours +            * ``1``: The unavailable (missing) forecast day/hours +            * ``2``: A boolean indicating if there are any interpolated RAVE files available +    """ +    intp_avail_hours = [] +    intp_non_avail_hours = [] +    # There are four situations here.
+
+    # 1) the file is missing (interpolate a new file)
+    # 2) the file is present (use it)
+    # 3) there is a link, but it's broken (interpolate a new file)
+    # 4) there is a valid link (use it)
+    for date in fcst_dates:
+        file_name = f"{rave_to_intp}{date}00_{date}59.nc"
+        file_path = os.path.join(intp_dir, file_name)
+        file_exists = os.path.isfile(file_path)
+        is_link = os.path.islink(file_path)
+        is_valid_link = is_link and os.path.exists(file_path)
+
+        if file_exists or is_valid_link:
+            print(f"RAVE interpolated file available for {file_name}")
+            intp_avail_hours.append(str(date))
+        else:
+            print(f"Interpolated file not available; will interpolate RAVE for {file_name}")
+            intp_non_avail_hours.append(str(date))
+
+    print(
+        f"Available interpolated files for hours: {intp_avail_hours}, unavailable interpolated files for hours: {intp_non_avail_hours}"
+    )
+
+    inp_files_2use = len(intp_avail_hours) > 0
+
+    return intp_avail_hours, intp_non_avail_hours, inp_files_2use
+
+
+def check_for_raw_rave(
+    RAVE: str, intp_non_avail_hours: List[str], intp_avail_hours: List[str]
+) -> Tuple[List[List[str]], List[str], List[str], bool]:
+    """
+    Check if raw RAVE for the days/hours in ``intp_non_avail_hours`` is
+    available for interpolation.
+
+    Args:
+        RAVE: Directory containing the raw RAVE files
+        intp_non_avail_hours: RAVE days/hours that are not available
+        intp_avail_hours: RAVE days/hours that are available
+
+    Returns:
+        A tuple containing:
+            * ``0``: Raw RAVE file paths that are available
+            * ``1``: The days/hours of the available RAVE files
+            * ``2``: The days/hours that are not available
+            * ``3``: A boolean indicating if this is the first day of the forecast
+    """
+    rave_avail = []
+    rave_avail_hours = []
+    rave_nonavail_hours_test = []
+    for date in intp_non_avail_hours:
+        # Match both the current operational and the older "retro" RAVE
+        # filename conventions.
+        wildcard_name = f"*-3km*{date}*{date}59590*.nc"
+        name_retro = f"*3km*{date}*{date}*.nc"
+        matching_files = [
+            f
+            for f in os.listdir(RAVE)
+            if fnmatch.fnmatch(f, wildcard_name) or fnmatch.fnmatch(f, name_retro)
+        ]
+        print(f"Found raw RAVE: {matching_files}")
+        if not matching_files:
+            print(f"Raw RAVE not available for interpolation for {date}")
+            rave_nonavail_hours_test.append(date)
+        else:
+            print(f"Raw RAVE available for interpolation: {matching_files}")
+            rave_avail.append(matching_files)
+            rave_avail_hours.append(date)
+
+    print(
+        f"Raw RAVE available: {rave_avail_hours}, rave_nonavail_hours: {rave_nonavail_hours_test}"
+    )
+    first_day = not rave_avail_hours and not intp_avail_hours
+
+    print(f"FIRST DAY?: {first_day}")
+    return rave_avail, rave_avail_hours, rave_nonavail_hours_test, first_day
+
+
+def creates_st_fields(grid_in: str, grid_out: str) -> Tuple[
+    ESMF.Field,
+    ESMF.Field,
+    DataArray,
+    DataArray,
+    ESMF.Grid,
+    ESMF.Grid,
+    DataArray,
+    DataArray,
+]:
+    """
+    Create source and target fields for regridding.
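+
+    Note: no coordinates are attached to the ESMF grids here; the regrid
+    weights come from a precomputed weight file (see ``generate_regridder``).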
+ + Args: + grid_in: Path to input grid + grid_out: Path to output grid + + Returns: + A tuple containing: + * ``0``: Source ESMF field + * ``1``: Destination ESMF field + * ``2``: Destination latitudes + * ``3``: Destination longitudes + * ``4``: Source ESMF grid + * ``5``: Destination ESMF grid + * ``6``: Source latitude + * ``7``: Destination area + """ + # Open datasets with context managers + with xr.open_dataset(grid_in) as ds_in, xr.open_dataset(grid_out) as ds_out: + tgt_area = ds_out["area"] + tgt_latt = ds_out["grid_latt"] + tgt_lont = ds_out["grid_lont"] + src_latt = ds_in["grid_latt"] + + srcgrid = ESMF.Grid( + np.array(src_latt.shape), + staggerloc=[ESMF.StaggerLoc.CENTER, ESMF.StaggerLoc.CORNER], + coord_sys=ESMF.CoordSys.SPH_DEG, + ) + tgtgrid = ESMF.Grid( + np.array(tgt_latt.shape), + staggerloc=[ESMF.StaggerLoc.CENTER, ESMF.StaggerLoc.CORNER], + coord_sys=ESMF.CoordSys.SPH_DEG, + ) + + srcfield = ESMF.Field(srcgrid, name="test", staggerloc=ESMF.StaggerLoc.CENTER) + tgtfield = ESMF.Field(tgtgrid, name="test", staggerloc=ESMF.StaggerLoc.CENTER) + + print("Grid in and out files available. Generating target and source fields") + return ( + srcfield, + tgtfield, + tgt_latt, + tgt_lont, + srcgrid, + tgtgrid, + src_latt, + tgt_area, + ) + + +def create_emiss_file(fout: Dataset, cols: int, rows: int) -> None: + """ + Create necessary dimensions for the emission file. + + Args: + fout: Dataset to update + cols: Number of columns + rows: Number of rows + """ + fout.createDimension("t", None) + fout.createDimension("lat", cols) + fout.createDimension("lon", rows) + setattr(fout, "PRODUCT_ALGORITHM_VERSION", "Beta") + setattr(fout, "TIME_RANGE", "1 hour") + + +def Store_latlon_by_Level( + fout: Dataset, varname: str, var: DataArray, long_name: str, units: str, fval: str +) -> None: + """ + Store a 2D variable (latitude/longitude) in the file. + + Args: + fout: Dataset to update + varname: Variable name to create + var: Variable data to store + long_name: Variable long name + units: Variable units + fval: Variable fill value + """ + var_out = fout.createVariable(varname, "f4", ("lat", "lon")) + var_out.units = units + var_out.long_name = long_name + var_out.standard_name = varname + fout.variables[varname][:] = var + var_out.FillValue = fval + var_out.coordinates = "geolat geolon" + + +def Store_by_Level( + fout: Dataset, varname: str, long_name: str, units: str, fval: str +) -> None: + """ + Store a 3D variable (time, latitude/longitude) in the file. + + Args: + fout: Dataset to update + varname: Name of the variable to create + long_name: Long name of the variable to create + units: Units of the variable to create + fval: Fill value of the variable to create + """ + var_out = fout.createVariable(varname, "f4", ("t", "lat", "lon")) + var_out.units = units + var_out.long_name = long_name + var_out.standard_name = long_name + var_out.FillValue = fval + var_out.coordinates = "t geolat geolon" + + +def create_dummy( + intp_dir: str, + current_day: str, + tgt_latt: DataArray, + tgt_lont: DataArray, + cols: int, + rows: int, +) -> str: + """ + Create a dummy RAVE interpolated file if first day or regridder fails. + + Args: + intp_dir: Directory to create the dummy file in + current_day: Current day (and hour?) to create the dummy file for + tgt_latt: Target grid latitudes + tgt_lont: Target grid longitudes + cols: Number of columns + rows: Number of rows + + Returns: + A string stating the operation was successful. 
+ """ + file_path = os.path.join(intp_dir, f"SMOKE_RRFS_data_{current_day}00.nc") + dummy_file = np.zeros((cols, rows)) # Changed to 3D to match the '3D' dimensions + with Dataset(file_path, "w") as fout: + create_emiss_file(fout, cols, rows) + # Store latitude and longitude + Store_latlon_by_Level( + fout, "geolat", tgt_latt, "cell center latitude", "degrees_north", "-9999.f" + ) + Store_latlon_by_Level( + fout, "geolon", tgt_lont, "cell center longitude", "degrees_east", "-9999.f" + ) + + # Initialize and store each variable + Store_by_Level(fout, "frp_davg", "Daily mean Fire Radiative Power", "MW", "0.f") + fout.variables["frp_davg"][0, :, :] = dummy_file + Store_by_Level(fout, "ebb_rate", "Total EBB emission", "ug m-2 s-1", "0.f") + fout.variables["ebb_rate"][0, :, :] = dummy_file + Store_by_Level( + fout, "fire_end_hr", "Hours since fire was last detected", "hrs", "0.f" + ) + fout.variables["fire_end_hr"][0, :, :] = dummy_file + Store_by_Level( + fout, "hwp_davg", "Daily mean Hourly Wildfire Potential", "none", "0.f" + ) + fout.variables["hwp_davg"][0, :, :] = dummy_file + Store_by_Level(fout, "totprcp_24hrs", "Sum of precipitation", "m", "0.f") + fout.variables["totprcp_24hrs"][0, :, :] = dummy_file + + return "Emissions dummy file created successfully" + + +def generate_regridder( + rave_avail_hours: List[str], + srcfield: ESMF.Field, + tgtfield: ESMF.Field, + weightfile: str, + intp_avail_hours: List[str], +) -> Tuple[Any, bool]: + """ + Generate an ESMF regridder unless we are using dummy emissions. + Args: + rave_avail_hours: The RAVE hours that are available + srcfield: The source ESMF field + tgtfield: The destination ESMF field + weightfile: The ESMF weight field mapping the RAVE grid to the forecast grid + intp_avail_hours: The available interpolated hours + + Returns: + A tuple containing: + * ``0``: ESMF regridder or none (if using dummy emissions) + * ``1``: Boolean flag indicating if dummy emissions are being used + """ + print("Checking conditions for generating regridder.") + use_dummy_emiss = len(rave_avail_hours) == 0 and len(intp_avail_hours) == 0 + regridder = None + + if not use_dummy_emiss: + try: + print("Generating regridder.") + regridder = ESMF.RegridFromFile(srcfield, tgtfield, weightfile) + print("Regridder generated successfully.") + except ValueError as e: + print(f"Regridder failed due to a ValueError: {e}.") + except OSError as e: + print( + f"Regridder failed due to an OSError: {e}. Check if the weight file exists and is accessible." + ) + except ( + FileNotFoundError, + IOError, + RuntimeError, + TypeError, + KeyError, + IndexError, + MemoryError, + ) as e: + print( + f"Regridder failed due to corrupted file: {e}. Check if RAVE file has a different grid or format. " + ) + except Exception as e: + print(f"An unexpected error occurred while generating regridder: {e}.") + else: + use_dummy_emiss = True + + return regridder, use_dummy_emiss + + +def mask_edges(data: ndarray, mask_width: int = 1) -> ndarray: + """ + Mask edges of domain for interpolation. 
+ + Args: + data: The numpy array to mask + mask_width: The width of the mask at each edge + + Returns: + A numpy array of the masked edges + """ + original_shape = data.shape + if mask_width < 1: + return data # No masking if mask_width is less than 1 + + # Mask top and bottom rows + data[:mask_width, :] = np.nan + data[-mask_width:, :] = np.nan + + # Mask left and right columns + data[:, :mask_width] = np.nan + data[:, -mask_width:] = np.nan + assert data.shape == original_shape, "Data shape altered during masking." + + return data + + +def interpolate_rave( + RAVE: str, + rave_avail: List[List[str]], + rave_avail_hours: List[str], + use_dummy_emiss: bool, + vars_emis: List[str], + regridder: Any, + srcgrid: ESMF.Grid, + tgtgrid: ESMF.Grid, + rave_to_intp: str, + intp_dir: str, + tgt_latt: DataArray, + tgt_lont: DataArray, + cols: int, + rows: int, +) -> None: + """ + Process a RAVE file for interpolation. + + Args: + RAVE: Path to the raw RAVE files + rave_avail: List of RAVE days/hours that are available + rave_avail_hours: List of RAVE hours that are available + use_dummy_emiss: True if we are using dummy emissions + vars_emis: Names of the emission variables + regridder: The ESMF regridder object (i.e. route handle). This is None if we are using dummy emissions. + srcgrid: The source ESMF grid + tgtgrid: The destination ESMF grid + rave_to_intp: The prefix of RAVE files to interpolate + intp_dir: The RAVE directory containing interpolated files + tgt_latt: The destination grid latitudes + tgt_lont: The destination grid longitudes + cols: Number of columns in the destination + rows: Number of rows in the destination + """ + for index, current_hour in enumerate(rave_avail_hours): + file_name = rave_avail[index] + rave_file_path = os.path.join(RAVE, file_name[0]) + + print(f"Processing file: {rave_file_path} for hour: {current_hour}") + + if not use_dummy_emiss and os.path.exists(rave_file_path): + try: + with xr.open_dataset(rave_file_path, decode_times=False) as ds_togrid: + try: + ds_togrid = ds_togrid[["FRP_MEAN", "FRE"]] + except KeyError as e: + print(f"Missing required variables in {rave_file_path}: {e}") + continue + + output_file_path = os.path.join( + intp_dir, f"{rave_to_intp}{current_hour}00_{current_hour}59.nc" + ) + print("=============before regridding===========", "FRP_MEAN") + print(np.sum(ds_togrid["FRP_MEAN"], axis=(1, 2))) + + try: + with Dataset(output_file_path, "w") as fout: + create_emiss_file(fout, cols, rows) + Store_latlon_by_Level( + fout, + "geolat", + tgt_latt, + "cell center latitude", + "degrees_north", + "-9999.f", + ) + Store_latlon_by_Level( + fout, + "geolon", + tgt_lont, + "cell center longitude", + "degrees_east", + "-9999.f", + ) + + for svar in vars_emis: + try: + srcfield = ESMF.Field( + srcgrid, + name=svar, + staggerloc=ESMF.StaggerLoc.CENTER, + ) + tgtfield = ESMF.Field( + tgtgrid, + name=svar, + staggerloc=ESMF.StaggerLoc.CENTER, + ) + src_rate = ds_togrid[svar].fillna(0) + src_QA = xr.where( + ds_togrid["FRE"] > 1000, src_rate, 0.0 + ) + srcfield.data[...] 
= src_QA[0, :, :] + tgtfield = regridder(srcfield, tgtfield) + masked_tgt_data = mask_edges( + tgtfield.data, mask_width=1 + ) + + if svar == "FRP_MEAN": + Store_by_Level( + fout, + "frp_avg_hr", + "Mean Fire Radiative Power", + "MW", + "0.f", + ) + tgt_rate = masked_tgt_data + fout.variables["frp_avg_hr"][0, :, :] = tgt_rate + print( + "=============after regridding===========" + + svar + ) + print(np.sum(tgt_rate)) + elif svar == "FRE": + Store_by_Level(fout, "FRE", "FRE", "MJ", "0.f") + tgt_rate = masked_tgt_data + fout.variables["FRE"][0, :, :] = tgt_rate + except (ValueError, KeyError) as e: + print( + f"Error processing variable {svar} in {rave_file_path}: {e}" + ) + except ( + OSError, + IOError, + RuntimeError, + FileNotFoundError, + TypeError, + IndexError, + MemoryError, + ) as e: + print( + f"Error creating or writing to NetCDF file {output_file_path}: {e}" + ) + except ( + OSError, + IOError, + RuntimeError, + FileNotFoundError, + TypeError, + IndexError, + MemoryError, + ) as e: + print(f"Error reading NetCDF file {rave_file_path}: {e}") + else: + print(f"File not found or dummy emissions required: {rave_file_path}") diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index a2899043a8..d318baa565 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -36,6 +36,7 @@ valid_vals_CCPP_PHYS_SUITE: [ "FV3_RRFS_v1beta", "FV3_WoFS_v0", "FV3_HRRR", +"FV3_HRRR_gf", "FV3_RAP" ] valid_vals_GFDLgrid_NUM_CELLS: [48, 96, 192, 384, 768, 1152, 3072] From 2ac8822a2010ede5c4d91304447ffd4bcb110682 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Thu, 23 Jan 2025 14:32:25 +0000 Subject: [PATCH 02/41] remove fixupp --- ush/config_defaults.yaml | 4 ---- ush/machine/gaea-c6.yaml | 1 - ush/machine/hera.yaml | 1 - ush/machine/hercules.yaml | 1 - ush/machine/orion.yaml | 1 - 5 files changed, 8 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 375b2e8799..da81fef24e 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -370,9 +370,6 @@ platform: # FIXsmoke: # System directory where Smoke and Dust data files are located. 
# - # FIXupp: - # System directory where UPP fixed files are located - # # FIXcrtm: # System directory where CRTM fixed files are located # @@ -390,7 +387,6 @@ platform: FIXaqm: "" FIXemis: "" FIXsmoke: "" - FIXupp: "" FIXcrtm: "" FIXcrtmupp: "" # diff --git a/ush/machine/gaea-c6.yaml b/ush/machine/gaea-c6.yaml index 1f293adcb8..cb13a474b0 100644 --- a/ush/machine/gaea-c6.yaml +++ b/ush/machine/gaea-c6.yaml @@ -36,7 +36,6 @@ platform: FIXsfc: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/NaturalEarth FIXsmoke: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_smoke - FIXupp: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_upp FIXcrtm: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_crtm EXTRN_MDL_DATA_STORES: aws diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index c034471905..0e4d31edb0 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -41,7 +41,6 @@ platform: FIXaqm: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_aqm FIXemis: /scratch1/RDARCH/rda-arl-gpu/Barry.Baker/emissions/nexus FIXsmoke: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_smoke - FIXupp: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_upp FIXcrtm: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: hpss aws nomads diff --git a/ush/machine/hercules.yaml b/ush/machine/hercules.yaml index 523fa31233..ca018d9a7f 100644 --- a/ush/machine/hercules.yaml +++ b/ush/machine/hercules.yaml @@ -37,7 +37,6 @@ platform: FIXaqm: /work/noaa/epic/SRW-AQM_DATA/fix_aqm FIXemis: /work/noaa/epic/SRW-AQM_DATA/fix_emis FIXsmoke: /work/noaa/epic/SRW-AQM_DATA/fix_smoke - FIXupp: /work/noaa/epic/SRW-AQM_DATA/fix_upp FIXcrtm: /work/noaa/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: aws data: diff --git a/ush/machine/orion.yaml b/ush/machine/orion.yaml index d69ab9e965..f1a608a79b 100644 --- a/ush/machine/orion.yaml +++ b/ush/machine/orion.yaml @@ -36,7 +36,6 @@ platform: FIXaqm: /work/noaa/epic/SRW-AQM_DATA/fix_aqm FIXemis: /work/noaa/epic/SRW-AQM_DATA/fix_emis FIXsmoke: /work/noaa/epic/SRW-AQM_DATA/fix_smoke - FIXupp: /work/noaa/epic/SRW-AQM_DATA/fix_upp FIXcrtm: /work/noaa/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: aws nomads data: From fc259d1e3aa3d5fec380c41fde5475855b200a48 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Thu, 23 Jan 2025 16:26:46 +0000 Subject: [PATCH 03/41] update scripts --- jobs/JSRW_PREPSTART | 2 +- jobs/JSRW_SMOKE_DUST | 4 +- scripts/exregional_make_ics.sh | 12 +++- scripts/exregional_make_lbcs.sh | 8 ++- scripts/exregional_make_orog.sh | 2 +- scripts/exregional_run_post.sh | 114 ++++++++++++++++++++++---------- scripts/exsrw_prepstart.sh | 12 ++-- scripts/exsrw_smoke_dust.sh | 13 ++-- ush/config_defaults.yaml | 4 ++ ush/machine/gaea-c6.yaml | 1 + ush/machine/hera.yaml | 1 + ush/machine/hercules.yaml | 1 + ush/machine/orion.yaml | 1 + ush/setup.py | 5 ++ 14 files changed, 121 insertions(+), 59 deletions(-) diff --git a/jobs/JSRW_PREPSTART b/jobs/JSRW_PREPSTART index 50476d6f45..1952923d9d 100755 --- a/jobs/JSRW_PREPSTART +++ b/jobs/JSRW_PREPSTART @@ -9,7 +9,6 @@ # date export PS4='+ $SECONDS + ' -set -xue # #----------------------------------------------------------------------- # @@ -33,6 +32,7 @@ export USHdir="${USHsrw}" # should be removed later for sect in user nco platform workflow global smoke_dust_parm ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done +set -xue # #----------------------------------------------------------------------- # diff --git a/jobs/JSRW_SMOKE_DUST b/jobs/JSRW_SMOKE_DUST 
index 4341fda03c..0b6fafeb83 100755
--- a/jobs/JSRW_SMOKE_DUST
+++ b/jobs/JSRW_SMOKE_DUST
@@ -9,7 +9,6 @@
 #
 date
 export PS4='+ $SECONDS + '
-set -xue
 #
 #-----------------------------------------------------------------------
 #
@@ -33,6 +32,7 @@ export USHdir="${USHsrw}" # should be removed later
 for sect in user nco platform workflow global smoke_dust_parm ; do
   source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect}
 done
+set -xue
 #
 #-----------------------------------------------------------------------
 #
@@ -113,7 +113,7 @@ fi
 mkdir -p ${COMOUT}
 
 export COMINsmoke="${COMINsmoke:-${COMINsmoke_default}}"
-export COMINfire="${COMINfire:-${COMINfire_default}}"
+export COMINrave="${COMINrave:-${COMINrave_default}}"
 
 # Create a temporary share directory for RAVE interpolated data files
 export DATA_SHARE="${DATA_SHARE:-${DATAROOT}/DATA_SHARE/RAVE_fire_intp}"
diff --git a/scripts/exregional_make_ics.sh b/scripts/exregional_make_ics.sh
index debf526798..ac1884e4d8 100755
--- a/scripts/exregional_make_ics.sh
+++ b/scripts/exregional_make_ics.sh
@@ -86,7 +86,8 @@
 #-----------------------------------------------------------------------
 #
 . $USHdir/source_util_funcs.sh
-for sect in user nco platform workflow global cpl_aqm_parm constants task_get_extrn_ics task_make_ics ; do
+for sect in user nco platform workflow global cpl_aqm_parm smoke_dust_parm \
+  constants task_get_extrn_ics task_make_ics ; do
   source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect}
 done
 #
@@ -195,6 +196,7 @@ case "${CCPP_PHYS_SUITE}" in
   "FV3_GFS_v17_p8" | \
   "FV3_WoFS_v0" | \
   "FV3_HRRR" | \
+  "FV3_HRRR_gf" | \
   "FV3_RAP" )
     if [ "${EXTRN_MDL_NAME_ICS}" = "RAP" ] || \
        [ "${EXTRN_MDL_NAME_ICS}" = "RRFS" ] || \
@@ -727,7 +729,7 @@ POST_STEP
 #
 #-----------------------------------------------------------------------
 #
-if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then
+if [ $(boolify "${CPL_AQM}") = "TRUE" ] || [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then
   COMOUT="${COMROOT}/${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR}" #temporary path, should be removed later
   if [ $(boolify "${COLDSTART}") = "TRUE" ] && [ "${PDY}${cyc}" = "${DATE_FIRST_CYCL:0:10}" ]; then
     data_trans_path="${COMOUT}"
@@ -737,7 +739,11 @@ if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then
   cp -p out.atm.tile${TILE_RGNL}.nc "${data_trans_path}/${NET}.${cycle}${dot_ensmem}.gfs_data.tile${TILE_RGNL}.halo${NH0}.nc"
   cp -p out.sfc.tile${TILE_RGNL}.nc "${COMOUT}/${NET}.${cycle}${dot_ensmem}.sfc_data.tile${TILE_RGNL}.halo${NH0}.nc"
   cp -p gfs_ctrl.nc "${COMOUT}/${NET}.${cycle}${dot_ensmem}.gfs_ctrl.nc"
-  cp -p gfs.bndy.nc "${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile${TILE_RGNL}.f000.nc"
+  if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then
+    cp -p gfs.bndy.nc "${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile${TILE_RGNL}.f000.nc"
+  else
+    cp -p gfs.bndy.nc "${COMOUT}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile${TILE_RGNL}.f000.nc"
+  fi
 else
   mv out.atm.tile${TILE_RGNL}.nc ${INPUT_DATA}/${NET}.${cycle}${dot_ensmem}.gfs_data.tile${TILE_RGNL}.halo${NH0}.nc
   mv out.sfc.tile${TILE_RGNL}.nc ${INPUT_DATA}/${NET}.${cycle}${dot_ensmem}.sfc_data.tile${TILE_RGNL}.halo${NH0}.nc
diff --git a/scripts/exregional_make_lbcs.sh b/scripts/exregional_make_lbcs.sh
index acbe97a56b..2b4d8821b7 100755
--- a/scripts/exregional_make_lbcs.sh
+++ b/scripts/exregional_make_lbcs.sh
@@ -87,7 +87,8 @@
 #
 . 
$USHdir/source_util_funcs.sh set -x -for sect in user nco platform workflow global cpl_aqm_parm constants task_get_extrn_lbcs task_make_lbcs ; do +for sect in user nco platform workflow global cpl_aqm_parm smoke_dust_parm \ + constants task_get_extrn_lbcs task_make_lbcs ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # @@ -194,6 +195,7 @@ case "${CCPP_PHYS_SUITE}" in "FV3_GFS_v17_p8" | \ "FV3_WoFS_v0" | \ "FV3_HRRR" | \ + "FV3_HRRR_gf" | \ "FV3_RAP") if [ "${EXTRN_MDL_NAME_LBCS}" = "RAP" ] || \ [ "${EXTRN_MDL_NAME_LBCS}" = "RRFS" ] || \ @@ -648,6 +650,10 @@ located in the following directory: fcst_hhh_FV3LAM=$( printf "%03d" "$fcst_hhh" ) if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then cp -p gfs.bndy.nc ${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile7.f${fcst_hhh_FV3LAM}.nc + elif [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then + COMOUT="${COMROOT}/${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR}" #temporary path, should be removed later + mkdir -p ${COMOUT} + cp -p gfs.bndy.nc ${COMOUT}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile7.f${fcst_hhh_FV3LAM}.nc else mv gfs.bndy.nc ${INPUT_DATA}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile7.f${fcst_hhh_FV3LAM}.nc fi diff --git a/scripts/exregional_make_orog.sh b/scripts/exregional_make_orog.sh index 34b1675d8c..f3d2e8508b 100755 --- a/scripts/exregional_make_orog.sh +++ b/scripts/exregional_make_orog.sh @@ -290,7 +290,7 @@ mv "${raw_orog_fp_orig}" "${raw_orog_fp}" # #----------------------------------------------------------------------- # -suites=( "FV3_RAP" "FV3_HRRR" "FV3_GFS_v15_thompson_mynn_lam3km" "FV3_GFS_v17_p8" ) +suites=( "FV3_RAP" "FV3_HRRR" "FV3_HRRR_gf" "FV3_GFS_v15_thompson_mynn_lam3km" "FV3_GFS_v17_p8" ) if [[ ${suites[@]} =~ "${CCPP_PHYS_SUITE}" ]] ; then DATA="${DATA:-${OROG_DIR}/temp_orog_data}" mkdir -p ${DATA} diff --git a/scripts/exregional_run_post.sh b/scripts/exregional_run_post.sh index a325245a5f..05adedf288 100755 --- a/scripts/exregional_run_post.sh +++ b/scripts/exregional_run_post.sh @@ -65,7 +65,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco platform workflow global cpl_aqm_parm \ +for sect in user nco platform workflow global cpl_aqm_parm smoke_dust_parm \ task_run_fcst task_run_post ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -155,7 +155,7 @@ else if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then post_config_fp="${PARMdir}/upp/postxconfig-NT-AQM.txt" else - post_config_fp="${PARMdir}/upp/postxconfig-NT-fv3lam.txt" + post_config_fp="${PARMdir}/upp/postxconfig-NT-rrfs.txt" fi print_info_msg " ==================================================================== @@ -167,7 +167,7 @@ temporary work directory (DATA_FHR): fi cp ${post_config_fp} ./postxconfig-NT.txt cp ${PARMdir}/upp/params_grib2_tbl_new . 
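Note: the hunk above switches non-AQM runs from the generic FV3-LAM UPP
control file to the RRFS one, so the default product set now includes the
smoke and dust fields. A quick spot check that smoke reached the post
output might look like this sketch; the file name and the ":MASSDEN:"
match string are illustrative and depend on NET, cycle, and
POST_OUTPUT_DOMAIN_NAME:

    # Hypothetical spot check for smoke mass density in the UPP GRIB2 output
    wgrib2 smoke_dust.t00z.prslev.f003.rrfs_conus_3km.grib2 -s -match ":MASSDEN:" | head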
-if [ $(boolify ${USE_CRTM}) = "TRUE" ]; then +if [ $(boolify ${DO_SMOKE_DUST}) = "TRUE" ] || [ $(boolify ${USE_CRTM}) = "TRUE" ]; then cp ${CRTM_DIR}/Nalli.IRwater.EmisCoeff.bin ./ cp ${CRTM_DIR}/FAST*.bin ./ cp ${CRTM_DIR}/NPOESS.IRland.EmisCoeff.bin ./ @@ -264,6 +264,23 @@ fileNameFlux='${phy_file}' KPO=47,PO=1000.,975.,950.,925.,900.,875.,850.,825.,800.,775.,750.,725.,700.,675.,650.,625.,600.,575.,550.,525.,500.,475.,450.,425.,400.,375.,350.,325.,300.,275.,250.,225.,200.,175.,150.,125.,100.,70.,50.,30.,20.,10.,7.,5.,3.,2.,1.,${post_itag_add},numx=${NUMX} / EOF + +if [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then + if [ ${PREDEF_GRID_NAME} = "RRFS_CONUS_3km" ]; then + grid_specs_rrfs="lambert:-97.5:38.500000 237.280472:1799:3000 21.138115:1059:3000" + elif [ ${PREDEF_GRID_NAME} = "RRFS_NA_3km" ]; then + grid_specs_rrfs="rot-ll:247.000000:-35.000000:0.000000 299.000000:4881:0.025000 -37.0000000:2961:0.025000" + fi + if [ ${PREDEF_GRID_NAME} = "RRFS_CONUS_3km" ] || [ ${PREDEF_GRID_NAME} = "RRFS_NA_3km" ]; then + for ayear in 100y 10y 5y 2y ; do + for ahour in 01h 03h 06h 12h 24h; do + if [ -f ${FIXupp}/${PREDEF_GRID_NAME}/ari${ayear}_${ahour}.grib2 ]; then + ln -snf ${FIXupp}/${PREDEF_GRID_NAME}/ari${ayear}_${ahour}.grib2 ari${ayear}_${ahour}.grib2 + fi + done + done + fi +fi # #----------------------------------------------------------------------- # @@ -315,45 +332,70 @@ The \${fhr} variable contains too few or too many characters: fhr = \"$fhr\"" fi -post_mn_or_null="" -dot_post_mn_or_null="" -if [ "${post_mn}" != "00" ]; then - post_mn_or_null="${post_mn}" - dot_post_mn_or_null=".${post_mn}" -fi +if [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then + bgdawp=${NET}.${cycle}.prslev.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 + bgrd3d=${NET}.${cycle}.natlev.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 + bgifi=${NET}.${cycle}.ififip.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 + bgavi=${NET}.${cycle}.aviation.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 + + if [ -f "PRSLEV.GrbF${post_fhr}" ]; then + wgrib2 PRSLEV.GrbF${post_fhr} -set center 7 -grib ${bgdawp} >>$pgmout 2>>errfile + cp -p ${bgdawp} ${COMOUT} + fi + if [ -f "NATLEV.GrbF${post_fhr}" ]; then + wgrib2 NATLEV.GrbF${post_fhr} -set center 7 -grib ${bgrd3d} >>$pgmout 2>>errfile + cp -p ${bgrd3d} ${COMOUT} + fi + if [ -f "IFIFIP.GrbF${post_fhr}" ]; then + wgrib2 IFIFIP.GrbF${post_fhr} -set center 7 -grib ${bgifi} >>$pgmout 2>>errfile + cp -p ${bgifi} ${COMOUT} + fi + if [ -f "AVIATION.GrbF${post_fhr}" ]; then + wgrib2 AVIATION.GrbF${post_fhr} -set center 7 -grib ${bgavi} >>$pgmout 2>>errfile + cp -p ${bgavi} ${COMOUT} + fi -post_fn_suffix="GrbF${post_fhr}${dot_post_mn_or_null}" -post_renamed_fn_suffix="f${fhr}${post_mn_or_null}.${POST_OUTPUT_DOMAIN_NAME}.grib2" -# -# For convenience, change location to COMOUT (where the final output -# from UPP will be located). Then loop through the two files that UPP -# generates (i.e. "...prslev..." and "...natlev..." files) and move, -# rename, and create symlinks to them. 
-# -cd "${COMOUT}" -basetime=$( $DATE_UTIL --date "$yyyymmdd $hh" +%y%j%H%M ) -symlink_suffix="${dot_ensmem}.${basetime}f${fhr}${post_mn}" -if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then - fids=( "cmaq" ) else - fids=( "prslev" "natlev" ) -fi -for fid in "${fids[@]}"; do - FID=$(echo_uppercase $fid) - post_orig_fn="${FID}.${post_fn_suffix}" - post_renamed_fn="${NET}.${cycle}${dot_ensmem}.${fid}.${post_renamed_fn_suffix}" - mv ${DATA_FHR}/${post_orig_fn} ${post_renamed_fn} - if [ $RUN_ENVIR != "nco" ]; then - create_symlink_to_file ${post_renamed_fn} ${FID}${symlink_suffix} TRUE + post_mn_or_null="" + dot_post_mn_or_null="" + if [ "${post_mn}" != "00" ]; then + post_mn_or_null="${post_mn}" + dot_post_mn_or_null=".${post_mn}" fi - # DBN alert - if [ "$SENDDBN" = "TRUE" ]; then - $DBNROOT/bin/dbn_alert MODEL rrfs_post ${job} ${COMOUT}/${post_renamed_fn} + + post_fn_suffix="GrbF${post_fhr}${dot_post_mn_or_null}" + post_renamed_fn_suffix="f${fhr}${post_mn_or_null}.${POST_OUTPUT_DOMAIN_NAME}.grib2" + # + # For convenience, change location to COMOUT (where the final output + # from UPP will be located). Then loop through the two files that UPP + # generates (i.e. "...prslev..." and "...natlev..." files) and move, + # rename, and create symlinks to them. + # + cd "${COMOUT}" + basetime=$( $DATE_UTIL --date "$yyyymmdd $hh" +%y%j%H%M ) + symlink_suffix="${dot_ensmem}.${basetime}f${fhr}${post_mn}" + if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then + fids=( "cmaq" ) + else + fids=( "prslev" "natlev" ) fi -done + for fid in "${fids[@]}"; do + FID=$(echo_uppercase $fid) + post_orig_fn="${FID}.${post_fn_suffix}" + post_renamed_fn="${NET}.${cycle}${dot_ensmem}.${fid}.${post_renamed_fn_suffix}" + mv ${DATA_FHR}/${post_orig_fn} ${post_renamed_fn} + if [ $RUN_ENVIR != "nco" ]; then + create_symlink_to_file ${post_renamed_fn} ${FID}${symlink_suffix} TRUE + fi + # DBN alert + if [ "$SENDDBN" = "TRUE" ]; then + $DBNROOT/bin/dbn_alert MODEL rrfs_post ${job} ${COMOUT}/${post_renamed_fn} + fi + done -rm -rf ${DATA_FHR} + rm -rf ${DATA_FHR} +fi # #----------------------------------------------------------------------- # diff --git a/scripts/exsrw_prepstart.sh b/scripts/exsrw_prepstart.sh index d1e9d6a4f2..6c2627aa19 100755 --- a/scripts/exsrw_prepstart.sh +++ b/scripts/exsrw_prepstart.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -set -xue # #----------------------------------------------------------------------- # @@ -8,11 +7,10 @@ set -xue # #----------------------------------------------------------------------- # -. ${PARMsrw}/source_util_funcs.sh -task_global_vars=( "COLDSTART" "DATE_FIRST_CYCL" "DO_SMOKE_DUST" \ - "INCR_CYCL_FREQ" "IO_LAYOUT_Y" "PRE_TASK_CMDS" ) -for var in ${task_global_vars[@]}; do - source_config_for_task ${var} ${GLOBAL_VAR_DEFNS_FP} +. 
${USHsrw}/source_util_funcs.sh +for sect in user nco platform workflow global smoke_dust_parm \ + constants fixed_files grid_params task_run_fcst ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # #----------------------------------------------------------------------- @@ -22,7 +20,7 @@ done # #----------------------------------------------------------------------- # -#{ save_shell_opts; set -xue; } > /dev/null 2>&1 +set -xue # #----------------------------------------------------------------------- # diff --git a/scripts/exsrw_smoke_dust.sh b/scripts/exsrw_smoke_dust.sh index 47888f2739..6efce6dc4c 100755 --- a/scripts/exsrw_smoke_dust.sh +++ b/scripts/exsrw_smoke_dust.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -set -xue # #----------------------------------------------------------------------- # @@ -8,12 +7,10 @@ set -xue # #----------------------------------------------------------------------- # -. ${PARMsrw}/source_util_funcs.sh -task_global_vars=( "EBB_DCYCLE" "FIXsmoke" "INCR_CYCL_FREQ" \ - "PERSISTENCE" "PRE_TASK_CMDS" "PREDEF_GRID_NAME" "RESTART_INTERVAL" \ - "SMOKE_DUST_FILE_PREFIX" ) -for var in ${task_global_vars[@]}; do - source_config_for_task ${var} ${GLOBAL_VAR_DEFNS_FP} +. ${USHsrw}/source_util_funcs.sh +for sect in user nco platform workflow global smoke_dust_parm \ + constants fixed_files grid_params task_run_fcst ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # #----------------------------------------------------------------------- @@ -23,7 +20,7 @@ done # #----------------------------------------------------------------------- # -#{ save_shell_opts; set -xue; } > /dev/null 2>&1 +set -xue # #----------------------------------------------------------------------- # diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index da81fef24e..375b2e8799 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -370,6 +370,9 @@ platform: # FIXsmoke: # System directory where Smoke and Dust data files are located. 
# + # FIXupp: + # System directory where UPP fixed files are located + # # FIXcrtm: # System directory where CRTM fixed files are located # @@ -387,6 +390,7 @@ platform: FIXaqm: "" FIXemis: "" FIXsmoke: "" + FIXupp: "" FIXcrtm: "" FIXcrtmupp: "" # diff --git a/ush/machine/gaea-c6.yaml b/ush/machine/gaea-c6.yaml index cb13a474b0..1f293adcb8 100644 --- a/ush/machine/gaea-c6.yaml +++ b/ush/machine/gaea-c6.yaml @@ -36,6 +36,7 @@ platform: FIXsfc: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/NaturalEarth FIXsmoke: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_smoke + FIXupp: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_upp FIXcrtm: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_crtm EXTRN_MDL_DATA_STORES: aws diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index 0e4d31edb0..c034471905 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -41,6 +41,7 @@ platform: FIXaqm: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_aqm FIXemis: /scratch1/RDARCH/rda-arl-gpu/Barry.Baker/emissions/nexus FIXsmoke: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_smoke + FIXupp: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_upp FIXcrtm: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: hpss aws nomads diff --git a/ush/machine/hercules.yaml b/ush/machine/hercules.yaml index ca018d9a7f..523fa31233 100644 --- a/ush/machine/hercules.yaml +++ b/ush/machine/hercules.yaml @@ -37,6 +37,7 @@ platform: FIXaqm: /work/noaa/epic/SRW-AQM_DATA/fix_aqm FIXemis: /work/noaa/epic/SRW-AQM_DATA/fix_emis FIXsmoke: /work/noaa/epic/SRW-AQM_DATA/fix_smoke + FIXupp: /work/noaa/epic/SRW-AQM_DATA/fix_upp FIXcrtm: /work/noaa/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: aws data: diff --git a/ush/machine/orion.yaml b/ush/machine/orion.yaml index f1a608a79b..d69ab9e965 100644 --- a/ush/machine/orion.yaml +++ b/ush/machine/orion.yaml @@ -36,6 +36,7 @@ platform: FIXaqm: /work/noaa/epic/SRW-AQM_DATA/fix_aqm FIXemis: /work/noaa/epic/SRW-AQM_DATA/fix_emis FIXsmoke: /work/noaa/epic/SRW-AQM_DATA/fix_smoke + FIXupp: /work/noaa/epic/SRW-AQM_DATA/fix_upp FIXcrtm: /work/noaa/epic/SRW-AQM_DATA/fix_crtm EXTRN_MDL_DATA_STORES: aws nomads data: diff --git a/ush/setup.py b/ush/setup.py index d5ba107a04..85131e460b 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -1818,6 +1818,11 @@ def _dict_find(user_dict, substring): var_defns_cfg["workflow"][dates] = date_to_str(var_defns_cfg["workflow"][dates]) var_defns_cfg.dump(global_var_defns_fp) + # Generate a flag file for cold start + if expt_config["workflow"].get("COLDSTART"): + coldstart_date=date_to_str(workflow_config["DATE_FIRST_CYCL"]) + fn_pass=f"task_skip_coldstart_{coldstart_date}.txt" + open(os.path.join(exptdir,fn_pass), 'a').close() # # ----------------------------------------------------------------------- From 3645b7184e48f52a1f7fb1a6305c869cf2b05fab Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Thu, 23 Jan 2025 18:03:04 +0000 Subject: [PATCH 04/41] fix setup.py --- scripts/exregional_run_fcst.sh | 42 ++++++++++++++++++++++++---------- ush/config.smoke_dust.yaml | 21 +++++++++++++---- ush/setup.py | 2 +- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/scripts/exregional_run_fcst.sh b/scripts/exregional_run_fcst.sh index 50ae3bb8ea..da31c4e74a 100755 --- a/scripts/exregional_run_fcst.sh +++ b/scripts/exregional_run_fcst.sh @@ -117,7 +117,7 @@ # . 
$USHdir/source_util_funcs.sh for sect in user nco platform workflow global cpl_aqm_parm constants fixed_files \ - task_get_extrn_lbcs task_run_fcst task_run_post fire; do + task_get_extrn_lbcs task_run_fcst task_run_post smoke_dust_parm fire; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -298,7 +298,7 @@ create_symlink_to_file $target $symlink ${relative_link_flag} # that the FV3 model is hardcoded to recognize, and those are the names # we use below. # -suites=( "FV3_RAP" "FV3_HRRR" "FV3_GFS_v15_thompson_mynn_lam3km" "FV3_GFS_v17_p8" ) +suites=( "FV3_RAP" "FV3_HRRR" "FV3_HRRR_gf" "FV3_GFS_v15_thompson_mynn_lam3km" "FV3_GFS_v17_p8" ) if [[ ${suites[@]} =~ "${CCPP_PHYS_SUITE}" ]] ; then file_ids=( "ss" "ls" ) for file_id in "${file_ids[@]}"; do @@ -338,7 +338,7 @@ cd ${DATA}/INPUT # relative_link_flag="FALSE" -if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then +if [ $(boolify "${CPL_AQM}") = "TRUE" ] || [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then COMIN="${COMROOT}/${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR}" #temporary path, should be removed later target="${COMIN}/${NET}.${cycle}${dot_ensmem}.gfs_data.tile${TILE_RGNL}.halo${NH0}.nc" @@ -358,17 +358,35 @@ if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then symlink="gfs_bndy.tile${TILE_RGNL}.${fhr}.nc" create_symlink_to_file $target $symlink ${relative_link_flag} done - target="${COMIN}/${NET}.${cycle}${dot_ensmem}.NEXUS_Expt.nc" - symlink="NEXUS_Expt.nc" - create_symlink_to_file $target $symlink ${relative_link_flag} - # create symlink to PT for point source in SRW-AQM - target="${COMIN}/${NET}.${cycle}${dot_ensmem}.PT.nc" - if [ -f ${target} ]; then - symlink="PT.nc" + if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then + target="${COMIN}/${NET}.${cycle}${dot_ensmem}.NEXUS_Expt.nc" + symlink="NEXUS_Expt.nc" create_symlink_to_file $target $symlink ${relative_link_flag} - fi + # create symlink to PT for point source in SRW-AQM + target="${COMIN}/${NET}.${cycle}${dot_ensmem}.PT.nc" + if [ -f ${target} ]; then + symlink="PT.nc" + create_symlink_to_file $target $symlink ${relative_link_flag} + fi + else + ln -nsf ${FIXsmoke}/${PREDEF_GRID_NAME}/dust12m_data.nc . + ln -nsf ${FIXsmoke}/${PREDEF_GRID_NAME}/emi_data.nc . 
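+
+    # Link the smoke/dust emissions file produced by the smoke_dust task for
+    # this cycle if it exists; otherwise fall back to a grid-specific dummy
+    # emissions file so the forecast can still run (see the warnings below).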
+ + smokefile="${COMIN}/${SMOKE_DUST_FILE_PREFIX}_${PDY}${cyc}00.nc" + if [ -f ${smokefile} ]; then + ln -nsf ${smokefile} ${SMOKE_DUST_FILE_PREFIX}.nc + else + if [ "${EBB_DCYCLE}" = "1" ]; then + ln -nsf ${FIXsmoke}/${PREDEF_GRID_NAME}/dummy_24hr_smoke_ebbdc1.nc ${SMOKE_DUST_FILE_PREFIX}.nc + echo "WARNING: Smoke file is not available, use dummy_24hr_smoke_ebbdc1.nc instead" + else + ln -nsf ${FIXsmoke}/${PREDEF_GRID_NAME}/dummy_24hr_smoke.nc ${SMOKE_DUST_FILE_PREFIX}.nc + echo "WARNING: Smoke file is not available, use dummy_24hr_smoke.nc instead" + fi + fi + fi else target="${INPUT_DATA}/${NET}.${cycle}${dot_ensmem}.gfs_data.tile${TILE_RGNL}.halo${NH0}.nc" symlink="gfs_data.nc" @@ -520,7 +538,7 @@ if [ $(boolify ${WRITE_DOPOST}) = "TRUE" ]; then if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then post_config_fp="${PARMdir}/upp/postxconfig-NT-AQM.txt" else - post_config_fp="${PARMdir}/upp/postxconfig-NT-fv3lam.txt" + post_config_fp="${PARMdir}/upp/postxconfig-NT-rrfs.txt" fi print_info_msg " ==================================================================== diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml index 8bea3cb198..f566b0677d 100644 --- a/ush/config.smoke_dust.yaml +++ b/ush/config.smoke_dust.yaml @@ -2,9 +2,12 @@ metadata: description: config for Smoke and Dust, RRFS_CONUS_3km user: RUN_ENVIR: community -platform: - BUILD_MOD_FN: 'build_{{ user.MACHINE|lower() }}_intel_prod' + MACHINE: [hera/orion/hercules/gaea-c6] + ACCOUNT: [account name] workflow: + USE_CRON_TO_RELAUNCH: false + CRON_RELAUNCH_INTVL_MNTS: 3 + EXPT_SUBDIR: smoke_dust_conus3km PREDEF_GRID_NAME: RRFS_CONUS_3km CCPP_PHYS_SUITE: FV3_HRRR_gf DATE_FIRST_CYCL: '2019072200' @@ -19,10 +22,11 @@ workflow: FIELD_TABLE_TMPL_FN: field_table_smoke_dust.FV3_HRRR_gf DO_REAL_TIME: false COLDSTART: true +# WARMSTART_CYCLE_DIR: '/path/to/warm/start/files' nco: - envir_default: we2e_smoke_dust - NET_default: we2e_smoke_dust - RUN_default: we2e_smoke_dust + envir_default: test_smoke_dust + NET_default: smoke_dust + RUN_default: smoke_dust rocoto: tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/smoke_dust.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml"]|include }}' @@ -33,16 +37,23 @@ task_get_extrn_ics: EXTRN_MDL_NAME_ICS: RAP EXTRN_MDL_ICS_OFFSET_HRS: 0 USE_USER_STAGED_EXTRN_FILES: true + EXTRN_MDL_SOURCE_BASEDIR_ICS: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAP_DATA_SD/${yyyymmddhh} # hera +# EXTRN_MDL_SOURCE_BASEDIR_ICS: /work/noaa/epic/SRW-AQM_DATA/input_model_data/RAP/${yyyymmddhh} # orion/hercules +# EXTRN_MDL_SOURCE_BASEDIR_ICS: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmddhh} # gaea-c6 task_get_extrn_lbcs: EXTRN_MDL_NAME_LBCS: RAP LBC_SPEC_INTVL_HRS: 6 EXTRN_MDL_LBCS_OFFSET_HRS: 0 USE_USER_STAGED_EXTRN_FILES: true + EXTRN_MDL_SOURCE_BASEDIR_LBCS: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAP_DATA_SD/${yyyymmddhh} # hera +# EXTRN_MDL_SOURCE_BASEDIR_LBCS: /work/noaa/epic/SRW-AQM_DATA/input_model_data/RAP/${yyyymmddhh} # orion/hercules +# EXTRN_MDL_SOURCE_BASEDIR_LBCS: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmddhh} # gaea-c6 task_make_ics: VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" task_make_lbcs: VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" task_run_fcst: + OMP_NUM_THREADS_RUN_FCST: 1 DT_ATMOS: 36 LAYOUT_X: 15 LAYOUT_Y: 20 diff --git a/ush/setup.py b/ush/setup.py index 85131e460b..13fcec93de 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ 
-1820,7 +1820,7 @@ def _dict_find(user_dict, substring): # Generate a flag file for cold start if expt_config["workflow"].get("COLDSTART"): - coldstart_date=date_to_str(workflow_config["DATE_FIRST_CYCL"]) + coldstart_date = var_defns_cfg["workflow"]["DATE_FIRST_CYCL"] fn_pass=f"task_skip_coldstart_{coldstart_date}.txt" open(os.path.join(exptdir,fn_pass), 'a').close() From d3c108704887be67f5c10781e72fba892f1fe056 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Thu, 23 Jan 2025 21:28:24 +0000 Subject: [PATCH 05/41] fix issues on run_fcst --- parm/FV3.input.yml | 3 --- parm/wflow/coldstart.yaml | 16 ++++++++++++++++ scripts/exregional_run_fcst.sh | 2 +- ush/config.smoke_dust.yaml | 2 ++ ush/generate_FV3LAM_wflow.py | 5 +++++ ush/job_preamble.sh | 2 ++ ush/link_fix.py | 2 +- 7 files changed, 27 insertions(+), 5 deletions(-) diff --git a/parm/FV3.input.yml b/parm/FV3.input.yml index bf12a5e73f..40184b39d6 100644 --- a/parm/FV3.input.yml +++ b/parm/FV3.input.yml @@ -104,8 +104,6 @@ FV3_HRRR_gf: atmos_model_nml: avg_max_length: 3600. ignore_rst_cksum: true - external_ic_nml: - levp: 66 fv_core_nml: agrid_vel_rst: true d_con: 0.5 @@ -125,7 +123,6 @@ FV3_HRRR_gf: n_split: 5 n_sponge: 65 nord_tr: 0 - npz: 65 psm_bc: 1 range_warn: False regional_bcs_from_gsi: false diff --git a/parm/wflow/coldstart.yaml b/parm/wflow/coldstart.yaml index 6fad0b8d83..6425ae20b0 100644 --- a/parm/wflow/coldstart.yaml +++ b/parm/wflow/coldstart.yaml @@ -209,3 +209,19 @@ metatask_run_ensemble: taskdep: attrs: task: aqm_lbcs + or_smoke_dust: + not: + taskvalid: + attrs: + task: smoke_dust + taskdep: + attrs: + task: smoke_dust + or_prepstart: + not: + taskvalid: + attrs: + task: prepstart + taskdep: + attrs: + task: prepstart diff --git a/scripts/exregional_run_fcst.sh b/scripts/exregional_run_fcst.sh index da31c4e74a..238db021c8 100755 --- a/scripts/exregional_run_fcst.sh +++ b/scripts/exregional_run_fcst.sh @@ -120,7 +120,7 @@ for sect in user nco platform workflow global cpl_aqm_parm constants fixed_files task_get_extrn_lbcs task_run_fcst task_run_post smoke_dust_parm fire; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done - +set -xue # #----------------------------------------------------------------------- # diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml index f566b0677d..e723d91e74 100644 --- a/ush/config.smoke_dust.yaml +++ b/ush/config.smoke_dust.yaml @@ -49,8 +49,10 @@ task_get_extrn_lbcs: # EXTRN_MDL_SOURCE_BASEDIR_LBCS: /work/noaa/epic/SRW-AQM_DATA/input_model_data/RAP/${yyyymmddhh} # orion/hercules # EXTRN_MDL_SOURCE_BASEDIR_LBCS: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmddhh} # gaea-c6 task_make_ics: + LEVP: 66 VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" task_make_lbcs: + LEVP: 66 VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" task_run_fcst: OMP_NUM_THREADS_RUN_FCST: 1 diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py index 2b2f85c8a7..d6ae6a87c5 100755 --- a/ush/generate_FV3LAM_wflow.py +++ b/ush/generate_FV3LAM_wflow.py @@ -424,6 +424,11 @@ def generate_FV3LAM_wflow( "print_diff_pgr": PRINT_DIFF_PGR, }) + if DO_SMOKE_DUST: + gfs_physics_nml_dict.update({ + "ebb_dcycle": EBB_DCYCLE, + }) + if CPL_AQM: gfs_physics_nml_dict.update({ "cplaqm": True, diff --git a/ush/job_preamble.sh b/ush/job_preamble.sh index 06603041fd..eb30bfaf89 100644 --- a/ush/job_preamble.sh +++ b/ush/job_preamble.sh @@ -32,6 +32,8 @@ export DATAROOT="${DATAROOT:-${PTMP}/${envir}/tmp}" export 
DCOMROOT="${DCOMROOT:-${PTMP}/${envir}/dcom}" export DATA_SHARE="${DATA_SHARE:-${DATAROOT}/DATA_SHARE/${PDY}${cyc}}" +mkdir -p ${DATA_SHARE} + export DBNROOT="${DBNROOT:-${DBNROOT_default}}" export SENDECF="${SENDECF:-${SENDECF_default}}" export SENDDBN="${SENDDBN:-${SENDDBN_default}}" diff --git a/ush/link_fix.py b/ush/link_fix.py index 1e4a7c6254..f6f56007c6 100755 --- a/ush/link_fix.py +++ b/ush/link_fix.py @@ -206,7 +206,7 @@ def link_fix( f"C*{dot_or_uscore}oro_data.tile{tile_rgnl}.halo{nh0}.nc", f"C*{dot_or_uscore}oro_data.tile{tile_rgnl}.halo{nh4}.nc", ] - if ccpp_phys_suite == "FV3_RAP" or ccpp_phys_suite == "FV3_HRRR" or ccpp_phys_suite == "FV3_GFS_v15_thompson_mynn_lam3km" or ccpp_phys_suite == "FV3_GFS_v17_p8": + if ccpp_phys_suite == "FV3_RAP" or ccpp_phys_suite == "FV3_HRRR" or ccpp_phys_suite == "FV3_HRRR_gf" or ccpp_phys_suite == "FV3_GFS_v15_thompson_mynn_lam3km" or ccpp_phys_suite == "FV3_GFS_v17_p8": fns += [ f"C*{dot_or_uscore}oro_data_ss.tile{tile_rgnl}.halo{nh0}.nc", f"C*{dot_or_uscore}oro_data_ls.tile{tile_rgnl}.halo{nh0}.nc", From 4d82c44430d17d6445a2d72912c5f3699613c9a0 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Fri, 24 Jan 2025 03:33:34 +0000 Subject: [PATCH 06/41] fix run_post failure --- scripts/exregional_make_ics.sh | 6 ++++- scripts/exregional_make_lbcs.sh | 6 ++++- scripts/exregional_run_fcst.sh | 11 +++++----- scripts/exregional_run_post.sh | 39 ++++++++++++++++++++------------- ush/config.smoke_dust.yaml | 1 - 5 files changed, 40 insertions(+), 23 deletions(-) diff --git a/scripts/exregional_make_ics.sh b/scripts/exregional_make_ics.sh index ac1884e4d8..00fe586d96 100755 --- a/scripts/exregional_make_ics.sh +++ b/scripts/exregional_make_ics.sh @@ -201,7 +201,11 @@ case "${CCPP_PHYS_SUITE}" in if [ "${EXTRN_MDL_NAME_ICS}" = "RAP" ] || \ [ "${EXTRN_MDL_NAME_ICS}" = "RRFS" ] || \ [ "${EXTRN_MDL_NAME_ICS}" = "HRRR" ]; then - varmap_file="GSDphys_var_map.txt" + if [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then + varmap_file="GSDphys_smoke_var_map.txt" + else + varmap_file="GSDphys_var_map.txt" + fi elif [ "${EXTRN_MDL_NAME_ICS}" = "NAM" ] || \ [ "${EXTRN_MDL_NAME_ICS}" = "FV3GFS" ] || \ [ "${EXTRN_MDL_NAME_ICS}" = "UFS-CASE-STUDY" ] || \ diff --git a/scripts/exregional_make_lbcs.sh b/scripts/exregional_make_lbcs.sh index 2b4d8821b7..e33c4f70f4 100755 --- a/scripts/exregional_make_lbcs.sh +++ b/scripts/exregional_make_lbcs.sh @@ -200,7 +200,11 @@ case "${CCPP_PHYS_SUITE}" in if [ "${EXTRN_MDL_NAME_LBCS}" = "RAP" ] || \ [ "${EXTRN_MDL_NAME_LBCS}" = "RRFS" ] || \ [ "${EXTRN_MDL_NAME_LBCS}" = "HRRR" ]; then - varmap_file="GSDphys_var_map.txt" + if [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then + varmap_file="GSDphys_smoke_var_map.txt" + else + varmap_file="GSDphys_var_map.txt" + fi elif [ "${EXTRN_MDL_NAME_LBCS}" = "NAM" ] || \ [ "${EXTRN_MDL_NAME_LBCS}" = "FV3GFS" ] || \ [ "${EXTRN_MDL_NAME_LBCS}" = "UFS-CASE-STUDY" ] || \ diff --git a/scripts/exregional_run_fcst.sh b/scripts/exregional_run_fcst.sh index 238db021c8..70af88b2fb 100755 --- a/scripts/exregional_run_fcst.sh +++ b/scripts/exregional_run_fcst.sh @@ -120,7 +120,6 @@ for sect in user nco platform workflow global cpl_aqm_parm constants fixed_files task_get_extrn_lbcs task_run_fcst task_run_post smoke_dust_parm fire; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done -set -xue # #----------------------------------------------------------------------- # @@ -902,7 +901,7 @@ POST_STEP # #----------------------------------------------------------------------- # -if [ $(boolify "${CPL_AQM}") = 
"TRUE" ]; then +if [ $(boolify "${CPL_AQM}") = "TRUE" ] || [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then if [ "${RUN_ENVIR}" = "nco" ]; then if [ -d "${COMIN}/RESTART" ] && [ "$(ls -A ${DATA}/RESTART)" ]; then rm -rf "${COMIN}/RESTART" @@ -912,7 +911,9 @@ if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then fi fi - cp -p ${DATA}/${AQM_RC_PRODUCT_FN} ${COMOUT}/${NET}.${cycle}${dot_ensmem}.${AQM_RC_PRODUCT_FN} + if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then + cp -p ${DATA}/${AQM_RC_PRODUCT_FN} ${COMOUT}/${NET}.${cycle}${dot_ensmem}.${AQM_RC_PRODUCT_FN} + fi fhr_ct=0 fhr=0 @@ -920,8 +921,8 @@ if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then fhr_ct=$(printf "%03d" $fhr) source_dyn="${DATA}/dynf${fhr_ct}.nc" source_phy="${DATA}/phyf${fhr_ct}.nc" - target_dyn="${COMIN}/${NET}.${cycle}${dot_ensmem}.dyn.f${fhr_ct}.nc" - target_phy="${COMIN}/${NET}.${cycle}${dot_ensmem}.phy.f${fhr_ct}.nc" + target_dyn="${COMIN}/${NET}.${cycle}${dot_ensmem}.dyn.f${fhr_ct}.${POST_OUTPUT_DOMAIN_NAME}.nc" + target_phy="${COMIN}/${NET}.${cycle}${dot_ensmem}.phy.f${fhr_ct}.${POST_OUTPUT_DOMAIN_NAME}.nc" [ -f ${source_dyn} ] && cp -p ${source_dyn} ${target_dyn} [ -f ${source_phy} ] && cp -p ${source_phy} ${target_phy} (( fhr=fhr+1 )) diff --git a/scripts/exregional_run_post.sh b/scripts/exregional_run_post.sh index 05adedf288..245fde3a7c 100755 --- a/scripts/exregional_run_post.sh +++ b/scripts/exregional_run_post.sh @@ -105,6 +105,7 @@ In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the post-processor (UPP) on the output files corresponding to a specified forecast hour. ========================================================================" +set -xue # #----------------------------------------------------------------------- # @@ -168,21 +169,23 @@ fi cp ${post_config_fp} ./postxconfig-NT.txt cp ${PARMdir}/upp/params_grib2_tbl_new . if [ $(boolify ${DO_SMOKE_DUST}) = "TRUE" ] || [ $(boolify ${USE_CRTM}) = "TRUE" ]; then - cp ${CRTM_DIR}/Nalli.IRwater.EmisCoeff.bin ./ - cp ${CRTM_DIR}/FAST*.bin ./ - cp ${CRTM_DIR}/NPOESS.IRland.EmisCoeff.bin ./ - cp ${CRTM_DIR}/NPOESS.IRsnow.EmisCoeff.bin ./ - cp ${CRTM_DIR}/NPOESS.IRice.EmisCoeff.bin ./ - cp ${CRTM_DIR}/AerosolCoeff.bin ./ - cp ${CRTM_DIR}/CloudCoeff.bin ./ - cp ${CRTM_DIR}/*.SpcCoeff.bin ./ - cp ${CRTM_DIR}/*.TauCoeff.bin ./ + if [ $(boolify ${DO_SMOKE_DUST}) = "TRUE" ]; then + CRTM_DIR="${FIXcrtm}" + fi + ln -nsf ${CRTM_DIR}/Nalli.IRwater.EmisCoeff.bin . + ln -nsf ${CRTM_DIR}/FAST*.bin . + ln -nsf ${CRTM_DIR}/NPOESS.IRland.EmisCoeff.bin . + ln -nsf ${CRTM_DIR}/NPOESS.IRsnow.EmisCoeff.bin . + ln -nsf ${CRTM_DIR}/NPOESS.IRice.EmisCoeff.bin . + ln -nsf ${CRTM_DIR}/AerosolCoeff.bin . + ln -nsf ${CRTM_DIR}/CloudCoeff.bin . + ln -nsf ${CRTM_DIR}/*.SpcCoeff.bin . + ln -nsf ${CRTM_DIR}/*.TauCoeff.bin . print_info_msg " ==================================================================== Copying the external CRTM fix files from CRTM_DIR to the temporary work directory (DATA_FHR): CRTM_DIR = \"${CRTM_DIR}\" - DATA_FHR = \"${DATA_FHR}\" ====================================================================" fi # @@ -230,8 +233,10 @@ if [ "${RUN_ENVIR}" = "nco" ]; then else DATAFCST=$DATA fi + dyn_file="${DATAFCST}/dynf${fhr}${mnts_secs_str}.nc" phy_file="${DATAFCST}/phyf${fhr}${mnts_secs_str}.nc" + # # Set parameters that specify the actual time (not forecast time) of the # output. @@ -245,8 +250,10 @@ post_mn=${post_time:10:2} # # Create the input namelist file to the post-processor executable. 
# -if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then +if [ $(boolify "${CPL_AQM}") = "TRUE" ] && [ $(boolify "${DO_SMOKE_DUST}") = "FALSE" ]; then post_itag_add="aqf_on=.true.," +elif [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then + post_itag_add="slrutah_on=.true.,gtg_on=.true." else post_itag_add="" fi @@ -333,25 +340,27 @@ The \${fhr} variable contains too few or too many characters: fi if [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then + COMOUT="${COMROOT}/${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR}" #temporary path, should be removed later + bgdawp=${NET}.${cycle}.prslev.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 bgrd3d=${NET}.${cycle}.natlev.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 bgifi=${NET}.${cycle}.ififip.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 bgavi=${NET}.${cycle}.aviation.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 if [ -f "PRSLEV.GrbF${post_fhr}" ]; then - wgrib2 PRSLEV.GrbF${post_fhr} -set center 7 -grib ${bgdawp} >>$pgmout 2>>errfile + wgrib2 PRSLEV.GrbF${post_fhr} -set center 7 -grib ${bgdawp} cp -p ${bgdawp} ${COMOUT} fi if [ -f "NATLEV.GrbF${post_fhr}" ]; then - wgrib2 NATLEV.GrbF${post_fhr} -set center 7 -grib ${bgrd3d} >>$pgmout 2>>errfile + wgrib2 NATLEV.GrbF${post_fhr} -set center 7 -grib ${bgrd3d} cp -p ${bgrd3d} ${COMOUT} fi if [ -f "IFIFIP.GrbF${post_fhr}" ]; then - wgrib2 IFIFIP.GrbF${post_fhr} -set center 7 -grib ${bgifi} >>$pgmout 2>>errfile + wgrib2 IFIFIP.GrbF${post_fhr} -set center 7 -grib ${bgifi} cp -p ${bgifi} ${COMOUT} fi if [ -f "AVIATION.GrbF${post_fhr}" ]; then - wgrib2 AVIATION.GrbF${post_fhr} -set center 7 -grib ${bgavi} >>$pgmout 2>>errfile + wgrib2 AVIATION.GrbF${post_fhr} -set center 7 -grib ${bgavi} cp -p ${bgavi} ${COMOUT} fi diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml index e723d91e74..0fc0f8662d 100644 --- a/ush/config.smoke_dust.yaml +++ b/ush/config.smoke_dust.yaml @@ -55,7 +55,6 @@ task_make_lbcs: LEVP: 66 VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" task_run_fcst: - OMP_NUM_THREADS_RUN_FCST: 1 DT_ATMOS: 36 LAYOUT_X: 15 LAYOUT_Y: 20 From 7a0b385b8f374828c1b373e61acf467eb5ff6c29 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Fri, 24 Jan 2025 03:40:53 +0000 Subject: [PATCH 07/41] fix we2e smoke dust test script --- scripts/exregional_run_post.sh | 1 - ...nfig.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_run_post.sh b/scripts/exregional_run_post.sh index 245fde3a7c..ac31777f31 100755 --- a/scripts/exregional_run_post.sh +++ b/scripts/exregional_run_post.sh @@ -105,7 +105,6 @@ In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the post-processor (UPP) on the output files corresponding to a specified forecast hour. 
========================================================================" -set -xue # #----------------------------------------------------------------------- # diff --git a/tests/WE2E/test_configs/smoke_dust/config.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml b/tests/WE2E/test_configs/smoke_dust/config.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml index 8bea3cb198..87fed7fd38 100644 --- a/tests/WE2E/test_configs/smoke_dust/config.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml +++ b/tests/WE2E/test_configs/smoke_dust/config.smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf.yaml @@ -2,8 +2,6 @@ metadata: description: config for Smoke and Dust, RRFS_CONUS_3km user: RUN_ENVIR: community -platform: - BUILD_MOD_FN: 'build_{{ user.MACHINE|lower() }}_intel_prod' workflow: PREDEF_GRID_NAME: RRFS_CONUS_3km CCPP_PHYS_SUITE: FV3_HRRR_gf @@ -21,8 +19,8 @@ workflow: COLDSTART: true nco: envir_default: we2e_smoke_dust - NET_default: we2e_smoke_dust - RUN_default: we2e_smoke_dust + NET_default: smoke_dust + RUN_default: smoke_dust rocoto: tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/smoke_dust.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml"]|include }}' @@ -39,8 +37,10 @@ task_get_extrn_lbcs: EXTRN_MDL_LBCS_OFFSET_HRS: 0 USE_USER_STAGED_EXTRN_FILES: true task_make_ics: + LEVP: 66 VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" task_make_lbcs: + LEVP: 66 VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" task_run_fcst: DT_ATMOS: 36 From f310e7ef384a850b6fe7b25891bbfe0c0105d640 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Fri, 24 Jan 2025 13:57:46 +0000 Subject: [PATCH 08/41] fix prepstart failure --- scripts/exsrw_prepstart.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/exsrw_prepstart.sh b/scripts/exsrw_prepstart.sh index 6c2627aa19..9e74dcdcf4 100755 --- a/scripts/exsrw_prepstart.sh +++ b/scripts/exsrw_prepstart.sh @@ -71,7 +71,9 @@ else CDATEprev=$($NDATE -${INCR_CYCL_FREQ} ${PDY}${cyc}) PDYprev=${CDATEprev:0:8} cycprev=${CDATEprev:8:2} - path_restart=${COMIN}/${RUN}.${PDYprev}/${cycprev}${SLASH_ENSMEM_SUBDIR}/RESTART +# the following path should be changed once the forecast script meets the nco standards: +# path_restart=${COMIN}/${RUN}.${PDYprev}/${cycprev}${SLASH_ENSMEM_SUBDIR}/RESTART + path_restart="${EXPTDIR}/${CDATEprev}/RESTART" n=${INCR_CYCL_FREQ} while [[ $n -le 25 ]] ; do From 832a26015f94fbb31a18b97b74826b06ed21745c Mon Sep 17 00:00:00 2001 From: Chan-hoo Jeon Date: Fri, 24 Jan 2025 13:34:06 -0500 Subject: [PATCH 09/41] add prod_util to module file for gaea-c6 --- modulefiles/build_gaea-c6_intel.lua | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modulefiles/build_gaea-c6_intel.lua b/modulefiles/build_gaea-c6_intel.lua index 3e05327349..b1b2aca870 100644 --- a/modulefiles/build_gaea-c6_intel.lua +++ b/modulefiles/build_gaea-c6_intel.lua @@ -20,6 +20,9 @@ load(pathJoin("cmake", cmake_ver)) load("srw_common") +load(pathJoin("nco", os.getenv("nco_ver") or "5.0.6")) +load(pathJoin("prod_util", os.getenv("prod_util_ver") or "2.1.1")) + unload("darshan-runtime/3.4.4") unload("cray-pmi/6.1.13") From 05ac09e896f36435a4b491d2989afd8da323311e Mon Sep 17 00:00:00 2001 From: chan-hoo Date: Fri, 24 Jan 2025 19:34:03 -0600 Subject: [PATCH 10/41] fix esmpy issue on orion/hercules --- devbuild.sh | 7 ++++++- envir_noesmpy.yml | 13 +++++++++++++ ush/load_modules_run_task.sh | 7 +++++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 envir_noesmpy.yml diff --git a/devbuild.sh 
b/devbuild.sh index afe6812000..db567db233 100755 --- a/devbuild.sh +++ b/devbuild.sh @@ -233,8 +233,13 @@ if [ "${BUILD_CONDA}" = "on" ] ; then mamba install -y bash coreutils sed fi conda activate + if [ "${PLATFORM}" = "orion" ] || [ "${PLATFORM}" = "hercules" ]; then + envir_fn="envir_noesmpy.yml" + else + envir_fn="environment.yml" + fi if ! conda env list | grep -q "^srw_app\s" ; then - mamba env create -n srw_app --file environment.yml + mamba env create -n srw_app --file $envir_fn fi if ! conda env list | grep -q "^srw_graphics\s" ; then mamba env create -n srw_graphics --file graphics_environment.yml diff --git a/envir_noesmpy.yml b/envir_noesmpy.yml new file mode 100644 index 0000000000..0d9a7cfd1e --- /dev/null +++ b/envir_noesmpy.yml @@ -0,0 +1,13 @@ +name: srw_app +channels: + - conda-forge + - ufs-community +dependencies: + - pylint=2.17* + - pytest=7.2* + - uwtools=2.3* + - netcdf4=1.6.* + - numpy=1.23.* + - pandas=1.5.* + - scipy=1.10.* + - xarray=2022.11.* diff --git a/ush/load_modules_run_task.sh b/ush/load_modules_run_task.sh index 65af076203..5426fe8d6a 100755 --- a/ush/load_modules_run_task.sh +++ b/ush/load_modules_run_task.sh @@ -210,6 +210,13 @@ if [ -n "${SRW_ENV:-}" ] ; then set -u fi +# Esmpy/ESMF segfaults when running through conda on Orion/Hercules. +# Use a manually built esmpy v8.3.1 and local Python modules. +if [[ ( "${machine}" == "orion" || "${machine}" == "hercules" ) && "${task_name}" == "smoke_dust" ]]; then + set +u + export PYTHONPATH=/work/noaa/epic/bwkoziol/main_aqm/esmf/src/addon/ESMPy/src:$PYTHONPATH + set -u +fi # #----------------------------------------------------------------------- # From 98aa23e040d06991475ae02bf919cb73963b4900 Mon Sep 17 00:00:00 2001 From: chan-hoo Date: Mon, 27 Jan 2025 11:42:05 -0600 Subject: [PATCH 11/41] fix esmpy issue on orion hercules --- devbuild.sh | 7 +------ envir_noesmpy.yml | 13 ------------- environment.yml | 12 +++++++----- ush/load_modules_run_task.sh | 8 -------- 4 files changed, 8 insertions(+), 32 deletions(-) delete mode 100644 envir_noesmpy.yml diff --git a/devbuild.sh b/devbuild.sh index db567db233..afe6812000 100755 --- a/devbuild.sh +++ b/devbuild.sh @@ -233,13 +233,8 @@ if [ "${BUILD_CONDA}" = "on" ] ; then mamba install -y bash coreutils sed fi conda activate - if [ "${PLATFORM}" = "orion" ] || [ "${PLATFORM}" = "hercules" ]; then - envir_fn="envir_noesmpy.yml" - else - envir_fn="environment.yml" - fi if ! conda env list | grep -q "^srw_app\s" ; then - mamba env create -n srw_app --file $envir_fn + mamba env create -n srw_app --file environment.yml fi if ! 
conda env list | grep -q "^srw_graphics\s" ; then mamba env create -n srw_graphics --file graphics_environment.yml diff --git a/envir_noesmpy.yml b/envir_noesmpy.yml deleted file mode 100644 index 0d9a7cfd1e..0000000000 --- a/envir_noesmpy.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: srw_app -channels: - - conda-forge - - ufs-community -dependencies: - - pylint=2.17* - - pytest=7.2* - - uwtools=2.3* - - netcdf4=1.6.* - - numpy=1.23.* - - pandas=1.5.* - - scipy=1.10.* - - xarray=2022.11.* diff --git a/environment.yml b/environment.yml index bef636c2ae..b6d92849f8 100644 --- a/environment.yml +++ b/environment.yml @@ -3,12 +3,14 @@ channels: - conda-forge - ufs-community dependencies: + - esmpy=*=mpi_mpich* + - mpi4py + - pydantic + - netcdf4=1.6.*=mpi_mpich* + - numpy=1.23.* - pylint=2.17* - pytest=7.2* - - uwtools=2.3* - - esmpy=8.6.* - - netcdf4=1.6.* - - numpy=1.23.* - - pandas=1.5.* - scipy=1.10.* + - uwtools=2.3* - xarray=2022.11.* + diff --git a/ush/load_modules_run_task.sh b/ush/load_modules_run_task.sh index 5426fe8d6a..ca272fa241 100755 --- a/ush/load_modules_run_task.sh +++ b/ush/load_modules_run_task.sh @@ -209,14 +209,6 @@ if [ -n "${SRW_ENV:-}" ] ; then conda activate ${SRW_ENV} set -u fi - -# Esmpy/ESMF segfaults when running through conda on Orion/Hercules. -# Use a manually built esmpy v8.3.1 and local Python modules. -if [[ ( "${machine}" == "orion" || "${machine}" == "hercules" ) && "${task_name}" == "smoke_dust" ]]; then - set +u - export PYTHONPATH=/work/noaa/epic/bwkoziol/main_aqm/esmf/src/addon/ESMPy/src:$PYTHONPATH - set -u -fi # #----------------------------------------------------------------------- # From 823f8e533b49b770ec8826cafac5ff8b8384e6a9 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Tue, 28 Jan 2025 21:18:07 +0000 Subject: [PATCH 12/41] set rrfs_sd to true --- ush/generate_FV3LAM_wflow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py index d6ae6a87c5..b921a077ce 100755 --- a/ush/generate_FV3LAM_wflow.py +++ b/ush/generate_FV3LAM_wflow.py @@ -427,6 +427,7 @@ def generate_FV3LAM_wflow( if DO_SMOKE_DUST: gfs_physics_nml_dict.update({ "ebb_dcycle": EBB_DCYCLE, + "rrfs_sd": True, }) if CPL_AQM: From 639b2ec3c04b234d7bb4c3eafaf276b72d74a933 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Wed, 29 Jan 2025 00:49:52 +0000 Subject: [PATCH 13/41] change base dir name suffix from srw to dir --- environment.yml | 2 +- jobs/JSRW_PREPSTART | 16 ++-------------- jobs/JSRW_SMOKE_DUST | 16 ++-------------- parm/wflow/smoke_dust.yaml | 1 + scripts/exregional_run_fcst.sh | 2 +- scripts/exregional_run_post.sh | 14 +++++--------- scripts/exsrw_prepstart.sh | 6 ++---- scripts/exsrw_smoke_dust.sh | 4 ++-- 8 files changed, 16 insertions(+), 45 deletions(-) diff --git a/environment.yml b/environment.yml index b6d92849f8..6e00cf4184 100644 --- a/environment.yml +++ b/environment.yml @@ -13,4 +13,4 @@ dependencies: - scipy=1.10.* - uwtools=2.3* - xarray=2022.11.* - + - sphinx=7.4.0* diff --git a/jobs/JSRW_PREPSTART b/jobs/JSRW_PREPSTART index 1952923d9d..0e6afacc39 100755 --- a/jobs/JSRW_PREPSTART +++ b/jobs/JSRW_PREPSTART @@ -12,23 +12,11 @@ export PS4='+ $SECONDS + ' # #----------------------------------------------------------------------- # -# Set the NCO standard environment variables (Table 1, pp.4) -# -#----------------------------------------------------------------------- -# -export USHsrw="${HOMEdir}/ush" -export EXECsrw="${HOMEdir}/exec" -export PARMsrw="${HOMEdir}/parm" -export 
SCRIPTSsrw="${HOMEdir}/scripts" -# -#----------------------------------------------------------------------- -# # Source the variable definitions file and the bash utility functions. # #----------------------------------------------------------------------- # -export USHdir="${USHsrw}" # should be removed later -. ${USHsrw}/source_util_funcs.sh +. ${USHdir}/source_util_funcs.sh for sect in user nco platform workflow global smoke_dust_parm ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -145,7 +133,7 @@ fi export pgmout="${DATA}/OUTPUT.$$" env -${SCRIPTSsrw}/exsrw_prepstart.sh +${SCRIPTSdir}/exsrw_prepstart.sh export err=$?; err_chk if [ -e "$pgmout" ]; then diff --git a/jobs/JSRW_SMOKE_DUST b/jobs/JSRW_SMOKE_DUST index 0b6fafeb83..81d0898770 100755 --- a/jobs/JSRW_SMOKE_DUST +++ b/jobs/JSRW_SMOKE_DUST @@ -12,23 +12,11 @@ export PS4='+ $SECONDS + ' # #----------------------------------------------------------------------- # -# Set the NCO standard environment variables (Table 1, pp.4) -# -#----------------------------------------------------------------------- -# -export USHsrw="${HOMEdir}/ush" -export EXECsrw="${HOMEdir}/exec" -export PARMsrw="${HOMEdir}/parm" -export SCRIPTSsrw="${HOMEdir}/scripts" -# -#----------------------------------------------------------------------- -# # Source the variable definitions file and the bash utility functions. # #----------------------------------------------------------------------- # -export USHdir="${USHsrw}" # should be removed later -. ${USHsrw}/source_util_funcs.sh +. ${USHdir}/source_util_funcs.sh for sect in user nco platform workflow global smoke_dust_parm ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -148,7 +136,7 @@ fi export pgmout="${DATA}/OUTPUT.$$" env -${SCRIPTSsrw}/exsrw_smoke_dust.sh +${SCRIPTSdir}/exsrw_smoke_dust.sh export err=$?; err_chk if [ -e "$pgmout" ]; then diff --git a/parm/wflow/smoke_dust.yaml b/parm/wflow/smoke_dust.yaml index 18bab50787..5955068aae 100644 --- a/parm/wflow/smoke_dust.yaml +++ b/parm/wflow/smoke_dust.yaml @@ -6,6 +6,7 @@ default_smoke_dust_task: &default_smoke_dust envars: &default_vars GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' HOMEdir: '&HOMEdir;' + USHdir: '&USHdir;' envir: '&envir;' model_ver: '&model_ver;' KEEPDATA: '&KEEPDATA;' diff --git a/scripts/exregional_run_fcst.sh b/scripts/exregional_run_fcst.sh index 70af88b2fb..7511bdf793 100755 --- a/scripts/exregional_run_fcst.sh +++ b/scripts/exregional_run_fcst.sh @@ -901,7 +901,7 @@ POST_STEP # #----------------------------------------------------------------------- # -if [ $(boolify "${CPL_AQM}") = "TRUE" ] || [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then +if [ $(boolify "${CPL_AQM}") = "TRUE" ]; then if [ "${RUN_ENVIR}" = "nco" ]; then if [ -d "${COMIN}/RESTART" ] && [ "$(ls -A ${DATA}/RESTART)" ]; then rm -rf "${COMIN}/RESTART" diff --git a/scripts/exregional_run_post.sh b/scripts/exregional_run_post.sh index ac31777f31..0cd882929b 100755 --- a/scripts/exregional_run_post.sh +++ b/scripts/exregional_run_post.sh @@ -339,8 +339,6 @@ The \${fhr} variable contains too few or too many characters: fi if [ $(boolify "${DO_SMOKE_DUST}") = "TRUE" ]; then - COMOUT="${COMROOT}/${NET}/${model_ver}/${RUN}.${PDY}/${cyc}${SLASH_ENSMEM_SUBDIR}" #temporary path, should be removed later - bgdawp=${NET}.${cycle}.prslev.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 bgrd3d=${NET}.${cycle}.natlev.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 bgifi=${NET}.${cycle}.ififip.f${fhr}.${POST_OUTPUT_DOMAIN_NAME}.grib2 @@ -348,19 +346,19 @@ if [ $(boolify 
"${DO_SMOKE_DUST}") = "TRUE" ]; then if [ -f "PRSLEV.GrbF${post_fhr}" ]; then wgrib2 PRSLEV.GrbF${post_fhr} -set center 7 -grib ${bgdawp} - cp -p ${bgdawp} ${COMOUT} + mv ${bgdawp} ${COMOUT} fi if [ -f "NATLEV.GrbF${post_fhr}" ]; then wgrib2 NATLEV.GrbF${post_fhr} -set center 7 -grib ${bgrd3d} - cp -p ${bgrd3d} ${COMOUT} + mv ${bgrd3d} ${COMOUT} fi if [ -f "IFIFIP.GrbF${post_fhr}" ]; then wgrib2 IFIFIP.GrbF${post_fhr} -set center 7 -grib ${bgifi} - cp -p ${bgifi} ${COMOUT} + mv ${bgifi} ${COMOUT} fi if [ -f "AVIATION.GrbF${post_fhr}" ]; then wgrib2 AVIATION.GrbF${post_fhr} -set center 7 -grib ${bgavi} - cp -p ${bgavi} ${COMOUT} + mv ${bgavi} ${COMOUT} fi else @@ -400,10 +398,8 @@ else $DBNROOT/bin/dbn_alert MODEL rrfs_post ${job} ${COMOUT}/${post_renamed_fn} fi done - - rm -rf ${DATA_FHR} - fi +rm -rf ${DATA_FHR} # #----------------------------------------------------------------------- # diff --git a/scripts/exsrw_prepstart.sh b/scripts/exsrw_prepstart.sh index 9e74dcdcf4..93b9a8ebca 100755 --- a/scripts/exsrw_prepstart.sh +++ b/scripts/exsrw_prepstart.sh @@ -7,7 +7,7 @@ # #----------------------------------------------------------------------- # -. ${USHsrw}/source_util_funcs.sh +. ${USHdir}/source_util_funcs.sh for sect in user nco platform workflow global smoke_dust_parm \ constants fixed_files grid_params task_run_fcst ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} @@ -71,8 +71,6 @@ else CDATEprev=$($NDATE -${INCR_CYCL_FREQ} ${PDY}${cyc}) PDYprev=${CDATEprev:0:8} cycprev=${CDATEprev:8:2} -# the following path should be changed once the forecast script meets the nco standards: -# path_restart=${COMIN}/${RUN}.${PDYprev}/${cycprev}${SLASH_ENSMEM_SUBDIR}/RESTART path_restart="${EXPTDIR}/${CDATEprev}/RESTART" n=${INCR_CYCL_FREQ} @@ -118,7 +116,7 @@ else echo "${PDY}${cyc}: cycle smoke/dust from ${checkfile} " fi - ${USHsrw}/smoke_dust_add_smoke.py + ${USHdir}/smoke_dust_add_smoke.py export err=$? if [ $err -ne 0 ]; then message_txt="add_smoke.py failed with return code $err" diff --git a/scripts/exsrw_smoke_dust.sh b/scripts/exsrw_smoke_dust.sh index 6efce6dc4c..fd0624d3ae 100755 --- a/scripts/exsrw_smoke_dust.sh +++ b/scripts/exsrw_smoke_dust.sh @@ -7,7 +7,7 @@ # #----------------------------------------------------------------------- # -. ${USHsrw}/source_util_funcs.sh +. ${USHdir}/source_util_funcs.sh for sect in user nco platform workflow global smoke_dust_parm \ constants fixed_files grid_params task_run_fcst ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} @@ -107,7 +107,7 @@ else # #----------------------------------------------------------------------- # - ${USHsrw}/smoke_dust_generate_fire_emissions.py \ + ${USHdir}/smoke_dust_generate_fire_emissions.py \ "${FIXsmoke}/${PREDEF_GRID_NAME}" \ "${DATA}" \ "${DATA_SHARE}" \ From 1279c51b6525d5a3efd54d06c6807277ffb6dc10 Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Wed, 29 Jan 2025 10:49:14 +0000 Subject: [PATCH 14/41] update users guide configworkflow --- doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index fdb2482a4a..d65d3293c3 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -236,6 +236,12 @@ These parameters are associated with the fixed (i.e., static) files. On :srw-wik ``FIXemis``: (Default: "") Path to system directory containing AQM emission data files. 
+``FIXsmoke``: (Default: "") + Path to system directory containing Smoke and Dust data files. + +``FIXupp``: (Default: "") + Path to system directory containing UPP fix files. + ``FIXcrtm``: (Default: "") Path to system directory containing CRTM fixed files. From 38e9c915fdc536bf28577dc22b8153b3e6f912dd Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Wed, 29 Jan 2025 11:22:59 +0000 Subject: [PATCH 15/41] add smoke dust rst files to techdocs --- doc/TechDocs/ush/modules.rst | 5 +++++ doc/TechDocs/ush/smoke_dust_add_smoke.rst | 7 +++++++ doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst | 7 +++++++ doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst | 7 +++++++ doc/TechDocs/ush/smoke_dust_hwp_tools.rst | 7 +++++++ doc/TechDocs/ush/smoke_dust_interp_tools.rst | 7 +++++++ 6 files changed, 40 insertions(+) create mode 100644 doc/TechDocs/ush/smoke_dust_add_smoke.rst create mode 100644 doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst create mode 100644 doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst create mode 100644 doc/TechDocs/ush/smoke_dust_hwp_tools.rst create mode 100644 doc/TechDocs/ush/smoke_dust_interp_tools.rst diff --git a/doc/TechDocs/ush/modules.rst b/doc/TechDocs/ush/modules.rst index 6ac0346624..cb3a41d71d 100644 --- a/doc/TechDocs/ush/modules.rst +++ b/doc/TechDocs/ush/modules.rst @@ -29,4 +29,9 @@ ush set_leadhrs set_predef_grid_params setup + smoke_dust_add_smoke + smoke_dust_fire_emiss_tools + smoke_dust_generate_fire_emissions + smoke_dust_hwp_tools + smoke_dust_interp_tools update_input_nml diff --git a/doc/TechDocs/ush/smoke_dust_add_smoke.rst b/doc/TechDocs/ush/smoke_dust_add_smoke.rst new file mode 100644 index 0000000000..72eea490f3 --- /dev/null +++ b/doc/TechDocs/ush/smoke_dust_add_smoke.rst @@ -0,0 +1,7 @@ +smoke\_dust\_add\_smoke module +================ + +.. automodule:: smoke_dust_add_smoke + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst b/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst new file mode 100644 index 0000000000..d1e3dc82e4 --- /dev/null +++ b/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst @@ -0,0 +1,7 @@ +smoke\_dust\_fire\_emiss\_tools module +================ + +.. automodule:: smoke_dust_fire_emiss_tools + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst b/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst new file mode 100644 index 0000000000..9b2317a877 --- /dev/null +++ b/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst @@ -0,0 +1,7 @@ +smoke\_dust\_generate\_fire\_emissions module +================ + +.. automodule:: smoke_dust_generate_fire_emissions + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_hwp_tools.rst b/doc/TechDocs/ush/smoke_dust_hwp_tools.rst new file mode 100644 index 0000000000..4f9e164cca --- /dev/null +++ b/doc/TechDocs/ush/smoke_dust_hwp_tools.rst @@ -0,0 +1,7 @@ +smoke\_dust\_hwp\_tools module +================ + +.. automodule:: smoke_dust_hwp_tools + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_interp_tools.rst b/doc/TechDocs/ush/smoke_dust_interp_tools.rst new file mode 100644 index 0000000000..25dec79557 --- /dev/null +++ b/doc/TechDocs/ush/smoke_dust_interp_tools.rst @@ -0,0 +1,7 @@ +smoke\_dust\_interp\_tools module +================ + +.. 
automodule:: smoke_dust_interp_tools
+   :members:
+   :undoc-members:
+   :show-inheritance:

From 1ea0e473ce4487f4d48fb08fe08ce62a27799c54 Mon Sep 17 00:00:00 2001
From: "Chan-hoo.Jeon"
Date: Wed, 29 Jan 2025 11:54:43 +0000
Subject: [PATCH 16/41] add missing doc update to users guide

---
 .../CustomizingTheWorkflow/ConfigWorkflow.rst | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst
index d65d3293c3..f0b30c10b9 100644
--- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst
+++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst
@@ -2071,6 +2071,31 @@ Non-default parameters for coupled Air Quality Modeling (AQM) tasks are set in t
 ``NEXUS_GFS_SFC_ARCHV_DIR``: (Default: "/NCEPPROD/hpssprod/runhistory")
    Path to archive directory for gfs surface files on HPSS.
 
+
+Smoke and Dust Configuration Parameters
+========================================
+
+Non-default parameters for Smoke and Dust tasks are set in the ``smoke_dust_parm:`` section of the ``config.yaml`` file.
+
+``DO_SMOKE_DUST``: (Default: false)
+   Flag to activate the smoke and dust tasks.
+
+``EBB_DCYCLE``: (Default: 1)
+   Option for the EBB cycle (1: retrospective, 2: forecast).
+
+``PERSISTENCE``: (Default: true)
+   Flag for the emission persistence method. If false, same-day FRP is used.
+
+``COMINsmoke_default``: (Default: "")
+   Path to directory containing smoke and dust data files.
+
+``COMINrave_default``: (Default: "")
+   Path to directory containing RAVE fire data files.
+
+``SMOKE_DUST_FILE_PREFIX``: (Default: SMOKE_RRFS_data)
+   Prefix of the Smoke and Dust file name used by the UFS Weather Model.
+
+
 .. _fire-parameters:
 
 Community Fire Behavior Model Parameters

From ab2e73aa04cd6d0ea3e0e6e022112682f361101c Mon Sep 17 00:00:00 2001
From: "michael.lueken"
Date: Wed, 29 Jan 2025 14:37:26 +0000
Subject: [PATCH 17/41] [feature/add_sd] Update TechDocs/ush to allow for the TechDocs to pass Doc Tests on GitHub

---
 doc/TechDocs/ush/smoke_dust_add_smoke.rst               | 2 +-
 doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst        | 2 +-
 doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst | 2 +-
 doc/TechDocs/ush/smoke_dust_hwp_tools.rst               | 2 +-
 doc/TechDocs/ush/smoke_dust_interp_tools.rst            | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/TechDocs/ush/smoke_dust_add_smoke.rst b/doc/TechDocs/ush/smoke_dust_add_smoke.rst
index 72eea490f3..6667b59ad8 100644
--- a/doc/TechDocs/ush/smoke_dust_add_smoke.rst
+++ b/doc/TechDocs/ush/smoke_dust_add_smoke.rst
@@ -1,5 +1,5 @@
 smoke\_dust\_add\_smoke module
-================
+==============================
 
 .. automodule:: smoke_dust_add_smoke
    :members:
diff --git a/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst b/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst
index d1e3dc82e4..2fc04ad67d 100644
--- a/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst
+++ b/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst
@@ -1,5 +1,5 @@
 smoke\_dust\_fire\_emiss\_tools module
-================
+======================================
 
 .. automodule:: smoke_dust_fire_emiss_tools
    :members:
diff --git a/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst b/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst
index 9b2317a877..c3e9c9856f 100644
--- a/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst
+++ b/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst
@@ -1,5 +1,5 @@
 smoke\_dust\_generate\_fire\_emissions module
-================
+=============================================
 
 .. 
automodule:: smoke_dust_generate_fire_emissions :members: diff --git a/doc/TechDocs/ush/smoke_dust_hwp_tools.rst b/doc/TechDocs/ush/smoke_dust_hwp_tools.rst index 4f9e164cca..69d186a1c8 100644 --- a/doc/TechDocs/ush/smoke_dust_hwp_tools.rst +++ b/doc/TechDocs/ush/smoke_dust_hwp_tools.rst @@ -1,5 +1,5 @@ smoke\_dust\_hwp\_tools module -================ +============================== .. automodule:: smoke_dust_hwp_tools :members: diff --git a/doc/TechDocs/ush/smoke_dust_interp_tools.rst b/doc/TechDocs/ush/smoke_dust_interp_tools.rst index 25dec79557..a5e1b974c4 100644 --- a/doc/TechDocs/ush/smoke_dust_interp_tools.rst +++ b/doc/TechDocs/ush/smoke_dust_interp_tools.rst @@ -1,5 +1,5 @@ smoke\_dust\_interp\_tools module -================ +================================= .. automodule:: smoke_dust_interp_tools :members: From c1ae41e2265012143f7b5803bb77b61e01b2f0bb Mon Sep 17 00:00:00 2001 From: "Chan-hoo.Jeon" Date: Wed, 29 Jan 2025 19:30:55 +0000 Subject: [PATCH 18/41] separate conda env for smoke dust --- devbuild.sh | 3 +++ environment.yml | 7 ------- modulefiles/python_srw_sd.lua | 2 ++ modulefiles/tasks/gaea-c6/prepstart.local.lua | 1 + modulefiles/tasks/gaea-c6/python_srw_sd.lua | 8 ++++++++ modulefiles/tasks/gaea-c6/smoke_dust.local.lua | 1 + modulefiles/tasks/hera/prepstart.local.lua | 1 + modulefiles/tasks/hera/smoke_dust.local.lua | 1 + modulefiles/tasks/hercules/prepstart.local.lua | 1 + modulefiles/tasks/hercules/smoke_dust.local.lua | 1 + modulefiles/tasks/orion/prepstart.local.lua | 1 + modulefiles/tasks/orion/smoke_dust.local.lua | 1 + sd_environment.yml | 15 +++++++++++++++ 13 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 modulefiles/python_srw_sd.lua create mode 100644 modulefiles/tasks/gaea-c6/prepstart.local.lua create mode 100644 modulefiles/tasks/gaea-c6/python_srw_sd.lua create mode 100644 modulefiles/tasks/gaea-c6/smoke_dust.local.lua create mode 100644 modulefiles/tasks/hera/prepstart.local.lua create mode 100644 modulefiles/tasks/hera/smoke_dust.local.lua create mode 100644 modulefiles/tasks/hercules/prepstart.local.lua create mode 100644 modulefiles/tasks/hercules/smoke_dust.local.lua create mode 100644 modulefiles/tasks/orion/prepstart.local.lua create mode 100644 modulefiles/tasks/orion/smoke_dust.local.lua create mode 100644 sd_environment.yml diff --git a/devbuild.sh b/devbuild.sh index afe6812000..e99870f7bb 100755 --- a/devbuild.sh +++ b/devbuild.sh @@ -239,6 +239,9 @@ if [ "${BUILD_CONDA}" = "on" ] ; then if ! conda env list | grep -q "^srw_graphics\s" ; then mamba env create -n srw_graphics --file graphics_environment.yml fi + if ! conda env list | grep -q "^srw_sd\s" ; then + mamba env create -n srw_sd --file sd_environment.yml + fi if [ "${APPLICATION}" = "ATMAQ" ]; then if ! 
conda env list | grep -q "^srw_aqm\s" ; then mamba env create -n srw_aqm --file aqm_environment.yml diff --git a/environment.yml b/environment.yml index 6e00cf4184..d77dfae569 100644 --- a/environment.yml +++ b/environment.yml @@ -3,14 +3,7 @@ channels: - conda-forge - ufs-community dependencies: - - esmpy=*=mpi_mpich* - - mpi4py - - pydantic - - netcdf4=1.6.*=mpi_mpich* - - numpy=1.23.* - pylint=2.17* - pytest=7.2* - - scipy=1.10.* - uwtools=2.3* - - xarray=2022.11.* - sphinx=7.4.0* diff --git a/modulefiles/python_srw_sd.lua b/modulefiles/python_srw_sd.lua new file mode 100644 index 0000000000..1e6fd14197 --- /dev/null +++ b/modulefiles/python_srw_sd.lua @@ -0,0 +1,2 @@ +load("conda") +setenv("SRW_ENV", "srw_sd") diff --git a/modulefiles/tasks/gaea-c6/prepstart.local.lua b/modulefiles/tasks/gaea-c6/prepstart.local.lua new file mode 100644 index 0000000000..6d428532bc --- /dev/null +++ b/modulefiles/tasks/gaea-c6/prepstart.local.lua @@ -0,0 +1 @@ +load("python_srw_sd") diff --git a/modulefiles/tasks/gaea-c6/python_srw_sd.lua b/modulefiles/tasks/gaea-c6/python_srw_sd.lua new file mode 100644 index 0000000000..a2b2ef63a0 --- /dev/null +++ b/modulefiles/tasks/gaea-c6/python_srw_sd.lua @@ -0,0 +1,8 @@ +load("darshan-runtime/3.4.4") +unload("python") +load("conda") + +setenv("SRW_ENV", "srw_sd") +setenv("LD_PRELOAD", "/usr/lib64/libstdc++.so.6") +setenv("FI_VERBS_PREFER_XRC", "0") + diff --git a/modulefiles/tasks/gaea-c6/smoke_dust.local.lua b/modulefiles/tasks/gaea-c6/smoke_dust.local.lua new file mode 100644 index 0000000000..6d428532bc --- /dev/null +++ b/modulefiles/tasks/gaea-c6/smoke_dust.local.lua @@ -0,0 +1 @@ +load("python_srw_sd") diff --git a/modulefiles/tasks/hera/prepstart.local.lua b/modulefiles/tasks/hera/prepstart.local.lua new file mode 100644 index 0000000000..6d428532bc --- /dev/null +++ b/modulefiles/tasks/hera/prepstart.local.lua @@ -0,0 +1 @@ +load("python_srw_sd") diff --git a/modulefiles/tasks/hera/smoke_dust.local.lua b/modulefiles/tasks/hera/smoke_dust.local.lua new file mode 100644 index 0000000000..6d428532bc --- /dev/null +++ b/modulefiles/tasks/hera/smoke_dust.local.lua @@ -0,0 +1 @@ +load("python_srw_sd") diff --git a/modulefiles/tasks/hercules/prepstart.local.lua b/modulefiles/tasks/hercules/prepstart.local.lua new file mode 100644 index 0000000000..6d428532bc --- /dev/null +++ b/modulefiles/tasks/hercules/prepstart.local.lua @@ -0,0 +1 @@ +load("python_srw_sd") diff --git a/modulefiles/tasks/hercules/smoke_dust.local.lua b/modulefiles/tasks/hercules/smoke_dust.local.lua new file mode 100644 index 0000000000..6d428532bc --- /dev/null +++ b/modulefiles/tasks/hercules/smoke_dust.local.lua @@ -0,0 +1 @@ +load("python_srw_sd") diff --git a/modulefiles/tasks/orion/prepstart.local.lua b/modulefiles/tasks/orion/prepstart.local.lua new file mode 100644 index 0000000000..6d428532bc --- /dev/null +++ b/modulefiles/tasks/orion/prepstart.local.lua @@ -0,0 +1 @@ +load("python_srw_sd") diff --git a/modulefiles/tasks/orion/smoke_dust.local.lua b/modulefiles/tasks/orion/smoke_dust.local.lua new file mode 100644 index 0000000000..6d428532bc --- /dev/null +++ b/modulefiles/tasks/orion/smoke_dust.local.lua @@ -0,0 +1 @@ +load("python_srw_sd") diff --git a/sd_environment.yml b/sd_environment.yml new file mode 100644 index 0000000000..ceee9fdc91 --- /dev/null +++ b/sd_environment.yml @@ -0,0 +1,15 @@ +name: srw_sd +channels: + - conda-forge + - ufs-community +dependencies: + - esmpy=*=mpi_mpich* + - mpi4py + - pydantic + - netcdf4=1.6.*=mpi_mpich* + - numpy=1.23.* + - pylint=2.17* + 
- pytest=7.2* + - scipy=1.10.* + - uwtools=2.3* + - xarray=2022.11.* From 64a70c4f52d6db30cc73f5aafb89cda8d2a6c8e9 Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Thu, 30 Jan 2025 16:58:02 -0700 Subject: [PATCH 19/41] initial reconcile attempt --- .gitignore | 1 + parm/wflow/smoke_dust.yaml | 2 +- scripts/exsrw_prepstart.sh | 2 +- scripts/exsrw_smoke_dust.sh | 14 +- sd_environment.yml | 17 +- tests/test_python/test_smoke_dust/__init__.py | 0 .../test_generate_emissions.py | 202 ++++++ ush/config.smoke_dust.yaml | 6 +- ush/config_defaults.yaml | 4 + ush/smoke_dust/__init__.py | 0 .../add_smoke.py} | 0 ush/smoke_dust/core/__init__.py | 0 ush/smoke_dust/core/common.py | 127 ++++ ush/smoke_dust/core/context.py | 250 ++++++++ ush/smoke_dust/core/cycle.py | 293 +++++++++ ush/smoke_dust/core/preprocessor.py | 148 +++++ ush/smoke_dust/core/regrid.py | 596 ++++++++++++++++++ ush/smoke_dust/core/variable.py | 110 ++++ ush/smoke_dust/generate_emissions.py | 57 ++ ush/smoke_dust_fire_emiss_tools.py | 415 ------------ ush/smoke_dust_generate_fire_emissions.py | 230 ------- ush/smoke_dust_hwp_tools.py | 276 -------- ush/smoke_dust_interp_tools.py | 566 ----------------- 23 files changed, 1813 insertions(+), 1503 deletions(-) create mode 100644 tests/test_python/test_smoke_dust/__init__.py create mode 100644 tests/test_python/test_smoke_dust/test_generate_emissions.py create mode 100644 ush/smoke_dust/__init__.py rename ush/{smoke_dust_add_smoke.py => smoke_dust/add_smoke.py} (100%) mode change 100755 => 100644 create mode 100644 ush/smoke_dust/core/__init__.py create mode 100644 ush/smoke_dust/core/common.py create mode 100644 ush/smoke_dust/core/context.py create mode 100644 ush/smoke_dust/core/cycle.py create mode 100644 ush/smoke_dust/core/preprocessor.py create mode 100644 ush/smoke_dust/core/regrid.py create mode 100644 ush/smoke_dust/core/variable.py create mode 100644 ush/smoke_dust/generate_emissions.py delete mode 100755 ush/smoke_dust_fire_emiss_tools.py delete mode 100755 ush/smoke_dust_generate_fire_emissions.py delete mode 100755 ush/smoke_dust_hwp_tools.py delete mode 100755 ush/smoke_dust_interp_tools.py diff --git a/.gitignore b/.gitignore index 99f71c9590..02c18a653f 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ conda_loc *.swp __pycache__ +.idea \ No newline at end of file diff --git a/parm/wflow/smoke_dust.yaml b/parm/wflow/smoke_dust.yaml index 5955068aae..467cfdbfae 100644 --- a/parm/wflow/smoke_dust.yaml +++ b/parm/wflow/smoke_dust.yaml @@ -25,7 +25,7 @@ default_smoke_dust_task: &default_smoke_dust nnodes: 1 nodes: '{{ nnodes }}:ppn={{ ppn }}' partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}' - ppn: 1 + ppn: 8 queue: '&QUEUE_DEFAULT;' walltime: 00:30:00 diff --git a/scripts/exsrw_prepstart.sh b/scripts/exsrw_prepstart.sh index 93b9a8ebca..15c0508653 100755 --- a/scripts/exsrw_prepstart.sh +++ b/scripts/exsrw_prepstart.sh @@ -116,7 +116,7 @@ else echo "${PDY}${cyc}: cycle smoke/dust from ${checkfile} " fi - ${USHdir}/smoke_dust_add_smoke.py + ${USHdir}/smoke_dust/add_smoke.py export err=$? 
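+  # A non-zero exit status from add_smoke.py is propagated through the
+  # err_exit/print_err_msg_exit helpers below so that a failed tracer
+  # update aborts the job.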
if [ $err -ne 0 ]; then message_txt="add_smoke.py failed with return code $err" diff --git a/scripts/exsrw_smoke_dust.sh b/scripts/exsrw_smoke_dust.sh index fd0624d3ae..1f008c836c 100755 --- a/scripts/exsrw_smoke_dust.sh +++ b/scripts/exsrw_smoke_dust.sh @@ -107,23 +107,27 @@ else # #----------------------------------------------------------------------- # - ${USHdir}/smoke_dust_generate_fire_emissions.py \ + mpirun -n ${nprocs} ${USHdir}/smoke_dust/generate_emissions.py \ "${FIXsmoke}/${PREDEF_GRID_NAME}" \ "${DATA}" \ "${DATA_SHARE}" \ "${PREDEF_GRID_NAME}" \ "${EBB_DCYCLE}" \ - "${RESTART_INTERVAL}"\ - "${PERSISTENCE}" + "${RESTART_INTERVAL}" \ + "${PERSISTENCE}" \ + "${RAVE_QA_FILTER}" \ + "${EXIT_ON_ERROR}" \ + "${LOG_LEVEL}" export err=$? if [ $err -ne 0 ]; then - message_txt="generate_fire_emissions.py failed with return code $err" + message_txt="generate_emissions.py failed with return code $err" err_exit "${message_txt}" print_err_msg_exit "${message_txt}" fi # Copy Smoke file to COMOUT - cp -p ${DATA}/${smokeFile} ${COMOUT} + cp -p ${DATA_SHARE}/${smokeFile} ${COMOUT} + cp -p ${DATA_SHARE}/${smokeFile} ${DATA} #tdk:pr: is this copy of the file needed? fi # #----------------------------------------------------------------------- diff --git a/sd_environment.yml b/sd_environment.yml index ceee9fdc91..f4d422286a 100644 --- a/sd_environment.yml +++ b/sd_environment.yml @@ -1,15 +1,16 @@ -name: srw_sd +name: srw_aqm channels: - conda-forge - ufs-community dependencies: - - esmpy=*=mpi_mpich* - - mpi4py - - pydantic + - esmpy=8.4.*=mpi_mpich* + - mpi4py=3.1.* + - pydantic=2.10.* - netcdf4=1.6.*=mpi_mpich* - numpy=1.23.* - - pylint=2.17* - - pytest=7.2* + - pylint=2.17.* + - pytest=7.2.* + - pytest-mock=3.14.* - scipy=1.10.* - - uwtools=2.3* - - xarray=2022.11.* + - uwtools=2.3.* + - xarray=2022.11.* \ No newline at end of file diff --git a/tests/test_python/test_smoke_dust/__init__.py b/tests/test_python/test_smoke_dust/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_python/test_smoke_dust/test_generate_emissions.py b/tests/test_python/test_smoke_dust/test_generate_emissions.py new file mode 100644 index 0000000000..e6703f9221 --- /dev/null +++ b/tests/test_python/test_smoke_dust/test_generate_emissions.py @@ -0,0 +1,202 @@ +import hashlib +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Type + +import netCDF4 as nc +import numpy as np +import pandas as pd +import pytest +from _pytest.fixtures import SubRequest +from pydantic import BaseModel +from pytest_mock import MockerFixture + +from smoke_dust.core.context import SmokeDustContext +from smoke_dust.core.cycle import ( + AbstractSmokeDustCycleProcessor, + SmokeDustCycleOne, + SmokeDustCycleTwo, +) +from smoke_dust.core.preprocessor import SmokeDustPreprocessor + + +@dataclass +class FakeGridOutShape: + y_size: int = 5 + x_size: int = 10 + + @property + def as_tuple(self) -> tuple[int, int]: + return self.y_size, self.x_size + + +@pytest.fixture +def fake_grid_out_shape() -> FakeGridOutShape: + return FakeGridOutShape() + + +def create_restart_files( + root_dir: Path, forecast_dates: pd.DatetimeIndex, shape: FakeGridOutShape +) -> None: + restart_dir = root_dir / "RESTART" + restart_dir.mkdir() + for date in forecast_dates: + restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" + with nc.Dataset(restart_file, "w") as ds: + ds.createDimension("Time") + ds.createDimension("yaxis_1", shape.y_size) + ds.createDimension("xaxis_1", 
shape.x_size) + totprcp_ave = ds.createVariable( + "totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1") + ) + totprcp_ave[0, ...] = np.ones(shape.as_tuple) + rrfs_hwp_ave = ds.createVariable( + "rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1") + ) + rrfs_hwp_ave[0, ...] = totprcp_ave[:] + 2 + + +def create_rave_interpolated( + root_dir: Path, + forecast_dates: pd.DatetimeIndex, + shape: FakeGridOutShape, + rave_to_intp: str, +) -> None: + for date in forecast_dates: + intp_file = root_dir / f"{rave_to_intp}{date}00_{date}59.nc" + dims = ("t", "lat", "lon") + with nc.Dataset(intp_file, "w") as ds: + ds.createDimension("t") + ds.createDimension("lat", shape.y_size) + ds.createDimension("lon", shape.x_size) + for varname in ["frp_avg_hr", "FRE"]: + var = ds.createVariable(varname, "f4", dims) + var[0, ...] = np.ones(shape.as_tuple) + + +def create_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None: + with nc.Dataset(root_dir / "ds_out_base.nc", "w") as ds: + ds.createDimension("grid_yt", shape.y_size) + ds.createDimension("grid_xt", shape.x_size) + for varname in ["area", "grid_latt", "grid_lont"]: + var = ds.createVariable(varname, "f4", ("grid_yt", "grid_xt")) + var[:] = np.ones((shape.y_size, shape.x_size)) + + +def create_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: + with nc.Dataset(root_dir / "veg_map.nc", "w") as ds: + ds.createDimension("grid_yt", shape.y_size) + ds.createDimension("grid_xt", shape.x_size) + emiss_factor = ds.createVariable("emiss_factor", "f4", ("grid_yt", "grid_xt")) + emiss_factor[:] = np.ones((shape.y_size, shape.x_size)) + + +def create_context(root_dir: Path, overrides: dict | None = None) -> SmokeDustContext: + current_day = "2019072200" + nwges_dir = root_dir + os.environ["CDATE"] = current_day + os.environ["DATA"] = str(nwges_dir) + kwds = dict( + staticdir=root_dir, + ravedir=root_dir, + intp_dir=root_dir, + predef_grid="RRFS_CONUS_3km", + ebb_dcycle_flag="2", + restart_interval="6 12 18 24", + persistence="FALSE", + rave_qa_filter="NONE", + exit_on_error="TRUE", + log_level="DEBUG", + ) + if overrides is not None: + kwds.update(overrides) + context = SmokeDustContext.create_from_args(kwds.values()) + return context + + +class ExpectedData(BaseModel): + flag: str + klass: Type[AbstractSmokeDustCycleProcessor] + hash: str + + +class DataForTest(BaseModel): + model_config = dict(arbitrary_types_allowed=True) + context: SmokeDustContext + preprocessor: SmokeDustPreprocessor + expected: ExpectedData + + +@pytest.fixture( + params=[ + ExpectedData( + flag="1", klass=SmokeDustCycleOne, hash="d124734dfce7ca914391e35a02e4a7d2" + ), + ExpectedData( + flag="2", klass=SmokeDustCycleTwo, hash="6752199f1039edc936a942f3885af38b" + ), + ] +) +def data_for_test( + request: SubRequest, tmp_path: Path, fake_grid_out_shape: FakeGridOutShape +) -> DataForTest: + try: + create_grid_out(tmp_path, fake_grid_out_shape) + create_veg_map(tmp_path, fake_grid_out_shape) + context = create_context( + tmp_path, overrides=dict(ebb_dcycle_flag=request.param.flag) + ) + preprocessor = SmokeDustPreprocessor(context) + create_restart_files(tmp_path, preprocessor.forecast_dates, fake_grid_out_shape) + create_rave_interpolated( + tmp_path, + preprocessor.forecast_dates, + fake_grid_out_shape, + context.predef_grid.value + "_intp_", + ) + return DataForTest( + context=context, preprocessor=preprocessor, expected=request.param + ) + finally: + for ii in ["CDATE", "DATA"]: + os.unsetenv(ii) + + +def create_file_hash(path: Path) -> str: + with open(path, "rb") as f: + 
file_hash = hashlib.md5() + while chunk := f.read(8192): + file_hash.update(chunk) + return file_hash.hexdigest() + + +class TestSmokeDustPreprocessor: + + def test_run(self, data_for_test: DataForTest, mocker: MockerFixture) -> None: + """Test core capabilities of the preprocessor. Note this does not test regridding.""" + preprocessor = data_for_test.preprocessor + spy1 = mocker.spy(preprocessor, "create_dummy_emissions_file") + regrid_processor_class = preprocessor._regrid_processor.__class__ + spy2 = mocker.spy(regrid_processor_class, "_run_impl_") + spy3 = mocker.spy(regrid_processor_class, "run") + cycle_processor_class = preprocessor._cycle_processor.__class__ + spy4 = mocker.spy(cycle_processor_class, "process_emissions") + spy5 = mocker.spy(cycle_processor_class, "average_frp") + + assert isinstance(preprocessor._cycle_processor, data_for_test.expected.klass) + assert preprocessor._forecast_metadata is None + assert not data_for_test.context.emissions_path.exists() + + preprocessor.run() + spy1.assert_not_called() + spy2.assert_not_called() + spy3.assert_called_once() + spy4.assert_called_once() + spy5.assert_called_once() + + assert data_for_test.context.emissions_path.exists() + assert ( + create_file_hash(data_for_test.context.emissions_path) + == data_for_test.expected.hash + ) diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml index 0fc0f8662d..a9cfe4a98d 100644 --- a/ush/config.smoke_dust.yaml +++ b/ush/config.smoke_dust.yaml @@ -37,6 +37,7 @@ task_get_extrn_ics: EXTRN_MDL_NAME_ICS: RAP EXTRN_MDL_ICS_OFFSET_HRS: 0 USE_USER_STAGED_EXTRN_FILES: true + #tdk:pr: can we use overlays to parameterize these? EXTRN_MDL_SOURCE_BASEDIR_ICS: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAP_DATA_SD/${yyyymmddhh} # hera # EXTRN_MDL_SOURCE_BASEDIR_ICS: /work/noaa/epic/SRW-AQM_DATA/input_model_data/RAP/${yyyymmddhh} # orion/hercules # EXTRN_MDL_SOURCE_BASEDIR_ICS: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmddhh} # gaea-c6 @@ -75,4 +76,7 @@ smoke_dust_parm: DO_SMOKE_DUST: true EBB_DCYCLE: 1 SMOKE_DUST_FILE_PREFIX: "SMOKE_RRFS_data" - + PERSISTENCE: false + RAVE_QA_FILTER: none + EXIT_ON_ERROR: true + LOG_LEVEL: info #tdk: support upper on log_level \ No newline at end of file diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 205e08012b..68a67f09bc 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2867,6 +2867,7 @@ smoke_dust_parm: # # SMOKE_DUST_FILE_PREFIX: # Prefix of Smoke and Dust file name + #tdk:doc # #----------------------------------------------------------------------- # @@ -2876,6 +2877,9 @@ smoke_dust_parm: COMINsmoke_default: "" COMINrave_default: "" SMOKE_DUST_FILE_PREFIX: "SMOKE_RRFS_data" + RAVE_QA_FILTER: none + EXIT_ON_ERROR: true + LOG_LEVEL: info #---------------------------- # UFS FIRE config parameters diff --git a/ush/smoke_dust/__init__.py b/ush/smoke_dust/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ush/smoke_dust_add_smoke.py b/ush/smoke_dust/add_smoke.py old mode 100755 new mode 100644 similarity index 100% rename from ush/smoke_dust_add_smoke.py rename to ush/smoke_dust/add_smoke.py diff --git a/ush/smoke_dust/core/__init__.py b/ush/smoke_dust/core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ush/smoke_dust/core/common.py b/ush/smoke_dust/core/common.py new file mode 100644 index 0000000000..f2923e9d93 --- /dev/null +++ b/ush/smoke_dust/core/common.py @@ -0,0 +1,127 @@ +from contextlib import 
contextmanager +from pathlib import Path +from typing import Tuple, Literal, Dict + +import netCDF4 as nc +import numpy as np +import pandas as pd +from mpi4py import MPI + +from smoke_dust.core.variable import SmokeDustVariable, SD_VARS + + +@contextmanager +def open_nc( + path: Path, + mode: Literal["r", "w", "a"] = "r", + clobber: bool = False, + parallel: bool = True, +) -> nc.Dataset: + ds = nc.Dataset( + path, + mode=mode, + clobber=clobber, + parallel=parallel, + comm=MPI.COMM_WORLD, + info=MPI.Info(), + ) + try: + yield ds + finally: + ds.close() + + +def create_sd_coordinate_variable( + ds: nc.Dataset, + sd_variable: SmokeDustVariable, +) -> None: + """ + Create a smoke/dust netCDF spatial coordinate variable. + + Args: + ds: Dataset to update + sd_variable: Contains variable metadata + """ + var_out = ds.createVariable( + sd_variable.name, "f4", ("lat", "lon"), fill_value=sd_variable.fill_value_float + ) + var_out.units = sd_variable.units + var_out.long_name = sd_variable.long_name + var_out.standard_name = sd_variable.name + var_out.FillValue = sd_variable.fill_value_str + var_out.coordinates = "geolat geolon" + + +def create_sd_variable( + ds: nc.Dataset, + sd_variable: SmokeDustVariable, + fill_first_time_index: bool = True, +) -> None: + """ + Create a smoke/dust netCDF variable. + + Args: + ds: Dataset to update + sd_variable: Contains variable metadata + fill_first_time_index: If True, fill the first time index with provided `fill_value_float` + """ + var_out = ds.createVariable( + sd_variable.name, + "f4", + ("t", "lat", "lon"), + fill_value=sd_variable.fill_value_float, + ) + var_out.units = sd_variable.units + var_out.long_name = sd_variable.long_name + var_out.standard_name = sd_variable.long_name + var_out.FillValue = sd_variable.fill_value_str + var_out.coordinates = "t geolat geolon" + if fill_first_time_index: + try: + var_out.set_collective(True) + except RuntimeError: + # Allow this function to work with parallel and non-parallel datasets. 
If the dataset is not opened in parallel + # this error message is returned: RuntimeError: NetCDF: Parallel operation on file opened for non-parallel access + pass + var_out[0, :, :] = sd_variable.fill_value_float + try: + var_out.set_collective(False) + except RuntimeError: + pass + + +def create_template_emissions_file( + ds: nc.Dataset, grid_shape: Tuple[int, int], is_dummy: bool = False +): + ds.createDimension("t", None) + ds.createDimension("lat", grid_shape[0]) + ds.createDimension("lon", grid_shape[1]) + setattr(ds, "PRODUCT_ALGORITHM_VERSION", "Beta") + setattr(ds, "TIME_RANGE", "1 hour") + setattr(ds, "is_dummy", str(is_dummy)) + + for varname in ["geolat", "geolon"]: + create_sd_coordinate_variable(ds, SD_VARS.get(varname)) + + +def create_descriptive_statistics( + container: Dict[str, np.ma.MaskedArray], + origin: Literal["src", "dst_unmasked", "dst_masked", "derived"], + path: Path, +) -> pd.DataFrame: + df = pd.DataFrame.from_dict( + {k: v.filled(np.nan).ravel() for k, v in container.items()} + ) + desc = df.describe() + adds = {} + for field_name in container.keys(): + adds[field_name] = [ + df[field_name].sum(), + df[field_name].isnull().sum(), + origin, + path, + ] + desc = pd.concat( + [desc, pd.DataFrame(data=adds, index=["sum", "count_null", "origin", "path"])] + ) + return desc diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py new file mode 100644 index 0000000000..913fff7900 --- /dev/null +++ b/ush/smoke_dust/core/context.py @@ -0,0 +1,250 @@ +import datetime as dt +import logging +import logging.config +import os +from enum import unique, StrEnum, IntEnum +from pathlib import Path +from typing import Tuple, List + +from mpi4py import MPI +from pydantic import BaseModel, model_validator + +from smoke_dust.core.common import open_nc + + +@unique +class PredefinedGrid(StrEnum): + RRFS_CONUS_25km = "RRFS_CONUS_25km" + RRFS_CONUS_13km = "RRFS_CONUS_13km" + RRFS_CONUS_3km = "RRFS_CONUS_3km" + RRFS_NA_3km = "RRFS_NA_3km" + RRFS_NA_13km = "RRFS_NA_13km" + + +@unique +class EbbDCycle(IntEnum): + ONE = 1 + TWO = 2 + + +@unique +class RaveQaFilter(StrEnum): + NONE = "NONE" + HIGH = "HIGH" + + +@unique +class LogLevel(StrEnum): + INFO = "INFO" + DEBUG = "DEBUG" + + +@unique +class EmissionVariable(StrEnum): + FRE = "FRE" + FRP = "FRP" + + def rave_name(self) -> str: + other = {self.FRP: "FRP_MEAN", self.FRE: "FRE"} + return other[self] + + def smoke_dust_name(self) -> str: + other = {self.FRP: "frp_avg_hr", self.FRE: "FRE"} + return other[self] + + +class SmokeDustContext(BaseModel): + # Values provided via command-line + staticdir: Path + ravedir: Path + intp_dir: Path + predef_grid: PredefinedGrid + ebb_dcycle_flag: EbbDCycle + restart_interval: Tuple[int, ...] 
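+    # The fields above and below mirror the positional command-line arguments;
+    # create_from_args converts them from strings, e.g. the workflow's
+    # RESTART_INTERVAL string "6 12 18 24" becomes restart_interval=(6, 12, 18, 24).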
+ persistence: bool + rave_qa_filter: RaveQaFilter + exit_on_error: bool + log_level: LogLevel + + # Values provided via environment + current_day: str + nwges_dir: Path + + # Fixed parameters + should_calc_desc_stats: bool = False + vars_emis: tuple[str] = ("FRP_MEAN", "FRE") + beta: float = 0.3 + fg_to_ug: float = 1e6 + to_s: int = 3600 + rank: int = MPI.COMM_WORLD.Get_rank() + grid_out_shape: Tuple[int, int] = (0, 0) # Set in _finalize_model_ + esmpy_debug: bool = False + + @model_validator(mode="after") + def _finalize_model_(self) -> "SmokeDustContext": + self._logger = self._init_logging_() + + with open_nc(self.grid_out, parallel=False) as ds: + self.grid_out_shape = ( + ds.dimensions["grid_yt"].size, + ds.dimensions["grid_xt"].size, + ) + self.log(f"{self.grid_out_shape=}") + return self + + @classmethod + def create_from_args(cls, args: List[str]) -> "SmokeDustContext": + print(f"create_from_args: {args=}", flush=True) + + # Extract local arguments from args before converting values + ( + l_staticdir, + l_ravedir, + l_intp_dir, + l_predef_grid, + l_ebb_dcycle_flag, + l_restart_interval, + l_persistence, + l_rave_qa_filter, + l_exit_on_error, + l_log_level, + ) = args + + # Format environment-level variables + current_day: str = os.environ["CDATE"] + nwges_dir = cls._format_read_path_(os.environ["DATA"]) + + # Convert to expected types + kwds = dict( + staticdir=cls._format_read_path_(l_staticdir), + ravedir=cls._format_read_path_(l_ravedir), + intp_dir=cls._format_write_path_(l_intp_dir), + predef_grid=PredefinedGrid(l_predef_grid), + ebb_dcycle_flag=EbbDCycle(int(l_ebb_dcycle_flag)), + restart_interval=[int(num) for num in l_restart_interval.split(" ")], + persistence=cls._str_to_bool_(l_persistence), + rave_qa_filter=RaveQaFilter(l_rave_qa_filter.upper()), + exit_on_error=cls._str_to_bool_(l_exit_on_error), + log_level=l_log_level, + current_day=current_day, + nwges_dir=nwges_dir, + ) + + return cls(**kwds) + + @property + def veg_map(self) -> Path: + return self.staticdir / "veg_map.nc" + + @property + def rave_to_intp(self) -> str: + return self.predef_grid.value + "_intp_" + + @property + def grid_in(self) -> Path: + return self.staticdir / "grid_in.nc" + + @property + def weightfile(self) -> Path: + return self.staticdir / "weight_file.nc" + + @property + def grid_out(self) -> Path: + return self.staticdir / "ds_out_base.nc" + + @property + def hourly_hwpdir(self) -> Path: + return self.nwges_dir / "RESTART" + + @property + def emissions_path(self) -> Path: + return self.intp_dir / f"SMOKE_RRFS_data_{self.current_day}00.nc" + + @property + def fcst_datetime(self) -> dt.datetime: + return dt.datetime.strptime(self.current_day, "%Y%m%d%H") + + def log( + self, + msg, + level=logging.INFO, + exc_info: Exception = None, + stacklevel: int = 2, + ): + if exc_info is not None: + level = logging.ERROR + self._logger.log(level, msg, exc_info=exc_info, stacklevel=stacklevel) + if exc_info is not None and self.exit_on_error: + raise exc_info + + @staticmethod + def _format_path_(value: Path | str) -> Path: + return Path(value).expanduser().resolve(strict=True) + + @classmethod + def _format_read_path_(cls, value: str) -> Path: + path = cls._format_path_(value) + errors = [] + if not path.exists(): + errors.append(f"path does not exist: {path}") + if not os.access(path, os.R_OK): + errors.append(f"path is not readable: {path}") + if not path.is_dir(): + errors.append(f"path is not a directory: {path}") + if len(errors) > 0: + raise OSError(errors) + return path + + @classmethod + def 
_format_write_path_(cls, value: str) -> Path: + path = cls._format_path_(value) + errors = [] + if not path.exists(): + errors.append(f"path does not exist: {path}") + if not os.access(path, os.W_OK): + errors.append(f"path is not writable: {path}") + if not path.is_dir(): + errors.append(f"path is not a directory: {path}") + if len(errors) > 0: + raise OSError(errors) + return path + + @staticmethod + def _str_to_bool_(value: str) -> bool: + value = value.lower() + if value in ["true", "t", "1"]: + return True + elif value in ["false", "f", "0"]: + return False + raise NotImplementedError(f"boolean string not recognized: {value}") + + def _init_logging_(self) -> logging.Logger: + project_name = "smoke-dust-preprocessor" + + logging_config: dict = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "plain": { + # Uncomment to report verbose output in logs; try to keep these two in sync + # "format": f"[%(name)s][%(levelname)s][%(asctime)s][%(pathname)s:%(lineno)d][%(process)d][%(thread)d][rank={self._rank}]: %(message)s" + "format": f"[%(name)s][%(levelname)s][%(asctime)s][%(filename)s:%(lineno)d][rank={self.rank}]: %(message)s" + }, + }, + "handlers": { + "default": { + "formatter": "plain", + "class": "logging.StreamHandler", + "stream": "ext://sys.stdout", + "filters": [], + }, + }, + "loggers": { + project_name: { + "handlers": ["default"], + "level": getattr(logging, self.log_level.value), + }, + }, + } + logging.config.dictConfig(logging_config) + return logging.getLogger(project_name) diff --git a/ush/smoke_dust/core/cycle.py b/ush/smoke_dust/core/cycle.py new file mode 100644 index 0000000000..5e71db3c0d --- /dev/null +++ b/ush/smoke_dust/core/cycle.py @@ -0,0 +1,293 @@ +import abc +import datetime as dt +from enum import StrEnum, unique +from typing import Dict, Any + +import numpy as np +import pandas as pd +import xarray as xr + +from smoke_dust.core.common import ( + open_nc, + create_sd_variable, + create_template_emissions_file, +) +from smoke_dust.core.context import SmokeDustContext, EmissionVariable, EbbDCycle +from smoke_dust.core.variable import SD_VARS + + +@unique +class FrpVariable(StrEnum): + FRP_AVG = "frp_avg_hr" + EBB_TOTAL = "ebb_smoke_hr" + + +class AbstractSmokeDustCycleProcessor(abc.ABC): + + def __init__(self, context: SmokeDustContext): + self._context = context + + def log(self, *args: Any, **kwargs: Any) -> None: + self._context.log(*args, **kwargs) + + @abc.abstractmethod + def flag(self) -> EbbDCycle: ... + + @abc.abstractmethod + def create_start_datetime(self) -> dt.datetime: ... + + @abc.abstractmethod + def average_frp( + self, forecast_metadata: pd.DataFrame + ) -> Dict[FrpVariable, np.ndarray]: ... + + @abc.abstractmethod + def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: ... 
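+
+# The concrete processors below implement the two EBB_DCYCLE options. Both
+# derive hourly biomass-burning emissions from FRE using the constants defined
+# on SmokeDustContext (beta, fg_to_ug, to_s), roughly:
+#
+#     ebb_hourly = fre * emiss_factor * beta * fg_to_ug / target_area
+#
+# SmokeDustCycleOne (retrospective) also divides by to_s (3600 s) per hour,
+# while SmokeDustCycleTwo (forecast) averages over the day first and applies
+# the 1/3600 conversion afterwards.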
+ + +class SmokeDustCycleOne(AbstractSmokeDustCycleProcessor): + flag = EbbDCycle.ONE + + def create_start_datetime(self) -> dt.datetime: + if self._context.persistence: + self.log( + "Creating emissions for persistence method where satellite FRP persist from previous day" + ) + start_datetime = self._context.fcst_datetime - dt.timedelta(days=1) + else: + self.log("Creating emissions using current date satellite FRP") + start_datetime = self._context.fcst_datetime + return start_datetime + + def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: + derived = self.average_frp(forecast_metadata) + self.log(f"creating 24-hour emissions file: {self._context.emissions_path}") + with open_nc( + self._context.emissions_path, "w", parallel=False, clobber=True + ) as ds_out: + create_template_emissions_file(ds_out, self._context.grid_out_shape) + with open_nc(self._context.grid_out, parallel=False) as ds_src: + ds_out.variables["geolat"][:] = ds_src.variables["grid_latt"][:] + ds_out.variables["geolon"][:] = ds_src.variables["grid_lont"][:] + for var, fill_array in derived.items(): + create_sd_variable(ds_out, SD_VARS.get(var.value)) + ds_out.variables[var.value][:] = fill_array + + def average_frp( + self, forecast_metadata: pd.DataFrame + ) -> Dict[FrpVariable, np.ndarray]: + ebb_smoke_total = [] + frp_avg_hr = [] + + with xr.open_dataset(self._context.veg_map) as ds: + emiss_factor = ds["emiss_factor"].values + with xr.open_dataset(self._context.grid_out) as ds: + target_area = ds["area"].values + + for row_idx, row_df in forecast_metadata.iterrows(): + self.log(f"processing emissions: {row_idx}, {row_df.to_dict()}") + with xr.open_dataset(row_df["rave_interpolated"]) as ds: + fre = ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values + frp = ds[EmissionVariable.FRP.smoke_dust_name()][0, :, :].values + + frp_avg_hr.append(frp) + ebb_hourly = ( + fre * emiss_factor * self._context.beta * self._context.fg_to_ug + ) / (target_area * self._context.to_s) + ebb_smoke_total.append(np.where(frp > 0, ebb_hourly, 0)) + + frp_avg_reshaped = np.stack(frp_avg_hr, axis=0) + ebb_total_reshaped = np.stack(ebb_smoke_total, axis=0) + + np.nan_to_num(frp_avg_reshaped, copy=False, nan=0.0) + + return { + FrpVariable.FRP_AVG: frp_avg_reshaped, + FrpVariable.EBB_TOTAL: ebb_total_reshaped, + } + + +class SmokeDustCycleTwo(AbstractSmokeDustCycleProcessor): + flag = EbbDCycle.TWO + + def create_start_datetime(self) -> dt.datetime: + self.log("Creating emissions for modulated persistence by Wildfire potential") + return self._context.fcst_datetime - dt.timedelta(days=1, hours=1) + + def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: + # tdk:story: figure out restart file copying + self.log("process_emissions: enter") + + hwp_ave = [] + totprcp = np.zeros(self._context.grid_out_shape).ravel() + for date in forecast_metadata["forecast_date"]: + phy_data_path = ( + self._context.hourly_hwpdir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" + ) + rave_path = ( + self._context.intp_dir + / f"{self._context.rave_to_intp}{date}00_{date}59.nc" + ) + self.log(f"processing emissions for: {phy_data_path=}, {rave_path=}") + with xr.open_dataset(phy_data_path) as ds: + hwp_values = ds.rrfs_hwp_ave.values.ravel() + tprcp_values = ds.totprcp_ave.values.ravel() + totprcp += np.where(tprcp_values > 0, tprcp_values, 0) + hwp_ave.append(hwp_values) + hwp_ave_arr = np.nanmean(hwp_ave, axis=0).reshape(*self._context.grid_out_shape) + totprcp_ave_arr = totprcp.reshape(*self._context.grid_out_shape) + 
xarr_hwp = xr.DataArray(hwp_ave_arr) + xarr_totprcp = xr.DataArray(totprcp_ave_arr) + + derived = self.average_frp(forecast_metadata) + + t_fire = np.zeros(self._context.grid_out_shape) + for date in forecast_metadata["forecast_date"]: + rave_path = ( + self._context.intp_dir + / f"{self._context.rave_to_intp}{date}00_{date}59.nc" + ) + with xr.open_dataset(rave_path) as ds: + frp = ds.frp_avg_hr[0, :, :].values + dates_filtered = np.where(frp > 0, int(date[:10]), 0) + t_fire = np.maximum(t_fire, dates_filtered) + t_fire_flattened = [int(i) if i != 0 else 0 for i in t_fire.flatten()] + hr_ends = [ + dt.datetime.strptime(str(hr), "%Y%m%d%H") if hr != 0 else 0 + for hr in t_fire_flattened + ] + te = np.array( + [ + ( + (self._context.fcst_datetime - i).total_seconds() / 3600 + if i != 0 + else 0 + ) + for i in hr_ends + ] + ) + fire_age = np.array(te).reshape(self._context.grid_out_shape) + + # Ensure arrays are not negative or NaN + frp_avg_reshaped = np.clip(derived[FrpVariable.FRP_AVG], 0, None) + frp_avg_reshaped = np.nan_to_num(frp_avg_reshaped) + + ebb_tot_reshaped = np.clip(derived[FrpVariable.EBB_TOTAL], 0, None) + ebb_tot_reshaped = np.nan_to_num(ebb_tot_reshaped) + + fire_age = np.clip(fire_age, 0, None) + fire_age = np.nan_to_num(fire_age) + + # Filter HWP Prcp arrays to be non-negative and replace NaNs + filtered_hwp = xarr_hwp.where(frp_avg_reshaped > 0, 0).fillna(0) + filtered_prcp = xarr_totprcp.where(frp_avg_reshaped > 0, 0).fillna(0) + + # Filter based on ebb_rate + ebb_rate_threshold = 0 # Define an appropriate threshold if needed + mask = ebb_tot_reshaped > ebb_rate_threshold + + filtered_hwp = filtered_hwp.where(mask, 0).fillna(0) + filtered_prcp = filtered_prcp.where(mask, 0).fillna(0) + frp_avg_reshaped = frp_avg_reshaped * mask + ebb_tot_reshaped = ebb_tot_reshaped * mask + fire_age = fire_age * mask + + self.log(f"creating emissions file: {self._context.emissions_path}") + with open_nc(self._context.emissions_path, "w", parallel=False) as ds_out: + create_template_emissions_file(ds_out, self._context.grid_out_shape) + with open_nc(self._context.grid_out, parallel=False) as ds_src: + ds_out.variables["geolat"][:] = ds_src.variables["grid_latt"][:] + ds_out.variables["geolon"][:] = ds_src.variables["grid_lont"][:] + + var_map = { + "frp_davg": frp_avg_reshaped, + "ebb_rate": ebb_tot_reshaped, + "fire_end_hr": fire_age, + "hwp_davg": filtered_hwp, + "totprcp_24hrs": filtered_prcp, + } + for varname, fill_array in var_map.items(): + create_sd_variable(ds_out, SD_VARS.get(varname)) + ds_out.variables[varname][0, :, :] = fill_array + + self.log("process_emissions: exit") + + def average_frp( + self, forecast_metadata: pd.DataFrame + ) -> Dict[FrpVariable, np.ndarray]: + self.log(f"average_frp: entering") + + frp_daily = np.zeros(self._context.grid_out_shape).ravel() + ebb_smoke_total = [] + + with xr.open_dataset(self._context.veg_map) as ds: + emiss_factor = ds["emiss_factor"].values + with xr.open_dataset(self._context.grid_out) as ds: + target_area = ds["area"].values + + for row_idx, row_df in forecast_metadata.iterrows(): + self.log(f"processing emissions: {row_idx}, {row_df.to_dict()}") + with xr.open_dataset(row_df["rave_interpolated"]) as ds: + fre = ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values + frp = ds[EmissionVariable.FRP.smoke_dust_name()][0, :, :].values + + ebb_hourly = ( + fre + * emiss_factor + * self._context.beta + * self._context.fg_to_ug + / target_area + ) + ebb_smoke_total.append(np.where(frp > 0, ebb_hourly, 0).ravel()) + frp_daily += 
np.where(frp > 0, frp, 0).ravel() + + summed_array = np.sum(np.array(ebb_smoke_total), axis=0) + num_zeros = len(ebb_smoke_total) - np.sum( + [arr == 0 for arr in ebb_smoke_total], axis=0 + ) + safe_zero_count = np.where(num_zeros == 0, 1, num_zeros) + result_array = np.array( + [ + ( + summed_array[i] / 2 + if safe_zero_count[i] == 1 + else summed_array[i] / safe_zero_count[i] + ) + for i in range(len(safe_zero_count)) + ] + ) + result_array[num_zeros == 0] = summed_array[num_zeros == 0] + ebb_total = result_array.reshape(self._context.grid_out_shape) + ebb_total_reshaped = ebb_total / 3600 + temp_frp = np.array( + [ + ( + frp_daily[i] / 2 + if safe_zero_count[i] == 1 + else frp_daily[i] / safe_zero_count[i] + ) + for i in range(len(safe_zero_count)) + ] + ) + temp_frp[num_zeros == 0] = frp_daily[num_zeros == 0] + frp_avg_reshaped = temp_frp.reshape(*self._context.grid_out_shape) + + np.nan_to_num(frp_avg_reshaped, copy=False, nan=0.0) + + self.log("average_frp: exiting") + return { + FrpVariable.FRP_AVG: frp_avg_reshaped, + FrpVariable.EBB_TOTAL: ebb_total_reshaped, + } + + +def create_cycle_processor( + context: SmokeDustContext, +) -> AbstractSmokeDustCycleProcessor: + match context.ebb_dcycle_flag: + case EbbDCycle.ONE: + return SmokeDustCycleOne(context) + case EbbDCycle.TWO: + return SmokeDustCycleTwo(context) + case _: + raise NotImplementedError(context.ebb_dcycle_flag) diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py new file mode 100644 index 0000000000..f48e3d249e --- /dev/null +++ b/ush/smoke_dust/core/preprocessor.py @@ -0,0 +1,148 @@ +import fnmatch +import logging +from pathlib import Path +from typing import Any + +import pandas as pd + +from smoke_dust.core.common import ( + open_nc, + create_template_emissions_file, + create_sd_variable, +) +from smoke_dust.core.context import SmokeDustContext +from smoke_dust.core.cycle import create_cycle_processor +from smoke_dust.core.regrid import SmokeDustRegridProcessor +from smoke_dust.core.variable import SD_VARS + + +class SmokeDustPreprocessor: + + def __init__(self, context: SmokeDustContext) -> None: + self._context = context + self.log(f"__init__: enter") + + # Processes regridding from source data to destination analysis grid + self._regrid_processor = SmokeDustRegridProcessor(context) + # Processes cycle-specific data transformations + self._cycle_processor = create_cycle_processor(context) + + # On-demand/cached property values + self._forecast_metadata = None + self._forecast_dates = None + + self.log(f"{self._context=}") + self.log("__init__: exit") + + def log(self, *args: Any, **kwargs: Any) -> None: + self._context.log(*args, **kwargs) + + @property + def forecast_dates(self) -> pd.DatetimeIndex: + if self._forecast_dates is not None: + return self._forecast_dates + start_datetime = self._cycle_processor.create_start_datetime() + self.log(f"{start_datetime=}") + forecast_dates = pd.date_range( + start=start_datetime, periods=24, freq="h" + ).strftime("%Y%m%d%H") + self._forecast_dates = forecast_dates + return self._forecast_dates + + @property + def forecast_metadata(self) -> pd.DataFrame: + if self._forecast_metadata is not None: + return self._forecast_metadata + + # Collect metadata on data files related to forecast dates + self.log(f"creating forecast metadata") + intp_path = [] + rave_to_forecast = [] + for date in self.forecast_dates: + # Check for pre-existing interpolated RAVE data + file_path = ( + Path(self._context.intp_dir) + / 
f"{self._context.rave_to_intp}{date}00_{date}59.nc" + ) + if file_path.exists() and file_path.is_file(): + try: + resolved = file_path.resolve(strict=True) + except FileNotFoundError: + continue + else: + intp_path.append(resolved) + else: + intp_path.append(None) + + # Check for raw RAVE data + wildcard_name = f"*-3km*{date}*{date}59590*.nc" + name_retro = f"*3km*{date}*{date}*.nc" + found = False + for rave_path in self._context.ravedir.iterdir(): + if fnmatch.fnmatch(str(rave_path), wildcard_name) or fnmatch.fnmatch( + str(rave_path), name_retro + ): + rave_to_forecast.append(rave_path) + found = True + break + if not found: + rave_to_forecast.append(None) + + self.log(f"{self.forecast_dates}", level=logging.DEBUG) + self.log(f"{intp_path=}", level=logging.DEBUG) + self.log(f"{rave_to_forecast=}", level=logging.DEBUG) + df = pd.DataFrame( + data={ + "forecast_date": self.forecast_dates, + "rave_interpolated": intp_path, + "rave_raw": rave_to_forecast, + } + ) + self._forecast_metadata = df + return df + + @property + def is_first_day(self) -> bool: + is_first_day = ( + self.forecast_metadata["rave_interpolated"].isnull().all() + and self.forecast_metadata["rave_raw"].isnull().all() + ) + self.log(f"{is_first_day=}") + return is_first_day + + def run(self) -> None: + self.log("run: entering") + if self.is_first_day: + if self._context.rank == 0: + self.create_dummy_emissions_file() + else: + self._regrid_processor.run(self.forecast_metadata) + if self._context.rank == 0: + self._cycle_processor.process_emissions(self.forecast_metadata) + self.log("run: exiting") + + def create_dummy_emissions_file(self) -> None: + self.log("create_dummy_emissions_file: enter") + self.log(f"{self._context.emissions_path=}") + with open_nc( + self._context.emissions_path, "w", parallel=False, clobber=True + ) as ds: + create_template_emissions_file( + ds, self._context.grid_out_shape, is_dummy=True + ) + with open_nc(self._context.grid_out, parallel=False) as ds_src: + ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:] + ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:] + + for varname in [ + "frp_davg", + "ebb_rate", + "fire_end_hr", + "hwp_davg", + "totprcp_24hrs", + ]: + create_sd_variable(ds, SD_VARS.get(varname)) + self.log("create_dummy_emissions_file: exit") + + def finalize(self) -> None: + self.log("finalize: exiting") diff --git a/ush/smoke_dust/core/regrid.py b/ush/smoke_dust/core/regrid.py new file mode 100644 index 0000000000..3777753fe7 --- /dev/null +++ b/ush/smoke_dust/core/regrid.py @@ -0,0 +1,596 @@ +import abc +import logging +from copy import copy, deepcopy +from pathlib import Path +from typing import Any, Union, Dict, Tuple, Literal + +import esmpy +import netCDF4 as nc +import numpy as np +import pandas as pd +from pydantic import BaseModel, ConfigDict, model_validator + +from smoke_dust.core.common import ( + create_template_emissions_file, + create_sd_variable, + create_descriptive_statistics, + open_nc, +) +from smoke_dust.core.context import RaveQaFilter, SmokeDustContext, PredefinedGrid +from smoke_dust.core.variable import SD_VARS + + +class SmokeDustRegridProcessor: + + def __init__(self, context: SmokeDustContext): + self._context = context + self._esmpy_manager = esmpy.Manager(debug=self._context.esmpy_debug) + + # Holds interpolation descriptive statistics + self._interpolation_stats = None + + def log(self, *args: Any, **kwargs: Any) -> None: + self._context.log(*args, **kwargs) + + def run(self, forecast_metadata: pd.DataFrame) -> None: + # Select 
which RAVE files to interpolate
+        rave_to_interpolate = forecast_metadata[
+            forecast_metadata["rave_interpolated"].isnull()
+            & ~forecast_metadata["rave_raw"].isnull()
+        ]
+        if len(rave_to_interpolate) == 0:
+            self.log("all rave files have been interpolated")
+            return
+
+        self._run_impl_(forecast_metadata, rave_to_interpolate)
+
+    def _run_impl_(
+        self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.DataFrame
+    ) -> None:
+        first = True
+        for row_idx, row_data in rave_to_interpolate.iterrows():
+            row_dict = row_data.to_dict()
+            self.log(f"processing RAVE interpolation row: {row_idx}, {row_dict}")
+
+            if first:
+                self.log("creating destination grid from RRFS grid file")
+                dst_nc2grid = NcToGrid(
+                    path=self._context.grid_out,
+                    spec=GridSpec(
+                        x_center="grid_lont",
+                        y_center="grid_latt",
+                        x_dim=("grid_xt",),
+                        y_dim=("grid_yt",),
+                        x_corner="grid_lon",
+                        y_corner="grid_lat",
+                        x_corner_dim=("grid_x",),
+                        y_corner_dim=("grid_y",),
+                    ),
+                )
+                dst_gwrap = dst_nc2grid.create_grid_wrapper()
+
+                # We are translating metadata and some structure for the destination grid.
+                dst_output_gwrap = copy(dst_gwrap)
+                dst_output_gwrap.corner_dims = None
+                dst_output_gwrap.spec = GridSpec(
+                    x_center="geolon", y_center="geolat", x_dim=("lon",), y_dim=("lat",)
+                )
+                dst_output_gwrap.dims = deepcopy(dst_gwrap.dims)
+                dst_output_gwrap.dims.value[0].name = ("lon",)
+                dst_output_gwrap.dims.value[1].name = ("lat",)
+
+            forecast_date = row_data["forecast_date"]
+            output_file_path = (
+                self._context.intp_dir
+                / f"{self._context.rave_to_intp}{forecast_date}00_{forecast_date}59.nc"
+            )
+            self.log(f"creating output file: {output_file_path}")
+            with open_nc(output_file_path, "w") as ds:
+                create_template_emissions_file(ds, self._context.grid_out_shape)
+                for varname in ["frp_avg_hr", "FRE"]:
+                    create_sd_variable(ds, SD_VARS.get(varname))
+
+            dst_output_gwrap.fill_nc_variables(output_file_path)
+
+            for field_name in self._context.vars_emis:
+                match field_name:
+                    case "FRP_MEAN":
+                        dst_field_name = "frp_avg_hr"
+                    case "FRE":
+                        dst_field_name = "FRE"
+                    case _:
+                        raise NotImplementedError(field_name)
+
+                self.log("creating destination field", level=logging.DEBUG)
+                dst_nc2field = NcToField(
+                    path=output_file_path,
+                    name=dst_field_name,
+                    gwrap=dst_output_gwrap,
+                    dim_time=("t",),
+                )
+                dst_fwrap = dst_nc2field.create_field_wrapper()
+
+                if first:
+                    self.log("creating source grid from RAVE file")
+                    src_nc2grid = NcToGrid(
+                        path=self._context.grid_in,
+                        spec=GridSpec(
+                            x_center="grid_lont",
+                            y_center="grid_latt",
+                            x_dim=("grid_xt",),
+                            y_dim=("grid_yt",),
+                            x_corner="grid_lon",
+                            y_corner="grid_lat",
+                            x_corner_dim=("grid_x",),
+                            y_corner_dim=("grid_y",),
+                        ),
+                    )
+                    src_gwrap = src_nc2grid.create_grid_wrapper()
+
+                self.log("creating source field", level=logging.DEBUG)
+                src_nc2field = NcToField(
+                    path=row_data["rave_raw"],
+                    name=field_name,
+                    gwrap=src_gwrap,
+                    dim_time=("time",),
+                )
+                src_fwrap = src_nc2field.create_field_wrapper()
+
+                if first:
+                    self.log("creating regridder")
+                    self.log(f"{src_fwrap.value.data.shape=}", level=logging.DEBUG)
+                    self.log(f"{dst_fwrap.value.data.shape=}", level=logging.DEBUG)
+                    if self._context.predef_grid == PredefinedGrid.RRFS_NA_13km:
+                        # ESMF does not like reading the weights for this field
+                        # combination (rc=-1). The error can be bypassed by
+                        # creating weights in-memory.
+                        self.log("creating regridder in-memory")
+                        regridder = esmpy.Regrid(
+                            src_fwrap.value,
+                            dst_fwrap.value,
+                            regrid_method=esmpy.RegridMethod.CONSERVE,
+                            unmapped_action=esmpy.UnmappedAction.IGNORE,
+                            ignore_degenerate=True,
+                        )
+                    else:
+                        self.log("creating regridder from weight file")
+                        regridder = esmpy.RegridFromFile(
+                            src_fwrap.value,
+                            dst_fwrap.value,
+                            filename=str(self._context.weightfile),
+                        )
+
+                first = False
+
+                src_data = src_fwrap.value.data
+                match field_name:
+                    case "FRP_MEAN":
+                        src_data[:] = np.where(src_data == -1.0, 0.0, src_data)
+                    case "FRE":
+                        src_data[:] = np.where(src_data > 1000.0, src_data, 0.0)
+                    case _:
+                        raise NotImplementedError(field_name)
+
+                if self._context.rave_qa_filter == RaveQaFilter.HIGH:
+                    with open_nc(row_data["rave_raw"], parallel=True) as rave_ds:
+                        rave_qa = load_variable_data(
+                            rave_ds.variables["QA"], src_fwrap.dims
+                        )
+                        set_to_zero = rave_qa < 2
+                        self.log(
+                            f"RAVE QA filter applied: {self._context.rave_qa_filter=}; {set_to_zero.size=}; {np.sum(set_to_zero)=}"
+                        )
+                        src_data[set_to_zero] = 0.0
+
+                # Execute the ESMF regridding
+                self.log("running regridding", level=logging.DEBUG)
+                _ = regridder(src_fwrap.value, dst_fwrap.value)
+
+                # Persist the destination field
+                self.log("filling netcdf variable", level=logging.DEBUG)
+                dst_fwrap.fill_nc_variable(output_file_path)
+
+            # Update the forecast metadata with the interpolated RAVE file data
+            forecast_metadata.loc[row_idx, "rave_interpolated"] = output_file_path
+            row_data["rave_interpolated"] = output_file_path
+
+            if self._context.rank == 0:
+                self._regrid_postprocessing_(row_data)
+
+        if (
+            self._context.rank == 0
+            and self._context.should_calc_desc_stats
+            and self._interpolation_stats is not None
+        ):
+            forecast_dates = forecast_metadata["forecast_date"]
+            stats_path = (
+                self._context.intp_dir
+                / f"stats_regridding_{forecast_dates.min()}_{forecast_dates.max()}.csv"
+            )
+            self.log(f"writing interpolation statistics: {stats_path=}")
+            self._interpolation_stats.to_csv(stats_path, index=False)
+
+    def _regrid_postprocessing_(self, row_data: pd.Series) -> None:
+        self.log("_regrid_postprocessing_: enter", level=logging.DEBUG)
+
+        calc_stats = self._context.should_calc_desc_stats
+
+        field_names_dst = [
+            "frp_avg_hr",
+            "FRE",
+        ]
+        with open_nc(row_data["rave_interpolated"], parallel=False) as ds:
+            dst_data = {ii: ds.variables[ii][:] for ii in field_names_dst}
+        if calc_stats:
+            # Do these calculations before we modify the arrays since edge masking is inplace
+            dst_desc_unmasked = create_descriptive_statistics(
+                dst_data, "dst_unmasked", None
+            )
+
+        # Mask edges to reduce model edge effects
+        self.log("masking edges", level=logging.DEBUG)
+        for v in dst_data.values():
+            # Operation is inplace
+            mask_edges(v[0, :, :])
+
+        # Persist masked data to disk
+        with open_nc(row_data["rave_interpolated"], parallel=False, mode="a") as ds:
+            for k, v in dst_data.items():
+                ds.variables[k][:] = v
+
+        if calc_stats:
+            with open_nc(row_data["rave_raw"], parallel=False) as ds:
+                src_desc = create_descriptive_statistics(
+                    {ii: ds.variables[ii][:] for ii in self._context.vars_emis},
+                    "src",
+                    row_data["rave_raw"],
+                )
+            src_desc.rename(columns={"FRP_MEAN": "frp_avg_hr"}, inplace=True)
+            dst_desc_masked = create_descriptive_statistics(
+                dst_data, "dst_masked", row_data["rave_interpolated"]
+            )
+            summary = pd.concat(
+                [
+                    ii.transpose()
+                    for ii in [src_desc, dst_desc_unmasked, dst_desc_masked]
+                ]
+            )
+            summary.index.name = "variable"
+            summary["forecast_date"] = row_data["forecast_date"]
+            summary.reset_index(inplace=True)
+            if self._interpolation_stats is None:
+                self._interpolation_stats = summary
+            else:
+                self._interpolation_stats = pd.concat(
+                    [self._interpolation_stats, summary]
+                )
+
+        self.log("_regrid_postprocessing_: exit", level=logging.DEBUG)
+
+
+HasNcAttrsType = Union[nc.Dataset, nc.Variable]
+
+
+NameListType = Tuple[str, ...]
+
+
+def get_aliased_key(source: Dict, keys: NameListType | str) -> Any:
+    if isinstance(keys, str):
+        keys_to_find = (keys,)
+    else:
+        keys_to_find = keys
+    for key in keys_to_find:
+        try:
+            return source[key]
+        except KeyError:
+            continue
+    raise ValueError(f"key not found: {keys}")
+
+
+def get_nc_dimension(ds: nc.Dataset, names: NameListType) -> nc.Dimension:
+    return get_aliased_key(ds.dimensions, names)
+
+
+class Dimension(BaseModel):
+    name: NameListType
+    size: int
+    lower: int
+    upper: int
+    staggerloc: int
+    coordinate_type: Literal["y", "x", "time"]
+
+
+class DimensionCollection(BaseModel):
+    value: Tuple[Dimension, ...]
+
+    def get(self, name: str | NameListType) -> Dimension:
+        if isinstance(name, str):
+            name_to_find = (name,)
+        else:
+            name_to_find = name
+        for jj in name_to_find:
+            for ii in self.value:
+                if jj in ii.name:
+                    return ii
+        raise ValueError(f"dimension not found: {name}")
+
+
+def create_dimension_map(dims: DimensionCollection) -> Dict[str, int]:
+    ret = {}
+    for idx, dim in enumerate(dims.value):
+        for name in dim.name:
+            ret[name] = idx
+    return ret
+
+
+def load_variable_data(
+    var: nc.Variable, target_dims: DimensionCollection
+) -> np.ndarray:
+    slices = [
+        slice(target_dims.get(ii).lower, target_dims.get(ii).upper)
+        for ii in var.dimensions
+    ]
+    raw_data = var[*slices]
+    dim_map = {dim: ii for ii, dim in enumerate(var.dimensions)}
+    axes = [get_aliased_key(dim_map, ii.name) for ii in target_dims.value]
+    transposed_data = raw_data.transpose(axes)
+    return transposed_data
+
+
+def set_variable_data(
+    var: nc.Variable, target_dims: DimensionCollection, target_data: np.ndarray
+) -> np.ndarray:
+    dim_map = create_dimension_map(target_dims)
+    axes = [get_aliased_key(dim_map, ii) for ii in var.dimensions]
+    transposed_data = target_data.transpose(axes)
+    slices = [
+        slice(target_dims.get(ii).lower, target_dims.get(ii).upper)
+        for ii in var.dimensions
+    ]
+    var[*slices] = transposed_data
+    return transposed_data
+
+
+class AbstractWrapper(abc.ABC, BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    dims: DimensionCollection
+
+
+class GridSpec(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+    x_center: str
+    y_center: str
+    x_dim: NameListType
+    y_dim: NameListType
+    x_corner: str | None = None
+    y_corner: str | None = None
+    x_corner_dim: NameListType | None = None
+    y_corner_dim: NameListType | None = None
+    x_index: int = 0
+    y_index: int = 1
+
+    @model_validator(mode="after")
+    def _validate_model_(self) -> "GridSpec":
+        corner_meta = [
+            self.x_corner,
+            self.y_corner,
+            self.x_corner_dim,
+            self.y_corner_dim,
+        ]
+        is_given_sum = sum([ii is not None for ii in corner_meta])
+        if is_given_sum > 0 and is_given_sum != len(corner_meta):
+            raise ValueError(
+                "if one corner name is supplied, then all must be supplied"
+            )
+        return self
+
+    @property
+    def has_corners(self) -> bool:
+        return self.x_corner is not None
+
+    def get_x_corner(self) -> str:
+        if self.x_corner is None:
+            raise ValueError
+        return self.x_corner
+
+    def get_y_corner(self) -> str:
+        if self.y_corner is None:
+            raise ValueError
+        return self.y_corner
+
+    def get_x_data(self, grid: 
esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: + return grid.get_coords(self.x_index, staggerloc=staggerloc) + + def get_y_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: + return grid.get_coords(self.y_index, staggerloc=staggerloc) + + def create_grid_dims( + self, ds: nc.Dataset, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc + ) -> DimensionCollection: + if staggerloc == esmpy.StaggerLoc.CENTER: + x_dim, y_dim = self.x_dim, self.y_dim + elif staggerloc == esmpy.StaggerLoc.CORNER: + x_dim, y_dim = self.x_corner_dim, self.y_corner_dim + else: + raise NotImplementedError(staggerloc) + x_dimobj = Dimension( + name=x_dim, + size=get_nc_dimension(ds, x_dim).size, + lower=grid.lower_bounds[staggerloc][self.x_index], + upper=grid.upper_bounds[staggerloc][self.x_index], + staggerloc=staggerloc, + coordinate_type="x", + ) + y_dimobj = Dimension( + name=y_dim, + size=get_nc_dimension(ds, y_dim).size, + lower=grid.lower_bounds[staggerloc][self.y_index], + upper=grid.upper_bounds[staggerloc][self.y_index], + staggerloc=staggerloc, + coordinate_type="y", + ) + if self.x_index == 0: + value = [x_dimobj, y_dimobj] + elif self.x_index == 1: + value = [y_dimobj, x_dimobj] + else: + raise NotImplementedError(self.x_index, self.y_index) + return DimensionCollection(value=value) + + +class GridWrapper(AbstractWrapper): + value: esmpy.Grid + spec: GridSpec + corner_dims: DimensionCollection | None = None + + def fill_nc_variables(self, path: Path): + if self.corner_dims is not None: + raise NotImplementedError + with open_nc(path, "a") as ds: + staggerloc = esmpy.StaggerLoc.CENTER + x_center_data = self.spec.get_x_data(self.value, staggerloc) + set_variable_data( + ds.variables[self.spec.x_center], self.dims, x_center_data + ) + y_center_data = self.spec.get_y_data(self.value, staggerloc) + set_variable_data( + ds.variables[self.spec.y_center], self.dims, y_center_data + ) + + +class NcToGrid(BaseModel): + path: Path + spec: GridSpec + + def create_grid_wrapper(self) -> GridWrapper: + with open_nc(self.path, "r") as ds: + grid_shape = self._create_grid_shape_(ds) + staggerloc = esmpy.StaggerLoc.CENTER + grid = esmpy.Grid( + grid_shape, + staggerloc=staggerloc, + coord_sys=esmpy.CoordSys.SPH_DEG, + ) + dims = self.spec.create_grid_dims(ds, grid, staggerloc) + grid_x_center_coords = self.spec.get_x_data(grid, staggerloc) + grid_x_center_coords[:] = load_variable_data( + ds.variables[self.spec.x_center], dims + ) + grid_y_center_coords = self.spec.get_y_data(grid, staggerloc) + grid_y_center_coords[:] = load_variable_data( + ds.variables[self.spec.y_center], dims + ) + + if self.spec.has_corners: + corner_dims = self._add_corner_coords_(ds, grid) + else: + corner_dims = None + + gwrap = GridWrapper( + value=grid, dims=dims, spec=self.spec, corner_dims=corner_dims + ) + return gwrap + + def _create_grid_shape_(self, ds: nc.Dataset) -> np.ndarray: + x_size = get_nc_dimension(ds, self.spec.x_dim).size + y_size = get_nc_dimension(ds, self.spec.y_dim).size + if self.spec.x_index == 0: + grid_shape = (x_size, y_size) + elif self.spec.x_index == 1: + grid_shape = (y_size, x_size) + else: + raise NotImplementedError(self.spec.x_index, self.spec.y_index) + return np.array(grid_shape) + + def _add_corner_coords_( + self, ds: nc.Dataset, grid: esmpy.Grid + ) -> DimensionCollection: + staggerloc = esmpy.StaggerLoc.CORNER + grid.add_coords(staggerloc) + dims = self.spec.create_grid_dims(ds, grid, staggerloc) + grid_x_corner_coords = self.spec.get_x_data(grid, staggerloc) + 
grid_x_corner_coords[:] = load_variable_data(
+            ds.variables[self.spec.x_corner], dims
+        )
+        grid_y_corner_coords = self.spec.get_y_data(grid, staggerloc)
+        grid_y_corner_coords[:] = load_variable_data(
+            ds.variables[self.spec.y_corner], dims
+        )
+        return dims
+
+
+class FieldWrapper(AbstractWrapper):
+    value: esmpy.Field
+    gwrap: GridWrapper
+
+    def fill_nc_variable(self, path: Path):
+        with open_nc(path, "a") as ds:
+            var = ds.variables[self.value.name]
+            set_variable_data(var, self.dims, self.value.data)
+
+
+class NcToField(BaseModel):
+    path: Path
+    name: str
+    gwrap: GridWrapper
+    dim_time: NameListType | None = None
+    staggerloc: int = esmpy.StaggerLoc.CENTER
+
+    def create_field_wrapper(self) -> FieldWrapper:
+        with open_nc(self.path, "r") as ds:
+            if self.dim_time is None:
+                ndbounds = None
+                target_dims = self.gwrap.dims
+            else:
+                ndbounds = (len(get_nc_dimension(ds, self.dim_time)),)
+                time_dim = Dimension(
+                    name=self.dim_time,
+                    size=ndbounds[0],
+                    lower=0,
+                    upper=ndbounds[0],
+                    staggerloc=self.staggerloc,
+                    coordinate_type="time",
+                )
+                target_dims = DimensionCollection(
+                    value=list(self.gwrap.dims.value) + [time_dim]
+                )
+            field = esmpy.Field(
+                self.gwrap.value,
+                name=self.name,
+                ndbounds=ndbounds,
+                staggerloc=self.staggerloc,
+            )
+            field.data[:] = load_variable_data(ds.variables[self.name], target_dims)
+            fwrap = FieldWrapper(value=field, dims=target_dims, gwrap=self.gwrap)
+            return fwrap
+
+
+def mask_edges(data: np.ma.MaskedArray, mask_width: int = 1) -> None:
+    """
+    Mask the edges of a two-dimensional domain to reduce boundary artifacts
+    during interpolation.
+
+    Args:
+        data: The masked array to alter. Its mask is modified in place.
+        mask_width: The width of the mask at each edge
+
+    Returns:
+        None. The array's mask is updated in place; no new array is created.
+    """
+    if data.ndim != 2:
+        raise ValueError(f"{data.ndim=}")
+
+    original_shape = data.shape
+    if mask_width < 1:
+        return  # No masking if mask_width is less than 1
+
+    target = data.mask
+    # Mask top and bottom rows
+    target[:mask_width, :] = True
+    target[-mask_width:, :] = True
+
+    # Mask left and right columns
+    target[:, :mask_width] = True
+    target[:, -mask_width:] = True
+
+    if data.shape != original_shape:
+        raise ValueError("Data shape altered during masking.")
diff --git a/ush/smoke_dust/core/variable.py b/ush/smoke_dust/core/variable.py
new file mode 100644
index 0000000000..4e9ef5d8dc
--- /dev/null
+++ b/ush/smoke_dust/core/variable.py
@@ -0,0 +1,110 @@
+from typing import Tuple, Any
+
+from pydantic import BaseModel, field_validator
+
+
+class SmokeDustVariable(BaseModel):
+    name: str
+    long_name: str
+    units: str
+    fill_value_str: str
+    fill_value_float: float
+
+
+SmokeDustVariablesType = Tuple[SmokeDustVariable, ...]
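+# Illustrative sketch only: a minimal registry built the same way as the
+# SD_VARS registry defined below. ``SmokeDustVariables.get`` performs a linear
+# lookup by ``name`` and raises ``ValueError`` for unknown names. The entry
+# shown here is hypothetical:
+#
+#     _demo = SmokeDustVariables(
+#         values=(
+#             SmokeDustVariable(
+#                 name="frp_davg",
+#                 long_name="Daily mean Fire Radiative Power",
+#                 units="MW",
+#                 fill_value_str="0.f",
+#                 fill_value_float=0.0,
+#             ),
+#         )
+#     )
+#     assert _demo.get("frp_davg").units == "MW"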
+ + +class SmokeDustVariables(BaseModel): + values: SmokeDustVariablesType + + def get(self, name: str) -> SmokeDustVariable: + for value in self.values: + if value.name == name: + return value + raise ValueError(name) + + @field_validator("values", mode="after") + @classmethod + def _validate_values_( + cls, values: SmokeDustVariablesType + ) -> SmokeDustVariablesType: + names = [ii.name for ii in values] + if len(names) != len(set(names)): + raise ValueError("Variable names must be unique") + return values + + +SD_VARS = SmokeDustVariables( + values=( + SmokeDustVariable( + name="geolat", + long_name="cell center latitude", + units="degrees_north", + fill_value_str="-9999.f", + fill_value_float=-9999.0, + ), + SmokeDustVariable( + name="geolon", + long_name="cell center longitude", + units="degrees_east", + fill_value_str="-9999.f", + fill_value_float=-9999.0, + ), + SmokeDustVariable( + name="frp_avg_hr", + long_name="Mean Fire Radiative Power", + units="MW", + fill_value_str="0.f", + fill_value_float=0.0, + ), + SmokeDustVariable( + name="ebb_smoke_hr", + long_name="EBB emissions", + units="ug m-2 s-1", + fill_value_str="0.f", + fill_value_float=0.0, + ), + SmokeDustVariable( + name="frp_davg", + long_name="Daily mean Fire Radiative Power", + units="MW", + fill_value_str="0.f", + fill_value_float=0.0, + ), + SmokeDustVariable( + name="ebb_rate", + long_name="Total EBB emission", + units="ug m-2 s-1", + fill_value_str="0.f", + fill_value_float=0.0, + ), + SmokeDustVariable( + name="fire_end_hr", + long_name="Hours since fire was last detected", + units="hrs", + fill_value_str="0.f", + fill_value_float=0.0, + ), + SmokeDustVariable( + name="hwp_davg", + long_name="Daily mean Hourly Wildfire Potential", + units="none", + fill_value_str="0.f", + fill_value_float=0.0, + ), + SmokeDustVariable( + name="totprcp_24hrs", + long_name="Sum of precipitation", + units="m", + fill_value_str="0.f", + fill_value_float=0.0, + ), + SmokeDustVariable( + name="FRE", + long_name="FRE", + units="MJ", + fill_value_str="0.f", + fill_value_float=0.0, + ), + ) +) diff --git a/ush/smoke_dust/generate_emissions.py b/ush/smoke_dust/generate_emissions.py new file mode 100644 index 0000000000..d218d00749 --- /dev/null +++ b/ush/smoke_dust/generate_emissions.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 + +######################################################################### +# # +# Python script for fire emissions preprocessing from RAVE FRP and FRE # +# (Li et al.,2022). # +# johana.romero-alvarez@noaa.gov # +# # +######################################################################### + +import sys +from pathlib import Path +from typing import List + +sys.path.append(str(Path(__file__).parent.parent)) + +from smoke_dust.core.context import SmokeDustContext +from smoke_dust.core.preprocessor import SmokeDustPreprocessor + + +def main(args: List[str]) -> None: + """ + Prepares fire-related ICs. This is the main function that handles data movement and interpolation. + #tdk: doc + Args: + staticdir: Path to fix files for the smoke and dust component + ravedir: Path to the directory containing RAVE fire data files (hourly). This is typically the working directory (DATA) + intp_dir: Path to interpolated RAVE data files from the previous cycles (DATA_SHARE) + predef_grid: If ``RRFS_NA_3km``, use pre-defined grid dimensions + ebb_dcycle_flag: Select the EBB cycle to run. Valid values are ``"1"`` or ``"2"`` + restart_interval: Indicates if restart files should be copied. 
The actual interval values are not used + persistence: If ``TRUE``, use satellite observations from the previous day. Otherwise, use observations from the same day. + """ + + context = SmokeDustContext.create_from_args(args) + processor = SmokeDustPreprocessor(context) + try: + processor.run() + processor.finalize() + except Exception as e: + processor.create_dummy_emissions_file() + context.log("unhandled error", exc_info=e) + + +if __name__ == "__main__": + print("") + print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print("Welcome to interpolating RAVE and processing fire emissions!") + print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print("") + # tdk:story: use argparse + main(sys.argv[1:]) + print("") + print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print("Exiting. Bye!") + print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print("") diff --git a/ush/smoke_dust_fire_emiss_tools.py b/ush/smoke_dust_fire_emiss_tools.py deleted file mode 100755 index c7637dfab9..0000000000 --- a/ush/smoke_dust_fire_emiss_tools.py +++ /dev/null @@ -1,415 +0,0 @@ -#!/usr/bin/env python3 - -import os -from typing import Tuple, Any - -import numpy as np -import xarray as xr -from datetime import datetime -from netCDF4 import Dataset -from pandas import Index -from xarray import DataArray - -import smoke_dust_interp_tools as i_tools - - -def averaging_FRP( - ebb_dcycle: int, - fcst_dates: Index, - cols: int, - rows: int, - intp_dir: str, - rave_to_intp: str, - veg_map: str, - tgt_area: DataArray, - beta: float, - fg_to_ug: float, - to_s: int, -) -> Tuple[np.ndarray, np.ndarray]: - """ - Compute average FRP from raw RAVE for the previous 24 hours. - - Args: - ebb_dcycle: Valid values are ``1`` or ``2`` - fcst_dates: Forecast hours to use for averaging - cols: Number of columns - rows: Number of rows - intp_dir: Directory containing the interpolated data - rave_to_intp: Prefix of the target RAVE files - veg_map: Path to the vegetation mapping file - tgt_area: Data array containing the target cell areas - beta: Scale factor applied to emissions - fg_to_ug: Unit conversion factor - to_s: Unit conversion factor - - Returns: - A typle containing: - * ``0``: Average FRP - * ``1``: Total EBB - """ - base_array = np.zeros((cols * rows)) - frp_daily = base_array - ebb_smoke_total = [] - frp_avg_hr = [] - - try: - ef_map = xr.open_dataset(veg_map) - emiss_factor = ef_map.emiss_factor.values - target_area = tgt_area.values - except ( - FileNotFoundError, - IOError, - OSError, - RuntimeError, - ValueError, - TypeError, - KeyError, - IndexError, - MemoryError, - ) as e: - print(f"Error loading vegetation map: {e}") - return np.zeros((cols, rows)), np.zeros((cols, rows)) - - num_files = 0 - for cycle in fcst_dates: - try: - file_path = os.path.join(intp_dir, f"{rave_to_intp}{cycle}00_{cycle}59.nc") - if os.path.exists(file_path): - try: - with xr.open_dataset(file_path) as nc: - open_fre = nc.FRE[0, :, :].values - open_frp = nc.frp_avg_hr[0, :, :].values - num_files += 1 - if ebb_dcycle == 1: - print("Processing emissions for ebb_dcyc 1") - print(file_path) - frp_avg_hr.append(open_frp) - ebb_hourly = (open_fre * emiss_factor * beta * fg_to_ug) / ( - target_area * to_s - ) - ebb_smoke_total.append( - np.where(open_frp > 0, ebb_hourly, 0) - ) - else: - print("Processing emissions for ebb_dcyc 2") - ebb_hourly = ( - open_fre * emiss_factor * beta * fg_to_ug / target_area - ) - ebb_smoke_total.append( - np.where(open_frp > 0, ebb_hourly, 0).ravel() - ) - frp_daily += 
np.where(open_frp > 0, open_frp, 0).ravel() - except ( - FileNotFoundError, - IOError, - OSError, - RuntimeError, - ValueError, - TypeError, - KeyError, - IndexError, - MemoryError, - ) as e: - print(f"Error processing NetCDF file {file_path}: {e}") - if ebb_dcycle == 1: - frp_avg_hr.append(np.zeros((cols, rows))) - ebb_smoke_total.append(np.zeros((cols, rows))) - else: - if ebb_dcycle == 1: - frp_avg_hr.append(np.zeros((cols, rows))) - ebb_smoke_total.append(np.zeros((cols, rows))) - except Exception as e: - print(f"Error processing cycle {cycle}: {e}") - if ebb_dcycle == 1: - frp_avg_hr.append(np.zeros((cols, rows))) - ebb_smoke_total.append(np.zeros((cols, rows))) - - if num_files > 0: - if ebb_dcycle == 1: - frp_avg_reshaped = np.stack(frp_avg_hr, axis=0) - ebb_total_reshaped = np.stack(ebb_smoke_total, axis=0) - else: - summed_array = np.sum(np.array(ebb_smoke_total), axis=0) - num_zeros = len(ebb_smoke_total) - np.sum( - [arr == 0 for arr in ebb_smoke_total], axis=0 - ) - safe_zero_count = np.where(num_zeros == 0, 1, num_zeros) - result_array = np.array( - [ - ( - summed_array[i] / 2 - if safe_zero_count[i] == 1 - else summed_array[i] / safe_zero_count[i] - ) - for i in range(len(safe_zero_count)) - ] - ) - result_array[num_zeros == 0] = summed_array[num_zeros == 0] - ebb_total = result_array.reshape(cols, rows) - ebb_total_reshaped = ebb_total / 3600 - temp_frp = np.array( - [ - ( - frp_daily[i] / 2 - if safe_zero_count[i] == 1 - else frp_daily[i] / safe_zero_count[i] - ) - for i in range(len(safe_zero_count)) - ] - ) - temp_frp[num_zeros == 0] = frp_daily[num_zeros == 0] - frp_avg_reshaped = temp_frp.reshape(cols, rows) - else: - if ebb_dcycle == 1: - frp_avg_reshaped = np.zeros((24, cols, rows)) - ebb_total_reshaped = np.zeros((24, cols, rows)) - else: - frp_avg_reshaped = np.zeros((cols, rows)) - ebb_total_reshaped = np.zeros((cols, rows)) - - return (frp_avg_reshaped, ebb_total_reshaped) - - -def estimate_fire_duration( - intp_dir: str, - fcst_dates: Index, - current_day: str, - cols: int, - rows: int, - rave_to_intp: str, -) -> np.ndarray: - """ - Estimate fire duration potentially using data from previous cycles. - - There are two steps here. - 1) First day simulation no RAVE from previous 24 hours available (fire age is set to zero). - 2) Previous files are present (estimate fire age as the difference between the date of the current cycle and the date whe the fire was last observed within 24 hours). 
- - Args: - intp_dir: Path to interpolated RAVE data - fcst_dates: Forecast hours used in the current cycle - current_day: The current day hour - cols: Number of columns - rows: Number of rows - rave_to_intp: Prefix of the target RAVE files - """ - t_fire = np.zeros((cols, rows)) - - for date_str in fcst_dates: - try: - assert isinstance(date_str, str) - date_file = int(date_str[:10]) - print("Date processing for fire duration", date_file) - file_path = os.path.join( - intp_dir, f"{rave_to_intp}{date_str}00_{date_str}59.nc" - ) - - if os.path.exists(file_path): - try: - with xr.open_dataset(file_path) as open_intp: - FRP = open_intp.frp_avg_hr[0, :, :].values - dates_filtered = np.where(FRP > 0, date_file, 0) - t_fire = np.maximum(t_fire, dates_filtered) - except ( - FileNotFoundError, - IOError, - OSError, - RuntimeError, - ValueError, - TypeError, - KeyError, - IndexError, - MemoryError, - ) as e: - print(f"Error processing NetCDF file {file_path}: {e}") - except Exception as e: - print(f"Error processing date {date_str}: {e}") - - t_fire_flattened = [int(i) if i != 0 else 0 for i in t_fire.flatten()] - - try: - fcst_t = datetime.strptime(current_day, "%Y%m%d%H") - hr_ends = [ - datetime.strptime(str(hr), "%Y%m%d%H") if hr != 0 else 0 - for hr in t_fire_flattened - ] - te = np.array( - [(fcst_t - i).total_seconds() / 3600 if i != 0 else 0 for i in hr_ends] - ) - except ValueError as e: - print(f"Error processing forecast time {current_day}: {e}") - te = np.zeros((rows, cols)) - - return te - - -def save_fire_dur(cols: int, rows: int, te: np.ndarray) -> np.ndarray: - """ - Reshape the fire duration array. - - Args: - cols: Number of columns - rows: Number of rows - te: Target array to reshape - - Returns: - The reshaped fire duration array - """ - fire_dur = np.array(te).reshape(cols, rows) - return fire_dur - - -def produce_emiss_24hr_file( - frp_reshaped: np.ndarray, - intp_dir: str, - current_day: str, - tgt_latt: DataArray, - tgt_lont: DataArray, - ebb_smoke_reshaped: np.ndarray, - cols: int, - rows: int, -) -> None: - """ - Create a 24-hour emissions file. - - Args: - frp_reshaped: FRP numpy array - intp_dir: Directory containing interpolated RAVE files - current_day: The current forecast cycle day/hour - tgt_latt: Target grid latitudes - tgt_lont: Target grid longitudes - ebb_smoke_reshaped: EBB smoke array reshaped - cols: Number of columns - rows: Number of rows - """ - file_path = os.path.join(intp_dir, f"SMOKE_RRFS_data_{current_day}00.nc") - with Dataset(file_path, "w") as fout: - i_tools.create_emiss_file(fout, cols, rows) - i_tools.Store_latlon_by_Level( - fout, "geolat", tgt_latt, "cell center latitude", "degrees_north", "-9999.f" - ) - i_tools.Store_latlon_by_Level( - fout, "geolon", tgt_lont, "cell center longitude", "degrees_east", "-9999.f" - ) - - i_tools.Store_by_Level( - fout, "frp_avg_hr", "mean Fire Radiative Power", "MW", "0.f" - ) - fout.variables["frp_avg_hr"][:, :, :] = frp_reshaped - i_tools.Store_by_Level( - fout, "ebb_smoke_hr", "EBB emissions", "ug m-2 s-1", "0.f" - ) - fout.variables["ebb_smoke_hr"][:, :, :] = ebb_smoke_reshaped - - -def produce_emiss_file( - xarr_hwp: DataArray, - frp_avg_reshaped: np.ndarray, - totprcp_ave_arr: Any, - xarr_totprcp: DataArray, - intp_dir: str, - current_day: str, - tgt_latt: DataArray, - tgt_lont: DataArray, - ebb_tot_reshaped: np.ndarray, - fire_age: np.ndarray, - cols: int, - rows: int, -) -> str: - """ - Produce the emissions file. 
- - Args: - xarr_hwp: Data array containing HWP - frp_avg_reshaped: Average FRP array - totprcp_ave_arr: Average total precipitation array - xarr_totprcp: Average total precipitation as a data array - intp_dir: Directory containing interpolated RAVE data - current_day: The current forecast day/hour - tgt_latt: The target grid latitude - tgt_lont: The target grid longitudes - ebb_tot_reshaped: Total EBB array - fire_age: Estimated fire age array - cols: Number of columns - rows: Number of rows - - Returns: - A string indicating the file was written as expected - """ - # Ensure arrays are not negative or NaN - frp_avg_reshaped = np.clip(frp_avg_reshaped, 0, None) - frp_avg_reshaped = np.nan_to_num(frp_avg_reshaped) - - ebb_tot_reshaped = np.clip(ebb_tot_reshaped, 0, None) - ebb_tot_reshaped = np.nan_to_num(ebb_tot_reshaped) - - fire_age = np.clip(fire_age, 0, None) - fire_age = np.nan_to_num(fire_age) - - # Filter HWP Prcp arrays to be non-negative and replace NaNs - filtered_hwp = xarr_hwp.where(frp_avg_reshaped > 0, 0).fillna(0) - filtered_prcp = xarr_totprcp.where(frp_avg_reshaped > 0, 0).fillna(0) - - # Filter based on ebb_rate - ebb_rate_threshold = 0 # Define an appropriate threshold if needed - mask = ebb_tot_reshaped > ebb_rate_threshold - - filtered_hwp = filtered_hwp.where(mask, 0).fillna(0) - filtered_prcp = filtered_prcp.where(mask, 0).fillna(0) - frp_avg_reshaped = frp_avg_reshaped * mask - ebb_tot_reshaped = ebb_tot_reshaped * mask - fire_age = fire_age * mask - - # Produce emiss file - file_path = os.path.join(intp_dir, f"SMOKE_RRFS_data_{current_day}00.nc") - - try: - with Dataset(file_path, "w") as fout: - i_tools.create_emiss_file(fout, cols, rows) - i_tools.Store_latlon_by_Level( - fout, - "geolat", - tgt_latt, - "cell center latitude", - "degrees_north", - "-9999.f", - ) - i_tools.Store_latlon_by_Level( - fout, - "geolon", - tgt_lont, - "cell center longitude", - "degrees_east", - "-9999.f", - ) - - print("Storing different variables") - i_tools.Store_by_Level( - fout, "frp_davg", "Daily mean Fire Radiative Power", "MW", "0.f" - ) - fout.variables["frp_davg"][0, :, :] = frp_avg_reshaped - i_tools.Store_by_Level( - fout, "ebb_rate", "Total EBB emission", "ug m-2 s-1", "0.f" - ) - fout.variables["ebb_rate"][0, :, :] = ebb_tot_reshaped - i_tools.Store_by_Level( - fout, "fire_end_hr", "Hours since fire was last detected", "hrs", "0.f" - ) - fout.variables["fire_end_hr"][0, :, :] = fire_age - i_tools.Store_by_Level( - fout, "hwp_davg", "Daily mean Hourly Wildfire Potential", "none", "0.f" - ) - fout.variables["hwp_davg"][0, :, :] = filtered_hwp - i_tools.Store_by_Level( - fout, "totprcp_24hrs", "Sum of precipitation", "m", "0.f" - ) - fout.variables["totprcp_24hrs"][0, :, :] = filtered_prcp - - print("Emissions file created successfully") - return "Emissions file created successfully" - - except (OSError, IOError) as e: - print(f"Error creating or writing to NetCDF file {file_path}: {e}") - return f"Error creating or writing to NetCDF file {file_path}: {e}" - - return "Emissions file created successfully" diff --git a/ush/smoke_dust_generate_fire_emissions.py b/ush/smoke_dust_generate_fire_emissions.py deleted file mode 100755 index 74a979c9a9..0000000000 --- a/ush/smoke_dust_generate_fire_emissions.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python3 - -######################################################################### -# # -# Python script for fire emissions preprocessing from RAVE FRP and FRE # -# (Li et al.,2022). 
# -# johana.romero-alvarez@noaa.gov # -# # -######################################################################### - -import sys -import os -import smoke_dust_fire_emiss_tools as femmi_tools -import smoke_dust_hwp_tools as hwp_tools -import smoke_dust_interp_tools as i_tools - - -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Workflow -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -def generate_emiss_workflow( - staticdir: str, - ravedir: str, - intp_dir: str, - predef_grid: str, - ebb_dcycle_flag: str, - restart_interval: str, - persistence: str, -) -> None: - """ - Prepares fire-related ICs. This is the main function that handles data movement and interpolation. - - Args: - staticdir: Path to fix files for the smoke and dust component - ravedir: Path to the directory containing RAVE fire data files (hourly). This is typically the working directory (DATA) - intp_dir: Path to interpolated RAVE data files from the previous cycles (DATA_SHARE) - predef_grid: If ``RRFS_NA_3km``, use pre-defined grid dimensions - ebb_dcycle_flag: Select the EBB cycle to run. Valid values are ``"1"`` or ``"2"`` - restart_interval: Indicates if restart files should be copied. The actual interval values are not used - persistence: If ``TRUE``, use satellite observations from the previous day. Otherwise, use observations from the same day. - """ - - # ---------------------------------------------------------------------- - # Import envs from workflow and get the pre-defined grid - # Set variable names, constants and unit conversions - # Set predefined grid - # Set directories - # ---------------------------------------------------------------------- - - beta = 0.3 - fg_to_ug = 1e6 - to_s = 3600 - current_day = os.environ["CDATE"] - # nwges_dir = os.environ.get("NWGES_DIR") - nwges_dir = os.environ["DATA"] - vars_emis = ["FRP_MEAN", "FRE"] - cols, rows = (2700, 3950) if predef_grid == "RRFS_NA_3km" else (1092, 1820) - print("PREDEF GRID", predef_grid, "cols,rows", cols, rows) - # used later when working with ebb_dcyle 1 or 2 - ebb_dcycle = int(ebb_dcycle_flag) - print( - "WARNING, EBB_DCYCLE set to", - ebb_dcycle, - "and persistence=", - persistence, - "if persistence is false, emissions comes from same day satellite obs", - ) - - print("CDATE:", current_day) - print("DATA:", nwges_dir) - - # This is used later when copying the rrfs restart file - restart_interval_list = [float(num) for num in restart_interval.split()] - len_restart_interval = len(restart_interval_list) - - # Setting the directories - veg_map = staticdir + "/veg_map.nc" - RAVE = ravedir - rave_to_intp = predef_grid + "_intp_" - grid_in = staticdir + "/grid_in.nc" - weightfile = staticdir + "/weight_file.nc" - grid_out = staticdir + "/ds_out_base.nc" - hourly_hwpdir = os.path.join(nwges_dir, "RESTART") - - # ---------------------------------------------------------------------- - # Workflow - # ---------------------------------------------------------------------- - - # ---------------------------------------------------------------------- - # Sort raw RAVE, create source and target filelds, and compute emissions - # ---------------------------------------------------------------------- - fcst_dates = i_tools.date_range(current_day, ebb_dcycle, persistence) - intp_avail_hours, intp_non_avail_hours, inp_files_2use = ( - i_tools.check_for_intp_rave(intp_dir, fcst_dates, rave_to_intp) - ) - rave_avail, rave_avail_hours, rave_nonavail_hours_test, first_day = ( - 
i_tools.check_for_raw_rave(RAVE, intp_non_avail_hours, intp_avail_hours) - ) - srcfield, tgtfield, tgt_latt, tgt_lont, srcgrid, tgtgrid, src_latt, tgt_area = ( - i_tools.creates_st_fields(grid_in, grid_out) - ) - - if not first_day: - regridder, use_dummy_emiss = i_tools.generate_regridder( - rave_avail_hours, srcfield, tgtfield, weightfile, intp_avail_hours - ) - if use_dummy_emiss: - print("RAVE files corrupted, no data to process") - i_tools.create_dummy(intp_dir, current_day, tgt_latt, tgt_lont, cols, rows) - else: - i_tools.interpolate_rave( - RAVE, - rave_avail, - rave_avail_hours, - use_dummy_emiss, - vars_emis, - regridder, - srcgrid, - tgtgrid, - rave_to_intp, - intp_dir, - tgt_latt, - tgt_lont, - cols, - rows, - ) - - if ebb_dcycle == 1: - print("Processing emissions forebb_dcyc 1") - frp_avg_reshaped, ebb_total_reshaped = femmi_tools.averaging_FRP( - ebb_dcycle, - fcst_dates, - cols, - rows, - intp_dir, - rave_to_intp, - veg_map, - tgt_area, - beta, - fg_to_ug, - to_s, - ) - femmi_tools.produce_emiss_24hr_file( - frp_avg_reshaped, - nwges_dir, - current_day, - tgt_latt, - tgt_lont, - ebb_total_reshaped, - cols, - rows, - ) - elif ebb_dcycle == 2: - print("Restart dates to process", fcst_dates) - hwp_avail_hours, hwp_non_avail_hours = hwp_tools.check_restart_files( - hourly_hwpdir, fcst_dates - ) - restart_avail, restart_nonavail_hours_test = ( - hwp_tools.copy_missing_restart( - nwges_dir, - hwp_non_avail_hours, - hourly_hwpdir, - len_restart_interval, - ) - ) - hwp_ave_arr, xarr_hwp, totprcp_ave_arr, xarr_totprcp = ( - hwp_tools.process_hwp( - fcst_dates, hourly_hwpdir, cols, rows, intp_dir, rave_to_intp - ) - ) - frp_avg_reshaped, ebb_total_reshaped = femmi_tools.averaging_FRP( - ebb_dcycle, - fcst_dates, - cols, - rows, - intp_dir, - rave_to_intp, - veg_map, - tgt_area, - beta, - fg_to_ug, - to_s, - ) - # Fire end hours processing - te = femmi_tools.estimate_fire_duration( - intp_dir, fcst_dates, current_day, cols, rows, rave_to_intp - ) - fire_age = femmi_tools.save_fire_dur(cols, rows, te) - # produce emiss file - femmi_tools.produce_emiss_file( - xarr_hwp, - frp_avg_reshaped, - totprcp_ave_arr, - xarr_totprcp, - nwges_dir, - current_day, - tgt_latt, - tgt_lont, - ebb_total_reshaped, - fire_age, - cols, - rows, - ) - else: - raise NotImplementedError(f"ebb_dcycle={ebb_dcycle}") - else: - print("First day true, no RAVE files available. Use dummy emissions file") - i_tools.create_dummy(intp_dir, current_day, tgt_latt, tgt_lont, cols, rows) - - -if __name__ == "__main__": - print("") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("Welcome to interpolating RAVE and processing fire emissions!") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("") - generate_emiss_workflow( - sys.argv[1], - sys.argv[2], - sys.argv[3], - sys.argv[4], - sys.argv[5], - sys.argv[6], - sys.argv[7], - ) - print("") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("Successful Completion. 
Bye!") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("") diff --git a/ush/smoke_dust_hwp_tools.py b/ush/smoke_dust_hwp_tools.py deleted file mode 100755 index 9c2dbf9ff1..0000000000 --- a/ush/smoke_dust_hwp_tools.py +++ /dev/null @@ -1,276 +0,0 @@ -#!/usr/bin/env python3 -from typing import List, Tuple - -import numpy as np -import os -import datetime as dt -from datetime import timedelta -import xarray as xr -import fnmatch - -from pandas import Index -from xarray import DataArray - - -def check_restart_files( - hourly_hwpdir: str, fcst_dates: Index -) -> Tuple[List[str], List[str]]: - """ - Args: - hourly_hwpdir: The input HWP data directory - fcst_dates: A list of forecast dates - - Returns: - A tuple containing: - * ``0``: A list of available HWP hours - * ``1``: A list of unavailable HWP hours - """ - hwp_avail_hours = [] - hwp_non_avail_hours = [] - - for cycle in fcst_dates: - assert isinstance(cycle, str) - restart_file = f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc" - file_path = os.path.join(hourly_hwpdir, restart_file) - - if os.path.exists(file_path): - print(f"Restart file available for: {restart_file}") - hwp_avail_hours.append(cycle) - else: - print(f"Copy restart file for: {restart_file}") - hwp_non_avail_hours.append(cycle) - - print( - f"Available restart at: {hwp_avail_hours}, Non-available restart files at: {hwp_non_avail_hours}" - ) - return hwp_avail_hours, hwp_non_avail_hours - - -def copy_missing_restart( - nwges_dir: str, - hwp_non_avail_hours: List[str], - hourly_hwpdir: str, - len_restart_interval: int, -) -> Tuple[List[str], List[str]]: - """ - Args: - nwges_dir: Root directory for restart files - hwp_non_avail_hours: List of HWP hours that are not available - hourly_hwpdir: List of available HWP hours - len_restart_interval: The length of the restart interval - - Returns: - A tuple containing: - * ``0``: List of available restart files - * ``1``: List of unavailable restart files - """ - restart_avail_hours = [] - restart_nonavail_hours_test = [] - - for cycle in hwp_non_avail_hours: - try: - YYYYMMDDHH = dt.datetime.strptime(cycle, "%Y%m%d%H") - HH = cycle[8:10] - prev_hr = YYYYMMDDHH - timedelta(hours=1) - prev_hr_str = prev_hr.strftime("%Y%m%d%H") - - source_restart_dir = os.path.join( - nwges_dir, prev_hr_str, "fcst_fv3lam", "RESTART" - ) - wildcard_name = "*.phy_data.nc" - - if len_restart_interval > 1: - print("ENTERING LOOP for len_restart_interval > 1") - if os.path.exists(source_restart_dir): - matching_files_found = False - print("PATH EXISTS") - for file in sorted(os.listdir(source_restart_dir)): - if fnmatch.fnmatch(file, wildcard_name): - matching_files_found = True - print("MATCHING FILES FOUND") - source_file_path = os.path.join(source_restart_dir, file) - target_file_path = os.path.join(hourly_hwpdir, file) - var1, var2 = "rrfs_hwp_ave", "totprcp_ave" - if os.path.exists(source_file_path): - with xr.open_dataset(source_file_path) as ds: - try: - if ( - var1 in ds.variables - and var2 in ds.variables - ): - ds = ds[[var1, var2]] - ds.to_netcdf(target_file_path) - restart_avail_hours.append(cycle) - print(f"Restart file copied: {file}") - else: - print( - f"Missing variables {var1} or {var2} in {file}. Skipping file." 
- ) - except AttributeError as e: - print( - f"AttributeError processing NetCDF file {source_file_path}: {e}" - ) - else: - print(f"Source file not found: {source_file_path}") - if not matching_files_found: - print("No matching files found") - restart_nonavail_hours_test.append(cycle) - else: - print(f"Source directory not found: {source_restart_dir}") - restart_nonavail_hours_test.append(cycle) - else: - if os.path.exists(source_restart_dir): - try: - matching_files = [ - f - for f in os.listdir(source_restart_dir) - if fnmatch.fnmatch(f, wildcard_name) - ] - if not matching_files: - print( - f"No matching files for cycle {cycle} in {source_restart_dir}" - ) - restart_nonavail_hours_test.append(cycle) - continue - - for matching_file in matching_files: - source_file_path = os.path.join( - source_restart_dir, matching_file - ) - target_file_path = os.path.join( - hourly_hwpdir, matching_file - ) - var1, var2 = "rrfs_hwp_ave", "totprcp_ave" - - if os.path.exists(source_file_path): - try: - with xr.open_dataset(source_file_path) as ds: - if ( - var1 in ds.variables - and var2 in ds.variables - ): - ds = ds[[var1, var2]] - ds.to_netcdf(target_file_path) - restart_avail_hours.append(cycle) - print( - f"Restart file copied: {matching_file}" - ) - else: - print( - f"Missing variables {var1} or {var2} in {matching_file}. Skipping file." - ) - except ( - FileNotFoundError, - IOError, - OSError, - RuntimeError, - ValueError, - TypeError, - KeyError, - IndexError, - MemoryError, - ) as e: - print( - f"Error processing NetCDF file {source_file_path}: {e}" - ) - restart_nonavail_hours_test.append(cycle) - else: - print(f"Source file not found: {source_file_path}") - restart_nonavail_hours_test.append(cycle) - except (FileNotFoundError, IOError, OSError, RuntimeError) as e: - print(f"Error accessing directory {source_restart_dir}: {e}") - restart_nonavail_hours_test.append(cycle) - else: - print(f"Source directory not found: {source_restart_dir}") - restart_nonavail_hours_test.append(cycle) - - except (ValueError, TypeError) as e: - print(f"Error processing cycle {cycle}: {e}") - restart_nonavail_hours_test.append(cycle) - - return restart_avail_hours, restart_nonavail_hours_test - - -def process_hwp( - fcst_dates: Index, - hourly_hwpdir: str, - cols: int, - rows: int, - intp_dir: str, - rave_to_intp: str, -) -> Tuple[np.ndarray, DataArray, np.ndarray, DataArray]: - """ - Process HWP files. 
- - Args: - fcst_dates: List of forecast dates - hourly_hwpdir: Path to HWP data directory - cols: Number of output columns - rows: Number of output rows - intp_dir: Path to interpolate RAVE file directory - rave_to_intp: File prefix indicating which RAVE files to interpolate - - Returns: - A tuple containing: - * ``0``: A numpy array of average HWP - * ``1``: An xarray data array version of the average HWP - * ``2``: A numpy array of average total precipitation - * ``3``: An xarray data array version of average total precipitation - """ - hwp_ave = [] - totprcp = np.zeros((cols * rows)) - var1, var2 = "rrfs_hwp_ave", "totprcp_ave" - - for cycle in fcst_dates: - assert isinstance(cycle, str) - try: - print(f"Processing restart file for date: {cycle}") - file_path = os.path.join( - hourly_hwpdir, f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc" - ) - rave_path = os.path.join(intp_dir, f"{rave_to_intp}{cycle}00_{cycle}59.nc") - - if os.path.exists(file_path) and os.path.exists(rave_path): - try: - with xr.open_dataset(file_path) as nc: - if var1 in nc.variables and var2 in nc.variables: - hwp_values = nc.rrfs_hwp_ave.values.ravel() - tprcp_values = nc.totprcp_ave.values.ravel() - totprcp += np.where(tprcp_values > 0, tprcp_values, 0) - hwp_ave.append(hwp_values) - print(f"Restart file processed for: {cycle}") - else: - print( - f"Missing variables {var1} or {var2} in file: {file_path}" - ) - except ( - FileNotFoundError, - IOError, - OSError, - RuntimeError, - ValueError, - TypeError, - KeyError, - IndexError, - MemoryError, - ) as e: - print(f"Error processing NetCDF file {file_path}: {e}") - else: - print( - f"One or more files non-available for this cycle: {file_path}, {rave_path}" - ) - except (ValueError, TypeError) as e: - print(f"Error processing cycle {cycle}: {e}") - - # Calculate the mean HWP values if available - if hwp_ave: - hwp_ave_arr = np.nanmean(hwp_ave, axis=0).reshape(cols, rows) - totprcp_ave_arr = totprcp.reshape(cols, rows) - else: - hwp_ave_arr = np.zeros((cols, rows)) - totprcp_ave_arr = np.zeros((cols, rows)) - - xarr_hwp = xr.DataArray(hwp_ave_arr) - xarr_totprcp = xr.DataArray(totprcp_ave_arr) - - return hwp_ave_arr, xarr_hwp, totprcp_ave_arr, xarr_totprcp diff --git a/ush/smoke_dust_interp_tools.py b/ush/smoke_dust_interp_tools.py deleted file mode 100755 index 6b1a6bfead..0000000000 --- a/ush/smoke_dust_interp_tools.py +++ /dev/null @@ -1,566 +0,0 @@ -#!/usr/bin/env python3 - -import datetime as dt -from typing import Tuple, List, Any - -import pandas as pd -import os -import fnmatch -import xarray as xr -import numpy as np -from netCDF4 import Dataset -from numpy import ndarray -from pandas import Index -from xarray import DataArray - -try: - import esmpy as ESMF -except ImportError: - # esmpy version 8.3.1 is required on Orion/Hercules - import ESMF - - -def date_range(current_day: str, ebb_dcycle: int, persistence: str) -> Index: - """ - Create date range, this is later used to search for RAVE and HWP from previous 24 hours. 
- - Args: - current_day: The current forecast day and hour - ebb_dcycle: Valid options are ``"1"`` and ``"2"`` - persistence: If ``True``, use satellite observations from previous day - - Returns: - A string ``Index`` with values matching the forecast day and hour - """ - print(f"Searching for interpolated RAVE for {current_day}") - print(f"EBB CYCLE: {ebb_dcycle}") - print(f"Persistence setting received: {persistence}") - - fcst_datetime = dt.datetime.strptime(current_day, "%Y%m%d%H") - # persistence (bool): Determines if forecast should persist from previous day. - - if ebb_dcycle == 1: - print("Find RAVE for ebb_dcyc 1") - if persistence == True: - # Start date range from one day prior if persistence is True - print( - "Creating emissions for persistence method where satellite FRP persist from previous day" - ) - start_datetime = fcst_datetime - dt.timedelta(days=1) - else: - # Start date range from the current date - print("Creating emissions using current date satellite FRP") - start_datetime = fcst_datetime - # Generate dates for 24 hours from start_datetime - fcst_dates = pd.date_range(start=start_datetime, periods=24, freq="H").strftime( - "%Y%m%d%H" - ) - else: - print("Creating emissions for modulated persistence by Wildfire potential") - start_datetime = fcst_datetime - dt.timedelta(days=1, hours=1) - - fcst_dates = pd.date_range(start=start_datetime, periods=24, freq="H").strftime( - "%Y%m%d%H" - ) - - print(f"Current cycle: {fcst_datetime}") - return fcst_dates - - -def check_for_intp_rave( - intp_dir: str, fcst_dates: Index, rave_to_intp: str -) -> Tuple[List[str], List[str], bool]: - """ - Check if interpolated RAVE is available for the previous 24 hours - - Args: - intp_dir: Path to directory containing interpolated RAVE files from previous cycles - fcst_dates: Forecast data and hours to search ``intp_dir`` for - rave_to_intp: Filename prefix for the interpolated RAVE files - - Returns: - A tuple containing: - * ``0``: The available forecast days/hours - * ``1``: The unavailable (missing) forecast day/hours - * ``2``: A boolean indicating if there are any interpolated RAVE files available - """ - intp_avail_hours = [] - intp_non_avail_hours = [] - # There are four situations here. - # 1) the file is missing (interpolate a new file) - # 2) the file is present (use it) - # 3) there is a link, but it's broken (interpolate a new file) - # 4) there is a valid link (use it) - for date in fcst_dates: - file_name = f"{rave_to_intp}{date}00_{date}59.nc" - file_path = os.path.join(intp_dir, file_name) - file_exists = os.path.isfile(file_path) - is_link = os.path.islink(file_path) - is_valid_link = is_link and os.path.exists(file_path) - - if file_exists or is_valid_link: - print(f"RAVE interpolated file available for {file_name}") - intp_avail_hours.append(str(date)) - else: - print(f"Interpolated file non available, interpolate RAVE for {file_name}") - intp_non_avail_hours.append(str(date)) - - print( - f"Available interpolated files for hours: {intp_avail_hours}, Non available interpolated files for hours: {intp_non_avail_hours}" - ) - - inp_files_2use = len(intp_avail_hours) > 0 - - return intp_avail_hours, intp_non_avail_hours, inp_files_2use - - -def check_for_raw_rave( - RAVE: str, intp_non_avail_hours: List[str], intp_avail_hours: List[str] -) -> Tuple[List[List[str]], List[str], List[str], bool]: - """ - Check if raw RAVE in intp_non_avail_hours list is available for interpolation. 
- - Args: - RAVE: Directory containing the raw RAVE files - intp_non_avail_hours: RAVE days/hours that are not available - intp_avail_hours: RAVE day/hours that are available - - Returns: - A tuple containing: - * ``0``: Raw RAVE file paths that are available - * ``1``: The days/hours of the available RAVE files - * ``2``: The days/hours that are not available - * ``3``: A boolean indicating if this is the first day of the forecast - """ - rave_avail = [] - rave_avail_hours = [] - rave_nonavail_hours_test = [] - for date in intp_non_avail_hours: - wildcard_name = f"*-3km*{date}*{date}59590*.nc" - name_retro = f"*3km*{date}*{date}*.nc" - matching_files = [ - f - for f in os.listdir(RAVE) - if fnmatch.fnmatch(f, wildcard_name) or fnmatch.fnmatch(f, name_retro) - ] - print(f"Find raw RAVE: {matching_files}") - if not matching_files: - print(f"Raw RAVE non_available for interpolation {date}") - rave_nonavail_hours_test.append(date) - else: - print(f"Raw RAVE available for interpolation {matching_files}") - rave_avail.append(matching_files) - rave_avail_hours.append(date) - - print( - f"Raw RAVE available: {rave_avail_hours}, rave_nonavail_hours: {rave_nonavail_hours_test}" - ) - first_day = not rave_avail_hours and not intp_avail_hours - - print(f"FIRST DAY?: {first_day}") - return rave_avail, rave_avail_hours, rave_nonavail_hours_test, first_day - - -def creates_st_fields(grid_in: str, grid_out: str) -> Tuple[ - ESMF.Field, - ESMF.Field, - DataArray, - DataArray, - ESMF.Grid, - ESMF.Grid, - DataArray, - DataArray, -]: - """ - Create source and target fields for regridding. - - Args: - grid_in: Path to input grid - grid_out: Path to output grid - - Returns: - A tuple containing: - * ``0``: Source ESMF field - * ``1``: Destination ESMF field - * ``2``: Destination latitudes - * ``3``: Destination longitudes - * ``4``: Source ESMF grid - * ``5``: Destination ESMF grid - * ``6``: Source latitude - * ``7``: Destination area - """ - # Open datasets with context managers - with xr.open_dataset(grid_in) as ds_in, xr.open_dataset(grid_out) as ds_out: - tgt_area = ds_out["area"] - tgt_latt = ds_out["grid_latt"] - tgt_lont = ds_out["grid_lont"] - src_latt = ds_in["grid_latt"] - - srcgrid = ESMF.Grid( - np.array(src_latt.shape), - staggerloc=[ESMF.StaggerLoc.CENTER, ESMF.StaggerLoc.CORNER], - coord_sys=ESMF.CoordSys.SPH_DEG, - ) - tgtgrid = ESMF.Grid( - np.array(tgt_latt.shape), - staggerloc=[ESMF.StaggerLoc.CENTER, ESMF.StaggerLoc.CORNER], - coord_sys=ESMF.CoordSys.SPH_DEG, - ) - - srcfield = ESMF.Field(srcgrid, name="test", staggerloc=ESMF.StaggerLoc.CENTER) - tgtfield = ESMF.Field(tgtgrid, name="test", staggerloc=ESMF.StaggerLoc.CENTER) - - print("Grid in and out files available. Generating target and source fields") - return ( - srcfield, - tgtfield, - tgt_latt, - tgt_lont, - srcgrid, - tgtgrid, - src_latt, - tgt_area, - ) - - -def create_emiss_file(fout: Dataset, cols: int, rows: int) -> None: - """ - Create necessary dimensions for the emission file. - - Args: - fout: Dataset to update - cols: Number of columns - rows: Number of rows - """ - fout.createDimension("t", None) - fout.createDimension("lat", cols) - fout.createDimension("lon", rows) - setattr(fout, "PRODUCT_ALGORITHM_VERSION", "Beta") - setattr(fout, "TIME_RANGE", "1 hour") - - -def Store_latlon_by_Level( - fout: Dataset, varname: str, var: DataArray, long_name: str, units: str, fval: str -) -> None: - """ - Store a 2D variable (latitude/longitude) in the file. 
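# Aside, as a minimal sketch with an assumed output path: in netCDF4-python
# a CF-style _FillValue can only be attached when the variable is created;
# assigning it afterwards raises an error, which is presumably why the
# helpers above write a plain "FillValue" attribute post hoc instead.
from netCDF4 import Dataset

with Dataset("/tmp/emiss_example.nc", "w") as ds:
    ds.createDimension("lat", 5)
    ds.createDimension("lon", 10)
    var = ds.createVariable("frp_avg_hr", "f4", ("lat", "lon"),
                            fill_value=-9999.0)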
- - Args: - fout: Dataset to update - varname: Variable name to create - var: Variable data to store - long_name: Variable long name - units: Variable units - fval: Variable fill value - """ - var_out = fout.createVariable(varname, "f4", ("lat", "lon")) - var_out.units = units - var_out.long_name = long_name - var_out.standard_name = varname - fout.variables[varname][:] = var - var_out.FillValue = fval - var_out.coordinates = "geolat geolon" - - -def Store_by_Level( - fout: Dataset, varname: str, long_name: str, units: str, fval: str -) -> None: - """ - Store a 3D variable (time, latitude/longitude) in the file. - - Args: - fout: Dataset to update - varname: Name of the variable to create - long_name: Long name of the variable to create - units: Units of the variable to create - fval: Fill value of the variable to create - """ - var_out = fout.createVariable(varname, "f4", ("t", "lat", "lon")) - var_out.units = units - var_out.long_name = long_name - var_out.standard_name = long_name - var_out.FillValue = fval - var_out.coordinates = "t geolat geolon" - - -def create_dummy( - intp_dir: str, - current_day: str, - tgt_latt: DataArray, - tgt_lont: DataArray, - cols: int, - rows: int, -) -> str: - """ - Create a dummy RAVE interpolated file if first day or regridder fails. - - Args: - intp_dir: Directory to create the dummy file in - current_day: Current day (and hour?) to create the dummy file for - tgt_latt: Target grid latitudes - tgt_lont: Target grid longitudes - cols: Number of columns - rows: Number of rows - - Returns: - A string stating the operation was successful. - """ - file_path = os.path.join(intp_dir, f"SMOKE_RRFS_data_{current_day}00.nc") - dummy_file = np.zeros((cols, rows)) # Changed to 3D to match the '3D' dimensions - with Dataset(file_path, "w") as fout: - create_emiss_file(fout, cols, rows) - # Store latitude and longitude - Store_latlon_by_Level( - fout, "geolat", tgt_latt, "cell center latitude", "degrees_north", "-9999.f" - ) - Store_latlon_by_Level( - fout, "geolon", tgt_lont, "cell center longitude", "degrees_east", "-9999.f" - ) - - # Initialize and store each variable - Store_by_Level(fout, "frp_davg", "Daily mean Fire Radiative Power", "MW", "0.f") - fout.variables["frp_davg"][0, :, :] = dummy_file - Store_by_Level(fout, "ebb_rate", "Total EBB emission", "ug m-2 s-1", "0.f") - fout.variables["ebb_rate"][0, :, :] = dummy_file - Store_by_Level( - fout, "fire_end_hr", "Hours since fire was last detected", "hrs", "0.f" - ) - fout.variables["fire_end_hr"][0, :, :] = dummy_file - Store_by_Level( - fout, "hwp_davg", "Daily mean Hourly Wildfire Potential", "none", "0.f" - ) - fout.variables["hwp_davg"][0, :, :] = dummy_file - Store_by_Level(fout, "totprcp_24hrs", "Sum of precipitation", "m", "0.f") - fout.variables["totprcp_24hrs"][0, :, :] = dummy_file - - return "Emissions dummy file created successfully" - - -def generate_regridder( - rave_avail_hours: List[str], - srcfield: ESMF.Field, - tgtfield: ESMF.Field, - weightfile: str, - intp_avail_hours: List[str], -) -> Tuple[Any, bool]: - """ - Generate an ESMF regridder unless we are using dummy emissions. 
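# A minimal sketch of the two regridder paths this module uses, assuming
# src_field and tgt_field are esmpy.Field objects already built on the RAVE
# and RRFS grids and weight_file points at precomputed ESMF weights.
import esmpy

regridder = esmpy.RegridFromFile(src_field, tgt_field, weight_file)
# Equivalent conservative weights can be computed in memory when no weight
# file is available (the fallback the later patches formalize):
# regridder = esmpy.Regrid(src_field, tgt_field,
#                          regrid_method=esmpy.RegridMethod.CONSERVE,
#                          unmapped_action=esmpy.UnmappedAction.IGNORE,
#                          ignore_degenerate=True)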
- Args: - rave_avail_hours: The RAVE hours that are available - srcfield: The source ESMF field - tgtfield: The destination ESMF field - weightfile: The ESMF weight field mapping the RAVE grid to the forecast grid - intp_avail_hours: The available interpolated hours - - Returns: - A tuple containing: - * ``0``: ESMF regridder or none (if using dummy emissions) - * ``1``: Boolean flag indicating if dummy emissions are being used - """ - print("Checking conditions for generating regridder.") - use_dummy_emiss = len(rave_avail_hours) == 0 and len(intp_avail_hours) == 0 - regridder = None - - if not use_dummy_emiss: - try: - print("Generating regridder.") - regridder = ESMF.RegridFromFile(srcfield, tgtfield, weightfile) - print("Regridder generated successfully.") - except ValueError as e: - print(f"Regridder failed due to a ValueError: {e}.") - except OSError as e: - print( - f"Regridder failed due to an OSError: {e}. Check if the weight file exists and is accessible." - ) - except ( - FileNotFoundError, - IOError, - RuntimeError, - TypeError, - KeyError, - IndexError, - MemoryError, - ) as e: - print( - f"Regridder failed due to corrupted file: {e}. Check if RAVE file has a different grid or format. " - ) - except Exception as e: - print(f"An unexpected error occurred while generating regridder: {e}.") - else: - use_dummy_emiss = True - - return regridder, use_dummy_emiss - - -def mask_edges(data: ndarray, mask_width: int = 1) -> ndarray: - """ - Mask edges of domain for interpolation. - - Args: - data: The numpy array to mask - mask_width: The width of the mask at each edge - - Returns: - A numpy array of the masked edges - """ - original_shape = data.shape - if mask_width < 1: - return data # No masking if mask_width is less than 1 - - # Mask top and bottom rows - data[:mask_width, :] = np.nan - data[-mask_width:, :] = np.nan - - # Mask left and right columns - data[:, :mask_width] = np.nan - data[:, -mask_width:] = np.nan - assert data.shape == original_shape, "Data shape altered during masking." - - return data - - -def interpolate_rave( - RAVE: str, - rave_avail: List[List[str]], - rave_avail_hours: List[str], - use_dummy_emiss: bool, - vars_emis: List[str], - regridder: Any, - srcgrid: ESMF.Grid, - tgtgrid: ESMF.Grid, - rave_to_intp: str, - intp_dir: str, - tgt_latt: DataArray, - tgt_lont: DataArray, - cols: int, - rows: int, -) -> None: - """ - Process a RAVE file for interpolation. - - Args: - RAVE: Path to the raw RAVE files - rave_avail: List of RAVE days/hours that are available - rave_avail_hours: List of RAVE hours that are available - use_dummy_emiss: True if we are using dummy emissions - vars_emis: Names of the emission variables - regridder: The ESMF regridder object (i.e. route handle). This is None if we are using dummy emissions. 
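# A quick numeric illustration of mask_edges above, assuming a small 4x6
# array: a one-cell border is set to NaN so conservative-regridding edge
# artifacts are excluded from the interpolated fields.
import numpy as np

data = np.ones((4, 6))
data[:1, :] = np.nan
data[-1:, :] = np.nan
data[:, :1] = np.nan
data[:, -1:] = np.nan
assert np.isfinite(data).sum() == 2 * 4  # only the interior survives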
- srcgrid: The source ESMF grid - tgtgrid: The destination ESMF grid - rave_to_intp: The prefix of RAVE files to interpolate - intp_dir: The RAVE directory containing interpolated files - tgt_latt: The destination grid latitudes - tgt_lont: The destination grid longitudes - cols: Number of columns in the destination - rows: Number of rows in the destination - """ - for index, current_hour in enumerate(rave_avail_hours): - file_name = rave_avail[index] - rave_file_path = os.path.join(RAVE, file_name[0]) - - print(f"Processing file: {rave_file_path} for hour: {current_hour}") - - if not use_dummy_emiss and os.path.exists(rave_file_path): - try: - with xr.open_dataset(rave_file_path, decode_times=False) as ds_togrid: - try: - ds_togrid = ds_togrid[["FRP_MEAN", "FRE"]] - except KeyError as e: - print(f"Missing required variables in {rave_file_path}: {e}") - continue - - output_file_path = os.path.join( - intp_dir, f"{rave_to_intp}{current_hour}00_{current_hour}59.nc" - ) - print("=============before regridding===========", "FRP_MEAN") - print(np.sum(ds_togrid["FRP_MEAN"], axis=(1, 2))) - - try: - with Dataset(output_file_path, "w") as fout: - create_emiss_file(fout, cols, rows) - Store_latlon_by_Level( - fout, - "geolat", - tgt_latt, - "cell center latitude", - "degrees_north", - "-9999.f", - ) - Store_latlon_by_Level( - fout, - "geolon", - tgt_lont, - "cell center longitude", - "degrees_east", - "-9999.f", - ) - - for svar in vars_emis: - try: - srcfield = ESMF.Field( - srcgrid, - name=svar, - staggerloc=ESMF.StaggerLoc.CENTER, - ) - tgtfield = ESMF.Field( - tgtgrid, - name=svar, - staggerloc=ESMF.StaggerLoc.CENTER, - ) - src_rate = ds_togrid[svar].fillna(0) - src_QA = xr.where( - ds_togrid["FRE"] > 1000, src_rate, 0.0 - ) - srcfield.data[...] = src_QA[0, :, :] - tgtfield = regridder(srcfield, tgtfield) - masked_tgt_data = mask_edges( - tgtfield.data, mask_width=1 - ) - - if svar == "FRP_MEAN": - Store_by_Level( - fout, - "frp_avg_hr", - "Mean Fire Radiative Power", - "MW", - "0.f", - ) - tgt_rate = masked_tgt_data - fout.variables["frp_avg_hr"][0, :, :] = tgt_rate - print( - "=============after regridding===========" - + svar - ) - print(np.sum(tgt_rate)) - elif svar == "FRE": - Store_by_Level(fout, "FRE", "FRE", "MJ", "0.f") - tgt_rate = masked_tgt_data - fout.variables["FRE"][0, :, :] = tgt_rate - except (ValueError, KeyError) as e: - print( - f"Error processing variable {svar} in {rave_file_path}: {e}" - ) - except ( - OSError, - IOError, - RuntimeError, - FileNotFoundError, - TypeError, - IndexError, - MemoryError, - ) as e: - print( - f"Error creating or writing to NetCDF file {output_file_path}: {e}" - ) - except ( - OSError, - IOError, - RuntimeError, - FileNotFoundError, - TypeError, - IndexError, - MemoryError, - ) as e: - print(f"Error reading NetCDF file {rave_file_path}: {e}") - else: - print(f"File not found or dummy emissions required: {rave_file_path}") From e5bd29cd541ba2b63c54f657a5ec88ac32e12106 Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Fri, 31 Jan 2025 08:13:05 -0700 Subject: [PATCH 20/41] update executable permissions --- ush/smoke_dust/add_smoke.py | 0 ush/smoke_dust/generate_emissions.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 ush/smoke_dust/add_smoke.py mode change 100644 => 100755 ush/smoke_dust/generate_emissions.py diff --git a/ush/smoke_dust/add_smoke.py b/ush/smoke_dust/add_smoke.py old mode 100644 new mode 100755 diff --git a/ush/smoke_dust/generate_emissions.py b/ush/smoke_dust/generate_emissions.py old mode 100644 
new mode 100755 From 8b0a118d47a75e0ff52bdb1f0160e529c3921cce Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Fri, 31 Jan 2025 09:02:03 -0700 Subject: [PATCH 21/41] support upper on log level --- ush/config.smoke_dust.yaml | 10 +++------- ush/smoke_dust/core/context.py | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml index a9cfe4a98d..9197db5ec3 100644 --- a/ush/config.smoke_dust.yaml +++ b/ush/config.smoke_dust.yaml @@ -57,17 +57,13 @@ task_make_lbcs: VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" task_run_fcst: DT_ATMOS: 36 - LAYOUT_X: 15 - LAYOUT_Y: 20 - BLOCKSIZE: 32 - WRTCMP_write_tasks_per_group: 40 RESTART_INTERVAL: 6 12 18 24 QUILTING: true PRINT_ESMF: false DO_FCST_RESTART: false -task_run_post: - POST_OUTPUT_DOMAIN_NAME: conus3km - USE_CUSTOM_POST_CONFIG_FILE: false +#task_run_post: #tdk:rm maybe? +# POST_OUTPUT_DOMAIN_NAME: conus3km +# USE_CUSTOM_POST_CONFIG_FILE: false global: DO_ENSEMBLE: false NUM_ENS_MEMBERS: 2 diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py index 913fff7900..f8bd00e189 100644 --- a/ush/smoke_dust/core/context.py +++ b/ush/smoke_dust/core/context.py @@ -125,7 +125,7 @@ def create_from_args(cls, args: List[str]) -> "SmokeDustContext": persistence=cls._str_to_bool_(l_persistence), rave_qa_filter=RaveQaFilter(l_rave_qa_filter.upper()), exit_on_error=cls._str_to_bool_(l_exit_on_error), - log_level=l_log_level, + log_level=l_log_level.upper(), current_day=current_day, nwges_dir=nwges_dir, ) From 785e19e2294638f775c55f203aa7ca7576b05dc2 Mon Sep 17 00:00:00 2001 From: benkozi Date: Fri, 31 Jan 2025 12:05:42 -0700 Subject: [PATCH 22/41] Update sd_environment.yml --- sd_environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sd_environment.yml b/sd_environment.yml index f4d422286a..d8557a69ee 100644 --- a/sd_environment.yml +++ b/sd_environment.yml @@ -1,4 +1,4 @@ -name: srw_aqm +name: srw_sd channels: - conda-forge - ufs-community @@ -13,4 +13,4 @@ dependencies: - pytest-mock=3.14.* - scipy=1.10.* - uwtools=2.3.* - - xarray=2022.11.* \ No newline at end of file + - xarray=2022.11.* From 68bba33cf31ab6813078f4e1014972c2e80664b7 Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Fri, 31 Jan 2025 15:56:57 -0700 Subject: [PATCH 23/41] added regridding test --- sd_environment.yml | 2 +- .../test_smoke_dust/bin/weight_file.nc | Bin 0 -> 7918 bytes tests/test_python/test_smoke_dust/conftest.py | 78 ++++++++++ .../test_smoke_dust/test_core/__init__.py | 0 .../test_smoke_dust/test_core/test_regrid.py | 137 ++++++++++++++++++ .../test_generate_emissions.py | 103 +++---------- ush/smoke_dust/core/context.py | 9 +- ush/smoke_dust/core/regrid.py | 9 +- 8 files changed, 255 insertions(+), 83 deletions(-) create mode 100644 tests/test_python/test_smoke_dust/bin/weight_file.nc create mode 100644 tests/test_python/test_smoke_dust/conftest.py create mode 100644 tests/test_python/test_smoke_dust/test_core/__init__.py create mode 100644 tests/test_python/test_smoke_dust/test_core/test_regrid.py diff --git a/sd_environment.yml b/sd_environment.yml index f4d422286a..7b6b981b9b 100644 --- a/sd_environment.yml +++ b/sd_environment.yml @@ -1,4 +1,4 @@ -name: srw_aqm +name: srw_sd channels: - conda-forge - ufs-community diff --git a/tests/test_python/test_smoke_dust/bin/weight_file.nc b/tests/test_python/test_smoke_dust/bin/weight_file.nc new file mode 100644 index 
0000000000000000000000000000000000000000..57bb7bb0563464e78db139fefb0df0d69b96cf5b GIT binary patch literal 7918 zcmeHMO-~b16umPewUogEf*_y_h^UCQt;J{pVr`3{r5H+L;-(f_V-sjiJ77#qG;w8O zqFcWbmPU8R#P|=?5H}{SUARW$AHaKN=C#s*Zkia$yW#Y8-p9QA&b@sk=iZ-)jyHNb zJ*wMH!qy#I+i&;yoq=0R-yTj+L}yg}joN4RX*--Dv81~7sH9+uYY#pZF`cx0s`MRQ zY)wu0Jh=~_L}%=V+PVp|HHE6*DW2_hUp0F{(phKsKI^m}`<-4iYj?hQEcY8vDM~AP zR7FvO_`)VytrZ?O_x4!Z6B*R*ruI8Lqs}^u=D`j-$aMUDZLOxVzpgF))sJGTMlN#W z-H~`WnWWv^h4ZV7=;Tx^k(``Pq>_TV*$WckshBnYtTTW1dah*fU)soL z%}8|I$mABY`BJWsH|Ca1BVRBpL(A!6E(qgHmBgD9Mi6bi*mE}u5DrH!t0h@E#Mu4U8XHn(18pI=j%^jWa6SYnn=_4JO9y@?*!D*m!IvmWaes){ZBcW9*no zo9R;4tZZG5DY-FT4#_4Sov6eK8`9HH`388T$MA$WTZxxSL@pHzH?l=D$7CS^`xsce zoGl`IMuS6~P@D>*A>V0VVCZ`0+VE(=7YO=-E3eMY^F(Qq0m;C2VBmaYI&R%)xLnx8 z?dDu;B>qW@tEIZ1?{v$m`<-rj;T7LxD;basNCqSWk^#wpWI!??8ITM}1|$QL0m;C> zW#Gq{y}xY#H4m1!u8<|JZ(5$vN%>uxuk(oH5XHA*wvQHtD=e8Ee#zK!4 ztsbtCj1Eb=79zSOG1b|w)AGA}SD{^t!jaIQwE4oqBgpe?BahYRe^lIWLoctT;x4=p zZMEVqJhS~Hdi$ZJ2t^I^p_Cy`IH8NNlL6JNp5b9MFmy&EV;2L$8AQ2e2C5|x>bY(} zZ46Y?P@6#Y1Z1Gvg`jscP?tjCu7_bTdKrC;1B`ygLB;^%5aTf82xE|e>eLY97{kXn z&hRq=j3A>HnK|H86#ui#v_wvknIA8A1g*?OFu0_N_`;N=$Oe(KLL1eJg`0y24cqX@ l=L>(#*$2pwTIOuo3)=w_UOJCdowLGIF}_y6>e((i{0nA_takta literal 0 HcmV?d00001 diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py new file mode 100644 index 0000000000..bde707ded5 --- /dev/null +++ b/tests/test_python/test_smoke_dust/conftest.py @@ -0,0 +1,78 @@ +import hashlib +import os +from dataclasses import dataclass +from pathlib import Path + +import netCDF4 as nc +import numpy as np + +import pytest + +from smoke_dust.core.context import SmokeDustContext + + +@dataclass +class FakeGridOutShape: + y_size: int = 5 + x_size: int = 10 + + @property + def as_tuple(self) -> tuple[int, int]: + return self.y_size, self.x_size + + +@pytest.fixture +def fake_grid_out_shape() -> FakeGridOutShape: + return FakeGridOutShape() + + +@pytest.fixture +def bin_dir() -> Path: + return (Path(__file__).parent / "bin").expanduser().resolve(strict=True) + + +def create_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None: + # tdk:rm: use one with corners + with nc.Dataset(root_dir / "ds_out_base.nc", "w") as ds: + ds.createDimension("grid_yt", shape.y_size) + ds.createDimension("grid_xt", shape.x_size) + for varname in ["area", "grid_latt", "grid_lont"]: + var = ds.createVariable(varname, "f4", ("grid_yt", "grid_xt")) + var[:] = np.ones((shape.y_size, shape.x_size)) + + +def create_context( + root_dir: Path, overrides: dict | None = None, extra: dict | None = None +) -> SmokeDustContext: + current_day = "2019072200" + nwges_dir = root_dir + os.environ["CDATE"] = current_day + os.environ["DATA"] = str(nwges_dir) + try: + kwds = dict( + staticdir=root_dir, + ravedir=root_dir, + intp_dir=root_dir, + predef_grid="RRFS_CONUS_3km", + ebb_dcycle_flag="2", + restart_interval="6 12 18 24", + persistence="FALSE", + rave_qa_filter="NONE", + exit_on_error="TRUE", + log_level="debug", + ) + if overrides is not None: + kwds.update(overrides) + context = SmokeDustContext.create_from_args(kwds.values(), extra=extra) + finally: + for ii in ["CDATE", "DATA"]: + os.unsetenv(ii) + return context + + +def create_file_hash(path: Path) -> str: + with open(path, "rb") as f: + file_hash = hashlib.md5() + while chunk := f.read(8192): + file_hash.update(chunk) + return file_hash.hexdigest() diff --git a/tests/test_python/test_smoke_dust/test_core/__init__.py b/tests/test_python/test_smoke_dust/test_core/__init__.py new file mode 100644 index 
0000000000..e69de29bb2 diff --git a/tests/test_python/test_smoke_dust/test_core/test_regrid.py b/tests/test_python/test_smoke_dust/test_core/test_regrid.py new file mode 100644 index 0000000000..0876820115 --- /dev/null +++ b/tests/test_python/test_smoke_dust/test_core/test_regrid.py @@ -0,0 +1,137 @@ +import glob +import shutil +import subprocess +from pathlib import Path +from typing import Any + +import numpy as np +import pytest +import xarray as xr +from _pytest.fixtures import SubRequest +from pydantic import BaseModel +from pytest_mock import MockerFixture + +from smoke_dust.core.context import SmokeDustContext +from smoke_dust.core.preprocessor import SmokeDustPreprocessor +from smoke_dust.core.regrid import SmokeDustRegridProcessor +from test_python.test_smoke_dust.conftest import ( + FakeGridOutShape, + create_grid_out, + create_context, + create_file_hash, +) + + +def ncdump(path: Path, header_only: bool = True) -> Any: + args = ["ncdump"] + if header_only: + args.append("-h") + args.append(str(path)) + ret = subprocess.check_output(args) + print(ret.decode(), flush=True) + return ret + + +class DataForTest(BaseModel): + model_config = dict(arbitrary_types_allowed=True) + context: SmokeDustContext + preprocessor: SmokeDustPreprocessor + + +@pytest.fixture(params=[True, False], ids=lambda p: f"regrid_in_memory={p}") +def data_for_test( + request: SubRequest, + tmp_path: Path, + fake_grid_out_shape: FakeGridOutShape, + bin_dir: Path, +) -> DataForTest: + weight_file = "weight_file.nc" + shutil.copy(bin_dir / weight_file, tmp_path / "weight_file.nc") + for name in ["ds_out_base.nc", "grid_in.nc"]: + path = tmp_path / name + create_rave_and_rrfs_like_data( + path, fake_grid_out_shape, fields=["area"], ntime=None + ) + context = create_context(tmp_path, extra=dict(regrid_in_memory=request.param)) + preprocessor = SmokeDustPreprocessor(context) + for date in preprocessor.forecast_dates: + path = tmp_path / f"Hourly_Emissions_3km_{date}_{date}.nc" + create_rave_and_rrfs_like_data( + path, fake_grid_out_shape, fields=["FRP_MEAN", "FRE"] + ) + return DataForTest(context=context, preprocessor=preprocessor) + + +def create_analytic_data_array( + dims: list[str], + lon_mesh: np.ndarray, + lat_mesh: np.ndarray, + ntime: int | None = None, +) -> xr.DataArray: + deg_to_rad = 3.141592653589793 / 180.0 + analytic_data = 2.0 + np.cos(deg_to_rad * lon_mesh) ** 2 * np.cos( + 2.0 * deg_to_rad * (90.0 - lat_mesh) + ) + if ntime is not None: + time_modifier = np.arange(1, ntime + 1).reshape(ntime, 1, 1) + analytic_data = analytic_data.reshape([1] + list(analytic_data.shape)) + analytic_data = np.repeat(analytic_data, ntime, axis=0) + analytic_data = time_modifier * analytic_data + return xr.DataArray( + analytic_data, + dims=dims, + ) + + +def create_rave_and_rrfs_like_data( + path: Path, + shape: FakeGridOutShape, + with_corners: bool = True, + fields: list[str] | None = None, + min_lon: int = 230, + min_lat: int = 25, + ntime: int | None = 1, +) -> xr.Dataset: + if path.exists(): + raise ValueError(f"path exists: {path}") + lon = np.arange(shape.x_size, dtype=float) + min_lon + lat = np.arange(shape.y_size, dtype=float) + min_lat + lon_mesh, lat_mesh = np.meshgrid(lon, lat) + ds = xr.Dataset() + dims = ["grid_yt", "grid_xt"] + ds["grid_lont"] = xr.DataArray(lon_mesh, dims=dims) + ds["grid_latt"] = xr.DataArray(lat_mesh, dims=dims) + if with_corners: + lonc = np.hstack((lon - 0.5, [lon[-1] + 0.5])) + latc = np.hstack((lat - 0.5, [lat[-1] + 0.5])) + lonc_mesh, latc_mesh = np.meshgrid(lonc, latc) + 
ds["grid_lon"] = xr.DataArray(lonc_mesh, dims=["grid_y", "grid_x"]) + ds["grid_lat"] = xr.DataArray(latc_mesh, dims=["grid_y", "grid_x"]) + if fields is not None: + if ntime is not None: + field_dims = ["time"] + dims + else: + field_dims = dims + for field in fields: + ds[field] = create_analytic_data_array( + field_dims, lon_mesh, lat_mesh, ntime=ntime + ) + ds.to_netcdf(path) + return ds + + +class TestSmokeDustRegridProcessor: + def test_run( + self, data_for_test: DataForTest, mocker: MockerFixture, tmp_path: Path + ) -> None: + spy1 = mocker.spy(SmokeDustRegridProcessor, "_run_impl_") + regrid_processor = SmokeDustRegridProcessor(data_for_test.context) + regrid_processor.run(data_for_test.preprocessor.forecast_metadata) + spy1.assert_called_once() + interpolated_files = glob.glob( + f"*{data_for_test.context.rave_to_intp}*nc", root_dir=tmp_path + ) + assert len(interpolated_files) == 24 + for f in interpolated_files: + fpath = tmp_path / f + assert create_file_hash(fpath) == "8e90b769137aad054a2e49559d209c4d" diff --git a/tests/test_python/test_smoke_dust/test_generate_emissions.py b/tests/test_python/test_smoke_dust/test_generate_emissions.py index e6703f9221..4491cd7f5a 100644 --- a/tests/test_python/test_smoke_dust/test_generate_emissions.py +++ b/tests/test_python/test_smoke_dust/test_generate_emissions.py @@ -1,6 +1,3 @@ -import hashlib -import os -from dataclasses import dataclass from pathlib import Path from typing import Type @@ -19,21 +16,12 @@ SmokeDustCycleTwo, ) from smoke_dust.core.preprocessor import SmokeDustPreprocessor - - -@dataclass -class FakeGridOutShape: - y_size: int = 5 - x_size: int = 10 - - @property - def as_tuple(self) -> tuple[int, int]: - return self.y_size, self.x_size - - -@pytest.fixture -def fake_grid_out_shape() -> FakeGridOutShape: - return FakeGridOutShape() +from test_python.test_smoke_dust.conftest import ( + FakeGridOutShape, + create_grid_out, + create_context, + create_file_hash, +) def create_restart_files( @@ -75,15 +63,6 @@ def create_rave_interpolated( var[0, ...] 
= np.ones(shape.as_tuple) -def create_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None: - with nc.Dataset(root_dir / "ds_out_base.nc", "w") as ds: - ds.createDimension("grid_yt", shape.y_size) - ds.createDimension("grid_xt", shape.x_size) - for varname in ["area", "grid_latt", "grid_lont"]: - var = ds.createVariable(varname, "f4", ("grid_yt", "grid_xt")) - var[:] = np.ones((shape.y_size, shape.x_size)) - - def create_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: with nc.Dataset(root_dir / "veg_map.nc", "w") as ds: ds.createDimension("grid_yt", shape.y_size) @@ -92,29 +71,6 @@ def create_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: emiss_factor[:] = np.ones((shape.y_size, shape.x_size)) -def create_context(root_dir: Path, overrides: dict | None = None) -> SmokeDustContext: - current_day = "2019072200" - nwges_dir = root_dir - os.environ["CDATE"] = current_day - os.environ["DATA"] = str(nwges_dir) - kwds = dict( - staticdir=root_dir, - ravedir=root_dir, - intp_dir=root_dir, - predef_grid="RRFS_CONUS_3km", - ebb_dcycle_flag="2", - restart_interval="6 12 18 24", - persistence="FALSE", - rave_qa_filter="NONE", - exit_on_error="TRUE", - log_level="DEBUG", - ) - if overrides is not None: - kwds.update(overrides) - context = SmokeDustContext.create_from_args(kwds.values()) - return context - - class ExpectedData(BaseModel): flag: str klass: Type[AbstractSmokeDustCycleProcessor] @@ -136,39 +92,28 @@ class DataForTest(BaseModel): ExpectedData( flag="2", klass=SmokeDustCycleTwo, hash="6752199f1039edc936a942f3885af38b" ), - ] + ], + ids=lambda p: f"ebb_dcycle_flag={p.flag}", ) def data_for_test( request: SubRequest, tmp_path: Path, fake_grid_out_shape: FakeGridOutShape ) -> DataForTest: - try: - create_grid_out(tmp_path, fake_grid_out_shape) - create_veg_map(tmp_path, fake_grid_out_shape) - context = create_context( - tmp_path, overrides=dict(ebb_dcycle_flag=request.param.flag) - ) - preprocessor = SmokeDustPreprocessor(context) - create_restart_files(tmp_path, preprocessor.forecast_dates, fake_grid_out_shape) - create_rave_interpolated( - tmp_path, - preprocessor.forecast_dates, - fake_grid_out_shape, - context.predef_grid.value + "_intp_", - ) - return DataForTest( - context=context, preprocessor=preprocessor, expected=request.param - ) - finally: - for ii in ["CDATE", "DATA"]: - os.unsetenv(ii) - - -def create_file_hash(path: Path) -> str: - with open(path, "rb") as f: - file_hash = hashlib.md5() - while chunk := f.read(8192): - file_hash.update(chunk) - return file_hash.hexdigest() + create_grid_out(tmp_path, fake_grid_out_shape) + create_veg_map(tmp_path, fake_grid_out_shape) + context = create_context( + tmp_path, overrides=dict(ebb_dcycle_flag=request.param.flag) + ) + preprocessor = SmokeDustPreprocessor(context) + create_restart_files(tmp_path, preprocessor.forecast_dates, fake_grid_out_shape) + create_rave_interpolated( + tmp_path, + preprocessor.forecast_dates, + fake_grid_out_shape, + context.predef_grid.value + "_intp_", + ) + return DataForTest( + context=context, preprocessor=preprocessor, expected=request.param + ) class TestSmokeDustPreprocessor: diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py index f8bd00e189..2e5b2078ae 100644 --- a/ush/smoke_dust/core/context.py +++ b/ush/smoke_dust/core/context.py @@ -4,7 +4,7 @@ import os from enum import unique, StrEnum, IntEnum from pathlib import Path -from typing import Tuple, List +from typing import Tuple, List, Any from mpi4py import MPI from pydantic import BaseModel, 
model_validator @@ -79,6 +79,7 @@ class SmokeDustContext(BaseModel): rank: int = MPI.COMM_WORLD.Get_rank() grid_out_shape: Tuple[int, int] = (0, 0) # Set in _finalize_model_ esmpy_debug: bool = False + regrid_in_memory: bool = False @model_validator(mode="after") def _finalize_model_(self) -> "SmokeDustContext": @@ -93,7 +94,9 @@ def _finalize_model_(self) -> "SmokeDustContext": return self @classmethod - def create_from_args(cls, args: List[str]) -> "SmokeDustContext": + def create_from_args( + cls, args: List[str], extra: dict | None = None + ) -> "SmokeDustContext": print(f"create_from_args: {args=}", flush=True) # Extract local arguments from args before converting values @@ -129,6 +132,8 @@ def create_from_args(cls, args: List[str]) -> "SmokeDustContext": current_day=current_day, nwges_dir=nwges_dir, ) + if extra is not None: + kwds.update(extra) return cls(**kwds) diff --git a/ush/smoke_dust/core/regrid.py b/ush/smoke_dust/core/regrid.py index 3777753fe7..6602861d00 100644 --- a/ush/smoke_dust/core/regrid.py +++ b/ush/smoke_dust/core/regrid.py @@ -140,7 +140,10 @@ def _run_impl_( self.log("creating regridder") self.log(f"{src_fwrap.value.data.shape=}", level=logging.DEBUG) self.log(f"{dst_fwrap.value.data.shape=}", level=logging.DEBUG) - if self._context.predef_grid == PredefinedGrid.RRFS_NA_13km: + if ( + self._context.predef_grid == PredefinedGrid.RRFS_NA_13km + or self._context.regrid_in_memory + ): # ESMF does not like reading the weights for this field combination (rc=-1). The error can be # bypassed by creating weights in-memory. self.log("creating regridding in-memory") @@ -150,6 +153,7 @@ def _run_impl_( regrid_method=esmpy.RegridMethod.CONSERVE, unmapped_action=esmpy.UnmappedAction.IGNORE, ignore_degenerate=True, + # filename="/opt/project/weight_file.nc" # Can be used to create a weight file for testing ) else: self.log("creating regridding from file") @@ -584,6 +588,9 @@ def mask_edges(data: np.ma.MaskedArray, mask_width: int = 1) -> None: return # No masking if mask_width is less than 1 target = data.mask + if isinstance(target, np.bool_): + data.mask = np.zeros_like(data, dtype=bool) + target = data.mask # Mask top and bottom rows target[:mask_width, :] = True target[-mask_width:, :] = True From ce62b3e45aa419d38f1ff093e8f8fbc8a72eb1ef Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Fri, 31 Jan 2025 16:16:32 -0700 Subject: [PATCH 24/41] refactored regrid to use property caching --- ush/smoke_dust/core/regrid.py | 463 ++++++++++++++++++---------------- 1 file changed, 241 insertions(+), 222 deletions(-) diff --git a/ush/smoke_dust/core/regrid.py b/ush/smoke_dust/core/regrid.py index 6602861d00..f3332d4805 100644 --- a/ush/smoke_dust/core/regrid.py +++ b/ush/smoke_dust/core/regrid.py @@ -19,6 +19,150 @@ from smoke_dust.core.context import RaveQaFilter, SmokeDustContext, PredefinedGrid from smoke_dust.core.variable import SD_VARS +NameListType = Tuple[str, ...] + + +class Dimension(BaseModel): + name: NameListType + size: int + lower: int + upper: int + staggerloc: int + coordinate_type: Literal["y", "x", "time"] + + +class DimensionCollection(BaseModel): + value: Tuple[Dimension, ...] 
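# A hedged sketch of the alias lookup this collection provides: one logical
# dimension may carry several netCDF spellings, so with assumed values
#   dim = Dimension(name=("grid_xt", "lon"), size=10, lower=0, upper=10,
#                   staggerloc=0, coordinate_type="x")
#   dims = DimensionCollection(value=(dim,))
# both dims.get("lon") and dims.get("grid_xt") resolve to the same dim,
# which is what lets RAVE and RRFS files name their axes differently.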
+ + def get(self, name: str | NameListType) -> Dimension: + if isinstance(name, str): + name_to_find = (name,) + else: + name_to_find = name + for jj in name_to_find: + for ii in self.value: + if jj in ii.name: + return ii + raise ValueError(f"dimension not found: {name}") + + +class AbstractWrapper(abc.ABC, BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True) + dims: DimensionCollection + + +class GridSpec(BaseModel): + model_config = ConfigDict(frozen=True) + + x_center: str + y_center: str + x_dim: NameListType + y_dim: NameListType + x_corner: str | None = None + y_corner: str | None = None + x_corner_dim: NameListType | None = None + y_corner_dim: NameListType | None = None + x_index: int = 0 + y_index: int = 1 + + @model_validator(mode="after") + def _validate_model_(self) -> "GridSpec": + corner_meta = [ + self.x_corner, + self.y_corner, + self.x_corner_dim, + self.y_corner_dim, + ] + is_given_sum = sum([ii is not None for ii in corner_meta]) + if is_given_sum > 0 and is_given_sum != len(corner_meta): + raise ValueError( + "if one corner name is supplied, then all must be supplied" + ) + return self + + @property + def has_corners(self) -> bool: + return self.x_corner is not None + + def get_x_corner(self) -> str: + if self.x_corner is None: + raise ValueError + return self.x_corner + + def get_y_corner(self) -> str: + if self.y_corner is None: + raise ValueError + return self.y_corner + + def get_x_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: + return grid.get_coords(self.x_index, staggerloc=staggerloc) + + def get_y_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: + return grid.get_coords(self.y_index, staggerloc=staggerloc) + + def create_grid_dims( + self, ds: nc.Dataset, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc + ) -> DimensionCollection: + if staggerloc == esmpy.StaggerLoc.CENTER: + x_dim, y_dim = self.x_dim, self.y_dim + elif staggerloc == esmpy.StaggerLoc.CORNER: + x_dim, y_dim = self.x_corner_dim, self.y_corner_dim + else: + raise NotImplementedError(staggerloc) + x_dimobj = Dimension( + name=x_dim, + size=get_nc_dimension(ds, x_dim).size, + lower=grid.lower_bounds[staggerloc][self.x_index], + upper=grid.upper_bounds[staggerloc][self.x_index], + staggerloc=staggerloc, + coordinate_type="x", + ) + y_dimobj = Dimension( + name=y_dim, + size=get_nc_dimension(ds, y_dim).size, + lower=grid.lower_bounds[staggerloc][self.y_index], + upper=grid.upper_bounds[staggerloc][self.y_index], + staggerloc=staggerloc, + coordinate_type="y", + ) + if self.x_index == 0: + value = [x_dimobj, y_dimobj] + elif self.x_index == 1: + value = [y_dimobj, x_dimobj] + else: + raise NotImplementedError(self.x_index, self.y_index) + return DimensionCollection(value=value) + + +class GridWrapper(AbstractWrapper): + value: esmpy.Grid + spec: GridSpec + corner_dims: DimensionCollection | None = None + + def fill_nc_variables(self, path: Path): + if self.corner_dims is not None: + raise NotImplementedError + with open_nc(path, "a") as ds: + staggerloc = esmpy.StaggerLoc.CENTER + x_center_data = self.spec.get_x_data(self.value, staggerloc) + set_variable_data( + ds.variables[self.spec.x_center], self.dims, x_center_data + ) + y_center_data = self.spec.get_y_data(self.value, staggerloc) + set_variable_data( + ds.variables[self.spec.y_center], self.dims, y_center_data + ) + + +class FieldWrapper(AbstractWrapper): + value: esmpy.Field + gwrap: GridWrapper + + def fill_nc_variable(self, path: Path): + with open_nc(path, "a") as ds: + var 
= ds.variables[self.value.name] + set_variable_data(var, self.dims, self.value.data) + class SmokeDustRegridProcessor: @@ -29,6 +173,12 @@ def __init__(self, context: SmokeDustContext): # Holds interpolation descriptive statistics self._interpolation_stats = None + # Caches regridding objects + self.__src_gwrap = None + self.__dst_gwrap = None + self.__dst_output_gwrap = None + self.__regridder = None + def log(self, *args: Any, **kwargs: Any) -> None: self._context.log(*args, **kwargs) @@ -44,41 +194,100 @@ def run(self, forecast_metadata: pd.DataFrame) -> None: self._run_impl_(forecast_metadata, rave_to_interpolate) + @property + def _src_gwrap(self) -> GridWrapper: + if self.__src_gwrap is None: + self.log("creating source grid from RAVE file") + src_nc2grid = NcToGrid( + path=self._context.grid_in, + spec=GridSpec( + x_center="grid_lont", + y_center="grid_latt", + x_dim=("grid_xt",), + y_dim=("grid_yt",), + x_corner="grid_lon", + y_corner="grid_lat", + x_corner_dim=("grid_x",), + y_corner_dim=("grid_y",), + ), + ) + self.__src_gwrap = src_nc2grid.create_grid_wrapper() + return self.__src_gwrap + + @property + def _dst_gwrap(self) -> GridWrapper: + if self.__dst_gwrap is None: + self.log("creating destination grid from RRFS grid file") + dst_nc2grid = NcToGrid( + path=self._context.grid_out, + spec=GridSpec( + x_center="grid_lont", + y_center="grid_latt", + x_dim=("grid_xt",), + y_dim=("grid_yt",), + x_corner="grid_lon", + y_corner="grid_lat", + x_corner_dim=("grid_x",), + y_corner_dim=("grid_y",), + ), + ) + self.__dst_gwrap = dst_nc2grid.create_grid_wrapper() + return self.__dst_gwrap + + @property + def _dst_output_gwrap(self) -> GridWrapper: + if self.__dst_output_gwrap is None: + # We are translating metadata and some structure for the destination grid. + dst_output_gwrap = copy(self._dst_gwrap) + dst_output_gwrap.corner_dims = None + dst_output_gwrap.spec = GridSpec( + x_center="geolon", y_center="geolat", x_dim=("lon",), y_dim=("lat",) + ) + dst_output_gwrap.dims = deepcopy(self._dst_gwrap.dims) + dst_output_gwrap.dims.value[0].name = ("lon",) + dst_output_gwrap.dims.value[1].name = ("lat",) + self.__dst_output_gwrap = dst_output_gwrap + return self.__dst_output_gwrap + + def _get_regridder_( + self, src_fwrap: FieldWrapper, dst_fwrap: FieldWrapper + ) -> esmpy.Regrid: + if self.__regridder is None: + self.log("creating regridder") + self.log(f"{src_fwrap.value.data.shape=}", level=logging.DEBUG) + self.log(f"{dst_fwrap.value.data.shape=}", level=logging.DEBUG) + if ( + self._context.predef_grid == PredefinedGrid.RRFS_NA_13km + or self._context.regrid_in_memory + ): + # ESMF does not like reading the weights for this field combination (rc=-1). The error can be + # bypassed by creating weights in-memory. 
+ self.log("creating regridding in-memory") + regridder = esmpy.Regrid( + src_fwrap.value, + dst_fwrap.value, + regrid_method=esmpy.RegridMethod.CONSERVE, + unmapped_action=esmpy.UnmappedAction.IGNORE, + ignore_degenerate=True, + # filename="/opt/project/weight_file.nc" # Can be used to create a weight file for testing + ) + else: + self.log("creating regridding from file") + regridder = esmpy.RegridFromFile( + src_fwrap.value, + dst_fwrap.value, + filename=str(self._context.weightfile), + ) + self.__regridder = regridder + return self.__regridder + def _run_impl_( self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Series ) -> None: - first = True for row_idx, row_data in rave_to_interpolate.iterrows(): row_dict = row_data.to_dict() self.log(f"processing RAVE interpolation row: {row_idx}, {row_dict}") - if first: - self.log("creating destination grid from RRFS grid file") - dst_nc2grid = NcToGrid( - path=self._context.grid_out, - spec=GridSpec( - x_center="grid_lont", - y_center="grid_latt", - x_dim=("grid_xt",), - y_dim=("grid_yt",), - x_corner="grid_lon", - y_corner="grid_lat", - x_corner_dim=("grid_x",), - y_corner_dim=("grid_y",), - ), - ) - dst_gwrap = dst_nc2grid.create_grid_wrapper() - - # We are translating metadata and some structure for the destination grid. - dst_output_gwrap = copy(dst_gwrap) - dst_output_gwrap.corner_dims = None - dst_output_gwrap.spec = GridSpec( - x_center="geolon", y_center="geolat", x_dim=("lon",), y_dim=("lat",) - ) - dst_output_gwrap.dims = deepcopy(dst_gwrap.dims) - dst_output_gwrap.dims.value[0].name = ("lon",) - dst_output_gwrap.dims.value[1].name = ("lat",) - forecast_date = row_data["forecast_date"] output_file_path = ( self._context.intp_dir @@ -90,7 +299,7 @@ def _run_impl_( for varname in ["frp_avg_hr", "FRE"]: create_sd_variable(ds, SD_VARS.get(varname)) - dst_output_gwrap.fill_nc_variables(output_file_path) + self._dst_output_gwrap.fill_nc_variables(output_file_path) for field_name in self._context.vars_emis: match field_name: @@ -105,66 +314,20 @@ def _run_impl_( dst_nc2field = NcToField( path=output_file_path, name=dst_field_name, - gwrap=dst_output_gwrap, + gwrap=self._dst_output_gwrap, dim_time=("t",), ) dst_fwrap = dst_nc2field.create_field_wrapper() - if first: - self.log("creating source grid from RAVE file") - src_nc2grid = NcToGrid( - path=self._context.grid_in, - spec=GridSpec( - x_center="grid_lont", - y_center="grid_latt", - x_dim=("grid_xt",), - y_dim=("grid_yt",), - x_corner="grid_lon", - y_corner="grid_lat", - x_corner_dim=("grid_x",), - y_corner_dim=("grid_y",), - ), - ) - src_gwrap = src_nc2grid.create_grid_wrapper() - self.log("creating source field", level=logging.DEBUG) src_nc2field = NcToField( path=row_data["rave_raw"], name=field_name, - gwrap=src_gwrap, + gwrap=self._src_gwrap, dim_time=("time",), ) src_fwrap = src_nc2field.create_field_wrapper() - if first: - self.log("creating regridder") - self.log(f"{src_fwrap.value.data.shape=}", level=logging.DEBUG) - self.log(f"{dst_fwrap.value.data.shape=}", level=logging.DEBUG) - if ( - self._context.predef_grid == PredefinedGrid.RRFS_NA_13km - or self._context.regrid_in_memory - ): - # ESMF does not like reading the weights for this field combination (rc=-1). The error can be - # bypassed by creating weights in-memory. 
- self.log("creating regridding in-memory") - regridder = esmpy.Regrid( - src_fwrap.value, - dst_fwrap.value, - regrid_method=esmpy.RegridMethod.CONSERVE, - unmapped_action=esmpy.UnmappedAction.IGNORE, - ignore_degenerate=True, - # filename="/opt/project/weight_file.nc" # Can be used to create a weight file for testing - ) - else: - self.log("creating regridding from file") - regridder = esmpy.RegridFromFile( - src_fwrap.value, - dst_fwrap.value, - filename=str(self._context.weightfile), - ) - - first = False - src_data = src_fwrap.value.data match field_name: case "FRP_MEAN": @@ -187,6 +350,7 @@ def _run_impl_( # Execute the ESMF regridding self.log(f"run regridding", level=logging.DEBUG) + regridder = self._get_regridder_(src_fwrap, dst_fwrap) _ = regridder(src_fwrap.value, dst_fwrap.value) # Persist the destination field @@ -274,9 +438,6 @@ def _regrid_postprocessing_(self, row_data: pd.Series) -> None: HasNcAttrsType = Union[nc.Dataset, nc.Variable] -NameListType = Tuple[str, ...] - - def get_aliased_key(source: Dict, keys: NameListType | str) -> Any: if isinstance(keys, str): keys_to_find = (keys,) @@ -294,30 +455,6 @@ def get_nc_dimension(ds: nc.Dataset, names: NameListType) -> nc.Dimension: return get_aliased_key(ds.dimensions, names) -class Dimension(BaseModel): - name: NameListType - size: int - lower: int - upper: int - staggerloc: int - coordinate_type: Literal["y", "x", "time"] - - -class DimensionCollection(BaseModel): - value: Tuple[Dimension, ...] - - def get(self, name: str | NameListType) -> Dimension: - if isinstance(name, str): - name_to_find = (name,) - else: - name_to_find = name - for jj in name_to_find: - for ii in self.value: - if jj in ii.name: - return ii - raise ValueError(f"dimension not found: {name}") - - def create_dimension_map(dims: DimensionCollection) -> Dict[str, int]: ret = {} for idx, dim in enumerate(dims.value): @@ -354,114 +491,6 @@ def set_variable_data( return transposed_data -class AbstractWrapper(abc.ABC, BaseModel): - model_config = ConfigDict(arbitrary_types_allowed=True) - dims: DimensionCollection - - -class GridSpec(BaseModel): - model_config = ConfigDict(frozen=True) - - x_center: str - y_center: str - x_dim: NameListType - y_dim: NameListType - x_corner: str | None = None - y_corner: str | None = None - x_corner_dim: NameListType | None = None - y_corner_dim: NameListType | None = None - x_index: int = 0 - y_index: int = 1 - - @model_validator(mode="after") - def _validate_model_(self) -> "GridSpec": - corner_meta = [ - self.x_corner, - self.y_corner, - self.x_corner_dim, - self.y_corner_dim, - ] - is_given_sum = sum([ii is not None for ii in corner_meta]) - if is_given_sum > 0 and is_given_sum != len(corner_meta): - raise ValueError( - "if one corner name is supplied, then all must be supplied" - ) - return self - - @property - def has_corners(self) -> bool: - return self.x_corner is not None - - def get_x_corner(self) -> str: - if self.x_corner is None: - raise ValueError - return self.x_corner - - def get_y_corner(self) -> str: - if self.y_corner is None: - raise ValueError - return self.y_corner - - def get_x_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: - return grid.get_coords(self.x_index, staggerloc=staggerloc) - - def get_y_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: - return grid.get_coords(self.y_index, staggerloc=staggerloc) - - def create_grid_dims( - self, ds: nc.Dataset, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc - ) -> DimensionCollection: - if staggerloc == 
esmpy.StaggerLoc.CENTER: - x_dim, y_dim = self.x_dim, self.y_dim - elif staggerloc == esmpy.StaggerLoc.CORNER: - x_dim, y_dim = self.x_corner_dim, self.y_corner_dim - else: - raise NotImplementedError(staggerloc) - x_dimobj = Dimension( - name=x_dim, - size=get_nc_dimension(ds, x_dim).size, - lower=grid.lower_bounds[staggerloc][self.x_index], - upper=grid.upper_bounds[staggerloc][self.x_index], - staggerloc=staggerloc, - coordinate_type="x", - ) - y_dimobj = Dimension( - name=y_dim, - size=get_nc_dimension(ds, y_dim).size, - lower=grid.lower_bounds[staggerloc][self.y_index], - upper=grid.upper_bounds[staggerloc][self.y_index], - staggerloc=staggerloc, - coordinate_type="y", - ) - if self.x_index == 0: - value = [x_dimobj, y_dimobj] - elif self.x_index == 1: - value = [y_dimobj, x_dimobj] - else: - raise NotImplementedError(self.x_index, self.y_index) - return DimensionCollection(value=value) - - -class GridWrapper(AbstractWrapper): - value: esmpy.Grid - spec: GridSpec - corner_dims: DimensionCollection | None = None - - def fill_nc_variables(self, path: Path): - if self.corner_dims is not None: - raise NotImplementedError - with open_nc(path, "a") as ds: - staggerloc = esmpy.StaggerLoc.CENTER - x_center_data = self.spec.get_x_data(self.value, staggerloc) - set_variable_data( - ds.variables[self.spec.x_center], self.dims, x_center_data - ) - y_center_data = self.spec.get_y_data(self.value, staggerloc) - set_variable_data( - ds.variables[self.spec.y_center], self.dims, y_center_data - ) - - class NcToGrid(BaseModel): path: Path spec: GridSpec @@ -523,16 +552,6 @@ def _add_corner_coords_( return dims -class FieldWrapper(AbstractWrapper): - value: esmpy.Field - gwrap: GridWrapper - - def fill_nc_variable(self, path: Path): - with open_nc(path, "a") as ds: - var = ds.variables[self.value.name] - set_variable_data(var, self.dims, self.value.data) - - class NcToField(BaseModel): path: Path name: str From af12544b0ad1398ec2434a7531b8a331e51e8b33 Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Fri, 31 Jan 2025 16:33:22 -0700 Subject: [PATCH 25/41] refactored regrid to put common files in separate module --- tests/test_python/test_smoke_dust/conftest.py | 1 - .../test_smoke_dust/test_core/test_regrid.py | 3 +- ush/smoke_dust/add_smoke.py | 5 +- ush/smoke_dust/core/context.py | 2 +- ush/smoke_dust/core/preprocessor.py | 2 +- ush/smoke_dust/core/regrid/__init__.py | 0 ush/smoke_dust/core/regrid/common.py | 341 +++++++++++++++++ .../core/{regrid.py => regrid/processor.py} | 349 +----------------- ush/smoke_dust/core/variable.py | 2 +- 9 files changed, 361 insertions(+), 344 deletions(-) create mode 100644 ush/smoke_dust/core/regrid/__init__.py create mode 100644 ush/smoke_dust/core/regrid/common.py rename ush/smoke_dust/core/{regrid.py => regrid/processor.py} (51%) diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py index bde707ded5..beb9c7bd4b 100644 --- a/tests/test_python/test_smoke_dust/conftest.py +++ b/tests/test_python/test_smoke_dust/conftest.py @@ -5,7 +5,6 @@ import netCDF4 as nc import numpy as np - import pytest from smoke_dust.core.context import SmokeDustContext diff --git a/tests/test_python/test_smoke_dust/test_core/test_regrid.py b/tests/test_python/test_smoke_dust/test_core/test_regrid.py index 0876820115..14a3b89ec9 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_regrid.py +++ b/tests/test_python/test_smoke_dust/test_core/test_regrid.py @@ -13,10 +13,9 @@ from smoke_dust.core.context import SmokeDustContext from 
smoke_dust.core.preprocessor import SmokeDustPreprocessor -from smoke_dust.core.regrid import SmokeDustRegridProcessor +from smoke_dust.core.regrid.processor import SmokeDustRegridProcessor from test_python.test_smoke_dust.conftest import ( FakeGridOutShape, - create_grid_out, create_context, create_file_hash, ) diff --git a/ush/smoke_dust/add_smoke.py b/ush/smoke_dust/add_smoke.py index 64229d7e36..6d305665b9 100755 --- a/ush/smoke_dust/add_smoke.py +++ b/ush/smoke_dust/add_smoke.py @@ -1,9 +1,10 @@ #!/usr/bin/env python3 +import os from typing import Tuple -import xarray as xr + import numpy as np -import os +import xarray as xr def populate_data(data: np.ndarray, target_shape: Tuple) -> np.ndarray: diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py index 2e5b2078ae..149b9a742d 100644 --- a/ush/smoke_dust/core/context.py +++ b/ush/smoke_dust/core/context.py @@ -4,7 +4,7 @@ import os from enum import unique, StrEnum, IntEnum from pathlib import Path -from typing import Tuple, List, Any +from typing import Tuple, List from mpi4py import MPI from pydantic import BaseModel, model_validator diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py index f48e3d249e..dd46a96107 100644 --- a/ush/smoke_dust/core/preprocessor.py +++ b/ush/smoke_dust/core/preprocessor.py @@ -12,7 +12,7 @@ ) from smoke_dust.core.context import SmokeDustContext from smoke_dust.core.cycle import create_cycle_processor -from smoke_dust.core.regrid import SmokeDustRegridProcessor +from smoke_dust.core.regrid.processor import SmokeDustRegridProcessor from smoke_dust.core.variable import SD_VARS diff --git a/ush/smoke_dust/core/regrid/__init__.py b/ush/smoke_dust/core/regrid/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ush/smoke_dust/core/regrid/common.py b/ush/smoke_dust/core/regrid/common.py new file mode 100644 index 0000000000..803f66390c --- /dev/null +++ b/ush/smoke_dust/core/regrid/common.py @@ -0,0 +1,341 @@ +import abc +from pathlib import Path +from typing import Tuple, Literal, Union, Dict, Any + +import esmpy +import netCDF4 as nc +import numpy as np +from pydantic import BaseModel, ConfigDict, model_validator + +from smoke_dust.core.common import open_nc + +NameListType = Tuple[str, ...] + + +class Dimension(BaseModel): + name: NameListType + size: int + lower: int + upper: int + staggerloc: int + coordinate_type: Literal["y", "x", "time"] + + +class DimensionCollection(BaseModel): + value: Tuple[Dimension, ...] 
+ + def get(self, name: str | NameListType) -> Dimension: + if isinstance(name, str): + name_to_find = (name,) + else: + name_to_find = name + for jj in name_to_find: + for ii in self.value: + if jj in ii.name: + return ii + raise ValueError(f"dimension not found: {name}") + + +class AbstractWrapper(abc.ABC, BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True) + dims: DimensionCollection + + +class GridSpec(BaseModel): + model_config = ConfigDict(frozen=True) + + x_center: str + y_center: str + x_dim: NameListType + y_dim: NameListType + x_corner: str | None = None + y_corner: str | None = None + x_corner_dim: NameListType | None = None + y_corner_dim: NameListType | None = None + x_index: int = 0 + y_index: int = 1 + + @model_validator(mode="after") + def _validate_model_(self) -> "GridSpec": + corner_meta = [ + self.x_corner, + self.y_corner, + self.x_corner_dim, + self.y_corner_dim, + ] + is_given_sum = sum([ii is not None for ii in corner_meta]) + if is_given_sum > 0 and is_given_sum != len(corner_meta): + raise ValueError( + "if one corner name is supplied, then all must be supplied" + ) + return self + + @property + def has_corners(self) -> bool: + return self.x_corner is not None + + def get_x_corner(self) -> str: + if self.x_corner is None: + raise ValueError + return self.x_corner + + def get_y_corner(self) -> str: + if self.y_corner is None: + raise ValueError + return self.y_corner + + def get_x_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: + return grid.get_coords(self.x_index, staggerloc=staggerloc) + + def get_y_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: + return grid.get_coords(self.y_index, staggerloc=staggerloc) + + def create_grid_dims( + self, ds: nc.Dataset, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc + ) -> DimensionCollection: + if staggerloc == esmpy.StaggerLoc.CENTER: + x_dim, y_dim = self.x_dim, self.y_dim + elif staggerloc == esmpy.StaggerLoc.CORNER: + x_dim, y_dim = self.x_corner_dim, self.y_corner_dim + else: + raise NotImplementedError(staggerloc) + x_dimobj = Dimension( + name=x_dim, + size=get_nc_dimension(ds, x_dim).size, + lower=grid.lower_bounds[staggerloc][self.x_index], + upper=grid.upper_bounds[staggerloc][self.x_index], + staggerloc=staggerloc, + coordinate_type="x", + ) + y_dimobj = Dimension( + name=y_dim, + size=get_nc_dimension(ds, y_dim).size, + lower=grid.lower_bounds[staggerloc][self.y_index], + upper=grid.upper_bounds[staggerloc][self.y_index], + staggerloc=staggerloc, + coordinate_type="y", + ) + if self.x_index == 0: + value = [x_dimobj, y_dimobj] + elif self.x_index == 1: + value = [y_dimobj, x_dimobj] + else: + raise NotImplementedError(self.x_index, self.y_index) + return DimensionCollection(value=value) + + +class GridWrapper(AbstractWrapper): + value: esmpy.Grid + spec: GridSpec + corner_dims: DimensionCollection | None = None + + def fill_nc_variables(self, path: Path): + if self.corner_dims is not None: + raise NotImplementedError + with open_nc(path, "a") as ds: + staggerloc = esmpy.StaggerLoc.CENTER + x_center_data = self.spec.get_x_data(self.value, staggerloc) + set_variable_data( + ds.variables[self.spec.x_center], self.dims, x_center_data + ) + y_center_data = self.spec.get_y_data(self.value, staggerloc) + set_variable_data( + ds.variables[self.spec.y_center], self.dims, y_center_data + ) + + +class FieldWrapper(AbstractWrapper): + value: esmpy.Field + gwrap: GridWrapper + + def fill_nc_variable(self, path: Path): + with open_nc(path, "a") as ds: + var 
= ds.variables[self.value.name] + set_variable_data(var, self.dims, self.value.data) + + +HasNcAttrsType = Union[nc.Dataset, nc.Variable] + + +def get_aliased_key(source: Dict, keys: NameListType | str) -> Any: + if isinstance(keys, str): + keys_to_find = (keys,) + else: + keys_to_find = keys + for key in keys_to_find: + try: + return source[key] + except KeyError: + continue + raise ValueError(f"key not found: {keys}") + + +def get_nc_dimension(ds: nc.Dataset, names: NameListType) -> nc.Dimension: + return get_aliased_key(ds.dimensions, names) + + +def create_dimension_map(dims: DimensionCollection) -> Dict[str, int]: + ret = {} + for idx, dim in enumerate(dims.value): + for name in dim.name: + ret[name] = idx + return ret + + +def load_variable_data( + var: nc.Variable, target_dims: DimensionCollection +) -> np.ndarray: + slices = [ + slice(target_dims.get(ii).lower, target_dims.get(ii).upper) + for ii in var.dimensions + ] + raw_data = var[*slices] + dim_map = {dim: ii for ii, dim in enumerate(var.dimensions)} + axes = [get_aliased_key(dim_map, ii.name) for ii in target_dims.value] + transposed_data = raw_data.transpose(axes) + return transposed_data + + +def set_variable_data( + var: nc.Variable, target_dims: DimensionCollection, target_data: np.ndarray +) -> np.ndarray: + dim_map = create_dimension_map(target_dims) + axes = [get_aliased_key(dim_map, ii) for ii in var.dimensions] + transposed_data = target_data.transpose(axes) + slices = [ + slice(target_dims.get(ii).lower, target_dims.get(ii).upper) + for ii in var.dimensions + ] + var[*slices] = transposed_data + return transposed_data + + +class NcToGrid(BaseModel): + path: Path + spec: GridSpec + + def create_grid_wrapper(self) -> GridWrapper: + with open_nc(self.path, "r") as ds: + grid_shape = self._create_grid_shape_(ds) + staggerloc = esmpy.StaggerLoc.CENTER + grid = esmpy.Grid( + grid_shape, + staggerloc=staggerloc, + coord_sys=esmpy.CoordSys.SPH_DEG, + ) + dims = self.spec.create_grid_dims(ds, grid, staggerloc) + grid_x_center_coords = self.spec.get_x_data(grid, staggerloc) + grid_x_center_coords[:] = load_variable_data( + ds.variables[self.spec.x_center], dims + ) + grid_y_center_coords = self.spec.get_y_data(grid, staggerloc) + grid_y_center_coords[:] = load_variable_data( + ds.variables[self.spec.y_center], dims + ) + + if self.spec.has_corners: + corner_dims = self._add_corner_coords_(ds, grid) + else: + corner_dims = None + + gwrap = GridWrapper( + value=grid, dims=dims, spec=self.spec, corner_dims=corner_dims + ) + return gwrap + + def _create_grid_shape_(self, ds: nc.Dataset) -> np.ndarray: + x_size = get_nc_dimension(ds, self.spec.x_dim).size + y_size = get_nc_dimension(ds, self.spec.y_dim).size + if self.spec.x_index == 0: + grid_shape = (x_size, y_size) + elif self.spec.x_index == 1: + grid_shape = (y_size, x_size) + else: + raise NotImplementedError(self.spec.x_index, self.spec.y_index) + return np.array(grid_shape) + + def _add_corner_coords_( + self, ds: nc.Dataset, grid: esmpy.Grid + ) -> DimensionCollection: + staggerloc = esmpy.StaggerLoc.CORNER + grid.add_coords(staggerloc) + dims = self.spec.create_grid_dims(ds, grid, staggerloc) + grid_x_corner_coords = self.spec.get_x_data(grid, staggerloc) + grid_x_corner_coords[:] = load_variable_data( + ds.variables[self.spec.x_corner], dims + ) + grid_y_corner_coords = self.spec.get_y_data(grid, staggerloc) + grid_y_corner_coords[:] = load_variable_data( + ds.variables[self.spec.y_corner], dims + ) + return dims + + +class NcToField(BaseModel): + path: Path + name: str 
+    gwrap: GridWrapper
+    dim_time: NameListType | None = None
+    staggerloc: int = esmpy.StaggerLoc.CENTER
+
+    def create_field_wrapper(self) -> FieldWrapper:
+        with open_nc(self.path, "r") as ds:
+            if self.dim_time is None:
+                ndbounds = None
+                target_dims = self.gwrap.dims
+            else:
+                ndbounds = (len(get_nc_dimension(ds, self.dim_time)),)
+                time_dim = Dimension(
+                    name=self.dim_time,
+                    size=ndbounds[0],
+                    lower=0,
+                    upper=ndbounds[0],
+                    staggerloc=self.staggerloc,
+                    coordinate_type="time",
+                )
+                target_dims = DimensionCollection(
+                    value=list(self.gwrap.dims.value) + [time_dim]
+                )
+            field = esmpy.Field(
+                self.gwrap.value,
+                name=self.name,
+                ndbounds=ndbounds,
+                staggerloc=self.staggerloc,
+            )
+            field.data[:] = load_variable_data(ds.variables[self.name], target_dims)
+            fwrap = FieldWrapper(value=field, dims=target_dims, gwrap=self.gwrap)
+            return fwrap
+
+
+def mask_edges(data: np.ma.MaskedArray, mask_width: int = 1) -> None:
+    """
+    Mask edges of domain for interpolation.
+
+    Args:
+        data: The masked array to alter
+        mask_width: The width of the mask at each edge
+
+    Returns:
+        ``None``. The mask of ``data`` is modified in place.
+    """
+    if data.ndim != 2:
+        raise ValueError(f"{data.ndim=}")
+
+    original_shape = data.shape
+    if mask_width < 1:
+        return  # No masking if mask_width is less than 1
+
+    target = data.mask
+    if isinstance(target, np.bool_):
+        data.mask = np.zeros_like(data, dtype=bool)
+        target = data.mask
+    # Mask top and bottom rows
+    target[:mask_width, :] = True
+    target[-mask_width:, :] = True
+
+    # Mask left and right columns
+    target[:, :mask_width] = True
+    target[:, -mask_width:] = True
+
+    if data.shape != original_shape:
+        raise ValueError("Data shape altered during masking.")
diff --git a/ush/smoke_dust/core/regrid.py b/ush/smoke_dust/core/regrid/processor.py
similarity index 51%
rename from ush/smoke_dust/core/regrid.py
rename to ush/smoke_dust/core/regrid/processor.py
index f3332d4805..f4850ae2a7 100644
--- a/ush/smoke_dust/core/regrid.py
+++ b/ush/smoke_dust/core/regrid/processor.py
@@ -1,14 +1,10 @@
-import abc
 import logging
 from copy import copy, deepcopy
-from pathlib import Path
-from typing import Any, Union, Dict, Tuple, Literal
+from typing import Any
 
 import esmpy
-import netCDF4 as nc
 import numpy as np
 import pandas as pd
-from pydantic import BaseModel, ConfigDict, model_validator
 
 from smoke_dust.core.common import (
     create_template_emissions_file,
@@ -17,152 +13,17 @@
     open_nc,
 )
 from smoke_dust.core.context import RaveQaFilter, SmokeDustContext, PredefinedGrid
+from smoke_dust.core.regrid.common import (
+    GridSpec,
+    GridWrapper,
+    FieldWrapper,
+    load_variable_data,
+    NcToGrid,
+    NcToField,
+    mask_edges,
+)
 from smoke_dust.core.variable import SD_VARS
 
-NameListType = Tuple[str, ...]
-
-
-class Dimension(BaseModel):
-    name: NameListType
-    size: int
-    lower: int
-    upper: int
-    staggerloc: int
-    coordinate_type: Literal["y", "x", "time"]
-
-
-class DimensionCollection(BaseModel):
-    value: Tuple[Dimension, ...]
- - def get(self, name: str | NameListType) -> Dimension: - if isinstance(name, str): - name_to_find = (name,) - else: - name_to_find = name - for jj in name_to_find: - for ii in self.value: - if jj in ii.name: - return ii - raise ValueError(f"dimension not found: {name}") - - -class AbstractWrapper(abc.ABC, BaseModel): - model_config = ConfigDict(arbitrary_types_allowed=True) - dims: DimensionCollection - - -class GridSpec(BaseModel): - model_config = ConfigDict(frozen=True) - - x_center: str - y_center: str - x_dim: NameListType - y_dim: NameListType - x_corner: str | None = None - y_corner: str | None = None - x_corner_dim: NameListType | None = None - y_corner_dim: NameListType | None = None - x_index: int = 0 - y_index: int = 1 - - @model_validator(mode="after") - def _validate_model_(self) -> "GridSpec": - corner_meta = [ - self.x_corner, - self.y_corner, - self.x_corner_dim, - self.y_corner_dim, - ] - is_given_sum = sum([ii is not None for ii in corner_meta]) - if is_given_sum > 0 and is_given_sum != len(corner_meta): - raise ValueError( - "if one corner name is supplied, then all must be supplied" - ) - return self - - @property - def has_corners(self) -> bool: - return self.x_corner is not None - - def get_x_corner(self) -> str: - if self.x_corner is None: - raise ValueError - return self.x_corner - - def get_y_corner(self) -> str: - if self.y_corner is None: - raise ValueError - return self.y_corner - - def get_x_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: - return grid.get_coords(self.x_index, staggerloc=staggerloc) - - def get_y_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: - return grid.get_coords(self.y_index, staggerloc=staggerloc) - - def create_grid_dims( - self, ds: nc.Dataset, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc - ) -> DimensionCollection: - if staggerloc == esmpy.StaggerLoc.CENTER: - x_dim, y_dim = self.x_dim, self.y_dim - elif staggerloc == esmpy.StaggerLoc.CORNER: - x_dim, y_dim = self.x_corner_dim, self.y_corner_dim - else: - raise NotImplementedError(staggerloc) - x_dimobj = Dimension( - name=x_dim, - size=get_nc_dimension(ds, x_dim).size, - lower=grid.lower_bounds[staggerloc][self.x_index], - upper=grid.upper_bounds[staggerloc][self.x_index], - staggerloc=staggerloc, - coordinate_type="x", - ) - y_dimobj = Dimension( - name=y_dim, - size=get_nc_dimension(ds, y_dim).size, - lower=grid.lower_bounds[staggerloc][self.y_index], - upper=grid.upper_bounds[staggerloc][self.y_index], - staggerloc=staggerloc, - coordinate_type="y", - ) - if self.x_index == 0: - value = [x_dimobj, y_dimobj] - elif self.x_index == 1: - value = [y_dimobj, x_dimobj] - else: - raise NotImplementedError(self.x_index, self.y_index) - return DimensionCollection(value=value) - - -class GridWrapper(AbstractWrapper): - value: esmpy.Grid - spec: GridSpec - corner_dims: DimensionCollection | None = None - - def fill_nc_variables(self, path: Path): - if self.corner_dims is not None: - raise NotImplementedError - with open_nc(path, "a") as ds: - staggerloc = esmpy.StaggerLoc.CENTER - x_center_data = self.spec.get_x_data(self.value, staggerloc) - set_variable_data( - ds.variables[self.spec.x_center], self.dims, x_center_data - ) - y_center_data = self.spec.get_y_data(self.value, staggerloc) - set_variable_data( - ds.variables[self.spec.y_center], self.dims, y_center_data - ) - - -class FieldWrapper(AbstractWrapper): - value: esmpy.Field - gwrap: GridWrapper - - def fill_nc_variable(self, path: Path): - with open_nc(path, "a") as ds: - var 
= ds.variables[self.value.name] - set_variable_data(var, self.dims, self.value.data) - class SmokeDustRegridProcessor: @@ -347,6 +208,9 @@ def _run_impl_( f"RAVE QA filter applied: {self._context.rave_qa_filter=}; {set_to_zero.size=}; {np.sum(set_to_zero)=}" ) src_data[set_to_zero] = 0.0 + else: + if self._context.rave_qa_filter != RaveQaFilter.NONE: + raise NotImplementedError # Execute the ESMF regridding self.log(f"run regridding", level=logging.DEBUG) @@ -433,190 +297,3 @@ def _regrid_postprocessing_(self, row_data: pd.Series) -> None: ) self.log("_run_interpolation_postprocessing: exit", level=logging.DEBUG) - - -HasNcAttrsType = Union[nc.Dataset, nc.Variable] - - -def get_aliased_key(source: Dict, keys: NameListType | str) -> Any: - if isinstance(keys, str): - keys_to_find = (keys,) - else: - keys_to_find = keys - for key in keys_to_find: - try: - return source[key] - except KeyError: - continue - raise ValueError(f"key not found: {keys}") - - -def get_nc_dimension(ds: nc.Dataset, names: NameListType) -> nc.Dimension: - return get_aliased_key(ds.dimensions, names) - - -def create_dimension_map(dims: DimensionCollection) -> Dict[str, int]: - ret = {} - for idx, dim in enumerate(dims.value): - for name in dim.name: - ret[name] = idx - return ret - - -def load_variable_data( - var: nc.Variable, target_dims: DimensionCollection -) -> np.ndarray: - slices = [ - slice(target_dims.get(ii).lower, target_dims.get(ii).upper) - for ii in var.dimensions - ] - raw_data = var[*slices] - dim_map = {dim: ii for ii, dim in enumerate(var.dimensions)} - axes = [get_aliased_key(dim_map, ii.name) for ii in target_dims.value] - transposed_data = raw_data.transpose(axes) - return transposed_data - - -def set_variable_data( - var: nc.Variable, target_dims: DimensionCollection, target_data: np.ndarray -) -> np.ndarray: - dim_map = create_dimension_map(target_dims) - axes = [get_aliased_key(dim_map, ii) for ii in var.dimensions] - transposed_data = target_data.transpose(axes) - slices = [ - slice(target_dims.get(ii).lower, target_dims.get(ii).upper) - for ii in var.dimensions - ] - var[*slices] = transposed_data - return transposed_data - - -class NcToGrid(BaseModel): - path: Path - spec: GridSpec - - def create_grid_wrapper(self) -> GridWrapper: - with open_nc(self.path, "r") as ds: - grid_shape = self._create_grid_shape_(ds) - staggerloc = esmpy.StaggerLoc.CENTER - grid = esmpy.Grid( - grid_shape, - staggerloc=staggerloc, - coord_sys=esmpy.CoordSys.SPH_DEG, - ) - dims = self.spec.create_grid_dims(ds, grid, staggerloc) - grid_x_center_coords = self.spec.get_x_data(grid, staggerloc) - grid_x_center_coords[:] = load_variable_data( - ds.variables[self.spec.x_center], dims - ) - grid_y_center_coords = self.spec.get_y_data(grid, staggerloc) - grid_y_center_coords[:] = load_variable_data( - ds.variables[self.spec.y_center], dims - ) - - if self.spec.has_corners: - corner_dims = self._add_corner_coords_(ds, grid) - else: - corner_dims = None - - gwrap = GridWrapper( - value=grid, dims=dims, spec=self.spec, corner_dims=corner_dims - ) - return gwrap - - def _create_grid_shape_(self, ds: nc.Dataset) -> np.ndarray: - x_size = get_nc_dimension(ds, self.spec.x_dim).size - y_size = get_nc_dimension(ds, self.spec.y_dim).size - if self.spec.x_index == 0: - grid_shape = (x_size, y_size) - elif self.spec.x_index == 1: - grid_shape = (y_size, x_size) - else: - raise NotImplementedError(self.spec.x_index, self.spec.y_index) - return np.array(grid_shape) - - def _add_corner_coords_( - self, ds: nc.Dataset, grid: esmpy.Grid - ) 
-> DimensionCollection: - staggerloc = esmpy.StaggerLoc.CORNER - grid.add_coords(staggerloc) - dims = self.spec.create_grid_dims(ds, grid, staggerloc) - grid_x_corner_coords = self.spec.get_x_data(grid, staggerloc) - grid_x_corner_coords[:] = load_variable_data( - ds.variables[self.spec.x_corner], dims - ) - grid_y_corner_coords = self.spec.get_y_data(grid, staggerloc) - grid_y_corner_coords[:] = load_variable_data( - ds.variables[self.spec.y_corner], dims - ) - return dims - - -class NcToField(BaseModel): - path: Path - name: str - gwrap: GridWrapper - dim_time: NameListType | None = None - staggerloc: int = esmpy.StaggerLoc.CENTER - - def create_field_wrapper(self) -> FieldWrapper: - with open_nc(self.path, "r") as ds: - if self.dim_time is None: - ndbounds = None - target_dims = self.gwrap.dims - else: - ndbounds = (len(get_nc_dimension(ds, self.dim_time)),) - time_dim = Dimension( - name=self.dim_time, - size=ndbounds[0], - lower=0, - upper=ndbounds[0], - staggerloc=self.staggerloc, - coordinate_type="time", - ) - target_dims = DimensionCollection( - value=list(self.gwrap.dims.value) + [time_dim] - ) - field = esmpy.Field( - self.gwrap.value, - name=self.name, - ndbounds=ndbounds, - staggerloc=self.staggerloc, - ) - field.data[:] = load_variable_data(ds.variables[self.name], target_dims) - fwrap = FieldWrapper(value=field, dims=target_dims, gwrap=self.gwrap) - return fwrap - - -def mask_edges(data: np.ma.MaskedArray, mask_width: int = 1) -> None: - """ - Mask edges of domain for interpolation. - - Args: - data: The masked array to alter - mask_width: The width of the mask at each edge - - Returns: - A numpy array of the masked edges - """ - if data.ndim != 2: - raise ValueError(f"{data.ndim=}") - - original_shape = data.shape - if mask_width < 1: - return # No masking if mask_width is less than 1 - - target = data.mask - if isinstance(target, np.bool_): - data.mask = np.zeros_like(data, dtype=bool) - target = data.mask - # Mask top and bottom rows - target[:mask_width, :] = True - target[-mask_width:, :] = True - - # Mask left and right columns - target[:, :mask_width] = True - target[:, -mask_width:] = True - - if data.shape != original_shape: - raise ValueError("Data shape altered during masking.") diff --git a/ush/smoke_dust/core/variable.py b/ush/smoke_dust/core/variable.py index 4e9ef5d8dc..c33003a5e9 100644 --- a/ush/smoke_dust/core/variable.py +++ b/ush/smoke_dust/core/variable.py @@ -1,4 +1,4 @@ -from typing import Tuple, Any +from typing import Tuple from pydantic import BaseModel, field_validator From 531a5e910f8f13aa06bf0e5d0a698dd89d5c031e Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Fri, 31 Jan 2025 16:40:19 -0700 Subject: [PATCH 26/41] minor --- ush/config.smoke_dust.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml index 9197db5ec3..64b5a7b112 100644 --- a/ush/config.smoke_dust.yaml +++ b/ush/config.smoke_dust.yaml @@ -75,4 +75,4 @@ smoke_dust_parm: PERSISTENCE: false RAVE_QA_FILTER: none EXIT_ON_ERROR: true - LOG_LEVEL: info #tdk: support upper on log_level \ No newline at end of file + LOG_LEVEL: info \ No newline at end of file From 955d01350cb68315f349bb26baf8d868456664e7 Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Fri, 31 Jan 2025 16:49:33 -0700 Subject: [PATCH 27/41] minor --- tests/test_python/test_smoke_dust/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py index 
beb9c7bd4b..73f5603d74 100644 --- a/tests/test_python/test_smoke_dust/conftest.py +++ b/tests/test_python/test_smoke_dust/conftest.py @@ -31,7 +31,6 @@ def bin_dir() -> Path: def create_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None: - # tdk:rm: use one with corners with nc.Dataset(root_dir / "ds_out_base.nc", "w") as ds: ds.createDimension("grid_yt", shape.y_size) ds.createDimension("grid_xt", shape.x_size) From 58605de7162b59ad023179a7817f844d386d80a6 Mon Sep 17 00:00:00 2001 From: benkozi Date: Mon, 3 Feb 2025 16:45:39 -0700 Subject: [PATCH 28/41] fix: pylint for test files (#6) --- tests/test_python/test_smoke_dust/conftest.py | 74 +++++++---- .../test_smoke_dust/test_core/test_regrid.py | 119 +++++++++++++----- .../test_generate_emissions.py | 93 +++++++++----- ush/smoke_dust/core/common.py | 8 +- ush/smoke_dust/core/context.py | 4 +- ush/smoke_dust/core/cycle.py | 61 +++------ ush/smoke_dust/core/preprocessor.py | 17 +-- ush/smoke_dust/core/regrid/common.py | 54 ++------ ush/smoke_dust/core/regrid/processor.py | 25 +--- ush/smoke_dust/core/variable.py | 4 +- 10 files changed, 242 insertions(+), 217 deletions(-) diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py index 73f5603d74..96ab6796fc 100644 --- a/tests/test_python/test_smoke_dust/conftest.py +++ b/tests/test_python/test_smoke_dust/conftest.py @@ -1,3 +1,5 @@ +"""Shared pytest fixtures and test functions.""" + import hashlib import os from dataclasses import dataclass @@ -12,25 +14,38 @@ @dataclass class FakeGridOutShape: + """Explicitly defines the test grid shape.""" + y_size: int = 5 x_size: int = 10 @property def as_tuple(self) -> tuple[int, int]: + """ + Convert the grid shape to a tuple. + """ return self.y_size, self.x_size @pytest.fixture def fake_grid_out_shape() -> FakeGridOutShape: + """Fixture creating the test grid shape.""" return FakeGridOutShape() @pytest.fixture def bin_dir() -> Path: + """Fixture returning the path to the binary test directory for this package.""" return (Path(__file__).parent / "bin").expanduser().resolve(strict=True) -def create_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None: +def create_fake_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None: + """Create the output grid netCDF file. The output grid is the domain grid for the experiment. + + Args: + root_dir: Directory to write grid to. + shape: Grid output shape. + """ with nc.Dataset(root_dir / "ds_out_base.nc", "w") as ds: ds.createDimension("grid_yt", shape.y_size) ds.createDimension("grid_xt", shape.x_size) @@ -39,36 +54,53 @@ def create_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None: var[:] = np.ones((shape.y_size, shape.x_size)) -def create_context( +def create_fake_context( root_dir: Path, overrides: dict | None = None, extra: dict | None = None ) -> SmokeDustContext: + """ + Create a fake context for the test runner. + Args: + root_dir: Path to write fake test files to. + overrides: If provided, override the required context arguments - the arguments provided to + the CLI program. + extra: If provided, override context parameters not used in the CLI. + + Returns: + A fake context to use for testing. 
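+
+    A minimal usage sketch (the override key mirrors the CLI argument names
+    assembled below):
+
+        context = create_fake_context(tmp_path, overrides={"ebb_dcycle_flag": "1"})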
+ """ current_day = "2019072200" nwges_dir = root_dir os.environ["CDATE"] = current_day os.environ["DATA"] = str(nwges_dir) - try: - kwds = dict( - staticdir=root_dir, - ravedir=root_dir, - intp_dir=root_dir, - predef_grid="RRFS_CONUS_3km", - ebb_dcycle_flag="2", - restart_interval="6 12 18 24", - persistence="FALSE", - rave_qa_filter="NONE", - exit_on_error="TRUE", - log_level="debug", - ) - if overrides is not None: - kwds.update(overrides) - context = SmokeDustContext.create_from_args(kwds.values(), extra=extra) - finally: - for ii in ["CDATE", "DATA"]: - os.unsetenv(ii) + kwds = { + "staticdir": root_dir, + "ravedir": root_dir, + "intp_dir": root_dir, + "predef_grid": "RRFS_CONUS_3km", + "ebb_dcycle_flag": "2", + "restart_interval": "6 12 18 24", + "persistence": "FALSE", + "rave_qa_filter": "NONE", + "exit_on_error": "TRUE", + "log_level": "debug", + } + if overrides is not None: + kwds.update(overrides) + context = SmokeDustContext.create_from_args(kwds.values(), extra=extra) + for ii in ["CDATE", "DATA"]: + os.unsetenv(ii) return context def create_file_hash(path: Path) -> str: + """ + Create a unique file hash to use for bit-for-bit comparison. + Args: + path: Target binary file to hash. + + Returns: + The file's hex digest. + """ with open(path, "rb") as f: file_hash = hashlib.md5() while chunk := f.read(8192): diff --git a/tests/test_python/test_smoke_dust/test_core/test_regrid.py b/tests/test_python/test_smoke_dust/test_core/test_regrid.py index 14a3b89ec9..15fe1f9d33 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_regrid.py +++ b/tests/test_python/test_smoke_dust/test_core/test_regrid.py @@ -1,14 +1,15 @@ +"""Tests the regrid processor.""" + import glob import shutil import subprocess from pathlib import Path -from typing import Any import numpy as np import pytest import xarray as xr from _pytest.fixtures import SubRequest -from pydantic import BaseModel +from pydantic import BaseModel, Field from pytest_mock import MockerFixture from smoke_dust.core.context import SmokeDustContext @@ -16,27 +17,61 @@ from smoke_dust.core.regrid.processor import SmokeDustRegridProcessor from test_python.test_smoke_dust.conftest import ( FakeGridOutShape, - create_context, + create_fake_context, create_file_hash, ) -def ncdump(path: Path, header_only: bool = True) -> Any: +def ncdump(path: Path, header_only: bool = True) -> str: + """ + Convenience wrapper for calling the ncdump utility. + + Args: + path: Target netCDF file. + header_only: If True, return only netCDF header information. + + Returns: + Output from the ncdump program. 
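+
+    Example (a sketch; assumes the `ncdump` utility is on PATH):
+
+        header = ncdump(tmp_path / "weight_file.nc")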
+ """ args = ["ncdump"] if header_only: args.append("-h") args.append(str(path)) - ret = subprocess.check_output(args) - print(ret.decode(), flush=True) + ret = subprocess.check_output(args).decode() + print(ret, flush=True) return ret class DataForTest(BaseModel): - model_config = dict(arbitrary_types_allowed=True) + """Model holds objects needed for testing.""" + + model_config = {"arbitrary_types_allowed": True} context: SmokeDustContext preprocessor: SmokeDustPreprocessor +class FakeGridParams(BaseModel): + """Model for a fake RAVE/RRFS data file definition.""" + + path: Path = Field(description="Path to the output data file.") + shape: FakeGridOutShape = Field(description="Output grid shape.") + with_corners: bool = Field( + description="If True, create the output grid with corners", default=True + ) + fields: list[str] | None = Field( + description="If provided, a list of field names to create in the output file.", default=None + ) + min_lon: int = Field( + description="The minimum longitude value as origin for grid generation.", default=230 + ) + min_lat: int = Field( + description="The minimum latitude value as origin for grid generation.", default=25 + ) + ntime: int | None = Field( + description="If provided, create the output fields with this many time steps.", default=1 + ) + + @pytest.fixture(params=[True, False], ids=lambda p: f"regrid_in_memory={p}") def data_for_test( request: SubRequest, @@ -44,19 +79,20 @@ def data_for_test( fake_grid_out_shape: FakeGridOutShape, bin_dir: Path, ) -> DataForTest: + """Create test data including any required data files.""" weight_file = "weight_file.nc" shutil.copy(bin_dir / weight_file, tmp_path / "weight_file.nc") for name in ["ds_out_base.nc", "grid_in.nc"]: path = tmp_path / name - create_rave_and_rrfs_like_data( - path, fake_grid_out_shape, fields=["area"], ntime=None + _ = create_fake_rave_and_rrfs_like_data( + FakeGridParams(path=path, shape=fake_grid_out_shape, fields=["area"], ntime=None) ) - context = create_context(tmp_path, extra=dict(regrid_in_memory=request.param)) + context = create_fake_context(tmp_path, extra={"regrid_in_memory": request.param}) preprocessor = SmokeDustPreprocessor(context) for date in preprocessor.forecast_dates: path = tmp_path / f"Hourly_Emissions_3km_{date}_{date}.nc" - create_rave_and_rrfs_like_data( - path, fake_grid_out_shape, fields=["FRP_MEAN", "FRE"] + _ = create_fake_rave_and_rrfs_like_data( + FakeGridParams(path=path, shape=fake_grid_out_shape, fields=["FRP_MEAN", "FRE"]) ) return DataForTest(context=context, preprocessor=preprocessor) @@ -67,6 +103,18 @@ def create_analytic_data_array( lat_mesh: np.ndarray, ntime: int | None = None, ) -> xr.DataArray: + """ + Create an analytic data array using lat/lon values. + + Args: + dims: Names of the lat/lon dimensions. For example `["lat", "lon"]`. + lon_mesh: A two-dimensional array of longitude values. + lat_mesh: A two-dimensional array of latitude values. + ntime: If provided, create the output data array with the provided number of time steps. + + Returns: + An analytic data array. 
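+
+    A small illustrative call (hypothetical mesh values):
+
+        lon_mesh, lat_mesh = np.meshgrid(np.arange(230.0, 240.0), np.arange(25.0, 30.0))
+        data = create_analytic_data_array(["grid_yt", "grid_xt"], lon_mesh, lat_mesh)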
+ """ deg_to_rad = 3.141592653589793 / 180.0 analytic_data = 2.0 + np.cos(deg_to_rad * lon_mesh) ** 2 * np.cos( 2.0 * deg_to_rad * (90.0 - lat_mesh) @@ -82,47 +130,52 @@ def create_analytic_data_array( ) -def create_rave_and_rrfs_like_data( - path: Path, - shape: FakeGridOutShape, - with_corners: bool = True, - fields: list[str] | None = None, - min_lon: int = 230, - min_lat: int = 25, - ntime: int | None = 1, -) -> xr.Dataset: - if path.exists(): - raise ValueError(f"path exists: {path}") - lon = np.arange(shape.x_size, dtype=float) + min_lon - lat = np.arange(shape.y_size, dtype=float) + min_lat +def create_fake_rave_and_rrfs_like_data(params: FakeGridParams) -> xr.Dataset: + """ + Create fake RAVE and RRFS data. These data files share a common grid. + + Returns: + The created dataset object. + """ + if params.path.exists(): + raise ValueError(f"path exists: {params.path}") + lon = np.arange(params.shape.x_size, dtype=float) + params.min_lon + lat = np.arange(params.shape.y_size, dtype=float) + params.min_lat lon_mesh, lat_mesh = np.meshgrid(lon, lat) ds = xr.Dataset() dims = ["grid_yt", "grid_xt"] ds["grid_lont"] = xr.DataArray(lon_mesh, dims=dims) ds["grid_latt"] = xr.DataArray(lat_mesh, dims=dims) - if with_corners: + if params.with_corners: lonc = np.hstack((lon - 0.5, [lon[-1] + 0.5])) latc = np.hstack((lat - 0.5, [lat[-1] + 0.5])) lonc_mesh, latc_mesh = np.meshgrid(lonc, latc) ds["grid_lon"] = xr.DataArray(lonc_mesh, dims=["grid_y", "grid_x"]) ds["grid_lat"] = xr.DataArray(latc_mesh, dims=["grid_y", "grid_x"]) - if fields is not None: - if ntime is not None: + if params.fields is not None: + if params.ntime is not None: field_dims = ["time"] + dims else: field_dims = dims - for field in fields: + for field in params.fields: ds[field] = create_analytic_data_array( - field_dims, lon_mesh, lat_mesh, ntime=ntime + field_dims, lon_mesh, lat_mesh, ntime=params.ntime ) - ds.to_netcdf(path) + ds.to_netcdf(params.path) return ds -class TestSmokeDustRegridProcessor: +class TestSmokeDustRegridProcessor: # pylint: disable=too-few-public-methods + """Tests for the smoke/dust regrid processor.""" + def test_run( - self, data_for_test: DataForTest, mocker: MockerFixture, tmp_path: Path + self, + data_for_test: DataForTest, # pylint: disable=redefined-outer-name + mocker: MockerFixture, + tmp_path: Path, ) -> None: + """Test the regrid processor.""" + # tdk:story: add MPI testing spy1 = mocker.spy(SmokeDustRegridProcessor, "_run_impl_") regrid_processor = SmokeDustRegridProcessor(data_for_test.context) regrid_processor.run(data_for_test.preprocessor.forecast_metadata) diff --git a/tests/test_python/test_smoke_dust/test_generate_emissions.py b/tests/test_python/test_smoke_dust/test_generate_emissions.py index 4491cd7f5a..b9d60265a1 100644 --- a/tests/test_python/test_smoke_dust/test_generate_emissions.py +++ b/tests/test_python/test_smoke_dust/test_generate_emissions.py @@ -1,3 +1,5 @@ +"""Test emissions processing for smoke/dust.""" + from pathlib import Path from typing import Type @@ -18,15 +20,23 @@ from smoke_dust.core.preprocessor import SmokeDustPreprocessor from test_python.test_smoke_dust.conftest import ( FakeGridOutShape, - create_grid_out, - create_context, + create_fake_grid_out, + create_fake_context, create_file_hash, ) -def create_restart_files( +def create_fake_restart_files( root_dir: Path, forecast_dates: pd.DatetimeIndex, shape: FakeGridOutShape ) -> None: + """ + Create fake restart files expected for EBB_DCYLE=2. + + Args: + root_dir: Directory to create fake files in. 
+ forecast_dates: The series of dates to create the restart files for. + shape: Output grid shape. + """ restart_dir = root_dir / "RESTART" restart_dir.mkdir() for date in forecast_dates: @@ -35,22 +45,27 @@ def create_restart_files( ds.createDimension("Time") ds.createDimension("yaxis_1", shape.y_size) ds.createDimension("xaxis_1", shape.x_size) - totprcp_ave = ds.createVariable( - "totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1") - ) + totprcp_ave = ds.createVariable("totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) totprcp_ave[0, ...] = np.ones(shape.as_tuple) - rrfs_hwp_ave = ds.createVariable( - "rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1") - ) + rrfs_hwp_ave = ds.createVariable("rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) rrfs_hwp_ave[0, ...] = totprcp_ave[:] + 2 -def create_rave_interpolated( +def create_fake_rave_interpolated( root_dir: Path, forecast_dates: pd.DatetimeIndex, shape: FakeGridOutShape, rave_to_intp: str, ) -> None: + """ + Create fake interpolated RAVE data. + + Args: + root_dir: The directory to create fake interpolated data in. + forecast_dates: The series of dates to create the interpolated data for. + shape: The output grid shape. + rave_to_intp: Filename prefix to use for output files. + """ for date in forecast_dates: intp_file = root_dir / f"{rave_to_intp}{date}00_{date}59.nc" dims = ("t", "lat", "lon") @@ -63,7 +78,14 @@ def create_rave_interpolated( var[0, ...] = np.ones(shape.as_tuple) -def create_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: +def create_fake_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: + """ + Create a fake vegetation map data file. + + Args: + root_dir: The directory to create the file in. + shape: Shape of the output grid. + """ with nc.Dataset(root_dir / "veg_map.nc", "w") as ds: ds.createDimension("grid_yt", shape.y_size) ds.createDimension("grid_xt", shape.x_size) @@ -72,13 +94,17 @@ def create_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: class ExpectedData(BaseModel): + """Holds expected data to test against.""" + flag: str klass: Type[AbstractSmokeDustCycleProcessor] hash: str class DataForTest(BaseModel): - model_config = dict(arbitrary_types_allowed=True) + """Holds data objects used by the test.""" + + model_config = {"arbitrary_types_allowed": True} context: SmokeDustContext preprocessor: SmokeDustPreprocessor expected: ExpectedData @@ -86,40 +112,41 @@ class DataForTest(BaseModel): @pytest.fixture( params=[ - ExpectedData( - flag="1", klass=SmokeDustCycleOne, hash="d124734dfce7ca914391e35a02e4a7d2" - ), - ExpectedData( - flag="2", klass=SmokeDustCycleTwo, hash="6752199f1039edc936a942f3885af38b" - ), + ExpectedData(flag="1", klass=SmokeDustCycleOne, hash="d124734dfce7ca914391e35a02e4a7d2"), + ExpectedData(flag="2", klass=SmokeDustCycleTwo, hash="6752199f1039edc936a942f3885af38b"), ], ids=lambda p: f"ebb_dcycle_flag={p.flag}", ) def data_for_test( request: SubRequest, tmp_path: Path, fake_grid_out_shape: FakeGridOutShape ) -> DataForTest: - create_grid_out(tmp_path, fake_grid_out_shape) - create_veg_map(tmp_path, fake_grid_out_shape) - context = create_context( - tmp_path, overrides=dict(ebb_dcycle_flag=request.param.flag) - ) + """ + Creates the necessary test data including data files. 
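+
+    A sketch of how a test consumes the parametrized fixture:
+
+        def test_run(self, data_for_test: DataForTest) -> None:
+            assert data_for_test.expected.flag in ("1", "2")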
+ """ + create_fake_grid_out(tmp_path, fake_grid_out_shape) + create_fake_veg_map(tmp_path, fake_grid_out_shape) + context = create_fake_context(tmp_path, overrides={"ebb_dcycle_flag": request.param.flag}) preprocessor = SmokeDustPreprocessor(context) - create_restart_files(tmp_path, preprocessor.forecast_dates, fake_grid_out_shape) - create_rave_interpolated( + create_fake_restart_files(tmp_path, preprocessor.forecast_dates, fake_grid_out_shape) + create_fake_rave_interpolated( tmp_path, preprocessor.forecast_dates, fake_grid_out_shape, context.predef_grid.value + "_intp_", ) - return DataForTest( - context=context, preprocessor=preprocessor, expected=request.param - ) + return DataForTest(context=context, preprocessor=preprocessor, expected=request.param) -class TestSmokeDustPreprocessor: +class TestSmokeDustPreprocessor: # pylint: disable=too-few-public-methods + """Tests for the smoke/dust preprocessor.""" - def test_run(self, data_for_test: DataForTest, mocker: MockerFixture) -> None: + def test_run( + self, + data_for_test: DataForTest, # pylint: disable=redefined-outer-name + mocker: MockerFixture, + ) -> None: """Test core capabilities of the preprocessor. Note this does not test regridding.""" + # pylint: disable=protected-access preprocessor = data_for_test.preprocessor spy1 = mocker.spy(preprocessor, "create_dummy_emissions_file") regrid_processor_class = preprocessor._regrid_processor.__class__ @@ -131,6 +158,7 @@ def test_run(self, data_for_test: DataForTest, mocker: MockerFixture) -> None: assert isinstance(preprocessor._cycle_processor, data_for_test.expected.klass) assert preprocessor._forecast_metadata is None + # pylint: enable=protected-access assert not data_for_test.context.emissions_path.exists() preprocessor.run() @@ -141,7 +169,4 @@ def test_run(self, data_for_test: DataForTest, mocker: MockerFixture) -> None: spy5.assert_called_once() assert data_for_test.context.emissions_path.exists() - assert ( - create_file_hash(data_for_test.context.emissions_path) - == data_for_test.expected.hash - ) + assert create_file_hash(data_for_test.context.emissions_path) == data_for_test.expected.hash diff --git a/ush/smoke_dust/core/common.py b/ush/smoke_dust/core/common.py index f2923e9d93..8ba94204f5 100644 --- a/ush/smoke_dust/core/common.py +++ b/ush/smoke_dust/core/common.py @@ -109,9 +109,7 @@ def create_descriptive_statistics( origin: Literal["src", "dst_unmasked", "dst_masked", "derived"], path: Path, ) -> pd.DataFrame: - df = pd.DataFrame.from_dict( - {k: v.filled(np.nan).ravel() for k, v in container.items()} - ) + df = pd.DataFrame.from_dict({k: v.filled(np.nan).ravel() for k, v in container.items()}) desc = df.describe() adds = {} for field_name in container.keys(): @@ -121,7 +119,5 @@ def create_descriptive_statistics( origin, path, ] - desc = pd.concat( - [desc, pd.DataFrame(data=adds, index=["sum", "count_null", "origin", "path"])] - ) + desc = pd.concat([desc, pd.DataFrame(data=adds, index=["sum", "count_null", "origin", "path"])]) return desc diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py index 149b9a742d..e36362a40b 100644 --- a/ush/smoke_dust/core/context.py +++ b/ush/smoke_dust/core/context.py @@ -94,9 +94,7 @@ def _finalize_model_(self) -> "SmokeDustContext": return self @classmethod - def create_from_args( - cls, args: List[str], extra: dict | None = None - ) -> "SmokeDustContext": + def create_from_args(cls, args: List[str], extra: dict | None = None) -> "SmokeDustContext": print(f"create_from_args: {args=}", flush=True) # Extract 
local arguments from args before converting values diff --git a/ush/smoke_dust/core/cycle.py b/ush/smoke_dust/core/cycle.py index 5e71db3c0d..8def1799d7 100644 --- a/ush/smoke_dust/core/cycle.py +++ b/ush/smoke_dust/core/cycle.py @@ -37,9 +37,7 @@ def flag(self) -> EbbDCycle: ... def create_start_datetime(self) -> dt.datetime: ... @abc.abstractmethod - def average_frp( - self, forecast_metadata: pd.DataFrame - ) -> Dict[FrpVariable, np.ndarray]: ... + def average_frp(self, forecast_metadata: pd.DataFrame) -> Dict[FrpVariable, np.ndarray]: ... @abc.abstractmethod def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: ... @@ -62,9 +60,7 @@ def create_start_datetime(self) -> dt.datetime: def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: derived = self.average_frp(forecast_metadata) self.log(f"creating 24-hour emissions file: {self._context.emissions_path}") - with open_nc( - self._context.emissions_path, "w", parallel=False, clobber=True - ) as ds_out: + with open_nc(self._context.emissions_path, "w", parallel=False, clobber=True) as ds_out: create_template_emissions_file(ds_out, self._context.grid_out_shape) with open_nc(self._context.grid_out, parallel=False) as ds_src: ds_out.variables["geolat"][:] = ds_src.variables["grid_latt"][:] @@ -73,9 +69,7 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: create_sd_variable(ds_out, SD_VARS.get(var.value)) ds_out.variables[var.value][:] = fill_array - def average_frp( - self, forecast_metadata: pd.DataFrame - ) -> Dict[FrpVariable, np.ndarray]: + def average_frp(self, forecast_metadata: pd.DataFrame) -> Dict[FrpVariable, np.ndarray]: ebb_smoke_total = [] frp_avg_hr = [] @@ -91,9 +85,9 @@ def average_frp( frp = ds[EmissionVariable.FRP.smoke_dust_name()][0, :, :].values frp_avg_hr.append(frp) - ebb_hourly = ( - fre * emiss_factor * self._context.beta * self._context.fg_to_ug - ) / (target_area * self._context.to_s) + ebb_hourly = (fre * emiss_factor * self._context.beta * self._context.fg_to_ug) / ( + target_area * self._context.to_s + ) ebb_smoke_total.append(np.where(frp > 0, ebb_hourly, 0)) frp_avg_reshaped = np.stack(frp_avg_hr, axis=0) @@ -121,13 +115,8 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: hwp_ave = [] totprcp = np.zeros(self._context.grid_out_shape).ravel() for date in forecast_metadata["forecast_date"]: - phy_data_path = ( - self._context.hourly_hwpdir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" - ) - rave_path = ( - self._context.intp_dir - / f"{self._context.rave_to_intp}{date}00_{date}59.nc" - ) + phy_data_path = self._context.hourly_hwpdir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" + rave_path = self._context.intp_dir / f"{self._context.rave_to_intp}{date}00_{date}59.nc" self.log(f"processing emissions for: {phy_data_path=}, {rave_path=}") with xr.open_dataset(phy_data_path) as ds: hwp_values = ds.rrfs_hwp_ave.values.ravel() @@ -143,26 +132,18 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: t_fire = np.zeros(self._context.grid_out_shape) for date in forecast_metadata["forecast_date"]: - rave_path = ( - self._context.intp_dir - / f"{self._context.rave_to_intp}{date}00_{date}59.nc" - ) + rave_path = self._context.intp_dir / f"{self._context.rave_to_intp}{date}00_{date}59.nc" with xr.open_dataset(rave_path) as ds: frp = ds.frp_avg_hr[0, :, :].values dates_filtered = np.where(frp > 0, int(date[:10]), 0) t_fire = np.maximum(t_fire, dates_filtered) t_fire_flattened = [int(i) if i != 0 else 0 for i in t_fire.flatten()] hr_ends 
= [ - dt.datetime.strptime(str(hr), "%Y%m%d%H") if hr != 0 else 0 - for hr in t_fire_flattened + dt.datetime.strptime(str(hr), "%Y%m%d%H") if hr != 0 else 0 for hr in t_fire_flattened ] te = np.array( [ - ( - (self._context.fcst_datetime - i).total_seconds() / 3600 - if i != 0 - else 0 - ) + ((self._context.fcst_datetime - i).total_seconds() / 3600 if i != 0 else 0) for i in hr_ends ] ) @@ -212,9 +193,7 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: self.log("process_emissions: exit") - def average_frp( - self, forecast_metadata: pd.DataFrame - ) -> Dict[FrpVariable, np.ndarray]: + def average_frp(self, forecast_metadata: pd.DataFrame) -> Dict[FrpVariable, np.ndarray]: self.log(f"average_frp: entering") frp_daily = np.zeros(self._context.grid_out_shape).ravel() @@ -232,19 +211,13 @@ def average_frp( frp = ds[EmissionVariable.FRP.smoke_dust_name()][0, :, :].values ebb_hourly = ( - fre - * emiss_factor - * self._context.beta - * self._context.fg_to_ug - / target_area + fre * emiss_factor * self._context.beta * self._context.fg_to_ug / target_area ) ebb_smoke_total.append(np.where(frp > 0, ebb_hourly, 0).ravel()) frp_daily += np.where(frp > 0, frp, 0).ravel() summed_array = np.sum(np.array(ebb_smoke_total), axis=0) - num_zeros = len(ebb_smoke_total) - np.sum( - [arr == 0 for arr in ebb_smoke_total], axis=0 - ) + num_zeros = len(ebb_smoke_total) - np.sum([arr == 0 for arr in ebb_smoke_total], axis=0) safe_zero_count = np.where(num_zeros == 0, 1, num_zeros) result_array = np.array( [ @@ -261,11 +234,7 @@ def average_frp( ebb_total_reshaped = ebb_total / 3600 temp_frp = np.array( [ - ( - frp_daily[i] / 2 - if safe_zero_count[i] == 1 - else frp_daily[i] / safe_zero_count[i] - ) + (frp_daily[i] / 2 if safe_zero_count[i] == 1 else frp_daily[i] / safe_zero_count[i]) for i in range(len(safe_zero_count)) ] ) diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py index dd46a96107..ba1bfbd0b1 100644 --- a/ush/smoke_dust/core/preprocessor.py +++ b/ush/smoke_dust/core/preprocessor.py @@ -43,9 +43,9 @@ def forecast_dates(self) -> pd.DatetimeIndex: return self._forecast_dates start_datetime = self._cycle_processor.create_start_datetime() self.log(f"{start_datetime=}") - forecast_dates = pd.date_range( - start=start_datetime, periods=24, freq="h" - ).strftime("%Y%m%d%H") + forecast_dates = pd.date_range(start=start_datetime, periods=24, freq="h").strftime( + "%Y%m%d%H" + ) self._forecast_dates = forecast_dates return self._forecast_dates @@ -61,8 +61,7 @@ def forecast_metadata(self) -> pd.DataFrame: for date in self.forecast_dates: # Check for pre-existing interpolated RAVE data file_path = ( - Path(self._context.intp_dir) - / f"{self._context.rave_to_intp}{date}00_{date}59.nc" + Path(self._context.intp_dir) / f"{self._context.rave_to_intp}{date}00_{date}59.nc" ) if file_path.exists() and file_path.is_file(): try: @@ -124,12 +123,8 @@ def run(self) -> None: def create_dummy_emissions_file(self) -> None: self.log("create_dummy_emissions_file: enter") self.log(f"{self._context.emissions_path=}") - with open_nc( - self._context.emissions_path, "w", parallel=False, clobber=True - ) as ds: - create_template_emissions_file( - ds, self._context.grid_out_shape, is_dummy=True - ) + with open_nc(self._context.emissions_path, "w", parallel=False, clobber=True) as ds: + create_template_emissions_file(ds, self._context.grid_out_shape, is_dummy=True) with open_nc(self._context.grid_out, parallel=False) as ds_src: ds.variables["geolat"][:] = 
ds_src.variables["grid_latt"][:] ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:] diff --git a/ush/smoke_dust/core/regrid/common.py b/ush/smoke_dust/core/regrid/common.py index 803f66390c..d75d1f366f 100644 --- a/ush/smoke_dust/core/regrid/common.py +++ b/ush/smoke_dust/core/regrid/common.py @@ -65,9 +65,7 @@ def _validate_model_(self) -> "GridSpec": ] is_given_sum = sum([ii is not None for ii in corner_meta]) if is_given_sum > 0 and is_given_sum != len(corner_meta): - raise ValueError( - "if one corner name is supplied, then all must be supplied" - ) + raise ValueError("if one corner name is supplied, then all must be supplied") return self @property @@ -135,13 +133,9 @@ def fill_nc_variables(self, path: Path): with open_nc(path, "a") as ds: staggerloc = esmpy.StaggerLoc.CENTER x_center_data = self.spec.get_x_data(self.value, staggerloc) - set_variable_data( - ds.variables[self.spec.x_center], self.dims, x_center_data - ) + set_variable_data(ds.variables[self.spec.x_center], self.dims, x_center_data) y_center_data = self.spec.get_y_data(self.value, staggerloc) - set_variable_data( - ds.variables[self.spec.y_center], self.dims, y_center_data - ) + set_variable_data(ds.variables[self.spec.y_center], self.dims, y_center_data) class FieldWrapper(AbstractWrapper): @@ -182,13 +176,8 @@ def create_dimension_map(dims: DimensionCollection) -> Dict[str, int]: return ret -def load_variable_data( - var: nc.Variable, target_dims: DimensionCollection -) -> np.ndarray: - slices = [ - slice(target_dims.get(ii).lower, target_dims.get(ii).upper) - for ii in var.dimensions - ] +def load_variable_data(var: nc.Variable, target_dims: DimensionCollection) -> np.ndarray: + slices = [slice(target_dims.get(ii).lower, target_dims.get(ii).upper) for ii in var.dimensions] raw_data = var[*slices] dim_map = {dim: ii for ii, dim in enumerate(var.dimensions)} axes = [get_aliased_key(dim_map, ii.name) for ii in target_dims.value] @@ -202,10 +191,7 @@ def set_variable_data( dim_map = create_dimension_map(target_dims) axes = [get_aliased_key(dim_map, ii) for ii in var.dimensions] transposed_data = target_data.transpose(axes) - slices = [ - slice(target_dims.get(ii).lower, target_dims.get(ii).upper) - for ii in var.dimensions - ] + slices = [slice(target_dims.get(ii).lower, target_dims.get(ii).upper) for ii in var.dimensions] var[*slices] = transposed_data return transposed_data @@ -225,22 +211,16 @@ def create_grid_wrapper(self) -> GridWrapper: ) dims = self.spec.create_grid_dims(ds, grid, staggerloc) grid_x_center_coords = self.spec.get_x_data(grid, staggerloc) - grid_x_center_coords[:] = load_variable_data( - ds.variables[self.spec.x_center], dims - ) + grid_x_center_coords[:] = load_variable_data(ds.variables[self.spec.x_center], dims) grid_y_center_coords = self.spec.get_y_data(grid, staggerloc) - grid_y_center_coords[:] = load_variable_data( - ds.variables[self.spec.y_center], dims - ) + grid_y_center_coords[:] = load_variable_data(ds.variables[self.spec.y_center], dims) if self.spec.has_corners: corner_dims = self._add_corner_coords_(ds, grid) else: corner_dims = None - gwrap = GridWrapper( - value=grid, dims=dims, spec=self.spec, corner_dims=corner_dims - ) + gwrap = GridWrapper(value=grid, dims=dims, spec=self.spec, corner_dims=corner_dims) return gwrap def _create_grid_shape_(self, ds: nc.Dataset) -> np.ndarray: @@ -254,20 +234,14 @@ def _create_grid_shape_(self, ds: nc.Dataset) -> np.ndarray: raise NotImplementedError(self.spec.x_index, self.spec.y_index) return np.array(grid_shape) - def 
_add_corner_coords_( - self, ds: nc.Dataset, grid: esmpy.Grid - ) -> DimensionCollection: + def _add_corner_coords_(self, ds: nc.Dataset, grid: esmpy.Grid) -> DimensionCollection: staggerloc = esmpy.StaggerLoc.CORNER grid.add_coords(staggerloc) dims = self.spec.create_grid_dims(ds, grid, staggerloc) grid_x_corner_coords = self.spec.get_x_data(grid, staggerloc) - grid_x_corner_coords[:] = load_variable_data( - ds.variables[self.spec.x_corner], dims - ) + grid_x_corner_coords[:] = load_variable_data(ds.variables[self.spec.x_corner], dims) grid_y_corner_coords = self.spec.get_y_data(grid, staggerloc) - grid_y_corner_coords[:] = load_variable_data( - ds.variables[self.spec.y_corner], dims - ) + grid_y_corner_coords[:] = load_variable_data(ds.variables[self.spec.y_corner], dims) return dims @@ -293,9 +267,7 @@ def create_field_wrapper(self) -> FieldWrapper: staggerloc=self.staggerloc, coordinate_type="time", ) - target_dims = DimensionCollection( - value=list(self.gwrap.dims.value) + [time_dim] - ) + target_dims = DimensionCollection(value=list(self.gwrap.dims.value) + [time_dim]) field = esmpy.Field( self.gwrap.value, name=self.name, diff --git a/ush/smoke_dust/core/regrid/processor.py b/ush/smoke_dust/core/regrid/processor.py index f4850ae2a7..d0f611f55a 100644 --- a/ush/smoke_dust/core/regrid/processor.py +++ b/ush/smoke_dust/core/regrid/processor.py @@ -110,9 +110,7 @@ def _dst_output_gwrap(self) -> GridWrapper: self.__dst_output_gwrap = dst_output_gwrap return self.__dst_output_gwrap - def _get_regridder_( - self, src_fwrap: FieldWrapper, dst_fwrap: FieldWrapper - ) -> esmpy.Regrid: + def _get_regridder_(self, src_fwrap: FieldWrapper, dst_fwrap: FieldWrapper) -> esmpy.Regrid: if self.__regridder is None: self.log("creating regridder") self.log(f"{src_fwrap.value.data.shape=}", level=logging.DEBUG) @@ -142,9 +140,7 @@ def _get_regridder_( self.__regridder = regridder return self.__regridder - def _run_impl_( - self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Series - ) -> None: + def _run_impl_(self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Series) -> None: for row_idx, row_data in rave_to_interpolate.iterrows(): row_dict = row_data.to_dict() self.log(f"processing RAVE interpolation row: {row_idx}, {row_dict}") @@ -200,9 +196,7 @@ def _run_impl_( if self._context.rave_qa_filter == RaveQaFilter.HIGH: with open_nc(row_data["rave_raw"], parallel=True) as rave_ds: - rave_qa = load_variable_data( - rave_ds.variables["QA"], src_fwrap.dims - ) + rave_qa = load_variable_data(rave_ds.variables["QA"], src_fwrap.dims) set_to_zero = rave_qa < 2 self.log( f"RAVE QA filter applied: {self._context.rave_qa_filter=}; {set_to_zero.size=}; {np.sum(set_to_zero)=}" @@ -254,9 +248,7 @@ def _regrid_postprocessing_(self, row_data: pd.Series) -> None: dst_data = {ii: ds.variables[ii][:] for ii in field_names_dst} if calc_stats: # Do these calculations before we modify the arrays since edge masking is inplace - dst_desc_unmasked = create_descriptive_statistics( - dst_data, "dst_unmasked", None - ) + dst_desc_unmasked = create_descriptive_statistics(dst_data, "dst_unmasked", None) # Mask edges to reduce model edge effects self.log("masking edges", level=logging.DEBUG) @@ -281,10 +273,7 @@ def _regrid_postprocessing_(self, row_data: pd.Series) -> None: dst_data, "dst_masked", row_data["rave_interpolated"] ) summary = pd.concat( - [ - ii.transpose() - for ii in [src_desc, dst_desc_unmasked, dst_desc_masked] - ] + [ii.transpose() for ii in [src_desc, dst_desc_unmasked, dst_desc_masked]] ) 
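+            # Each describe() frame was transposed above so variables become rows;
+            # the three origins (src, dst_unmasked, dst_masked) stack vertically.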
summary.index.name = "variable" summary["forecast_date"] = row_data["forecast_date"] @@ -292,8 +281,6 @@ def _regrid_postprocessing_(self, row_data: pd.Series) -> None: if self._interpolation_stats is None: self._interpolation_stats = summary else: - self._interpolation_stats = pd.concat( - [self._interpolation_stats, summary] - ) + self._interpolation_stats = pd.concat([self._interpolation_stats, summary]) self.log("_run_interpolation_postprocessing: exit", level=logging.DEBUG) diff --git a/ush/smoke_dust/core/variable.py b/ush/smoke_dust/core/variable.py index c33003a5e9..93b23597ba 100644 --- a/ush/smoke_dust/core/variable.py +++ b/ush/smoke_dust/core/variable.py @@ -25,9 +25,7 @@ def get(self, name: str) -> SmokeDustVariable: @field_validator("values", mode="after") @classmethod - def _validate_values_( - cls, values: SmokeDustVariablesType - ) -> SmokeDustVariablesType: + def _validate_values_(cls, values: SmokeDustVariablesType) -> SmokeDustVariablesType: names = [ii.name for ii in values] if len(names) != len(set(names)): raise ValueError("Variable names must be unique") From 6fd45066a9188bac117ef2d3a51871e7fcf9813e Mon Sep 17 00:00:00 2001 From: benkozi Date: Thu, 6 Feb 2025 10:37:27 -0700 Subject: [PATCH 29/41] feat: pylint fixes, documentation, unit test workflow (#7) --- .github/workflows/python_tests.yaml | 14 +- doc/TechDocs/ush/modules.rst | 6 +- doc/TechDocs/ush/smoke_dust.core.regrid.rst | 26 ++ doc/TechDocs/ush/smoke_dust.core.rst | 58 ++++ doc/TechDocs/ush/smoke_dust.rst | 34 +++ doc/TechDocs/ush/smoke_dust_add_smoke.rst | 7 - .../ush/smoke_dust_fire_emiss_tools.rst | 7 - .../smoke_dust_generate_fire_emissions.rst | 7 - doc/TechDocs/ush/smoke_dust_hwp_tools.rst | 7 - doc/TechDocs/ush/smoke_dust_interp_tools.rst | 7 - doc/conf.py | 41 ++- scripts/exsrw_smoke_dust.sh | 23 +- sd_environment.yml | 2 + tests/test_python/test_smoke_dust/conftest.py | 22 +- .../test_core/test_preprocessor.py | 172 +++++++++++ .../test_smoke_dust/test_core/test_regrid.py | 10 +- .../test_generate_emissions.py | 214 ++++---------- ush/config.smoke_dust.yaml | 9 +- ush/config_defaults.yaml | 13 +- ush/smoke_dust/add_smoke.py | 13 +- ush/smoke_dust/core/common.py | 44 ++- ush/smoke_dust/core/context.py | 268 ++++++++++-------- ush/smoke_dust/core/cycle.py | 111 ++++++-- ush/smoke_dust/core/preprocessor.py | 23 +- ush/smoke_dust/core/regrid/common.py | 92 ++++-- ush/smoke_dust/core/regrid/processor.py | 21 +- ush/smoke_dust/core/variable.py | 26 +- ush/smoke_dust/generate_emissions.py | 131 ++++++--- 28 files changed, 928 insertions(+), 480 deletions(-) create mode 100644 doc/TechDocs/ush/smoke_dust.core.regrid.rst create mode 100644 doc/TechDocs/ush/smoke_dust.core.rst create mode 100644 doc/TechDocs/ush/smoke_dust.rst delete mode 100644 doc/TechDocs/ush/smoke_dust_add_smoke.rst delete mode 100644 doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst delete mode 100644 doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst delete mode 100644 doc/TechDocs/ush/smoke_dust_hwp_tools.rst delete mode 100644 doc/TechDocs/ush/smoke_dust_interp_tools.rst create mode 100644 tests/test_python/test_smoke_dust/test_core/test_preprocessor.py diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml index 113ec3f59c..b6124b3093 100644 --- a/.github/workflows/python_tests.yaml +++ b/.github/workflows/python_tests.yaml @@ -24,21 +24,22 @@ jobs: - name: Install Micromamba and srw_app environment uses: mamba-org/setup-micromamba@v1 with: - environment-file: environment.yml - environment-name: 
srw_app + environment-file: sd_environment.yml + environment-name: srw_sd init-shell: bash cache-downloads: true cache-environment: true - name: Lint the python code run: | - micromamba activate srw_app + micromamba activate srw_sd export PYTHONPATH=$(pwd)/ush pylint --ignore-imports=yes tests/test_python/ pylint ush/create_*.py pylint ush/generate_FV3LAM_wflow.py pylint ush/set_fv3nml*.py pylint ush/update_input_nml.py + pylint ush/smoke_dust - name: Checkout externals run: | @@ -47,15 +48,16 @@ jobs: - name: Run python unittests run: | # exclude test_retrieve_data that is tested in functional test - micromamba activate srw_app + micromamba activate srw_sd export UNIT_TEST=True export PYTHONPATH=$(pwd)/ush python -m unittest tests/test_python/*.py + export PYTHONPATH=${PYTHONPATH}:$(pwd)/tests + python -m pytest tests/test_python/test_smoke_dust - name: Run python functional tests run: | - micromamba activate srw_app + micromamba activate srw_sd export CI=true export PYTHONPATH=${PWD}/ush python3 -m unittest tests/test_python/test_retrieve_data.py - diff --git a/doc/TechDocs/ush/modules.rst b/doc/TechDocs/ush/modules.rst index cb3a41d71d..ba91937e67 100644 --- a/doc/TechDocs/ush/modules.rst +++ b/doc/TechDocs/ush/modules.rst @@ -29,9 +29,5 @@ ush set_leadhrs set_predef_grid_params setup - smoke_dust_add_smoke - smoke_dust_fire_emiss_tools - smoke_dust_generate_fire_emissions - smoke_dust_hwp_tools - smoke_dust_interp_tools + smoke_dust update_input_nml diff --git a/doc/TechDocs/ush/smoke_dust.core.regrid.rst b/doc/TechDocs/ush/smoke_dust.core.regrid.rst new file mode 100644 index 0000000000..1c6ec04f88 --- /dev/null +++ b/doc/TechDocs/ush/smoke_dust.core.regrid.rst @@ -0,0 +1,26 @@ +smoke\_dust.core.regrid package +=============================== + +.. automodule:: smoke_dust.core.regrid + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +smoke\_dust.core.regrid.common module +------------------------------------- + +.. automodule:: smoke_dust.core.regrid.common + :members: + :undoc-members: + :show-inheritance: + +smoke\_dust.core.regrid.processor module +---------------------------------------- + +.. automodule:: smoke_dust.core.regrid.processor + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust.core.rst b/doc/TechDocs/ush/smoke_dust.core.rst new file mode 100644 index 0000000000..c9f24f409d --- /dev/null +++ b/doc/TechDocs/ush/smoke_dust.core.rst @@ -0,0 +1,58 @@ +smoke\_dust.core package +======================== + +.. automodule:: smoke_dust.core + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + smoke_dust.core.regrid + +Submodules +---------- + +smoke\_dust.core.common module +------------------------------ + +.. automodule:: smoke_dust.core.common + :members: + :undoc-members: + :show-inheritance: + +smoke\_dust.core.context module +------------------------------- + +.. automodule:: smoke_dust.core.context + :members: + :undoc-members: + :show-inheritance: + +smoke\_dust.core.cycle module +----------------------------- + +.. automodule:: smoke_dust.core.cycle + :members: + :undoc-members: + :show-inheritance: + +smoke\_dust.core.preprocessor module +------------------------------------ + +.. automodule:: smoke_dust.core.preprocessor + :members: + :undoc-members: + :show-inheritance: + +smoke\_dust.core.variable module +-------------------------------- + +.. 
automodule:: smoke_dust.core.variable + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust.rst b/doc/TechDocs/ush/smoke_dust.rst new file mode 100644 index 0000000000..1f06009837 --- /dev/null +++ b/doc/TechDocs/ush/smoke_dust.rst @@ -0,0 +1,34 @@ +smoke\_dust package +=================== + +.. automodule:: smoke_dust + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + smoke_dust.core + +Submodules +---------- + +smoke\_dust.add\_smoke module +----------------------------- + +.. automodule:: smoke_dust.add_smoke + :members: + :undoc-members: + :show-inheritance: + +smoke\_dust.generate\_emissions module +-------------------------------------- + +.. automodule:: smoke_dust.generate_emissions + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_add_smoke.rst b/doc/TechDocs/ush/smoke_dust_add_smoke.rst deleted file mode 100644 index 6667b59ad8..0000000000 --- a/doc/TechDocs/ush/smoke_dust_add_smoke.rst +++ /dev/null @@ -1,7 +0,0 @@ -smoke\_dust\_add\_smoke module -============================== - -.. automodule:: smoke_dust_add_smoke - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst b/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst deleted file mode 100644 index 2fc04ad67d..0000000000 --- a/doc/TechDocs/ush/smoke_dust_fire_emiss_tools.rst +++ /dev/null @@ -1,7 +0,0 @@ -smoke\_dust\_fire\_emiss\_tools module -====================================== - -.. automodule:: smoke_dust_fire_emiss_tools - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst b/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst deleted file mode 100644 index c3e9c9856f..0000000000 --- a/doc/TechDocs/ush/smoke_dust_generate_fire_emissions.rst +++ /dev/null @@ -1,7 +0,0 @@ -smoke\_dust\_generate\_fire\_emissions module -============================================= - -.. automodule:: smoke_dust_generate_fire_emissions - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_hwp_tools.rst b/doc/TechDocs/ush/smoke_dust_hwp_tools.rst deleted file mode 100644 index 69d186a1c8..0000000000 --- a/doc/TechDocs/ush/smoke_dust_hwp_tools.rst +++ /dev/null @@ -1,7 +0,0 @@ -smoke\_dust\_hwp\_tools module -============================== - -.. automodule:: smoke_dust_hwp_tools - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/TechDocs/ush/smoke_dust_interp_tools.rst b/doc/TechDocs/ush/smoke_dust_interp_tools.rst deleted file mode 100644 index a5e1b974c4..0000000000 --- a/doc/TechDocs/ush/smoke_dust_interp_tools.rst +++ /dev/null @@ -1,7 +0,0 @@ -smoke\_dust\_interp\_tools module -================================= - -.. 
automodule:: smoke_dust_interp_tools - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/conf.py b/doc/conf.py index a490a0df4d..c776351626 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -36,9 +36,22 @@ numfig = True -nitpick_ignore = [('py:class', 'obj'),('py:class', - 'yaml.dumper.Dumper'),('py:class', - 'xml.etree.ElementTree'),('py:class', 'Basemap'),] +nitpick_ignore = [ + ('py:class', 'obj'), + ('py:class', 'yaml.dumper.Dumper'), + ('py:class', 'xml.etree.ElementTree'), + ('py:class', 'Basemap'), + ('py:class', 'numpy.ma.MaskedArray'), + ('py:class', 'esmpy.Field'), + ('py:class', 'pandas.DataFrame'), + ('py:class', 'netCDF4.Variable'), + ('py:class', 'pandas.DatetimeIndex'), + ('py:class', 'netCDF4.Dataset'), + ('py:class', 'numpy.ndarray'), + ('py:class', 'pydantic.BaseModel'), + ('py:class', 'esmpy.Grid'), + ('py:class', 'esmpy.StaggerLoc'), +] # -- General configuration --------------------------------------------------- @@ -255,9 +268,24 @@ def setup(app): # -- Options for autodoc extension --------------------------------------- -autodoc_mock_imports = ["f90nml","cartopy","mpl_toolkits.basemap","fill_jinja_template", - "matplotlib","numpy","uwtools","mpl_toolkits","metplus", - ] +autodoc_mock_imports = [ + "f90nml", + "cartopy", + "mpl_toolkits.basemap", + "fill_jinja_template", + "matplotlib", + "numpy", + "uwtools", + "mpl_toolkits", + "metplus", + "esmpy", + "netCDF4", + "pandas", + "xarray", + "mpi4py", + "pydantic", + "typer", +] logger = logging.getLogger(__name__) @@ -320,4 +348,3 @@ def warn_undocumented_members(app, what, name, obj, options, lines): # the purpose of building the documentation, METplus is loaded by including "metplus" in # the autodoc_mock_imports list above, not via use of the METPLUS_ROOT environment variable. os.environ["METPLUS_ROOT"] = "" - diff --git a/scripts/exsrw_smoke_dust.sh b/scripts/exsrw_smoke_dust.sh index b71ef5f6fe..c73049f041 100755 --- a/scripts/exsrw_smoke_dust.sh +++ b/scripts/exsrw_smoke_dust.sh @@ -108,16 +108,16 @@ else #----------------------------------------------------------------------- # mpirun -n ${nprocs} ${USHdir}/smoke_dust/generate_emissions.py \ - "${FIXsmoke}/${PREDEF_GRID_NAME}" \ - "${DATA}" \ - "${DATA_SHARE}" \ - "${PREDEF_GRID_NAME}" \ - "${EBB_DCYCLE}" \ - "${RESTART_INTERVAL}" \ - "${PERSISTENCE}" \ - "${RAVE_QA_FILTER}" \ - "${EXIT_ON_ERROR}" \ - "${LOG_LEVEL}" + --staticdir "${FIXsmoke}/${PREDEF_GRID_NAME}" \ + --ravedir "${DATA}" \ + --intp-dir "${DATA_SHARE}" \ + --predef-grid "${PREDEF_GRID_NAME}" \ + --ebb-dcycle "${EBB_DCYCLE}" \ + --restart-interval "${RESTART_INTERVAL}" \ + --persistence "${PERSISTENCE}" \ + --rave-qa-filter "${RAVE_QA_FILTER}" \ + --exit-on-error "${EXIT_ON_ERROR}" \ + --log-level "${LOG_LEVEL}" export err=$? if [ $err -ne 0 ]; then message_txt="generate_emissions.py failed with return code $err" @@ -127,8 +127,7 @@ else # Copy Smoke file to COMOUT cp -p ${DATA_SHARE}/${smokeFile} ${COMOUT} - cp -p ${DATA_SHARE}/${smokeFile} ${DATA} #tdk:pr: is this copy of the file needed? -# cp -p ${DATA}/${smokeFile} ${COMOUT} #tdk: is this the correct method? 
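+  # Stage a working copy of the smoke file in the run directory (DATA)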
+  cp -p ${DATA_SHARE}/${smokeFile} ${DATA}
 fi
 #
 #-----------------------------------------------------------------------
diff --git a/sd_environment.yml b/sd_environment.yml
index d8557a69ee..7f1ae1c2a1 100644
--- a/sd_environment.yml
+++ b/sd_environment.yml
@@ -14,3 +14,5 @@ dependencies:
   - scipy=1.10.*
   - uwtools=2.3.*
   - xarray=2022.11.*
+  - typer=0.15.*
+  - sphinx=7.4.*
\ No newline at end of file
diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py
index 96ab6796fc..48ef54f4f6 100644
--- a/tests/test_python/test_smoke_dust/conftest.py
+++ b/tests/test_python/test_smoke_dust/conftest.py
@@ -4,6 +4,7 @@
 import os
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Union
 
 import netCDF4 as nc
 import numpy as np
@@ -54,16 +55,14 @@ def create_fake_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None:
             var[:] = np.ones((shape.y_size, shape.x_size))
 
 
-def create_fake_context(
-    root_dir: Path, overrides: dict | None = None, extra: dict | None = None
-) -> SmokeDustContext:
+def create_fake_context(root_dir: Path, overrides: Union[dict, None] = None) -> SmokeDustContext:
     """
     Create a fake context for the test runner.
+
     Args:
         root_dir: Path to write fake test files to.
         overrides: If provided, override the required context arguments - the arguments
            provided to the CLI program.
-        extra: If provided, override context parameters not used in the CLI.
 
     Returns:
         A fake context to use for testing.
@@ -77,18 +76,21 @@ def create_fake_context(
         "ravedir": root_dir,
         "intp_dir": root_dir,
         "predef_grid": "RRFS_CONUS_3km",
-        "ebb_dcycle_flag": "2",
+        "ebb_dcycle": "2",
         "restart_interval": "6 12 18 24",
-        "persistence": "FALSE",
-        "rave_qa_filter": "NONE",
+        "persistence": "false",
+        "rave_qa_filter": "none",
         "exit_on_error": "TRUE",
         "log_level": "debug",
     }
     if overrides is not None:
         kwds.update(overrides)
-    context = SmokeDustContext.create_from_args(kwds.values(), extra=extra)
-    for ii in ["CDATE", "DATA"]:
-        os.unsetenv(ii)
+    try:
+        context = SmokeDustContext.model_validate(kwds)
+    except Exception:
+        # os.unsetenv does not update os.environ, so remove items from the mapping.
+        for ii in ["CDATE", "DATA"]:
+            os.environ.pop(ii, None)
+        raise
     return context
diff --git a/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py b/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py
new file mode 100644
index 0000000000..a3f4278bb5
--- /dev/null
+++ b/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py
@@ -0,0 +1,172 @@
+"""Test emissions processing for smoke/dust."""
+
+from pathlib import Path
+from typing import Type
+
+import netCDF4 as nc
+import numpy as np
+import pandas as pd
+import pytest
+from _pytest.fixtures import SubRequest
+from pydantic import BaseModel
+from pytest_mock import MockerFixture
+
+from smoke_dust.core.context import SmokeDustContext
+from smoke_dust.core.cycle import (
+    AbstractSmokeDustCycleProcessor,
+    SmokeDustCycleOne,
+    SmokeDustCycleTwo,
+)
+from smoke_dust.core.preprocessor import SmokeDustPreprocessor
+from test_python.test_smoke_dust.conftest import (
+    FakeGridOutShape,
+    create_fake_grid_out,
+    create_fake_context,
+    create_file_hash,
+)
+
+
+def create_fake_restart_files(
+    root_dir: Path, forecast_dates: pd.DatetimeIndex, shape: FakeGridOutShape
+) -> None:
+    """
+    Create fake restart files expected for EBB_DCYCLE=2.
+
+    Args:
+        root_dir: Directory to create fake files in.
+        forecast_dates: The series of dates to create the restart files for.
+        shape: Output grid shape.
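+
+    Note:
+        Files are written to a ``RESTART`` subdirectory using the forecast model's
+        restart naming convention, ``YYYYMMDD.HH0000.phy_data.nc``.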
+ """ + restart_dir = root_dir / "RESTART" + restart_dir.mkdir() + for date in forecast_dates: + restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" + with nc.Dataset(restart_file, "w") as ds: + ds.createDimension("Time") + ds.createDimension("yaxis_1", shape.y_size) + ds.createDimension("xaxis_1", shape.x_size) + totprcp_ave = ds.createVariable("totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) + totprcp_ave[0, ...] = np.ones(shape.as_tuple) + rrfs_hwp_ave = ds.createVariable("rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) + rrfs_hwp_ave[0, ...] = totprcp_ave[:] + 2 + + +def create_fake_rave_interpolated( + root_dir: Path, + forecast_dates: pd.DatetimeIndex, + shape: FakeGridOutShape, + rave_to_intp: str, +) -> None: + """ + Create fake interpolated RAVE data. + + Args: + root_dir: The directory to create fake interpolated data in. + forecast_dates: The series of dates to create the interpolated data for. + shape: The output grid shape. + rave_to_intp: Filename prefix to use for output files. + """ + for date in forecast_dates: + intp_file = root_dir / f"{rave_to_intp}{date}00_{date}59.nc" + dims = ("t", "lat", "lon") + with nc.Dataset(intp_file, "w") as ds: + ds.createDimension("t") + ds.createDimension("lat", shape.y_size) + ds.createDimension("lon", shape.x_size) + for varname in ["frp_avg_hr", "FRE"]: + var = ds.createVariable(varname, "f4", dims) + var[0, ...] = np.ones(shape.as_tuple) + + +def create_fake_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: + """ + Create a fake vegetation map data file. + + Args: + root_dir: The directory to create the file in. + shape: Shape of the output grid. + """ + with nc.Dataset(root_dir / "veg_map.nc", "w") as ds: + ds.createDimension("grid_yt", shape.y_size) + ds.createDimension("grid_xt", shape.x_size) + emiss_factor = ds.createVariable("emiss_factor", "f4", ("grid_yt", "grid_xt")) + emiss_factor[:] = np.ones((shape.y_size, shape.x_size)) + + +class ExpectedData(BaseModel): + """Holds expected data to test against.""" + + flag: str + klass: Type[AbstractSmokeDustCycleProcessor] + hash: str + + +class DataForTest(BaseModel): + """Holds data objects used by the test.""" + + model_config = {"arbitrary_types_allowed": True} + context: SmokeDustContext + preprocessor: SmokeDustPreprocessor + expected: ExpectedData + + +@pytest.fixture( + params=[ + ExpectedData(flag="1", klass=SmokeDustCycleOne, hash="d124734dfce7ca914391e35a02e4a7d2"), + ExpectedData(flag="2", klass=SmokeDustCycleTwo, hash="6752199f1039edc936a942f3885af38b"), + ], + ids=lambda p: f"ebb_dcycle={p.flag}", +) +def data_for_test( + request: SubRequest, tmp_path: Path, fake_grid_out_shape: FakeGridOutShape +) -> DataForTest: + """ + Creates the necessary test data including data files. 
+ """ + create_fake_grid_out(tmp_path, fake_grid_out_shape) + create_fake_veg_map(tmp_path, fake_grid_out_shape) + context = create_fake_context(tmp_path, overrides={"ebb_dcycle": request.param.flag}) + preprocessor = SmokeDustPreprocessor(context) + create_fake_restart_files(tmp_path, preprocessor.forecast_dates, fake_grid_out_shape) + create_fake_rave_interpolated( + tmp_path, + preprocessor.forecast_dates, + fake_grid_out_shape, + context.predef_grid.value + "_intp_", + ) + return DataForTest(context=context, preprocessor=preprocessor, expected=request.param) + + +class TestSmokeDustPreprocessor: # pylint: disable=too-few-public-methods + """Tests for the smoke/dust preprocessor.""" + + def test_run( + self, + data_for_test: DataForTest, # pylint: disable=redefined-outer-name + mocker: MockerFixture, + ) -> None: + """Test core capabilities of the preprocessor. Note this does not test regridding.""" + # pylint: disable=protected-access + preprocessor = data_for_test.preprocessor + spy1 = mocker.spy(preprocessor, "create_dummy_emissions_file") + regrid_processor_class = preprocessor._regrid_processor.__class__ + spy2 = mocker.spy(regrid_processor_class, "_run_impl_") + spy3 = mocker.spy(regrid_processor_class, "run") + cycle_processor_class = preprocessor._cycle_processor.__class__ + spy4 = mocker.spy(cycle_processor_class, "process_emissions") + spy5 = mocker.spy(cycle_processor_class, "average_frp") + + assert isinstance(preprocessor._cycle_processor, data_for_test.expected.klass) + assert preprocessor._forecast_metadata is None + # pylint: enable=protected-access + assert not data_for_test.context.emissions_path.exists() + + preprocessor.run() + spy1.assert_not_called() + spy2.assert_not_called() + spy3.assert_called_once() + spy4.assert_called_once() + spy5.assert_called_once() + + assert data_for_test.context.emissions_path.exists() + assert create_file_hash(data_for_test.context.emissions_path) == data_for_test.expected.hash diff --git a/tests/test_python/test_smoke_dust/test_core/test_regrid.py b/tests/test_python/test_smoke_dust/test_core/test_regrid.py index 15fe1f9d33..ea5b33224f 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_regrid.py +++ b/tests/test_python/test_smoke_dust/test_core/test_regrid.py @@ -4,6 +4,7 @@ import shutil import subprocess from pathlib import Path +from typing import Union import numpy as np import pytest @@ -58,7 +59,7 @@ class FakeGridParams(BaseModel): with_corners: bool = Field( description="If True, create the output grid with corners", default=True ) - fields: list[str] | None = Field( + fields: Union[list[str], None] = Field( description="If provided, a list of field names to create in the output file.", default=None ) min_lon: int = Field( @@ -67,7 +68,7 @@ class FakeGridParams(BaseModel): min_lat: int = Field( description="The minimum latitude value as origin for grid generation.", default=25 ) - ntime: int | None = Field( + ntime: Union[int, None] = Field( description="If provided, create the output fields with this many time steps.", default=1 ) @@ -87,7 +88,7 @@ def data_for_test( _ = create_fake_rave_and_rrfs_like_data( FakeGridParams(path=path, shape=fake_grid_out_shape, fields=["area"], ntime=None) ) - context = create_fake_context(tmp_path, extra={"regrid_in_memory": request.param}) + context = create_fake_context(tmp_path, overrides={"regrid_in_memory": request.param}) preprocessor = SmokeDustPreprocessor(context) for date in preprocessor.forecast_dates: path = tmp_path / f"Hourly_Emissions_3km_{date}_{date}.nc" @@ -101,7 
+102,7 @@ def create_analytic_data_array( dims: list[str], lon_mesh: np.ndarray, lat_mesh: np.ndarray, - ntime: int | None = None, + ntime: Union[int, None] = None, ) -> xr.DataArray: """ Create an analytic data array using lat/lon values. @@ -175,7 +176,6 @@ def test_run( tmp_path: Path, ) -> None: """Test the regrid processor.""" - # tdk:story: add MPI testing spy1 = mocker.spy(SmokeDustRegridProcessor, "_run_impl_") regrid_processor = SmokeDustRegridProcessor(data_for_test.context) regrid_processor.run(data_for_test.preprocessor.forecast_metadata) diff --git a/tests/test_python/test_smoke_dust/test_generate_emissions.py b/tests/test_python/test_smoke_dust/test_generate_emissions.py index b9d60265a1..df0527675a 100644 --- a/tests/test_python/test_smoke_dust/test_generate_emissions.py +++ b/tests/test_python/test_smoke_dust/test_generate_emissions.py @@ -1,172 +1,60 @@ -"""Test emissions processing for smoke/dust.""" +"""Test the main entrypoint for generating fire emission ICs.""" +import os +import subprocess from pathlib import Path -from typing import Type -import netCDF4 as nc -import numpy as np -import pandas as pd -import pytest -from _pytest.fixtures import SubRequest -from pydantic import BaseModel from pytest_mock import MockerFixture +from typer.testing import CliRunner -from smoke_dust.core.context import SmokeDustContext -from smoke_dust.core.cycle import ( - AbstractSmokeDustCycleProcessor, - SmokeDustCycleOne, - SmokeDustCycleTwo, -) from smoke_dust.core.preprocessor import SmokeDustPreprocessor -from test_python.test_smoke_dust.conftest import ( - FakeGridOutShape, - create_fake_grid_out, - create_fake_context, - create_file_hash, -) +from smoke_dust.generate_emissions import app +from test_python.test_smoke_dust.conftest import create_fake_grid_out, FakeGridOutShape -def create_fake_restart_files( - root_dir: Path, forecast_dates: pd.DatetimeIndex, shape: FakeGridOutShape -) -> None: - """ - Create fake restart files expected for EBB_DCYLE=2. - - Args: - root_dir: Directory to create fake files in. - forecast_dates: The series of dates to create the restart files for. - shape: Output grid shape. - """ - restart_dir = root_dir / "RESTART" - restart_dir.mkdir() - for date in forecast_dates: - restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" - with nc.Dataset(restart_file, "w") as ds: - ds.createDimension("Time") - ds.createDimension("yaxis_1", shape.y_size) - ds.createDimension("xaxis_1", shape.x_size) - totprcp_ave = ds.createVariable("totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) - totprcp_ave[0, ...] = np.ones(shape.as_tuple) - rrfs_hwp_ave = ds.createVariable("rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) - rrfs_hwp_ave[0, ...] = totprcp_ave[:] + 2 - - -def create_fake_rave_interpolated( - root_dir: Path, - forecast_dates: pd.DatetimeIndex, - shape: FakeGridOutShape, - rave_to_intp: str, -) -> None: - """ - Create fake interpolated RAVE data. - - Args: - root_dir: The directory to create fake interpolated data in. - forecast_dates: The series of dates to create the interpolated data for. - shape: The output grid shape. - rave_to_intp: Filename prefix to use for output files. 
- """ - for date in forecast_dates: - intp_file = root_dir / f"{rave_to_intp}{date}00_{date}59.nc" - dims = ("t", "lat", "lon") - with nc.Dataset(intp_file, "w") as ds: - ds.createDimension("t") - ds.createDimension("lat", shape.y_size) - ds.createDimension("lon", shape.x_size) - for varname in ["frp_avg_hr", "FRE"]: - var = ds.createVariable(varname, "f4", dims) - var[0, ...] = np.ones(shape.as_tuple) - - -def create_fake_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: - """ - Create a fake vegetation map data file. - - Args: - root_dir: The directory to create the file in. - shape: Shape of the output grid. - """ - with nc.Dataset(root_dir / "veg_map.nc", "w") as ds: - ds.createDimension("grid_yt", shape.y_size) - ds.createDimension("grid_xt", shape.x_size) - emiss_factor = ds.createVariable("emiss_factor", "f4", ("grid_yt", "grid_xt")) - emiss_factor[:] = np.ones((shape.y_size, shape.x_size)) - - -class ExpectedData(BaseModel): - """Holds expected data to test against.""" - - flag: str - klass: Type[AbstractSmokeDustCycleProcessor] - hash: str - - -class DataForTest(BaseModel): - """Holds data objects used by the test.""" - - model_config = {"arbitrary_types_allowed": True} - context: SmokeDustContext - preprocessor: SmokeDustPreprocessor - expected: ExpectedData - - -@pytest.fixture( - params=[ - ExpectedData(flag="1", klass=SmokeDustCycleOne, hash="d124734dfce7ca914391e35a02e4a7d2"), - ExpectedData(flag="2", klass=SmokeDustCycleTwo, hash="6752199f1039edc936a942f3885af38b"), - ], - ids=lambda p: f"ebb_dcycle_flag={p.flag}", -) -def data_for_test( - request: SubRequest, tmp_path: Path, fake_grid_out_shape: FakeGridOutShape -) -> DataForTest: - """ - Creates the necessary test data including data files. - """ +def test(tmp_path: Path, fake_grid_out_shape: FakeGridOutShape, mocker: MockerFixture) -> None: + """Test invoking emissions generation.""" + mock_proc = mocker.Mock(spec=SmokeDustPreprocessor) + mocker.patch("smoke_dust.generate_emissions.SmokeDustPreprocessor", return_value=mock_proc) create_fake_grid_out(tmp_path, fake_grid_out_shape) - create_fake_veg_map(tmp_path, fake_grid_out_shape) - context = create_fake_context(tmp_path, overrides={"ebb_dcycle_flag": request.param.flag}) - preprocessor = SmokeDustPreprocessor(context) - create_fake_restart_files(tmp_path, preprocessor.forecast_dates, fake_grid_out_shape) - create_fake_rave_interpolated( - tmp_path, - preprocessor.forecast_dates, - fake_grid_out_shape, - context.predef_grid.value + "_intp_", - ) - return DataForTest(context=context, preprocessor=preprocessor, expected=request.param) - - -class TestSmokeDustPreprocessor: # pylint: disable=too-few-public-methods - """Tests for the smoke/dust preprocessor.""" - - def test_run( - self, - data_for_test: DataForTest, # pylint: disable=redefined-outer-name - mocker: MockerFixture, - ) -> None: - """Test core capabilities of the preprocessor. 
Note this does not test regridding."""
-        # pylint: disable=protected-access
-        preprocessor = data_for_test.preprocessor
-        spy1 = mocker.spy(preprocessor, "create_dummy_emissions_file")
-        regrid_processor_class = preprocessor._regrid_processor.__class__
-        spy2 = mocker.spy(regrid_processor_class, "_run_impl_")
-        spy3 = mocker.spy(regrid_processor_class, "run")
-        cycle_processor_class = preprocessor._cycle_processor.__class__
-        spy4 = mocker.spy(cycle_processor_class, "process_emissions")
-        spy5 = mocker.spy(cycle_processor_class, "average_frp")
-
-        assert isinstance(preprocessor._cycle_processor, data_for_test.expected.klass)
-        assert preprocessor._forecast_metadata is None
-        # pylint: enable=protected-access
-        assert not data_for_test.context.emissions_path.exists()
-
-        preprocessor.run()
-        spy1.assert_not_called()
-        spy2.assert_not_called()
-        spy3.assert_called_once()
-        spy4.assert_called_once()
-        spy5.assert_called_once()
-
-        assert data_for_test.context.emissions_path.exists()
-        assert create_file_hash(data_for_test.context.emissions_path) == data_for_test.expected.hash
+    strpath = str(tmp_path)
+    runner = CliRunner()
+    os.environ["CDATE"] = "2019072200"
+    os.environ["DATA"] = strpath
+
+    try:
+        args = [
+            "--staticdir",
+            strpath,
+            "--ravedir",
+            strpath,
+            "--intp-dir",
+            strpath,
+            "--predef-grid",
+            "RRFS_CONUS_25km",
+            "--ebb-dcycle",
+            "2",
+            "--restart-interval",
+            "6 12 18 24",
+            "--persistence",
+            "False",
+            "--rave-qa-filter",
+            "none",
+            "--log-level",
+            "debug",
+        ]
+        result = runner.invoke(app, args, catch_exceptions=False)
+    except Exception:
+        # os.unsetenv does not update os.environ, so remove items from the mapping.
+        for ii in ["CDATE", "DATA"]:
+            os.environ.pop(ii, None)
+        raise
+    print(result.output)
+
+    assert result.exit_code == 0
+    mock_proc.run.assert_called_once()
+    mock_proc.finalize.assert_called_once()
+
+
+def test_help() -> None:
+    """Test that the help message can be displayed."""
+    # Resolve the script relative to this file so the test does not depend on the CWD.
+    script = Path(__file__).parents[3] / "ush" / "smoke_dust" / "generate_emissions.py"
+    subprocess.check_call(["python", str(script), "--help"])
diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml
index 64b5a7b112..dcd84d4390 100644
--- a/ush/config.smoke_dust.yaml
+++ b/ush/config.smoke_dust.yaml
@@ -37,7 +37,6 @@ task_get_extrn_ics:
   EXTRN_MDL_NAME_ICS: RAP
   EXTRN_MDL_ICS_OFFSET_HRS: 0
   USE_USER_STAGED_EXTRN_FILES: true
-  #tdk:pr: can we use overlays to parameterize these?
   EXTRN_MDL_SOURCE_BASEDIR_ICS: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAP_DATA_SD/${yyyymmddhh} # hera
 #  EXTRN_MDL_SOURCE_BASEDIR_ICS: /work/noaa/epic/SRW-AQM_DATA/input_model_data/RAP/${yyyymmddhh} # orion/hercules
 #  EXTRN_MDL_SOURCE_BASEDIR_ICS: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmddhh} # gaea-c6
@@ -61,9 +60,9 @@ task_run_fcst:
   QUILTING: true
   PRINT_ESMF: false
   DO_FCST_RESTART: false
-#task_run_post: #tdk:rm maybe?
-#  POST_OUTPUT_DOMAIN_NAME: conus3km
-#  USE_CUSTOM_POST_CONFIG_FILE: false
+task_run_post:
+  POST_OUTPUT_DOMAIN_NAME: conus3km
+  USE_CUSTOM_POST_CONFIG_FILE: false
 global:
   DO_ENSEMBLE: false
   NUM_ENS_MEMBERS: 2
@@ -75,4 +74,4 @@ smoke_dust_parm:
   PERSISTENCE: false
   RAVE_QA_FILTER: none
   EXIT_ON_ERROR: true
-  LOG_LEVEL: info
\ No newline at end of file
+  LOG_LEVEL: info
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index 68a67f09bc..1b594bd5a9 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -2867,7 +2867,18 @@ smoke_dust_parm:
   #
   # SMOKE_DUST_FILE_PREFIX:
   #    Prefix of Smoke and Dust file name
-  #tdk:doc
+  #
+  # RAVE_QA_FILTER:
+  #    Apply RAVE QA filtering to source RAVE fields.
+  #      none: No filtering applied.
+  #      high: QA flag values < 2 are set to zero.
+  #
+  # EXIT_ON_ERROR:
+  #    If true, raise an exception in the preprocessor when an error occurs. If false, create a
+  #    dummy emissions file, log the error, and continue.
+  #
+  # LOG_LEVEL:
+  #    Logging level for the preprocessor: info or debug.
 #
 #-----------------------------------------------------------------------
 #
diff --git a/ush/smoke_dust/add_smoke.py b/ush/smoke_dust/add_smoke.py
index 6d305665b9..fda5aa41aa 100755
--- a/ush/smoke_dust/add_smoke.py
+++ b/ush/smoke_dust/add_smoke.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+"""Add smoke/dust tracers to ICs."""
+
 import os
 from typing import Tuple
 
@@ -12,11 +14,11 @@ def populate_data(data: np.ndarray, target_shape: Tuple) -> np.ndarray:
     Extracted variables need to match the target shape so we first populating it into a zero array.
 
     Args:
-        data: The extracted data to be adjusted
-        target_shape: The shape of the target data array
+        data: The extracted data to be adjusted.
+        target_shape: The shape of the target data array.
 
     Returns:
-        The adjusted data array
+        The adjusted data array.
     """
     populated_data = np.zeros(target_shape)
     populated_data[: data.shape[0], :, :] = data
@@ -24,6 +26,9 @@ def populate_data(data: np.ndarray, target_shape: Tuple) -> np.ndarray:
 
 
 def main() -> None:
+    """
+    Add smoke/dust tracers from the restart tracer file to the IC file.
+    """
     # File paths
     source_file = "fv_tracer.res.tile1.nc"
     target_file = "gfs_data.tile7.halo0.nc"
@@ -61,8 +66,6 @@ def main() -> None:
     #     dust_2_add_populated = populate_data(dust_2_add, (lev_dim, lat_dim, lon_dim))
     #     coarsepm_2_add_populated = populate_data(coarsepm_2_add, (lev_dim, lat_dim, lon_dim))
 
-    # print('Max values in populated data:', smoke_2_add_populated.max(), dust_2_add_populated.max(), coarsepm_2_add_populated.max())
-
     # Create new data arrays filled with zeros
     smoke_zero = xr.DataArray(
         np.zeros((lev_dim, lat_dim, lon_dim)),
diff --git a/ush/smoke_dust/core/common.py b/ush/smoke_dust/core/common.py
index 8ba94204f5..3699c1601f 100644
--- a/ush/smoke_dust/core/common.py
+++ b/ush/smoke_dust/core/common.py
@@ -1,3 +1,5 @@
+"""Contains common functionality used across smoke/dust."""
+
 from contextlib import contextmanager
 from pathlib import Path
 from typing import Tuple, Literal, Dict
@@ -17,6 +19,18 @@ def open_nc(
     clobber: bool = False,
     parallel: bool = True,
 ) -> nc.Dataset:
+    """
+    Open a netCDF file for various operations.
+
+    Args:
+        path: Path to the target netCDF file.
+        mode: Mode to open the file in.
+        clobber: If True, overwrite an existing file.
+        parallel: If True, open the netCDF for parallel operations.
+
+    Returns:
+        A netCDF dataset object.
+    """
     ds = nc.Dataset(
         path,
         mode=mode,
@@ -39,8 +53,8 @@ def create_sd_coordinate_variable(
     Create a smoke/dust netCDF spatial coordinate variable.
 
     Args:
-        ds: Dataset to update
-        sd_variable: Contains variable metadata
+        ds: Dataset to update.
+        sd_variable: Contains variable metadata.
     """
     var_out = ds.createVariable(
         sd_variable.name, "f4", ("lat", "lon"), fill_value=sd_variable.fill_value_float
@@ -80,8 +94,9 @@ def create_sd_variable(
     try:
         var_out.set_collective(True)
     except RuntimeError:
-        # Allow this function to work with parallel and non-parallel datasets. If the dataset is not opened in parallel
-        # this error message is returned: RuntimeError: NetCDF: Parallel operation on file opened for non-parallel access
+        # Allow this function to work with parallel and non-parallel datasets. If the dataset
+        # is not opened in parallel this error message is returned:
+        # RuntimeError: NetCDF: Parallel operation on file opened for non-parallel access
         pass
     var_out[0, :, :] = sd_variable.fill_value_float
     try:
@@ -93,6 +108,15 @@ def create_sd_variable(
 def create_template_emissions_file(
     ds: nc.Dataset, grid_shape: Tuple[int, int], is_dummy: bool = False
 ):
+    """
+    Create a smoke/dust template netCDF emissions file.
+
+    Args:
+        ds: The target netCDF dataset object.
+        grid_shape: The grid shape to create.
+        is_dummy: Converted to a netCDF attribute indicating whether the created file holds
+            dummy emissions or actual values.
+    """
     ds.createDimension("t", None)
     ds.createDimension("lat", grid_shape[0])
     ds.createDimension("lon", grid_shape[1])
@@ -109,6 +133,18 @@ def create_descriptive_statistics(
     origin: Literal["src", "dst_unmasked", "dst_masked", "derived"],
     path: Path,
 ) -> pd.DataFrame:
+    """
+    Create a standard set of descriptive statistics using `pandas`.
+
+    Args:
+        container: A dictionary mapping names to masked arrays.
+        origin: A tag to indicate the data origin to add to the created dataframe.
+        path: Path to the netCDF file where the container data originated.
+
+    Returns:
+        A dataframe containing descriptive statistics fields.
+    """
     df = pd.DataFrame.from_dict({k: v.filled(np.nan).ravel() for k, v in container.items()})
     desc = df.describe()
     adds = {}
diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py
index e36362a40b..5b8d6cb9e5 100644
--- a/ush/smoke_dust/core/context.py
+++ b/ush/smoke_dust/core/context.py
@@ -1,74 +1,166 @@
+"""Context object for smoke/dust holding external and derived parameters."""
+
 import datetime as dt
 import logging
 import logging.config
 import os
-from enum import unique, StrEnum, IntEnum
+from enum import unique, StrEnum
 from pathlib import Path
-from typing import Tuple, List
+from typing import Union, Annotated, Any
 
 from mpi4py import MPI
-from pydantic import BaseModel, model_validator
+from pydantic import BaseModel, model_validator, BeforeValidator, Field
 
 from smoke_dust.core.common import open_nc
 
 
 @unique
 class PredefinedGrid(StrEnum):
-    RRFS_CONUS_25km = "RRFS_CONUS_25km"
-    RRFS_CONUS_13km = "RRFS_CONUS_13km"
-    RRFS_CONUS_3km = "RRFS_CONUS_3km"
-    RRFS_NA_3km = "RRFS_NA_3km"
-    RRFS_NA_13km = "RRFS_NA_13km"
+    """Predefined grids supported by smoke/dust."""
+
+    RRFS_CONUS_25KM = "RRFS_CONUS_25km"
+    RRFS_CONUS_13KM = "RRFS_CONUS_13km"
+    RRFS_CONUS_3KM = "RRFS_CONUS_3km"
+    RRFS_NA_3KM = "RRFS_NA_3km"
+    RRFS_NA_13KM = "RRFS_NA_13km"
 
 
 @unique
-class EbbDCycle(IntEnum):
-    ONE = 1
-    TWO = 2
+class EbbDCycle(StrEnum):
+    """Emission forecast cycle method.
+
+    * ``1``: Estimate emissions and fire radiative power.
+    * ``2``: In addition to `1`, also create inputs to forecast hourly wildfire potential.
+    """
+
+    ONE = "1"
+    TWO = "2"
 
 
 @unique
 class RaveQaFilter(StrEnum):
-    NONE = "NONE"
-    HIGH = "HIGH"
+    """
+    Quality assurance flag filtering to apply to input RAVE data. RAVE QA filter values range from
+    one to three.
+
+    * ``none``: Do not apply any QA filtering.
+    * ``high``: QA flag values less than `2` are set to zero for derived fire radiative energy
+      fields.
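+
+    For example, ``--rave-qa-filter high`` zeroes source cells whose QA flag is below
+    ``2`` before regridding.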
+ """ + + NONE = "none" + HIGH = "high" @unique class LogLevel(StrEnum): - INFO = "INFO" - DEBUG = "DEBUG" + """Logging level for the preprocessor.""" + + INFO = "info" + DEBUG = "debug" @unique class EmissionVariable(StrEnum): + """Maps RAVE and smoke/dust variable names.""" + FRE = "FRE" FRP = "FRP" def rave_name(self) -> str: + """Convert to a RAVE name.""" other = {self.FRP: "FRP_MEAN", self.FRE: "FRE"} return other[self] def smoke_dust_name(self) -> str: + """Convert to a smoke/dust name.""" other = {self.FRP: "frp_avg_hr", self.FRE: "FRE"} return other[self] +def _format_path_(value: Union[Path, str]) -> Path: + return Path(value).expanduser().resolve(strict=True) + + +def _format_read_path_(value: Union[Path, str]) -> Path: + path = _format_path_(value) + errors = [] + if not path.exists(): + errors.append(f"path does not exist: {path}") + if not os.access(path, os.R_OK): + errors.append(f"path is not readable: {path}") + if not path.is_dir(): + errors.append(f"path is not a directory: {path}") + if len(errors) > 0: + raise OSError(errors) + return path + + +def _format_write_path_(value: Union[Path, str]) -> Path: + path = _format_path_(value) + errors = [] + if not path.exists(): + errors.append(f"path does not exist: {path}") + if not os.access(path, os.W_OK): + errors.append(f"path is not writable: {path}") + if not path.is_dir(): + errors.append(f"path is not a directory: {path}") + if len(errors) > 0: + raise OSError(errors) + return path + + +def _format_restart_interval_(value: Any) -> tuple[int, ...]: + if isinstance(value, str): + return tuple(int(num) for num in value.split(" ")) + return value + + +ReadPathType = Annotated[Path, BeforeValidator(_format_read_path_)] +WritePathType = Annotated[Path, BeforeValidator(_format_write_path_)] + + class SmokeDustContext(BaseModel): + """Context object for smoke/dust.""" + # Values provided via command-line - staticdir: Path - ravedir: Path - intp_dir: Path - predef_grid: PredefinedGrid - ebb_dcycle_flag: EbbDCycle - restart_interval: Tuple[int, ...] - persistence: bool - rave_qa_filter: RaveQaFilter - exit_on_error: bool - log_level: LogLevel + staticdir: ReadPathType = Field(description="Path to smoke and dust fixed files.") + ravedir: ReadPathType = Field( + description="Path to the directory containing RAVE data files (hourly)." + ) + intp_dir: WritePathType = Field( + description="Path to the directory containing interpolated RAVE data files." + ) + predef_grid: PredefinedGrid = Field( + description="SRW predefined grid to use as the forecast domain." + ) + ebb_dcycle: EbbDCycle = Field(description="The forecast cycle to run.") + restart_interval: Annotated[tuple[int, ...], BeforeValidator(_format_restart_interval_)] = ( + Field( + description="Restart intervals used for restart file search. For example '6 12 18 24'." + ) + ) + persistence: bool = Field( + description="If true, use satellite observations from the previous day. Otherwise, use " + "observations from the same day." + ) + rave_qa_filter: RaveQaFilter = Field( + description="Filter level for RAVE QA flags when regridding fields." + ) + exit_on_error: bool = Field( + description="If false, log errors and write a dummy emissions file but do not raise an " + "exception." 
+ ) + log_level: LogLevel = Field(description="Logging level for the preprocessor") + regrid_in_memory: bool = Field( + description="If true, do esmpy regridding in-memory as opposed to reading from the fixed " + "weight file.", + default=False, + ) # Values provided via environment - current_day: str - nwges_dir: Path + current_day: str = Field(description="The forecast date for the start of the cycle.") + nwges_dir: ReadPathType = Field(description="Directory containing restart files.") # Fixed parameters should_calc_desc_stats: bool = False @@ -77,9 +169,21 @@ class SmokeDustContext(BaseModel): fg_to_ug: float = 1e6 to_s: int = 3600 rank: int = MPI.COMM_WORLD.Get_rank() - grid_out_shape: Tuple[int, int] = (0, 0) # Set in _finalize_model_ esmpy_debug: bool = False - regrid_in_memory: bool = False + + # Set in _finalize_model_ + grid_out_shape: tuple[int, int] = (0, 0) + _logger: Union[logging.Logger, None] = None + + @model_validator(mode="before") + @classmethod + def _initialize_values_(cls, values: dict) -> dict: + + # Format environment-level variables + values["current_day"] = os.environ["CDATE"] + values["nwges_dir"] = os.environ["DATA"] + + return values @model_validator(mode="after") def _finalize_model_(self) -> "SmokeDustContext": @@ -93,78 +197,44 @@ def _finalize_model_(self) -> "SmokeDustContext": self.log(f"{self.grid_out_shape=}") return self - @classmethod - def create_from_args(cls, args: List[str], extra: dict | None = None) -> "SmokeDustContext": - print(f"create_from_args: {args=}", flush=True) - - # Extract local arguments from args before converting values - ( - l_staticdir, - l_ravedir, - l_intp_dir, - l_predef_grid, - l_ebb_dcycle_flag, - l_restart_interval, - l_persistence, - l_rave_qa_filter, - l_exit_on_error, - l_log_level, - ) = args - - # Format environment-level variables - current_day: str = os.environ["CDATE"] - nwges_dir = cls._format_read_path_(os.environ["DATA"]) - - # Convert to expected types - kwds = dict( - staticdir=cls._format_read_path_(l_staticdir), - ravedir=cls._format_read_path_(l_ravedir), - intp_dir=cls._format_write_path_(l_intp_dir), - predef_grid=PredefinedGrid(l_predef_grid), - ebb_dcycle_flag=EbbDCycle(int(l_ebb_dcycle_flag)), - restart_interval=[int(num) for num in l_restart_interval.split(" ")], - persistence=cls._str_to_bool_(l_persistence), - rave_qa_filter=RaveQaFilter(l_rave_qa_filter.upper()), - exit_on_error=cls._str_to_bool_(l_exit_on_error), - log_level=l_log_level.upper(), - current_day=current_day, - nwges_dir=nwges_dir, - ) - if extra is not None: - kwds.update(extra) - - return cls(**kwds) - @property def veg_map(self) -> Path: + """Path to the vegetation map netCDF file which contains emission factors.""" return self.staticdir / "veg_map.nc" @property def rave_to_intp(self) -> str: + """File prefix for interpolated RAVE files.""" return self.predef_grid.value + "_intp_" @property def grid_in(self) -> Path: + """Path to the grid definition for RAVE data.""" return self.staticdir / "grid_in.nc" @property def weightfile(self) -> Path: + """Path to pre-calculated ESMF weights file mapping the RAVE grid to forecast grid.""" return self.staticdir / "weight_file.nc" @property def grid_out(self) -> Path: + """Path to the forecast grid definition.""" return self.staticdir / "ds_out_base.nc" @property def hourly_hwpdir(self) -> Path: + """Path to the directory containing restart files for `EBB_DCYCLE=2`.""" return self.nwges_dir / "RESTART" @property def emissions_path(self) -> Path: + """Path to the output emissions files 
containing ICs for smoke/dust.""" return self.intp_dir / f"SMOKE_RRFS_data_{self.current_day}00.nc" @property def fcst_datetime(self) -> dt.datetime: + """The starting datetime object parsed from the `current_day`.""" return dt.datetime.strptime(self.current_day, "%Y%m%d%H") def log( @@ -174,53 +244,23 @@ def log( exc_info: Exception = None, stacklevel: int = 2, ): + """ + Log a message. + + Args: + msg: The message to log. + level: An optional override for the message level. + exc_info: If provided, log this exception and raise an error if `self.exit_on_error` + is `True`. + stacklevel: If greater than 1, the corresponding number of stack frames are skipped + when computing the line number and function name. + """ if exc_info is not None: level = logging.ERROR self._logger.log(level, msg, exc_info=exc_info, stacklevel=stacklevel) if exc_info is not None and self.exit_on_error: raise exc_info - @staticmethod - def _format_path_(value: Path | str) -> Path: - return Path(value).expanduser().resolve(strict=True) - - @classmethod - def _format_read_path_(cls, value: str) -> Path: - path = cls._format_path_(value) - errors = [] - if not path.exists(): - errors.append(f"path does not exist: {path}") - if not os.access(path, os.R_OK): - errors.append(f"path is not readable: {path}") - if not path.is_dir(): - errors.append(f"path is not a directory: {path}") - if len(errors) > 0: - raise OSError(errors) - return path - - @classmethod - def _format_write_path_(cls, value: str) -> Path: - path = cls._format_path_(value) - errors = [] - if not path.exists(): - errors.append(f"path does not exist: {path}") - if not os.access(path, os.W_OK): - errors.append(f"path is not writable: {path}") - if not path.is_dir(): - errors.append(f"path is not a directory: {path}") - if len(errors) > 0: - raise OSError(errors) - return path - - @staticmethod - def _str_to_bool_(value: str) -> bool: - value = value.lower() - if value in ["true", "t", "1"]: - return True - elif value in ["false", "f", "0"]: - return False - raise NotImplementedError(f"boolean string not recognized: {value}") - def _init_logging_(self) -> logging.Logger: project_name = "smoke-dust-preprocessor" @@ -229,9 +269,11 @@ def _init_logging_(self) -> logging.Logger: "disable_existing_loggers": False, "formatters": { "plain": { + # pylint: disable=line-too-long # Uncomment to report verbose output in logs; try to keep these two in sync # "format": f"[%(name)s][%(levelname)s][%(asctime)s][%(pathname)s:%(lineno)d][%(process)d][%(thread)d][rank={self._rank}]: %(message)s" "format": f"[%(name)s][%(levelname)s][%(asctime)s][%(filename)s:%(lineno)d][rank={self.rank}]: %(message)s" + # pylint: enable=line-too-long }, }, "handlers": { @@ -245,7 +287,7 @@ def _init_logging_(self) -> logging.Logger: "loggers": { project_name: { "handlers": ["default"], - "level": getattr(logging, self.log_level.value), + "level": getattr(logging, self.log_level.value.upper()), }, }, } diff --git a/ush/smoke_dust/core/cycle.py b/ush/smoke_dust/core/cycle.py index 8def1799d7..13de40e7e0 100644 --- a/ush/smoke_dust/core/cycle.py +++ b/ush/smoke_dust/core/cycle.py @@ -1,11 +1,13 @@ +"""Forecast cycle definitions for smoke/dust.""" + import abc import datetime as dt -from enum import StrEnum, unique -from typing import Dict, Any +from typing import Any import numpy as np import pandas as pd import xarray as xr +from pydantic import BaseModel, field_validator from smoke_dust.core.common import ( open_nc, @@ -16,40 +18,76 @@ from smoke_dust.core.variable import SD_VARS -@unique 
-class FrpVariable(StrEnum): - FRP_AVG = "frp_avg_hr" - EBB_TOTAL = "ebb_smoke_hr" +class AverageFrpOutput(BaseModel): + """Output expected from the ``average_frp`` method.""" + + model_config = {"arbitrary_types_allowed": True} + data: dict[str, np.ndarray] + + @field_validator("data", mode="before") + @classmethod + def _validate_data_(cls, value: dict[str, np.ndarray]) -> dict[str, np.ndarray]: + if set(value.keys()) != {"frp_avg_hr", "ebb_smoke_hr"}: + raise ValueError + return value class AbstractSmokeDustCycleProcessor(abc.ABC): + """Base class for all smoke/dust cycle processors.""" def __init__(self, context: SmokeDustContext): self._context = context def log(self, *args: Any, **kwargs: Any) -> None: + """ + See ``SmokeDustContext.log``. + """ self._context.log(*args, **kwargs) @abc.abstractmethod - def flag(self) -> EbbDCycle: ... + def flag(self) -> EbbDCycle: + """ + The cycle flag associated with the processor. + """ @abc.abstractmethod - def create_start_datetime(self) -> dt.datetime: ... + def create_start_datetime(self) -> dt.datetime: + """ + Creates the cycle's start datetime. Used when searching for RAVE files to use for the + forecast. + """ @abc.abstractmethod - def average_frp(self, forecast_metadata: pd.DataFrame) -> Dict[FrpVariable, np.ndarray]: ... + def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: + """ + Calculate fire radiative power and smoke emissions from biomass burning. + + Args: + forecast_metadata: Dataframe containing forecast metadata. + Returns: + Fire radiative power and smoke emissions. + """ @abc.abstractmethod - def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: ... + def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: + """ + Create smoke/dust ICs emissions file. + + Args: + forecast_metadata: Dataframe containing forecast metadata. 
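+
+        Note:
+            Implementations write the finished emissions file to the context's
+            ``emissions_path``.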
+ """ class SmokeDustCycleOne(AbstractSmokeDustCycleProcessor): + """Creates ICs consisting of fire radiative power and smoke emissions from biomass burning.""" + flag = EbbDCycle.ONE def create_start_datetime(self) -> dt.datetime: if self._context.persistence: self.log( - "Creating emissions for persistence method where satellite FRP persist from previous day" + "Creating emissions for persistence method where satellite FRP persist from " + "previous day" ) start_datetime = self._context.fcst_datetime - dt.timedelta(days=1) else: @@ -65,11 +103,11 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: with open_nc(self._context.grid_out, parallel=False) as ds_src: ds_out.variables["geolat"][:] = ds_src.variables["grid_latt"][:] ds_out.variables["geolon"][:] = ds_src.variables["grid_lont"][:] - for var, fill_array in derived.items(): - create_sd_variable(ds_out, SD_VARS.get(var.value)) - ds_out.variables[var.value][:] = fill_array + for var_name, fill_array in derived.data.items(): + create_sd_variable(ds_out, SD_VARS.get(var_name)) + ds_out.variables[var_name][:] = fill_array - def average_frp(self, forecast_metadata: pd.DataFrame) -> Dict[FrpVariable, np.ndarray]: + def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: ebb_smoke_total = [] frp_avg_hr = [] @@ -95,13 +133,20 @@ def average_frp(self, forecast_metadata: pd.DataFrame) -> Dict[FrpVariable, np.n np.nan_to_num(frp_avg_reshaped, copy=False, nan=0.0) - return { - FrpVariable.FRP_AVG: frp_avg_reshaped, - FrpVariable.EBB_TOTAL: ebb_total_reshaped, - } + return AverageFrpOutput( + data={ + "frp_avg_hr": frp_avg_reshaped, + "ebb_smoke_hr": ebb_total_reshaped, + } + ) class SmokeDustCycleTwo(AbstractSmokeDustCycleProcessor): + """ + In addition to outputs from cycle `1`, also creates ICs for forecasting hourly wildfire + potential. 
+ """ + flag = EbbDCycle.TWO def create_start_datetime(self) -> dt.datetime: @@ -109,7 +154,7 @@ def create_start_datetime(self) -> dt.datetime: return self._context.fcst_datetime - dt.timedelta(days=1, hours=1) def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: - # tdk:story: figure out restart file copying + # pylint: disable=too-many-statements self.log("process_emissions: enter") hwp_ave = [] @@ -150,10 +195,10 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: fire_age = np.array(te).reshape(self._context.grid_out_shape) # Ensure arrays are not negative or NaN - frp_avg_reshaped = np.clip(derived[FrpVariable.FRP_AVG], 0, None) + frp_avg_reshaped = np.clip(derived.data["frp_avg_hr"], 0, None) frp_avg_reshaped = np.nan_to_num(frp_avg_reshaped) - ebb_tot_reshaped = np.clip(derived[FrpVariable.EBB_TOTAL], 0, None) + ebb_tot_reshaped = np.clip(derived.data["ebb_smoke_hr"], 0, None) ebb_tot_reshaped = np.nan_to_num(ebb_tot_reshaped) fire_age = np.clip(fire_age, 0, None) @@ -192,9 +237,10 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: ds_out.variables[varname][0, :, :] = fill_array self.log("process_emissions: exit") + # pylint: enable=too-many-statements - def average_frp(self, forecast_metadata: pd.DataFrame) -> Dict[FrpVariable, np.ndarray]: - self.log(f"average_frp: entering") + def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: + self.log("average_frp: entering") frp_daily = np.zeros(self._context.grid_out_shape).ravel() ebb_smoke_total = [] @@ -244,19 +290,24 @@ def average_frp(self, forecast_metadata: pd.DataFrame) -> Dict[FrpVariable, np.n np.nan_to_num(frp_avg_reshaped, copy=False, nan=0.0) self.log("average_frp: exiting") - return { - FrpVariable.FRP_AVG: frp_avg_reshaped, - FrpVariable.EBB_TOTAL: ebb_total_reshaped, - } + return AverageFrpOutput( + data={ + "frp_avg_hr": frp_avg_reshaped, + "ebb_smoke_hr": ebb_total_reshaped, + } + ) def create_cycle_processor( context: SmokeDustContext, ) -> AbstractSmokeDustCycleProcessor: - match context.ebb_dcycle_flag: + """ + Factory function to create the smoke/dust cycle processor. 
+ """ + match context.ebb_dcycle: case EbbDCycle.ONE: return SmokeDustCycleOne(context) case EbbDCycle.TWO: return SmokeDustCycleTwo(context) case _: - raise NotImplementedError(context.ebb_dcycle_flag) + raise NotImplementedError(context.ebb_dcycle) diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py index ba1bfbd0b1..6d41f9f331 100644 --- a/ush/smoke_dust/core/preprocessor.py +++ b/ush/smoke_dust/core/preprocessor.py @@ -1,3 +1,5 @@ +"""Smoke/dust preprocessor core implementation.""" + import fnmatch import logging from pathlib import Path @@ -17,10 +19,11 @@ class SmokeDustPreprocessor: + """Implements smoke/dust preprocessing such as regridding and IC value calculations.""" def __init__(self, context: SmokeDustContext) -> None: self._context = context - self.log(f"__init__: enter") + self.log("__init__: enter") # Processes regridding from source data to destination analysis grid self._regrid_processor = SmokeDustRegridProcessor(context) @@ -35,10 +38,12 @@ def __init__(self, context: SmokeDustContext) -> None: self.log("__init__: exit") def log(self, *args: Any, **kwargs: Any) -> None: + """See ``SmokeDustContext.log``.""" self._context.log(*args, **kwargs) @property def forecast_dates(self) -> pd.DatetimeIndex: + """Create the forecast dates for cycle.""" if self._forecast_dates is not None: return self._forecast_dates start_datetime = self._cycle_processor.create_start_datetime() @@ -51,11 +56,18 @@ def forecast_dates(self) -> pd.DatetimeIndex: @property def forecast_metadata(self) -> pd.DataFrame: + """Create forecast metadata consisting of: + + * ``forecast_date``: The forecast timestep as a `datetime` object. + * ``rave_interpolated``: To the date's corresponding interpolated RAVE file. Null if not + found. + * ``rave_raw``: Raw RAVE data before interpolation. Null if not found. + """ if self._forecast_metadata is not None: return self._forecast_metadata # Collect metadata on data files related to forecast dates - self.log(f"creating forecast metadata") + self.log("creating forecast metadata") intp_path = [] rave_to_forecast = [] for date in self.forecast_dates: @@ -102,6 +114,9 @@ def forecast_metadata(self) -> pd.DataFrame: @property def is_first_day(self) -> bool: + """``True`` if this is considered the "first day" of the simulation where there is no + interpolated or raw RAVE data available.""" + is_first_day = ( self.forecast_metadata["rave_interpolated"].isnull().all() and self.forecast_metadata["rave_raw"].isnull().all() @@ -110,6 +125,7 @@ def is_first_day(self) -> bool: return is_first_day def run(self) -> None: + """Run the preprocessor.""" self.log("run: entering") if self.is_first_day: if self._context.rank == 0: @@ -121,6 +137,8 @@ def run(self) -> None: self.log("run: exiting") def create_dummy_emissions_file(self) -> None: + """Create a dummy emissions file. 
This occurs if it is the first day of the forecast or + there is an exception and the context is set to not exit on error.""" self.log("create_dummy_emissions_file: enter") self.log(f"{self._context.emissions_path=}") with open_nc(self._context.emissions_path, "w", parallel=False, clobber=True) as ds: @@ -140,4 +158,5 @@ def create_dummy_emissions_file(self) -> None: self.log("create_dummy_emissions_file: exit") def finalize(self) -> None: + """Finalize the preprocessor.""" self.log("finalize: exiting") diff --git a/ush/smoke_dust/core/regrid/common.py b/ush/smoke_dust/core/regrid/common.py index d75d1f366f..75a3a899cc 100644 --- a/ush/smoke_dust/core/regrid/common.py +++ b/ush/smoke_dust/core/regrid/common.py @@ -1,3 +1,5 @@ +"""Common regridding functionality used by the regrid processor.""" + import abc from pathlib import Path from typing import Tuple, Literal, Union, Dict, Any @@ -13,6 +15,8 @@ class Dimension(BaseModel): + """A dimension object containing metadata and rank bounds information.""" + name: NameListType size: int lower: int @@ -22,9 +26,14 @@ class Dimension(BaseModel): class DimensionCollection(BaseModel): + """A collection of dimension objects.""" + value: Tuple[Dimension, ...] - def get(self, name: str | NameListType) -> Dimension: + def get(self, name: Union[str, NameListType]) -> Dimension: + """ + Get a dimension object from the collection. + """ if isinstance(name, str): name_to_find = (name,) else: @@ -37,21 +46,28 @@ def get(self, name: str | NameListType) -> Dimension: class AbstractWrapper(abc.ABC, BaseModel): + """ + Superclass for all wrapper objects. Wrapper objects map metadata to an associated ``esmpy`` + object. + """ + model_config = ConfigDict(arbitrary_types_allowed=True) dims: DimensionCollection class GridSpec(BaseModel): + """Defines a grid specification that can be read from a netCDF file.""" + model_config = ConfigDict(frozen=True) x_center: str y_center: str x_dim: NameListType y_dim: NameListType - x_corner: str | None = None - y_corner: str | None = None - x_corner_dim: NameListType | None = None - y_corner_dim: NameListType | None = None + x_corner: Union[str, None] = None + y_corner: Union[str, None] = None + x_corner_dim: Union[NameListType, None] = None + y_corner_dim: Union[NameListType, None] = None x_index: int = 0 y_index: int = 1 @@ -63,34 +79,40 @@ def _validate_model_(self) -> "GridSpec": self.x_corner_dim, self.y_corner_dim, ] - is_given_sum = sum([ii is not None for ii in corner_meta]) + is_given_sum = sum(ii is not None for ii in corner_meta) if is_given_sum > 0 and is_given_sum != len(corner_meta): raise ValueError("if one corner name is supplied, then all must be supplied") return self @property def has_corners(self) -> bool: + """Returns true if the grid has corners.""" return self.x_corner is not None def get_x_corner(self) -> str: + """Get the name of the `x` corner.""" if self.x_corner is None: raise ValueError return self.x_corner def get_y_corner(self) -> str: + """Get the name of the `y` corner.""" if self.y_corner is None: raise ValueError return self.y_corner def get_x_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: + """Get x-coordinate data from a grid.""" return grid.get_coords(self.x_index, staggerloc=staggerloc) def get_y_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarray: + """Get y-coordinate data from a grid.""" return grid.get_coords(self.y_index, staggerloc=staggerloc) def create_grid_dims( self, ds: nc.Dataset, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc ) 
-> DimensionCollection: + """Create a dimension collection from a netCDF dataset and ``esmpy`` grid.""" if staggerloc == esmpy.StaggerLoc.CENTER: x_dim, y_dim = self.x_dim, self.y_dim elif staggerloc == esmpy.StaggerLoc.CORNER: @@ -99,7 +121,7 @@ def create_grid_dims( raise NotImplementedError(staggerloc) x_dimobj = Dimension( name=x_dim, - size=get_nc_dimension(ds, x_dim).size, + size=_get_nc_dimension_(ds, x_dim).size, lower=grid.lower_bounds[staggerloc][self.x_index], upper=grid.upper_bounds[staggerloc][self.x_index], staggerloc=staggerloc, @@ -107,7 +129,7 @@ def create_grid_dims( ) y_dimobj = Dimension( name=y_dim, - size=get_nc_dimension(ds, y_dim).size, + size=_get_nc_dimension_(ds, y_dim).size, lower=grid.lower_bounds[staggerloc][self.y_index], upper=grid.upper_bounds[staggerloc][self.y_index], staggerloc=staggerloc, @@ -123,35 +145,41 @@ def create_grid_dims( class GridWrapper(AbstractWrapper): + """Wraps an ``esmpy`` grid with dimension metadata.""" + value: esmpy.Grid spec: GridSpec - corner_dims: DimensionCollection | None = None + corner_dims: Union[DimensionCollection, None] = None def fill_nc_variables(self, path: Path): + """Fill netCDF variables using coordinate data from an ``esmpy`` grid.""" if self.corner_dims is not None: raise NotImplementedError with open_nc(path, "a") as ds: staggerloc = esmpy.StaggerLoc.CENTER x_center_data = self.spec.get_x_data(self.value, staggerloc) - set_variable_data(ds.variables[self.spec.x_center], self.dims, x_center_data) + _set_variable_data_(ds.variables[self.spec.x_center], self.dims, x_center_data) y_center_data = self.spec.get_y_data(self.value, staggerloc) - set_variable_data(ds.variables[self.spec.y_center], self.dims, y_center_data) + _set_variable_data_(ds.variables[self.spec.y_center], self.dims, y_center_data) class FieldWrapper(AbstractWrapper): + """Wraps an ``esmpy`` field with dimension metadata.""" + value: esmpy.Field gwrap: GridWrapper def fill_nc_variable(self, path: Path): + """Fill the netCDF variable associated with the ``esmpy`` field.""" with open_nc(path, "a") as ds: var = ds.variables[self.value.name] - set_variable_data(var, self.dims, self.value.data) + _set_variable_data_(var, self.dims, self.value.data) HasNcAttrsType = Union[nc.Dataset, nc.Variable] -def get_aliased_key(source: Dict, keys: NameListType | str) -> Any: +def _get_aliased_key_(source: Dict, keys: Union[NameListType, str]) -> Any: if isinstance(keys, str): keys_to_find = (keys,) else: @@ -164,11 +192,11 @@ def get_aliased_key(source: Dict, keys: NameListType | str) -> Any: raise ValueError(f"key not found: {keys}") -def get_nc_dimension(ds: nc.Dataset, names: NameListType) -> nc.Dimension: - return get_aliased_key(ds.dimensions, names) +def _get_nc_dimension_(ds: nc.Dataset, names: NameListType) -> nc.Dimension: + return _get_aliased_key_(ds.dimensions, names) -def create_dimension_map(dims: DimensionCollection) -> Dict[str, int]: +def _create_dimension_map_(dims: DimensionCollection) -> Dict[str, int]: ret = {} for idx, dim in enumerate(dims.value): for name in dim.name: @@ -177,19 +205,29 @@ def create_dimension_map(dims: DimensionCollection) -> Dict[str, int]: def load_variable_data(var: nc.Variable, target_dims: DimensionCollection) -> np.ndarray: + """ + Load variable data using bounds defined in the dimension collection. + + Args: + var: netCDF variable to load data from. + target_dims: Dimensions for the variable containing ``esmpy`` bounds. + + Returns: + The loaded data array. 
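+
+    Note:
+        The returned array is transposed so its axes follow the order of ``target_dims``
+        rather than the variable's on-disk dimension order.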
+ """ slices = [slice(target_dims.get(ii).lower, target_dims.get(ii).upper) for ii in var.dimensions] raw_data = var[*slices] dim_map = {dim: ii for ii, dim in enumerate(var.dimensions)} - axes = [get_aliased_key(dim_map, ii.name) for ii in target_dims.value] + axes = [_get_aliased_key_(dim_map, ii.name) for ii in target_dims.value] transposed_data = raw_data.transpose(axes) return transposed_data -def set_variable_data( +def _set_variable_data_( var: nc.Variable, target_dims: DimensionCollection, target_data: np.ndarray ) -> np.ndarray: - dim_map = create_dimension_map(target_dims) - axes = [get_aliased_key(dim_map, ii) for ii in var.dimensions] + dim_map = _create_dimension_map_(target_dims) + axes = [_get_aliased_key_(dim_map, ii) for ii in var.dimensions] transposed_data = target_data.transpose(axes) slices = [slice(target_dims.get(ii).lower, target_dims.get(ii).upper) for ii in var.dimensions] var[*slices] = transposed_data @@ -197,10 +235,13 @@ def set_variable_data( class NcToGrid(BaseModel): + """Converts a netCDF file to an ``esmpy`` grid.""" + path: Path spec: GridSpec def create_grid_wrapper(self) -> GridWrapper: + """Create a grid wrapper.""" with open_nc(self.path, "r") as ds: grid_shape = self._create_grid_shape_(ds) staggerloc = esmpy.StaggerLoc.CENTER @@ -224,8 +265,8 @@ def create_grid_wrapper(self) -> GridWrapper: return gwrap def _create_grid_shape_(self, ds: nc.Dataset) -> np.ndarray: - x_size = get_nc_dimension(ds, self.spec.x_dim).size - y_size = get_nc_dimension(ds, self.spec.y_dim).size + x_size = _get_nc_dimension_(ds, self.spec.x_dim).size + y_size = _get_nc_dimension_(ds, self.spec.y_dim).size if self.spec.x_index == 0: grid_shape = (x_size, y_size) elif self.spec.x_index == 1: @@ -246,19 +287,22 @@ def _add_corner_coords_(self, ds: nc.Dataset, grid: esmpy.Grid) -> DimensionColl class NcToField(BaseModel): + """Converts a netCDF file to an ``esmpy`` field.""" + path: Path name: str gwrap: GridWrapper - dim_time: NameListType | None = None + dim_time: Union[NameListType, None] = None staggerloc: int = esmpy.StaggerLoc.CENTER def create_field_wrapper(self) -> FieldWrapper: + """Create a field wrapper.""" with open_nc(self.path, "r") as ds: if self.dim_time is None: ndbounds = None target_dims = self.gwrap.dims else: - ndbounds = (len(get_nc_dimension(ds, self.dim_time)),) + ndbounds = (len(_get_nc_dimension_(ds, self.dim_time)),) time_dim = Dimension( name=self.dim_time, size=ndbounds[0], diff --git a/ush/smoke_dust/core/regrid/processor.py b/ush/smoke_dust/core/regrid/processor.py index d0f611f55a..a11e7b0838 100644 --- a/ush/smoke_dust/core/regrid/processor.py +++ b/ush/smoke_dust/core/regrid/processor.py @@ -1,3 +1,5 @@ +"""Implements the smoke/dust regrid processor.""" + import logging from copy import copy, deepcopy from typing import Any @@ -26,6 +28,7 @@ class SmokeDustRegridProcessor: + """Regrids smoke/dust data to the forecast grid.""" def __init__(self, context: SmokeDustContext): self._context = context @@ -41,9 +44,11 @@ def __init__(self, context: SmokeDustContext): self.__regridder = None def log(self, *args: Any, **kwargs: Any) -> None: + """See ``SmokeDustContext.log``.""" self._context.log(*args, **kwargs) def run(self, forecast_metadata: pd.DataFrame) -> None: + """Run the regrid processor. 
This may be run in parallel using MPI.""" # Select which RAVE files to interpolate rave_to_interpolate = forecast_metadata[ forecast_metadata["rave_interpolated"].isnull() @@ -116,11 +121,11 @@ def _get_regridder_(self, src_fwrap: FieldWrapper, dst_fwrap: FieldWrapper) -> e self.log(f"{src_fwrap.value.data.shape=}", level=logging.DEBUG) self.log(f"{dst_fwrap.value.data.shape=}", level=logging.DEBUG) if ( - self._context.predef_grid == PredefinedGrid.RRFS_NA_13km + self._context.predef_grid == PredefinedGrid.RRFS_NA_13KM or self._context.regrid_in_memory ): - # ESMF does not like reading the weights for this field combination (rc=-1). The error can be - # bypassed by creating weights in-memory. + # ESMF does not like reading the weights for this field combination (rc=-1). The + # error can be bypassed by creating weights in-memory. self.log("creating regridding in-memory") regridder = esmpy.Regrid( src_fwrap.value, @@ -128,7 +133,8 @@ def _get_regridder_(self, src_fwrap: FieldWrapper, dst_fwrap: FieldWrapper) -> e regrid_method=esmpy.RegridMethod.CONSERVE, unmapped_action=esmpy.UnmappedAction.IGNORE, ignore_degenerate=True, - # filename="/opt/project/weight_file.nc" # Can be used to create a weight file for testing + # Can be used to create a weight file for testing + # filename="/opt/project/weight_file.nc" ) else: self.log("creating regridding from file") @@ -199,7 +205,8 @@ def _run_impl_(self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Se rave_qa = load_variable_data(rave_ds.variables["QA"], src_fwrap.dims) set_to_zero = rave_qa < 2 self.log( - f"RAVE QA filter applied: {self._context.rave_qa_filter=}; {set_to_zero.size=}; {np.sum(set_to_zero)=}" + f"RAVE QA filter applied: {self._context.rave_qa_filter=}; " + f"{set_to_zero.size=}; {np.sum(set_to_zero)=}" ) src_data[set_to_zero] = 0.0 else: @@ -207,12 +214,12 @@ def _run_impl_(self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Se raise NotImplementedError # Execute the ESMF regridding - self.log(f"run regridding", level=logging.DEBUG) + self.log("run regridding", level=logging.DEBUG) regridder = self._get_regridder_(src_fwrap, dst_fwrap) _ = regridder(src_fwrap.value, dst_fwrap.value) # Persist the destination field - self.log(f"filling netcdf", level=logging.DEBUG) + self.log("filling netcdf", level=logging.DEBUG) dst_fwrap.fill_nc_variable(output_file_path) # Update the forecast metadata with the interpolated RAVE file data diff --git a/ush/smoke_dust/core/variable.py b/ush/smoke_dust/core/variable.py index 93b23597ba..38dbcdf07b 100644 --- a/ush/smoke_dust/core/variable.py +++ b/ush/smoke_dust/core/variable.py @@ -1,24 +1,33 @@ +"""Variable definitions used by smoke/dust.""" + from typing import Tuple -from pydantic import BaseModel, field_validator +from pydantic import BaseModel, field_validator, Field class SmokeDustVariable(BaseModel): - name: str - long_name: str - units: str - fill_value_str: str - fill_value_float: float + """Model for a smoke/dust variable.""" + + name: str = Field(description="Standard (short) variable name.") + long_name: str = Field(description="Long (descriptive) name for the variable.") + units: str = Field(description="Units for the variable.") + fill_value_str: str = Field(description="Fill value for the variable in string format.") + fill_value_float: float = Field(description="Fill value for the variable in float format.") SmokeDustVariablesType = Tuple[SmokeDustVariable, ...] 
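For readers less familiar with the pydantic v2 pattern used throughout ``variable.py``, the model above validates its fields when the object is constructed. A minimal, self-contained sketch of the same validate-on-construction idea follows; the ``Demo*`` names and field values are illustrative assumptions, not SRW App code:

    from typing import Tuple

    from pydantic import BaseModel, Field, field_validator


    class DemoVariable(BaseModel):
        """Illustrative stand-in for a smoke/dust variable entry."""

        name: str = Field(description="Short variable name.")
        units: str = Field(description="Units for the variable.")


    class DemoVariables(BaseModel):
        """Collection that rejects duplicate variable names at construction time."""

        values: Tuple[DemoVariable, ...]

        @field_validator("values", mode="after")
        @classmethod
        def _validate_values_(cls, values: Tuple[DemoVariable, ...]) -> Tuple[DemoVariable, ...]:
            # Reject duplicates immediately rather than deep inside the preprocessor.
            names = [entry.name for entry in values]
            if len(names) != len(set(names)):
                raise ValueError("Variable names must be unique")
            return values


    # Construction validates eagerly; a second entry named "frp" would raise ValueError.
    demo = DemoVariables(values=(DemoVariable(name="frp", units="MW"),))

Because the validator runs as soon as the collection is built, a malformed variable table fails fast instead of surfacing later as a bad netCDF write.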
class SmokeDustVariables(BaseModel): - values: SmokeDustVariablesType + """Canonical collection of smoke/dust variables.""" + + values: SmokeDustVariablesType = Field( + description="All variables associated with the smoke/dust preprocessor." + ) def get(self, name: str) -> SmokeDustVariable: - for value in self.values: + """Get a smoke/dust variable from the collection.""" + for value in self.values: # pylint: disable=not-an-iterable if value.name == name: return value raise ValueError(name) @@ -26,6 +35,7 @@ def get(self, name: str) -> SmokeDustVariable: @field_validator("values", mode="after") @classmethod def _validate_values_(cls, values: SmokeDustVariablesType) -> SmokeDustVariablesType: + """Asserts all variable names are unique.""" names = [ii.name for ii in values] if len(names) != len(set(names)): raise ValueError("Variable names must be unique") diff --git a/ush/smoke_dust/generate_emissions.py b/ush/smoke_dust/generate_emissions.py index d218d00749..4a8db12011 100755 --- a/ush/smoke_dust/generate_emissions.py +++ b/ush/smoke_dust/generate_emissions.py @@ -1,57 +1,112 @@ #!/usr/bin/env python3 -######################################################################### -# # -# Python script for fire emissions preprocessing from RAVE FRP and FRE # -# (Li et al.,2022). # -# johana.romero-alvarez@noaa.gov # -# # -######################################################################### - +""" +Python script for fire emissions preprocessing from RAVE FRP and FRE (Li et al.,2022) +Author: johana.romero-alvarez@noaa.gov +""" +import os import sys +from enum import StrEnum, unique from pathlib import Path -from typing import List + +import typer sys.path.append(str(Path(__file__).parent.parent)) -from smoke_dust.core.context import SmokeDustContext +# pylint: disable=wrong-import-position +from smoke_dust.core.context import ( + PredefinedGrid, + EbbDCycle, + RaveQaFilter, + LogLevel, + SmokeDustContext, +) from smoke_dust.core.preprocessor import SmokeDustPreprocessor +# pylint: enable=wrong-import-position + +os.environ["NO_COLOR"] = "1" +app = typer.Typer(pretty_exceptions_enable=False) + + +@unique +class StringBool(StrEnum): + """Allow CLI to use boolean string arguments to avoid logic in shell scripts.""" -def main(args: List[str]) -> None: - """ - Prepares fire-related ICs. This is the main function that handles data movement and interpolation. - #tdk: doc - Args: - staticdir: Path to fix files for the smoke and dust component - ravedir: Path to the directory containing RAVE fire data files (hourly). This is typically the working directory (DATA) - intp_dir: Path to interpolated RAVE data files from the previous cycles (DATA_SHARE) - predef_grid: If ``RRFS_NA_3km``, use pre-defined grid dimensions - ebb_dcycle_flag: Select the EBB cycle to run. Valid values are ``"1"`` or ``"2"`` - restart_interval: Indicates if restart files should be copied. The actual interval values are not used - persistence: If ``TRUE``, use satellite observations from the previous day. Otherwise, use observations from the same day. - """ - - context = SmokeDustContext.create_from_args(args) + TRUE = "True" + FALSE = "False" + + +# pylint: disable=line-too-long +@app.command() +def main( # pylint:disable=too-many-arguments,too-many-positional-arguments + staticdir: Path = typer.Option( + ..., "--staticdir", help="Path to the smoke and dust fixed files." + ), + ravedir: Path = typer.Option( + ..., "--ravedir", help="Path to the directory containing RAVE data files (hourly)." 
+ ), + intp_dir: Path = typer.Option( + ..., "--intp-dir", help="Path to the directory containing interpolated RAVE data files." + ), + predef_grid: PredefinedGrid = typer.Option( + ..., "--predef-grid", help="SRW predefined grid to use as the forecast domain." + ), + ebb_dcycle: EbbDCycle = typer.Option(..., "--ebb-dcycle", help="The forecast cycle to run."), + restart_interval: str = typer.Option( + ..., + "--restart-interval", + help="Restart intervals used for restart file search. For example '6 12 18 24'.", + ), + persistence: StringBool = typer.Option( + ..., + "--persistence", + help="If true, use satellite observations from the previous day. Otherwise, use observations from the same day.", + ), + rave_qa_filter: RaveQaFilter = typer.Option( + ..., "--rave-qa-filter", help="Filter level for RAVE QA flags when regridding fields." + ), + log_level: LogLevel = typer.Option( + LogLevel.INFO, "--log-level", help="Logging level to use for the preprocessor." + ), + exit_on_error: StringBool = typer.Option( + StringBool.TRUE, + "--exit-on-error", + help="If false, log errors and write a dummy emissions file but do not raise an exception.", + ), + regrid_in_memory: StringBool = typer.Option( + StringBool.FALSE, + "--regrid-in-memory", + help="If true, do esmpy regridding in-memory as opposed to reading from the fixed weight file.", + ), +): + # pylint:enable=line-too-long + """Main entrypoint for generating ICs for smoke and dust.""" + typer.echo("Welcome to interpolating RAVE and processing fire emissions!") + + context = SmokeDustContext( + staticdir=staticdir, + ravedir=ravedir, + intp_dir=intp_dir, + predef_grid=predef_grid, + ebb_dcycle=ebb_dcycle, + restart_interval=restart_interval, + persistence=persistence, + rave_qa_filter=rave_qa_filter, + log_level=log_level, + exit_on_error=exit_on_error, + regrid_in_memory=regrid_in_memory, + ) processor = SmokeDustPreprocessor(context) try: processor.run() processor.finalize() - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught processor.create_dummy_emissions_file() context.log("unhandled error", exc_info=e) + typer.echo("Exiting. Bye!") + if __name__ == "__main__": - print("") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("Welcome to interpolating RAVE and processing fire emissions!") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("") - # tdk:story: use argparse - main(sys.argv[1:]) - print("") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("Exiting. 
Bye!") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("") + app() From 02f815f1873cc55ee56691a94fab6165155e6790 Mon Sep 17 00:00:00 2001 From: benkozi Date: Thu, 6 Feb 2025 11:14:30 -0700 Subject: [PATCH 30/41] fix: missed some pylints with wrong .rc file (#8) --- .pylintrc | 2 +- tests/test_python/test_smoke_dust/conftest.py | 18 ++--- .../test_core/test_preprocessor.py | 34 +++++----- .../test_smoke_dust/test_core/test_regrid.py | 20 +++--- .../test_generate_emissions.py | 4 +- ush/smoke_dust/core/common.py | 48 +++++++------- ush/smoke_dust/core/context.py | 12 ++-- ush/smoke_dust/core/cycle.py | 42 ++++++------ ush/smoke_dust/core/preprocessor.py | 16 ++--- ush/smoke_dust/core/regrid/common.py | 66 ++++++++++--------- ush/smoke_dust/core/regrid/processor.py | 29 ++++---- ush/smoke_dust/generate_emissions.py | 2 +- 12 files changed, 153 insertions(+), 140 deletions(-) diff --git a/.pylintrc b/.pylintrc index 1d441d141f..3167c2eb34 100644 --- a/.pylintrc +++ b/.pylintrc @@ -25,7 +25,7 @@ clear-cache-post-run=no # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code. -extension-pkg-allow-list= +extension-pkg-allow-list=mpi4py,netCDF4 # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py index 48ef54f4f6..8ae06ac5f8 100644 --- a/tests/test_python/test_smoke_dust/conftest.py +++ b/tests/test_python/test_smoke_dust/conftest.py @@ -6,9 +6,9 @@ from pathlib import Path from typing import Union -import netCDF4 as nc import numpy as np import pytest +from netCDF4 import Dataset from smoke_dust.core.context import SmokeDustContext @@ -47,11 +47,11 @@ def create_fake_grid_out(root_dir: Path, shape: FakeGridOutShape) -> None: root_dir: Directory to write grid to. shape: Grid output shape. """ - with nc.Dataset(root_dir / "ds_out_base.nc", "w") as ds: - ds.createDimension("grid_yt", shape.y_size) - ds.createDimension("grid_xt", shape.x_size) + with Dataset(root_dir / "ds_out_base.nc", "w") as nc_ds: + nc_ds.createDimension("grid_yt", shape.y_size) + nc_ds.createDimension("grid_xt", shape.x_size) for varname in ["area", "grid_latt", "grid_lont"]: - var = ds.createVariable(varname, "f4", ("grid_yt", "grid_xt")) + var = nc_ds.createVariable(varname, "f4", ("grid_yt", "grid_xt")) var[:] = np.ones((shape.y_size, shape.x_size)) @@ -88,8 +88,8 @@ def create_fake_context(root_dir: Path, overrides: Union[dict, None] = None) -> try: context = SmokeDustContext.model_validate(kwds) except: - for ii in ["CDATE", "DATA"]: - os.unsetenv(ii) + for env_var in ["CDATE", "DATA"]: + os.unsetenv(env_var) raise return context @@ -103,8 +103,8 @@ def create_file_hash(path: Path) -> str: Returns: The file's hex digest. 
""" - with open(path, "rb") as f: + with open(path, "rb") as target_file: file_hash = hashlib.md5() - while chunk := f.read(8192): + while chunk := target_file.read(8192): file_hash.update(chunk) return file_hash.hexdigest() diff --git a/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py b/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py index a3f4278bb5..4a8ec21e31 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py +++ b/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py @@ -3,11 +3,11 @@ from pathlib import Path from typing import Type -import netCDF4 as nc import numpy as np import pandas as pd import pytest from _pytest.fixtures import SubRequest +from netCDF4 import Dataset from pydantic import BaseModel from pytest_mock import MockerFixture @@ -41,13 +41,15 @@ def create_fake_restart_files( restart_dir.mkdir() for date in forecast_dates: restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" - with nc.Dataset(restart_file, "w") as ds: - ds.createDimension("Time") - ds.createDimension("yaxis_1", shape.y_size) - ds.createDimension("xaxis_1", shape.x_size) - totprcp_ave = ds.createVariable("totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) + with Dataset(restart_file, "w") as nc_ds: + nc_ds.createDimension("Time") + nc_ds.createDimension("yaxis_1", shape.y_size) + nc_ds.createDimension("xaxis_1", shape.x_size) + totprcp_ave = nc_ds.createVariable("totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) totprcp_ave[0, ...] = np.ones(shape.as_tuple) - rrfs_hwp_ave = ds.createVariable("rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) + rrfs_hwp_ave = nc_ds.createVariable( + "rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1") + ) rrfs_hwp_ave[0, ...] = totprcp_ave[:] + 2 @@ -69,12 +71,12 @@ def create_fake_rave_interpolated( for date in forecast_dates: intp_file = root_dir / f"{rave_to_intp}{date}00_{date}59.nc" dims = ("t", "lat", "lon") - with nc.Dataset(intp_file, "w") as ds: - ds.createDimension("t") - ds.createDimension("lat", shape.y_size) - ds.createDimension("lon", shape.x_size) + with Dataset(intp_file, "w") as nc_ds: + nc_ds.createDimension("t") + nc_ds.createDimension("lat", shape.y_size) + nc_ds.createDimension("lon", shape.x_size) for varname in ["frp_avg_hr", "FRE"]: - var = ds.createVariable(varname, "f4", dims) + var = nc_ds.createVariable(varname, "f4", dims) var[0, ...] = np.ones(shape.as_tuple) @@ -86,10 +88,10 @@ def create_fake_veg_map(root_dir: Path, shape: FakeGridOutShape) -> None: root_dir: The directory to create the file in. shape: Shape of the output grid. 
""" - with nc.Dataset(root_dir / "veg_map.nc", "w") as ds: - ds.createDimension("grid_yt", shape.y_size) - ds.createDimension("grid_xt", shape.x_size) - emiss_factor = ds.createVariable("emiss_factor", "f4", ("grid_yt", "grid_xt")) + with Dataset(root_dir / "veg_map.nc", "w") as nc_ds: + nc_ds.createDimension("grid_yt", shape.y_size) + nc_ds.createDimension("grid_xt", shape.x_size) + emiss_factor = nc_ds.createVariable("emiss_factor", "f4", ("grid_yt", "grid_xt")) emiss_factor[:] = np.ones((shape.y_size, shape.x_size)) diff --git a/tests/test_python/test_smoke_dust/test_core/test_regrid.py b/tests/test_python/test_smoke_dust/test_core/test_regrid.py index ea5b33224f..3a040a61fd 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_regrid.py +++ b/tests/test_python/test_smoke_dust/test_core/test_regrid.py @@ -143,27 +143,27 @@ def create_fake_rave_and_rrfs_like_data(params: FakeGridParams) -> xr.Dataset: lon = np.arange(params.shape.x_size, dtype=float) + params.min_lon lat = np.arange(params.shape.y_size, dtype=float) + params.min_lat lon_mesh, lat_mesh = np.meshgrid(lon, lat) - ds = xr.Dataset() + nc_ds = xr.Dataset() dims = ["grid_yt", "grid_xt"] - ds["grid_lont"] = xr.DataArray(lon_mesh, dims=dims) - ds["grid_latt"] = xr.DataArray(lat_mesh, dims=dims) + nc_ds["grid_lont"] = xr.DataArray(lon_mesh, dims=dims) + nc_ds["grid_latt"] = xr.DataArray(lat_mesh, dims=dims) if params.with_corners: lonc = np.hstack((lon - 0.5, [lon[-1] + 0.5])) latc = np.hstack((lat - 0.5, [lat[-1] + 0.5])) lonc_mesh, latc_mesh = np.meshgrid(lonc, latc) - ds["grid_lon"] = xr.DataArray(lonc_mesh, dims=["grid_y", "grid_x"]) - ds["grid_lat"] = xr.DataArray(latc_mesh, dims=["grid_y", "grid_x"]) + nc_ds["grid_lon"] = xr.DataArray(lonc_mesh, dims=["grid_y", "grid_x"]) + nc_ds["grid_lat"] = xr.DataArray(latc_mesh, dims=["grid_y", "grid_x"]) if params.fields is not None: if params.ntime is not None: field_dims = ["time"] + dims else: field_dims = dims for field in params.fields: - ds[field] = create_analytic_data_array( + nc_ds[field] = create_analytic_data_array( field_dims, lon_mesh, lat_mesh, ntime=params.ntime ) - ds.to_netcdf(params.path) - return ds + nc_ds.to_netcdf(params.path) + return nc_ds class TestSmokeDustRegridProcessor: # pylint: disable=too-few-public-methods @@ -184,6 +184,6 @@ def test_run( f"*{data_for_test.context.rave_to_intp}*nc", root_dir=tmp_path ) assert len(interpolated_files) == 24 - for f in interpolated_files: - fpath = tmp_path / f + for intp_file in interpolated_files: + fpath = tmp_path / intp_file assert create_file_hash(fpath) == "8e90b769137aad054a2e49559d209c4d" diff --git a/tests/test_python/test_smoke_dust/test_generate_emissions.py b/tests/test_python/test_smoke_dust/test_generate_emissions.py index df0527675a..8f4cce37cf 100644 --- a/tests/test_python/test_smoke_dust/test_generate_emissions.py +++ b/tests/test_python/test_smoke_dust/test_generate_emissions.py @@ -45,8 +45,8 @@ def test(tmp_path: Path, fake_grid_out_shape: FakeGridOutShape, mocker: MockerFi ] result = runner.invoke(app, args, catch_exceptions=False) except: - for ii in ["CDATE", "DATA"]: - os.unsetenv(ii) + for env_var in ["CDATE", "DATA"]: + os.unsetenv(env_var) raise print(result.output) diff --git a/ush/smoke_dust/core/common.py b/ush/smoke_dust/core/common.py index 3699c1601f..4e30df4d6b 100644 --- a/ush/smoke_dust/core/common.py +++ b/ush/smoke_dust/core/common.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Tuple, Literal, Dict -import netCDF4 as nc +from netCDF4 import Dataset import 
numpy as np import pandas as pd from mpi4py import MPI @@ -18,7 +18,7 @@ def open_nc( mode: Literal["r", "w", "a"] = "r", clobber: bool = False, parallel: bool = True, -) -> nc.Dataset: +) -> Dataset: """ Open a netCDF file for various operations. @@ -31,7 +31,7 @@ def open_nc( Returns: A netCDF dataset object. """ - ds = nc.Dataset( + nc_ds = Dataset( path, mode=mode, clobber=clobber, @@ -40,23 +40,23 @@ def open_nc( info=MPI.Info(), ) try: - yield ds + yield nc_ds finally: - ds.close() + nc_ds.close() def create_sd_coordinate_variable( - ds: nc.Dataset, + nc_ds: Dataset, sd_variable: SmokeDustVariable, ) -> None: """ Create a smoke/dust netCDF spatial coordinate variable. Args: - ds: Dataset to update. + nc_ds: Dataset to update. sd_variable: Contains variable metadata. """ - var_out = ds.createVariable( + var_out = nc_ds.createVariable( sd_variable.name, "f4", ("lat", "lon"), fill_value=sd_variable.fill_value_float ) var_out.units = sd_variable.units @@ -67,7 +67,7 @@ def create_sd_coordinate_variable( def create_sd_variable( - ds: nc.Dataset, + nc_ds: Dataset, sd_variable: SmokeDustVariable, fill_first_time_index: bool = True, ) -> None: @@ -75,11 +75,11 @@ def create_sd_variable( Create a smoke/dust netCDF variable. Args: - ds: Dataset to update + nc_ds: Dataset to update sd_variable: Contains variable metadata fill_first_time_index: If True, fill the first time index with provided `fill_value_float` """ - var_out = ds.createVariable( + var_out = nc_ds.createVariable( sd_variable.name, "f4", ("t", "lat", "lon"), @@ -106,26 +106,26 @@ def create_sd_variable( def create_template_emissions_file( - ds: nc.Dataset, grid_shape: Tuple[int, int], is_dummy: bool = False + nc_ds: Dataset, grid_shape: Tuple[int, int], is_dummy: bool = False ): """ Create a smoke/dust template netCDF emissions file. Args: - ds: The target netCDF dataset object. + nc_ds: The target netCDF dataset object. grid_shape: The grid shape to create. is_dummy: Converted to a netCDF attribute to indicate if the created file is dummy emissions or will contain actual values. """ - ds.createDimension("t", None) - ds.createDimension("lat", grid_shape[0]) - ds.createDimension("lon", grid_shape[1]) - setattr(ds, "PRODUCT_ALGORITHM_VERSION", "Beta") - setattr(ds, "TIME_RANGE", "1 hour") - setattr(ds, "is_dummy", str(is_dummy)) + nc_ds.createDimension("t", None) + nc_ds.createDimension("lat", grid_shape[0]) + nc_ds.createDimension("lon", grid_shape[1]) + setattr(nc_ds, "PRODUCT_ALGORITHM_VERSION", "Beta") + setattr(nc_ds, "TIME_RANGE", "1 hour") + setattr(nc_ds, "is_dummy", str(is_dummy)) for varname in ["geolat", "geolon"]: - create_sd_coordinate_variable(ds, SD_VARS.get(varname)) + create_sd_coordinate_variable(nc_ds, SD_VARS.get(varname)) def create_descriptive_statistics( @@ -145,13 +145,13 @@ def create_descriptive_statistics( Returns: A dataframe containing descriptive statistics fields. 
""" - df = pd.DataFrame.from_dict({k: v.filled(np.nan).ravel() for k, v in container.items()}) - desc = df.describe() + data_frame = pd.DataFrame.from_dict({k: v.filled(np.nan).ravel() for k, v in container.items()}) + desc = data_frame.describe() adds = {} for field_name in container.keys(): adds[field_name] = [ - df[field_name].sum(), - df[field_name].isnull().sum(), + data_frame[field_name].sum(), + data_frame[field_name].isnull().sum(), origin, path, ] diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py index 5b8d6cb9e5..62b70171df 100644 --- a/ush/smoke_dust/core/context.py +++ b/ush/smoke_dust/core/context.py @@ -189,10 +189,10 @@ def _initialize_values_(cls, values: dict) -> dict: def _finalize_model_(self) -> "SmokeDustContext": self._logger = self._init_logging_() - with open_nc(self.grid_out, parallel=False) as ds: + with open_nc(self.grid_out, parallel=False) as nc_ds: self.grid_out_shape = ( - ds.dimensions["grid_yt"].size, - ds.dimensions["grid_xt"].size, + nc_ds.dimensions["grid_yt"].size, + nc_ds.dimensions["grid_xt"].size, ) self.log(f"{self.grid_out_shape=}") return self @@ -205,7 +205,7 @@ def veg_map(self) -> Path: @property def rave_to_intp(self) -> str: """File prefix for interpolated RAVE files.""" - return self.predef_grid.value + "_intp_" + return self.predef_grid.value + "_intp_" # pylint: disable=no-member @property def grid_in(self) -> Path: @@ -287,7 +287,9 @@ def _init_logging_(self) -> logging.Logger: "loggers": { project_name: { "handlers": ["default"], - "level": getattr(logging, self.log_level.value.upper()), + "level": getattr( + logging, self.log_level.value.upper() # pylint: disable=no-member + ), }, }, } diff --git a/ush/smoke_dust/core/cycle.py b/ush/smoke_dust/core/cycle.py index 13de40e7e0..da28e452c4 100644 --- a/ush/smoke_dust/core/cycle.py +++ b/ush/smoke_dust/core/cycle.py @@ -111,16 +111,16 @@ def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: ebb_smoke_total = [] frp_avg_hr = [] - with xr.open_dataset(self._context.veg_map) as ds: - emiss_factor = ds["emiss_factor"].values - with xr.open_dataset(self._context.grid_out) as ds: - target_area = ds["area"].values + with xr.open_dataset(self._context.veg_map) as nc_ds: + emiss_factor = nc_ds["emiss_factor"].values + with xr.open_dataset(self._context.grid_out) as nc_ds: + target_area = nc_ds["area"].values for row_idx, row_df in forecast_metadata.iterrows(): self.log(f"processing emissions: {row_idx}, {row_df.to_dict()}") - with xr.open_dataset(row_df["rave_interpolated"]) as ds: - fre = ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values - frp = ds[EmissionVariable.FRP.smoke_dust_name()][0, :, :].values + with xr.open_dataset(row_df["rave_interpolated"]) as nc_ds: + fre = nc_ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values + frp = nc_ds[EmissionVariable.FRP.smoke_dust_name()][0, :, :].values frp_avg_hr.append(frp) ebb_hourly = (fre * emiss_factor * self._context.beta * self._context.fg_to_ug) / ( @@ -163,9 +163,9 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: phy_data_path = self._context.hourly_hwpdir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" rave_path = self._context.intp_dir / f"{self._context.rave_to_intp}{date}00_{date}59.nc" self.log(f"processing emissions for: {phy_data_path=}, {rave_path=}") - with xr.open_dataset(phy_data_path) as ds: - hwp_values = ds.rrfs_hwp_ave.values.ravel() - tprcp_values = ds.totprcp_ave.values.ravel() + with xr.open_dataset(phy_data_path) as nc_ds: + hwp_values = 
nc_ds.rrfs_hwp_ave.values.ravel() + tprcp_values = nc_ds.totprcp_ave.values.ravel() totprcp += np.where(tprcp_values > 0, tprcp_values, 0) hwp_ave.append(hwp_values) hwp_ave_arr = np.nanmean(hwp_ave, axis=0).reshape(*self._context.grid_out_shape) @@ -178,21 +178,21 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: t_fire = np.zeros(self._context.grid_out_shape) for date in forecast_metadata["forecast_date"]: rave_path = self._context.intp_dir / f"{self._context.rave_to_intp}{date}00_{date}59.nc" - with xr.open_dataset(rave_path) as ds: - frp = ds.frp_avg_hr[0, :, :].values + with xr.open_dataset(rave_path) as nc_ds: + frp = nc_ds.frp_avg_hr[0, :, :].values dates_filtered = np.where(frp > 0, int(date[:10]), 0) t_fire = np.maximum(t_fire, dates_filtered) t_fire_flattened = [int(i) if i != 0 else 0 for i in t_fire.flatten()] hr_ends = [ dt.datetime.strptime(str(hr), "%Y%m%d%H") if hr != 0 else 0 for hr in t_fire_flattened ] - te = np.array( + temp_fire_age = np.array( [ ((self._context.fcst_datetime - i).total_seconds() / 3600 if i != 0 else 0) for i in hr_ends ] ) - fire_age = np.array(te).reshape(self._context.grid_out_shape) + fire_age = np.array(temp_fire_age).reshape(self._context.grid_out_shape) # Ensure arrays are not negative or NaN frp_avg_reshaped = np.clip(derived.data["frp_avg_hr"], 0, None) @@ -245,16 +245,16 @@ def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: frp_daily = np.zeros(self._context.grid_out_shape).ravel() ebb_smoke_total = [] - with xr.open_dataset(self._context.veg_map) as ds: - emiss_factor = ds["emiss_factor"].values - with xr.open_dataset(self._context.grid_out) as ds: - target_area = ds["area"].values + with xr.open_dataset(self._context.veg_map) as nc_ds: + emiss_factor = nc_ds["emiss_factor"].values + with xr.open_dataset(self._context.grid_out) as nc_ds: + target_area = nc_ds["area"].values for row_idx, row_df in forecast_metadata.iterrows(): self.log(f"processing emissions: {row_idx}, {row_df.to_dict()}") - with xr.open_dataset(row_df["rave_interpolated"]) as ds: - fre = ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values - frp = ds[EmissionVariable.FRP.smoke_dust_name()][0, :, :].values + with xr.open_dataset(row_df["rave_interpolated"]) as nc_ds: + fre = nc_ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values + frp = nc_ds[EmissionVariable.FRP.smoke_dust_name()][0, :, :].values ebb_hourly = ( fre * emiss_factor * self._context.beta * self._context.fg_to_ug / target_area diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py index 6d41f9f331..da6d6a590d 100644 --- a/ush/smoke_dust/core/preprocessor.py +++ b/ush/smoke_dust/core/preprocessor.py @@ -102,15 +102,15 @@ def forecast_metadata(self) -> pd.DataFrame: self.log(f"{self.forecast_dates}", level=logging.DEBUG) self.log(f"{intp_path=}", level=logging.DEBUG) self.log(f"{rave_to_forecast=}", level=logging.DEBUG) - df = pd.DataFrame( + data_frame = pd.DataFrame( data={ "forecast_date": self.forecast_dates, "rave_interpolated": intp_path, "rave_raw": rave_to_forecast, } ) - self._forecast_metadata = df - return df + self._forecast_metadata = data_frame + return data_frame @property def is_first_day(self) -> bool: @@ -141,11 +141,11 @@ def create_dummy_emissions_file(self) -> None: there is an exception and the context is set to not exit on error.""" self.log("create_dummy_emissions_file: enter") self.log(f"{self._context.emissions_path=}") - with open_nc(self._context.emissions_path, "w", parallel=False, 
clobber=True) as ds: - create_template_emissions_file(ds, self._context.grid_out_shape, is_dummy=True) + with open_nc(self._context.emissions_path, "w", parallel=False, clobber=True) as nc_ds: + create_template_emissions_file(nc_ds, self._context.grid_out_shape, is_dummy=True) with open_nc(self._context.grid_out, parallel=False) as ds_src: - ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:] - ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:] + nc_ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:] + nc_ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:] for varname in [ "frp_davg", @@ -154,7 +154,7 @@ def create_dummy_emissions_file(self) -> None: "hwp_davg", "totprcp_24hrs", ]: - create_sd_variable(ds, SD_VARS.get(varname)) + create_sd_variable(nc_ds, SD_VARS.get(varname)) self.log("create_dummy_emissions_file: exit") def finalize(self) -> None: diff --git a/ush/smoke_dust/core/regrid/common.py b/ush/smoke_dust/core/regrid/common.py index 75a3a899cc..a88f0a2753 100644 --- a/ush/smoke_dust/core/regrid/common.py +++ b/ush/smoke_dust/core/regrid/common.py @@ -38,10 +38,10 @@ def get(self, name: Union[str, NameListType]) -> Dimension: name_to_find = (name,) else: name_to_find = name - for jj in name_to_find: - for ii in self.value: - if jj in ii.name: - return ii + for curr_name in name_to_find: + for curr_value in self.value: + if curr_name in curr_value.name: + return curr_value raise ValueError(f"dimension not found: {name}") @@ -110,7 +110,7 @@ def get_y_data(self, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc) -> np.ndarr return grid.get_coords(self.y_index, staggerloc=staggerloc) def create_grid_dims( - self, ds: nc.Dataset, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc + self, nc_ds: nc.Dataset, grid: esmpy.Grid, staggerloc: esmpy.StaggerLoc ) -> DimensionCollection: """Create a dimension collection from a netCDF dataset and ``esmpy`` grid.""" if staggerloc == esmpy.StaggerLoc.CENTER: @@ -121,7 +121,7 @@ def create_grid_dims( raise NotImplementedError(staggerloc) x_dimobj = Dimension( name=x_dim, - size=_get_nc_dimension_(ds, x_dim).size, + size=_get_nc_dimension_(nc_ds, x_dim).size, lower=grid.lower_bounds[staggerloc][self.x_index], upper=grid.upper_bounds[staggerloc][self.x_index], staggerloc=staggerloc, @@ -129,7 +129,7 @@ def create_grid_dims( ) y_dimobj = Dimension( name=y_dim, - size=_get_nc_dimension_(ds, y_dim).size, + size=_get_nc_dimension_(nc_ds, y_dim).size, lower=grid.lower_bounds[staggerloc][self.y_index], upper=grid.upper_bounds[staggerloc][self.y_index], staggerloc=staggerloc, @@ -155,12 +155,12 @@ def fill_nc_variables(self, path: Path): """Fill netCDF variables using coordinate data from an ``esmpy`` grid.""" if self.corner_dims is not None: raise NotImplementedError - with open_nc(path, "a") as ds: + with open_nc(path, "a") as nc_ds: staggerloc = esmpy.StaggerLoc.CENTER x_center_data = self.spec.get_x_data(self.value, staggerloc) - _set_variable_data_(ds.variables[self.spec.x_center], self.dims, x_center_data) + _set_variable_data_(nc_ds.variables[self.spec.x_center], self.dims, x_center_data) y_center_data = self.spec.get_y_data(self.value, staggerloc) - _set_variable_data_(ds.variables[self.spec.y_center], self.dims, y_center_data) + _set_variable_data_(nc_ds.variables[self.spec.y_center], self.dims, y_center_data) class FieldWrapper(AbstractWrapper): @@ -171,8 +171,8 @@ class FieldWrapper(AbstractWrapper): def fill_nc_variable(self, path: Path): """Fill the netCDF variable associated with the ``esmpy`` field.""" - with 
open_nc(path, "a") as ds: - var = ds.variables[self.value.name] + with open_nc(path, "a") as nc_ds: + var = nc_ds.variables[self.value.name] _set_variable_data_(var, self.dims, self.value.data) @@ -192,8 +192,8 @@ def _get_aliased_key_(source: Dict, keys: Union[NameListType, str]) -> Any: raise ValueError(f"key not found: {keys}") -def _get_nc_dimension_(ds: nc.Dataset, names: NameListType) -> nc.Dimension: - return _get_aliased_key_(ds.dimensions, names) +def _get_nc_dimension_(nc_ds: nc.Dataset, names: NameListType) -> nc.Dimension: + return _get_aliased_key_(nc_ds.dimensions, names) def _create_dimension_map_(dims: DimensionCollection) -> Dict[str, int]: @@ -242,31 +242,35 @@ class NcToGrid(BaseModel): def create_grid_wrapper(self) -> GridWrapper: """Create a grid wrapper.""" - with open_nc(self.path, "r") as ds: - grid_shape = self._create_grid_shape_(ds) + with open_nc(self.path, "r") as nc_ds: + grid_shape = self._create_grid_shape_(nc_ds) staggerloc = esmpy.StaggerLoc.CENTER grid = esmpy.Grid( grid_shape, staggerloc=staggerloc, coord_sys=esmpy.CoordSys.SPH_DEG, ) - dims = self.spec.create_grid_dims(ds, grid, staggerloc) + dims = self.spec.create_grid_dims(nc_ds, grid, staggerloc) grid_x_center_coords = self.spec.get_x_data(grid, staggerloc) - grid_x_center_coords[:] = load_variable_data(ds.variables[self.spec.x_center], dims) + grid_x_center_coords[:] = load_variable_data( + nc_ds.variables[self.spec.x_center], dims # pylint: disable=unsubscriptable-object + ) grid_y_center_coords = self.spec.get_y_data(grid, staggerloc) - grid_y_center_coords[:] = load_variable_data(ds.variables[self.spec.y_center], dims) + grid_y_center_coords[:] = load_variable_data( + nc_ds.variables[self.spec.y_center], dims # pylint: disable=unsubscriptable-object + ) if self.spec.has_corners: - corner_dims = self._add_corner_coords_(ds, grid) + corner_dims = self._add_corner_coords_(nc_ds, grid) else: corner_dims = None gwrap = GridWrapper(value=grid, dims=dims, spec=self.spec, corner_dims=corner_dims) return gwrap - def _create_grid_shape_(self, ds: nc.Dataset) -> np.ndarray: - x_size = _get_nc_dimension_(ds, self.spec.x_dim).size - y_size = _get_nc_dimension_(ds, self.spec.y_dim).size + def _create_grid_shape_(self, nc_ds: nc.Dataset) -> np.ndarray: + x_size = _get_nc_dimension_(nc_ds, self.spec.x_dim).size + y_size = _get_nc_dimension_(nc_ds, self.spec.y_dim).size if self.spec.x_index == 0: grid_shape = (x_size, y_size) elif self.spec.x_index == 1: @@ -275,14 +279,14 @@ def _create_grid_shape_(self, ds: nc.Dataset) -> np.ndarray: raise NotImplementedError(self.spec.x_index, self.spec.y_index) return np.array(grid_shape) - def _add_corner_coords_(self, ds: nc.Dataset, grid: esmpy.Grid) -> DimensionCollection: + def _add_corner_coords_(self, nc_ds: nc.Dataset, grid: esmpy.Grid) -> DimensionCollection: staggerloc = esmpy.StaggerLoc.CORNER grid.add_coords(staggerloc) - dims = self.spec.create_grid_dims(ds, grid, staggerloc) + dims = self.spec.create_grid_dims(nc_ds, grid, staggerloc) grid_x_corner_coords = self.spec.get_x_data(grid, staggerloc) - grid_x_corner_coords[:] = load_variable_data(ds.variables[self.spec.x_corner], dims) + grid_x_corner_coords[:] = load_variable_data(nc_ds.variables[self.spec.x_corner], dims) grid_y_corner_coords = self.spec.get_y_data(grid, staggerloc) - grid_y_corner_coords[:] = load_variable_data(ds.variables[self.spec.y_corner], dims) + grid_y_corner_coords[:] = load_variable_data(nc_ds.variables[self.spec.y_corner], dims) return dims @@ -297,12 +301,12 @@ class 
NcToField(BaseModel): def create_field_wrapper(self) -> FieldWrapper: """Create a field wrapper.""" - with open_nc(self.path, "r") as ds: + with open_nc(self.path, "r") as nc_ds: if self.dim_time is None: ndbounds = None target_dims = self.gwrap.dims else: - ndbounds = (len(_get_nc_dimension_(ds, self.dim_time)),) + ndbounds = (len(_get_nc_dimension_(nc_ds, self.dim_time)),) time_dim = Dimension( name=self.dim_time, size=ndbounds[0], @@ -318,7 +322,9 @@ def create_field_wrapper(self) -> FieldWrapper: ndbounds=ndbounds, staggerloc=self.staggerloc, ) - field.data[:] = load_variable_data(ds.variables[self.name], target_dims) + field.data[:] = load_variable_data( + nc_ds.variables[self.name], target_dims # pylint: disable=unsubscriptable-object + ) fwrap = FieldWrapper(value=field, dims=target_dims, gwrap=self.gwrap) return fwrap diff --git a/ush/smoke_dust/core/regrid/processor.py b/ush/smoke_dust/core/regrid/processor.py index a11e7b0838..c8833276c8 100644 --- a/ush/smoke_dust/core/regrid/processor.py +++ b/ush/smoke_dust/core/regrid/processor.py @@ -157,10 +157,10 @@ def _run_impl_(self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Se / f"{self._context.rave_to_intp}{forecast_date}00_{forecast_date}59.nc" ) self.log(f"creating output file: {output_file_path}") - with open_nc(output_file_path, "w") as ds: - create_template_emissions_file(ds, self._context.grid_out_shape) + with open_nc(output_file_path, "w") as nc_ds: + create_template_emissions_file(nc_ds, self._context.grid_out_shape) for varname in ["frp_avg_hr", "FRE"]: - create_sd_variable(ds, SD_VARS.get(varname)) + create_sd_variable(nc_ds, SD_VARS.get(varname)) self._dst_output_gwrap.fill_nc_variables(output_file_path) @@ -202,7 +202,10 @@ def _run_impl_(self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Se if self._context.rave_qa_filter == RaveQaFilter.HIGH: with open_nc(row_data["rave_raw"], parallel=True) as rave_ds: - rave_qa = load_variable_data(rave_ds.variables["QA"], src_fwrap.dims) + rave_qa = load_variable_data( + rave_ds.variables["QA"], # pylint: disable=unsubscriptable-object + src_fwrap.dims, + ) set_to_zero = rave_qa < 2 self.log( f"RAVE QA filter applied: {self._context.rave_qa_filter=}; " @@ -251,27 +254,27 @@ def _regrid_postprocessing_(self, row_data: pd.Series) -> None: "frp_avg_hr", "FRE", ] - with open_nc(row_data["rave_interpolated"], parallel=False) as ds: - dst_data = {ii: ds.variables[ii][:] for ii in field_names_dst} + with open_nc(row_data["rave_interpolated"], parallel=False) as nc_ds: + dst_data = {ii: nc_ds.variables[ii][:] for ii in field_names_dst} if calc_stats: # Do these calculations before we modify the arrays since edge masking is inplace dst_desc_unmasked = create_descriptive_statistics(dst_data, "dst_unmasked", None) # Mask edges to reduce model edge effects self.log("masking edges", level=logging.DEBUG) - for v in dst_data.values(): + for value in dst_data.values(): # Operation is inplace - mask_edges(v[0, :, :]) + mask_edges(value[0, :, :]) # Persist masked data to disk - with open_nc(row_data["rave_interpolated"], parallel=False, mode="a") as ds: - for k, v in dst_data.items(): - ds.variables[k][:] = v + with open_nc(row_data["rave_interpolated"], parallel=False, mode="a") as nc_ds: + for key, value in dst_data.items(): + nc_ds.variables[key][:] = value if calc_stats: - with open_nc(row_data["rave_raw"], parallel=False) as ds: + with open_nc(row_data["rave_raw"], parallel=False) as nc_ds: src_desc = create_descriptive_statistics( - {ii: ds.variables[ii][:] for ii in 
self._context.vars_emis}, + {ii: nc_ds.variables[ii][:] for ii in self._context.vars_emis}, "src", row_data["rave_raw"], ) diff --git a/ush/smoke_dust/generate_emissions.py b/ush/smoke_dust/generate_emissions.py index 4a8db12011..812c746e4c 100755 --- a/ush/smoke_dust/generate_emissions.py +++ b/ush/smoke_dust/generate_emissions.py @@ -39,7 +39,7 @@ class StringBool(StrEnum): # pylint: disable=line-too-long @app.command() -def main( # pylint:disable=too-many-arguments,too-many-positional-arguments +def main( # pylint:disable=too-many-arguments staticdir: Path = typer.Option( ..., "--staticdir", help="Path to the smoke and dust fixed files." ), From b507813af00fdefbc6ae5538bfad034d9e28bf6d Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Thu, 6 Feb 2025 11:28:13 -0700 Subject: [PATCH 31/41] fix for supposedly unscriptable --- ush/smoke_dust/core/preprocessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py index da6d6a590d..416c9ac395 100644 --- a/ush/smoke_dust/core/preprocessor.py +++ b/ush/smoke_dust/core/preprocessor.py @@ -144,8 +144,8 @@ def create_dummy_emissions_file(self) -> None: with open_nc(self._context.emissions_path, "w", parallel=False, clobber=True) as nc_ds: create_template_emissions_file(nc_ds, self._context.grid_out_shape, is_dummy=True) with open_nc(self._context.grid_out, parallel=False) as ds_src: - nc_ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:] - nc_ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:] + nc_ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:] # pylint: disable=unsubscriptable-object + nc_ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:] # pylint: disable=unsubscriptable-object for varname in [ "frp_davg", From 895668eb2749b85cfde804cc0be2ce7ef3921340 Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Thu, 6 Feb 2025 12:01:28 -0700 Subject: [PATCH 32/41] fix test_help --- .../test_python/test_smoke_dust/test_generate_emissions.py | 5 ++++- ush/smoke_dust/core/preprocessor.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/test_python/test_smoke_dust/test_generate_emissions.py b/tests/test_python/test_smoke_dust/test_generate_emissions.py index 8f4cce37cf..d28651cf3b 100644 --- a/tests/test_python/test_smoke_dust/test_generate_emissions.py +++ b/tests/test_python/test_smoke_dust/test_generate_emissions.py @@ -57,4 +57,7 @@ def test(tmp_path: Path, fake_grid_out_shape: FakeGridOutShape, mocker: MockerFi def test_help() -> None: """Test that the help message can be displayed.""" - subprocess.check_call(["python", "../../../ush/smoke_dust/generate_emissions.py", "--help"]) + cli_path = ( + Path(__file__).parent.parent.parent.parent / "ush" / "smoke_dust" / "generate_emissions.py" + ) + subprocess.check_call(["python", str(cli_path), "--help"]) diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py index 416c9ac395..e3c242eaa4 100644 --- a/ush/smoke_dust/core/preprocessor.py +++ b/ush/smoke_dust/core/preprocessor.py @@ -144,8 +144,10 @@ def create_dummy_emissions_file(self) -> None: with open_nc(self._context.emissions_path, "w", parallel=False, clobber=True) as nc_ds: create_template_emissions_file(nc_ds, self._context.grid_out_shape, is_dummy=True) with open_nc(self._context.grid_out, parallel=False) as ds_src: - nc_ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:] # pylint: disable=unsubscriptable-object - nc_ds.variables["geolon"][:] = 
ds_src.variables["grid_lont"][:] # pylint: disable=unsubscriptable-object + # pylint: disable=unsubscriptable-object + nc_ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:] + nc_ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:] + # pylint: enable=unsubscriptable-object for varname in [ "frp_davg", From 9e4ccca503bc6e78ef51db385cc6733e32508fca Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Thu, 6 Feb 2025 12:14:36 -0700 Subject: [PATCH 33/41] minor fixes for docs; remove extra copy --- scripts/exsrw_smoke_dust.sh | 1 - ush/smoke_dust/add_smoke.py | 8 ++++---- ush/smoke_dust/generate_emissions.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/scripts/exsrw_smoke_dust.sh b/scripts/exsrw_smoke_dust.sh index c73049f041..6e05885248 100755 --- a/scripts/exsrw_smoke_dust.sh +++ b/scripts/exsrw_smoke_dust.sh @@ -127,7 +127,6 @@ else # Copy Smoke file to COMOUT cp -p ${DATA_SHARE}/${smokeFile} ${COMOUT} - cp -p ${DATA_SHARE}/${smokeFile} ${DATA} fi # #----------------------------------------------------------------------- diff --git a/ush/smoke_dust/add_smoke.py b/ush/smoke_dust/add_smoke.py index fda5aa41aa..dd07d3ccf1 100755 --- a/ush/smoke_dust/add_smoke.py +++ b/ush/smoke_dust/add_smoke.py @@ -9,7 +9,7 @@ import xarray as xr -def populate_data(data: np.ndarray, target_shape: Tuple) -> np.ndarray: +def _populate_data_(data: np.ndarray, target_shape: Tuple) -> np.ndarray: """ Extracted variables need to match the target shape so we first populating it into a zero array. @@ -62,9 +62,9 @@ def main() -> None: lon_dim = file_input.dims["lon"] # Populate the extracted data to match the target shape - # smoke_2_add_populated = populate_data(smoke_2_add, (lev_dim, lat_dim, lon_dim)) - # dust_2_add_populated = populate_data(dust_2_add, (lev_dim, lat_dim, lon_dim)) - # coarsepm_2_add_populated = populate_data(coarsepm_2_add, (lev_dim, lat_dim, lon_dim)) + # smoke_2_add_populated = _populate_data_(smoke_2_add, (lev_dim, lat_dim, lon_dim)) + # dust_2_add_populated = _populate_data_(dust_2_add, (lev_dim, lat_dim, lon_dim)) + # coarsepm_2_add_populated = _populate_data_(coarsepm_2_add, (lev_dim, lat_dim, lon_dim)) # Create new data arrays filled with zeros smoke_zero = xr.DataArray( diff --git a/ush/smoke_dust/generate_emissions.py b/ush/smoke_dust/generate_emissions.py index 812c746e4c..40da05d1c2 100755 --- a/ush/smoke_dust/generate_emissions.py +++ b/ush/smoke_dust/generate_emissions.py @@ -80,8 +80,8 @@ def main( # pylint:disable=too-many-arguments help="If true, do esmpy regridding in-memory as opposed to reading from the fixed weight file.", ), ): - # pylint:enable=line-too-long """Main entrypoint for generating ICs for smoke and dust.""" + # pylint:enable=line-too-long typer.echo("Welcome to interpolating RAVE and processing fire emissions!") context = SmokeDustContext( From 72de8514e368ead41620898722950417b97ec8d8 Mon Sep 17 00:00:00 2001 From: benkozi Date: Sun, 9 Feb 2025 11:03:58 -0700 Subject: [PATCH 34/41] fix: dummy emissions for EBB_DCYCLE=2 (#9) --- .../BuildingRunningTesting/SRW-SD.rst | 22 +-- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 2 +- .../test_smoke_dust/test_core/test_cycle.py | 74 +++++++++ .../test_core/test_preprocessor.py | 6 +- ush/smoke_dust/core/common.py | 2 +- ush/smoke_dust/core/context.py | 27 +++- ush/smoke_dust/core/cycle.py | 151 +++++++++++++++--- ush/smoke_dust/core/preprocessor.py | 116 ++------------ ush/smoke_dust/generate_emissions.py | 2 +- 9 files changed, 256 insertions(+), 146 deletions(-) create mode 100644 
tests/test_python/test_smoke_dust/test_core/test_cycle.py diff --git a/doc/UsersGuide/BuildingRunningTesting/SRW-SD.rst b/doc/UsersGuide/BuildingRunningTesting/SRW-SD.rst index 1d990a0bfa..e3a3675cff 100644 --- a/doc/UsersGuide/BuildingRunningTesting/SRW-SD.rst +++ b/doc/UsersGuide/BuildingRunningTesting/SRW-SD.rst @@ -1,8 +1,8 @@ .. _srw-sd: -===================================== +================================== SRW Smoke & Dust (SRW-SD) Features -===================================== +================================== .. attention:: @@ -15,14 +15,14 @@ This chapter provides instructions for running a simple, example six-hour foreca Although this chapter is the primary documentation resource for running the SRW-SD configuration, users may need to refer to :numref:`Chapter %s ` and :numref:`Chapter %s ` for additional information on building and running the SRW App, respectively. Quick Start Guide (SRW-SD) -============================= +========================== .. attention:: These instructions should work smoothly on Hera and Orion/Hercules, but users on other systems may need to make additional adjustments. Download the Code -------------------- +----------------- Clone the |branch| branch of the authoritative SRW App repository: @@ -32,7 +32,7 @@ Clone the |branch| branch of the authoritative SRW App repository: cd ufs-srweather-app/sorc Checkout Externals ---------------------- +------------------ Users must run the ``checkout_externals`` script to collect (or "check out") the individual components of the SRW App (AQM version) from their respective GitHub repositories. @@ -41,7 +41,7 @@ Users must run the ``checkout_externals`` script to collect (or "check out") the ./manage_externals/checkout_externals -e Externals_smoke_dust.cfg Build the SRW App ------------------- +----------------- .. code-block:: console @@ -54,7 +54,7 @@ Building the SRW App with SRW-SD on other machines, including other :srw-wiki:`L If SRW-SD builds correctly, users should see the standard executables listed in :numref:`Table %s ` in the ``ufs-srweather-app/exec`` directory. Load the |wflow_env| Environment --------------------------------------------- +-------------------------------- Load the workflow environment: @@ -70,7 +70,7 @@ where ```` is ``hera``, ``orion``, or ``hercules``. The workflow should .. _srw-sd-config: Configure an Experiment ---------------------------- +----------------------- Users will need to configure their experiment by setting parameters in the ``config.yaml`` file. To start, users can copy a default experiment setting into ``config.yaml``: @@ -115,7 +115,7 @@ When using the basic ``config.smoke_dust.yaml`` experiment, the usual pre-proces .. _srw-sd-more-tasks: Additional SRW-SD Tasks --------------------------- +----------------------- .. COMMENT: Add workflow diagram in the future. @@ -164,7 +164,7 @@ The Python scripts listed in :numref:`Table %s ` are used to perform - Regridding utilities using `esmpy `_ that interpolate data from the RAVE observational grid to the RRFS grid. Generate the Workflow ------------------------- +--------------------- Generate the workflow: @@ -197,7 +197,7 @@ Users may check experiment status from the experiment directory with either of t .. _srw-sd-success: Experiment Output --------------------- +----------------- The workflow run is complete when all tasks display a "SUCCEEDED" message. 
If everything goes smoothly, users will eventually see a workflow status table similar to the following:
diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst
index f0b30c10b9..6b5f5d60a1 100644
--- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst
+++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst
@@ -2073,7 +2073,7 @@ Non-default parameters for coupled Air Quality Modeling (AQM) tasks are set in t
 
 Smoke and Dust Configuration Parameters
-=====================================
+=======================================
 
 Non-default parameters for Smoke and Dust tasks are set in the ``smoke_dust_parm:`` section of the ``config.yaml`` file.
 
diff --git a/tests/test_python/test_smoke_dust/test_core/test_cycle.py b/tests/test_python/test_smoke_dust/test_core/test_cycle.py
new file mode 100644
index 0000000000..1e4b302074
--- /dev/null
+++ b/tests/test_python/test_smoke_dust/test_core/test_cycle.py
@@ -0,0 +1,74 @@
+"""Tests related to the smoke/dust cycle processor."""
+
+from pathlib import Path
+
+import pytest
+from _pytest.fixtures import SubRequest
+
+from smoke_dust.core.common import open_nc
+from smoke_dust.core.context import SmokeDustContext
+from smoke_dust.core.cycle import SmokeDustCycleTwo
+from test_python.test_smoke_dust.conftest import (
+    create_fake_context,
+    create_fake_grid_out,
+    FakeGridOutShape,
+)
+
+
+@pytest.fixture(params=[True, False], ids=lambda p: f"allow_dummy_restart={p}")
+def context_for_dummy_test(
+    request: SubRequest, tmp_path: Path, fake_grid_out_shape: FakeGridOutShape
+) -> SmokeDustContext:
+    """Create a context for the dummy restart files test."""
+    create_fake_grid_out(tmp_path, fake_grid_out_shape)
+    context = create_fake_context(tmp_path, overrides={"allow_dummy_restart": request.param})
+    return context
+
+
+def create_restart_ncfile(path: Path, varnames: list[str]) -> None:
+    """Create a physics-related restart netCDF file."""
+    with open_nc(path, mode="w") as nc_ds:
+        dim = nc_ds.createDimension("foo")
+        for varname in varnames:
+            nc_ds.createVariable(varname, "f4", (dim.name,))
+
+
+class TestSmokeDustCycleTwo:
+    """Tests for the ``SmokeDustCycleTwo`` cycle processor."""
+
+    def test_writes_dummy_emissions_with_no_restart_files(
+        self, context_for_dummy_test: SmokeDustContext  # pylint: disable=redefined-outer-name
+    ) -> None:
+        """Test that dummy emissions are handled appropriately when no restart files are present."""
+        cycle = SmokeDustCycleTwo(context_for_dummy_test)
+        assert not context_for_dummy_test.emissions_path.exists()
+        try:
+            cycle.run()
+        except FileNotFoundError:
+            assert not context_for_dummy_test.allow_dummy_restart
+        else:
+            assert context_for_dummy_test.emissions_path.exists()
+
+    def test_iter_restart_files(
+        self, tmp_path: Path, fake_grid_out_shape: FakeGridOutShape
+    ) -> None:
+        """Test iterating over restart files."""
+        create_fake_grid_out(tmp_path, fake_grid_out_shape)
+        context = create_fake_context(tmp_path)
+        cycle = SmokeDustCycleTwo(context)
+        expected_vars = ("totprcp_ave", "rrfs_hwp_ave")
+        restart_slug = "phy_data"
+        outdir = tmp_path / "RESTART"
+        outdir.mkdir()
+        create_restart_ncfile(outdir / f"foobar.nonsense.{restart_slug}.0000.nc", expected_vars)
+        create_restart_ncfile(outdir / f"foobar.nonsense.{restart_slug}.1111.nc", [])
+        create_restart_ncfile(outdir / "foobar.nonsense.nc", [])
+        for root_dir in [outdir, tmp_path]:
+            print(root_dir)
+            restart_files = list(
+                cycle._iter_restart_files_(  # pylint: disable=protected-access
+                    root_dir,
+                    expected_vars,
+                )
+ ) + assert len(restart_files) == 1 diff --git a/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py b/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py index 4a8ec21e31..54b8e09776 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py +++ b/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py @@ -150,16 +150,16 @@ def test_run( """Test core capabilities of the preprocessor. Note this does not test regridding.""" # pylint: disable=protected-access preprocessor = data_for_test.preprocessor - spy1 = mocker.spy(preprocessor, "create_dummy_emissions_file") + spy1 = mocker.spy(preprocessor._context.__class__, "create_dummy_emissions_file") regrid_processor_class = preprocessor._regrid_processor.__class__ spy2 = mocker.spy(regrid_processor_class, "_run_impl_") spy3 = mocker.spy(regrid_processor_class, "run") cycle_processor_class = preprocessor._cycle_processor.__class__ - spy4 = mocker.spy(cycle_processor_class, "process_emissions") + spy4 = mocker.spy(cycle_processor_class, "run") spy5 = mocker.spy(cycle_processor_class, "average_frp") assert isinstance(preprocessor._cycle_processor, data_for_test.expected.klass) - assert preprocessor._forecast_metadata is None + assert preprocessor._cycle_processor._forecast_metadata is None # pylint: enable=protected-access assert not data_for_test.context.emissions_path.exists() diff --git a/ush/smoke_dust/core/common.py b/ush/smoke_dust/core/common.py index 4e30df4d6b..806921490b 100644 --- a/ush/smoke_dust/core/common.py +++ b/ush/smoke_dust/core/common.py @@ -4,10 +4,10 @@ from pathlib import Path from typing import Tuple, Literal, Dict -from netCDF4 import Dataset import numpy as np import pandas as pd from mpi4py import MPI +from netCDF4 import Dataset from smoke_dust.core.variable import SmokeDustVariable, SD_VARS diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py index 62b70171df..fb67009f54 100644 --- a/ush/smoke_dust/core/context.py +++ b/ush/smoke_dust/core/context.py @@ -11,7 +11,8 @@ from mpi4py import MPI from pydantic import BaseModel, model_validator, BeforeValidator, Field -from smoke_dust.core.common import open_nc +from smoke_dust.core.common import open_nc, create_template_emissions_file, create_sd_variable +from smoke_dust.core.variable import SD_VARS @unique @@ -170,6 +171,7 @@ class SmokeDustContext(BaseModel): to_s: int = 3600 rank: int = MPI.COMM_WORLD.Get_rank() esmpy_debug: bool = False + allow_dummy_restart: bool = True # Set in _finalize_model_ grid_out_shape: tuple[int, int] = (0, 0) @@ -261,6 +263,29 @@ def log( if exc_info is not None and self.exit_on_error: raise exc_info + def create_dummy_emissions_file(self) -> None: + """Create a dummy emissions file. 
This occurs if it is the first day of the forecast or
+        there is an exception and the context is set to not exit on error."""
+        self.log("create_dummy_emissions_file: enter")
+        self.log(f"{self.emissions_path=}")
+        with open_nc(self.emissions_path, "w", parallel=False, clobber=True) as nc_ds:
+            create_template_emissions_file(nc_ds, self.grid_out_shape, is_dummy=True)
+            with open_nc(self.grid_out, parallel=False) as ds_src:
+                # pylint: disable=unsubscriptable-object
+                nc_ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:]
+                nc_ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:]
+                # pylint: enable=unsubscriptable-object
+
+            for varname in [
+                "frp_davg",
+                "ebb_rate",
+                "fire_end_hr",
+                "hwp_davg",
+                "totprcp_24hrs",
+            ]:
+                create_sd_variable(nc_ds, SD_VARS.get(varname))
+        self.log("create_dummy_emissions_file: exit")
+
     def _init_logging_(self) -> logging.Logger:
         project_name = "smoke-dust-preprocessor"
 
diff --git a/ush/smoke_dust/core/cycle.py b/ush/smoke_dust/core/cycle.py
index 13de40e7e0..a296450e0a 100644
--- a/ush/smoke_dust/core/cycle.py
+++ b/ush/smoke_dust/core/cycle.py
@@ -2,7 +2,11 @@
 
 import abc
 import datetime as dt
-from typing import Any
+import fnmatch
+import glob
+import logging
+from pathlib import Path
+from typing import Any, Iterator
 
 import numpy as np
 import pandas as pd
@@ -38,6 +42,81 @@ class AbstractSmokeDustCycleProcessor(abc.ABC):
 
     def __init__(self, context: SmokeDustContext):
        self._context = context
+        # On-demand/cached property values
+        self._forecast_metadata = None
+        self._forecast_dates = None
+
+    @property
+    def forecast_dates(self) -> pd.DatetimeIndex:
+        """Create the forecast dates for the cycle."""
+        if self._forecast_dates is not None:
+            return self._forecast_dates
+        start_datetime = self.create_start_datetime()
+        self.log(f"{start_datetime=}")
+        forecast_dates = pd.date_range(start=start_datetime, periods=24, freq="h").strftime(
+            "%Y%m%d%H"
+        )
+        self._forecast_dates = forecast_dates
+        return self._forecast_dates
+
+    @property
+    def forecast_metadata(self) -> pd.DataFrame:
+        """Create forecast metadata consisting of:
+
+        * ``forecast_date``: The forecast timestep as a ``datetime`` object.
+        * ``rave_interpolated``: Path to the date's corresponding interpolated RAVE file. Null
+            if not found.
+        * ``rave_raw``: Raw RAVE data before interpolation. Null if not found.
+ """ + if self._forecast_metadata is not None: + return self._forecast_metadata + + # Collect metadata on data files related to forecast dates + self.log("creating forecast metadata") + intp_path = [] + rave_to_forecast = [] + for date in self.forecast_dates: + # Check for pre-existing interpolated RAVE data + file_path = ( + Path(self._context.intp_dir) / f"{self._context.rave_to_intp}{date}00_{date}59.nc" + ) + if file_path.exists() and file_path.is_file(): + try: + resolved = file_path.resolve(strict=True) + except FileNotFoundError: + continue + else: + intp_path.append(resolved) + else: + intp_path.append(None) + + # Check for raw RAVE data + wildcard_name = f"*-3km*{date}*{date}59590*.nc" + name_retro = f"*3km*{date}*{date}*.nc" + found = False + for rave_path in self._context.ravedir.iterdir(): + if fnmatch.fnmatch(str(rave_path), wildcard_name) or fnmatch.fnmatch( + str(rave_path), name_retro + ): + rave_to_forecast.append(rave_path) + found = True + break + if not found: + rave_to_forecast.append(None) + + self.log(f"{self.forecast_dates}", level=logging.DEBUG) + self.log(f"{intp_path=}", level=logging.DEBUG) + self.log(f"{rave_to_forecast=}", level=logging.DEBUG) + data_frame = pd.DataFrame( + data={ + "forecast_date": self.forecast_dates, + "rave_interpolated": intp_path, + "rave_raw": rave_to_forecast, + } + ) + self._forecast_metadata = data_frame + return data_frame + def log(self, *args: Any, **kwargs: Any) -> None: """ See ``SmokeDustContext.log``. @@ -58,25 +137,23 @@ def create_start_datetime(self) -> dt.datetime: """ @abc.abstractmethod - def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: + def average_frp(self) -> AverageFrpOutput: """ Calculate fire radiative power and smoke emissions from biomass burning. - Args: - forecast_metadata: Dataframe containing forecast metadata. Returns: Fire radiative power and smoke emissions. """ @abc.abstractmethod - def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: + def run(self) -> None: """ Create smoke/dust ICs emissions file. - - Args: - forecast_metadata: Dataframe containing forecast metadata. 
""" + def finalize(self) -> None: + """Optional override for subclasses.""" + class SmokeDustCycleOne(AbstractSmokeDustCycleProcessor): """Creates ICs consisting of fire radiative power and smoke emissions from biomass burning.""" @@ -95,8 +172,8 @@ def create_start_datetime(self) -> dt.datetime: start_datetime = self._context.fcst_datetime return start_datetime - def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: - derived = self.average_frp(forecast_metadata) + def run(self) -> None: + derived = self.average_frp() self.log(f"creating 24-hour emissions file: {self._context.emissions_path}") with open_nc(self._context.emissions_path, "w", parallel=False, clobber=True) as ds_out: create_template_emissions_file(ds_out, self._context.grid_out_shape) @@ -107,7 +184,7 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: create_sd_variable(ds_out, SD_VARS.get(var_name)) ds_out.variables[var_name][:] = fill_array - def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: + def average_frp(self) -> AverageFrpOutput: ebb_smoke_total = [] frp_avg_hr = [] @@ -116,7 +193,7 @@ def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: with xr.open_dataset(self._context.grid_out) as nc_ds: target_area = nc_ds["area"].values - for row_idx, row_df in forecast_metadata.iterrows(): + for row_idx, row_df in self.forecast_metadata.iterrows(): self.log(f"processing emissions: {row_idx}, {row_df.to_dict()}") with xr.open_dataset(row_df["rave_interpolated"]) as nc_ds: fre = nc_ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values @@ -153,16 +230,30 @@ def create_start_datetime(self) -> dt.datetime: self.log("Creating emissions for modulated persistence by Wildfire potential") return self._context.fcst_datetime - dt.timedelta(days=1, hours=1) - def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: + def run(self) -> None: # pylint: disable=too-many-statements - self.log("process_emissions: enter") + self.log("run: enter") + forecast_metadata = self.forecast_metadata hwp_ave = [] totprcp = np.zeros(self._context.grid_out_shape).ravel() - for date in forecast_metadata["forecast_date"]: - phy_data_path = self._context.hourly_hwpdir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" - rave_path = self._context.intp_dir / f"{self._context.rave_to_intp}{date}00_{date}59.nc" - self.log(f"processing emissions for: {phy_data_path=}, {rave_path=}") + + phy_data_paths = list( + self._iter_restart_files_(self._context.hourly_hwpdir, ("rrfs_hwp_ave", "totprcp_ave")) + ) + if len(phy_data_paths) == 0: + if self._context.allow_dummy_restart: + self.log( + "restart files not found and dummy restart allowed. 
creating_dummy_emissions", + level=logging.WARN, + ) + if self._context.rank == 0: + self._context.create_dummy_emissions_file() + return + raise FileNotFoundError("no restart files found") + + for phy_data_path in phy_data_paths: + self.log(f"processing emissions for: {phy_data_path=}") with xr.open_dataset(phy_data_path) as nc_ds: hwp_values = nc_ds.rrfs_hwp_ave.values.ravel() tprcp_values = nc_ds.totprcp_ave.values.ravel() @@ -173,7 +264,7 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: xarr_hwp = xr.DataArray(hwp_ave_arr) xarr_totprcp = xr.DataArray(totprcp_ave_arr) - derived = self.average_frp(forecast_metadata) + derived = self.average_frp() t_fire = np.zeros(self._context.grid_out_shape) for date in forecast_metadata["forecast_date"]: @@ -236,10 +327,10 @@ def process_emissions(self, forecast_metadata: pd.DataFrame) -> None: create_sd_variable(ds_out, SD_VARS.get(varname)) ds_out.variables[varname][0, :, :] = fill_array - self.log("process_emissions: exit") + self.log("run: exit") # pylint: enable=too-many-statements - def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: + def average_frp(self) -> AverageFrpOutput: self.log("average_frp: entering") frp_daily = np.zeros(self._context.grid_out_shape).ravel() @@ -250,7 +341,7 @@ def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: with xr.open_dataset(self._context.grid_out) as nc_ds: target_area = nc_ds["area"].values - for row_idx, row_df in forecast_metadata.iterrows(): + for row_idx, row_df in self.forecast_metadata.iterrows(): self.log(f"processing emissions: {row_idx}, {row_df.to_dict()}") with xr.open_dataset(row_df["rave_interpolated"]) as nc_ds: fre = nc_ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values @@ -297,6 +388,22 @@ def average_frp(self, forecast_metadata: pd.DataFrame) -> AverageFrpOutput: } ) + def _iter_restart_files_( + self, root_dir: Path, expected_vars: tuple[str, ...] 
+ ) -> Iterator[Path]: + filenames = glob.glob("**/*phy_data*nc", root_dir=root_dir, recursive=True) + for filename in filenames: + path = root_dir / filename + try: + resolved = path.resolve(strict=True) + except FileNotFoundError: + self.log(f"restart file link not resolvable: {path}", level=logging.WARN) + continue + with open_nc(resolved) as nc_ds: + variables = nc_ds.variables.keys() # pylint: disable=no-member + if all(expected_var in variables for expected_var in expected_vars): + yield path + def create_cycle_processor( context: SmokeDustContext, diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py index e3c242eaa4..0046896645 100644 --- a/ush/smoke_dust/core/preprocessor.py +++ b/ush/smoke_dust/core/preprocessor.py @@ -1,21 +1,12 @@ """Smoke/dust preprocessor core implementation.""" -import fnmatch -import logging -from pathlib import Path from typing import Any import pandas as pd -from smoke_dust.core.common import ( - open_nc, - create_template_emissions_file, - create_sd_variable, -) from smoke_dust.core.context import SmokeDustContext from smoke_dust.core.cycle import create_cycle_processor from smoke_dust.core.regrid.processor import SmokeDustRegridProcessor -from smoke_dust.core.variable import SD_VARS class SmokeDustPreprocessor: @@ -30,10 +21,6 @@ def __init__(self, context: SmokeDustContext) -> None: # Processes cycle-specific data transformations self._cycle_processor = create_cycle_processor(context) - # On-demand/cached property values - self._forecast_metadata = None - self._forecast_dates = None - self.log(f"{self._context=}") self.log("__init__: exit") @@ -43,83 +30,23 @@ def log(self, *args: Any, **kwargs: Any) -> None: @property def forecast_dates(self) -> pd.DatetimeIndex: - """Create the forecast dates for cycle.""" - if self._forecast_dates is not None: - return self._forecast_dates - start_datetime = self._cycle_processor.create_start_datetime() - self.log(f"{start_datetime=}") - forecast_dates = pd.date_range(start=start_datetime, periods=24, freq="h").strftime( - "%Y%m%d%H" - ) - self._forecast_dates = forecast_dates - return self._forecast_dates + """See ``AbstractSmokeDustCycleProcessor.forecast_dates``.""" + return self._cycle_processor.forecast_dates @property def forecast_metadata(self) -> pd.DataFrame: - """Create forecast metadata consisting of: - - * ``forecast_date``: The forecast timestep as a `datetime` object. - * ``rave_interpolated``: To the date's corresponding interpolated RAVE file. Null if not - found. - * ``rave_raw``: Raw RAVE data before interpolation. Null if not found. 
- """ - if self._forecast_metadata is not None: - return self._forecast_metadata - - # Collect metadata on data files related to forecast dates - self.log("creating forecast metadata") - intp_path = [] - rave_to_forecast = [] - for date in self.forecast_dates: - # Check for pre-existing interpolated RAVE data - file_path = ( - Path(self._context.intp_dir) / f"{self._context.rave_to_intp}{date}00_{date}59.nc" - ) - if file_path.exists() and file_path.is_file(): - try: - resolved = file_path.resolve(strict=True) - except FileNotFoundError: - continue - else: - intp_path.append(resolved) - else: - intp_path.append(None) - - # Check for raw RAVE data - wildcard_name = f"*-3km*{date}*{date}59590*.nc" - name_retro = f"*3km*{date}*{date}*.nc" - found = False - for rave_path in self._context.ravedir.iterdir(): - if fnmatch.fnmatch(str(rave_path), wildcard_name) or fnmatch.fnmatch( - str(rave_path), name_retro - ): - rave_to_forecast.append(rave_path) - found = True - break - if not found: - rave_to_forecast.append(None) - - self.log(f"{self.forecast_dates}", level=logging.DEBUG) - self.log(f"{intp_path=}", level=logging.DEBUG) - self.log(f"{rave_to_forecast=}", level=logging.DEBUG) - data_frame = pd.DataFrame( - data={ - "forecast_date": self.forecast_dates, - "rave_interpolated": intp_path, - "rave_raw": rave_to_forecast, - } - ) - self._forecast_metadata = data_frame - return data_frame + """See ``AbstractSmokeDustCycleProcessor.forecast_metadata``.""" + return self._cycle_processor.forecast_metadata @property def is_first_day(self) -> bool: """``True`` if this is considered the "first day" of the simulation where there is no interpolated or raw RAVE data available.""" + forecast_metadata = self._cycle_processor.forecast_metadata is_first_day = ( - self.forecast_metadata["rave_interpolated"].isnull().all() - and self.forecast_metadata["rave_raw"].isnull().all() + forecast_metadata["rave_interpolated"].isnull().all() + and forecast_metadata["rave_raw"].isnull().all() ) self.log(f"{is_first_day=}") return is_first_day @@ -129,36 +56,13 @@ def run(self) -> None: self.log("run: entering") if self.is_first_day: if self._context.rank == 0: - self.create_dummy_emissions_file() + self._context.create_dummy_emissions_file() else: - self._regrid_processor.run(self.forecast_metadata) + self._regrid_processor.run(self._cycle_processor.forecast_metadata) if self._context.rank == 0: - self._cycle_processor.process_emissions(self.forecast_metadata) + self._cycle_processor.run() self.log("run: exiting") - def create_dummy_emissions_file(self) -> None: - """Create a dummy emissions file. 
This occurs if it is the first day of the forecast or - there is an exception and the context is set to not exit on error.""" - self.log("create_dummy_emissions_file: enter") - self.log(f"{self._context.emissions_path=}") - with open_nc(self._context.emissions_path, "w", parallel=False, clobber=True) as nc_ds: - create_template_emissions_file(nc_ds, self._context.grid_out_shape, is_dummy=True) - with open_nc(self._context.grid_out, parallel=False) as ds_src: - # pylint: disable=unsubscriptable-object - nc_ds.variables["geolat"][:] = ds_src.variables["grid_latt"][:] - nc_ds.variables["geolon"][:] = ds_src.variables["grid_lont"][:] - # pylint: enable=unsubscriptable-object - - for varname in [ - "frp_davg", - "ebb_rate", - "fire_end_hr", - "hwp_davg", - "totprcp_24hrs", - ]: - create_sd_variable(nc_ds, SD_VARS.get(varname)) - self.log("create_dummy_emissions_file: exit") - def finalize(self) -> None: """Finalize the preprocessor.""" self.log("finalize: exiting") diff --git a/ush/smoke_dust/generate_emissions.py b/ush/smoke_dust/generate_emissions.py index 40da05d1c2..f1f2a4397b 100755 --- a/ush/smoke_dust/generate_emissions.py +++ b/ush/smoke_dust/generate_emissions.py @@ -102,7 +102,7 @@ def main( # pylint:disable=too-many-arguments processor.run() processor.finalize() except Exception as e: # pylint: disable=broad-exception-caught - processor.create_dummy_emissions_file() + context.create_dummy_emissions_file() context.log("unhandled error", exc_info=e) typer.echo("Exiting. Bye!") From 65fb00cb9c94e15ad6cb2e5a3632b3fa27025c11 Mon Sep 17 00:00:00 2001 From: benkozi Date: Wed, 12 Feb 2025 09:46:10 -0700 Subject: [PATCH 35/41] fix: find restart files in community mode (#10) --- jobs/JSRW_SMOKE_DUST | 4 + tests/test_python/test_smoke_dust/conftest.py | 35 +++++++- .../test_smoke_dust/test_core/test_cycle.py | 33 ++++--- .../test_core/test_preprocessor.py | 40 ++------- .../test_smoke_dust/test_core/test_regrid.py | 4 +- .../test_generate_emissions.py | 4 +- ush/smoke_dust/core/context.py | 7 +- ush/smoke_dust/core/cycle.py | 88 +++++++++++-------- ush/smoke_dust/core/preprocessor.py | 20 ++--- ush/smoke_dust/core/regrid/processor.py | 17 ++-- ush/smoke_dust/generate_emissions.py | 3 + 11 files changed, 138 insertions(+), 117 deletions(-) diff --git a/jobs/JSRW_SMOKE_DUST b/jobs/JSRW_SMOKE_DUST index 81d0898770..43cd1bf241 100755 --- a/jobs/JSRW_SMOKE_DUST +++ b/jobs/JSRW_SMOKE_DUST @@ -71,6 +71,10 @@ if [ "${WORKFLOW_MANAGER}" = "rocoto" ]; then fi # #----------------------------------------------------------------------- +# Save the COMIN directory. This is a workaround until the scripts +# are decoupled from NCO mode and can run in COMMUNITY mode. +#----------------------------------------------------------------------- +export COMIN_SMOKE_DUST_COMMUNITY=${EXPTDIR} # # Create a temp working directory (DATA) and cd into it. # diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py index 8ae06ac5f8..11bb3108ed 100644 --- a/tests/test_python/test_smoke_dust/conftest.py +++ b/tests/test_python/test_smoke_dust/conftest.py @@ -7,6 +7,7 @@ from typing import Union import numpy as np +import pandas as pd import pytest from netCDF4 import Dataset @@ -68,9 +69,10 @@ def create_fake_context(root_dir: Path, overrides: Union[dict, None] = None) -> A fake context to use for testing. 
""" current_day = "2019072200" - nwges_dir = root_dir + comin = root_dir / current_day + comin.mkdir(exist_ok=True) os.environ["CDATE"] = current_day - os.environ["DATA"] = str(nwges_dir) + os.environ["COMIN_SMOKE_DUST_COMMUNITY"] = str(comin) kwds = { "staticdir": root_dir, "ravedir": root_dir, @@ -88,7 +90,7 @@ def create_fake_context(root_dir: Path, overrides: Union[dict, None] = None) -> try: context = SmokeDustContext.model_validate(kwds) except: - for env_var in ["CDATE", "DATA"]: + for env_var in ["CDATE", "COMIN_SMOKE_DUST_COMMUNITY"]: os.unsetenv(env_var) raise return context @@ -108,3 +110,30 @@ def create_file_hash(path: Path) -> str: while chunk := target_file.read(8192): file_hash.update(chunk) return file_hash.hexdigest() + + +def create_fake_restart_files( + root_dir: Path, cycle_dates: pd.DatetimeIndex, shape: FakeGridOutShape +) -> None: + """ + Create fake restart files expected for EBB_DCYLE=2. + + Args: + root_dir: Directory to create fake files in. + cycle_dates: The series of dates to create the restart files for. + shape: Output grid shape. + """ + restart_dir = root_dir / "RESTART" + restart_dir.mkdir(exist_ok=True) + for date in cycle_dates: + restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" + with Dataset(restart_file, "w") as nc_ds: + nc_ds.createDimension("Time") + nc_ds.createDimension("yaxis_1", shape.y_size) + nc_ds.createDimension("xaxis_1", shape.x_size) + totprcp_ave = nc_ds.createVariable("totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) + totprcp_ave[0, ...] = np.ones(shape.as_tuple) + rrfs_hwp_ave = nc_ds.createVariable( + "rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1") + ) + rrfs_hwp_ave[0, ...] = totprcp_ave[:] + 2 diff --git a/tests/test_python/test_smoke_dust/test_core/test_cycle.py b/tests/test_python/test_smoke_dust/test_core/test_cycle.py index 1e4b302074..00f2dffbcf 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_cycle.py +++ b/tests/test_python/test_smoke_dust/test_core/test_cycle.py @@ -1,5 +1,6 @@ """Tests related to the smoke/dust cycle processor.""" +from datetime import datetime, timedelta from pathlib import Path import pytest @@ -12,6 +13,7 @@ create_fake_context, create_fake_grid_out, FakeGridOutShape, + create_fake_restart_files, ) @@ -49,26 +51,21 @@ def test_writes_dummy_emissions_with_no_restart_files( else: assert context_for_dummy_test.emissions_path.exists() - def test_iter_restart_files( + def test_find_restart_files( self, tmp_path: Path, fake_grid_out_shape: FakeGridOutShape ) -> None: - """Test iterating over restart files.""" + """Test finding restart files.""" create_fake_grid_out(tmp_path, fake_grid_out_shape) context = create_fake_context(tmp_path) cycle = SmokeDustCycleTwo(context) - expected_vars = ("totprcp_ave", "rrfs_hwp_ave") - restart_slug = "phy_data" - outdir = tmp_path / "RESTART" - outdir.mkdir() - create_restart_ncfile(outdir / f"foobar.nonsense.{restart_slug}.0000.nc", expected_vars) - create_restart_ncfile(outdir / f"foobar.nonsense.{restart_slug}.1111.nc", []) - create_restart_ncfile(outdir / "foobar.nonsense.nc", []) - for root_dir in [outdir, tmp_path]: - print(root_dir) - restart_files = list( - cycle._iter_restart_files_( # pylint: disable=protected-access - root_dir, - expected_vars, - ) - ) - assert len(restart_files) == 1 + create_fake_restart_files(context.nwges_dir, cycle.cycle_dates, fake_grid_out_shape) + create_fake_restart_files( + context.nwges_dir, + [ + str(datetime.strptime(date, "%Y%m%d%H") + timedelta(days=10)) + for date in cycle.cycle_dates + 
], + fake_grid_out_shape, + ) + actual = cycle._find_restart_files_() # pylint: disable=protected-access + assert len(actual) == len(cycle.cycle_dates) diff --git a/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py b/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py index 54b8e09776..9851d467fb 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py +++ b/tests/test_python/test_smoke_dust/test_core/test_preprocessor.py @@ -23,39 +23,13 @@ create_fake_grid_out, create_fake_context, create_file_hash, + create_fake_restart_files, ) -def create_fake_restart_files( - root_dir: Path, forecast_dates: pd.DatetimeIndex, shape: FakeGridOutShape -) -> None: - """ - Create fake restart files expected for EBB_DCYLE=2. - - Args: - root_dir: Directory to create fake files in. - forecast_dates: The series of dates to create the restart files for. - shape: Output grid shape. - """ - restart_dir = root_dir / "RESTART" - restart_dir.mkdir() - for date in forecast_dates: - restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" - with Dataset(restart_file, "w") as nc_ds: - nc_ds.createDimension("Time") - nc_ds.createDimension("yaxis_1", shape.y_size) - nc_ds.createDimension("xaxis_1", shape.x_size) - totprcp_ave = nc_ds.createVariable("totprcp_ave", "f4", ("Time", "yaxis_1", "xaxis_1")) - totprcp_ave[0, ...] = np.ones(shape.as_tuple) - rrfs_hwp_ave = nc_ds.createVariable( - "rrfs_hwp_ave", "f4", ("Time", "yaxis_1", "xaxis_1") - ) - rrfs_hwp_ave[0, ...] = totprcp_ave[:] + 2 - - def create_fake_rave_interpolated( root_dir: Path, - forecast_dates: pd.DatetimeIndex, + cycle_dates: pd.DatetimeIndex, shape: FakeGridOutShape, rave_to_intp: str, ) -> None: @@ -64,11 +38,11 @@ def create_fake_rave_interpolated( Args: root_dir: The directory to create fake interpolated data in. - forecast_dates: The series of dates to create the interpolated data for. + cycle_dates: The series of dates to create the interpolated data for. shape: The output grid shape. rave_to_intp: Filename prefix to use for output files. 
""" - for date in forecast_dates: + for date in cycle_dates: intp_file = root_dir / f"{rave_to_intp}{date}00_{date}59.nc" dims = ("t", "lat", "lon") with Dataset(intp_file, "w") as nc_ds: @@ -129,10 +103,10 @@ def data_for_test( create_fake_veg_map(tmp_path, fake_grid_out_shape) context = create_fake_context(tmp_path, overrides={"ebb_dcycle": request.param.flag}) preprocessor = SmokeDustPreprocessor(context) - create_fake_restart_files(tmp_path, preprocessor.forecast_dates, fake_grid_out_shape) + create_fake_restart_files(tmp_path, preprocessor.cycle_dates, fake_grid_out_shape) create_fake_rave_interpolated( tmp_path, - preprocessor.forecast_dates, + preprocessor.cycle_dates, fake_grid_out_shape, context.predef_grid.value + "_intp_", ) @@ -159,7 +133,7 @@ def test_run( spy5 = mocker.spy(cycle_processor_class, "average_frp") assert isinstance(preprocessor._cycle_processor, data_for_test.expected.klass) - assert preprocessor._cycle_processor._forecast_metadata is None + assert preprocessor._cycle_processor._cycle_metadata is None # pylint: enable=protected-access assert not data_for_test.context.emissions_path.exists() diff --git a/tests/test_python/test_smoke_dust/test_core/test_regrid.py b/tests/test_python/test_smoke_dust/test_core/test_regrid.py index 3a040a61fd..1d48199b0a 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_regrid.py +++ b/tests/test_python/test_smoke_dust/test_core/test_regrid.py @@ -90,7 +90,7 @@ def data_for_test( ) context = create_fake_context(tmp_path, overrides={"regrid_in_memory": request.param}) preprocessor = SmokeDustPreprocessor(context) - for date in preprocessor.forecast_dates: + for date in preprocessor.cycle_dates: path = tmp_path / f"Hourly_Emissions_3km_{date}_{date}.nc" _ = create_fake_rave_and_rrfs_like_data( FakeGridParams(path=path, shape=fake_grid_out_shape, fields=["FRP_MEAN", "FRE"]) @@ -178,7 +178,7 @@ def test_run( """Test the regrid processor.""" spy1 = mocker.spy(SmokeDustRegridProcessor, "_run_impl_") regrid_processor = SmokeDustRegridProcessor(data_for_test.context) - regrid_processor.run(data_for_test.preprocessor.forecast_metadata) + regrid_processor.run(data_for_test.preprocessor.cycle_metadata) spy1.assert_called_once() interpolated_files = glob.glob( f"*{data_for_test.context.rave_to_intp}*nc", root_dir=tmp_path diff --git a/tests/test_python/test_smoke_dust/test_generate_emissions.py b/tests/test_python/test_smoke_dust/test_generate_emissions.py index d28651cf3b..04752377ee 100644 --- a/tests/test_python/test_smoke_dust/test_generate_emissions.py +++ b/tests/test_python/test_smoke_dust/test_generate_emissions.py @@ -20,7 +20,7 @@ def test(tmp_path: Path, fake_grid_out_shape: FakeGridOutShape, mocker: MockerFi strpath = str(tmp_path) runner = CliRunner() os.environ["CDATE"] = "2019072200" - os.environ["DATA"] = strpath + os.environ["COMIN_SMOKE_DUST_COMMUNITY"] = strpath try: args = [ @@ -45,7 +45,7 @@ def test(tmp_path: Path, fake_grid_out_shape: FakeGridOutShape, mocker: MockerFi ] result = runner.invoke(app, args, catch_exceptions=False) except: - for env_var in ["CDATE", "DATA"]: + for env_var in ["CDATE", "COMIN_SMOKE_DUST_COMMUNITY"]: os.unsetenv(env_var) raise print(result.output) diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py index fb67009f54..86e22638a3 100644 --- a/ush/smoke_dust/core/context.py +++ b/ush/smoke_dust/core/context.py @@ -183,7 +183,7 @@ def _initialize_values_(cls, values: dict) -> dict: # Format environment-level variables values["current_day"] = os.environ["CDATE"] - 
values["nwges_dir"] = os.environ["DATA"] + values["nwges_dir"] = os.environ["COMIN_SMOKE_DUST_COMMUNITY"] return values @@ -226,8 +226,9 @@ def grid_out(self) -> Path: @property def hourly_hwpdir(self) -> Path: - """Path to the directory containing restart files for `EBB_DCYCLE=2`.""" - return self.nwges_dir / "RESTART" + """Path to the root directory containing restart files.""" + assert isinstance(self.nwges_dir, Path) + return self.nwges_dir.parent # pylint: disable=no-member @property def emissions_path(self) -> Path: diff --git a/ush/smoke_dust/core/cycle.py b/ush/smoke_dust/core/cycle.py index a296450e0a..bdb122efa9 100644 --- a/ush/smoke_dust/core/cycle.py +++ b/ush/smoke_dust/core/cycle.py @@ -6,7 +6,7 @@ import glob import logging from pathlib import Path -from typing import Any, Iterator +from typing import Any import numpy as np import pandas as pd @@ -43,24 +43,22 @@ def __init__(self, context: SmokeDustContext): self._context = context # On-demand/cached property values - self._forecast_metadata = None - self._forecast_dates = None + self._cycle_metadata = None + self._cycle_dates = None @property - def forecast_dates(self) -> pd.DatetimeIndex: + def cycle_dates(self) -> pd.DatetimeIndex: """Create the forecast dates for cycle.""" - if self._forecast_dates is not None: - return self._forecast_dates + if self._cycle_dates is not None: + return self._cycle_dates start_datetime = self.create_start_datetime() self.log(f"{start_datetime=}") - forecast_dates = pd.date_range(start=start_datetime, periods=24, freq="h").strftime( - "%Y%m%d%H" - ) - self._forecast_dates = forecast_dates - return self._forecast_dates + cycle_dates = pd.date_range(start=start_datetime, periods=24, freq="h").strftime("%Y%m%d%H") + self._cycle_dates = cycle_dates + return self._cycle_dates @property - def forecast_metadata(self) -> pd.DataFrame: + def cycle_metadata(self) -> pd.DataFrame: """Create forecast metadata consisting of: * ``forecast_date``: The forecast timestep as a `datetime` object. @@ -68,14 +66,14 @@ def forecast_metadata(self) -> pd.DataFrame: found. * ``rave_raw``: Raw RAVE data before interpolation. Null if not found. 
""" - if self._forecast_metadata is not None: - return self._forecast_metadata + if self._cycle_metadata is not None: + return self._cycle_metadata # Collect metadata on data files related to forecast dates self.log("creating forecast metadata") intp_path = [] rave_to_forecast = [] - for date in self.forecast_dates: + for date in self.cycle_dates: # Check for pre-existing interpolated RAVE data file_path = ( Path(self._context.intp_dir) / f"{self._context.rave_to_intp}{date}00_{date}59.nc" @@ -104,17 +102,17 @@ def forecast_metadata(self) -> pd.DataFrame: if not found: rave_to_forecast.append(None) - self.log(f"{self.forecast_dates}", level=logging.DEBUG) + self.log(f"{self.cycle_dates}", level=logging.DEBUG) self.log(f"{intp_path=}", level=logging.DEBUG) self.log(f"{rave_to_forecast=}", level=logging.DEBUG) data_frame = pd.DataFrame( data={ - "forecast_date": self.forecast_dates, + "forecast_date": self.cycle_dates, "rave_interpolated": intp_path, "rave_raw": rave_to_forecast, } ) - self._forecast_metadata = data_frame + self._cycle_metadata = data_frame return data_frame def log(self, *args: Any, **kwargs: Any) -> None: @@ -193,7 +191,7 @@ def average_frp(self) -> AverageFrpOutput: with xr.open_dataset(self._context.grid_out) as nc_ds: target_area = nc_ds["area"].values - for row_idx, row_df in self.forecast_metadata.iterrows(): + for row_idx, row_df in self.cycle_metadata.iterrows(): self.log(f"processing emissions: {row_idx}, {row_df.to_dict()}") with xr.open_dataset(row_df["rave_interpolated"]) as nc_ds: fre = nc_ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values @@ -225,6 +223,7 @@ class SmokeDustCycleTwo(AbstractSmokeDustCycleProcessor): """ flag = EbbDCycle.TWO + expected_restart_varnames = ("totprcp_ave", "rrfs_hwp_ave") def create_start_datetime(self) -> dt.datetime: self.log("Creating emissions for modulated persistence by Wildfire potential") @@ -234,13 +233,11 @@ def run(self) -> None: # pylint: disable=too-many-statements self.log("run: enter") - forecast_metadata = self.forecast_metadata + cycle_metadata = self.cycle_metadata hwp_ave = [] totprcp = np.zeros(self._context.grid_out_shape).ravel() - phy_data_paths = list( - self._iter_restart_files_(self._context.hourly_hwpdir, ("rrfs_hwp_ave", "totprcp_ave")) - ) + phy_data_paths = list(self._find_restart_files_()) if len(phy_data_paths) == 0: if self._context.allow_dummy_restart: self.log( @@ -267,7 +264,7 @@ def run(self) -> None: derived = self.average_frp() t_fire = np.zeros(self._context.grid_out_shape) - for date in forecast_metadata["forecast_date"]: + for date in cycle_metadata["forecast_date"]: rave_path = self._context.intp_dir / f"{self._context.rave_to_intp}{date}00_{date}59.nc" with xr.open_dataset(rave_path) as nc_ds: frp = nc_ds.frp_avg_hr[0, :, :].values @@ -341,7 +338,7 @@ def average_frp(self) -> AverageFrpOutput: with xr.open_dataset(self._context.grid_out) as nc_ds: target_area = nc_ds["area"].values - for row_idx, row_df in self.forecast_metadata.iterrows(): + for row_idx, row_df in self.cycle_metadata.iterrows(): self.log(f"processing emissions: {row_idx}, {row_df.to_dict()}") with xr.open_dataset(row_df["rave_interpolated"]) as nc_ds: fre = nc_ds[EmissionVariable.FRE.smoke_dust_name()][0, :, :].values @@ -388,21 +385,38 @@ def average_frp(self) -> AverageFrpOutput: } ) - def _iter_restart_files_( - self, root_dir: Path, expected_vars: tuple[str, ...] 
- ) -> Iterator[Path]: + def _find_restart_files_( + self, + ) -> tuple[Path, ...]: + root_dir = self._context.hourly_hwpdir + self.log(f"_find_restart_files_: {root_dir=}") filenames = glob.glob("**/*phy_data*nc", root_dir=root_dir, recursive=True) + potential_restart_files = [ + f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc" for cycle in self.cycle_dates + ] + self.log(f"_find_restart_files_: {potential_restart_files=}") + found_potentials = [] + restart_files = [] for filename in filenames: + self.log(f"_find_restart_files_: {filename=}", level=logging.DEBUG) path = root_dir / filename - try: - resolved = path.resolve(strict=True) - except FileNotFoundError: - self.log(f"restart file link not resolvable: {path}", level=logging.WARN) - continue - with open_nc(resolved) as nc_ds: - variables = nc_ds.variables.keys() # pylint: disable=no-member - if all(expected_var in variables for expected_var in expected_vars): - yield path + if path.name in potential_restart_files and path.name not in found_potentials: + try: + resolved = path.resolve(strict=True) + except FileNotFoundError: + self.log(f"restart file link not resolvable: {path=}", level=logging.WARN) + continue + with open_nc(resolved) as nc_ds: + variables = nc_ds.variables.keys() # pylint: disable=no-member + if all( + expected_var in variables for expected_var in self.expected_restart_varnames + ): + self.log( + f"_find_restart_files_: found restart path {path=}", level=logging.DEBUG + ) + restart_files.append(path) + found_potentials.append(path.name) + return tuple(restart_files) def create_cycle_processor( diff --git a/ush/smoke_dust/core/preprocessor.py b/ush/smoke_dust/core/preprocessor.py index 0046896645..9897ef187f 100644 --- a/ush/smoke_dust/core/preprocessor.py +++ b/ush/smoke_dust/core/preprocessor.py @@ -29,24 +29,24 @@ def log(self, *args: Any, **kwargs: Any) -> None: self._context.log(*args, **kwargs) @property - def forecast_dates(self) -> pd.DatetimeIndex: - """See ``AbstractSmokeDustCycleProcessor.forecast_dates``.""" - return self._cycle_processor.forecast_dates + def cycle_dates(self) -> pd.DatetimeIndex: + """See ``AbstractSmokeDustCycleProcessor.cycle_dates``.""" + return self._cycle_processor.cycle_dates @property - def forecast_metadata(self) -> pd.DataFrame: - """See ``AbstractSmokeDustCycleProcessor.forecast_metadata``.""" - return self._cycle_processor.forecast_metadata + def cycle_metadata(self) -> pd.DataFrame: + """See ``AbstractSmokeDustCycleProcessor.cycle_metadata``.""" + return self._cycle_processor.cycle_metadata @property def is_first_day(self) -> bool: """``True`` if this is considered the "first day" of the simulation where there is no interpolated or raw RAVE data available.""" - forecast_metadata = self._cycle_processor.forecast_metadata + cycle_metadata = self._cycle_processor.cycle_metadata is_first_day = ( - forecast_metadata["rave_interpolated"].isnull().all() - and forecast_metadata["rave_raw"].isnull().all() + cycle_metadata["rave_interpolated"].isnull().all() + and cycle_metadata["rave_raw"].isnull().all() ) self.log(f"{is_first_day=}") return is_first_day @@ -58,7 +58,7 @@ def run(self) -> None: if self._context.rank == 0: self._context.create_dummy_emissions_file() else: - self._regrid_processor.run(self._cycle_processor.forecast_metadata) + self._regrid_processor.run(self._cycle_processor.cycle_metadata) if self._context.rank == 0: self._cycle_processor.run() self.log("run: exiting") diff --git a/ush/smoke_dust/core/regrid/processor.py b/ush/smoke_dust/core/regrid/processor.py index 
c8833276c8..2362f9b45c 100644 --- a/ush/smoke_dust/core/regrid/processor.py +++ b/ush/smoke_dust/core/regrid/processor.py @@ -47,18 +47,17 @@ def log(self, *args: Any, **kwargs: Any) -> None: """See ``SmokeDustContext.log``.""" self._context.log(*args, **kwargs) - def run(self, forecast_metadata: pd.DataFrame) -> None: + def run(self, cycle_metadata: pd.DataFrame) -> None: """Run the regrid processor. This may be run in parallel using MPI.""" # Select which RAVE files to interpolate - rave_to_interpolate = forecast_metadata[ - forecast_metadata["rave_interpolated"].isnull() - & ~forecast_metadata["rave_raw"].isnull() + rave_to_interpolate = cycle_metadata[ + cycle_metadata["rave_interpolated"].isnull() & ~cycle_metadata["rave_raw"].isnull() ] if len(rave_to_interpolate) == 0: self.log("all rave files have been interpolated") return - self._run_impl_(forecast_metadata, rave_to_interpolate) + self._run_impl_(cycle_metadata, rave_to_interpolate) @property def _src_gwrap(self) -> GridWrapper: @@ -146,7 +145,7 @@ def _get_regridder_(self, src_fwrap: FieldWrapper, dst_fwrap: FieldWrapper) -> e self.__regridder = regridder return self.__regridder - def _run_impl_(self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Series) -> None: + def _run_impl_(self, cycle_metadata: pd.DataFrame, rave_to_interpolate: pd.Series) -> None: for row_idx, row_data in rave_to_interpolate.iterrows(): row_dict = row_data.to_dict() self.log(f"processing RAVE interpolation row: {row_idx}, {row_dict}") @@ -226,7 +225,7 @@ def _run_impl_(self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Se dst_fwrap.fill_nc_variable(output_file_path) # Update the forecast metadata with the interpolated RAVE file data - forecast_metadata.loc[row_idx, "rave_interpolated"] = output_file_path + cycle_metadata.loc[row_idx, "rave_interpolated"] = output_file_path row_data["rave_interpolated"] = output_file_path if self._context.rank == 0: @@ -237,10 +236,10 @@ def _run_impl_(self, forecast_metadata: pd.DataFrame, rave_to_interpolate: pd.Se and self._context.should_calc_desc_stats and self._interpolation_stats is not None ): - forecast_dates = forecast_metadata["forecast_date"] + cycle_dates = cycle_metadata["forecast_date"] stats_path = ( self._context.intp_dir - / f"stats_regridding_{forecast_dates.min()}_{forecast_dates.max()}.csv" + / f"stats_regridding_{cycle_dates.min()}_{cycle_dates.max()}.csv" ) self.log(f"writing interpolation statistics: {stats_path=}") self._interpolation_stats.to_csv(stats_path, index=False) diff --git a/ush/smoke_dust/generate_emissions.py b/ush/smoke_dust/generate_emissions.py index f1f2a4397b..355ad9650e 100755 --- a/ush/smoke_dust/generate_emissions.py +++ b/ush/smoke_dust/generate_emissions.py @@ -97,6 +97,9 @@ def main( # pylint:disable=too-many-arguments exit_on_error=exit_on_error, regrid_in_memory=regrid_in_memory, ) + # Uncomment to write environment data to the output log. Comment again when done. 
+ # if context.rank == 0: + # context.log(f"{os.environ=}") processor = SmokeDustPreprocessor(context) try: processor.run() From aa2013f2e950b162a98a6c9c9470d609fa63e6dc Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Wed, 12 Feb 2025 09:49:45 -0700 Subject: [PATCH 36/41] fix: pylint --- ush/smoke_dust/core/context.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ush/smoke_dust/core/context.py b/ush/smoke_dust/core/context.py index 86e22638a3..430fd216e7 100644 --- a/ush/smoke_dust/core/context.py +++ b/ush/smoke_dust/core/context.py @@ -192,10 +192,12 @@ def _finalize_model_(self) -> "SmokeDustContext": self._logger = self._init_logging_() with open_nc(self.grid_out, parallel=False) as nc_ds: + # pylint: disable=unsubscriptable-object self.grid_out_shape = ( nc_ds.dimensions["grid_yt"].size, nc_ds.dimensions["grid_xt"].size, ) + # pylint: enable=unsubscriptable-object self.log(f"{self.grid_out_shape=}") return self From 2b3045ae604320edd7e21e65d72233192a872a4c Mon Sep 17 00:00:00 2001 From: benkozi Date: Fri, 21 Feb 2025 07:18:30 -0700 Subject: [PATCH 37/41] feat: data staging on T1 platforms (#11) --- scripts/exsrw_smoke_dust.sh | 2 +- ush/config.smoke_dust.yaml | 10 +--------- ush/config_defaults.yaml | 1 + ush/machine/derecho.yaml | 10 ++++++++-- ush/machine/gaea-c6.yaml | 11 ++++++----- ush/machine/gaea.yaml | 8 ++++++++ ush/machine/hera.yaml | 12 ++++++------ ush/machine/hercules.yaml | 14 +++++++------- ush/machine/jet.yaml | 8 ++++++++ ush/machine/noaacloud.yaml | 8 ++++++++ ush/machine/orion.yaml | 14 +++++++------- ush/machine/singularity.yaml | 9 +++++++++ 12 files changed, 70 insertions(+), 37 deletions(-) diff --git a/scripts/exsrw_smoke_dust.sh b/scripts/exsrw_smoke_dust.sh index 6e05885248..aa9d13a8d6 100755 --- a/scripts/exsrw_smoke_dust.sh +++ b/scripts/exsrw_smoke_dust.sh @@ -107,7 +107,7 @@ else # #----------------------------------------------------------------------- # - mpirun -n ${nprocs} ${USHdir}/smoke_dust/generate_emissions.py \ + ${RUN_CMD_SMOKE_DUST} ${USHdir}/smoke_dust/generate_emissions.py \ --staticdir "${FIXsmoke}/${PREDEF_GRID_NAME}" \ --ravedir "${DATA}" \ --intp-dir "${DATA_SHARE}" \ diff --git a/ush/config.smoke_dust.yaml b/ush/config.smoke_dust.yaml index dcd84d4390..ede4b9905d 100644 --- a/ush/config.smoke_dust.yaml +++ b/ush/config.smoke_dust.yaml @@ -2,7 +2,7 @@ metadata: description: config for Smoke and Dust, RRFS_CONUS_3km user: RUN_ENVIR: community - MACHINE: [hera/orion/hercules/gaea-c6] + MACHINE: [machine name] ACCOUNT: [account name] workflow: USE_CRON_TO_RELAUNCH: false @@ -36,18 +36,10 @@ rocoto: task_get_extrn_ics: EXTRN_MDL_NAME_ICS: RAP EXTRN_MDL_ICS_OFFSET_HRS: 0 - USE_USER_STAGED_EXTRN_FILES: true - EXTRN_MDL_SOURCE_BASEDIR_ICS: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAP_DATA_SD/${yyyymmddhh} # hera -# EXTRN_MDL_SOURCE_BASEDIR_ICS: /work/noaa/epic/SRW-AQM_DATA/input_model_data/RAP/${yyyymmddhh} # orion/hercules -# EXTRN_MDL_SOURCE_BASEDIR_ICS: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmddhh} # gaea-c6 task_get_extrn_lbcs: EXTRN_MDL_NAME_LBCS: RAP LBC_SPEC_INTVL_HRS: 6 EXTRN_MDL_LBCS_OFFSET_HRS: 0 - USE_USER_STAGED_EXTRN_FILES: true - EXTRN_MDL_SOURCE_BASEDIR_LBCS: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAP_DATA_SD/${yyyymmddhh} # hera -# EXTRN_MDL_SOURCE_BASEDIR_LBCS: /work/noaa/epic/SRW-AQM_DATA/input_model_data/RAP/${yyyymmddhh} # orion/hercules -# EXTRN_MDL_SOURCE_BASEDIR_LBCS: 
/gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmddhh} # gaea-c6 task_make_ics: LEVP: 66 VCOORD_FILE: "{{ workflow.FIXam }}/global_hyblev_fcst_rrfsL65.txt" diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 1b594bd5a9..b3deaa1b84 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -249,6 +249,7 @@ platform: RUN_CMD_PRDGEN: "" RUN_CMD_NEXUS: "" RUN_CMD_AQMLBC: "" + RUN_CMD_SMOKE_DUST: mpirun -n $nprocs # #----------------------------------------------------------------------- # diff --git a/ush/machine/derecho.yaml b/ush/machine/derecho.yaml index d8a3e8f4d4..8ebfd4e109 100644 --- a/ush/machine/derecho.yaml +++ b/ush/machine/derecho.yaml @@ -32,8 +32,11 @@ platform: FIXorg: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/NaturalEarth - FIXaqm: /glade/work/chanhooj/SRW-AQM_DATA/fix_aqm - FIXemis: /glade/work/chanhooj/SRW-AQM_DATA/fix_emis + FIXaqm: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_aqm + FIXemis: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_emis + FIXsmoke: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_smoke + FIXupp: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_upp + FIXcrtm: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_crtm EXTRN_MDL_DATA_STORES: aws data: ics_lbcs: @@ -49,3 +52,6 @@ cpl_aqm_parm: COMINfire_default: /glade/work/chanhooj/SRW-AQM_DATA/aqm_data/RAVE_fire COMINgefs_default: /glade/work/chanhooj/SRW-AQM_DATA/aqm_data/GEFS_DATA NEXUS_GFS_SFC_DIR: /glade/work/chanhooj/SRW-AQM_DATA/aqm_data/GFS_SFC_DATA +smoke_dust_parm: + COMINsmoke_default: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire \ No newline at end of file diff --git a/ush/machine/gaea-c6.yaml b/ush/machine/gaea-c6.yaml index 1f293adcb8..3aae8a6f02 100644 --- a/ush/machine/gaea-c6.yaml +++ b/ush/machine/gaea-c6.yaml @@ -20,6 +20,7 @@ platform: RUN_CMD_PRDGEN: srun --export=ALL -n $nprocs RUN_CMD_SERIAL: time RUN_CMD_UTILS: srun --export=ALL -n $nprocs + RUN_CMD_SMOKE_DUST: python SCHED_NATIVE_CMD: --clusters=c6 --export=NONE SCHED_NATIVE_CMD_HPSS: --clusters=es --export=NONE PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }' @@ -35,9 +36,9 @@ platform: FIXorg: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/NaturalEarth - FIXsmoke: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_smoke - FIXupp: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_upp - FIXcrtm: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/fix_crtm + FIXsmoke: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_smoke + FIXupp: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_upp + FIXcrtm: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/fix/fix_crtm EXTRN_MDL_DATA_STORES: aws data: @@ -52,8 +53,8 @@ data: GSMGFS: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/input_model_data/GSMGFS/${yyyymmdd}${hh} smoke_dust_parm: - COMINsmoke_default: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/data_smoke_dust/RAVE_smoke_dust - COMINrave_default: /gpfs/f6/bil-fire8/world-shared/SRW_AQM_data/data_smoke_dust/RAVE_fire + COMINsmoke_default: 
/gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire rocoto: tasks: diff --git a/ush/machine/gaea.yaml b/ush/machine/gaea.yaml index 17537d0a5b..415fd267ac 100644 --- a/ush/machine/gaea.yaml +++ b/ush/machine/gaea.yaml @@ -35,6 +35,11 @@ platform: FIXorg: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/NaturalEarth + FIXaqm: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/fix/fix_aqm + FIXemis: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/fix/fix_emis + FIXsmoke: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/fix/fix_smoke + FIXupp: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/fix/fix_upp + FIXcrtm: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/fix/fix_crtm EXTRN_MDL_DATA_STORES: aws data: ics_lbcs: @@ -46,6 +51,9 @@ data: HRRR: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/input_model_data/HRRR/${yyyymmdd}${hh} RAP: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh} GSMGFS: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/input_model_data/GSMGFS/${yyyymmdd}${hh} +smoke_dust_parm: + COMINsmoke_default: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire rocoto: tasks: metatask_run_ensemble: diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index c034471905..a4b05048b1 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -38,11 +38,11 @@ platform: FIXorg: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/NaturalEarth - FIXaqm: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_aqm + FIXaqm: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_aqm FIXemis: /scratch1/RDARCH/rda-arl-gpu/Barry.Baker/emissions/nexus - FIXsmoke: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_smoke - FIXupp: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_upp - FIXcrtm: /scratch2/NAGAPE/epic/SRW-AQM_DATA/fix_crtm + FIXsmoke: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_smoke + FIXupp: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_upp + FIXcrtm: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_crtm EXTRN_MDL_DATA_STORES: hpss aws nomads cpl_aqm_parm: @@ -51,8 +51,8 @@ cpl_aqm_parm: NEXUS_GFS_SFC_DIR: /scratch2/NAGAPE/epic/SRW-AQM_DATA/aqm_data/GFS_SFC_DATA smoke_dust_parm: - COMINsmoke_default: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_smoke_dust - COMINrave_default: /scratch2/NAGAPE/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_fire + COMINsmoke_default: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire rocoto: tasks: diff --git a/ush/machine/hercules.yaml b/ush/machine/hercules.yaml index 523fa31233..dec65c173e 100644 --- a/ush/machine/hercules.yaml +++ b/ush/machine/hercules.yaml @@ -34,11 +34,11 @@ platform: FIXorg: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: 
/work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/NaturalEarth - FIXaqm: /work/noaa/epic/SRW-AQM_DATA/fix_aqm - FIXemis: /work/noaa/epic/SRW-AQM_DATA/fix_emis - FIXsmoke: /work/noaa/epic/SRW-AQM_DATA/fix_smoke - FIXupp: /work/noaa/epic/SRW-AQM_DATA/fix_upp - FIXcrtm: /work/noaa/epic/SRW-AQM_DATA/fix_crtm + FIXaqm: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_aqm + FIXemis: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_emis + FIXsmoke: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_smoke + FIXupp: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_upp + FIXcrtm: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_crtm EXTRN_MDL_DATA_STORES: aws data: ics_lbcs: @@ -57,8 +57,8 @@ cpl_aqm_parm: NEXUS_GFS_SFC_DIR: /work/noaa/epic/SRW-AQM_DATA/aqm_data/GFS_SFC_DATA smoke_dust_parm: - COMINsmoke_default: /work/noaa/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_smoke_dust - COMINrave_default: /work/noaa/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_fire + COMINsmoke_default: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire rocoto: tasks: diff --git a/ush/machine/jet.yaml b/ush/machine/jet.yaml index b14a4ab9ff..ccfb395374 100644 --- a/ush/machine/jet.yaml +++ b/ush/machine/jet.yaml @@ -33,6 +33,10 @@ platform: FIXorg: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/NaturalEarth + FIXaqm: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/fix/fix_aqm + FIXsmoke: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/fix/fix_smoke + FIXupp: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/fix/fix_upp + FIXcrtm: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/fix/fix_crtm EXTRN_MDL_DATA_STORES: hpss aws nomads data: ics_lbcs: @@ -46,6 +50,10 @@ data: prepbufr: /public/data/grids/gfs/prepbufr tcvitals: /public/data/grids/gfs/bufr +smoke_dust_parm: + COMINsmoke_default: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire + rocoto: tasks: metatask_run_ensemble: diff --git a/ush/machine/noaacloud.yaml b/ush/machine/noaacloud.yaml index 2b27c0c139..3890e20b07 100644 --- a/ush/machine/noaacloud.yaml +++ b/ush/machine/noaacloud.yaml @@ -27,9 +27,17 @@ platform: FIXorg: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /contrib/EPIC/UFS_SRW_data/develop/NaturalEarth + FIXaqm: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_aqm + FIXemis: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_emis + FIXsmoke: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_smoke + FIXupp: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_upp + FIXcrtm: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_crtm EXTRN_MDL_DATA_STORES: aws nomads data: ics_lbcs: FV3GFS: nemsio: /contrib/EPIC/UFS_SRW_data/develop/input_model_data/FV3GFS/nemsio/${yyyymmdd}${hh} grib2: /contrib/EPIC/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh} +smoke_dust_parm: + COMINsmoke_default: /contrib/EPIC/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /contrib/EPIC/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire \ No newline at end of file diff --git a/ush/machine/orion.yaml 
b/ush/machine/orion.yaml index d69ab9e965..892f8fa281 100644 --- a/ush/machine/orion.yaml +++ b/ush/machine/orion.yaml @@ -33,11 +33,11 @@ platform: FIXorg: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_orog FIXsfc: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo FIXshp: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/NaturalEarth - FIXaqm: /work/noaa/epic/SRW-AQM_DATA/fix_aqm - FIXemis: /work/noaa/epic/SRW-AQM_DATA/fix_emis - FIXsmoke: /work/noaa/epic/SRW-AQM_DATA/fix_smoke - FIXupp: /work/noaa/epic/SRW-AQM_DATA/fix_upp - FIXcrtm: /work/noaa/epic/SRW-AQM_DATA/fix_crtm + FIXaqm: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_aqm + FIXemis: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_emis + FIXsmoke: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_smoke + FIXupp: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_upp + FIXcrtm: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/fix/fix_crtm EXTRN_MDL_DATA_STORES: aws nomads data: ics_lbcs: @@ -56,8 +56,8 @@ cpl_aqm_parm: NEXUS_GFS_SFC_DIR: /work/noaa/epic/SRW-AQM_DATA/aqm_data/GFS_SFC_DATA smoke_dust_parm: - COMINsmoke_default: /work/noaa/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_smoke_dust - COMINrave_default: /work/noaa/epic/SRW-AQM_DATA/data_smoke_dust/RAVE_fire + COMINsmoke_default: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire rocoto: tasks: diff --git a/ush/machine/singularity.yaml b/ush/machine/singularity.yaml index 72db7bdb45..353c73361c 100644 --- a/ush/machine/singularity.yaml +++ b/ush/machine/singularity.yaml @@ -22,4 +22,13 @@ platform: FIXorg: /contrib/global/glopara/fix/fix_orog FIXsfc: /contrib/global/glopara/fix/fix_sfc_climo FIXshp: /contrib/global/glopara/NaturalEarth + FIXaqm: /contrib/global/glopara/fix/fix_aqm + FIXemis: /contrib/global/glopara/fix/fix_emis + FIXsmoke: /contrib/global/glopara/fix/fix_smoke + FIXupp: /contrib/global/glopara/fix/fix_upp + FIXcrtm: /contrib/global/glopara/fix/fix_crtm EXTRN_MDL_DATA_STORES: aws nomads + +smoke_dust_parm: + COMINsmoke_default: /contrib/global/glopara/data_smoke_dust/RAVE_smoke_dust + COMINrave_default: /contrib/global/glopara/data_smoke_dust/RAVE_fire From 01eca63ad7e9b6730f5c5c175a090ee1fbbd1151 Mon Sep 17 00:00:00 2001 From: benkozi Date: Fri, 21 Feb 2025 10:01:11 -0700 Subject: [PATCH 38/41] fix: mpirun on gaea-c6 (#12) --- ush/config_defaults.yaml | 2 +- ush/machine/gaea-c6.yaml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index b3deaa1b84..545153db56 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -249,7 +249,7 @@ platform: RUN_CMD_PRDGEN: "" RUN_CMD_NEXUS: "" RUN_CMD_AQMLBC: "" - RUN_CMD_SMOKE_DUST: mpirun -n $nprocs + RUN_CMD_SMOKE_DUST: mpirun -n $nprocs python # #----------------------------------------------------------------------- # diff --git a/ush/machine/gaea-c6.yaml b/ush/machine/gaea-c6.yaml index 3aae8a6f02..55b813623b 100644 --- a/ush/machine/gaea-c6.yaml +++ b/ush/machine/gaea-c6.yaml @@ -20,7 +20,6 @@ platform: RUN_CMD_PRDGEN: srun --export=ALL -n $nprocs RUN_CMD_SERIAL: time RUN_CMD_UTILS: srun --export=ALL -n $nprocs - RUN_CMD_SMOKE_DUST: python SCHED_NATIVE_CMD: --clusters=c6 --export=NONE SCHED_NATIVE_CMD_HPSS: --clusters=es --export=NONE PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }' From 
5e5021b2545c2aca67282578dc61d81d19374d3a Mon Sep 17 00:00:00 2001 From: Ben Koziol Date: Fri, 21 Feb 2025 14:55:45 -0700 Subject: [PATCH 39/41] update build_gaea_intel.lua modulefile --- modulefiles/build_gaea_intel.lua | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modulefiles/build_gaea_intel.lua b/modulefiles/build_gaea_intel.lua index 9c627a5b1a..fd366eaf13 100644 --- a/modulefiles/build_gaea_intel.lua +++ b/modulefiles/build_gaea_intel.lua @@ -20,6 +20,9 @@ load(pathJoin("cmake", cmake_ver)) load("srw_common") +load(pathJoin("nco", os.getenv("nco_ver") or "5.0.6")) +load(pathJoin("prod_util", os.getenv("prod_util_ver") or "2.1.1")) + unload("darshan-runtime/3.4.0") unload("cray-pmi/6.1.10") From b67276b8e813708ec17682866ce653c3a61f8529 Mon Sep 17 00:00:00 2001 From: benkozi Date: Mon, 24 Feb 2025 10:18:47 -0700 Subject: [PATCH 40/41] fix: restart files optimization + non-parallel open (#13) --- tests/test_python/test_smoke_dust/conftest.py | 4 +- .../test_smoke_dust/test_core/test_cycle.py | 2 +- ush/smoke_dust/core/cycle.py | 48 +++++++++++-------- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py index 11bb3108ed..990a8e397c 100644 --- a/tests/test_python/test_smoke_dust/conftest.py +++ b/tests/test_python/test_smoke_dust/conftest.py @@ -123,9 +123,9 @@ def create_fake_restart_files( cycle_dates: The series of dates to create the restart files for. shape: Output grid shape. """ - restart_dir = root_dir / "RESTART" - restart_dir.mkdir(exist_ok=True) for date in cycle_dates: + restart_dir = root_dir / date / "RESTART" + restart_dir.mkdir(exist_ok=True, parents=True) restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc" with Dataset(restart_file, "w") as nc_ds: nc_ds.createDimension("Time") diff --git a/tests/test_python/test_smoke_dust/test_core/test_cycle.py b/tests/test_python/test_smoke_dust/test_core/test_cycle.py index 00f2dffbcf..eccde74412 100644 --- a/tests/test_python/test_smoke_dust/test_core/test_cycle.py +++ b/tests/test_python/test_smoke_dust/test_core/test_cycle.py @@ -58,7 +58,7 @@ def test_find_restart_files( create_fake_grid_out(tmp_path, fake_grid_out_shape) context = create_fake_context(tmp_path) cycle = SmokeDustCycleTwo(context) - create_fake_restart_files(context.nwges_dir, cycle.cycle_dates, fake_grid_out_shape) + create_fake_restart_files(context.hourly_hwpdir, cycle.cycle_dates, fake_grid_out_shape) create_fake_restart_files( context.nwges_dir, [ diff --git a/ush/smoke_dust/core/cycle.py b/ush/smoke_dust/core/cycle.py index bdb122efa9..b5379c1c00 100644 --- a/ush/smoke_dust/core/cycle.py +++ b/ush/smoke_dust/core/cycle.py @@ -390,32 +390,40 @@ def _find_restart_files_( ) -> tuple[Path, ...]: root_dir = self._context.hourly_hwpdir self.log(f"_find_restart_files_: {root_dir=}") - filenames = glob.glob("**/*phy_data*nc", root_dir=root_dir, recursive=True) potential_restart_files = [ f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc" for cycle in self.cycle_dates ] self.log(f"_find_restart_files_: {potential_restart_files=}") + potential_restart_dirs = [root_dir / cycle / "RESTART" for cycle in self.cycle_dates] + restart_dirs = [ + restart_dir for restart_dir in potential_restart_dirs if restart_dir.exists() + ] + self.log(f"_find_restart_files_: {restart_dirs=}") found_potentials = [] restart_files = [] - for filename in filenames: - self.log(f"_find_restart_files_: {filename=}", level=logging.DEBUG) - path = root_dir / filename - if 
From b67276b8e813708ec17682866ce653c3a61f8529 Mon Sep 17 00:00:00 2001
From: benkozi
Date: Mon, 24 Feb 2025 10:18:47 -0700
Subject: [PATCH 40/41] fix: restart files optimization + non-parallel open (#13)

---
 tests/test_python/test_smoke_dust/conftest.py |  4 +-
 .../test_smoke_dust/test_core/test_cycle.py   |  2 +-
 ush/smoke_dust/core/cycle.py                  | 48 +++++++++++--------
 3 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/tests/test_python/test_smoke_dust/conftest.py b/tests/test_python/test_smoke_dust/conftest.py
index 11bb3108ed..990a8e397c 100644
--- a/tests/test_python/test_smoke_dust/conftest.py
+++ b/tests/test_python/test_smoke_dust/conftest.py
@@ -123,9 +123,9 @@ def create_fake_restart_files(
         cycle_dates: The series of dates to create the restart files for.
         shape: Output grid shape.
     """
-    restart_dir = root_dir / "RESTART"
-    restart_dir.mkdir(exist_ok=True)
     for date in cycle_dates:
+        restart_dir = root_dir / date / "RESTART"
+        restart_dir.mkdir(exist_ok=True, parents=True)
         restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc"
         with Dataset(restart_file, "w") as nc_ds:
             nc_ds.createDimension("Time")
diff --git a/tests/test_python/test_smoke_dust/test_core/test_cycle.py b/tests/test_python/test_smoke_dust/test_core/test_cycle.py
index 00f2dffbcf..eccde74412 100644
--- a/tests/test_python/test_smoke_dust/test_core/test_cycle.py
+++ b/tests/test_python/test_smoke_dust/test_core/test_cycle.py
@@ -58,7 +58,7 @@ def test_find_restart_files(
     create_fake_grid_out(tmp_path, fake_grid_out_shape)
     context = create_fake_context(tmp_path)
     cycle = SmokeDustCycleTwo(context)
-    create_fake_restart_files(context.nwges_dir, cycle.cycle_dates, fake_grid_out_shape)
+    create_fake_restart_files(context.hourly_hwpdir, cycle.cycle_dates, fake_grid_out_shape)
     create_fake_restart_files(
         context.nwges_dir,
         [
diff --git a/ush/smoke_dust/core/cycle.py b/ush/smoke_dust/core/cycle.py
index bdb122efa9..b5379c1c00 100644
--- a/ush/smoke_dust/core/cycle.py
+++ b/ush/smoke_dust/core/cycle.py
@@ -390,32 +390,40 @@ def _find_restart_files_(
     ) -> tuple[Path, ...]:
         root_dir = self._context.hourly_hwpdir
         self.log(f"_find_restart_files_: {root_dir=}")
-        filenames = glob.glob("**/*phy_data*nc", root_dir=root_dir, recursive=True)
         potential_restart_files = [
             f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc" for cycle in self.cycle_dates
         ]
         self.log(f"_find_restart_files_: {potential_restart_files=}")
+        potential_restart_dirs = [root_dir / cycle / "RESTART" for cycle in self.cycle_dates]
+        restart_dirs = [
+            restart_dir for restart_dir in potential_restart_dirs if restart_dir.exists()
+        ]
+        self.log(f"_find_restart_files_: {restart_dirs=}")
         found_potentials = []
         restart_files = []
-        for filename in filenames:
-            self.log(f"_find_restart_files_: {filename=}", level=logging.DEBUG)
-            path = root_dir / filename
-            if path.name in potential_restart_files and path.name not in found_potentials:
-                try:
-                    resolved = path.resolve(strict=True)
-                except FileNotFoundError:
-                    self.log(f"restart file link not resolvable: {path=}", level=logging.WARN)
-                    continue
-                with open_nc(resolved) as nc_ds:
-                    variables = nc_ds.variables.keys()  # pylint: disable=no-member
-                    if all(
-                        expected_var in variables for expected_var in self.expected_restart_varnames
-                    ):
-                        self.log(
-                            f"_find_restart_files_: found restart path {path=}", level=logging.DEBUG
-                        )
-                        restart_files.append(path)
-                        found_potentials.append(path.name)
+        for restart_dir in restart_dirs:
+            filenames = glob.glob("**/*phy_data*nc", root_dir=restart_dir, recursive=True)
+            for filename in filenames:
+                self.log(f"_find_restart_files_: {filename=}", level=logging.DEBUG)
+                path = restart_dir / filename
+                if path.name in potential_restart_files and path.name not in found_potentials:
+                    try:
+                        resolved = path.resolve(strict=True)
+                    except FileNotFoundError:
+                        self.log(f"restart file link not resolvable: {path=}", level=logging.WARN)
+                        continue
+                    with open_nc(resolved, parallel=False) as nc_ds:
+                        variables = nc_ds.variables.keys()  # pylint: disable=no-member
+                        if all(
+                            expected_var in variables
+                            for expected_var in self.expected_restart_varnames
+                        ):
+                            self.log(
+                                f"_find_restart_files_: found restart path {path=}",
+                                level=logging.DEBUG,
+                            )
+                            restart_files.append(path)
+                            found_potentials.append(path.name)
         return tuple(restart_files)
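PATCH 40 replaces one recursive glob over the whole working tree with a targeted walk of per-cycle RESTART directories, and opens each candidate serially (`parallel=False`) since only variable names are inspected. A standalone simplification of that search, assuming plain netCDF4 in place of the repository's open_nc() helper, with logging omitted and expected_vars standing in for self.expected_restart_varnames:

    # Simplified sketch of the per-cycle restart search introduced above.
    from pathlib import Path
    from netCDF4 import Dataset

    def find_restart_files(
        root_dir: Path, cycle_dates: list[str], expected_vars: list[str]
    ) -> tuple[Path, ...]:
        restart_files = []
        for cycle in cycle_dates:
            restart_dir = root_dir / cycle / "RESTART"
            if not restart_dir.exists():
                continue  # skip cycles that have not produced restarts yet
            candidate = restart_dir / f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc"
            if not candidate.is_file():
                continue  # missing files and broken links are skipped
            with Dataset(candidate) as nc_ds:  # serial open; no parallel I/O needed
                if all(var in nc_ds.variables for var in expected_vars):
                    restart_files.append(candidate)
        return tuple(restart_files)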
From 8ecd735af7fba5eca7bdbfdceb03ed292332046e Mon Sep 17 00:00:00 2001
From: Ben Koziol
Date: Mon, 24 Feb 2025 12:41:02 -0700
Subject: [PATCH 41/41] fix: env changes for derecho and gaeac5

---
 modulefiles/build_derecho_intel.lua            | 2 +-
 modulefiles/tasks/derecho/prepstart.local.lua  | 1 +
 modulefiles/tasks/derecho/smoke_dust.local.lua | 1 +
 modulefiles/tasks/gaeac5/prepstart.local.lua   | 1 +
 modulefiles/tasks/gaeac5/smoke_dust.local.lua  | 1 +
 5 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 modulefiles/tasks/derecho/prepstart.local.lua
 create mode 100644 modulefiles/tasks/derecho/smoke_dust.local.lua
 create mode 100644 modulefiles/tasks/gaeac5/prepstart.local.lua
 create mode 100644 modulefiles/tasks/gaeac5/smoke_dust.local.lua

diff --git a/modulefiles/build_derecho_intel.lua b/modulefiles/build_derecho_intel.lua
index ba24823e82..83856c00b1 100644
--- a/modulefiles/build_derecho_intel.lua
+++ b/modulefiles/build_derecho_intel.lua
@@ -14,7 +14,7 @@ load(pathJoin("cmake", os.getenv("cmake_ver") or "3.23.1"))

 load("srw_common")

+load(pathJoin("nco", os.getenv("nco_ver") or "5.0.6"))
 load(pathJoin("prod_util", os.getenv("prod_util_ver") or "2.1.1"))

 setenv("CMAKE_Platform","derecho.intel")
-
diff --git a/modulefiles/tasks/derecho/prepstart.local.lua b/modulefiles/tasks/derecho/prepstart.local.lua
new file mode 100644
index 0000000000..6d428532bc
--- /dev/null
+++ b/modulefiles/tasks/derecho/prepstart.local.lua
@@ -0,0 +1 @@
+load("python_srw_sd")
diff --git a/modulefiles/tasks/derecho/smoke_dust.local.lua b/modulefiles/tasks/derecho/smoke_dust.local.lua
new file mode 100644
index 0000000000..6d428532bc
--- /dev/null
+++ b/modulefiles/tasks/derecho/smoke_dust.local.lua
@@ -0,0 +1 @@
+load("python_srw_sd")
diff --git a/modulefiles/tasks/gaeac5/prepstart.local.lua b/modulefiles/tasks/gaeac5/prepstart.local.lua
new file mode 100644
index 0000000000..6d428532bc
--- /dev/null
+++ b/modulefiles/tasks/gaeac5/prepstart.local.lua
@@ -0,0 +1 @@
+load("python_srw_sd")
diff --git a/modulefiles/tasks/gaeac5/smoke_dust.local.lua b/modulefiles/tasks/gaeac5/smoke_dust.local.lua
new file mode 100644
index 0000000000..6d428532bc
--- /dev/null
+++ b/modulefiles/tasks/gaeac5/smoke_dust.local.lua
@@ -0,0 +1 @@
+load("python_srw_sd")
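All four new task modulefiles load the same `python_srw_sd` environment for the prepstart and smoke_dust tasks. An illustrative preflight check such a task could run before doing real work; the package list is an assumption for the sketch (only netCDF4 is visibly required by the code in this series), and the check itself is not part of the SRW App:

    # Illustrative preflight: confirm the python_srw_sd stack is importable.
    # Package names use import spellings (netCDF4, not netcdf4); the list is assumed.
    import importlib.util

    required = ("netCDF4", "numpy")
    missing = [pkg for pkg in required if importlib.util.find_spec(pkg) is None]
    if missing:
        raise SystemExit(f"python_srw_sd environment incomplete; missing: {missing}")
    print("python_srw_sd environment looks complete")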