Skip to content

Commit

Permalink
diag_table_MARBL.json from ecosys_diagnostics
Browse files Browse the repository at this point in the history
First pass at turning the ecosys_diagnostics file MARBL generates into
something MOM can turn into diag_table entries. It hard codes the following:

1. double precision output (should be easy to make this an option)
2. variables on native grid (not sure about interpolating to z-space)
3. Names for streams (all have hm_bgc_...)

Also, buildnml supports the idea of logical variables to control ALT_CO2
output and to say "all MARBL variables should be output", but neither is
hooked up yet. The former is useful for avoiding duplicate output when the
ALT_CO2 tracers are identical to the regular tracers, and the latter is useful
for testing, so I do plan on incorporating them soon.

NOTE: this update requires modifying FMS:
* increase max_num_axis_sets by 3 (from 25 to 28)
* increase max_output_fields by 300 (from 300 to 600)

The latter will need to be even bigger once the lMARBL_output_all option exists.
  • Loading branch information
mnlevy1981 committed Feb 9, 2021
1 parent 877688c commit 304797a
Show file tree
Hide file tree
Showing 4 changed files with 298 additions and 157 deletions.
6 changes: 1 addition & 5 deletions cime_config/MARBL_scripts/MARBL_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,10 @@ def __init__(self, MARBL_dir, caseroot, MARBL_settings):
# PUBLIC CLASS METHODS #
################################################################################

def write_diagnostics_file(self, diagnostics_file_out, diagnostics_list_out, append):
def write_diagnostics_file(self, diagnostics_file_out, append):
""" Add all MARBL diagnostics to file containing MOM diagnostics
Also create a list of diagnostics generated by MOM
"""
from MARBL_tools import generate_diagnostics_file
generate_diagnostics_file(self._MARBL_diagnostics, diagnostics_file_out, append)
fout = open(diagnostics_list_out,"w")
for key in self._MARBL_diagnostics.diagnostics_dict.keys():
fout.write(key+"\n")


291 changes: 291 additions & 0 deletions cime_config/MARBL_scripts/MARBL_diags_to_diag_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
#!/usr/bin/env python

""" Convert MARBL diagnostics file to diag_table_MARBL.json
MARBL diagnostics file is a file containing a list of
DIAGNOSTIC_NAME : frequency_operator[,frequency2_operator2, ..., frequencyN_operatorN]
MOM uses this same format for defining its ecosystem-based diagnostics to allow
users to change the requested MOM and MARBL diagnostics in the same place.
usage: MARBL_diags_to_diag_table.py [-h] -i ECOSYS_DIAGNOSTICS_IN -t
DIAG_TABLE_OUT [-l LOW_FREQUENCY_STREAM]
[-m MEDIUM_FREQUENCY_STREAM]
[-g HIGH_FREQUENCY_STREAM]
[--lMARBL_output_all LMARBL_OUTPUT_ALL]
[--lMARBL_output_alt_co2 LMARBL_OUTPUT_ALT_CO2]
Generate MOM diag table from MARBL diagnostics
optional arguments:
-h, --help show this help message and exit
-i ECOSYS_DIAGNOSTICS_IN, --ecosys_diagnostics_in ECOSYS_DIAGNOSTICS_IN
File generated by MARBL_generate_diagnostics_file
(default: None)
-t DIAG_TABLE_OUT, --diag_table_out DIAG_TABLE_OUT
Location of diag table (JSON) file to create (default:
None)
-l LOW_FREQUENCY_STREAM, --low_frequency_stream LOW_FREQUENCY_STREAM
Stream to put low frequency output into (required if
not lMARBL_output_all) (default: 0)
-m MEDIUM_FREQUENCY_STREAM, --medium_frequency_stream MEDIUM_FREQUENCY_STREAM
Stream to put medium frequency output into (required
if not lMARBL_output_all) (default: 0)
-g HIGH_FREQUENCY_STREAM, --high_frequency_stream HIGH_FREQUENCY_STREAM
Stream to put high frequency output into (required if
not lMARBL_output_all) (default: 0)
--lMARBL_output_all LMARBL_OUTPUT_ALL
Put all MARBL diagnostics in hm_bgc stream (default:
False)
--lMARBL_output_alt_co2 LMARBL_OUTPUT_ALT_CO2
Include ALT_CO2 diagnostics in streams (default:
False)
"""

#######################################

class DiagTableClass(object):
    """
    Class that is used to generate JSON file to extend diag_table from ecosys_diagnostics file

    Three streams are kept, keyed by the frequency names "medium", "high" and
    "low". Variables are added to a stream with update(), and every stream
    that ended up with at least one variable is written by dump_to_json().
    """

    def __init__(self):
        """
        Constructor: creates a dictionary object to eventually dump to JSON
        """
        # TODO: other streams change names in spinup mode, so I kept that practice here. However,
        #       I don't like how I handle the names... namely, hm_bgc_annual in spinup mode has
        #       completely different variables from hm_bgc_annual in "regular" mode
        self._diag_table_dict = dict()

        # "medium" frequency should be treated like "hm" stream -- annual in spinup runs, monthly otherwise
        suffix_dict = {'$OCN_DIAG_MODE == "spinup"': "hm_bgc_annual%4yr", "else": "hm_bgc_monthly%4yr-%2mo"}
        output_freq_units_dict = {'$OCN_DIAG_MODE == "spinup"': "years", "$TEST == True": "days", "else": "months"}
        self._diag_table_dict["medium"] = self._dict_template(suffix_dict, output_freq_units_dict)

        # "high" frequency should be treated like "sfc" stream -- 5-day averages in spinup, daily otherwise
        # unlike "sfc", this stream will write one file per month instead of per year (except in spinup)
        suffix_dict = {'$OCN_DIAG_MODE == "spinup"': "hm_bgc_daily5%4yr", "else": "hm_bgc_daily%4yr-%2mo"}
        output_freq_dict = {'$OCN_DIAG_MODE == "spinup"': 5, "else": 1}
        new_file_freq_units_dict = {'$OCN_DIAG_MODE == "spinup"': "years", "else": "months"}
        self._diag_table_dict["high"] = self._dict_template(suffix_dict, "days", new_file_freq_units_dict, output_freq_dict)

        # "low" frequency should be treated as annual averages
        suffix_dict = {'$OCN_DIAG_MODE == "spinup"': "hm_bgc_annual2%4yr", "else": "hm_bgc_annual%4yr"}
        self._diag_table_dict["low"] = self._dict_template(suffix_dict, "years")

    def update(self, varname, frequency, lMARBL_output_all):
        """
        Add varname to the variable list of each requested stream.

        Parameters:
            * varname: name of the diagnostic to add
            * frequency: iterable of stream keys ("low", "medium", "high");
                         entries equal to "never" are skipped
            * lMARBL_output_all: if True, frequency is ignored and the variable
                                 is added to the "medium" stream only

        A frequency that is neither "never" nor an existing stream key raises
        KeyError.
        """
        if lMARBL_output_all:
            use_freq = ['medium']
        else:
            use_freq = list(frequency)

        for freq in use_freq:
            if freq == "never":
                continue
            self._diag_table_dict[freq]["fields"][0]["lists"][0].append(varname)

    def dump_to_json(self, filename):
        """
        Write every stream that has at least one variable to filename as JSON.

        Each written stream also gets a second variable list holding the
        transport variables. If no stream has any variable, no file is written
        and a warning is printed instead.

        Parameters:
            * filename: path of the JSON file to create
        """
        import copy
        import json

        out_dict = dict()
        out_dict["Files"] = dict()
        transports = ["volcello", "vmo", "vhGM", "vhml", "umo", "uhGM", "uhml"]
        for freq, stream in self._diag_table_dict.items():
            if stream["fields"][0]["lists"][0]:
                # Deep copy so appending the transports below does not modify
                # self._diag_table_dict; a shallow copy shares the nested
                # "lists" and would grow it on every call.
                out_dict["Files"][freq] = copy.deepcopy(stream)
                out_dict["Files"][freq]["fields"][0]["lists"].append(transports)
        if out_dict["Files"]:
            with open(filename, "w") as fp:
                json.dump(out_dict, fp, separators=(',', ': '), sort_keys=False, indent=3)
        else:
            print("WARNING: no JSON file written as no variables were requested")

    def _dict_template(self, suffix, output_freq_units, new_file_freq_units=None, output_freq=1, new_file_freq=1, packing=1):
        """
        Return the basic template for MOM6 diag_table dictionary.
        Variables will be added to output file by appending to template["fields"][0]["lists"][0]
        Parameters:
            * suffix: string used to identify output file; could also be a dictionary
                      where keys are logical evaluations
            * output_freq_units: units used to determine how often to output; similar
                                 to suffix, this can also be a dictionary
            * new_file_freq_units: units used to determine how often to generate new stream
                                   files; if None, will use output_freq_units (default: None)
            * output_freq: how frequently to output (default: 1)
            * new_file_freq: how frequently to create new files (default: 1)
            * packing: integer that is used to determine precision when writing output
                       (default: 1 => double precision output)
        """
        template = dict()
        template["suffix"] = suffix
        template["output_freq"] = output_freq
        template["new_file_freq"] = new_file_freq
        template["output_freq_units"] = output_freq_units
        # Fall back to the output units when no separate file-rollover units are given
        template["new_file_freq_units"] = new_file_freq_units if new_file_freq_units else output_freq_units
        template["time_axis_units"] = "days"
        template["reduction_method"] = "mean"
        template["regional_section"] = "none"
        template["fields"] = [{"module": "ocean_model", "packing": packing, "lists": [[]]}]
        return template


#######################################

def _parse_args():
    """ Parse command line arguments

        Returns the argparse namespace with ecosys_diagnostics_in, diag_table_out,
        low_frequency_stream, medium_frequency_stream, high_frequency_stream,
        lMARBL_output_all and lMARBL_output_alt_co2.
    """

    import argparse

    def _str_to_bool(value):
        """ Convert the text given for a logical option into a bool.
            argparse's type=bool treats every non-empty string (including
            "False") as True, so the text is interpreted explicitly instead.
        """
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string stays False, matching what bool("") used to give
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("Can not interpret '%s' as a logical value" % value)

    parser = argparse.ArgumentParser(description="Generate MOM diag table from MARBL diagnostics",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Command line argument to point to MARBL diagnostics input file (required!)
    parser.add_argument('-i', '--ecosys_diagnostics_in', action='store', dest='ecosys_diagnostics_in',
                        required=True, help='File generated by MARBL_generate_diagnostics_file')

    # Command line argument to point to diag table output file (required!)
    parser.add_argument('-t', '--diag_table_out', action='store', dest='diag_table_out',
                        required=True, help='Location of diag table (JSON) file to create')

    # Command line arguments for the different streams to use (low, medium, high)
    parser.add_argument('-l', '--low_frequency_stream', action='store', dest='low_frequency_stream',
                        type=int, default=0, help='Stream to put low frequency output into (required if not lMARBL_output_all)')

    parser.add_argument('-m', '--medium_frequency_stream', action='store', dest='medium_frequency_stream',
                        type=int, default=0, help='Stream to put medium frequency output into (required if not lMARBL_output_all)')

    parser.add_argument('-g', '--high_frequency_stream', action='store', dest='high_frequency_stream',
                        type=int, default=0, help='Stream to put high frequency output into (required if not lMARBL_output_all)')

    # Should all MARBL diagnostics be included in the hm_bgc stream?
    parser.add_argument('--lMARBL_output_all', action='store', dest='lMARBL_output_all',
                        type=_str_to_bool, default=False, help="Put all MARBL diagnostics in hm_bgc stream")

    # Should MARBL's ALT_CO2 diagnostics be included in the diag table?
    parser.add_argument('--lMARBL_output_alt_co2', action='store', dest='lMARBL_output_alt_co2',
                        type=_str_to_bool, default=False, help="Include ALT_CO2 diagnostics in streams")

    return parser.parse_args()

#######################################

def _parse_line(line_in):
    """ Take a line of input from the MARBL diagnostic output and return the variable
        name, frequency, and operator. Lines that are commented out or empty should
        return None for all three; non-empty lines that are not in the proper format
        should trigger errors.
        If they are not None, frequency and operator are always returned as lists
        (although they often have just one element).

        Expected format is
            NAME : frequency_operator[, frequency2_operator2, ...]
        Anything after a '#' is ignored. A malformed line logs an error and
        exits with status 1 (SystemExit).
    """
    import logging
    import sys

    # Drop any trailing comment and surrounding whitespace
    line_loc = line_in.split('#')[0].strip()
    # Return None, None, None if line is empty
    if not line_loc:
        return None, None, None

    # Use the module's name for the logger (not the literal string "__name__")
    logger = logging.getLogger(__name__)
    line_split = line_loc.split(':')
    if len(line_split) != 2:
        logger.error("Can not determine variable name from following line: '%s'", line_in)
        sys.exit(1)

    freq = []
    op = []
    for freq_op in line_split[1].split(','):
        entry = freq_op.strip()
        freq_op_split = entry.split('_')
        if len(freq_op_split) != 2:
            # Report the offending entry itself rather than the whole list
            logger.error("Can not determine frequency and operator from following entry: '%s'", entry)
            sys.exit(1)
        freq.append(freq_op_split[0])
        op.append(freq_op_split[1])

    return line_split[0].strip(), freq, op

#######################################


def diagnostics_to_diag_table(ecosys_diagnostics_in,
                              diag_table_out,
                              lMARBL_output_all,
                              lMARBL_output_alt_co2):
    """
    Build a diag_table dictionary to dump to JSON format

    Parameters:
        * ecosys_diagnostics_in: path of the ecosys_diagnostics file to read
        * diag_table_out: path of the JSON file to write
        * lMARBL_output_all: passed through to DiagTableClass.update() for
                             every variable
        * lMARBL_output_alt_co2: if False, variables whose name contains
                                 "ALT_CO2" are skipped

    Logs an error and exits with status 1 if the input file is missing or if
    a variable is listed more than once at the same frequency.
    """

    import logging
    import os
    import sys
    logger = logging.getLogger(__name__)

    # 1. Check arguments:
    #    ecosys_diagnostics_in can not be None and must be path of an existing file
    if ecosys_diagnostics_in is None:
        logger.error("Must specify ecosys_diagnostics_in")
        sys.exit(1)
    if not os.path.isfile(ecosys_diagnostics_in):
        logger.error("File not found %s", ecosys_diagnostics_in)
        sys.exit(1)

    # 2. Set up diag_table object
    diag_table = DiagTableClass()

    # 3. Read ecosys_diagnostics_in line by line, convert each line to diag table entry
    with open(ecosys_diagnostics_in, 'r') as file_in:
        all_lines = file_in.readlines()

    # Variables already seen, per frequency, to detect duplicates
    processed_vars = dict()
    for line in all_lines:
        # The operator is parsed but not needed to build the diag table
        varname, frequency, _ = _parse_line(line.strip())
        # i. Continue to next line in the following circumstances
        #    * varname = None
        if varname is None:
            continue
        #    * Skip ALT_CO2 vars unless explicitly requested
        if (not lMARBL_output_alt_co2) and ("ALT_CO2" in varname):
            continue

        # ii. Abort if varname has already appeared in file at given frequency
        for freq in frequency:
            seen = processed_vars.setdefault(freq, set())
            if varname in seen:
                logger.error("%s appears in %s with frequency %s multiple times",
                             varname, ecosys_diagnostics_in, freq)
                sys.exit(1)
            seen.add(varname)

        # iii. Update diag table
        diag_table.update(varname, frequency, lMARBL_output_all)

    # File footer
    diag_table.dump_to_json(diag_table_out)

#######################################

if __name__ == "__main__":
    import logging

    # Read the command line first, then set up logging
    cli_args = _parse_args()

    log_format = '%(levelname)s (%(funcName)s): %(message)s'
    logging.basicConfig(format=log_format, level=logging.DEBUG)

    # Convert the ecosys_diagnostics file into the JSON diag table
    diagnostics_to_diag_table(
        ecosys_diagnostics_in=cli_args.ecosys_diagnostics_in,
        diag_table_out=cli_args.diag_table_out,
        lMARBL_output_all=cli_args.lMARBL_output_all,
        lMARBL_output_alt_co2=cli_args.lMARBL_output_alt_co2,
    )
16 changes: 6 additions & 10 deletions cime_config/buildnml
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,10 @@ def prep_input(case):
diag_table_dest = os.path.join(rundir,"diag_table")
diag_table = FType_diag_table.from_json(diag_table_src)
if use_MARBL:
generate_diag_table_MARBL(caseroot, MARBL_settings, SourceMods_dir, momconfdir, MARBL_dir)
# TODO: instead of copying json file here, python script should generate it based on ecosys_diagnostics
diag_table_src = os.path.join(json_templates_dir, "diag_table_MARBL.json")
diag_table_buildconf = os.path.join(momconfdir, "diag_table_MARBL.json")
shutil.copy(diag_table_src, diag_table_buildconf)
diag_table.append(FType_diag_table.from_json(diag_table_buildconf))
generate_diag_table_MARBL(caseroot, MARBL_settings, SourceMods_dir, momconfdir, MARBL_dir, diag_table_buildconf)
if os.path.isfile(diag_table_buildconf):
diag_table.append(FType_diag_table.from_json(diag_table_buildconf))
diag_table.write(diag_table_dest, case)

def init_MOM_override(rundir):
Expand Down Expand Up @@ -169,7 +167,7 @@ def _copy_files_to_momconf(case):
for filename in ["MOM_input", "MOM_override", "diag_table", "input.nml"]:
shutil.copy(os.path.join(rundir,filename), momconfdir)

def generate_diag_table_MARBL(caseroot, MARBL_settings, SourceMods_dir, momconfdir, MARBL_dir):
def generate_diag_table_MARBL(caseroot, MARBL_settings, SourceMods_dir, momconfdir, MARBL_dir, diag_table_buildconf):
"""
Create Buildconf/momconf/diag_table_MARBL.json
Expand All @@ -179,7 +177,6 @@ def generate_diag_table_MARBL(caseroot, MARBL_settings, SourceMods_dir, momconfd
"""
# 1. Check for diag_table_MARBL.json in SourceMods
srcmods_version = os.path.join(SourceMods_dir, "diag_table_MARBL.json")
diag_table_buildconf = os.path.join(momconfdir, "diag_table_MARBL.json")
if os.path.isfile(srcmods_version):
shutil.copy(srcmods_version, diag_table_buildconf)
return
Expand All @@ -205,13 +202,12 @@ def generate_diag_table_MARBL(caseroot, MARBL_settings, SourceMods_dir, momconfd
MARBL_diagnostics = MARBL_diagnostics_for_MOM(MARBL_dir, caseroot, MARBL_settings)

# (iv) append MARBL diagnostics to ecosys_diagnostics
# This also creates marbl_diagnostics_list to help track which
# diagnostics came from MARBL
MARBL_diagnostics.write_diagnostics_file(ecosys_diags_buildconf,
os.path.join(momconfdir, "marbl_diagnostics_list"),
append=True)

# 3. Generate diag_table based on contents of ecosys_diagnostics
from MARBL_diags_to_diag_table import diagnostics_to_diag_table
diagnostics_to_diag_table(ecosys_diags_buildconf, diag_table_buildconf, False, False)


# pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
Expand Down
Loading

0 comments on commit 304797a

Please sign in to comment.