NOAA-EMC · kevindougherty-noaa · Nov 17, 2021 · Oct 26, 2021 · Nov 12, 2021 · Nov 12, 2021
diff --git a/LAMDA/bias_rmse_timeseries.py b/LAMDA/bias_rmse_timeseries.py
@@ -0,0 +1,104 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from pyGSI.gsi_stat import GSIstat
+from emcpy.plots.plots import LinePlot, HorizontalLine
+from emcpy.plots import CreatePlot
+
+
+def _plot_bias_rmse_timeseries(df, config, outdir):
+    """
+    Used indexed df to plot rmse and bias.
+    """
+    df = df.reset_index()
+
+    # Grab data from dataframe
+    cycles = df['date'].dt.strftime("%d %b\n%Y %Hz")
+    omf_bc = df['OmF_bc']
+    omf_wobc = df['OmF_wobc']
+    rmse = df['rms']
+
+    # Create bias correction object
+    bc_line = LinePlot(cycles, omf_bc)
+    bc_line.label = 'Bias w/ BC'
+    bc_line.marker = 'o'
+    bc_line.markersize = 3
+    bc_line.linestyle = '-.'
+
+    # Create object without bias correction
+    wobc_line = LinePlot(cycles, omf_wobc)
+    wobc_line.color = 'tab:green'
+    wobc_line.label = 'Bias w/o BC'
+    wobc_line.marker = 'o'
+    wobc_line.markersize = 3
+    wobc_line.linestyle = '--'
+
+    # Create rmse
+    rmse_line = LinePlot(cycles, rmse)
+    rmse_line.color = 'tab:brown'
+    rmse_line.label = 'RMSE'
+    rmse_line.marker = 'o'
+    rmse_line.markersize = 3
+
+    # Create a line at 0
+    zero_line = HorizontalLine(y=0)
+
+    # Create plot and draw data
+    myplt = CreatePlot(figsize=(10, 6))
+    plt_list = [bc_line, wobc_line, rmse_line, zero_line]
+    myplt.draw_data(plt_list)
+
+    # Add features
+    myplt.set_ylim(-5, 5)
+    myplt.add_grid(linewidth=0.5, color='grey', linestyle='--')
+    myplt.add_legend(loc='lower right', fontsize='large')
+
+    title = (f"{config['bias type']} RMSE and Bias Time Series\n{config['sensor']} "
+             f"{config['satellite']} Channel {config['channel']} tm0{config['tm']}")
+    myplt.add_title(title, fontsize=14)
+
+    # Return matplotlib figure
+    fig = myplt.return_figure()
+
+    # Save figure
+    save_cycles = df['date'].dt.strftime('%Y%m%d%H').to_numpy()
+    savefile = (f"{save_cycles[0]}_{save_cycles[-1]}_{config['sensor']}"
+                f"_{config['satellite']}_channel_{config['channel']}"
+                f"_{config['bias type']}_tm0{config['tm']}_rmse_bias_timeseries.png")
+    fig.savefig('./' + savefile, bbox_inches='tight',
+                pad_inches=0.1)
+    plt.close('all')
+
+
+def bias_rmse_timeseries(df, config, outdir):
+    """
+    Plots a timeseries of bias and rmse.
+
+    Args:
+        df : (pandas dataframe) multidimensional pandas dataframe
+             with several cycles of gsi stats data
+        config : (dict) dictionary including informaton about the data
+                 being plotted
+        outdir : (str) path to output figures
+    """
+    # Select data by satellite and channel
+    for idx_col in ['satellite', 'channel']:
+        indx = df.index.get_level_values(idx_col) == ''
+        indx = np.ma.logical_or(indx, df.index.get_level_values(idx_col) == config[idx_col])
+        df = df.iloc[indx]
+
+    # Create omf and oma df
+    indx = df.index.get_level_values(idx_col) == ''
+    omf_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 1)
+    omf_df = df.iloc[omf_indx]
+
+    oma_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 3)
+    oma_df = df.iloc[oma_indx]
+
+    # Plot omf
+    config['bias type'] = 'OmF'
+    _plot_bias_rmse_timeseries(omf_df, config, outdir)
+
+    # Plot oma
+    config['bias type'] = 'OmA'
+    _plot_bias_rmse_timeseries(oma_df, config, outdir)
diff --git a/LAMDA/scripts/LAMDA_stats_workflow.py b/LAMDA/scripts/LAMDA_stats_workflow.py
@@ -13,6 +13,7 @@
 # import the plotting scripts i.e:
 # from LAMDA.obs_count import plot_obscount
 from LAMDA.minimization_plots import minimization_plots
+from LAMDA.bias_rmse_timeseries import bias_rmse_timeseries
 
 
 def concatenate_dfs(files, variable, cycles, data_type):
@@ -45,7 +46,8 @@ def concatenate_dfs(files, variable, cycles, data_type):
     return concatenated_df
 
 
-def plotting(config, data_dict, outdir, data_type, ob_type):
+def plotting(config, data_dict, outdir, data_type, ob_type,
+             experiment_name,):
     """
     Main plotting function that gets all inputs for plotting scripts
     in order which includes fits_df, plotting_config, and outdir.
@@ -60,29 +62,38 @@ def plotting(config, data_dict, outdir, data_type, ob_type):
         data_type : (str) determines if data is conventional or
                     radiance
         ob_type : (str) the observation type
+        experiment_name : (str) the type of experiment i.e.
+                          FV3LAMDA, LAMDAX, etc.
     """
 
     data_inputs = config[0]
     plot_type = config[-1]
 
-    plotting_config = {}
-    plotting_config['ob_type'] = ob_type
+    plotting_config = {
+        'ob type': ob_type,
+        'data type': data_type,
+        'experiment name': experiment_name
+    }
 
     if data_type == 'conventional':
         plotting_config['obsid'] = data_inputs[0]
         plotting_config['subtype'] = data_inputs[1]
         plotting_config['obuse'] = data_inputs[-1]
 
     elif data_type == 'radiance':
+        plotting_config['sensor'] = ob_type.split('_')[0]
+        plotting_config['satellite'] = ob_type.split('_')[-1]
+        plotting_config['channel'] = data_inputs[0]
+        plotting_config['obuse'] = data_inputs[-1]
+
         # change ob_type to just the sensor to utilize
         # GSIstat extract_sensor()
         ob_type = ob_type.split('_')[0]
-        plotting_config['channel'] = data_inputs[0]
-        plotting_config['obuse'] = data_inputs[-1]
 
     # Loop through t-minus hours to grab proper data and
     # generate plots
     for tm in np.arange(7):
+        plotting_config['tm'] = tm
         fits_data = []
         cycles = []
 
@@ -94,7 +105,7 @@ def plotting(config, data_dict, outdir, data_type, ob_type):
         fits_df = concatenate_dfs(fits_data, ob_type, cycles, data_type)
 
         plot_dict = {
-            'obs count': plot_obscount,
+            'bias rmse timeseries': bias_rmse_timeseries
         }
 
         plot_dict[plot_type](fits_df, plotting_config, outdir)
@@ -196,7 +207,8 @@ def stats_workflow(config_yaml, nprocs, outdir):
     # Create multiprocessing Pool
     p = Pool(processes=nprocs)
     p.map(partial(plotting, data_dict=data_dict, outdir=outdir,
-                  ob_type=ob_type, data_type=data_type), work_list)
+                  ob_type=ob_type, data_type=data_type,
+                  experiment_name=experiment_name), work_list)
 
 
 if __name__ == '__main__':

diff --git a/pyGSI/gsi_stat.py b/pyGSI/gsi_stat.py
@@ -149,18 +149,18 @@ def extract_instrument(self, obtype, instrument):
                 tmp.append(tst)
 
         columns = ['it', 'channel', 'instrument', 'satellite',
-                   'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc',
-                   'col1', 'col2', 'col3']
+                   'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc',
+                   'pen', 'rms', 'std']
         df = pd.DataFrame(data=tmp, columns=columns)
-        df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1)
+#         df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1)
         df[['channel', 'nassim', 'nrej']] = df[[
             'channel', 'nassim', 'nrej']].astype(np.int)
-        df[['oberr', 'OmF_bc', 'OmF_wobc']] = df[[
-            'oberr', 'OmF_bc', 'OmF_wobc']].astype(np.float)
+        df[['oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']] = df[[
+            'oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']].astype(np.float)
 
         # Since iteration number is not readily available, make one
         lendf = len(df)
-        nouter = np.int(lendf / len(df['it'].unique()))
+        nouter = np.int(np.round(lendf / len(df['it'].unique())))
         douter = np.int(lendf / nouter)
         it = np.zeros(lendf, dtype=int)
         for i in range(nouter):
@@ -170,7 +170,8 @@ def extract_instrument(self, obtype, instrument):
         df['it'] = it
 
         df = df[['it', 'instrument', 'satellite', 'channel',
-                 'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc']]
+                 'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc',
+                 'pen', 'rms', 'std']]
         df.set_index(['it', 'instrument', 'satellite',
                       'channel'], inplace=True)