Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add bias and rmse time series diagnostic #107

Merged
merged 14 commits into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions LAMDA/bias_rmse_timeseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyGSI.gsi_stat import GSIstat
from emcpy.plots.plots import LinePlot, HorizontalLine
from emcpy.plots import CreatePlot


def _plot_bias_rmse_timeseries(df, config, outdir):
"""
Used indexed df to plot rmse and bias.
"""
df = df.reset_index()

# Grab data from dataframe
cycles = df['date'].dt.strftime("%d %b\n%Y %Hz")
omf_bc = df['OmF_bc']
omf_wobc = df['OmF_wobc']
rmse = df['rms']

# Create bias correction object
bc_line = LinePlot(cycles, omf_bc)
bc_line.label = 'Bias w/ BC'
bc_line.marker = 'o'
bc_line.markersize = 3
bc_line.linestyle = '-.'

# Create object without bias correction
wobc_line = LinePlot(cycles, omf_wobc)
wobc_line.color = 'tab:green'
wobc_line.label = 'Bias w/o BC'
wobc_line.marker = 'o'
wobc_line.markersize = 3
wobc_line.linestyle = '--'

# Create rmse
rmse_line = LinePlot(cycles, rmse)
rmse_line.color = 'tab:brown'
rmse_line.label = 'RMSE'
rmse_line.marker = 'o'
rmse_line.markersize = 3

# Create a line at 0
zero_line = HorizontalLine(y=0)

# Create plot and draw data
myplt = CreatePlot(figsize=(10, 6))
plt_list = [bc_line, wobc_line, rmse_line, zero_line]
myplt.draw_data(plt_list)

# Add features
myplt.set_ylim(-5, 5)
myplt.add_grid(linewidth=0.5, color='grey', linestyle='--')
myplt.add_legend(loc='lower right', fontsize='large')

title = (f"{config['bias type']} RMSE and Bias Time Series\n{config['sensor']} "
f"{config['satellite']} Channel {config['channel']} tm0{config['tm']}")
myplt.add_title(title, fontsize=14)

# Return matplotlib figure
fig = myplt.return_figure()

# Save figure
save_cycles = df['date'].dt.strftime('%Y%m%d%H').to_numpy()
savefile = (f"{save_cycles[0]}_{save_cycles[-1]}_{config['sensor']}"
f"_{config['satellite']}_channel_{config['channel']}"
f"_{config['bias type']}_tm0{config['tm']}_rmse_bias_timeseries.png")
fig.savefig('./' + savefile, bbox_inches='tight',
pad_inches=0.1)
plt.close('all')


def bias_rmse_timeseries(df, config, outdir):
"""
Plots a timeseries of bias and rmse.
Args:
df : (pandas dataframe) multidimensional pandas dataframe
with several cycles of gsi stats data
config : (dict) dictionary including informaton about the data
being plotted
outdir : (str) path to output figures
"""
# Select data by satellite and channel
for idx_col in ['satellite', 'channel']:
indx = df.index.get_level_values(idx_col) == ''
indx = np.ma.logical_or(indx, df.index.get_level_values(idx_col) == config[idx_col])
df = df.iloc[indx]

# Create omf and oma df
indx = df.index.get_level_values(idx_col) == ''
omf_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 1)
omf_df = df.iloc[omf_indx]

oma_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 3)
oma_df = df.iloc[oma_indx]

# Plot omf
config['bias type'] = 'OmF'
_plot_bias_rmse_timeseries(omf_df, config, outdir)

# Plot oma
config['bias type'] = 'OmA'
_plot_bias_rmse_timeseries(oma_df, config, outdir)
26 changes: 19 additions & 7 deletions LAMDA/scripts/LAMDA_stats_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# import the plotting scripts i.e:
# from LAMDA.obs_count import plot_obscount
from LAMDA.minimization_plots import minimization_plots
from LAMDA.bias_rmse_timeseries import bias_rmse_timeseries


def concatenate_dfs(files, variable, cycles, data_type):
Expand Down Expand Up @@ -45,7 +46,8 @@ def concatenate_dfs(files, variable, cycles, data_type):
return concatenated_df


def plotting(config, data_dict, outdir, data_type, ob_type):
def plotting(config, data_dict, outdir, data_type, ob_type,
experiment_name,):
"""
Main plotting function that gets all inputs for plotting scripts
in order which includes fits_df, plotting_config, and outdir.
Expand All @@ -60,29 +62,38 @@ def plotting(config, data_dict, outdir, data_type, ob_type):
data_type : (str) determines if data is conventional or
radiance
ob_type : (str) the observation type
experiment_name : (str) the type of experiment i.e.
FV3LAMDA, LAMDAX, etc.
"""

data_inputs = config[0]
plot_type = config[-1]

plotting_config = {}
plotting_config['ob_type'] = ob_type
plotting_config = {
'ob type': ob_type,
'data type': data_type,
'experiment name': experiment_name
}

if data_type == 'conventional':
plotting_config['obsid'] = data_inputs[0]
plotting_config['subtype'] = data_inputs[1]
plotting_config['obuse'] = data_inputs[-1]

elif data_type == 'radiance':
plotting_config['sensor'] = ob_type.split('_')[0]
plotting_config['satellite'] = ob_type.split('_')[-1]
plotting_config['channel'] = data_inputs[0]
plotting_config['obuse'] = data_inputs[-1]

# change ob_type to just the sensor to utilize
# GSIstat extract_sensor()
ob_type = ob_type.split('_')[0]
plotting_config['channel'] = data_inputs[0]
plotting_config['obuse'] = data_inputs[-1]

# Loop through t-minus hours to grab proper data and
# generate plots
for tm in np.arange(7):
plotting_config['tm'] = tm
fits_data = []
cycles = []

Expand All @@ -94,7 +105,7 @@ def plotting(config, data_dict, outdir, data_type, ob_type):
fits_df = concatenate_dfs(fits_data, ob_type, cycles, data_type)

plot_dict = {
'obs count': plot_obscount,
'bias rmse timeseries': bias_rmse_timeseries
}

plot_dict[plot_type](fits_df, plotting_config, outdir)
Expand Down Expand Up @@ -196,7 +207,8 @@ def stats_workflow(config_yaml, nprocs, outdir):
# Create multiprocessing Pool
p = Pool(processes=nprocs)
p.map(partial(plotting, data_dict=data_dict, outdir=outdir,
ob_type=ob_type, data_type=data_type), work_list)
ob_type=ob_type, data_type=data_type,
experiment_name=experiment_name), work_list)


if __name__ == '__main__':
Expand Down
15 changes: 8 additions & 7 deletions pyGSI/gsi_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,18 @@ def extract_instrument(self, obtype, instrument):
tmp.append(tst)

columns = ['it', 'channel', 'instrument', 'satellite',
'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc',
'col1', 'col2', 'col3']
'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc',
'pen', 'rms', 'std']
df = pd.DataFrame(data=tmp, columns=columns)
df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1)
# df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1)
df[['channel', 'nassim', 'nrej']] = df[[
'channel', 'nassim', 'nrej']].astype(np.int)
df[['oberr', 'OmF_bc', 'OmF_wobc']] = df[[
'oberr', 'OmF_bc', 'OmF_wobc']].astype(np.float)
df[['oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']] = df[[
'oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']].astype(np.float)

# Since iteration number is not readily available, make one
lendf = len(df)
nouter = np.int(lendf / len(df['it'].unique()))
nouter = np.int(np.round(lendf / len(df['it'].unique())))
douter = np.int(lendf / nouter)
it = np.zeros(lendf, dtype=int)
for i in range(nouter):
Expand All @@ -170,7 +170,8 @@ def extract_instrument(self, obtype, instrument):
df['it'] = it

df = df[['it', 'instrument', 'satellite', 'channel',
'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc']]
'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc',
'pen', 'rms', 'std']]
df.set_index(['it', 'instrument', 'satellite',
'channel'], inplace=True)

Expand Down