Dev rewrite v1.0.16 debug and tutorial #558

Merged
301 changes: 150 additions & 151 deletions docs/tutorials/backtest.ipynb

Large diffs are not rendered by default.

155 changes: 67 additions & 88 deletions docs/tutorials/model_diagnostics.ipynb

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions docs/tutorials/utilities_simulation.ipynb

Large diffs are not rendered by default.

31 changes: 23 additions & 8 deletions orbit/diagnostics/backtest.py
@@ -11,6 +11,7 @@
 from ..constants.constants import TimeSeriesSplitSchemeNames
 from collections.abc import Mapping, Iterable
 from ..constants.palette import OrbitPalette as OrbitPal
+from orbit.utils.plot import orbit_style_decorator


 class TimeSeriesSplitter(object):
@@ -161,6 +162,7 @@ def __str__(self):
         message += f"Test start date: {tt_start_date} Test end date: {tt_end_date}\n"
         return message

+    @orbit_style_decorator
     def plot(self, lw=20, fig_width=20):
         _, ax = plt.subplots(figsize=(fig_width, self.n_splits))
         # visualize the train/test windows for each split
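For context, a minimal sketch (not part of the diff) of how the newly decorated `plot` is meant to be used; the `TimeSeriesSplitter` constructor arguments and the toy frame are assumptions based on the v1.0.16 API, not taken from this PR.

```python
# Sketch: with @orbit_style_decorator applied, plot() picks up the Orbit
# house style without any manual plt.style.use() call by the user.
import numpy as np
import pandas as pd
from orbit.diagnostics.backtest import TimeSeriesSplitter

df = pd.DataFrame({
    "week": pd.date_range("2020-01-05", periods=100, freq="W"),
    "response": np.random.randn(100),
})
splitter = TimeSeriesSplitter(
    df, min_train_len=60, incremental_len=10, forecast_len=10, date_col="week"
)
_ = splitter.plot()  # styled axes come from the decorator, not the caller
```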
@@ -449,9 +451,17 @@ def grid_search_orbit(param_grid, model, df, min_train_len=None,
     # return params.copy()

     def _get_params(model):
+        init_args_tmpl = dict()
         init_args = dict()

-        # get all the parent classes and their signatures
+        # get all the signatures in the hierarchy of model templates
+        for cls in inspect.getmro(model._model.__class__):
+            sig = inspect.signature(cls)
+            for key in sig.parameters.keys():
+                if key != 'kwargs':
+                    if hasattr(model._model, key):
+                        init_args_tmpl[key] = getattr(model._model, key)
+        # get all the signatures in the hierarchy of forecaster
         for cls in inspect.getmro(model.__class__):
             sig = inspect.signature(cls)
             for key in sig.parameters.keys():
@@ -466,7 +476,7 @@ def _get_params(model):
                     if hasattr(model.estimator, key):
                         init_args[key] = getattr(model.estimator, key)

-        return init_args.copy()
+        return init_args_tmpl.copy(), init_args.copy()

     def _yield_param_grid(param_grid):
         # an internal function to mimic the ParameterGrid from scikit-learn
@@ -491,7 +501,7 @@ def _yield_param_grid(param_grid):
             yield params

     param_list_dict = list(_yield_param_grid(param_grid))
-    params = _get_params(model)
+    params_tmpl, params = _get_params(model)
     res = pd.DataFrame(param_list_dict)
     metric_values = list()

@@ -500,16 +510,21 @@ def _yield_param_grid(param_grid):
         print("tuning hyper-params {}".format(tuned_param_dict))

         params_ = params.copy()
+        params_tmpl_ = params_tmpl.copy()
         for key, val in tuned_param_dict.items():
-            if key not in params_.keys():
-                raise Exception("tuned hyper-param {} is not in the model's parameters".format(key))
-            else:
+            if key in params_tmpl_.keys():
+                params_tmpl_[key] = val
+            elif key in params_.keys():
                 params_[key] = val
+            else:
+                raise Exception("tuned hyper-param {} is not in the model's parameters".format(key))

         # it is safer to reinstantiate a model object than using deepcopy...
-        model_ = model.__class__(**params_)
+        new_model_template = model._model.__class__(**params_tmpl_)
+        new_model = model.__class__(model=new_model_template, **params_)

         bt = BackTester(
-            model=model_,
+            model=new_model,
             df=df,
             min_train_len=min_train_len,
             n_splits=n_splits,
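The hunks above route each tuned key to whichever object declares it: template-level parameters (e.g. smoothing inputs) go into `params_tmpl_`, forecaster-level ones into `params_`, and both objects are re-instantiated per grid point rather than deep-copied. A sketch of the resulting call pattern, assuming the v1.0.16 forecaster API shown elsewhere in this PR; `train_df` is a placeholder frame with `week` and `response` columns:

```python
# Sketch of the tuning flow these hunks enable (DLT wrapping a model template).
from orbit.models import DLT
from orbit.diagnostics.backtest import grid_search_orbit

dlt = DLT(response_col="response", date_col="week",
          seasonality=52, estimator="stan-map")

# Smoothing inputs live on the model template, so grid_search_orbit routes
# them into params_tmpl_ and rebuilds the template on every iteration.
param_grid = {
    "level_sm_input": [0.3, 0.5, 0.8],
    "seasonality_sm_input": [0.3, 0.5, 0.8],
}
best_params, tuned_df = grid_search_orbit(
    param_grid, model=dlt, df=train_df,
    min_train_len=80, incremental_len=20, forecast_len=20,
)
```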
19 changes: 7 additions & 12 deletions orbit/diagnostics/plot.py
@@ -14,14 +14,9 @@
 from ..constants.palette import OrbitPalette
 from ..constants.palette import PredictionPaletteClassic as PredPal
 from orbit.diagnostics.metrics import smape
-from orbit.utils.plot import get_orbit_style, orbit_style_decorator
+from orbit.utils.plot import orbit_style_decorator


-orbit_style = get_orbit_style()
-
-
-# az.style.use("arviz-darkgrid")
-
 @orbit_style_decorator
 def plot_predicted_data(training_actual_df, predicted_df, date_col, actual_col,
                         pred_col=PredictionKeys.PREDICTION.value, prediction_percentiles=None,
@@ -348,11 +343,11 @@ def plot_posterior_params(mod, kind='density', n_bins=20, ci_level=.95,

     posterior_samples = deepcopy(mod._posterior_samples)

-    if len(mod._regressor_col) > 0:
-        for i, regressor in enumerate(mod._regressor_col):
+    if len(mod._model._regressor_col) > 0:
+        for i, regressor in enumerate(mod._model._regressor_col):
             posterior_samples[regressor] = posterior_samples['beta'][:, i]

-    params_ = mod._regressor_col + ['obs_sigma']
+    params_ = mod._model._regressor_col + ['obs_sigma']

     if incl_trend_params:
         # trend params in LGT or DLT
@@ -460,10 +455,10 @@ def get_arviz_plot_dict(mod,
         raise Exception("This utility works for model object with MCMC or VI inference only.")

     posterior_samples = mod.get_posterior_samples()
-    if len(mod._regressor_col) > 0:
-        for i, regressor in enumerate(mod._regressor_col):
+    if len(mod._model._regressor_col) > 0:
+        for i, regressor in enumerate(mod._model._regressor_col):
             posterior_samples[regressor] = posterior_samples['beta'][:, i]
-    params_ = mod._regressor_col
+    params_ = mod._model._regressor_col

     if incl_noise_params:
         params_ += ['obs_sigma']
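These hunks follow the same forecaster/template split as the backtest changes: the plotting helpers now reach regressor names through the wrapped template (`mod._model._regressor_col`) instead of the forecaster itself. A sketch of the call site, assuming a fitted MCMC forecaster with regressors; the column names and `df` are illustrative placeholders, not from this PR:

```python
# Sketch: plot_posterior_params reads regressor names off the wrapped
# template, so a plain fitted forecaster can be passed straight in.
from orbit.models import DLT
from orbit.diagnostics.plot import plot_posterior_params

dlt = DLT(response_col="claims", date_col="week",
          regressor_col=["trend.unemploy"], estimator="stan-mcmc")
dlt.fit(df)  # df: hypothetical training frame containing the columns above
_ = plot_posterior_params(dlt, kind="density")
```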
64 changes: 33 additions & 31 deletions tests/orbit/models/test_dlt.py
@@ -5,6 +5,7 @@
 from orbit.models import DLT
 from orbit.template.dlt import DLTInitializer
 from orbit.constants.constants import PredictionKeys
+from orbit.diagnostics.backtest import grid_search_orbit


 @pytest.mark.parametrize("estimator", ['stan-map', 'stan-mcmc'])
@@ -551,37 +552,38 @@ def test_dlt_fixed_sm_input(synthetic_data, level_sm_input, seasonality_sm_input
     assert num_regressors == len(train_df.columns.tolist()[2:])


-# @pytest.mark.parametrize("param_grid", [
-#     {
-#         'level_sm_input': [0.3, 0.5, 0.8],
-#         'seasonality_sm_input': [0.3, 0.5, 0.8],
-#     },
-#     {
-#         'damped_factor': [0.3, 0.5, 0.8],
-#         'slope_sm_input': [0.3, 0.5, 0.8],
-#     }
-# ])
-# def test_dlt_grid_tuning(synthetic_data, param_grid):
-#     train_df, test_df, coef = synthetic_data
-#     args = {
-#         'response_col': 'response',
-#         'date_col': 'week',
-#         'seasonality': 52
-#     }
-
-#     dlt = DLTMAP(**args)
-
-#     best_params, tuned_df = grid_search_orbit(param_grid,
-#                                               model=dlt,
-#                                               df=train_df,
-#                                               min_train_len=80, incremental_len=20, forecast_len=20,
-#                                               metrics=None, criteria=None, verbose=True)
-
-#     assert best_params[0].keys() == param_grid.keys()
-#     assert set(tuned_df.columns.to_list()) == set(list(param_grid.keys()) + ['metrics'])
-#     assert tuned_df.shape == (9, 3)
+@pytest.mark.parametrize("param_grid", [
+    {
+        'level_sm_input': [0.3, 0.5, 0.8],
+        'seasonality_sm_input': [0.3, 0.5, 0.8],
+    },
+    {
+        'damped_factor': [0.3, 0.5, 0.8],
+        'slope_sm_input': [0.3, 0.5, 0.8],
+    }
+])
+def test_dlt_grid_tuning(synthetic_data, param_grid):
+    train_df, test_df, coef = synthetic_data
+    args = {
+        'response_col': 'response',
+        'date_col': 'week',
+        'seasonality': 52,
+        'estimator': 'stan-map',
+    }
+
+    dlt = DLT(**args)
+
+    best_params, tuned_df = grid_search_orbit(param_grid,
+                                              model=dlt,
+                                              df=train_df,
+                                              min_train_len=80, incremental_len=20, forecast_len=20,
+                                              metrics=None, criteria=None, verbose=True)
+
+    assert best_params[0].keys() == param_grid.keys()
+    assert set(tuned_df.columns.to_list()) == set(list(param_grid.keys()) + ['metrics'])
+    assert tuned_df.shape == (9, 3)


def test_dlt_map_single_regressor(iclaims_training_data):
64 changes: 33 additions & 31 deletions tests/orbit/models/test_lgt.py
@@ -5,6 +5,7 @@
 from orbit.models import LGT
 from orbit.template.lgt import LGTInitializer
 from orbit.constants.constants import PredictionKeys
+from orbit.diagnostics.backtest import grid_search_orbit


 @pytest.mark.parametrize("estimator", ['stan-map', 'stan-mcmc'])
@@ -507,37 +508,38 @@ def test_lgt_fixed_sm_input(synthetic_data, level_sm_input, seasonality_sm_input
     assert num_regressors == len(train_df.columns.tolist()[2:])


-# @pytest.mark.parametrize("param_grid", [
-#     {
-#         'level_sm_input': [0.3, 0.5, 0.8],
-#         'seasonality_sm_input': [0.3, 0.5, 0.8],
-#     },
-#     {
-#         'level_sm_input': [0.3, 0.5, 0.8],
-#         'slope_sm_input': [0.3, 0.5, 0.8],
-#     }
-# ])
-# def test_lgt_grid_tuning(synthetic_data, param_grid):
-#     train_df, test_df, coef = synthetic_data
-#     args = {
-#         'response_col': 'response',
-#         'date_col': 'week',
-#         'seasonality': 52
-#     }
-
-#     lgt = LGTMAP(**args)
-
-#     best_params, tuned_df = grid_search_orbit(param_grid,
-#                                               model=lgt,
-#                                               df=train_df,
-#                                               min_train_len=80, incremental_len=20, forecast_len=20,
-#                                               metrics=None, criteria=None, verbose=True)
-
-#     assert best_params[0].keys() == param_grid.keys()
-#     assert set(tuned_df.columns.to_list()) == set(list(param_grid.keys()) + ['metrics'])
-#     assert tuned_df.shape == (9, 3)
+@pytest.mark.parametrize("param_grid", [
+    {
+        'level_sm_input': [0.3, 0.5, 0.8],
+        'seasonality_sm_input': [0.3, 0.5, 0.8],
+    },
+    {
+        'level_sm_input': [0.3, 0.5, 0.8],
+        'slope_sm_input': [0.3, 0.5, 0.8],
+    }
+])
+def test_lgt_grid_tuning(synthetic_data, param_grid):
+    train_df, test_df, coef = synthetic_data
+    args = {
+        'response_col': 'response',
+        'date_col': 'week',
+        'seasonality': 52,
+        'estimator': 'stan-map',
+    }
+
+    lgt = LGT(**args)
+
+    best_params, tuned_df = grid_search_orbit(param_grid,
+                                              model=lgt,
+                                              df=train_df,
+                                              min_train_len=80, incremental_len=20, forecast_len=20,
+                                              metrics=None, criteria=None, verbose=True)
+
+    assert best_params[0].keys() == param_grid.keys()
+    assert set(tuned_df.columns.to_list()) == set(list(param_grid.keys()) + ['metrics'])
+    assert tuned_df.shape == (9, 3)


def test_lgt_map_single_regressor(iclaims_training_data):