From 15c953a3e86fc26eea9433278bd09cae9198e5d8 Mon Sep 17 00:00:00 2001
From: Edwin Ng
Date: Wed, 9 Dec 2020 18:14:47 -0800
Subject: [PATCH] Refinement of KTR (#310)

* Add support for multiple prediction percentiles
* fixed unit tests
* data load wrappers (#248)
* data load wrappers
* parse date column
* Readme tutorial update (#264)
* apply data loader
* use DLT as example
* apply data loader
* add a button of "Edit on Github" (#265)
* add button for edit on github
* correct the sub-title
* upgrade pyro config
* update pyro requirement
* small update on fft utils
* enriching dataset
  - more data load
  - more dataset
* Update dataset.py
* small change on pyro-config
  - make default total decay as 1.0, meaning no decay
* more flexible plot components
* adding doc string
* refresh banner and icon
* read me link
* more banners
* testing new dim of banner
* Predict fix (#281)
* fix regressor matrix extraction
* unit test for cases with mixed pos & reg regressors
* adding logo and some style for tutorial and docs
* favicon
* initialize `_regressor_col` in __init__()
* small version bump
* fix matplotlib
* Create orbit.css
* reorder fourier series calculation to match the df (#286)
* plot utility enhancement (#287)
* return the plotting object
* receive returned plotting object to avoid dual display
* return plotting axes object
* Setup TravisCI deployment for PyPI (#292)
* Setup TravisCI deployment for PyPI
* debug deployment in travis YAML
* debug travis pypi deployment (#293)
* Debug travis package deployment (#294)
* setup.py and module uses same version number
* fix travis config for pypi deploy
* dpl v2 for travis config (#295)
* realign version number for module and release

  In testing and configuring travis ci for pypi deployment, version numbers were
  misaligned. Realigning the version number here with the latest release version number.
* Update README.rst (#297)
* Update README.rst
* Update README.rst
* Update README.rst
* Update README.rst
* Update README.rst
* Update README.rst
* upgrade of models and using stan
  - use linear kernel
  - use stan map and mcmc
  - allow outliers
* upgrade of models and using stan
  - use linear kernel
  - use stan map and mcmc
  - allow outliers
* fix fourier series
* hotfix of pr and rr switch and fourier series shift
* Update ktr.py
* Dev gam pyro refine2 test2 (#311)
* changes
* testing the changes with iclaims example
* allow level_knots and seasonality_knots_input for the API
* remove unused arg
* Update ktr.py

Co-authored-by: Edwin Ng

* tensor dtype assignment
* testing
* fix a bug in .get_regression_coefs
* trim level_knots accordingly when level_knot_dates outside the training range

Co-authored-by: Zhishi Wang
Co-authored-by: Steve Yang
Co-authored-by: Zhishi Wang
Co-authored-by: Yifeng Wu
Co-authored-by: Steve Yang
---
 orbit/models/ktr.py | 19 ++++++++++++++++---
 orbit/pyro/ktr.py   | 21 ++++++++++++++-------
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/orbit/models/ktr.py b/orbit/models/ktr.py
index e6c8dbe6..66319551 100644
--- a/orbit/models/ktr.py
+++ b/orbit/models/ktr.py
@@ -249,6 +249,9 @@ def __init__(self,
         self._regular_regressor_knot_pooling_scale = list()
         self._regular_regressor_knot_scale = list()
         self._regressor_col = list()
+        # seasonal regressors
+        self._seasonal_regressor_col = []
+        self._seasonal_regressor_col_gp = []
 
         # set static data attributes
         self._set_static_data_attributes()
@@ -362,6 +365,11 @@ def _validate_insert_prior(insert_prior_params):
             if not all(len_insert == len_insert_prior[0] for len_insert in len_insert_prior):
                 raise IllegalArgument('Wrong dimension length in Insert Prior Input')
 
+        def _validate_level_knot_inputs(level_knot_dates, level_knots):
+            if level_knots is not None:
+                if len(level_knots) != len(level_knot_dates):
+                    raise IllegalArgument('level_knots and level_knot_dates should have the same length')
+
         # regressor defaults
         num_of_regressors = len(self.regressor_col)
 
@@ -373,6 +381,9 @@ def _validate_insert_prior(insert_prior_params):
         _validate_insert_prior([self._insert_prior_regressor_col, self._insert_prior_tp_idx,
                                 self._insert_prior_mean, self._insert_prior_sd])
 
+        _validate_level_knot_inputs(self.level_knot_dates, self.level_knots)
+
+
         if self.regressor_sign is None:
             self._regressor_sign = [DEFAULT_REGRESSOR_SIGN] * num_of_regressors
 
@@ -413,8 +424,6 @@ def _set_static_regression_attributes(self):
     def _set_seasonality_attributes(self):
         """given list of seasonalities and their order, create list of seasonal_regressors_columns"""
         if len(self._seasonality) > 0:
-            self._seasonal_regressor_col = []
-            self._seasonal_regressor_col_gp = []
             for idx, s in enumerate(self._seasonality):
                 fs_cols = []
                 order = self._seasonality_fs_order[idx]
@@ -543,6 +552,10 @@ def _set_kernel_matrix(self, df):
             else:
                 # FIXME: this only works up to daily series (not working on hourly series)
                 self._level_knot_dates = pd.to_datetime([x for x in self._level_knot_dates if x <= df[self.date_col].max()])
+                if len(self._level_knots) > 0:
+                    # trim _level_knots accordingly
+                    self._level_knots = self._level_knots[:len(self._level_knot_dates)]
+
             self._knots_tp_level = np.array(
                 ((self._level_knot_dates - self._training_df_meta['training_start']).days + 1) /
                 ((self._training_df_meta['training_end'] - self._training_df_meta['training_start']).days + 1)
@@ -1065,7 +1078,7 @@ def plot_regression_coefs(self,
                 axes[row_idx, col_idx].ticklabel_format(useOffset=False)
         plt.tight_layout()
 
-        return fig
+        return axes
 
 
 class KTRFull(BaseKTR):
diff --git a/orbit/pyro/ktr.py b/orbit/pyro/ktr.py
index 6257f9b8..d02e3470 100644
--- a/orbit/pyro/ktr.py
+++ b/orbit/pyro/ktr.py
@@ -4,7 +4,6 @@
 import pyro
 import pyro.distributions as dist
 
-# FIXME: this is sort of dangerous; consider better implementation later
 torch.set_default_tensor_type('torch.DoubleTensor')
 
 
@@ -14,8 +13,14 @@ class Model:
     def __init__(self, data):
         for key, value in data.items():
             key = key.lower()
-            if isinstance(value, (list, np.ndarray, float)):
-                value = torch.tensor(value)
+            if isinstance(value, (list, np.ndarray)):
+                if key in ['which_valid_res']:
+                    # to use as index, tensor type has to be long or int
+                    value = torch.tensor(value)
+                else:
+                    # loc/scale cannot be in long format
+                    # sometimes they may be supplied as int, so dtype conversion is needed
+                    value = torch.tensor(value, dtype=torch.double)
             self.__dict__[key] = value
 
     def __call__(self):
@@ -63,6 +68,7 @@ def __call__(self):
         pr_knot_pool_scale = self.pr_knot_pool_scale
         pr_knot_scale = self.pr_knot_scale.unsqueeze(-1)
 
+        # sampling begins here
         # transformation of data
         regressors = torch.zeros(n_obs)
         if n_pr > 0 and n_rr > 0:
@@ -72,8 +78,7 @@ def __call__(self):
         elif n_rr > 0:
             regressors = rr
 
-        response -= seas_term
-        response_tran = response - meany
+        response_tran = response - meany - seas_term
 
         # sampling begins here
         extra_out = {}
@@ -152,6 +157,8 @@ def __call__(self):
                 pyro.sample("prior_{}_{}".format(tp, idx),
                             dist.Normal(m, sd), obs=coef[..., tp, idx])
 
+        pyro.sample("init_lev", dist.Normal(response[0], sdy), obs=lev[..., 0])
+
         obs_scale = pyro.sample("obs_scale", dist.HalfCauchy(sdy))
         with pyro.plate("response_plate", n_valid):
             pyro.sample("response",
@@ -161,9 +168,9 @@ def __call__(self):
         lev_knot = lev_knot_tran + meany
 
         extra_out.update({
-            'yhat': yhat + seas_term,
+            'yhat': yhat + seas_term + meany,
             'lev': lev + meany,
-            'lev_knot':lev_knot,
+            'lev_knot': lev_knot,
             'coef': coef,
             'coef_knot': coef_knot
         })
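
For readers following the `_set_kernel_matrix` change above, here is a minimal standalone sketch of the trimming behaviour: knot dates that fall after the end of training are dropped, and `level_knots` is truncated to the same length. The helper name `trim_level_knots`, the `train_end` argument, and the example dates are illustrative only, not part of the orbit API.

```python
import numpy as np
import pandas as pd

def trim_level_knots(level_knot_dates, level_knots, train_end):
    # keep only knot dates within the training range (mirrors the diff above)
    level_knot_dates = pd.to_datetime([x for x in level_knot_dates if x <= train_end])
    if len(level_knots) > 0:
        # trim level_knots accordingly so it stays aligned with the kept dates
        level_knots = level_knots[:len(level_knot_dates)]
    return level_knot_dates, level_knots

dates = pd.to_datetime(['2020-01-01', '2020-06-01', '2021-01-01'])
kept_dates, kept_knots = trim_level_knots(dates, np.array([1.0, 1.2, 1.5]),
                                          pd.Timestamp('2020-12-31'))
print(kept_dates)   # only the two dates inside the training range remain
print(kept_knots)   # [1.0, 1.2]
```

Note that positional truncation only lines up with the date filter when `level_knot_dates` is sorted in ascending order, which appears to be the assumption made by the patch.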
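The dtype handling added to the pyro `Model.__init__` can be exercised in isolation. The sketch below (assumed inputs, hypothetical `to_tensors` helper) shows why index-like arrays stay integer tensors while everything else is cast to double:

```python
import numpy as np
import torch

def to_tensors(data):
    out = {}
    for key, value in data.items():
        key = key.lower()
        if isinstance(value, (list, np.ndarray)):
            if key in ['which_valid_res']:
                # used for indexing, so the tensor must stay long/int
                value = torch.tensor(value)
            else:
                # loc/scale inputs cannot be long; ints get cast to double
                value = torch.tensor(value, dtype=torch.double)
        out[key] = value
    return out

tensors = to_tensors({
    'WHICH_VALID_RES': np.array([0, 1, 2]),  # stays torch.int64 for indexing
    'PR_KNOT_SCALE': [1, 2, 3],              # ints converted to torch.float64
})
print(tensors['which_valid_res'].dtype, tensors['pr_knot_scale'].dtype)
```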
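Finally, replacing `response -= seas_term` with `response_tran = response - meany - seas_term`, together with the change to `'yhat': yhat + seas_term + meany`, keeps the de-meaning and de-seasonalization symmetric: whatever is subtracted before sampling is added back when reporting fitted values. A toy round-trip check (illustrative numbers only, not orbit code):

```python
import torch

response = torch.tensor([10.0, 12.0, 9.0])
seas_term = torch.tensor([1.0, -0.5, -0.5])
meany = response.mean()

# what the sampler works with after this patch
response_tran = response - meany - seas_term

# pretend the model recovered the transformed signal exactly
yhat_tran = response_tran.clone()

# reported on the original scale, as in extra_out['yhat'] above
yhat = yhat_tran + seas_term + meany
print(torch.allclose(yhat, response))  # True
```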