Skip to content

Commit

Permalink
ktr lite house keeping (#440)
Browse files Browse the repository at this point in the history
* ktr lite house keeping

- revert level knots plot
- refine tutorial
- refine some plotting code

* more house keeping

- [x] enhance unit test
- [x] relabel pool
- [x] initializer of ktrlite

* notebook update

* Update test_ktrlite.py

* Update test_ktrlite.py

* add comment for stan_extract cleaning

Co-authored-by: Zhishi Wang <zhishiw@uber.com>
  • Loading branch information
Edwin Ng and wangzhishi authored May 30, 2021
1 parent b18c8ad commit 47b76dd
Show file tree
Hide file tree
Showing 11 changed files with 714 additions and 342 deletions.
307 changes: 237 additions & 70 deletions docs/tutorials/ktrlite.ipynb

Large diffs are not rendered by default.

403 changes: 268 additions & 135 deletions examples/ktrlite.ipynb

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions orbit/constants/ktrlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ class DataInputMapper(Enum):
KERNEL_COEFFICIENTS = 'K_COEF'
NUM_OF_REGRESSORS = 'P'
REGRESSOR_MATRIX = 'REGRESSORS'
COEFFICIENTS_KNOT_POOLING_LOC = 'COEF_KNOT_POOL_LOC'
COEFFICIENTS_KNOT_POOLING_SCALE = 'COEF_KNOT_POOL_SCALE'
COEFFICIENTS_INITIAL_KNOT_SCALE = 'COEF_INIT_KNOT_SCALE'
COEFFICIENTS_KNOT_SCALE = 'COEF_KNOT_SCALE'


Expand Down
15 changes: 10 additions & 5 deletions orbit/constants/palette.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,17 @@ class QualitativePalette(Enum):
PostQ = ['#1fc600', '#ff4500']
# large amount of stacking series
Stack = ["#12939A", "#F15C17", "#DDB27C", "#88572C", "#FF991F", "#DA70BF", "#125C77",
"#4DC19C", "#776E57", "#17B8BE", "#F6D18A", "#B7885E", "#FFCB99", "#F89570",
"#829AE3", "#E79FD5", "#1E96BE", "#89DAC1", "#B3AD9E"]
"#4DC19C", "#776E57", "#17B8BE", "#F6D18A", "#B7885E", "#FFCB99", "#F89570",
"#829AE3", "#E79FD5", "#1E96BE", "#89DAC1", "#B3AD9E"]
# bar plot
Bar5 = ["#ef476fff", "#ffd166ff", "#06d6a0ff", "#118ab2ff", "#073b4cff"]


class KTRPalette(Enum):
KNOTS_SEGMENT = '#276ef1'
KNOTS_REGION = '#5b91f5ff'
class OrbitPalette(Enum):
    """Named colors (hex strings) used by Orbit plotting code."""
    Black = '#000000'
    DarkGrey = '#333131'
    SafetyBlue = '#276EF1'
    LightBlue = '#5B91F5'
    Green = '#05A357'
    # Orange used to carry the same value as Green ('#05A357'). Enum treats a
    # duplicated value as an alias, so `OrbitPalette.Orange` resolved to the
    # Green member and was dropped from iteration.
    # NOTE(review): '#FF6937' is assumed to be the intended orange -- confirm
    # against the brand palette.
    Orange = '#FF6937'
    Yellow = '#FFC043'
69 changes: 2 additions & 67 deletions orbit/diagnostics/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from orbit.constants.constants import PredictionKeys
from orbit.utils.general import is_empty_dataframe, is_ordered_datetime
from orbit.constants.palette import QualitativePalette, KTRPalette
from orbit.constants.palette import QualitativePalette

az.style.use("arviz-darkgrid")

Expand Down Expand Up @@ -422,72 +422,7 @@ def _pair_plot(posterior_samples, pair_type='scatter', n_bins=20):
return axes


def plot_ktr_lev_knots(actual_df, lev_knots_df, date_col, actual_col,
knots_delta_threshold=0.5,
path=None, is_visible=True, title="",
fontsize=16, markersize=150, figsize=(16, 8)):
""" Plot the fitted level knots along with the actual time series.

Parameters
----------
actual_df : pd.DataFrame
actual data frame including the actual response
lev_knots_df : pd.DataFrame
level knots data from KTRLite model
date_col : str
the date column name
actual_col : str
actual response column name
knots_delta_threshold : float
standardized threshold of level knots difference to detect change point of levels
path : str; optional
path to save the figure
is_visible : boolean
whether we want to show the plot. If called from unittest, is_visible might = False.
title : str; optional
title of the plot
fontsize : int; optional
fontsize of the title
markersize : int; optional
knot marker size
figsize : tuple; optional
figsize pass through to `matplotlib.pyplot.figure()`
Returns
-------
matplotlib axes object
"""
actuals = actual_df[actual_col]
ymin = min(actual_df[actual_col]) * 0.98
ymax = max(actual_df[actual_col]) * 1.02
fig, ax = plt.subplots(1, 1, figsize=figsize)
ax.set_ylim(ymin, ymax)
ax.plot(actual_df[date_col], actuals, color='black', lw=1, alpha=0.5, label='actual')

# plot all the segments divided by knots
dt = lev_knots_df[date_col].values
ax.vlines(x=dt, ymin=ymin, ymax=ymax, linestyles='dashed', alpha=0.8, linewidth=1,
facecolor=KTRPalette.KNOTS_SEGMENT.value, label='knots')

# standardized threshold of level knots difference to detect change point of levels
lk = lev_knots_df['lev_knot'].values
lk = (lk - np.mean(lk)) / np.std(lk)
lkd = np.diff(lk)
flag = np.fabs(lkd) > knots_delta_threshold
if flag.any():
# due to diff function creates a lag of 1
dt = lev_knots_df[date_col].values
for idx in np.where(flag)[0]:
ax.axvspan(dt[idx], dt[idx+1], facecolor=KTRPalette.KNOTS_REGION.value, alpha=0.5)

ax.legend()
ax.set_title(title, fontsize=fontsize)
if path:
fig.savefig(path)
if is_visible:
plt.show()
else:
plt.close()
return ax


def get_arviz_plot_dict(mod,
Expand Down Expand Up @@ -532,7 +467,7 @@ def get_arviz_plot_dict(mod,


def plot_param_diagnostics(mod, incl_noise_params=False, incl_trend_params=False, incl_smooth_params=False,
which='trace', **kwargs):
which='trace', **kwargs):
"""
Parameters
-----------
Expand Down
1 change: 1 addition & 0 deletions orbit/estimators/stan_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def fit(self, model_name, model_param_names, data_input, fitter=None, init_value
)

# todo: move dimension cleaning function to the model directly
# flatten the first two dims by preserving the chain order
for key, val in stan_extract.items():
if len(val.shape) == 2:
# here `order` is important to make samples flattened by chain
Expand Down
17 changes: 17 additions & 0 deletions orbit/initializer/ktrlite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import numpy as np
from ..constants import ktrlite as constants


class KTRLiteInitializer(object):
    """Callable that builds a dict of initial parameter values for KTRLite.

    Parameters
    ----------
    num_regressor : int
        number of regressors; initial values are only generated when this is greater than 1
    num_knots_coefficients : int
        number of coefficient knots per regressor
    """

    def __init__(self, num_regressor, num_knots_coefficients):
        self.num_regressor = num_regressor
        self.num_knots_coefficients = num_knots_coefficients

    def __call__(self):
        """Return the initial values.

        Returns
        -------
        dict
            empty unless ``num_regressor > 1``; otherwise holds one entry keyed by the
            coefficient-knot parameter name with random normal draws (mean 0, sd 0.1) of
            shape ``(num_regressor, num_knots_coefficients)``
        """
        # nothing to initialize unless there is more than one regressor
        if not (self.num_regressor > 1):
            return dict()

        coef_knot_key = constants.RegressionSamplingParameters.COEFFICIENTS_KNOT.value
        knot_shape = (self.num_regressor, self.num_knots_coefficients)
        return {coef_knot_key: np.random.normal(0, 0.1, knot_shape)}
116 changes: 93 additions & 23 deletions orbit/models/ktrlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@
import math
from scipy.stats import nct
from copy import deepcopy
import matplotlib.pyplot as plt

from ..estimators.stan_estimator import StanEstimatorMAP
from ..exceptions import IllegalArgument, ModelException
from ..utils.kernels import sandwich_kernel
from ..utils.features import make_fourier_series_df
from .template import BaseTemplate, MAPTemplate
from ..constants.constants import PredictionKeys, PredictMethod
from ..initializer.ktrlite import KTRLiteInitializer
from ..constants import ktrlite as constants
from orbit.constants.palette import OrbitPalette


class BaseKTRLite(BaseTemplate):
"""Base KTRLite model object with shared functionality for MAP method
Expand All @@ -23,10 +27,10 @@ class BaseKTRLite(BaseTemplate):
fourier series order for seasonality
level_knot_scale : float
sigma for level; default to be .5
seasonal_knot_pooling_scale : float
pooling sigma for seasonal fourier series regressors; default to be 1
seasonal_initial_knot_scale : float
scale parameter for seasonal regressors initial coefficient knots; default to be 1
seasonal_knot_scale : float
sigma for seasonal fourier series regressors; default to be 0.1.
scale parameter for seasonal regressors drift of coefficient knots; default to be 0.1.
span_level : float between (0, 1)
window width to decide the number of windows for the level (trend) term.
e.g., span 0.1 will produce 10 windows.
Expand Down Expand Up @@ -56,7 +60,7 @@ def __init__(self,
seasonality=None,
seasonality_fs_order=None,
level_knot_scale=0.5,
seasonal_knot_pooling_scale=1.0,
seasonal_initial_knot_scale=1.0,
seasonal_knot_scale=0.1,
span_level=0.1,
span_coefficients=0.3,
Expand All @@ -78,7 +82,7 @@ def __init__(self,

self.seasonality = seasonality
self.seasonality_fs_order = seasonality_fs_order
self.seasonal_knot_pooling_scale = seasonal_knot_pooling_scale
self.seasonal_initial_knot_scale = seasonal_initial_knot_scale
self.seasonal_knot_scale = seasonal_knot_scale

# set private var to arg value
Expand All @@ -87,22 +91,20 @@ def __init__(self,
self._seasonality = self.seasonality
self._seasonality_fs_order = self.seasonality_fs_order
self._seasonal_knot_scale = self.seasonal_knot_scale
self._seasonal_knot_pooling_scale = None
self._seasonal_initial_knot_scale = None
self._seasonal_knot_scale = None

self._level_knot_dates = self.level_knot_dates
self._degree_of_freedom = degree_of_freedom

self.span_coefficients = span_coefficients
# self.rho_coefficients = rho_coefficients
self.date_freq = date_freq

# regression attributes -- now is ONLY used for fourier series as seasonality
self.num_of_regressors = 0
self.regressor_col = list()
self.regressor_col_gp = list()
self.coefficients_knot_pooling_loc = list()
self.coefficients_knot_pooling_scale = list()
self.coefficients_initial_knot_scale = list()
self.coefficients_knot_scale = list()

# set static data attributes
Expand All @@ -127,9 +129,19 @@ def __init__(self,
self.num_knots_coefficients = None
self.knots_tp_coefficients = None
self.regressor_matrix = None
# self.coefficients_knot_dates = None

# initialization related modules
def _set_init_values(self):
"""Override function from Base Template"""
# init_values_partial = partial(init_values_callable, seasonality=seasonality)
# partialfunc does not work when passed to PyStan because PyStan uses
# inspect.getargspec(func) which seems to raise an exception with keyword-only args
# caused by using partialfunc
# lambda as an alternative workaround
if len(self._seasonality) > 1 and self.num_of_regressors > 0:
init_values_callable = KTRLiteInitializer(self.num_of_regressors, self.num_knots_coefficients)
self._init_values = init_values_callable

# set defaults
def _set_default_args(self):
"""Set default attributes for None
"""
Expand All @@ -151,11 +163,11 @@ def _set_default_args(self):
if 2 * order > self._seasonality[k] - 1:
raise IllegalArgument('reduce seasonality_fs_order to avoid over-fitting')

if not isinstance(self.seasonal_knot_pooling_scale, list) and \
isinstance(self.seasonal_knot_pooling_scale * 1.0, float):
self._seasonal_knot_pooling_scale = [self.seasonal_knot_pooling_scale] * len(self._seasonality)
if not isinstance(self.seasonal_initial_knot_scale, list) and \
isinstance(self.seasonal_initial_knot_scale * 1.0, float):
self._seasonal_initial_knot_scale = [self.seasonal_initial_knot_scale] * len(self._seasonality)
else:
self._seasonal_knot_pooling_scale = self.seasonal_knot_pooling_scale
self._seasonal_initial_knot_scale = self.seasonal_initial_knot_scale

if not isinstance(self.seasonal_knot_scale, list) and isinstance(self.seasonal_knot_scale * 1.0, float):
self._seasonal_knot_scale = [self.seasonal_knot_scale] * len(self._seasonality)
Expand All @@ -166,16 +178,14 @@ def _set_seasonality_attributes(self):
"""given list of seasonalities and their order, create list of seasonal_regressors_columns"""
self.regressor_col_gp = list()
self.regressor_col = list()
self.coefficients_knot_pooling_loc = list()
self.coefficients_knot_pooling_scale = list()
self.coefficients_initial_knot_scale = list()
self.coefficients_knot_scale = list()

if len(self._seasonality) > 0:
for idx, s in enumerate(self._seasonality):
fs_cols = []
order = self._seasonality_fs_order[idx]
self.coefficients_knot_pooling_loc += [0.0] * order * 2
self.coefficients_knot_pooling_scale += [self._seasonal_knot_pooling_scale[idx]] * order * 2
self.coefficients_initial_knot_scale += [self._seasonal_initial_knot_scale[idx]] * order * 2
self.coefficients_knot_scale += [self._seasonal_knot_scale[idx]] * order * 2
for i in range(1, order + 1):
fs_cols.append('seas{}_fs_cos{}'.format(s, i))
Expand Down Expand Up @@ -235,6 +245,7 @@ def _set_regressor_matrix(self, df):
if self.num_of_regressors > 0:
self.regressor_matrix = df.filter(items=self.regressor_col, ).values

# TODO: docstring and make this a utils since it is quite generic?
@staticmethod
def get_gap_between_dates(start_date, end_date, freq):
diff = end_date - start_date
Expand Down Expand Up @@ -271,10 +282,13 @@ def _set_kernel_matrix(self, df):
self.knots_tp_level = (1 + knots_idx_level) / self.num_of_observations
self._level_knot_dates = df[self.date_col].values[knots_idx_level]
else:
# to exclude dates which are not within training period
self._level_knot_dates = pd.to_datetime([
x for x in self._level_knot_dates if
(x <= df[self.date_col].max()) and (x >= df[self.date_col].min())
x for x in self._level_knot_dates if
(x <= df[self.date_col].values[-1]) and (x >= df[self.date_col].values[0])
])
# since we allow _level_knot_dates to be continuous, we calculate distance between knots
# in continuous value as well (instead of index)
if self.date_freq is None:
self.date_freq = pd.infer_freq(df[self.date_col])[0]
start_date = self.training_start
Expand Down Expand Up @@ -412,6 +426,8 @@ def _predict(self, posterior_estimates, df, include_error=False, **kwargs):
return out




class KTRLiteMAP(MAPTemplate, BaseKTRLite):
"""Concrete KTRLite model for MAP (Maximum a Posteriori) prediction
Expand All @@ -430,6 +446,60 @@ def get_level_knots(self):
constants.BaseSamplingParameters.LEVEL_KNOT.value:
# TODO: this is hacky, investigate why we have an extra dimension here?
np.squeeze(self._aggregate_posteriors[PredictMethod.MAP.value][
constants.BaseSamplingParameters.LEVEL_KNOT.value], 0),
constants.BaseSamplingParameters.LEVEL_KNOT.value], 0),
}
return pd.DataFrame(out)

def get_levels(self):
out = {
self.date_col:
self.date_array,
constants.BaseSamplingParameters.LEVEL.value:
# TODO: this is hacky, investigate why we have an extra dimension here?
np.squeeze(self._aggregate_posteriors[PredictMethod.MAP.value][
constants.BaseSamplingParameters.LEVEL.value], 0),
}
return pd.DataFrame(out)
return pd.DataFrame(out)

def plot_lev_knots(self, path=None, is_visible=True, title="",
                   fontsize=16, markersize=250, figsize=(16, 8)):
    """Plot the fitted levels and level knots along with the actual time series.

    Parameters
    ----------
    path : str; optional
        path to save the figure
    is_visible : boolean
        whether we want to show the plot. If called from unittest, is_visible might = False.
    title : str; optional
        title of the plot
    fontsize : int; optional
        fontsize of the title
    markersize : int; optional
        knot marker size
    figsize : tuple; optional
        figsize pass through to `matplotlib.pyplot.figure()`

    Returns
    -------
    matplotlib axes object
    """
    levels_df = self.get_levels()
    knots_df = self.get_level_knots()
    level_key = constants.BaseSamplingParameters.LEVEL.value
    knot_key = constants.BaseSamplingParameters.LEVEL_KNOT.value

    fig, ax = plt.subplots(1, 1, figsize=figsize)
    # observed response
    ax.plot(self.date_array, self.response, color=OrbitPalette.DarkGrey.value, lw=1, alpha=0.7, label='actual')
    # fitted level curve
    ax.plot(levels_df[self.date_col], levels_df[level_key],
            color=OrbitPalette.SafetyBlue.value, lw=1, alpha=0.8,
            label=level_key)
    # fitted level knots
    ax.scatter(knots_df[self.date_col], knots_df[knot_key],
               color=OrbitPalette.Green.value, lw=1, s=markersize, marker='^', alpha=0.8,
               label=knot_key)
    ax.legend()
    ax.grid(True, which='major', c='grey', ls='-', lw=1, alpha=0.5)
    ax.set_title(title, fontsize=fontsize)
    if path:
        fig.savefig(path)
    if is_visible:
        plt.show()
    else:
        # close the figure created here rather than whichever figure pyplot
        # currently considers "current"
        plt.close(fig)
    return ax
Loading

0 comments on commit 47b76dd

Please sign in to comment.