Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refinement of KTR #310

Merged
merged 52 commits into from
Dec 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
1a98053
Add support for multiple prediction percentiles
Oct 30, 2020
f879468
fixed unit tests
Oct 30, 2020
31dba6e
Merge pull request #263 from uber/multi-pred-percentile
Oct 30, 2020
57bffa3
data load wrappers (#248)
wangzhishi Nov 4, 2020
e6f13d4
Readme tutorial update (#264)
wangzhishi Nov 4, 2020
c29a313
add a button of "Edit on Github" (#265)
wangzhishi Nov 5, 2020
48b2872
upgrade pyro config
Nov 6, 2020
41e4962
update pyro requirement
Nov 6, 2020
a9facad
Merge pull request #270 from uber/pyro-config-upgrade
Nov 6, 2020
8f3be91
small update on fft utils
Nov 6, 2020
99a7e51
Merge pull request #273 from uber/features-fft-update
Nov 7, 2020
c407e1c
enriching dataset
Nov 7, 2020
eda800e
Update dataset.py
Nov 7, 2020
2885d41
small change on pyro-config
Nov 8, 2020
7fd2af1
Merge pull request #278 from uber/pyro-config-small-change
Nov 8, 2020
cd244cf
Merge pull request #277 from uber/dataset-enhance
Nov 8, 2020
3755262
more flexible plot components
Nov 8, 2020
f4133bf
adding doc string
Nov 8, 2020
22271ce
refresh banner and icon
Nov 9, 2020
391bfcc
read me link
Nov 9, 2020
4e1f9d6
more banners
Nov 9, 2020
5407a60
testing new dim of banner
Nov 9, 2020
1cdc1b7
Predict fix (#281)
wangzhishi Nov 9, 2020
0b968d1
adding logo and some style for tutorial and docs
Nov 9, 2020
36ddd30
favicon
Nov 10, 2020
d1ca203
Merge pull request #282 from uber/read-me-icon
Nov 10, 2020
95cc75c
initialize `_regressor_col` in __init__()
steveyang90 Nov 10, 2020
431bf7d
small version bump
wangzhishi Nov 11, 2020
68b066b
Merge pull request #284 from uber/ver-bump
Nov 11, 2020
ebc22e6
fix matplotlib
vincewu51 Nov 12, 2020
700e316
Merge pull request #279 from uber/plot-comp-refine
Nov 12, 2020
8a2e935
Create orbit.css
Nov 12, 2020
94f0fb1
reorder fourier series calculation to match the df (#286)
wangzhishi Nov 13, 2020
063e20f
plot utility enhancement (#287)
wangzhishi Nov 13, 2020
8cca93b
Setup TravisCI deployment for PyPI (#292)
steveyang90 Nov 15, 2020
e51ef6e
debug travis pypi deployment (#293)
steveyang90 Nov 15, 2020
2605d80
Debug travis package deployment (#294)
steveyang90 Nov 15, 2020
f6adc00
dpl v2 for travis config (#295)
steveyang90 Nov 15, 2020
437e693
realign version number for module and release
steveyang90 Nov 16, 2020
eb6ac25
Update README.rst (#297)
Nov 24, 2020
1c80359
Update README.rst
Nov 25, 2020
a25fbce
upgrade of models and using stan
Dec 7, 2020
4415a5e
upgrade of models and using stan
Dec 7, 2020
902d4ea
resolve conflicts
Dec 7, 2020
7a777f1
fix fourier series
Dec 8, 2020
0f7fcc5
hotfix of pr and rr switch and fourier series shift
Dec 8, 2020
1772511
Update ktr.py
Dec 8, 2020
ee2b2ce
Dev gam pyro refine2 test2 (#311)
wangzhishi Dec 9, 2020
8e734ce
tensor dtype assignment
wangzhishi Dec 9, 2020
8b513c1
testing
wangzhishi Dec 9, 2020
28f87fd
fix a bug in .get_regression_coefs
wangzhishi Dec 10, 2020
70a1e7d
trim level_knots accordingly when level_knot_dates outside the traini…
wangzhishi Dec 10, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,556 changes: 2,172 additions & 384 deletions examples/KTR_Pyro_Example.ipynb

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions orbit/constants/ktr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@ class DataInputMapper(Enum):
# observation related
_NUM_OF_OBSERVATIONS = 'N_OBS'
_RESPONSE = 'RESPONSE'
# used for pyro
# _IS_VALID_RESPONSE = 'IS_VALID_RES'
_NUM_OF_VALID_RESPONSE = 'N_VALID_RES'
# mainly used for stan
_WHICH_VALID_RESPONSE = 'WHICH_VALID_RES'
_RESPONSE_SD = 'SDY'
_RESPONSE_MEAN = 'MEAN_Y'
_DEGREE_OF_FREEDOM = 'DOF'
# ---------- Level ---------- #
_NUM_KNOTS_LEVEL = 'N_KNOTS_LEV'
Expand All @@ -35,6 +41,8 @@ class DataInputMapper(Enum):
_INSERT_PRIOR_SD = 'PRIOR_SD'
_INSERT_PRIOR_TP_IDX = 'PRIOR_TP_IDX'
_INSERT_PRIOR_IDX = 'PRIOR_IDX'
_LEVEL_KNOTS = 'LEV_KNOT_LOC'
_SEAS_TERM = 'SEAS_TERM'


class BaseSamplingParameters(Enum):
Expand Down
12 changes: 10 additions & 2 deletions orbit/diagnostics/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,14 @@ def plot_predicted_data(training_actual_df, predicted_df, date_col, actual_col,
fig.savefig(path)
if is_visible:
plt.show()
else:
plt.close()

return ax


def plot_predicted_components(predicted_df, date_col, prediction_percentiles=None, plot_components=None,
title="", figsize=None, path=None):

title="", is_visible=True, figsize=None, path=None):
""" Plot predicted componenets with the data frame of decomposed prediction where components
has been pre-defined as `trend`, `seasonality` and `regression`.
Parameters
Expand All @@ -138,6 +139,8 @@ def plot_predicted_components(predicted_df, date_col, prediction_percentiles=Non
title of the plot
figsize: tuple
figsize pass through to `matplotlib.pyplot.figure()`
is_visible: boolean
whether we want to show the plot. If called from unittest, is_visible might = False.
path: str
path to save the figure
Returns
Expand Down Expand Up @@ -183,6 +186,11 @@ def plot_predicted_components(predicted_df, date_col, prediction_percentiles=Non
if path:
plt.savefig(path)

if is_visible:
plt.show()
else:
plt.close()

return axes


Expand Down
362 changes: 299 additions & 63 deletions orbit/models/ktr.py

Large diffs are not rendered by default.

68 changes: 52 additions & 16 deletions orbit/pyro/ktr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pyro
import pyro.distributions as dist

# FIXME: this is sort of dangerous; consider better implementation later
torch.set_default_tensor_type('torch.DoubleTensor')


Expand All @@ -14,7 +15,13 @@ def __init__(self, data):
for key, value in data.items():
key = key.lower()
if isinstance(value, (list, np.ndarray)):
value = torch.tensor(value, dtype=torch.double)
if key in ['which_valid_res']:
# to use as index, tensor type has to be long or int
value = torch.tensor(value)
else:
# loc/scale cannot be in long format
# sometimes they may be supplied as int, so dtype conversion is needed
value = torch.tensor(value, dtype=torch.double)
self.__dict__[key] = value

def __call__(self):
Expand All @@ -30,9 +37,15 @@ def __call__(self):
use _coef, _weight etc. instead of _beta, use _scale instead of _sigma
"""
response = self.response
which_valid = self.which_valid_res

n_obs = self.n_obs
n_valid = self.n_valid_res
sdy = self.sdy
meany = self.mean_y
dof = self.dof
lev_knot_loc = self.lev_knot_loc
seas_term = self.seas_term

pr = self.pr
rr = self.rr
Expand All @@ -43,13 +56,6 @@ def __call__(self):
k_coef = self.k_coef
n_knots_lev = self.n_knots_lev
n_knots_coef = self.n_knots_coef
regressors = torch.zeros(n_obs)
if n_pr > 0 and n_rr > 0:
regressors = torch.cat([rr, pr], dim=-1)
elif n_pr > 0:
regressors = pr
elif n_rr > 0:
regressors = rr

lev_knot_scale = self.lev_knot_scale

Expand All @@ -63,11 +69,33 @@ def __call__(self):
pr_knot_pool_scale = self.pr_knot_pool_scale
pr_knot_scale = self.pr_knot_scale.unsqueeze(-1)

# transformation of data
regressors = torch.zeros(n_obs)
if n_pr > 0 and n_rr > 0:
regressors = torch.cat([rr, pr], dim=-1)
elif n_pr > 0:
regressors = pr
elif n_rr > 0:
regressors = rr

response_tran = response - meany - seas_term

# sampling begins here
extra_out = {}

# levels sampling
lev_knot = pyro.sample("lev_knot", dist.Laplace(0, lev_knot_scale).expand([n_knots_lev]))
lev = (lev_knot @ k_lev.transpose(-2, -1))
# with pyro.plate("lev_plate", n_knots_lev):
# lev_drift = pyro.sample("lev_drift", dist.Laplace(0, lev_knot_scale))
# lev_knot_tran = lev_drift.cumsum(-1)
# lev = (lev_knot_tran @ k_lev.transpose(-2, -1))

# levels sampling
if len(lev_knot_loc) > 0:
lev_knot_tran = pyro.sample("lev_knot", dist.Normal(lev_knot_loc - meany, lev_knot_scale).expand([n_knots_lev]))
lev = (lev_knot_tran @ k_lev.transpose(-2, -1))
else:
lev_knot_tran = pyro.sample("lev_knot", dist.Laplace(0, lev_knot_scale).expand([n_knots_lev]))
lev = (lev_knot_tran @ k_lev.transpose(-2, -1))

# regular regressor sampling
if n_rr > 0:
Expand Down Expand Up @@ -129,11 +157,19 @@ def __call__(self):
pyro.sample("prior_{}_{}".format(tp, idx), dist.Normal(m, sd),
obs=coef[..., tp, idx])

pyro.sample("init_lev", dist.Normal(response[0], sdy), obs=lev[..., 0])

obs_scale = pyro.sample("obs_scale", dist.HalfCauchy(sdy))
with pyro.plate("response_plate", n_obs):
pyro.sample("response", dist.StudentT(dof, yhat[..., :], obs_scale), obs=response)

extra_out.update({'yhat': yhat, 'lev': lev, 'coef': coef, 'coef_knot': coef_knot})
with pyro.plate("response_plate", n_valid):
pyro.sample("response",
dist.StudentT(dof, yhat[..., which_valid], obs_scale),
obs=response_tran[which_valid])

lev_knot = lev_knot_tran + meany

extra_out.update({
'yhat': yhat + seas_term + meany,
'lev': lev + meany,
'lev_knot': lev_knot,
'coef': coef,
'coef_knot': coef_knot
})
return extra_out
104 changes: 104 additions & 0 deletions orbit/stan/ktr.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Kernel-based Time-varying Regression (KTR) — Stan implementation.
// The response is centered by MEAN_Y; the level follows a random walk in
// knot space (Laplace-distributed drifts projected through kernel K_LEV),
// and regression coefficients vary over time as kernel-weighted knots with
// a two-level (pooled) normal prior. The likelihood is Student-t and is
// evaluated only at the valid (non-missing) observation indices.
data {
  // response related
  int<lower=0> N_OBS;                      // total number of observations
  int<lower=0> N_VALID_RES;                // number of valid (non-missing) responses
  real<lower=0> SDY;                       // response standard deviation (scale for priors)
  real MEAN_Y;                             // response mean, used for centering
  int<lower=0> DOF;                        // Student-t degrees of freedom
  vector[N_OBS] RESPONSE;
  int WHICH_VALID_RES[N_VALID_RES];        // 0-based indices of valid responses (from numpy)
  // trend related
  int<lower=0> N_KNOTS_LEV;                // number of level knots
  matrix[N_OBS, N_KNOTS_LEV] K_LEV;        // kernel mapping level knots -> observations
  real<lower=0> LEV_KNOT_SCALE;            // scale of the Laplace prior on level-knot drifts
  // regression related
  int<lower=0> N_RR;                       // number of regular (unconstrained) regressors
  int<lower=0> N_PR;                       // number of positive (sign-constrained) regressors
  vector[N_RR] RR_KNOT_POOL_LOC;           // pooled prior location for rr knot means
  vector<lower=0>[N_RR] RR_KNOT_POOL_SCALE;
  vector<lower=0>[N_RR] RR_KNOT_SCALE;     // knot-level prior scale around the pooled mean
  matrix[N_OBS, N_RR] RR;
  matrix[N_OBS, N_PR] PR;
  vector[N_PR] PR_KNOT_POOL_LOC;
  vector<lower=0>[N_PR] PR_KNOT_POOL_SCALE;
  vector<lower=0>[N_PR] PR_KNOT_SCALE;
  // kernel
  int<lower=0> N_KNOTS_COEF;               // number of coefficient knots
  matrix[N_OBS, N_KNOTS_COEF] K_COEF;      // kernel mapping coefficient knots -> observations
}
transformed data {
  matrix[N_OBS, N_RR + N_PR] REGRESSORS;
  vector[N_OBS] RESPONSE_TRAN;             // centered response
  // convert numpy index to stan (0-based -> 1-based)
  int WHICH_VALID_RES2[N_VALID_RES];
  for (n in 1:N_VALID_RES) {
    WHICH_VALID_RES2[n] = WHICH_VALID_RES[n] + 1;
  }
  // column order is rr first, then pr; `coef` below must match this order
  REGRESSORS = append_col(RR, PR);
  RESPONSE_TRAN = RESPONSE - MEAN_Y;
}

parameters {
  // vector[N_KNOTS_LEV] lev_knot;         // earlier direct-knot parameterization, kept for reference
  vector[N_KNOTS_LEV] lev_knot_drift;      // per-knot increments; cumsum gives the level knots
  vector<lower=0>[N_PR] pr_knot_loc;       // pooled (per-regressor) means of positive knots
  vector[N_RR] rr_knot_loc;                // pooled (per-regressor) means of regular knots
  matrix<lower=0>[N_KNOTS_COEF, N_PR] pr_knot;
  matrix[N_KNOTS_COEF, N_RR] rr_knot;
  real<lower=0, upper=SDY> obs_scale;      // observation noise scale, bounded by SDY
}
transformed parameters {
  vector[N_OBS] lev;                       // level evaluated at each observation
  vector[N_OBS] regression;                // total regression component
  vector[N_OBS] yhat;                      // fitted value (centered scale)
  vector[N_KNOTS_LEV] lev_knot_tran;       // level knots on the centered scale
  matrix[N_OBS, N_PR + N_RR] coef;         // time-varying coefficients per observation
  matrix[N_OBS, N_RR] rr_coef;
  matrix<lower=0>[N_OBS, N_PR] pr_coef;

  // random-walk level: knots are the cumulative sum of the drifts
  lev_knot_tran = cumulative_sum(lev_knot_drift);
  lev = K_LEV * lev_knot_tran;

  rr_coef = rep_matrix(0, N_OBS, N_RR);
  pr_coef = rep_matrix(0, N_OBS, N_PR);

  // project coefficient knots onto observations through the kernel
  if (N_RR > 0) rr_coef = K_COEF * rr_knot;
  if (N_PR > 0) pr_coef = K_COEF * pr_knot;

  // same rr-then-pr column order as REGRESSORS in transformed data
  coef = append_col(rr_coef, pr_coef);
  if (N_RR + N_PR > 0) {
    // row-wise dot product of regressors with their time-varying coefficients
    for (n in 1:N_OBS) {
      regression[n] = sum(REGRESSORS[n, :] .* coef[n, :]);
    }
  } else {
    regression = rep_vector(0, N_OBS);
  }
  yhat = lev + regression;
}


model {
  // lev_knot ~ double_exponential(0, LEV_KNOT_SCALE);  // earlier parameterization, kept for reference
  lev_knot_drift ~ double_exponential(0, LEV_KNOT_SCALE);
  // two-level pooling: knot values shrink toward a per-regressor location,
  // which itself has a pooled normal prior
  rr_knot_loc ~ normal(RR_KNOT_POOL_LOC, RR_KNOT_POOL_SCALE);
  for (n in 1:N_KNOTS_COEF) {
    rr_knot[n,:] ~ normal(rr_knot_loc, RR_KNOT_SCALE);
  }
  pr_knot_loc ~ normal(PR_KNOT_POOL_LOC, PR_KNOT_POOL_SCALE);
  for (n in 1:N_KNOTS_COEF) {
    pr_knot[n,:] ~ normal(pr_knot_loc, PR_KNOT_SCALE);
  }
  // half-Cauchy truncated to (0, SDY]; matches the declared bounds on obs_scale
  obs_scale ~ cauchy(0, SDY)T[0, SDY];
  // likelihood only over the valid (non-missing) response indices
  RESPONSE_TRAN[WHICH_VALID_RES2] ~ student_t(DOF, yhat[WHICH_VALID_RES2], obs_scale);
}

generated quantities {
  matrix[N_RR + N_PR, N_KNOTS_COEF] coef_knot;
  vector[N_KNOTS_LEV] lev_knot;
  // undo the centering so level knots are reported on the original response scale
  lev_knot = lev_knot_tran + MEAN_Y;
  if (N_RR + N_PR > 0) {
    // transpose so rows are regressors and columns are knots
    coef_knot = append_col(rr_knot, pr_knot)';
  } else {
    coef_knot = rep_matrix(0, N_RR + N_PR, N_KNOTS_COEF);
  }
}
Loading