From caaed17babfb0973f7168e84bc310b48c632628e Mon Sep 17 00:00:00 2001
From: VP Nagraj <nagraj@nagraj.net>
Date: Tue, 21 Dec 2021 11:33:18 -0500
Subject: [PATCH 1/5] adding glm code; misc edits to DESCRIPTION info

---
 DESCRIPTION         |   9 +-
 NAMESPACE           |   6 ++
 R/glm.R             | 208 ++++++++++++++++++++++++++++++++++++++++++++
 R/utils.R           |  58 ++++++++++++
 man/glm_fit.Rd      |  25 ++++++
 man/glm_forecast.Rd |  28 ++++++
 man/glm_quibble.Rd  |  25 ++++++
 man/glm_wrap.Rd     |  35 ++++++++
 man/plot_forc.Rd    |  21 +++++
 man/wis_score.Rd    |  19 ++++
 10 files changed, 432 insertions(+), 2 deletions(-)
 create mode 100644 R/glm.R
 create mode 100644 man/glm_fit.Rd
 create mode 100644 man/glm_forecast.Rd
 create mode 100644 man/glm_quibble.Rd
 create mode 100644 man/glm_wrap.Rd
 create mode 100644 man/plot_forc.Rd
 create mode 100644 man/wis_score.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index ba36044..5645bb2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,7 @@ Authors@R:
              comment = c(ORCID = "0000-0001-9140-9028"))
       )
 Description: Miscellaneous functions for retrieving data, creating and evaluating time series 
-    forecasting models for influenza-like illness (ILI) cases, deaths, and hospitalizations in 
+    forecasting models for influenza-like illness (ILI) and influenza hospitalizations in 
     the United States.
 License: GPL (>= 3)
 Encoding: UTF-8
@@ -39,6 +39,11 @@ Imports:
     RSocrata,
     tibble,
     tidyr,
-    tsibble
+    tsibble,
+    trending,
+    trendeval,
+    ggplot2,
+    evalcast,
+    yardstick
 Depends: 
     R (>= 2.10)
diff --git a/NAMESPACE b/NAMESPACE
index 9ebdc39..c992771 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,7 +5,13 @@ export(get_cdc_hosp)
 export(get_cdc_ili)
 export(get_cdc_vax)
 export(get_hdgov_hosp)
+export(glm_fit)
+export(glm_forecast)
+export(glm_quibble)
+export(glm_wrap)
 export(is_monday)
 export(make_tsibble)
+export(plot_forc)
 export(this_monday)
+export(wis_score)
 importFrom(magrittr,"%>%")
diff --git a/R/glm.R b/R/glm.R
new file mode 100644
index 0000000..7ae5169
--- /dev/null
+++ b/R/glm.R
@@ -0,0 +1,208 @@
+#' Fit glm models
+#'
+#' This helper function is used in \link[fiphde]{glm_wrap} to fit a list of models and select the best one.
+#'
+#' @param .data Data including all explanatory and outcome variables needed for modeling; must include column for "location"
+#' @param .models List of models defined as \link[trending]{trending_model} objects
+#'
+#' @return A `tibble` containing characteristics from the "best" `glm` model (i.e., the model from ".models" list with lowest RMSE). The columns in this `tibble` include:
+#'
+#'- model_class: The "type" of model for the best fit
+#'- fit: The fitted model object for the best fit
+#'- location: The geographic
+#'- data: Original model fit data as a `tibble` in a list column
+#'
+#' @export
+#'
+#' @md
+#'
+glm_fit <- function(.data,
+                    .models) {
+
+  dat <- .data
+
+  message(paste0("Location ... "))
+  message(unique(dat$location))
+
+  ## evaluate models and use metrics provided in arg
+  res <-
+    trendeval::evaluate_models(
+      .models,
+      dat,
+      method = evaluate_resampling,
+      metrics = list(yardstick::rmse)
+    )
+
+  ## pull the best model by rmse
+  best_by_rmse <-
+    res %>%
+    # dplyr::filter(purrr::map_lgl(warning, is.null)) %>%  # remove models that gave warnings
+    dplyr::filter(purrr::map_lgl(error, is.null))  %>%   # remove models that errored
+    dplyr::slice_min(rmse) %>%
+    dplyr::select(model) %>%
+    purrr::pluck(1,1)
+
+  ## fit the model
+  tmp_fit <-
+    best_by_rmse %>%
+    fit(dat)
+
+  ## construct tibble with model type, actual fit, and the location
+  ret <- dplyr::tibble(model_class = best_by_rmse$model_class,
+                       fit = tmp_fit,
+                       location = unique(dat$location),
+                       data = tidyr::nest(dat, fit_data = everything()))
+
+  message("Selected model ...")
+  message(as.character(ret$fit$fitted_model$family)[1])
+
+  message("Variables ...")
+  message(paste0(names(ret$fit$fitted_model$coefficients), collapse = " + "))
+  return(ret)
+
+}
+
+
+#' Get quantiles from prediction intervals
+#'
+#' This function runs the \link[trending]{predict.trending_model_fit} method on a fitted model at specified values of "alpha" in order to create a range of prediction intervals. The processing also includes steps to convert the alpha to corresponding quantile values at upper and lower bounds.
+#'
+#' @param fit Fitted model object from \link[fiphde]{glm_fit}
+#' @param new_data Tibble with new data on which the \link[trending]{predict.trending_model_fit} method should run
+#' @param alpha Vector specifying the threshold(s) to be used for prediction intervals; alpha of `0.05` would correspond to 95% PI; default is `c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2` to range of intervals
+#'
+#' @return A tibble with predicted values at each quantile (lower and upper bound for each value of "alpha")
+#' @export
+#' @md
+#'
+glm_quibble <- function(fit, new_data, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) {
+
+  ## get the quantiles from the alpha
+  q_lower <- alpha/2
+  q_upper <- 1 - q_lower
+
+  ## run the predict method on the fitted model
+  ## use the given alpha
+  fit %>%
+    predict(new_data, alpha = alpha) %>%
+    ## get just the prediction interval bounds ...
+    ## index (time column must be named index) ...
+    dplyr::select(epiyear, epiweek, lower_pi, upper_pi) %>%
+    ## reshape so that its in long format
+    tidyr::gather(quantile, value, lower_pi:upper_pi) %>%
+    ## and subout out lower_pi/upper_pi for the appropriate quantile
+    dplyr::mutate(quantile = ifelse(quantile == "lower_pi", q_lower, q_upper))
+}
+
+
+#' Forecast glm models
+#'
+#' This function uses fitted model object from \link[fiphde]{glm_fit} and future covariate data to create probablistic forecasts at specific quantiles derived from the "alpha" parameter.
+#'
+#' @param .data Data including all explanatory and outcome variables needed for modeling
+#' @param new_covariates Tibble with one column per covariate, and n rows for n horizons being forecasted
+#' @param fit Fitted model object from \link[fiphde]{glm_fit}
+#' @param alpha Vector specifying the threshold(s) to be used for prediction intervals; alpha of `0.05` would correspond to 95% PI; default is `c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2` to range of intervals
+#'
+#' @return Tibble with forecasts (quantiles and point estimates)
+#' @export
+#' @md
+#'
+glm_forecast <- function(.data, new_covariates = NULL, fit, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) {
+
+  ## get the last date from the data provided
+  last_date <-
+    .data %>%
+    dplyr::arrange(location, epiweek, epiyear) %>%
+    dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
+    dplyr::pull(date) %>%
+    utils::tail(1)
+
+  tmp <- .data
+  ## set up "new data" with the epiweek/epiyear week being forecasted  ...
+  ## and calculation of lagged flu admissions (1 to 4 weeks back)
+  new_data <-
+    dplyr::tibble(lag_1 = utils::tail(tmp$flu.admits, 4)[4],
+                  lag_2 = utils::tail(tmp$flu.admits, 4)[3],
+                  lag_3 = utils::tail(tmp$flu.admits, 4)[2],
+                  lag_4 = utils::tail(tmp$flu.admits, 4)[1],
+                  epiweek = lubridate::epiweek(last_date + 7),
+                  epiyear = lubridate::epiyear(last_date + 7))
+
+  ## this should allow for a constant
+  if(!is.null(new_covariates)) {
+    new_data <-
+      cbind(new_data,new_covariates)
+  }
+  # ## take the fit object provided and use predict
+  point_estimates <-
+    fit %>%
+    predict(new_data) %>%
+    dplyr::select(epiweek, epiyear, estimate) %>%
+    dplyr::mutate(estimate = round(estimate)) %>%
+    dplyr::mutate(quantile = NA) %>%
+    dplyr::select(epiweek, epiyear, quantile, value = estimate)
+
+  ## map the quibble function over the alphas
+  quants <- map_df(alpha, .f = function(x) glm_quibble(fit = fit, new_data = new_data, alpha = x))
+
+  ## prep data
+  dplyr::bind_rows(point_estimates,quants) %>%
+    dplyr::arrange(epiyear,epiweek, quantile) %>%
+    dplyr::left_join(new_data, by = c("epiweek","epiyear")) %>%
+    dplyr::select(epiweek,epiyear,quantile,value)
+}
+
+#' Run glm modeling and forecasting
+#'
+#' This is a wrapper function that pipelines influenza hospitalization modeling (\link[fiphde]{glm_fit}) and forecasting (\link[fiphde]{glm_forecast}).
+#'
+#' @param .data Data including all explanatory and outcome variables needed for modeling
+#' @param .models List of models defined as \link[trending]{trending_model} objects
+#' @param new_covariates Tibble with one column per covariate, and n rows for n horizons being forecasted
+#' @param horizon Number of weeks ahead for forecasting
+#' @param alpha Vector specifying the threshold(s) to be used for prediction intervals; alpha of `0.05` would correspond to 95% PI; default is `c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2` to range of intervals
+#'
+#' @return Named list with two elements:
+#'
+#' - model: Output from \link[fiphde]{glm_fit} with selected model fit
+#' - forecasts: Output from \link[fiphde]{glm_forecast} with forecasts from each horizon combined as a single tibble
+#'
+#' @export
+#' @md
+glm_wrap <- function(.data, .models, new_covariates = NULL, horizon = 4, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) {
+
+  tmp_fit <- glm_fit(.data, .models = .models)
+
+  stopifnot(nrow(new_covariates) == horizon)
+  message("Forecasting 1 week ahead")
+  tmp_forc <-
+    glm_forecast(.data = .data, new_covariates = new_covariates[1,], fit = tmp_fit$fit, alpha = alpha)
+
+  forc_list <- list()
+  forc_list[[1]] <- tmp_forc
+
+  if(horizon > 1) {
+
+    for(i in 2:horizon) {
+
+      message(sprintf("Forecasting %d week ahead",i))
+
+      prev_weeks <-
+        do.call("rbind", forc_list)  %>%
+        ## get point estimate ... which will have NA quantile value
+        dplyr::filter(is.na(quantile)) %>%
+        dplyr::rename(flu.admits = value) %>%
+        dplyr::select(-quantile) %>%
+        dplyr::mutate(flu.admits.cov = NA, location = "US")
+
+      forc_list[[i]] <- glm_forecast(.data = bind_rows(.data, prev_weeks),
+                                     new_covariates = new_covariates[i,],
+                                     fit = tmp_fit$fit,
+                                     alpha = alpha)
+
+    }
+  }
+  forc_res <- do.call("rbind", forc_list)
+  return(list(model = tmp_fit, forecasts = forc_res))
+}
diff --git a/R/utils.R b/R/utils.R
index dfd6fc9..3860643 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -55,3 +55,61 @@ this_monday <- function() {
 is_monday <- function() {
   lubridate::wday(lubridate::today(), label=TRUE) %in% c("Mon")
 }
+
+#' Calculate WIS score
+#'
+#' Helper function to calculate weighted interval score (WIS) for prepped forecasts
+#'
+#' @param .forecasts Tibble with prepped foreacsts
+#' @param .test Tibble with test data including observed value for flu admissions stored in "flu.admits" column
+#'
+#' @return Tibble with the WIS for each combination of epiweek and epiyear
+#' @export
+#'
+wis_score <- function(.forecasts, .test) {
+  .forecasts %>%
+    dplyr::left_join(.test) %>%
+    dplyr::select(epiweek,epiyear,quantile,value,flu.admits) %>%
+    dplyr::group_by(epiweek, epiyear) %>%
+    dplyr::summarise(wis = evalcast::weighted_interval_score(quantile = quantile, value = value, actual_value = flu.admits))
+}
+
+#' Plot forecasts against observed data
+#'
+#' This helper function creates a plot to visualize the forecasted point estimates (and 95% prediction interval) alongside the observed data.
+#'
+#' @param .forecasts Tibble with prepped forecasts
+#' @param .train Tibble with data used for modeling
+#' @param .test Tibble with observed data held out from modeling
+#'
+#' @return A `ggplot2` plot object
+#' @export
+#'
+#'
+plot_forc <- function(.forecasts, .train, .test) {
+
+  forc_dat <-
+    .forecasts %>%
+    dplyr::filter(quantile %in% c(NA,0.025,0.975)) %>%
+    tidyr::spread(quantile,value) %>%
+    dplyr::rename(lower = `0.025`, upper = `0.975`, mean = `<NA>`)
+
+  .test %>%
+    dplyr::bind_rows(.train) %>%
+    dplyr::select(epiweek,epiyear, truth = flu.admits, location) %>%
+    dplyr::left_join(forc_dat) %>%
+    dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
+    ggplot2::ggplot() +
+    ggplot2::geom_line(ggplot2::aes(date,truth), lwd = 2, col = "black") +
+    ggplot2::geom_line(ggplot2::aes(date,mean), lwd = 2, alpha = 0.5, lty = "solid", col = "firebrick") +
+    ggplot2::geom_ribbon(ggplot2::aes(date, ymin = lower, ymax = upper), alpha = 0.25, fill = "firebrick") +
+    ## get an upper limit from whatever the max of observed or forcasted hospitalizations is
+    ggplot2::scale_y_continuous(limits = c(0,max(c(.test$flu.admits, .train$flu.admits, forc_dat$upper)))) +
+    ggplot2::scale_x_date(date_labels = "%Y-%m", date_breaks = "month") +
+    ggplot2::labs(x = "Date", y = "Count", title = "Influenza hospitalizations") +
+    ggplot2::theme_minimal() +
+    ggplot2::facet_wrap(~ location)
+
+}
+
+
diff --git a/man/glm_fit.Rd b/man/glm_fit.Rd
new file mode 100644
index 0000000..54cff76
--- /dev/null
+++ b/man/glm_fit.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/glm.R
+\name{glm_fit}
+\alias{glm_fit}
+\title{Fit glm models}
+\usage{
+glm_fit(.data, .models)
+}
+\arguments{
+\item{.data}{Data including all explanatory and outcome variables needed for modeling; must include column for "location"}
+
+\item{.models}{List of models defined as \link[trending]{trending_model} objects}
+}
+\value{
+A \code{tibble} containing characteristics from the "best" \code{glm} model (i.e., the model from ".models" list with lowest RMSE). The columns in this \code{tibble} include:
+\itemize{
+\item model_class: The "type" of model for the best fit
+\item fit: The fitted model object for the best fit
+\item location: The geographic
+\item data: Original model fit data as a \code{tibble} in a list column
+}
+}
+\description{
+This helper function is used in \link[fiphde]{glm_wrap} to fit a list of models and select the best one.
+}
diff --git a/man/glm_forecast.Rd b/man/glm_forecast.Rd
new file mode 100644
index 0000000..0a85929
--- /dev/null
+++ b/man/glm_forecast.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/glm.R
+\name{glm_forecast}
+\alias{glm_forecast}
+\title{Forecast glm models}
+\usage{
+glm_forecast(
+  .data,
+  new_covariates = NULL,
+  fit,
+  alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2
+)
+}
+\arguments{
+\item{.data}{Data including all explanatory and outcome variables needed for modeling}
+
+\item{new_covariates}{Tibble with one column per covariate, and n rows for n horizons being forecasted}
+
+\item{fit}{Fitted model object from \link[fiphde]{glm_fit}}
+
+\item{alpha}{Vector specifying the threshold(s) to be used for prediction intervals; alpha of \code{0.05} would correspond to 95\% PI; default is \code{c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2} to range of intervals}
+}
+\value{
+Tibble with forecasts (quantiles and point estimates)
+}
+\description{
+This function uses fitted model object from \link[fiphde]{glm_fit} and future covariate data to create probablistic forecasts at specific quantiles derived from the "alpha" parameter.
+}
diff --git a/man/glm_quibble.Rd b/man/glm_quibble.Rd
new file mode 100644
index 0000000..2b78956
--- /dev/null
+++ b/man/glm_quibble.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/glm.R
+\name{glm_quibble}
+\alias{glm_quibble}
+\title{Get quantiles from prediction intervals}
+\usage{
+glm_quibble(
+  fit,
+  new_data,
+  alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2
+)
+}
+\arguments{
+\item{fit}{Fitted model object from \link[fiphde]{glm_fit}}
+
+\item{new_data}{Tibble with new data on which the \link[trending]{predict.trending_model_fit} method should run}
+
+\item{alpha}{Vector specifying the threshold(s) to be used for prediction intervals; alpha of \code{0.05} would correspond to 95\% PI; default is \code{c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2} to range of intervals}
+}
+\value{
+A tibble with predicted values at each quantile (lower and upper bound for each value of "alpha")
+}
+\description{
+This function runs the \link[trending]{predict.trending_model_fit} method on a fitted model at specified values of "alpha" in order to create a range of prediction intervals. The processing also includes steps to convert the alpha to corresponding quantile values at upper and lower bounds.
+}
diff --git a/man/glm_wrap.Rd b/man/glm_wrap.Rd
new file mode 100644
index 0000000..b79161a
--- /dev/null
+++ b/man/glm_wrap.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/glm.R
+\name{glm_wrap}
+\alias{glm_wrap}
+\title{Run glm modeling and forecasting}
+\usage{
+glm_wrap(
+  .data,
+  .models,
+  new_covariates = NULL,
+  horizon = 4,
+  alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2
+)
+}
+\arguments{
+\item{.data}{Data including all explanatory and outcome variables needed for modeling}
+
+\item{.models}{List of models defined as \link[trending]{trending_model} objects}
+
+\item{new_covariates}{Tibble with one column per covariate, and n rows for n horizons being forecasted}
+
+\item{horizon}{Number of weeks ahead for forecasting}
+
+\item{alpha}{Vector specifying the threshold(s) to be used for prediction intervals; alpha of \code{0.05} would correspond to 95\% PI; default is \code{c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2} to range of intervals}
+}
+\value{
+Named list with two elements:
+\itemize{
+\item model: Output from \link[fiphde]{glm_fit} with selected model fit
+\item forecasts: Output from \link[fiphde]{glm_forecast} with forecasts from each horizon combined as a single tibble
+}
+}
+\description{
+This is a wrapper function that pipelines influenza hospitalization modeling (\link[fiphde]{glm_fit}) and forecasting (\link[fiphde]{glm_forecast}).
+}
diff --git a/man/plot_forc.Rd b/man/plot_forc.Rd
new file mode 100644
index 0000000..4be35fa
--- /dev/null
+++ b/man/plot_forc.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{plot_forc}
+\alias{plot_forc}
+\title{Plot forecasts against observed data}
+\usage{
+plot_forc(.forecasts, .train, .test)
+}
+\arguments{
+\item{.forecasts}{Tibble with prepped forecasts}
+
+\item{.train}{Tibble with data used for modeling}
+
+\item{.test}{Tibble with observed data held out from modeling}
+}
+\value{
+A \code{ggplot2} plot object
+}
+\description{
+This helper function creates a plot to visualize the forecasted point estimates (and 95\% prediction interval) alongside the observed data.
+}
diff --git a/man/wis_score.Rd b/man/wis_score.Rd
new file mode 100644
index 0000000..908b1e1
--- /dev/null
+++ b/man/wis_score.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{wis_score}
+\alias{wis_score}
+\title{Calculate WIS score}
+\usage{
+wis_score(.forecasts, .test)
+}
+\arguments{
+\item{.forecasts}{Tibble with prepped foreacsts}
+
+\item{.test}{Tibble with test data including observed value for flu admissions stored in "flu.admits" column}
+}
+\value{
+Tibble with the WIS for each combination of epiweek and epiyear
+}
+\description{
+Helper function to calculate weighted interval score (WIS) for prepped forecasts
+}

From 2c6cc0fac7c275ac209dceee88dc98773567e3d8 Mon Sep 17 00:00:00 2001
From: VP Nagraj <nagraj@nagraj.net>
Date: Tue, 21 Dec 2021 11:41:19 -0500
Subject: [PATCH 2/5] addressing issues with devtools::check()

---
 R/fiphde.R | 16 ++++++++++++++++
 R/glm.R    | 12 ++++++------
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/R/fiphde.R b/R/fiphde.R
index ac30e0c..3beddfe 100644
--- a/R/fiphde.R
+++ b/R/fiphde.R
@@ -47,5 +47,21 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c(".",
                                                         "sea_label",
                                                         "monday",
                                                         "yweek",
+                                                        "0.025",
+                                                        "0.975",
+                                                        "<NA>",
+                                                        "error",
+                                                        "estimate",
+                                                        "model",
+                                                        "lower",
+                                                        "upper",
+                                                        "lower_pi",
+                                                        "upper_pi",
+                                                        "flu.admits",
+                                                        "quantile",
+                                                        "fit",
+                                                        "truth",
+                                                        "rmse",
+                                                        "value",
                                                         "."))
 
diff --git a/R/glm.R b/R/glm.R
index 7ae5169..aa1873c 100644
--- a/R/glm.R
+++ b/R/glm.R
@@ -29,7 +29,7 @@ glm_fit <- function(.data,
     trendeval::evaluate_models(
       .models,
       dat,
-      method = evaluate_resampling,
+      method = trendeval::evaluate_resampling,
       metrics = list(yardstick::rmse)
     )
 
@@ -51,7 +51,7 @@ glm_fit <- function(.data,
   ret <- dplyr::tibble(model_class = best_by_rmse$model_class,
                        fit = tmp_fit,
                        location = unique(dat$location),
-                       data = tidyr::nest(dat, fit_data = everything()))
+                       data = tidyr::nest(dat, fit_data = dplyr::everything()))
 
   message("Selected model ...")
   message(as.character(ret$fit$fitted_model$family)[1])
@@ -84,7 +84,7 @@ glm_quibble <- function(fit, new_data, alpha = c(0.01, 0.025, seq(0.05, 0.45, by
   ## run the predict method on the fitted model
   ## use the given alpha
   fit %>%
-    predict(new_data, alpha = alpha) %>%
+    stats::predict(new_data, alpha = alpha) %>%
     ## get just the prediction interval bounds ...
     ## index (time column must be named index) ...
     dplyr::select(epiyear, epiweek, lower_pi, upper_pi) %>%
@@ -137,14 +137,14 @@ glm_forecast <- function(.data, new_covariates = NULL, fit, alpha = c(0.01, 0.02
   # ## take the fit object provided and use predict
   point_estimates <-
     fit %>%
-    predict(new_data) %>%
+    stats::predict(new_data) %>%
     dplyr::select(epiweek, epiyear, estimate) %>%
     dplyr::mutate(estimate = round(estimate)) %>%
     dplyr::mutate(quantile = NA) %>%
     dplyr::select(epiweek, epiyear, quantile, value = estimate)
 
   ## map the quibble function over the alphas
-  quants <- map_df(alpha, .f = function(x) glm_quibble(fit = fit, new_data = new_data, alpha = x))
+  quants <- purrr::map_df(alpha, .f = function(x) glm_quibble(fit = fit, new_data = new_data, alpha = x))
 
   ## prep data
   dplyr::bind_rows(point_estimates,quants) %>%
@@ -196,7 +196,7 @@ glm_wrap <- function(.data, .models, new_covariates = NULL, horizon = 4, alpha =
         dplyr::select(-quantile) %>%
         dplyr::mutate(flu.admits.cov = NA, location = "US")
 
-      forc_list[[i]] <- glm_forecast(.data = bind_rows(.data, prev_weeks),
+      forc_list[[i]] <- glm_forecast(.data = dplyr::bind_rows(.data, prev_weeks),
                                      new_covariates = new_covariates[i,],
                                      fit = tmp_fit$fit,
                                      alpha = alpha)

From efa40f8a72bf387ab7f7e79339241ef6c373de6a Mon Sep 17 00:00:00 2001
From: VP Nagraj <nagraj@nagraj.net>
Date: Tue, 21 Dec 2021 11:45:02 -0500
Subject: [PATCH 3/5] forgot to call trending::fit

---
 R/fiphde.R | 1 -
 R/glm.R    | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/fiphde.R b/R/fiphde.R
index 3beddfe..f17e105 100644
--- a/R/fiphde.R
+++ b/R/fiphde.R
@@ -59,7 +59,6 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c(".",
                                                         "upper_pi",
                                                         "flu.admits",
                                                         "quantile",
-                                                        "fit",
                                                         "truth",
                                                         "rmse",
                                                         "value",
diff --git a/R/glm.R b/R/glm.R
index aa1873c..c3da768 100644
--- a/R/glm.R
+++ b/R/glm.R
@@ -45,7 +45,7 @@ glm_fit <- function(.data,
   ## fit the model
   tmp_fit <-
     best_by_rmse %>%
-    fit(dat)
+    trending::fit(dat)
 
   ## construct tibble with model type, actual fit, and the location
   ret <- dplyr::tibble(model_class = best_by_rmse$model_class,

From e89bd4404183db38571a3600feee44b2a9243fae Mon Sep 17 00:00:00 2001
From: VP Nagraj <nagraj@nagraj.net>
Date: Tue, 21 Dec 2021 11:45:20 -0500
Subject: [PATCH 4/5] adding scratch example of ILI to flu hosp forecasting
 workflow

---
 scratch/worfklow.R | 93 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 scratch/worfklow.R

diff --git a/scratch/worfklow.R b/scratch/worfklow.R
new file mode 100644
index 0000000..a980c7e
--- /dev/null
+++ b/scratch/worfklow.R
@@ -0,0 +1,93 @@
+library(fiphde)
+library(tidyverse)
+library(lubridate)
+library(focustools)
+library(fabletools)
+library(fable)
+
+## retreive ILI data
+ilidat <- get_cdc_ili()
+
+## set a date from whihc the
+trim_date <- "2020-03-01"
+
+us_ilidat <-
+  ilidat %>%
+  filter(location == "US") %>%
+  mutate(epiyear = epiyear(week_start),
+         epiweek = epiweek(week_start)) %>%
+  filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) %>%
+  select(location, epiyear, epiweek, weighted_ili)
+
+us_ilidat_tsibble <-
+  us_ilidat %>%
+  make_tsibble(chop=TRUE)
+
+us_ili_fit <-
+  us_ilidat_tsibble %>%
+  ## NOTE: parameter space here ??
+  model(arima = ARIMA(weighted_ili~PDQ(0,0,0)+pdq(0:5,0:5,0:5), stepwise=FALSE, approximation=FALSE))
+
+us_ili_fit %>%
+  extract_arima_params()
+
+## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools
+us_ili_forecast <-
+  focustools::ts_forecast(us_ili_fit, outcome = "icases")
+
+us_ili_forecast
+
+hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12")
+
+tmp_weekly_flu <-
+  hosp %>%
+  mutate(date = date - 1) %>%
+  mutate(epiweek = lubridate::epiweek(date),
+         epiyear = lubridate::epiyear(date)) %>%
+  group_by(epiweek, epiyear) %>%
+  summarise(flu.admits = sum(flu.admits, na.rm = TRUE),
+            flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE),
+            .groups = "drop") %>%
+  mutate(location = "US", .before = "epiweek") %>%
+  left_join(us_ilidat)
+
+## add lag columns
+tmp_weekly_flu_w_lag <-
+  tmp_weekly_flu %>%
+  mutate(lag_1 = lag(flu.admits, 1),
+         lag_2 = lag(flu.admits, 2),
+         lag_3 = lag(flu.admits, 3),
+         lag_4 = lag(flu.admits, 4)) %>%
+  filter(complete.cases(.)) %>%
+  dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
+  filter(date >= max(date) - 7*24)
+
+train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3)
+test_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() >= n() - 3)
+
+models <-
+  list(
+    glm_poisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "poisson"),
+    glm_quasipoisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "quasipoisson"),
+    glm_negbin_ili = trending::glm_nb_model(flu.admits ~ weighted_ili),
+    glm_poisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "poisson"),
+    glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "quasipoisson"),
+    glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + offset(flu.admits.cov)),
+    glm_poisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"),
+    glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"),
+    glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4),
+    glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"),
+    glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"),
+    glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov))
+  )
+
+res <- glm_wrap(train_dat,
+                ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above
+                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = c(2.31,2.43,2.45,2.68)),
+                .models = models,
+                alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2)
+
+res$forecasts
+res$model
+
+plot_forc(res$forecasts, train_dat, test_dat)

From 3f8565f6c56d1440a6faff539ba578974af91463 Mon Sep 17 00:00:00 2001
From: VP Nagraj <nagraj@nagraj.net>
Date: Tue, 21 Dec 2021 11:54:07 -0500
Subject: [PATCH 5/5] edits to hosp cols to numeric

---
 scratch/worfklow.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scratch/worfklow.R b/scratch/worfklow.R
index a980c7e..fe54de9 100644
--- a/scratch/worfklow.R
+++ b/scratch/worfklow.R
@@ -42,6 +42,8 @@ hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12")
 tmp_weekly_flu <-
   hosp %>%
   mutate(date = date - 1) %>%
+  mutate(flu.admits = as.numeric(flu.admits),
+         flu.admits.cov = as.numeric(flu.admits.cov)) %>%
   mutate(epiweek = lubridate::epiweek(date),
          epiyear = lubridate::epiyear(date)) %>%
   group_by(epiweek, epiyear) %>%
@@ -91,3 +93,4 @@ res$forecasts
 res$model
 
 plot_forc(res$forecasts, train_dat, test_dat)
+