From 8c495f77da7daa5dbcf178295687959423c40d67 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 06:54:15 -0500
Subject: [PATCH 01/23] ili forecast in scratch #7

---
 scratch/ili-forecast.R | 127 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 scratch/ili-forecast.R

diff --git a/scratch/ili-forecast.R b/scratch/ili-forecast.R
new file mode 100644
index 0000000..3a9802b
--- /dev/null
+++ b/scratch/ili-forecast.R
@@ -0,0 +1,127 @@
+library(fiphde)
+# library(tidyverse)
+# library(lubridate)
+# library(focustools)
+# library(fabletools)
+# library(fable)
+
+### Function arguments
+#' @param trim_date Earliest start date you want to use for ILI data
+#' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4`
+#' @param region Either "national" or "state" or c("national", "state") for both national and state-level data.
+#' @param location Vector specifying locations to filter to; `'US'` by default.
+trim_date="2020-03-01"
+horizon=4
+region="national"
+location="US"
+
+# Set the years passed to get_cdc_ili starting with the year of the trim_date to the current year
+years <- lubridate::year(trim_date):lubridate::year(lubridate::today())
+
+## retreive ILI data
+ilidat_all <- get_cdc_ili(region=region, years=years)
+
+## subset to US only and subset to the trim date to present
+ilidat <-
+  ilidat_all %>%
+  dplyr::filter(location %in% location) %>%
+  dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) %>%
+  dplyr::select(location, year, week, weighted_ili)
+
+## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week
+ilidat_tsibble <-
+  ilidat %>%
+  fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE)
+tail(ilidat_tsibble)
+
+# Nonseasonal fit: PDQ(0, 0, 0)
+# Nonseasonal components unrestricted: pdq(0:5,0:5,0:5)
+ili_fit <- fabletools::model(ilidat_tsibble,
+                             arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5),
+                                                  stepwise=FALSE,
+                                                  approximation=FALSE))
+# ili_fit %>% focustools::extract_arima_params()
+
+## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools
+# ili_forecast <- focustools::ts_forecast(ili_fit, outcome = "icases")
+
+# Get the forecast
+ili_forecast <- fabletools::forecast(ili_fit, h=horizon)
+
+## Look at the quantiles
+# ili_forecast %>%
+#   fabletools::hilo()
+# ili_forecast %>%
+#   fabletools::hilo() %>%
+#   fabletools::unpack_hilo(`80%`) %>%
+#   fabletools::unpack_hilo(`95%`)
+
+# Get the next #horizon weeks in a tibble
+ili_future <- ili_forecast %>%
+  tibble::as_tibble() %>%
+  dplyr::mutate(year=lubridate::epiyear(yweek)) %>%
+  dplyr::mutate(week=lubridate::epiweek(yweek)) %>%
+  dplyr::select(location, year, week, weighted_ili=.mean)
+
+
+ili_bound <- dplyr::bind_rows(ilidat     %>% dplyr::mutate(forecasted=FALSE),
+                              ili_future %>% dplyr::mutate(forecasted=TRUE))
+
+return(ili_bound)
+
+source("glm.R")
+
+hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12")
+
+tmp_weekly_flu <-
+  hosp %>%
+  mutate(date = date - 1) %>%
+  mutate(epiweek = lubridate::epiweek(date),
+         epiyear = lubridate::epiyear(date)) %>%
+  group_by(epiweek, epiyear) %>%
+  summarise(flu.admits = sum(flu.admits, na.rm = TRUE),
+            flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE),
+            .groups = "drop") %>%
+  mutate(location = "US", .before = "epiweek") %>%
+  left_join(ilidat)
+
+## add lag columns
+tmp_weekly_flu_w_lag <-
+  tmp_weekly_flu %>%
+  mutate(lag_1 = lag(flu.admits, 1),
+         lag_2 = lag(flu.admits, 2),
+         lag_3 = lag(flu.admits, 3),
+         lag_4 = lag(flu.admits, 4)) %>%
+  filter(complete.cases(.)) %>%
+  dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
+  filter(date >= max(date) - 7*24)
+
+train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3)
+test_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() >= n() - 3)
+
+models <-
+  list(
+    glm_poisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "poisson"),
+    glm_quasipoisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "quasipoisson"),
+    glm_negbin_ili = trending::glm_nb_model(flu.admits ~ weighted_ili),
+    glm_poisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "poisson"),
+    glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "quasipoisson"),
+    glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + offset(flu.admits.cov)),
+    glm_poisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"),
+    glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"),
+    glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4),
+    glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"),
+    glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"),
+    glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov))
+  )
+
+res <- glm_wrap(train_dat,
+                ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above
+                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = c(2.31,2.43,2.45,2.68)),
+                .models = models,
+                alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2)
+
+res$forecasts
+res$model
+
+plot_forc(res$forecasts, train_dat, test_dat)

From f68006ca544dee615c290e3925c91713434f0fa6 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 07:24:41 -0500
Subject: [PATCH 02/23] first draft on function to forecast ILI #7

---
 DESCRIPTION         |  2 +
 NAMESPACE           |  1 +
 R/fiphde.R          |  2 +
 R/forecast.R        | 89 +++++++++++++++++++++++++++++++++++++++++++++
 man/forecast_ili.Rd | 51 ++++++++++++++++++++++++++
 5 files changed, 145 insertions(+)
 create mode 100644 R/forecast.R
 create mode 100644 man/forecast_ili.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index ba36044..994fc5a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -32,6 +32,8 @@ RoxygenNote: 7.1.2
 Imports: 
     cdcfluview,
     dplyr,
+    fable,
+    fabletools,
     lubridate,
     magrittr,
     MMWRweek,
diff --git a/NAMESPACE b/NAMESPACE
index 9ebdc39..4ba5417 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
+export(forecast_ili)
 export(get_cdc_hosp)
 export(get_cdc_ili)
 export(get_cdc_vax)
diff --git a/R/fiphde.R b/R/fiphde.R
index 7637798..d55dc24 100644
--- a/R/fiphde.R
+++ b/R/fiphde.R
@@ -48,5 +48,7 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c(".",
                                                         "sea_label",
                                                         "monday",
                                                         "yweek",
+                                                        "weighted_ili",
+                                                        ".mean",
                                                         "."))
 
diff --git a/R/forecast.R b/R/forecast.R
new file mode 100644
index 0000000..2dbcc23
--- /dev/null
+++ b/R/forecast.R
@@ -0,0 +1,89 @@
+#' @title Forecast ILI
+#' @description Forecasts ILI up to specified weeks in the future. Used in downstream modeling.
+#' @details Currently limited to one location only.
+#' @param ilidat Data returned from [get_cdc_ili].
+#' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4`
+#' @param location Vector specifying locations to filter to; `'US'` by default.
+#' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim.
+#' @return A named list containing:
+#' 1. `ilidat`: The data sent into the function filtered to the location and the `trim_date`. Select columns returned.
+#' 1. `ilidat_tsibble`: The `tsibble` class object returned by running [make_tsibble] on the data above.
+#' 1. `ili_fit`: The fit from [fabletools::model].
+#' 1. `ili_forecast`: The forecast from [fabletools::forecast] at the specified horizon.
+#' 1. `ili_future`: The `horizon`-number of weeks of ILI data forecasted into the future.
+#' 1. `ili_bound`: The data in 1 bound to the data in 5.
+#' @examples
+#' \dontrun{
+#' # Get data
+#' ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today()))
+#' # Using data only from march 2020 forward
+#' ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01")
+#' head(ilifor_2020$ili_bound)
+#' tail(ilifor_2020$ili_bound)
+#' ilifor_2020$ili_fit
+#' ilifor_2020$ili_fit %>% focustools::extract_arima_params()
+#' ilifor_2020$ili_forecast
+#' # Using all the data we have (2010-forward, in this example)
+#' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US")
+#' head(ilifor_2010$ili_bound)
+#' tail(ilifor_2010$ili_bound)
+#' }
+#' @export
+forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL) {
+
+  # If trim_date is not null, trim to selected trim_date
+  if (!is.null(trim_date)) {
+    ilidat <-
+      ilidat %>%
+      dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d"))
+  }
+
+  ## subset to selected location and get columns you care about
+  ilidat <-
+    ilidat %>%
+    dplyr::filter(location %in% location) %>%
+    dplyr::select(location, year, week, weighted_ili)
+
+
+  ## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week
+  ilidat_tsibble <-
+    ilidat %>%
+    fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE)
+
+  # Nonseasonal fit: PDQ(0, 0, 0)
+  # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5)
+  ili_fit <- fabletools::model(ilidat_tsibble,
+                               arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5),
+                                                    stepwise=FALSE,
+                                                    approximation=FALSE))
+  # ili_fit %>% focustools::extract_arima_params()
+
+  ## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools
+  # ili_forecast <- focustools::ts_forecast(ili_fit, outcome = "icases")
+
+  # Get the forecast
+  ili_forecast <- fabletools::forecast(ili_fit, h=horizon)
+
+  ## Look at the quantiles
+  # ili_forecast %>%
+  #   fabletools::hilo()
+  # ili_forecast %>%
+  #   fabletools::hilo() %>%
+  #   fabletools::unpack_hilo(`80%`) %>%
+  #   fabletools::unpack_hilo(`95%`)
+
+  # Get the next #horizon weeks in a tibble
+  ili_future <- ili_forecast %>%
+    tibble::as_tibble() %>%
+    dplyr::mutate(year=lubridate::epiyear(yweek)) %>%
+    dplyr::mutate(week=lubridate::epiweek(yweek)) %>%
+    dplyr::select(location, year, week, weighted_ili=.mean)
+
+  # bind the historical data to the new data
+  ili_bound <- dplyr::bind_rows(ilidat     %>% dplyr::mutate(forecasted=FALSE),
+                                ili_future %>% dplyr::mutate(forecasted=TRUE))
+
+  # Create results
+  res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound)
+  return(res)
+}
diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
new file mode 100644
index 0000000..479480d
--- /dev/null
+++ b/man/forecast_ili.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/forecast.R
+\name{forecast_ili}
+\alias{forecast_ili}
+\title{Forecast ILI}
+\usage{
+forecast_ili(ilidat, horizon = 4L, location = "US", trim_date = NULL)
+}
+\arguments{
+\item{ilidat}{Data returned from \link{get_cdc_ili}.}
+
+\item{horizon}{Optional horizon periods through which the forecasts should be generated; default is \code{4}}
+
+\item{location}{Vector specifying locations to filter to; \code{'US'} by default.}
+
+\item{trim_date}{Earliest start date you want to use for ILI data. Default \code{NULL} doesn't trim.}
+}
+\value{
+A named list containing:
+\enumerate{
+\item \code{ilidat}: The data sent into the function filtered to the location and the \code{trim_date}. Select columns returned.
+\item \code{ilidat_tsibble}: The \code{tsibble} class object returned by running \link{make_tsibble} on the data above.
+\item \code{ili_fit}: The fit from \link[fabletools:model]{fabletools::model}.
+\item \code{ili_forecast}: The forecast from \link[fabletools:forecast]{fabletools::forecast} at the specified horizon.
+\item \code{ili_future}: The \code{horizon}-number of weeks of ILI data forecasted into the future.
+\item \code{ili_bound}: The data in 1 bound to the data in 5.
+}
+}
+\description{
+Forecasts ILI up to specified weeks in the future. Used in downstream modeling.
+}
+\details{
+Currently limited to one location only.
+}
+\examples{
+\dontrun{
+# Get data
+ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today()))
+# Using data only from march 2020 forward
+ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01")
+head(ilifor_2020$ili_bound)
+tail(ilifor_2020$ili_bound)
+ilifor_2020$ili_fit
+ilifor_2020$ili_fit \%>\% focustools::extract_arima_params()
+ilifor_2020$ili_forecast
+# Using all the data we have (2010-forward, in this example)
+ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US")
+head(ilifor_2010$ili_bound)
+tail(ilifor_2010$ili_bound)
+}
+}

From df64e62c2cf76079554d09c75f0c169322d2dcdb Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 11:11:45 -0500
Subject: [PATCH 03/23] add constrained parameter, additional examples, and
 extracted arima params without needing focustools dependency

---
 R/forecast.R        | 56 +++++++++++++++++++++++++++++++++------------
 man/forecast_ili.Rd | 34 +++++++++++++++++++++++----
 2 files changed, 72 insertions(+), 18 deletions(-)

diff --git a/R/forecast.R b/R/forecast.R
index 2dbcc23..a149fdb 100644
--- a/R/forecast.R
+++ b/R/forecast.R
@@ -5,6 +5,7 @@
 #' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4`
 #' @param location Vector specifying locations to filter to; `'US'` by default.
 #' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim.
+#' @param constrained Should the model be constrained to a non-seasonal model? Default `TRUE` sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See [fable::ARIMA].
 #' @return A named list containing:
 #' 1. `ilidat`: The data sent into the function filtered to the location and the `trim_date`. Select columns returned.
 #' 1. `ilidat_tsibble`: The `tsibble` class object returned by running [make_tsibble] on the data above.
@@ -12,6 +13,7 @@
 #' 1. `ili_forecast`: The forecast from [fabletools::forecast] at the specified horizon.
 #' 1. `ili_future`: The `horizon`-number of weeks of ILI data forecasted into the future.
 #' 1. `ili_bound`: The data in 1 bound to the data in 5.
+#' 1. `arima_params`: A named character vector of the ARIMA model parameters.
 #' @examples
 #' \dontrun{
 #' # Get data
@@ -19,17 +21,34 @@
 #' # Using data only from march 2020 forward
 #' ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01")
 #' head(ilifor_2020$ili_bound)
-#' tail(ilifor_2020$ili_bound)
+#' tail(ilifor_2020$ili_bound, 10)
 #' ilifor_2020$ili_fit
 #' ilifor_2020$ili_fit %>% focustools::extract_arima_params()
+#' ilifor_2020$arima_params
 #' ilifor_2020$ili_forecast
 #' # Using all the data we have (2010-forward, in this example)
-#' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US")
+#' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE)
 #' head(ilifor_2010$ili_bound)
-#' tail(ilifor_2010$ili_bound)
+#' tail(ilifor_2010$ili_bound, 10)
+#' ilifor_2010$ili_fit
+#' ilifor_2010$ili_fit %>% focustools::extract_arima_params()
+#' ilifor_2010$arima_params
+#' ilifor_2010$ili_forecast
+#' # Plot both forecasts
+#' library(dplyr)
+#' library(tidyr)
+#' library(ggplot2)
+#' inner_join(ilifor_2020$ili_bound %>% rename(nonseasonal_weighted_ili=weighted_ili),
+#'            ilifor_2010$ili_bound %>% rename(unconstrained_weighted_ili=weighted_ili),
+#'            by = c("location", "year", "week", "forecasted")) %>%
+#'   gather(key, value, ends_with("ili")) %>%
+#'   mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
+#'   filter(date>"2021-01-01") %>%
+#'   ggplot(aes(date, value)) + geom_point(aes(col=forecasted)) +
+#'   facet_wrap(~key)
 #' }
 #' @export
-forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL) {
+forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, constrained=TRUE) {
 
   # If trim_date is not null, trim to selected trim_date
   if (!is.null(trim_date)) {
@@ -50,16 +69,25 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL) {
     ilidat %>%
     fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE)
 
-  # Nonseasonal fit: PDQ(0, 0, 0)
-  # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5)
-  ili_fit <- fabletools::model(ilidat_tsibble,
-                               arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5),
-                                                    stepwise=FALSE,
-                                                    approximation=FALSE))
-  # ili_fit %>% focustools::extract_arima_params()
+  # Defaults to constrained, non-seasonal model.
+  if (constrained) {
+    # Nonseasonal fit: PDQ(0, 0, 0)
+    # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5)
+    message("Fitting nonseasonal ARIMA model ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5)")
+    ili_fit <- fabletools::model(ilidat_tsibble,
+                                 arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5),
+                                                      stepwise=FALSE,
+                                                      approximation=FALSE))
+  } else {
+    # If unconstrained, need to set stepwise=TRUE and approxmiation=NULL to speed up.
+    message("Fitting unconstrained ARIMA model...")
+    ili_fit <- fabletools::model(ilidat_tsibble,
+                                 arima = fable::ARIMA(weighted_ili,
+                                                      stepwise=TRUE,
+                                                      approximation=NULL))
+  }
 
-  ## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools
-  # ili_forecast <- focustools::ts_forecast(ili_fit, outcome = "icases")
+  arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6])
 
   # Get the forecast
   ili_forecast <- fabletools::forecast(ili_fit, h=horizon)
@@ -84,6 +112,6 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL) {
                                 ili_future %>% dplyr::mutate(forecasted=TRUE))
 
   # Create results
-  res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound)
+  res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params)
   return(res)
 }
diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
index 479480d..bd56905 100644
--- a/man/forecast_ili.Rd
+++ b/man/forecast_ili.Rd
@@ -4,7 +4,13 @@
 \alias{forecast_ili}
 \title{Forecast ILI}
 \usage{
-forecast_ili(ilidat, horizon = 4L, location = "US", trim_date = NULL)
+forecast_ili(
+  ilidat,
+  horizon = 4L,
+  location = "US",
+  trim_date = NULL,
+  constrained = TRUE
+)
 }
 \arguments{
 \item{ilidat}{Data returned from \link{get_cdc_ili}.}
@@ -14,6 +20,8 @@ forecast_ili(ilidat, horizon = 4L, location = "US", trim_date = NULL)
 \item{location}{Vector specifying locations to filter to; \code{'US'} by default.}
 
 \item{trim_date}{Earliest start date you want to use for ILI data. Default \code{NULL} doesn't trim.}
+
+\item{constrained}{Should the model be constrained to a non-seasonal model? Default \code{TRUE} sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See \link[fable:ARIMA]{fable::ARIMA}.}
 }
 \value{
 A named list containing:
@@ -24,6 +32,7 @@ A named list containing:
 \item \code{ili_forecast}: The forecast from \link[fabletools:forecast]{fabletools::forecast} at the specified horizon.
 \item \code{ili_future}: The \code{horizon}-number of weeks of ILI data forecasted into the future.
 \item \code{ili_bound}: The data in 1 bound to the data in 5.
+\item \code{arima_params}: A named character vector of the ARIMA model parameters.
 }
 }
 \description{
@@ -39,13 +48,30 @@ ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::t
 # Using data only from march 2020 forward
 ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01")
 head(ilifor_2020$ili_bound)
-tail(ilifor_2020$ili_bound)
+tail(ilifor_2020$ili_bound, 10)
 ilifor_2020$ili_fit
 ilifor_2020$ili_fit \%>\% focustools::extract_arima_params()
+ilifor_2020$arima_params
 ilifor_2020$ili_forecast
 # Using all the data we have (2010-forward, in this example)
-ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US")
+ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE)
 head(ilifor_2010$ili_bound)
-tail(ilifor_2010$ili_bound)
+tail(ilifor_2010$ili_bound, 10)
+ilifor_2010$ili_fit
+ilifor_2010$ili_fit \%>\% focustools::extract_arima_params()
+ilifor_2010$arima_params
+ilifor_2010$ili_forecast
+# Plot both forecasts
+library(dplyr)
+library(tidyr)
+library(ggplot2)
+inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_weighted_ili=weighted_ili),
+           ilifor_2010$ili_bound \%>\% rename(unconstrained_weighted_ili=weighted_ili),
+           by = c("location", "year", "week", "forecasted")) \%>\%
+  gather(key, value, ends_with("ili")) \%>\%
+  mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\%
+  filter(date>"2021-01-01") \%>\%
+  ggplot(aes(date, value)) + geom_point(aes(col=forecasted)) +
+  facet_wrap(~key)
 }
 }

From a804efc86fbb15cab8187d60fabc598183f038fd Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 11:14:24 -0500
Subject: [PATCH 04/23] better plot in examples

---
 R/forecast.R | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/R/forecast.R b/R/forecast.R
index a149fdb..bcfe505 100644
--- a/R/forecast.R
+++ b/R/forecast.R
@@ -43,9 +43,12 @@
 #'            by = c("location", "year", "week", "forecasted")) %>%
 #'   gather(key, value, ends_with("ili")) %>%
 #'   mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
-#'   filter(date>"2021-01-01") %>%
-#'   ggplot(aes(date, value)) + geom_point(aes(col=forecasted)) +
-#'   facet_wrap(~key)
+#'   filter(date>"2021-07-01") %>%
+#'   ggplot(aes(date, value)) +
+#'   geom_line(alpha=.5) +
+#'   geom_point(aes(col=forecasted)) +
+#'   facet_wrap(~key) +
+#'   theme_bw()
 #' }
 #' @export
 forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, constrained=TRUE) {

From 8bf0edabd4d4e6f2937bc6417b22022b7f443b13 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 11:48:03 -0500
Subject: [PATCH 05/23] add glm script from fluforce-init into scratch

---
 scratch/glm.R | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 187 insertions(+)
 create mode 100644 scratch/glm.R

diff --git a/scratch/glm.R b/scratch/glm.R
new file mode 100644
index 0000000..d4fc534
--- /dev/null
+++ b/scratch/glm.R
@@ -0,0 +1,187 @@
+library(tidyverse)
+library(trendeval)
+library(trending)
+
+glm_fit <- function(.data,
+                    .models,
+                    metrics = list(yardstick::rmse, yardstick::huber_loss, yardstick::mae)) {
+
+  dat <- .data
+
+  message(paste0("Location ... "))
+  message(unique(dat$location))
+
+  ## evaluate models and use metrics provided in arg
+  res <-
+    trendeval::evaluate_models(
+    .models,
+    dat,
+    method = evaluate_resampling,
+    metrics = metrics
+  )
+
+  ## pull the best model by rmse
+  best_by_rmse <-
+    res %>%
+    # dplyr::filter(purrr::map_lgl(warning, is.null)) %>%  # remove models that gave warnings
+    dplyr::filter(purrr::map_lgl(error, is.null))  %>%   # remove models that errored
+    dplyr::slice_min(rmse) %>%
+    dplyr::select(model) %>%
+    purrr::pluck(1,1)
+
+  ## fit the model
+  tmp_fit <-
+    best_by_rmse %>%
+    fit(dat)
+
+  ## construct tibble with model type, actual fit, and the location
+  ret <- dplyr::tibble(model_class = best_by_rmse$model_class,
+                       fit = tmp_fit,
+                       location = unique(dat$location),
+                       data = tidyr::nest(dat, fit_data = everything()))
+
+  message("Selected model ...")
+  message(as.character(ret$fit$fitted_model$family)[1])
+
+  message("Variables ...")
+  message(paste0(names(ret$fit$fitted_model$coefficients), collapse = " + "))
+  return(ret)
+
+}
+
+
+
+## helper to get the quantiles from prediction intervals
+glm_quibble <- function(fit, new_data, alpha) {
+
+  ## get the quantiles from the alpha
+  q_lower <- alpha/2
+  q_upper <- 1 - q_lower
+
+  ## run the predict method on the fitted model
+  ## use the given alpha
+  fit %>%
+    predict(new_data, alpha = alpha) %>%
+    ## get just the prediction interval bounds ...
+    ## index (time column must be named index) ...
+    select(epiyear, epiweek, lower_pi, upper_pi) %>%
+    ## reshape so that its in long format
+    gather(quantile, value, lower_pi:upper_pi) %>%
+    ## and subout out lower_pi/upper_pi for the appropriate quantile
+    mutate(quantile = ifelse(quantile == "lower_pi", q_lower, q_upper))
+}
+
+
+## a generic forecast function to use fit objects from above
+glm_forecast <- function(.data, new_covariates = NULL, fit, alpha = 0.05) {
+
+  ## get the last date from the data provided
+  last_date <-
+    .data %>%
+    dplyr::arrange(location, epiweek, epiyear) %>%
+    dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
+    dplyr::pull(date) %>%
+    tail(1)
+
+  tmp <- .data
+  new_data <-
+    dplyr::tibble(lag_1 = tail(tmp$flu.admits, 4)[4],
+                  lag_2 = tail(tmp$flu.admits, 4)[3],
+                  lag_3 = tail(tmp$flu.admits, 4)[2],
+                  lag_4 = tail(tmp$flu.admits, 4)[1],
+                  epiweek = lubridate::epiweek(last_date + 7),
+                  epiyear = lubridate::epiyear(last_date + 7))
+
+  ## this should allow for a constant
+  if(!is.null(new_covariates)) {
+    new_data <-
+      cbind(new_data,new_covariates)
+  }
+  # ## take the fit object provided and use predict
+  point_estimates <-
+    fit %>%
+    predict(new_data) %>%
+    dplyr::select(epiweek, epiyear, estimate) %>%
+    dplyr::mutate(estimate = round(estimate)) %>%
+    dplyr::mutate(quantile = NA) %>%
+    dplyr::select(epiweek, epiyear, quantile, value = estimate)
+
+  ## map the quibble function over the alphas
+  quants <- map_df(alpha, .f = function(x) glm_quibble(fit = fit, new_data = new_data, alpha = x))
+
+  ## prep data
+  dplyr::bind_rows(point_estimates,quants) %>%
+    dplyr::arrange(epiyear,epiweek, quantile) %>%
+    dplyr::left_join(new_data, by = c("epiweek","epiyear")) %>%
+    dplyr::select(epiweek,epiyear,quantile,value)
+}
+
+glm_wrap <- function(.data, .models, new_covariates = NULL, metrics = list(yardstick::rmse, yardstick::huber_loss, yardstick::mae), horizon = 4, alpha = 0.05) {
+  tmp_fit <- glm_fit(.data, .models = .models)
+
+  stopifnot(nrow(new_covariates) == horizon)
+  message("Forecasting 1 week ahead")
+  tmp_forc <-
+    glm_forecast(.data = .data, new_covariates = new_covariates[1,], fit = tmp_fit$fit, alpha = alpha)
+
+  forc_list <- list()
+  forc_list[[1]] <- tmp_forc
+
+  if(horizon > 1) {
+
+    for(i in 2:horizon) {
+
+      message(sprintf("Forecasting %d week ahead",i))
+
+      prev_weeks <-
+        do.call("rbind", forc_list)  %>%
+        ## get point estimate ... which will have NA quantile value
+        dplyr::filter(is.na(quantile)) %>%
+        dplyr::rename(flu.admits = value) %>%
+        dplyr::select(-quantile) %>%
+        dplyr::mutate(flu.admits.cov = NA, location = "US")
+
+      forc_list[[i]] <- glm_forecast(.data = bind_rows(.data, prev_weeks),
+                                     new_covariates = new_covariates[i,],
+                                     fit = tmp_fit$fit,
+                                     alpha = alpha)
+
+    }
+  }
+  forc_res <- do.call("rbind", forc_list)
+  return(list(model = tmp_fit, forecasts = forc_res))
+}
+
+plot_forc <- function(.forecasts, .train, .test) {
+
+  forc_dat <-
+    .forecasts %>%
+    dplyr::filter(quantile %in% c(NA,0.025,0.975)) %>%
+    tidyr::spread(quantile,value) %>%
+    dplyr::rename(lower = `0.025`, upper = `0.975`, mean = `<NA>`)
+
+  .test %>%
+    dplyr::bind_rows(.train) %>%
+    dplyr::select(epiweek,epiyear, truth = flu.admits, location) %>%
+    dplyr::left_join(forc_dat) %>%
+    dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
+    ggplot2::ggplot() +
+    ggplot2::geom_line(ggplot2::aes(date,truth), lwd = 2, col = "black") +
+    ggplot2::geom_line(ggplot2::aes(date,mean), lwd = 2, alpha = 0.5, lty = "solid", col = "firebrick") +
+    ggplot2::geom_ribbon(ggplot2::aes(date, ymin = lower, ymax = upper), alpha = 0.25, fill = "firebrick") +
+    ## get an upper limit from whatever the max of observed or forcasted hospitalizations is
+    ggplot2::scale_y_continuous(limits = c(0,max(c(.test$flu.admits, .train$flu.admits, forc_dat$upper)))) +
+    ggplot2::scale_x_date(date_labels = "%Y-%m", date_breaks = "month") +
+    ggplot2::labs(x = "Date", y = "Count", title = "Influenza hospitalizations") +
+    ggplot2::theme_minimal() +
+    ggplot2::facet_wrap(~ location)
+
+}
+
+wis_score <- function(.forecasts, .test) {
+  .forecasts %>%
+    dplyr::left_join(test_dat) %>%
+    dplyr::select(epiweek,epiyear,quantile,value,flu.admits) %>%
+    dplyr::group_by(epiweek, epiyear) %>%
+    dplyr::summarise(wis = evalcast::weighted_interval_score(quantile = quantile, value = value, actual_value = flu.admits))
+}

From e8f89f73eda3683888800e71496b1df4b56998bd Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 11:48:17 -0500
Subject: [PATCH 06/23] replace stuff at top with new functions

---
 scratch/ili-forecast.R | 78 ++++--------------------------------------
 1 file changed, 7 insertions(+), 71 deletions(-)

diff --git a/scratch/ili-forecast.R b/scratch/ili-forecast.R
index 3a9802b..703e2de 100644
--- a/scratch/ili-forecast.R
+++ b/scratch/ili-forecast.R
@@ -1,75 +1,10 @@
 library(fiphde)
-# library(tidyverse)
-# library(lubridate)
-# library(focustools)
-# library(fabletools)
-# library(fable)
+ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today()))
+ilifor <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01", constrained=TRUE)
+ilifor$arima_params
+ilifor$ili_bound %>% tail()
 
-### Function arguments
-#' @param trim_date Earliest start date you want to use for ILI data
-#' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4`
-#' @param region Either "national" or "state" or c("national", "state") for both national and state-level data.
-#' @param location Vector specifying locations to filter to; `'US'` by default.
-trim_date="2020-03-01"
-horizon=4
-region="national"
-location="US"
-
-# Set the years passed to get_cdc_ili starting with the year of the trim_date to the current year
-years <- lubridate::year(trim_date):lubridate::year(lubridate::today())
-
-## retreive ILI data
-ilidat_all <- get_cdc_ili(region=region, years=years)
-
-## subset to US only and subset to the trim date to present
-ilidat <-
-  ilidat_all %>%
-  dplyr::filter(location %in% location) %>%
-  dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) %>%
-  dplyr::select(location, year, week, weighted_ili)
-
-## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week
-ilidat_tsibble <-
-  ilidat %>%
-  fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE)
-tail(ilidat_tsibble)
-
-# Nonseasonal fit: PDQ(0, 0, 0)
-# Nonseasonal components unrestricted: pdq(0:5,0:5,0:5)
-ili_fit <- fabletools::model(ilidat_tsibble,
-                             arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5),
-                                                  stepwise=FALSE,
-                                                  approximation=FALSE))
-# ili_fit %>% focustools::extract_arima_params()
-
-## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools
-# ili_forecast <- focustools::ts_forecast(ili_fit, outcome = "icases")
-
-# Get the forecast
-ili_forecast <- fabletools::forecast(ili_fit, h=horizon)
-
-## Look at the quantiles
-# ili_forecast %>%
-#   fabletools::hilo()
-# ili_forecast %>%
-#   fabletools::hilo() %>%
-#   fabletools::unpack_hilo(`80%`) %>%
-#   fabletools::unpack_hilo(`95%`)
-
-# Get the next #horizon weeks in a tibble
-ili_future <- ili_forecast %>%
-  tibble::as_tibble() %>%
-  dplyr::mutate(year=lubridate::epiyear(yweek)) %>%
-  dplyr::mutate(week=lubridate::epiweek(yweek)) %>%
-  dplyr::select(location, year, week, weighted_ili=.mean)
-
-
-ili_bound <- dplyr::bind_rows(ilidat     %>% dplyr::mutate(forecasted=FALSE),
-                              ili_future %>% dplyr::mutate(forecasted=TRUE))
-
-return(ili_bound)
-
-source("glm.R")
+source(here::here("scratch/glm.R"))
 
 hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12")
 
@@ -83,7 +18,8 @@ tmp_weekly_flu <-
             flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE),
             .groups = "drop") %>%
   mutate(location = "US", .before = "epiweek") %>%
-  left_join(ilidat)
+  left_join(ilidat) %>%
+  print()
 
 ## add lag columns
 tmp_weekly_flu_w_lag <-

From 93df399235e14f0e0b6a63b2fe10dca06ba62802 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 12:04:30 -0500
Subject: [PATCH 07/23] updated docs

---
 man/forecast_ili.Rd | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
index bd56905..98605c5 100644
--- a/man/forecast_ili.Rd
+++ b/man/forecast_ili.Rd
@@ -70,8 +70,11 @@ inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_weighted_ili=weighted_
            by = c("location", "year", "week", "forecasted")) \%>\%
   gather(key, value, ends_with("ili")) \%>\%
   mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\%
-  filter(date>"2021-01-01") \%>\%
-  ggplot(aes(date, value)) + geom_point(aes(col=forecasted)) +
-  facet_wrap(~key)
+  filter(date>"2021-07-01") \%>\%
+  ggplot(aes(date, value)) +
+  geom_line(alpha=.5) +
+  geom_point(aes(col=forecasted)) +
+  facet_wrap(~key) +
+  theme_bw()
 }
 }

From cb401dd5fc2e4ef64409350df705341c8bcb1037 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 12:04:39 -0500
Subject: [PATCH 08/23] using forecasts instead of hardcoded

---
 scratch/ili-forecast.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scratch/ili-forecast.R b/scratch/ili-forecast.R
index 703e2de..4ff25d8 100644
--- a/scratch/ili-forecast.R
+++ b/scratch/ili-forecast.R
@@ -53,7 +53,7 @@ models <-
 
 res <- glm_wrap(train_dat,
                 ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above
-                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = c(2.31,2.43,2.45,2.68)),
+                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = ilifor$ili_future$weighted_ili),
                 .models = models,
                 alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2)
 

From 03d60e1af06170559c7d1097e934ec9199738f3e Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 12:21:16 -0500
Subject: [PATCH 09/23] replace all the fable stuff with package functions, and
 replace hard-coded ili with forecasted values

---
 scratch/worfklow.R | 54 ++++++++++++++--------------------------------
 1 file changed, 16 insertions(+), 38 deletions(-)

diff --git a/scratch/worfklow.R b/scratch/worfklow.R
index fe54de9..c45790b 100644
--- a/scratch/worfklow.R
+++ b/scratch/worfklow.R
@@ -1,41 +1,19 @@
 library(fiphde)
-library(tidyverse)
-library(lubridate)
-library(focustools)
-library(fabletools)
-library(fable)
 
-## retreive ILI data
-ilidat <- get_cdc_ili()
+# Get data
+ilidat <- get_cdc_ili(region="national", years=2020:lubridate::year(lubridate::today()))
 
-## set a date from whihc the
-trim_date <- "2020-03-01"
+# Forecast ILI
+ilifor <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01", constrained=TRUE)
 
-us_ilidat <-
-  ilidat %>%
-  filter(location == "US") %>%
-  mutate(epiyear = epiyear(week_start),
-         epiweek = epiweek(week_start)) %>%
-  filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) %>%
-  select(location, epiyear, epiweek, weighted_ili)
+# What are the arima params?
+ilifor$arima_params
 
-us_ilidat_tsibble <-
-  us_ilidat %>%
-  make_tsibble(chop=TRUE)
+# Take a look at the forecasted data
+ilifor$ili_future
 
-us_ili_fit <-
-  us_ilidat_tsibble %>%
-  ## NOTE: parameter space here ??
-  model(arima = ARIMA(weighted_ili~PDQ(0,0,0)+pdq(0:5,0:5,0:5), stepwise=FALSE, approximation=FALSE))
-
-us_ili_fit %>%
-  extract_arima_params()
-
-## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools
-us_ili_forecast <-
-  focustools::ts_forecast(us_ili_fit, outcome = "icases")
-
-us_ili_forecast
+# Take a look at the forecasted data bound do the real data
+ilifor$ili_bound %>% tail(8)
 
 hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12")
 
@@ -44,14 +22,14 @@ tmp_weekly_flu <-
   mutate(date = date - 1) %>%
   mutate(flu.admits = as.numeric(flu.admits),
          flu.admits.cov = as.numeric(flu.admits.cov)) %>%
-  mutate(epiweek = lubridate::epiweek(date),
-         epiyear = lubridate::epiyear(date)) %>%
-  group_by(epiweek, epiyear) %>%
+  mutate(week = lubridate::epiweek(date),
+         year = lubridate::epiyear(date)) %>%
+  group_by(year, week) %>%
   summarise(flu.admits = sum(flu.admits, na.rm = TRUE),
             flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE),
             .groups = "drop") %>%
-  mutate(location = "US", .before = "epiweek") %>%
-  left_join(us_ilidat)
+  mutate(location = "US", .before = "week") %>%
+  left_join(ilifor$ilidat)
 
 ## add lag columns
 tmp_weekly_flu_w_lag <-
@@ -85,7 +63,7 @@ models <-
 
 res <- glm_wrap(train_dat,
                 ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above
-                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = c(2.31,2.43,2.45,2.68)),
+                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = ilifor$ili_future$weighted_ili),
                 .models = models,
                 alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2)
 

From 782f98c3b1191da4d1efa1b2f4bd8e691596a5d3 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 12:21:47 -0500
Subject: [PATCH 10/23] ili-forecast.R is now in workflow.R

---
 scratch/ili-forecast.R | 63 ------------------------------------------
 1 file changed, 63 deletions(-)
 delete mode 100644 scratch/ili-forecast.R

diff --git a/scratch/ili-forecast.R b/scratch/ili-forecast.R
deleted file mode 100644
index 4ff25d8..0000000
--- a/scratch/ili-forecast.R
+++ /dev/null
@@ -1,63 +0,0 @@
-library(fiphde)
-ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today()))
-ilifor <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01", constrained=TRUE)
-ilifor$arima_params
-ilifor$ili_bound %>% tail()
-
-source(here::here("scratch/glm.R"))
-
-hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12")
-
-tmp_weekly_flu <-
-  hosp %>%
-  mutate(date = date - 1) %>%
-  mutate(epiweek = lubridate::epiweek(date),
-         epiyear = lubridate::epiyear(date)) %>%
-  group_by(epiweek, epiyear) %>%
-  summarise(flu.admits = sum(flu.admits, na.rm = TRUE),
-            flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE),
-            .groups = "drop") %>%
-  mutate(location = "US", .before = "epiweek") %>%
-  left_join(ilidat) %>%
-  print()
-
-## add lag columns
-tmp_weekly_flu_w_lag <-
-  tmp_weekly_flu %>%
-  mutate(lag_1 = lag(flu.admits, 1),
-         lag_2 = lag(flu.admits, 2),
-         lag_3 = lag(flu.admits, 3),
-         lag_4 = lag(flu.admits, 4)) %>%
-  filter(complete.cases(.)) %>%
-  dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
-  filter(date >= max(date) - 7*24)
-
-train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3)
-test_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() >= n() - 3)
-
-models <-
-  list(
-    glm_poisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "poisson"),
-    glm_quasipoisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "quasipoisson"),
-    glm_negbin_ili = trending::glm_nb_model(flu.admits ~ weighted_ili),
-    glm_poisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "poisson"),
-    glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "quasipoisson"),
-    glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + offset(flu.admits.cov)),
-    glm_poisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"),
-    glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"),
-    glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4),
-    glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"),
-    glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"),
-    glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov))
-  )
-
-res <- glm_wrap(train_dat,
-                ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above
-                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = ilifor$ili_future$weighted_ili),
-                .models = models,
-                alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2)
-
-res$forecasts
-res$model
-
-plot_forc(res$forecasts, train_dat, test_dat)

From d66db666f34de3bb5a01dcf3a8035b0a1f859be4 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 13:36:13 -0500
Subject: [PATCH 11/23] allow key column to be specified with {{}}

---
 R/utils.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 3860643..5ffa4da 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -7,13 +7,14 @@
 #' @param df A `tibble` containing columns `epiyear` and `epiweek`.
 #' @param epiyear Unquoted variable name containing the MMWR epiyear.
 #' @param epiweek Unquoted variable name containing the MMWR epiweek.
+#' @param epiweek Unquoted variable name containing the name of the column to be the tsibble key. See [tsibble::as_tsibble].
 #' @param chop Logical indicating whether or not to remove the most current week (default `TRUE`).
 #' @return A `tsibble` containing additional columns `monday` indicating the date
 #'   for the Monday of that epiweek, and `yweek` (a yearweek vctr class object)
 #'   that indexes the `tsibble` in 1 week increments.
 #' @export
 #' @md
-make_tsibble <- function(df, epiyear, epiweek, chop=TRUE) {
+make_tsibble <- function(df, epiyear, epiweek, key=location, chop=TRUE) {
   out <- df %>%
     # get the monday that starts the MMWRweek
     dplyr::mutate(monday=MMWRweek::MMWRweek2Date(MMWRyear={{epiyear}},
@@ -23,7 +24,7 @@ make_tsibble <- function(df, epiyear, epiweek, chop=TRUE) {
     # convert represent as yearweek (see package?tsibble)
     dplyr::mutate(yweek=tsibble::yearweek(monday), .after="monday") %>%
     # convert to tsibble
-    tsibble::as_tsibble(index=yweek, key=location)
+    tsibble::as_tsibble(index=yweek, key={{key}})
   # Remove the incomplete week
   if (chop) out <- utils::head(out, -1)
   return(out)

From 833029c9231b52f08b0915b0b2303ea3d49f4643 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 13:56:54 -0500
Subject: [PATCH 12/23] fix documentation for key variable in make_tsibble

---
 R/utils.R           | 2 +-
 man/make_tsibble.Rd | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 5ffa4da..3e1e194 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -7,7 +7,7 @@
 #' @param df A `tibble` containing columns `epiyear` and `epiweek`.
 #' @param epiyear Unquoted variable name containing the MMWR epiyear.
 #' @param epiweek Unquoted variable name containing the MMWR epiweek.
-#' @param epiweek Unquoted variable name containing the name of the column to be the tsibble key. See [tsibble::as_tsibble].
+#' @param key Unquoted variable name containing the name of the column to be the tsibble key. See [tsibble::as_tsibble].
 #' @param chop Logical indicating whether or not to remove the most current week (default `TRUE`).
 #' @return A `tsibble` containing additional columns `monday` indicating the date
 #'   for the Monday of that epiweek, and `yweek` (a yearweek vctr class object)
diff --git a/man/make_tsibble.Rd b/man/make_tsibble.Rd
index c346223..60480bf 100644
--- a/man/make_tsibble.Rd
+++ b/man/make_tsibble.Rd
@@ -4,7 +4,7 @@
 \alias{make_tsibble}
 \title{Make \code{tsibble}}
 \usage{
-make_tsibble(df, epiyear, epiweek, chop = TRUE)
+make_tsibble(df, epiyear, epiweek, key = location, chop = TRUE)
 }
 \arguments{
 \item{df}{A \code{tibble} containing columns \code{epiyear} and \code{epiweek}.}
@@ -13,6 +13,8 @@ make_tsibble(df, epiyear, epiweek, chop = TRUE)
 
 \item{epiweek}{Unquoted variable name containing the MMWR epiweek.}
 
+\item{key}{Unquoted variable name containing the name of the column to be the tsibble key. See \link[tsibble:as-tsibble]{tsibble::as_tsibble}.}
+
 \item{chop}{Logical indicating whether or not to remove the most current week (default \code{TRUE}).}
 }
 \value{

From 57603f2dc7a630e24695e39ad2a4bf5e13793511 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 13:57:11 -0500
Subject: [PATCH 13/23] small updates to prep for state-level keyed forecasts

---
 R/forecast.R        | 7 ++++---
 man/forecast_ili.Rd | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/forecast.R b/R/forecast.R
index bcfe505..ac52d4a 100644
--- a/R/forecast.R
+++ b/R/forecast.R
@@ -17,7 +17,7 @@
 #' @examples
 #' \dontrun{
 #' # Get data
-#' ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today()))
+#' ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today()))
 #' # Using data only from march 2020 forward
 #' ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01")
 #' head(ilifor_2020$ili_bound)
@@ -61,16 +61,17 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, cons
   }
 
   ## subset to selected location and get columns you care about
+  ## fixme: remove this filter column later when you get kinks worked out of state-level forecasts with key
   ilidat <-
     ilidat %>%
-    dplyr::filter(location %in% location) %>%
+    dplyr::filter(location %in% !!location) %>%
     dplyr::select(location, year, week, weighted_ili)
 
 
   ## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week
   ilidat_tsibble <-
     ilidat %>%
-    fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE)
+    make_tsibble(epiyear = year, epiweek = week, key=location, chop=FALSE)
 
   # Defaults to constrained, non-seasonal model.
   if (constrained) {
diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
index 98605c5..38a8ed0 100644
--- a/man/forecast_ili.Rd
+++ b/man/forecast_ili.Rd
@@ -44,7 +44,7 @@ Currently limited to one location only.
 \examples{
 \dontrun{
 # Get data
-ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today()))
+ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today()))
 # Using data only from march 2020 forward
 ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01")
 head(ilifor_2020$ili_bound)

From ed46111061b078d1089913f91fe5b7e1b5c9e419 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 17:10:31 -0500
Subject: [PATCH 14/23] first cut at states

---
 R/fiphde.R          |  7 +++++
 R/forecast.R        | 74 +++++++++++++++++++++++++++++++--------------
 man/forecast_ili.Rd | 35 +++++++++------------
 3 files changed, 73 insertions(+), 43 deletions(-)

diff --git a/R/fiphde.R b/R/fiphde.R
index 8621003..845b15d 100644
--- a/R/fiphde.R
+++ b/R/fiphde.R
@@ -65,5 +65,12 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c(".",
                                                         "truth",
                                                         "rmse",
                                                         "value",
+                                                        "unweighted_ili",
+                                                        "ili",
+                                                        "miss",
+                                                        "total",
+                                                        "pmiss",
+                                                        "arima",
+                                                        "x",
                                                         "."))
 
diff --git a/R/forecast.R b/R/forecast.R
index ac52d4a..e6224fa 100644
--- a/R/forecast.R
+++ b/R/forecast.R
@@ -3,7 +3,6 @@
 #' @details Currently limited to one location only.
 #' @param ilidat Data returned from [get_cdc_ili].
 #' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4`
-#' @param location Vector specifying locations to filter to; `'US'` by default.
 #' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim.
 #' @param constrained Should the model be constrained to a non-seasonal model? Default `TRUE` sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See [fable::ARIMA].
 #' @return A named list containing:
@@ -13,19 +12,23 @@
 #' 1. `ili_forecast`: The forecast from [fabletools::forecast] at the specified horizon.
 #' 1. `ili_future`: The `horizon`-number of weeks of ILI data forecasted into the future.
 #' 1. `ili_bound`: The data in 1 bound to the data in 5.
-#' 1. `arima_params`: A named character vector of the ARIMA model parameters.
+#' 1. `arima_params`: A tibble with ARIMA model parameters for each location.
+#' 1. `locstats`: A tibble with missing data information on all locations.
+#' 1. `removed`: A tibble with locations removed because of high missing ILI data.
 #' @examples
 #' \dontrun{
 #' # Get data
 #' ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today()))
-#' # Using data only from march 2020 forward
-#' ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01")
-#' head(ilifor_2020$ili_bound)
-#' tail(ilifor_2020$ili_bound, 10)
-#' ilifor_2020$ili_fit
-#' ilifor_2020$ili_fit %>% focustools::extract_arima_params()
-#' ilifor_2020$arima_params
-#' ilifor_2020$ili_forecast
+#'
+#' # Using data only from march 2020 forward, for US only
+#' ilidat_us <- ilidat %>% dplyr::filter(location=="US")
+#' ilifor_us <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01")
+#' head(ilifor_us$ili_bound)
+#' tail(ilifor_us$ili_bound, 10)
+#' ilifor_us$ili_fit
+#' ilifor_us$ili_fit %>% focustools::extract_arima_params()
+#' ilifor_us$arima_params
+#' ilifor_us$ili_forecast
 #' # Using all the data we have (2010-forward, in this example)
 #' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE)
 #' head(ilifor_2010$ili_bound)
@@ -38,8 +41,8 @@
 #' library(dplyr)
 #' library(tidyr)
 #' library(ggplot2)
-#' inner_join(ilifor_2020$ili_bound %>% rename(nonseasonal_weighted_ili=weighted_ili),
-#'            ilifor_2010$ili_bound %>% rename(unconstrained_weighted_ili=weighted_ili),
+#' inner_join(ilifor_2020$ili_bound %>% rename(nonseasonal_unweighted_ili=unweighted_ili),
+#'            ilifor_2010$ili_bound %>% rename(unconstrained_unweighted_ili=unweighted_ili),
 #'            by = c("location", "year", "week", "forecasted")) %>%
 #'   gather(key, value, ends_with("ili")) %>%
 #'   mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
@@ -51,7 +54,7 @@
 #'   theme_bw()
 #' }
 #' @export
-forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, constrained=TRUE) {
+forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
 
   # If trim_date is not null, trim to selected trim_date
   if (!is.null(trim_date)) {
@@ -60,13 +63,30 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, cons
       dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d"))
   }
 
-  ## subset to selected location and get columns you care about
-  ## fixme: remove this filter column later when you get kinks worked out of state-level forecasts with key
+  # Select just the columns you care about, and call "ili" the measure you're using
   ilidat <-
     ilidat %>%
-    dplyr::filter(location %in% !!location) %>%
-    dplyr::select(location, year, week, weighted_ili)
+    dplyr::select(location, year, week, ili=unweighted_ili)
 
+  # Get missing data rates
+  locstats <- ilidat %>%
+    dplyr::group_by(location) %>%
+    dplyr::summarize(miss=sum(is.na(ili)), total=dplyr::n()) %>%
+    dplyr::mutate(pmiss=miss/total) %>%
+    dplyr::arrange(dplyr::desc(pmiss)) %>%
+    dplyr::mutate(remove=pmiss>.1)
+
+  # Get locations that will be removed
+  removed <- locstats %>%
+    dplyr::filter(remove) %>%
+    dplyr::inner_join(locations, by="location")
+  if(nrow(removed)>0) message(sprintf("Removed %s row(s) because of missing data. See result$removed.", nrow(removed)))
+
+  # Remove those locations
+  ilidat <- locstats %>%
+    dplyr::filter(!remove) %>%
+    dplyr::distinct(location) %>%
+    dplyr::inner_join(ilidat, by="location")
 
   ## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week
   ilidat_tsibble <-
@@ -79,19 +99,26 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, cons
     # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5)
     message("Fitting nonseasonal ARIMA model ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5)")
     ili_fit <- fabletools::model(ilidat_tsibble,
-                                 arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5),
+                                 arima = fable::ARIMA(ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5),
                                                       stepwise=FALSE,
                                                       approximation=FALSE))
   } else {
     # If unconstrained, need to set stepwise=TRUE and approxmiation=NULL to speed up.
     message("Fitting unconstrained ARIMA model...")
     ili_fit <- fabletools::model(ilidat_tsibble,
-                                 arima = fable::ARIMA(weighted_ili,
+                                 arima = fable::ARIMA(ili,
                                                       stepwise=TRUE,
                                                       approximation=NULL))
   }
 
-  arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6])
+
+  # arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6])
+  arima_params <-
+    ili_fit %>%
+    dplyr::mutate(x=purrr::map(arima, ~purrr::pluck(., "fit") %>% purrr::pluck("spec"))) %>%
+    tidyr::unnest_wider(col=x) %>%
+    dplyr::select(-arima)
+
 
   # Get the forecast
   ili_forecast <- fabletools::forecast(ili_fit, h=horizon)
@@ -109,13 +136,14 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, cons
     tibble::as_tibble() %>%
     dplyr::mutate(year=lubridate::epiyear(yweek)) %>%
     dplyr::mutate(week=lubridate::epiweek(yweek)) %>%
-    dplyr::select(location, year, week, weighted_ili=.mean)
+    dplyr::select(location, year, week, ili=.mean)
 
   # bind the historical data to the new data
   ili_bound <- dplyr::bind_rows(ilidat     %>% dplyr::mutate(forecasted=FALSE),
-                                ili_future %>% dplyr::mutate(forecasted=TRUE))
+                                ili_future %>% dplyr::mutate(forecasted=TRUE)) %>%
+    dplyr::arrange(location, year, week)
 
   # Create results
-  res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params)
+  res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params, locstats, removed)
   return(res)
 }
diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
index 38a8ed0..a0dd731 100644
--- a/man/forecast_ili.Rd
+++ b/man/forecast_ili.Rd
@@ -4,21 +4,13 @@
 \alias{forecast_ili}
 \title{Forecast ILI}
 \usage{
-forecast_ili(
-  ilidat,
-  horizon = 4L,
-  location = "US",
-  trim_date = NULL,
-  constrained = TRUE
-)
+forecast_ili(ilidat, horizon = 4L, trim_date = NULL, constrained = TRUE)
 }
 \arguments{
 \item{ilidat}{Data returned from \link{get_cdc_ili}.}
 
 \item{horizon}{Optional horizon periods through which the forecasts should be generated; default is \code{4}}
 
-\item{location}{Vector specifying locations to filter to; \code{'US'} by default.}
-
 \item{trim_date}{Earliest start date you want to use for ILI data. Default \code{NULL} doesn't trim.}
 
 \item{constrained}{Should the model be constrained to a non-seasonal model? Default \code{TRUE} sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See \link[fable:ARIMA]{fable::ARIMA}.}
@@ -32,7 +24,9 @@ A named list containing:
 \item \code{ili_forecast}: The forecast from \link[fabletools:forecast]{fabletools::forecast} at the specified horizon.
 \item \code{ili_future}: The \code{horizon}-number of weeks of ILI data forecasted into the future.
 \item \code{ili_bound}: The data in 1 bound to the data in 5.
-\item \code{arima_params}: A named character vector of the ARIMA model parameters.
+\item \code{arima_params}: A tibble with ARIMA model parameters for each location.
+\item \code{locstats}: A tibble with missing data information on all locations.
+\item \code{removed}: A tibble with locations removed because of high missing ILI data.
 }
 }
 \description{
@@ -45,14 +39,15 @@ Currently limited to one location only.
 \dontrun{
 # Get data
 ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today()))
-# Using data only from march 2020 forward
-ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01")
-head(ilifor_2020$ili_bound)
-tail(ilifor_2020$ili_bound, 10)
-ilifor_2020$ili_fit
-ilifor_2020$ili_fit \%>\% focustools::extract_arima_params()
-ilifor_2020$arima_params
-ilifor_2020$ili_forecast
+# Using data only from march 2020 forward, for US only
+ilidat_us <- ilidat \%>\% dplyr::filter(location=="US")
+ilifor_us <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01")
+head(ilifor_us$ili_bound)
+tail(ilifor_us$ili_bound, 10)
+ilifor_us$ili_fit
+ilifor_us$ili_fit \%>\% focustools::extract_arima_params()
+ilifor_us$arima_params
+ilifor_us$ili_forecast
 # Using all the data we have (2010-forward, in this example)
 ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE)
 head(ilifor_2010$ili_bound)
@@ -65,8 +60,8 @@ ilifor_2010$ili_forecast
 library(dplyr)
 library(tidyr)
 library(ggplot2)
-inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_weighted_ili=weighted_ili),
-           ilifor_2010$ili_bound \%>\% rename(unconstrained_weighted_ili=weighted_ili),
+inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_unweighted_ili=unweighted_ili),
+           ilifor_2010$ili_bound \%>\% rename(unconstrained_unweighted_ili=unweighted_ili),
            by = c("location", "year", "week", "forecasted")) \%>\%
   gather(key, value, ends_with("ili")) \%>\%
   mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\%

From e69998a5553d55d10d2551cdb5be64e174ecfaf5 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Tue, 21 Dec 2021 17:37:00 -0500
Subject: [PATCH 15/23] state-level forecasting

---
 R/forecast.R        | 61 +++++++++++++++++++++++++--------------------
 R/utils.R           |  1 +
 man/forecast_ili.Rd | 54 +++++++++++++++++++++------------------
 3 files changed, 65 insertions(+), 51 deletions(-)

diff --git a/R/forecast.R b/R/forecast.R
index e6224fa..0ee8f12 100644
--- a/R/forecast.R
+++ b/R/forecast.R
@@ -23,35 +23,40 @@
 #' # Using data only from march 2020 forward, for US only
 #' ilidat_us <- ilidat %>% dplyr::filter(location=="US")
 #' ilifor_us <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01")
-#' head(ilifor_us$ili_bound)
-#' tail(ilifor_us$ili_bound, 10)
 #' ilifor_us$ili_fit
-#' ilifor_us$ili_fit %>% focustools::extract_arima_params()
 #' ilifor_us$arima_params
 #' ilifor_us$ili_forecast
-#' # Using all the data we have (2010-forward, in this example)
-#' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE)
-#' head(ilifor_2010$ili_bound)
-#' tail(ilifor_2010$ili_bound, 10)
-#' ilifor_2010$ili_fit
-#' ilifor_2010$ili_fit %>% focustools::extract_arima_params()
-#' ilifor_2010$arima_params
-#' ilifor_2010$ili_forecast
-#' # Plot both forecasts
+#' head(ilifor_us$ili_bound)
+#' tail(ilifor_us$ili_bound, 10)
+#' # Plot
+#' library(dplyr)
+#' library(ggplot2)
+#' theme_set(theme_classic())
+#' ilifor_us$ili_bound %>%
+#'  mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
+#'  filter(date>"2021-03-01") %>%
+#'  ggplot(aes(date, ili)) +
+#'  geom_line(lwd=.3, alpha=.5) +
+#'  geom_point(aes(col=forecasted), size=2)
+#'
+#' # At the state level
+#' ilifor_st <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01")
+#' ilifor_st$ili_fit
+#' ilifor_st$arima_params
+#' ilifor_st$ili_forecast
+#' head(ilifor_us$ili_bound)
+#' tail(ilifor_us$ili_bound, 10)
+#' # Plot
 #' library(dplyr)
-#' library(tidyr)
 #' library(ggplot2)
-#' inner_join(ilifor_2020$ili_bound %>% rename(nonseasonal_unweighted_ili=unweighted_ili),
-#'            ilifor_2010$ili_bound %>% rename(unconstrained_unweighted_ili=unweighted_ili),
-#'            by = c("location", "year", "week", "forecasted")) %>%
-#'   gather(key, value, ends_with("ili")) %>%
+#' theme_set(theme_classic())
+#' ilifor_st$ili_bound %>%
 #'   mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
-#'   filter(date>"2021-07-01") %>%
-#'   ggplot(aes(date, value)) +
-#'   geom_line(alpha=.5) +
-#'   geom_point(aes(col=forecasted)) +
-#'   facet_wrap(~key) +
-#'   theme_bw()
+#'   filter(date>"2021-08-01") %>%
+#'   ggplot(aes(date, ili, col=forecasted)) +
+#'   geom_line(lwd=.3) +
+#'   geom_point(aes(col=forecasted), size=.7) +
+#'   facet_wrap(~abbreviation, scale="free_y")
 #' }
 #' @export
 forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
@@ -97,9 +102,9 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
   if (constrained) {
     # Nonseasonal fit: PDQ(0, 0, 0)
     # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5)
-    message("Fitting nonseasonal ARIMA model ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5)")
+    message("Fitting nonseasonal constrained ARIMA model...")
     ili_fit <- fabletools::model(ilidat_tsibble,
-                                 arima = fable::ARIMA(ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5),
+                                 arima = fable::ARIMA(ili ~ PDQ(0,0,0) + pdq(1:2,0:2,0),
                                                       stepwise=FALSE,
                                                       approximation=FALSE))
   } else {
@@ -112,12 +117,13 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
   }
 
 
-  # arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6])
+  # # arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6])
   arima_params <-
     ili_fit %>%
     dplyr::mutate(x=purrr::map(arima, ~purrr::pluck(., "fit") %>% purrr::pluck("spec"))) %>%
     tidyr::unnest_wider(col=x) %>%
     dplyr::select(-arima)
+  # arima_params <- ili_fit %>% extract_arima_params()
 
 
   # Get the forecast
@@ -141,7 +147,8 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
   # bind the historical data to the new data
   ili_bound <- dplyr::bind_rows(ilidat     %>% dplyr::mutate(forecasted=FALSE),
                                 ili_future %>% dplyr::mutate(forecasted=TRUE)) %>%
-    dplyr::arrange(location, year, week)
+    dplyr::arrange(location, year, week) %>%
+    dplyr::inner_join(locations, by="location")
 
   # Create results
   res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params, locstats, removed)
diff --git a/R/utils.R b/R/utils.R
index 901b2c1..e419f4c 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -114,3 +114,4 @@ plot_forc <- function(.forecasts, .train, .test) {
 }
 
 
+
diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
index a0dd731..fff2c66 100644
--- a/man/forecast_ili.Rd
+++ b/man/forecast_ili.Rd
@@ -39,37 +39,43 @@ Currently limited to one location only.
 \dontrun{
 # Get data
 ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today()))
+
 # Using data only from march 2020 forward, for US only
 ilidat_us <- ilidat \%>\% dplyr::filter(location=="US")
-ilifor_us <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01")
-head(ilifor_us$ili_bound)
-tail(ilifor_us$ili_bound, 10)
+ilifor_us <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01")
 ilifor_us$ili_fit
-ilifor_us$ili_fit \%>\% focustools::extract_arima_params()
 ilifor_us$arima_params
 ilifor_us$ili_forecast
-# Using all the data we have (2010-forward, in this example)
-ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE)
-head(ilifor_2010$ili_bound)
-tail(ilifor_2010$ili_bound, 10)
-ilifor_2010$ili_fit
-ilifor_2010$ili_fit \%>\% focustools::extract_arima_params()
-ilifor_2010$arima_params
-ilifor_2010$ili_forecast
-# Plot both forecasts
+head(ilifor_us$ili_bound)
+tail(ilifor_us$ili_bound, 10)
+# Plot
+library(dplyr)
+library(ggplot2)
+theme_set(theme_classic())
+ilifor_us$ili_bound \%>\%
+ mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\%
+ filter(date>"2021-03-01") \%>\%
+ ggplot(aes(date, ili)) +
+ geom_line(lwd=.3, alpha=.5) +
+ geom_point(aes(col=forecasted), size=2)
+
+# At the state level
+ilifor_st <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01")
+ilifor_st$ili_fit
+ilifor_st$arima_params
+ilifor_st$ili_forecast
+head(ilifor_us$ili_bound)
+tail(ilifor_us$ili_bound, 10)
+# Plot
 library(dplyr)
-library(tidyr)
 library(ggplot2)
-inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_unweighted_ili=unweighted_ili),
-           ilifor_2010$ili_bound \%>\% rename(unconstrained_unweighted_ili=unweighted_ili),
-           by = c("location", "year", "week", "forecasted")) \%>\%
-  gather(key, value, ends_with("ili")) \%>\%
+theme_set(theme_classic())
+ilifor_st$ili_bound \%>\%
   mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\%
-  filter(date>"2021-07-01") \%>\%
-  ggplot(aes(date, value)) +
-  geom_line(alpha=.5) +
-  geom_point(aes(col=forecasted)) +
-  facet_wrap(~key) +
-  theme_bw()
+  filter(date>"2021-08-01") \%>\%
+  ggplot(aes(date, ili, col=forecasted)) +
+  geom_line(lwd=.3) +
+  geom_point(aes(col=forecasted), size=.7) +
+  facet_wrap(~abbreviation, scale="free_y")
 }
 }

From d268541412d83280a1946946ef4526ea0411dc1e Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Wed, 22 Dec 2021 06:27:04 -0500
Subject: [PATCH 16/23] updating workflow but fails with metric colunn error I
 can't debug. @vpnagraj we'll have to chat on this one

---
 scratch/worfklow.R | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/scratch/worfklow.R b/scratch/worfklow.R
index c45790b..3b7051b 100644
--- a/scratch/worfklow.R
+++ b/scratch/worfklow.R
@@ -1,10 +1,14 @@
+library(tidyverse)
 library(fiphde)
 
 # Get data
 ilidat <- get_cdc_ili(region="national", years=2020:lubridate::year(lubridate::today()))
 
+# Subset to US only
+ilidat_us <- ilidat %>% filter(location=="US")
+
 # Forecast ILI
-ilifor <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01", constrained=TRUE)
+ilifor <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01", constrained=TRUE)
 
 # What are the arima params?
 ilifor$arima_params
@@ -15,6 +19,17 @@ ilifor$ili_future
 # Take a look at the forecasted data bound do the real data
 ilifor$ili_bound %>% tail(8)
 
+# Plot actual versus forecasted values
+p.ili <-
+  ilifor$ili_bound %>%
+  mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
+  ggplot(aes(date, ili)) +
+  geom_line(alpha=.5, lwd=.2) +
+  geom_point(aes(col=forecasted)) +
+  theme_bw() +
+  labs(x="Date", y="Unweighted ILI")
+p.ili
+
 hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12")
 
 tmp_weekly_flu <-
@@ -39,7 +54,7 @@ tmp_weekly_flu_w_lag <-
          lag_3 = lag(flu.admits, 3),
          lag_4 = lag(flu.admits, 4)) %>%
   filter(complete.cases(.)) %>%
-  dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
+  dplyr::mutate(date = MMWRweek::MMWRweek2Date(year, week)) %>%
   filter(date >= max(date) - 7*24)
 
 train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3)
@@ -62,8 +77,7 @@ models <-
   )
 
 res <- glm_wrap(train_dat,
-                ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above
-                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = ilifor$ili_future$weighted_ili),
+                new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), ili = ilifor$ili_future$ili),
                 .models = models,
                 alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2)
 

From 8d85d50bb377d67ed831afd7ccf84ca0c36d82a5 Mon Sep 17 00:00:00 2001
From: VP Nagraj <pnagraj@signaturescience.com>
Date: Wed, 22 Dec 2021 09:12:20 -0500
Subject: [PATCH 17/23] fixing ILI workflow script

---
 scratch/worfklow.R | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/scratch/worfklow.R b/scratch/worfklow.R
index 3b7051b..2ec0904 100644
--- a/scratch/worfklow.R
+++ b/scratch/worfklow.R
@@ -37,14 +37,14 @@ tmp_weekly_flu <-
   mutate(date = date - 1) %>%
   mutate(flu.admits = as.numeric(flu.admits),
          flu.admits.cov = as.numeric(flu.admits.cov)) %>%
-  mutate(week = lubridate::epiweek(date),
-         year = lubridate::epiyear(date)) %>%
-  group_by(year, week) %>%
+  mutate(epiweek = lubridate::epiweek(date),
+         epiyear = lubridate::epiyear(date)) %>%
+  group_by(epiyear, epiweek) %>%
   summarise(flu.admits = sum(flu.admits, na.rm = TRUE),
             flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE),
             .groups = "drop") %>%
-  mutate(location = "US", .before = "week") %>%
-  left_join(ilifor$ilidat)
+  mutate(location = "US", .before = "epiweek") %>%
+  left_join(rename(ilifor$ilidat, epiweek = week, epiyear = year))
 
 ## add lag columns
 tmp_weekly_flu_w_lag <-
@@ -54,7 +54,7 @@ tmp_weekly_flu_w_lag <-
          lag_3 = lag(flu.admits, 3),
          lag_4 = lag(flu.admits, 4)) %>%
   filter(complete.cases(.)) %>%
-  dplyr::mutate(date = MMWRweek::MMWRweek2Date(year, week)) %>%
+  dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>%
   filter(date >= max(date) - 7*24)
 
 train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3)
@@ -62,18 +62,18 @@ test_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() >= n() - 3)
 
 models <-
   list(
-    glm_poisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "poisson"),
-    glm_quasipoisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "quasipoisson"),
-    glm_negbin_ili = trending::glm_nb_model(flu.admits ~ weighted_ili),
-    glm_poisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "poisson"),
-    glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "quasipoisson"),
-    glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + offset(flu.admits.cov)),
-    glm_poisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"),
-    glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"),
-    glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4),
-    glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"),
-    glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"),
-    glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov))
+    glm_poisson_ili = trending::glm_model(flu.admits ~ ili, family = "poisson"),
+    glm_quasipoisson_ili = trending::glm_model(flu.admits ~ ili, family = "quasipoisson"),
+    glm_negbin_ili = trending::glm_nb_model(flu.admits ~ ili),
+    glm_poisson_ili_offset = trending::glm_model(flu.admits ~ ili + offset(flu.admits.cov), family = "poisson"),
+    glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ ili + offset(flu.admits.cov), family = "quasipoisson"),
+    glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ ili + offset(flu.admits.cov)),
+    glm_poisson_ili_lags = trending::glm_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"),
+    glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"),
+    glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4),
+    glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"),
+    glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"),
+    glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov))
   )
 
 res <- glm_wrap(train_dat,

From cc3c00abf96c8ecb28e0bb358d66eb5e51a24da0 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Wed, 22 Dec 2021 09:45:38 -0500
Subject: [PATCH 18/23] plots

---
 scratch/worfklow.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scratch/worfklow.R b/scratch/worfklow.R
index 2ec0904..edf20e9 100644
--- a/scratch/worfklow.R
+++ b/scratch/worfklow.R
@@ -27,7 +27,7 @@ p.ili <-
   geom_line(alpha=.5, lwd=.2) +
   geom_point(aes(col=forecasted)) +
   theme_bw() +
-  labs(x="Date", y="Unweighted ILI")
+  labs(x="Date", y="Unweighted ILI", title="ILI forecast")
 p.ili
 
 hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12")
@@ -84,5 +84,7 @@ res <- glm_wrap(train_dat,
 res$forecasts
 res$model
 
-plot_forc(res$forecasts, train_dat, test_dat)
+p.hosp <- plot_forc(res$forecasts, train_dat, test_dat)
 
+library(patchwork)
+p.ili / p.hosp

From 558e3f9bedd9abfc9b47aaf1361029d326674e26 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Wed, 22 Dec 2021 10:00:31 -0500
Subject: [PATCH 19/23] week/year -> epiweek/epiyear

---
 R/forecast.R        | 16 ++++++++--------
 R/retrieve.R        | 16 ++++++++--------
 man/forecast_ili.Rd |  4 ++--
 man/get_cdc_vax.Rd  |  2 +-
 scratch/worfklow.R  |  5 +++--
 5 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/R/forecast.R b/R/forecast.R
index 0ee8f12..f51ae12 100644
--- a/R/forecast.R
+++ b/R/forecast.R
@@ -33,7 +33,7 @@
 #' library(ggplot2)
 #' theme_set(theme_classic())
 #' ilifor_us$ili_bound %>%
-#'  mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
+#'  mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>%
 #'  filter(date>"2021-03-01") %>%
 #'  ggplot(aes(date, ili)) +
 #'  geom_line(lwd=.3, alpha=.5) +
@@ -51,7 +51,7 @@
 #' library(ggplot2)
 #' theme_set(theme_classic())
 #' ilifor_st$ili_bound %>%
-#'   mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
+#'   mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>%
 #'   filter(date>"2021-08-01") %>%
 #'   ggplot(aes(date, ili, col=forecasted)) +
 #'   geom_line(lwd=.3) +
@@ -71,7 +71,7 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
   # Select just the columns you care about, and call "ili" the measure you're using
   ilidat <-
     ilidat %>%
-    dplyr::select(location, year, week, ili=unweighted_ili)
+    dplyr::select(location, epiyear, epiweek, ili=unweighted_ili)
 
   # Get missing data rates
   locstats <- ilidat %>%
@@ -96,7 +96,7 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
   ## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week
   ilidat_tsibble <-
     ilidat %>%
-    make_tsibble(epiyear = year, epiweek = week, key=location, chop=FALSE)
+    make_tsibble(epiyear = epiyear, epiweek = epiweek, key=location, chop=FALSE)
 
   # Defaults to constrained, non-seasonal model.
   if (constrained) {
@@ -140,14 +140,14 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
   # Get the next #horizon weeks in a tibble
   ili_future <- ili_forecast %>%
     tibble::as_tibble() %>%
-    dplyr::mutate(year=lubridate::epiyear(yweek)) %>%
-    dplyr::mutate(week=lubridate::epiweek(yweek)) %>%
-    dplyr::select(location, year, week, ili=.mean)
+    dplyr::mutate(epiyear=lubridate::epiyear(yweek)) %>%
+    dplyr::mutate(epiweek=lubridate::epiweek(yweek)) %>%
+    dplyr::select(location, epiyear, epiweek, ili=.mean)
 
   # bind the historical data to the new data
   ili_bound <- dplyr::bind_rows(ilidat     %>% dplyr::mutate(forecasted=FALSE),
                                 ili_future %>% dplyr::mutate(forecasted=TRUE)) %>%
-    dplyr::arrange(location, year, week) %>%
+    dplyr::arrange(location, epiyear, epiweek) %>%
     dplyr::inner_join(locations, by="location")
 
   # Create results
diff --git a/R/retrieve.R b/R/retrieve.R
index f06c6db..580b2f0 100644
--- a/R/retrieve.R
+++ b/R/retrieve.R
@@ -106,7 +106,7 @@ get_hdgov_hosp <- function(endpoint="https://healthdata.gov/resource/g62h-syeh.j
 #' @references API documentation: <https://dev.socrata.com/foundry/data.cdc.gov/k87d-gv3u>.
 #' @examples
 #' \dontrun{
-#' d <- get_cdc_vax_data()
+#' d <- get_cdc_vax()
 #' d
 #' library(ggplot2)
 #' d %>%
@@ -183,14 +183,14 @@ get_cdc_ili <- function(region=c("national", "state"), years=NULL) {
   # Get only relevant columns (drop age group distributions)
   # Join to internal package data to get state abbreviations and FIPS codes
   d <- d %>%
-    dplyr::select(region_type, region, year, week, week_start, dplyr::contains("ili"), ilitotal:total_patients) %>%
+    dplyr::select(region_type, region, epiyear=year, epiweek=week, week_start, dplyr::contains("ili"), ilitotal:total_patients) %>%
     dplyr::mutate(region=gsub("National", "US", region)) %>%
     dplyr::inner_join(locations, by=c("region"="location_name")) %>%
     dplyr::select(location, region_type, abbreviation, region, dplyr::everything())
   message(sprintf("Latest week_start / year / epiweek available:\n%s / %d / %d",
                   max(d$week_start),
-                  unique(d$year[d$week_start==max(d$week_start)]),
-                  unique(d$week[d$week_start==max(d$week_start)])))
+                  unique(d$epiyear[d$week_start==max(d$week_start)]),
+                  unique(d$epiweek[d$week_start==max(d$week_start)])))
   return(d)
 }
 
@@ -211,8 +211,8 @@ get_cdc_hosp <- function(years=NULL) {
     dplyr::transmute(location="US",
                      abbreviation="US",
                      region="US",
-                     year,
-                     week=year_wk_num,
+                     epiyear=year,
+                     epiweek=year_wk_num,
                      week_start=wk_start,
                      week_end=wk_end,
                      rate,
@@ -220,7 +220,7 @@ get_cdc_hosp <- function(years=NULL) {
                      season=sea_label)
   message(sprintf("Latest week_start / year / epiweek available:\n%s / %d / %d",
                   max(d$week_start),
-                  unique(d$year[d$week_start==max(d$week_start)]),
-                  unique(d$week[d$week_start==max(d$week_start)])))
+                  unique(d$epiyear[d$week_start==max(d$week_start)]),
+                  unique(d$epiweek[d$week_start==max(d$week_start)])))
   return(d)
 }
diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
index fff2c66..1a4a294 100644
--- a/man/forecast_ili.Rd
+++ b/man/forecast_ili.Rd
@@ -53,7 +53,7 @@ library(dplyr)
 library(ggplot2)
 theme_set(theme_classic())
 ilifor_us$ili_bound \%>\%
- mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\%
+ mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) \%>\%
  filter(date>"2021-03-01") \%>\%
  ggplot(aes(date, ili)) +
  geom_line(lwd=.3, alpha=.5) +
@@ -71,7 +71,7 @@ library(dplyr)
 library(ggplot2)
 theme_set(theme_classic())
 ilifor_st$ili_bound \%>\%
-  mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\%
+  mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) \%>\%
   filter(date>"2021-08-01") \%>\%
   ggplot(aes(date, ili, col=forecasted)) +
   geom_line(lwd=.3) +
diff --git a/man/get_cdc_vax.Rd b/man/get_cdc_vax.Rd
index 2f5e080..63d8a8e 100644
--- a/man/get_cdc_vax.Rd
+++ b/man/get_cdc_vax.Rd
@@ -28,7 +28,7 @@ Get vaccination data from cdc.gov endpoint.
 }
 \examples{
 \dontrun{
-d <- get_cdc_vax_data()
+d <- get_cdc_vax()
 d
 library(ggplot2)
 d \%>\%
diff --git a/scratch/worfklow.R b/scratch/worfklow.R
index edf20e9..17f89de 100644
--- a/scratch/worfklow.R
+++ b/scratch/worfklow.R
@@ -22,7 +22,7 @@ ilifor$ili_bound %>% tail(8)
 # Plot actual versus forecasted values
 p.ili <-
   ilifor$ili_bound %>%
-  mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>%
+  mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>%
   ggplot(aes(date, ili)) +
   geom_line(alpha=.5, lwd=.2) +
   geom_point(aes(col=forecasted)) +
@@ -44,7 +44,7 @@ tmp_weekly_flu <-
             flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE),
             .groups = "drop") %>%
   mutate(location = "US", .before = "epiweek") %>%
-  left_join(rename(ilifor$ilidat, epiweek = week, epiyear = year))
+  left_join(ilifor$ilidat, by = c("epiyear", "location", "epiweek"))
 
 ## add lag columns
 tmp_weekly_flu_w_lag <-
@@ -85,6 +85,7 @@ res$forecasts
 res$model
 
 p.hosp <- plot_forc(res$forecasts, train_dat, test_dat)
+p.hosp
 
 library(patchwork)
 p.ili / p.hosp

From 4683e838e480e8d1533aee3a0612de8306fe3a10 Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Wed, 22 Dec 2021 10:10:56 -0500
Subject: [PATCH 20/23] include 2019-2020 flu season so we get early 2020 data

---
 scratch/worfklow.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scratch/worfklow.R b/scratch/worfklow.R
index 17f89de..4867f25 100644
--- a/scratch/worfklow.R
+++ b/scratch/worfklow.R
@@ -2,7 +2,10 @@ library(tidyverse)
 library(fiphde)
 
 # Get data
-ilidat <- get_cdc_ili(region="national", years=2020:lubridate::year(lubridate::today()))
+# the years argument for cdcfluview::ilinet gets the *season* corresponding to the year.
+# so, 2019 = the 2019-2020 flu season. If you only get 2020-2021, you're getting the
+# 2020-2021 and 2021-2022 flu season, meaning that you will *NOT* capture early 2020 data.
+ilidat <- get_cdc_ili(region="national", years=2019:lubridate::year(lubridate::today()))
 
 # Subset to US only
 ilidat_us <- ilidat %>% filter(location=="US")

From 366b01f00683a891a8e3a32d0077854a1f86581c Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Wed, 22 Dec 2021 10:28:11 -0500
Subject: [PATCH 21/23] sensitivity demonstration script

---
 scratch/sensitivity-demonstration.R | 40 +++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 scratch/sensitivity-demonstration.R

diff --git a/scratch/sensitivity-demonstration.R b/scratch/sensitivity-demonstration.R
new file mode 100644
index 0000000..03c0872
--- /dev/null
+++ b/scratch/sensitivity-demonstration.R
@@ -0,0 +1,40 @@
+library(tidyverse)
+library(fiphde)
+
+
+# Get data
+# the years argument for cdcfluview::ilinet gets the *season* corresponding to the year.
+# so, 2019 = the 2019-2020 flu season. If you only get 2020-2021, you're getting the
+# 2020-2021 and 2021-2022 flu season, meaning that you will *NOT* capture early 2020 data.
+ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today()))
+
+# Subset to US only
+ilidat_us <- ilidat %>% filter(location=="US")
+
+# convenience function
+plot_forecast <- function(forecast) {
+  forecast$ili_bound %>%
+    mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>%
+    filter(date>"2021-07-01") %>%
+    ggplot(aes(date, ili)) +
+    geom_line(alpha=.5, lwd=.2) +
+    geom_point(aes(col=forecasted)) +
+    theme_bw() +
+    labs(x="Date", y="Unweighted ILI", title="ILI forecast",
+         subtitle=paste0("Trim date: ", forecast$ilidat %>% mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>% pull(date) %>% min()))
+}
+
+# Forecast ILI
+p1 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01", constrained=TRUE) %>% plot_forecast() + ggtitle("Constrained")
+p2 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-01-01", constrained=TRUE) %>% plot_forecast() + ggtitle("Constrained")
+p3 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-05-01", constrained=TRUE) %>% plot_forecast() + ggtitle("Constrained")
+p4 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2015-05-01", constrained=TRUE) %>% plot_forecast() + ggtitle("Constrained")
+
+p5 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01", constrained=FALSE) %>% plot_forecast() + ggtitle("Unconstrained")
+p6 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-01-01", constrained=FALSE) %>% plot_forecast() + ggtitle("Unconstrained")
+p7 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-05-01", constrained=FALSE) %>% plot_forecast() + ggtitle("Unconstrained")
+p8 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2015-05-01", constrained=FALSE) %>% plot_forecast() + ggtitle("Unconstrained")
+
+(p1+p5)/(p2+p6)/(p3+p7)/(p4+p8) + plot_layout(guides = "collect")
+
+ggsave("~/Downloads/sensitivity-demo.png", width=8, height=11.5)

From b5285018ecac70c2784b9a108d0d50d6a32d76a2 Mon Sep 17 00:00:00 2001
From: VP Nagraj <pnagraj@signaturescience.com>
Date: Wed, 22 Dec 2021 17:51:07 -0500
Subject: [PATCH 22/23] adding option to forecast_ili for named param_space
 list

---
 R/forecast.R        | 21 +++++----------------
 man/forecast_ili.Rd | 12 ++++++++++--
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/R/forecast.R b/R/forecast.R
index f51ae12..a72f55f 100644
--- a/R/forecast.R
+++ b/R/forecast.R
@@ -4,7 +4,8 @@
 #' @param ilidat Data returned from [get_cdc_ili].
 #' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4`
 #' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim.
-#' @param constrained Should the model be constrained to a non-seasonal model? Default `TRUE` sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See [fable::ARIMA].
+#' @param constrained Should the model be constrained to a non-seasonal model? If `TRUE` the parameter space defined in "param_space" argument will be used. See [fable::ARIMA].
+#' @param param_space Named list for ARIMA parameter space constraint; only used if "constrained == `TRUE`"; default is `list(P=0,D=0,Q=0,p=1:2,d=0:2,0)`, which sets space to PDQ(0,0,0) and pdq(1:2,0:2,0).
 #' @return A named list containing:
 #' 1. `ilidat`: The data sent into the function filtered to the location and the `trim_date`. Select columns returned.
 #' 1. `ilidat_tsibble`: The `tsibble` class object returned by running [make_tsibble] on the data above.
@@ -59,7 +60,7 @@
 #'   facet_wrap(~abbreviation, scale="free_y")
 #' }
 #' @export
-forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
+forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE, param_space = list(P=0,D=0,Q=0,p=1:2,d=0:2,0)) {
 
   # If trim_date is not null, trim to selected trim_date
   if (!is.null(trim_date)) {
@@ -104,7 +105,7 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
     # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5)
     message("Fitting nonseasonal constrained ARIMA model...")
     ili_fit <- fabletools::model(ilidat_tsibble,
-                                 arima = fable::ARIMA(ili ~ PDQ(0,0,0) + pdq(1:2,0:2,0),
+                                 arima = fable::ARIMA(ili ~ PDQ(param_space$P,param_space$D,param_space$Q) + pdq(param_space$p,param_space$d, param_space$q),
                                                       stepwise=FALSE,
                                                       approximation=FALSE))
   } else {
@@ -116,27 +117,15 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
                                                       approximation=NULL))
   }
 
-
-  # # arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6])
   arima_params <-
     ili_fit %>%
     dplyr::mutate(x=purrr::map(arima, ~purrr::pluck(., "fit") %>% purrr::pluck("spec"))) %>%
     tidyr::unnest_wider(col=x) %>%
     dplyr::select(-arima)
-  # arima_params <- ili_fit %>% extract_arima_params()
-
 
   # Get the forecast
   ili_forecast <- fabletools::forecast(ili_fit, h=horizon)
 
-  ## Look at the quantiles
-  # ili_forecast %>%
-  #   fabletools::hilo()
-  # ili_forecast %>%
-  #   fabletools::hilo() %>%
-  #   fabletools::unpack_hilo(`80%`) %>%
-  #   fabletools::unpack_hilo(`95%`)
-
   # Get the next #horizon weeks in a tibble
   ili_future <- ili_forecast %>%
     tibble::as_tibble() %>%
@@ -145,7 +134,7 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) {
     dplyr::select(location, epiyear, epiweek, ili=.mean)
 
   # bind the historical data to the new data
-  ili_bound <- dplyr::bind_rows(ilidat     %>% dplyr::mutate(forecasted=FALSE),
+  ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE),
                                 ili_future %>% dplyr::mutate(forecasted=TRUE)) %>%
     dplyr::arrange(location, epiyear, epiweek) %>%
     dplyr::inner_join(locations, by="location")
diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
index 1a4a294..1896820 100644
--- a/man/forecast_ili.Rd
+++ b/man/forecast_ili.Rd
@@ -4,7 +4,13 @@
 \alias{forecast_ili}
 \title{Forecast ILI}
 \usage{
-forecast_ili(ilidat, horizon = 4L, trim_date = NULL, constrained = TRUE)
+forecast_ili(
+  ilidat,
+  horizon = 4L,
+  trim_date = NULL,
+  constrained = TRUE,
+  param_space = list(P = 0, D = 0, Q = 0, p = 1:2, d = 0:2, 0)
+)
 }
 \arguments{
 \item{ilidat}{Data returned from \link{get_cdc_ili}.}
@@ -13,7 +19,9 @@ forecast_ili(ilidat, horizon = 4L, trim_date = NULL, constrained = TRUE)
 
 \item{trim_date}{Earliest start date you want to use for ILI data. Default \code{NULL} doesn't trim.}
 
-\item{constrained}{Should the model be constrained to a non-seasonal model? Default \code{TRUE} sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See \link[fable:ARIMA]{fable::ARIMA}.}
+\item{constrained}{Should the model be constrained to a non-seasonal model? If \code{TRUE} the parameter space defined in "param_space" argument will be used. See \link[fable:ARIMA]{fable::ARIMA}.}
+
+\item{param_space}{Named list for ARIMA parameter space constraint; only used if "constrained == \code{TRUE}"; default is \code{list(P=0,D=0,Q=0,p=1:2,d=0:2,0)}, which sets space to PDQ(0,0,0) and pdq(1:2,0:2,0).}
 }
 \value{
 A named list containing:

From e11a52ce554ddca48382f325116413ecac97e05f Mon Sep 17 00:00:00 2001
From: Stephen Turner <vustephen@gmail.com>
Date: Thu, 23 Dec 2021 11:30:47 -0500
Subject: [PATCH 23/23] specify q

---
 R/forecast.R        | 2 +-
 man/forecast_ili.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/forecast.R b/R/forecast.R
index a72f55f..a948ff2 100644
--- a/R/forecast.R
+++ b/R/forecast.R
@@ -60,7 +60,7 @@
 #'   facet_wrap(~abbreviation, scale="free_y")
 #' }
 #' @export
-forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE, param_space = list(P=0,D=0,Q=0,p=1:2,d=0:2,0)) {
+forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE, param_space = list(P=0,D=0,Q=0,p=1:2,d=0:2,q=0)) {
 
   # If trim_date is not null, trim to selected trim_date
   if (!is.null(trim_date)) {
diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd
index 1896820..9580528 100644
--- a/man/forecast_ili.Rd
+++ b/man/forecast_ili.Rd
@@ -9,7 +9,7 @@ forecast_ili(
   horizon = 4L,
   trim_date = NULL,
   constrained = TRUE,
-  param_space = list(P = 0, D = 0, Q = 0, p = 1:2, d = 0:2, 0)
+  param_space = list(P = 0, D = 0, Q = 0, p = 1:2, d = 0:2, q = 0)
 )
 }
 \arguments{