From 8c495f77da7daa5dbcf178295687959423c40d67 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 06:54:15 -0500 Subject: [PATCH 01/23] ili forecast in scratch #7 --- scratch/ili-forecast.R | 127 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 scratch/ili-forecast.R diff --git a/scratch/ili-forecast.R b/scratch/ili-forecast.R new file mode 100644 index 0000000..3a9802b --- /dev/null +++ b/scratch/ili-forecast.R @@ -0,0 +1,127 @@ +library(fiphde) +# library(tidyverse) +# library(lubridate) +# library(focustools) +# library(fabletools) +# library(fable) + +### Function arguments +#' @param trim_date Earliest start date you want to use for ILI data +#' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4` +#' @param region Either "national" or "state" or c("national", "state") for both national and state-level data. +#' @param location Vector specifying locations to filter to; `'US'` by default. +trim_date="2020-03-01" +horizon=4 +region="national" +location="US" + +# Set the years passed to get_cdc_ili starting with the year of the trim_date to the current year +years <- lubridate::year(trim_date):lubridate::year(lubridate::today()) + +## retreive ILI data +ilidat_all <- get_cdc_ili(region=region, years=years) + +## subset to US only and subset to the trim date to present +ilidat <- + ilidat_all %>% + dplyr::filter(location %in% location) %>% + dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) %>% + dplyr::select(location, year, week, weighted_ili) + +## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week +ilidat_tsibble <- + ilidat %>% + fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE) +tail(ilidat_tsibble) + +# Nonseasonal fit: PDQ(0, 0, 0) +# Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) +ili_fit <- fabletools::model(ilidat_tsibble, + arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5), + stepwise=FALSE, + approximation=FALSE)) +# ili_fit %>% focustools::extract_arima_params() + +## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools +# ili_forecast <- focustools::ts_forecast(ili_fit, outcome = "icases") + +# Get the forecast +ili_forecast <- fabletools::forecast(ili_fit, h=horizon) + +## Look at the quantiles +# ili_forecast %>% +# fabletools::hilo() +# ili_forecast %>% +# fabletools::hilo() %>% +# fabletools::unpack_hilo(`80%`) %>% +# fabletools::unpack_hilo(`95%`) + +# Get the next #horizon weeks in a tibble +ili_future <- ili_forecast %>% + tibble::as_tibble() %>% + dplyr::mutate(year=lubridate::epiyear(yweek)) %>% + dplyr::mutate(week=lubridate::epiweek(yweek)) %>% + dplyr::select(location, year, week, weighted_ili=.mean) + + +ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), + ili_future %>% dplyr::mutate(forecasted=TRUE)) + +return(ili_bound) + +source("glm.R") + +hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12") + +tmp_weekly_flu <- + hosp %>% + mutate(date = date - 1) %>% + mutate(epiweek = lubridate::epiweek(date), + epiyear = lubridate::epiyear(date)) %>% + group_by(epiweek, epiyear) %>% + summarise(flu.admits = sum(flu.admits, na.rm = TRUE), + flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE), + .groups = "drop") %>% + mutate(location = "US", .before = "epiweek") %>% + left_join(ilidat) + +## add lag columns +tmp_weekly_flu_w_lag <- + tmp_weekly_flu %>% + mutate(lag_1 = lag(flu.admits, 1), + lag_2 = lag(flu.admits, 2), + lag_3 = lag(flu.admits, 3), + lag_4 = lag(flu.admits, 4)) %>% + filter(complete.cases(.)) %>% + dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% + filter(date >= max(date) - 7*24) + +train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3) +test_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() >= n() - 3) + +models <- + list( + glm_poisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "poisson"), + glm_quasipoisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "quasipoisson"), + glm_negbin_ili = trending::glm_nb_model(flu.admits ~ weighted_ili), + glm_poisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "poisson"), + glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "quasipoisson"), + glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + offset(flu.admits.cov)), + glm_poisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"), + glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"), + glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4), + glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"), + glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"), + glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov)) + ) + +res <- glm_wrap(train_dat, + ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above + new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = c(2.31,2.43,2.45,2.68)), + .models = models, + alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) + +res$forecasts +res$model + +plot_forc(res$forecasts, train_dat, test_dat) From f68006ca544dee615c290e3925c91713434f0fa6 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 07:24:41 -0500 Subject: [PATCH 02/23] first draft on function to forecast ILI #7 --- DESCRIPTION | 2 + NAMESPACE | 1 + R/fiphde.R | 2 + R/forecast.R | 89 +++++++++++++++++++++++++++++++++++++++++++++ man/forecast_ili.Rd | 51 ++++++++++++++++++++++++++ 5 files changed, 145 insertions(+) create mode 100644 R/forecast.R create mode 100644 man/forecast_ili.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ba36044..994fc5a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,8 @@ RoxygenNote: 7.1.2 Imports: cdcfluview, dplyr, + fable, + fabletools, lubridate, magrittr, MMWRweek, diff --git a/NAMESPACE b/NAMESPACE index 9ebdc39..4ba5417 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(forecast_ili) export(get_cdc_hosp) export(get_cdc_ili) export(get_cdc_vax) diff --git a/R/fiphde.R b/R/fiphde.R index 7637798..d55dc24 100644 --- a/R/fiphde.R +++ b/R/fiphde.R @@ -48,5 +48,7 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c(".", "sea_label", "monday", "yweek", + "weighted_ili", + ".mean", ".")) diff --git a/R/forecast.R b/R/forecast.R new file mode 100644 index 0000000..2dbcc23 --- /dev/null +++ b/R/forecast.R @@ -0,0 +1,89 @@ +#' @title Forecast ILI +#' @description Forecasts ILI up to specified weeks in the future. Used in downstream modeling. +#' @details Currently limited to one location only. +#' @param ilidat Data returned from [get_cdc_ili]. +#' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4` +#' @param location Vector specifying locations to filter to; `'US'` by default. +#' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim. +#' @return A named list containing: +#' 1. `ilidat`: The data sent into the function filtered to the location and the `trim_date`. Select columns returned. +#' 1. `ilidat_tsibble`: The `tsibble` class object returned by running [make_tsibble] on the data above. +#' 1. `ili_fit`: The fit from [fabletools::model]. +#' 1. `ili_forecast`: The forecast from [fabletools::forecast] at the specified horizon. +#' 1. `ili_future`: The `horizon`-number of weeks of ILI data forecasted into the future. +#' 1. `ili_bound`: The data in 1 bound to the data in 5. +#' @examples +#' \dontrun{ +#' # Get data +#' ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today())) +#' # Using data only from march 2020 forward +#' ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01") +#' head(ilifor_2020$ili_bound) +#' tail(ilifor_2020$ili_bound) +#' ilifor_2020$ili_fit +#' ilifor_2020$ili_fit %>% focustools::extract_arima_params() +#' ilifor_2020$ili_forecast +#' # Using all the data we have (2010-forward, in this example) +#' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US") +#' head(ilifor_2010$ili_bound) +#' tail(ilifor_2010$ili_bound) +#' } +#' @export +forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL) { + + # If trim_date is not null, trim to selected trim_date + if (!is.null(trim_date)) { + ilidat <- + ilidat %>% + dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) + } + + ## subset to selected location and get columns you care about + ilidat <- + ilidat %>% + dplyr::filter(location %in% location) %>% + dplyr::select(location, year, week, weighted_ili) + + + ## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week + ilidat_tsibble <- + ilidat %>% + fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE) + + # Nonseasonal fit: PDQ(0, 0, 0) + # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) + ili_fit <- fabletools::model(ilidat_tsibble, + arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5), + stepwise=FALSE, + approximation=FALSE)) + # ili_fit %>% focustools::extract_arima_params() + + ## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools + # ili_forecast <- focustools::ts_forecast(ili_fit, outcome = "icases") + + # Get the forecast + ili_forecast <- fabletools::forecast(ili_fit, h=horizon) + + ## Look at the quantiles + # ili_forecast %>% + # fabletools::hilo() + # ili_forecast %>% + # fabletools::hilo() %>% + # fabletools::unpack_hilo(`80%`) %>% + # fabletools::unpack_hilo(`95%`) + + # Get the next #horizon weeks in a tibble + ili_future <- ili_forecast %>% + tibble::as_tibble() %>% + dplyr::mutate(year=lubridate::epiyear(yweek)) %>% + dplyr::mutate(week=lubridate::epiweek(yweek)) %>% + dplyr::select(location, year, week, weighted_ili=.mean) + + # bind the historical data to the new data + ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), + ili_future %>% dplyr::mutate(forecasted=TRUE)) + + # Create results + res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound) + return(res) +} diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd new file mode 100644 index 0000000..479480d --- /dev/null +++ b/man/forecast_ili.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/forecast.R +\name{forecast_ili} +\alias{forecast_ili} +\title{Forecast ILI} +\usage{ +forecast_ili(ilidat, horizon = 4L, location = "US", trim_date = NULL) +} +\arguments{ +\item{ilidat}{Data returned from \link{get_cdc_ili}.} + +\item{horizon}{Optional horizon periods through which the forecasts should be generated; default is \code{4}} + +\item{location}{Vector specifying locations to filter to; \code{'US'} by default.} + +\item{trim_date}{Earliest start date you want to use for ILI data. Default \code{NULL} doesn't trim.} +} +\value{ +A named list containing: +\enumerate{ +\item \code{ilidat}: The data sent into the function filtered to the location and the \code{trim_date}. Select columns returned. +\item \code{ilidat_tsibble}: The \code{tsibble} class object returned by running \link{make_tsibble} on the data above. +\item \code{ili_fit}: The fit from \link[fabletools:model]{fabletools::model}. +\item \code{ili_forecast}: The forecast from \link[fabletools:forecast]{fabletools::forecast} at the specified horizon. +\item \code{ili_future}: The \code{horizon}-number of weeks of ILI data forecasted into the future. +\item \code{ili_bound}: The data in 1 bound to the data in 5. +} +} +\description{ +Forecasts ILI up to specified weeks in the future. Used in downstream modeling. +} +\details{ +Currently limited to one location only. +} +\examples{ +\dontrun{ +# Get data +ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today())) +# Using data only from march 2020 forward +ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01") +head(ilifor_2020$ili_bound) +tail(ilifor_2020$ili_bound) +ilifor_2020$ili_fit +ilifor_2020$ili_fit \%>\% focustools::extract_arima_params() +ilifor_2020$ili_forecast +# Using all the data we have (2010-forward, in this example) +ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US") +head(ilifor_2010$ili_bound) +tail(ilifor_2010$ili_bound) +} +} From df64e62c2cf76079554d09c75f0c169322d2dcdb Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 11:11:45 -0500 Subject: [PATCH 03/23] add constrained parameter, additional examples, and extracted arima params without needing focustools dependency --- R/forecast.R | 56 +++++++++++++++++++++++++++++++++------------ man/forecast_ili.Rd | 34 +++++++++++++++++++++++---- 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/R/forecast.R b/R/forecast.R index 2dbcc23..a149fdb 100644 --- a/R/forecast.R +++ b/R/forecast.R @@ -5,6 +5,7 @@ #' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4` #' @param location Vector specifying locations to filter to; `'US'` by default. #' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim. +#' @param constrained Should the model be constrained to a non-seasonal model? Default `TRUE` sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See [fable::ARIMA]. #' @return A named list containing: #' 1. `ilidat`: The data sent into the function filtered to the location and the `trim_date`. Select columns returned. #' 1. `ilidat_tsibble`: The `tsibble` class object returned by running [make_tsibble] on the data above. @@ -12,6 +13,7 @@ #' 1. `ili_forecast`: The forecast from [fabletools::forecast] at the specified horizon. #' 1. `ili_future`: The `horizon`-number of weeks of ILI data forecasted into the future. #' 1. `ili_bound`: The data in 1 bound to the data in 5. +#' 1. `arima_params`: A named character vector of the ARIMA model parameters. #' @examples #' \dontrun{ #' # Get data @@ -19,17 +21,34 @@ #' # Using data only from march 2020 forward #' ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01") #' head(ilifor_2020$ili_bound) -#' tail(ilifor_2020$ili_bound) +#' tail(ilifor_2020$ili_bound, 10) #' ilifor_2020$ili_fit #' ilifor_2020$ili_fit %>% focustools::extract_arima_params() +#' ilifor_2020$arima_params #' ilifor_2020$ili_forecast #' # Using all the data we have (2010-forward, in this example) -#' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US") +#' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE) #' head(ilifor_2010$ili_bound) -#' tail(ilifor_2010$ili_bound) +#' tail(ilifor_2010$ili_bound, 10) +#' ilifor_2010$ili_fit +#' ilifor_2010$ili_fit %>% focustools::extract_arima_params() +#' ilifor_2010$arima_params +#' ilifor_2010$ili_forecast +#' # Plot both forecasts +#' library(dplyr) +#' library(tidyr) +#' library(ggplot2) +#' inner_join(ilifor_2020$ili_bound %>% rename(nonseasonal_weighted_ili=weighted_ili), +#' ilifor_2010$ili_bound %>% rename(unconstrained_weighted_ili=weighted_ili), +#' by = c("location", "year", "week", "forecasted")) %>% +#' gather(key, value, ends_with("ili")) %>% +#' mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% +#' filter(date>"2021-01-01") %>% +#' ggplot(aes(date, value)) + geom_point(aes(col=forecasted)) + +#' facet_wrap(~key) #' } #' @export -forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL) { +forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, constrained=TRUE) { # If trim_date is not null, trim to selected trim_date if (!is.null(trim_date)) { @@ -50,16 +69,25 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL) { ilidat %>% fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE) - # Nonseasonal fit: PDQ(0, 0, 0) - # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) - ili_fit <- fabletools::model(ilidat_tsibble, - arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5), - stepwise=FALSE, - approximation=FALSE)) - # ili_fit %>% focustools::extract_arima_params() + # Defaults to constrained, non-seasonal model. + if (constrained) { + # Nonseasonal fit: PDQ(0, 0, 0) + # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) + message("Fitting nonseasonal ARIMA model ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5)") + ili_fit <- fabletools::model(ilidat_tsibble, + arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5), + stepwise=FALSE, + approximation=FALSE)) + } else { + # If unconstrained, need to set stepwise=TRUE and approxmiation=NULL to speed up. + message("Fitting unconstrained ARIMA model...") + ili_fit <- fabletools::model(ilidat_tsibble, + arima = fable::ARIMA(weighted_ili, + stepwise=TRUE, + approximation=NULL)) + } - ## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools - # ili_forecast <- focustools::ts_forecast(ili_fit, outcome = "icases") + arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6]) # Get the forecast ili_forecast <- fabletools::forecast(ili_fit, h=horizon) @@ -84,6 +112,6 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL) { ili_future %>% dplyr::mutate(forecasted=TRUE)) # Create results - res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound) + res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params) return(res) } diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd index 479480d..bd56905 100644 --- a/man/forecast_ili.Rd +++ b/man/forecast_ili.Rd @@ -4,7 +4,13 @@ \alias{forecast_ili} \title{Forecast ILI} \usage{ -forecast_ili(ilidat, horizon = 4L, location = "US", trim_date = NULL) +forecast_ili( + ilidat, + horizon = 4L, + location = "US", + trim_date = NULL, + constrained = TRUE +) } \arguments{ \item{ilidat}{Data returned from \link{get_cdc_ili}.} @@ -14,6 +20,8 @@ forecast_ili(ilidat, horizon = 4L, location = "US", trim_date = NULL) \item{location}{Vector specifying locations to filter to; \code{'US'} by default.} \item{trim_date}{Earliest start date you want to use for ILI data. Default \code{NULL} doesn't trim.} + +\item{constrained}{Should the model be constrained to a non-seasonal model? Default \code{TRUE} sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See \link[fable:ARIMA]{fable::ARIMA}.} } \value{ A named list containing: @@ -24,6 +32,7 @@ A named list containing: \item \code{ili_forecast}: The forecast from \link[fabletools:forecast]{fabletools::forecast} at the specified horizon. \item \code{ili_future}: The \code{horizon}-number of weeks of ILI data forecasted into the future. \item \code{ili_bound}: The data in 1 bound to the data in 5. +\item \code{arima_params}: A named character vector of the ARIMA model parameters. } } \description{ @@ -39,13 +48,30 @@ ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::t # Using data only from march 2020 forward ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01") head(ilifor_2020$ili_bound) -tail(ilifor_2020$ili_bound) +tail(ilifor_2020$ili_bound, 10) ilifor_2020$ili_fit ilifor_2020$ili_fit \%>\% focustools::extract_arima_params() +ilifor_2020$arima_params ilifor_2020$ili_forecast # Using all the data we have (2010-forward, in this example) -ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US") +ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE) head(ilifor_2010$ili_bound) -tail(ilifor_2010$ili_bound) +tail(ilifor_2010$ili_bound, 10) +ilifor_2010$ili_fit +ilifor_2010$ili_fit \%>\% focustools::extract_arima_params() +ilifor_2010$arima_params +ilifor_2010$ili_forecast +# Plot both forecasts +library(dplyr) +library(tidyr) +library(ggplot2) +inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_weighted_ili=weighted_ili), + ilifor_2010$ili_bound \%>\% rename(unconstrained_weighted_ili=weighted_ili), + by = c("location", "year", "week", "forecasted")) \%>\% + gather(key, value, ends_with("ili")) \%>\% + mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\% + filter(date>"2021-01-01") \%>\% + ggplot(aes(date, value)) + geom_point(aes(col=forecasted)) + + facet_wrap(~key) } } From a804efc86fbb15cab8187d60fabc598183f038fd Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 11:14:24 -0500 Subject: [PATCH 04/23] better plot in examples --- R/forecast.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/forecast.R b/R/forecast.R index a149fdb..bcfe505 100644 --- a/R/forecast.R +++ b/R/forecast.R @@ -43,9 +43,12 @@ #' by = c("location", "year", "week", "forecasted")) %>% #' gather(key, value, ends_with("ili")) %>% #' mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% -#' filter(date>"2021-01-01") %>% -#' ggplot(aes(date, value)) + geom_point(aes(col=forecasted)) + -#' facet_wrap(~key) +#' filter(date>"2021-07-01") %>% +#' ggplot(aes(date, value)) + +#' geom_line(alpha=.5) + +#' geom_point(aes(col=forecasted)) + +#' facet_wrap(~key) + +#' theme_bw() #' } #' @export forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, constrained=TRUE) { From 8bf0edabd4d4e6f2937bc6417b22022b7f443b13 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 11:48:03 -0500 Subject: [PATCH 05/23] add glm script from fluforce-init into scratch --- scratch/glm.R | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 scratch/glm.R diff --git a/scratch/glm.R b/scratch/glm.R new file mode 100644 index 0000000..d4fc534 --- /dev/null +++ b/scratch/glm.R @@ -0,0 +1,187 @@ +library(tidyverse) +library(trendeval) +library(trending) + +glm_fit <- function(.data, + .models, + metrics = list(yardstick::rmse, yardstick::huber_loss, yardstick::mae)) { + + dat <- .data + + message(paste0("Location ... ")) + message(unique(dat$location)) + + ## evaluate models and use metrics provided in arg + res <- + trendeval::evaluate_models( + .models, + dat, + method = evaluate_resampling, + metrics = metrics + ) + + ## pull the best model by rmse + best_by_rmse <- + res %>% + # dplyr::filter(purrr::map_lgl(warning, is.null)) %>% # remove models that gave warnings + dplyr::filter(purrr::map_lgl(error, is.null)) %>% # remove models that errored + dplyr::slice_min(rmse) %>% + dplyr::select(model) %>% + purrr::pluck(1,1) + + ## fit the model + tmp_fit <- + best_by_rmse %>% + fit(dat) + + ## construct tibble with model type, actual fit, and the location + ret <- dplyr::tibble(model_class = best_by_rmse$model_class, + fit = tmp_fit, + location = unique(dat$location), + data = tidyr::nest(dat, fit_data = everything())) + + message("Selected model ...") + message(as.character(ret$fit$fitted_model$family)[1]) + + message("Variables ...") + message(paste0(names(ret$fit$fitted_model$coefficients), collapse = " + ")) + return(ret) + +} + + + +## helper to get the quantiles from prediction intervals +glm_quibble <- function(fit, new_data, alpha) { + + ## get the quantiles from the alpha + q_lower <- alpha/2 + q_upper <- 1 - q_lower + + ## run the predict method on the fitted model + ## use the given alpha + fit %>% + predict(new_data, alpha = alpha) %>% + ## get just the prediction interval bounds ... + ## index (time column must be named index) ... + select(epiyear, epiweek, lower_pi, upper_pi) %>% + ## reshape so that its in long format + gather(quantile, value, lower_pi:upper_pi) %>% + ## and subout out lower_pi/upper_pi for the appropriate quantile + mutate(quantile = ifelse(quantile == "lower_pi", q_lower, q_upper)) +} + + +## a generic forecast function to use fit objects from above +glm_forecast <- function(.data, new_covariates = NULL, fit, alpha = 0.05) { + + ## get the last date from the data provided + last_date <- + .data %>% + dplyr::arrange(location, epiweek, epiyear) %>% + dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% + dplyr::pull(date) %>% + tail(1) + + tmp <- .data + new_data <- + dplyr::tibble(lag_1 = tail(tmp$flu.admits, 4)[4], + lag_2 = tail(tmp$flu.admits, 4)[3], + lag_3 = tail(tmp$flu.admits, 4)[2], + lag_4 = tail(tmp$flu.admits, 4)[1], + epiweek = lubridate::epiweek(last_date + 7), + epiyear = lubridate::epiyear(last_date + 7)) + + ## this should allow for a constant + if(!is.null(new_covariates)) { + new_data <- + cbind(new_data,new_covariates) + } + # ## take the fit object provided and use predict + point_estimates <- + fit %>% + predict(new_data) %>% + dplyr::select(epiweek, epiyear, estimate) %>% + dplyr::mutate(estimate = round(estimate)) %>% + dplyr::mutate(quantile = NA) %>% + dplyr::select(epiweek, epiyear, quantile, value = estimate) + + ## map the quibble function over the alphas + quants <- map_df(alpha, .f = function(x) glm_quibble(fit = fit, new_data = new_data, alpha = x)) + + ## prep data + dplyr::bind_rows(point_estimates,quants) %>% + dplyr::arrange(epiyear,epiweek, quantile) %>% + dplyr::left_join(new_data, by = c("epiweek","epiyear")) %>% + dplyr::select(epiweek,epiyear,quantile,value) +} + +glm_wrap <- function(.data, .models, new_covariates = NULL, metrics = list(yardstick::rmse, yardstick::huber_loss, yardstick::mae), horizon = 4, alpha = 0.05) { + tmp_fit <- glm_fit(.data, .models = .models) + + stopifnot(nrow(new_covariates) == horizon) + message("Forecasting 1 week ahead") + tmp_forc <- + glm_forecast(.data = .data, new_covariates = new_covariates[1,], fit = tmp_fit$fit, alpha = alpha) + + forc_list <- list() + forc_list[[1]] <- tmp_forc + + if(horizon > 1) { + + for(i in 2:horizon) { + + message(sprintf("Forecasting %d week ahead",i)) + + prev_weeks <- + do.call("rbind", forc_list) %>% + ## get point estimate ... which will have NA quantile value + dplyr::filter(is.na(quantile)) %>% + dplyr::rename(flu.admits = value) %>% + dplyr::select(-quantile) %>% + dplyr::mutate(flu.admits.cov = NA, location = "US") + + forc_list[[i]] <- glm_forecast(.data = bind_rows(.data, prev_weeks), + new_covariates = new_covariates[i,], + fit = tmp_fit$fit, + alpha = alpha) + + } + } + forc_res <- do.call("rbind", forc_list) + return(list(model = tmp_fit, forecasts = forc_res)) +} + +plot_forc <- function(.forecasts, .train, .test) { + + forc_dat <- + .forecasts %>% + dplyr::filter(quantile %in% c(NA,0.025,0.975)) %>% + tidyr::spread(quantile,value) %>% + dplyr::rename(lower = `0.025`, upper = `0.975`, mean = ``) + + .test %>% + dplyr::bind_rows(.train) %>% + dplyr::select(epiweek,epiyear, truth = flu.admits, location) %>% + dplyr::left_join(forc_dat) %>% + dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% + ggplot2::ggplot() + + ggplot2::geom_line(ggplot2::aes(date,truth), lwd = 2, col = "black") + + ggplot2::geom_line(ggplot2::aes(date,mean), lwd = 2, alpha = 0.5, lty = "solid", col = "firebrick") + + ggplot2::geom_ribbon(ggplot2::aes(date, ymin = lower, ymax = upper), alpha = 0.25, fill = "firebrick") + + ## get an upper limit from whatever the max of observed or forcasted hospitalizations is + ggplot2::scale_y_continuous(limits = c(0,max(c(.test$flu.admits, .train$flu.admits, forc_dat$upper)))) + + ggplot2::scale_x_date(date_labels = "%Y-%m", date_breaks = "month") + + ggplot2::labs(x = "Date", y = "Count", title = "Influenza hospitalizations") + + ggplot2::theme_minimal() + + ggplot2::facet_wrap(~ location) + +} + +wis_score <- function(.forecasts, .test) { + .forecasts %>% + dplyr::left_join(test_dat) %>% + dplyr::select(epiweek,epiyear,quantile,value,flu.admits) %>% + dplyr::group_by(epiweek, epiyear) %>% + dplyr::summarise(wis = evalcast::weighted_interval_score(quantile = quantile, value = value, actual_value = flu.admits)) +} From e8f89f73eda3683888800e71496b1df4b56998bd Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 11:48:17 -0500 Subject: [PATCH 06/23] replace stuff at top with new functions --- scratch/ili-forecast.R | 78 ++++-------------------------------------- 1 file changed, 7 insertions(+), 71 deletions(-) diff --git a/scratch/ili-forecast.R b/scratch/ili-forecast.R index 3a9802b..703e2de 100644 --- a/scratch/ili-forecast.R +++ b/scratch/ili-forecast.R @@ -1,75 +1,10 @@ library(fiphde) -# library(tidyverse) -# library(lubridate) -# library(focustools) -# library(fabletools) -# library(fable) +ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today())) +ilifor <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01", constrained=TRUE) +ilifor$arima_params +ilifor$ili_bound %>% tail() -### Function arguments -#' @param trim_date Earliest start date you want to use for ILI data -#' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4` -#' @param region Either "national" or "state" or c("national", "state") for both national and state-level data. -#' @param location Vector specifying locations to filter to; `'US'` by default. -trim_date="2020-03-01" -horizon=4 -region="national" -location="US" - -# Set the years passed to get_cdc_ili starting with the year of the trim_date to the current year -years <- lubridate::year(trim_date):lubridate::year(lubridate::today()) - -## retreive ILI data -ilidat_all <- get_cdc_ili(region=region, years=years) - -## subset to US only and subset to the trim date to present -ilidat <- - ilidat_all %>% - dplyr::filter(location %in% location) %>% - dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) %>% - dplyr::select(location, year, week, weighted_ili) - -## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week -ilidat_tsibble <- - ilidat %>% - fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE) -tail(ilidat_tsibble) - -# Nonseasonal fit: PDQ(0, 0, 0) -# Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) -ili_fit <- fabletools::model(ilidat_tsibble, - arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5), - stepwise=FALSE, - approximation=FALSE)) -# ili_fit %>% focustools::extract_arima_params() - -## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools -# ili_forecast <- focustools::ts_forecast(ili_fit, outcome = "icases") - -# Get the forecast -ili_forecast <- fabletools::forecast(ili_fit, h=horizon) - -## Look at the quantiles -# ili_forecast %>% -# fabletools::hilo() -# ili_forecast %>% -# fabletools::hilo() %>% -# fabletools::unpack_hilo(`80%`) %>% -# fabletools::unpack_hilo(`95%`) - -# Get the next #horizon weeks in a tibble -ili_future <- ili_forecast %>% - tibble::as_tibble() %>% - dplyr::mutate(year=lubridate::epiyear(yweek)) %>% - dplyr::mutate(week=lubridate::epiweek(yweek)) %>% - dplyr::select(location, year, week, weighted_ili=.mean) - - -ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), - ili_future %>% dplyr::mutate(forecasted=TRUE)) - -return(ili_bound) - -source("glm.R") +source(here::here("scratch/glm.R")) hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12") @@ -83,7 +18,8 @@ tmp_weekly_flu <- flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE), .groups = "drop") %>% mutate(location = "US", .before = "epiweek") %>% - left_join(ilidat) + left_join(ilidat) %>% + print() ## add lag columns tmp_weekly_flu_w_lag <- From 93df399235e14f0e0b6a63b2fe10dca06ba62802 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 12:04:30 -0500 Subject: [PATCH 07/23] updated docs --- man/forecast_ili.Rd | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd index bd56905..98605c5 100644 --- a/man/forecast_ili.Rd +++ b/man/forecast_ili.Rd @@ -70,8 +70,11 @@ inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_weighted_ili=weighted_ by = c("location", "year", "week", "forecasted")) \%>\% gather(key, value, ends_with("ili")) \%>\% mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\% - filter(date>"2021-01-01") \%>\% - ggplot(aes(date, value)) + geom_point(aes(col=forecasted)) + - facet_wrap(~key) + filter(date>"2021-07-01") \%>\% + ggplot(aes(date, value)) + + geom_line(alpha=.5) + + geom_point(aes(col=forecasted)) + + facet_wrap(~key) + + theme_bw() } } From cb401dd5fc2e4ef64409350df705341c8bcb1037 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 12:04:39 -0500 Subject: [PATCH 08/23] using forecasts instead of hardcoded --- scratch/ili-forecast.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scratch/ili-forecast.R b/scratch/ili-forecast.R index 703e2de..4ff25d8 100644 --- a/scratch/ili-forecast.R +++ b/scratch/ili-forecast.R @@ -53,7 +53,7 @@ models <- res <- glm_wrap(train_dat, ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above - new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = c(2.31,2.43,2.45,2.68)), + new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = ilifor$ili_future$weighted_ili), .models = models, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) From 03d60e1af06170559c7d1097e934ec9199738f3e Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 12:21:16 -0500 Subject: [PATCH 09/23] replace all the fable stuff with package functions, and replace hard-coded ili with forecasted values --- scratch/worfklow.R | 54 ++++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 38 deletions(-) diff --git a/scratch/worfklow.R b/scratch/worfklow.R index fe54de9..c45790b 100644 --- a/scratch/worfklow.R +++ b/scratch/worfklow.R @@ -1,41 +1,19 @@ library(fiphde) -library(tidyverse) -library(lubridate) -library(focustools) -library(fabletools) -library(fable) -## retreive ILI data -ilidat <- get_cdc_ili() +# Get data +ilidat <- get_cdc_ili(region="national", years=2020:lubridate::year(lubridate::today())) -## set a date from whihc the -trim_date <- "2020-03-01" +# Forecast ILI +ilifor <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01", constrained=TRUE) -us_ilidat <- - ilidat %>% - filter(location == "US") %>% - mutate(epiyear = epiyear(week_start), - epiweek = epiweek(week_start)) %>% - filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) %>% - select(location, epiyear, epiweek, weighted_ili) +# What are the arima params? +ilifor$arima_params -us_ilidat_tsibble <- - us_ilidat %>% - make_tsibble(chop=TRUE) +# Take a look at the forecasted data +ilifor$ili_future -us_ili_fit <- - us_ilidat_tsibble %>% - ## NOTE: parameter space here ?? - model(arima = ARIMA(weighted_ili~PDQ(0,0,0)+pdq(0:5,0:5,0:5), stepwise=FALSE, approximation=FALSE)) - -us_ili_fit %>% - extract_arima_params() - -## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools -us_ili_forecast <- - focustools::ts_forecast(us_ili_fit, outcome = "icases") - -us_ili_forecast +# Take a look at the forecasted data bound do the real data +ilifor$ili_bound %>% tail(8) hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12") @@ -44,14 +22,14 @@ tmp_weekly_flu <- mutate(date = date - 1) %>% mutate(flu.admits = as.numeric(flu.admits), flu.admits.cov = as.numeric(flu.admits.cov)) %>% - mutate(epiweek = lubridate::epiweek(date), - epiyear = lubridate::epiyear(date)) %>% - group_by(epiweek, epiyear) %>% + mutate(week = lubridate::epiweek(date), + year = lubridate::epiyear(date)) %>% + group_by(year, week) %>% summarise(flu.admits = sum(flu.admits, na.rm = TRUE), flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE), .groups = "drop") %>% - mutate(location = "US", .before = "epiweek") %>% - left_join(us_ilidat) + mutate(location = "US", .before = "week") %>% + left_join(ilifor$ilidat) ## add lag columns tmp_weekly_flu_w_lag <- @@ -85,7 +63,7 @@ models <- res <- glm_wrap(train_dat, ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above - new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = c(2.31,2.43,2.45,2.68)), + new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = ilifor$ili_future$weighted_ili), .models = models, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) From 782f98c3b1191da4d1efa1b2f4bd8e691596a5d3 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 12:21:47 -0500 Subject: [PATCH 10/23] ili-forecast.R is now in workflow.R --- scratch/ili-forecast.R | 63 ------------------------------------------ 1 file changed, 63 deletions(-) delete mode 100644 scratch/ili-forecast.R diff --git a/scratch/ili-forecast.R b/scratch/ili-forecast.R deleted file mode 100644 index 4ff25d8..0000000 --- a/scratch/ili-forecast.R +++ /dev/null @@ -1,63 +0,0 @@ -library(fiphde) -ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today())) -ilifor <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01", constrained=TRUE) -ilifor$arima_params -ilifor$ili_bound %>% tail() - -source(here::here("scratch/glm.R")) - -hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12") - -tmp_weekly_flu <- - hosp %>% - mutate(date = date - 1) %>% - mutate(epiweek = lubridate::epiweek(date), - epiyear = lubridate::epiyear(date)) %>% - group_by(epiweek, epiyear) %>% - summarise(flu.admits = sum(flu.admits, na.rm = TRUE), - flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE), - .groups = "drop") %>% - mutate(location = "US", .before = "epiweek") %>% - left_join(ilidat) %>% - print() - -## add lag columns -tmp_weekly_flu_w_lag <- - tmp_weekly_flu %>% - mutate(lag_1 = lag(flu.admits, 1), - lag_2 = lag(flu.admits, 2), - lag_3 = lag(flu.admits, 3), - lag_4 = lag(flu.admits, 4)) %>% - filter(complete.cases(.)) %>% - dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% - filter(date >= max(date) - 7*24) - -train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3) -test_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() >= n() - 3) - -models <- - list( - glm_poisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "poisson"), - glm_quasipoisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "quasipoisson"), - glm_negbin_ili = trending::glm_nb_model(flu.admits ~ weighted_ili), - glm_poisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "poisson"), - glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "quasipoisson"), - glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + offset(flu.admits.cov)), - glm_poisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"), - glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"), - glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4), - glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"), - glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"), - glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov)) - ) - -res <- glm_wrap(train_dat, - ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above - new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = ilifor$ili_future$weighted_ili), - .models = models, - alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) - -res$forecasts -res$model - -plot_forc(res$forecasts, train_dat, test_dat) From d66db666f34de3bb5a01dcf3a8035b0a1f859be4 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 13:36:13 -0500 Subject: [PATCH 11/23] allow key column to be specified with {{}} --- R/utils.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index 3860643..5ffa4da 100644 --- a/R/utils.R +++ b/R/utils.R @@ -7,13 +7,14 @@ #' @param df A `tibble` containing columns `epiyear` and `epiweek`. #' @param epiyear Unquoted variable name containing the MMWR epiyear. #' @param epiweek Unquoted variable name containing the MMWR epiweek. +#' @param epiweek Unquoted variable name containing the name of the column to be the tsibble key. See [tsibble::as_tsibble]. #' @param chop Logical indicating whether or not to remove the most current week (default `TRUE`). #' @return A `tsibble` containing additional columns `monday` indicating the date #' for the Monday of that epiweek, and `yweek` (a yearweek vctr class object) #' that indexes the `tsibble` in 1 week increments. #' @export #' @md -make_tsibble <- function(df, epiyear, epiweek, chop=TRUE) { +make_tsibble <- function(df, epiyear, epiweek, key=location, chop=TRUE) { out <- df %>% # get the monday that starts the MMWRweek dplyr::mutate(monday=MMWRweek::MMWRweek2Date(MMWRyear={{epiyear}}, @@ -23,7 +24,7 @@ make_tsibble <- function(df, epiyear, epiweek, chop=TRUE) { # convert represent as yearweek (see package?tsibble) dplyr::mutate(yweek=tsibble::yearweek(monday), .after="monday") %>% # convert to tsibble - tsibble::as_tsibble(index=yweek, key=location) + tsibble::as_tsibble(index=yweek, key={{key}}) # Remove the incomplete week if (chop) out <- utils::head(out, -1) return(out) From 833029c9231b52f08b0915b0b2303ea3d49f4643 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 13:56:54 -0500 Subject: [PATCH 12/23] fix documentation for key variable in make_tsibble --- R/utils.R | 2 +- man/make_tsibble.Rd | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index 5ffa4da..3e1e194 100644 --- a/R/utils.R +++ b/R/utils.R @@ -7,7 +7,7 @@ #' @param df A `tibble` containing columns `epiyear` and `epiweek`. #' @param epiyear Unquoted variable name containing the MMWR epiyear. #' @param epiweek Unquoted variable name containing the MMWR epiweek. -#' @param epiweek Unquoted variable name containing the name of the column to be the tsibble key. See [tsibble::as_tsibble]. +#' @param key Unquoted variable name containing the name of the column to be the tsibble key. See [tsibble::as_tsibble]. #' @param chop Logical indicating whether or not to remove the most current week (default `TRUE`). #' @return A `tsibble` containing additional columns `monday` indicating the date #' for the Monday of that epiweek, and `yweek` (a yearweek vctr class object) diff --git a/man/make_tsibble.Rd b/man/make_tsibble.Rd index c346223..60480bf 100644 --- a/man/make_tsibble.Rd +++ b/man/make_tsibble.Rd @@ -4,7 +4,7 @@ \alias{make_tsibble} \title{Make \code{tsibble}} \usage{ -make_tsibble(df, epiyear, epiweek, chop = TRUE) +make_tsibble(df, epiyear, epiweek, key = location, chop = TRUE) } \arguments{ \item{df}{A \code{tibble} containing columns \code{epiyear} and \code{epiweek}.} @@ -13,6 +13,8 @@ make_tsibble(df, epiyear, epiweek, chop = TRUE) \item{epiweek}{Unquoted variable name containing the MMWR epiweek.} +\item{key}{Unquoted variable name containing the name of the column to be the tsibble key. See \link[tsibble:as-tsibble]{tsibble::as_tsibble}.} + \item{chop}{Logical indicating whether or not to remove the most current week (default \code{TRUE}).} } \value{ From 57603f2dc7a630e24695e39ad2a4bf5e13793511 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 13:57:11 -0500 Subject: [PATCH 13/23] small updates to prep for state-level keyed forecasts --- R/forecast.R | 7 ++++--- man/forecast_ili.Rd | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/forecast.R b/R/forecast.R index bcfe505..ac52d4a 100644 --- a/R/forecast.R +++ b/R/forecast.R @@ -17,7 +17,7 @@ #' @examples #' \dontrun{ #' # Get data -#' ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today())) +#' ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today())) #' # Using data only from march 2020 forward #' ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01") #' head(ilifor_2020$ili_bound) @@ -61,16 +61,17 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, cons } ## subset to selected location and get columns you care about + ## fixme: remove this filter column later when you get kinks worked out of state-level forecasts with key ilidat <- ilidat %>% - dplyr::filter(location %in% location) %>% + dplyr::filter(location %in% !!location) %>% dplyr::select(location, year, week, weighted_ili) ## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week ilidat_tsibble <- ilidat %>% - fiphde::make_tsibble(epiyear = year, epiweek = week, chop=FALSE) + make_tsibble(epiyear = year, epiweek = week, key=location, chop=FALSE) # Defaults to constrained, non-seasonal model. if (constrained) { diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd index 98605c5..38a8ed0 100644 --- a/man/forecast_ili.Rd +++ b/man/forecast_ili.Rd @@ -44,7 +44,7 @@ Currently limited to one location only. \examples{ \dontrun{ # Get data -ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today())) +ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today())) # Using data only from march 2020 forward ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01") head(ilifor_2020$ili_bound) From ed46111061b078d1089913f91fe5b7e1b5c9e419 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 17:10:31 -0500 Subject: [PATCH 14/23] first cut at states --- R/fiphde.R | 7 +++++ R/forecast.R | 74 +++++++++++++++++++++++++++++++-------------- man/forecast_ili.Rd | 35 +++++++++------------ 3 files changed, 73 insertions(+), 43 deletions(-) diff --git a/R/fiphde.R b/R/fiphde.R index 8621003..845b15d 100644 --- a/R/fiphde.R +++ b/R/fiphde.R @@ -65,5 +65,12 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c(".", "truth", "rmse", "value", + "unweighted_ili", + "ili", + "miss", + "total", + "pmiss", + "arima", + "x", ".")) diff --git a/R/forecast.R b/R/forecast.R index ac52d4a..e6224fa 100644 --- a/R/forecast.R +++ b/R/forecast.R @@ -3,7 +3,6 @@ #' @details Currently limited to one location only. #' @param ilidat Data returned from [get_cdc_ili]. #' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4` -#' @param location Vector specifying locations to filter to; `'US'` by default. #' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim. #' @param constrained Should the model be constrained to a non-seasonal model? Default `TRUE` sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See [fable::ARIMA]. #' @return A named list containing: @@ -13,19 +12,23 @@ #' 1. `ili_forecast`: The forecast from [fabletools::forecast] at the specified horizon. #' 1. `ili_future`: The `horizon`-number of weeks of ILI data forecasted into the future. #' 1. `ili_bound`: The data in 1 bound to the data in 5. -#' 1. `arima_params`: A named character vector of the ARIMA model parameters. +#' 1. `arima_params`: A tibble with ARIMA model parameters for each location. +#' 1. `locstats`: A tibble with missing data information on all locations. +#' 1. `removed`: A tibble with locations removed because of high missing ILI data. #' @examples #' \dontrun{ #' # Get data #' ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today())) -#' # Using data only from march 2020 forward -#' ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01") -#' head(ilifor_2020$ili_bound) -#' tail(ilifor_2020$ili_bound, 10) -#' ilifor_2020$ili_fit -#' ilifor_2020$ili_fit %>% focustools::extract_arima_params() -#' ilifor_2020$arima_params -#' ilifor_2020$ili_forecast +#' +#' # Using data only from march 2020 forward, for US only +#' ilidat_us <- ilidat %>% dplyr::filter(location=="US") +#' ilifor_us <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01") +#' head(ilifor_us$ili_bound) +#' tail(ilifor_us$ili_bound, 10) +#' ilifor_us$ili_fit +#' ilifor_us$ili_fit %>% focustools::extract_arima_params() +#' ilifor_us$arima_params +#' ilifor_us$ili_forecast #' # Using all the data we have (2010-forward, in this example) #' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE) #' head(ilifor_2010$ili_bound) @@ -38,8 +41,8 @@ #' library(dplyr) #' library(tidyr) #' library(ggplot2) -#' inner_join(ilifor_2020$ili_bound %>% rename(nonseasonal_weighted_ili=weighted_ili), -#' ilifor_2010$ili_bound %>% rename(unconstrained_weighted_ili=weighted_ili), +#' inner_join(ilifor_2020$ili_bound %>% rename(nonseasonal_unweighted_ili=unweighted_ili), +#' ilifor_2010$ili_bound %>% rename(unconstrained_unweighted_ili=unweighted_ili), #' by = c("location", "year", "week", "forecasted")) %>% #' gather(key, value, ends_with("ili")) %>% #' mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% @@ -51,7 +54,7 @@ #' theme_bw() #' } #' @export -forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, constrained=TRUE) { +forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { # If trim_date is not null, trim to selected trim_date if (!is.null(trim_date)) { @@ -60,13 +63,30 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, cons dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) } - ## subset to selected location and get columns you care about - ## fixme: remove this filter column later when you get kinks worked out of state-level forecasts with key + # Select just the columns you care about, and call "ili" the measure you're using ilidat <- ilidat %>% - dplyr::filter(location %in% !!location) %>% - dplyr::select(location, year, week, weighted_ili) + dplyr::select(location, year, week, ili=unweighted_ili) + # Get missing data rates + locstats <- ilidat %>% + dplyr::group_by(location) %>% + dplyr::summarize(miss=sum(is.na(ili)), total=dplyr::n()) %>% + dplyr::mutate(pmiss=miss/total) %>% + dplyr::arrange(dplyr::desc(pmiss)) %>% + dplyr::mutate(remove=pmiss>.1) + + # Get locations that will be removed + removed <- locstats %>% + dplyr::filter(remove) %>% + dplyr::inner_join(locations, by="location") + if(nrow(removed)>0) message(sprintf("Removed %s row(s) because of missing data. See result$removed.", nrow(removed))) + + # Remove those locations + ilidat <- locstats %>% + dplyr::filter(!remove) %>% + dplyr::distinct(location) %>% + dplyr::inner_join(ilidat, by="location") ## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week ilidat_tsibble <- @@ -79,19 +99,26 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, cons # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) message("Fitting nonseasonal ARIMA model ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5)") ili_fit <- fabletools::model(ilidat_tsibble, - arima = fable::ARIMA(weighted_ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5), + arima = fable::ARIMA(ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5), stepwise=FALSE, approximation=FALSE)) } else { # If unconstrained, need to set stepwise=TRUE and approxmiation=NULL to speed up. message("Fitting unconstrained ARIMA model...") ili_fit <- fabletools::model(ilidat_tsibble, - arima = fable::ARIMA(weighted_ili, + arima = fable::ARIMA(ili, stepwise=TRUE, approximation=NULL)) } - arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6]) + + # arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6]) + arima_params <- + ili_fit %>% + dplyr::mutate(x=purrr::map(arima, ~purrr::pluck(., "fit") %>% purrr::pluck("spec"))) %>% + tidyr::unnest_wider(col=x) %>% + dplyr::select(-arima) + # Get the forecast ili_forecast <- fabletools::forecast(ili_fit, h=horizon) @@ -109,13 +136,14 @@ forecast_ili <- function(ilidat, horizon=4L, location="US", trim_date=NULL, cons tibble::as_tibble() %>% dplyr::mutate(year=lubridate::epiyear(yweek)) %>% dplyr::mutate(week=lubridate::epiweek(yweek)) %>% - dplyr::select(location, year, week, weighted_ili=.mean) + dplyr::select(location, year, week, ili=.mean) # bind the historical data to the new data ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), - ili_future %>% dplyr::mutate(forecasted=TRUE)) + ili_future %>% dplyr::mutate(forecasted=TRUE)) %>% + dplyr::arrange(location, year, week) # Create results - res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params) + res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params, locstats, removed) return(res) } diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd index 38a8ed0..a0dd731 100644 --- a/man/forecast_ili.Rd +++ b/man/forecast_ili.Rd @@ -4,21 +4,13 @@ \alias{forecast_ili} \title{Forecast ILI} \usage{ -forecast_ili( - ilidat, - horizon = 4L, - location = "US", - trim_date = NULL, - constrained = TRUE -) +forecast_ili(ilidat, horizon = 4L, trim_date = NULL, constrained = TRUE) } \arguments{ \item{ilidat}{Data returned from \link{get_cdc_ili}.} \item{horizon}{Optional horizon periods through which the forecasts should be generated; default is \code{4}} -\item{location}{Vector specifying locations to filter to; \code{'US'} by default.} - \item{trim_date}{Earliest start date you want to use for ILI data. Default \code{NULL} doesn't trim.} \item{constrained}{Should the model be constrained to a non-seasonal model? Default \code{TRUE} sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See \link[fable:ARIMA]{fable::ARIMA}.} @@ -32,7 +24,9 @@ A named list containing: \item \code{ili_forecast}: The forecast from \link[fabletools:forecast]{fabletools::forecast} at the specified horizon. \item \code{ili_future}: The \code{horizon}-number of weeks of ILI data forecasted into the future. \item \code{ili_bound}: The data in 1 bound to the data in 5. -\item \code{arima_params}: A named character vector of the ARIMA model parameters. +\item \code{arima_params}: A tibble with ARIMA model parameters for each location. +\item \code{locstats}: A tibble with missing data information on all locations. +\item \code{removed}: A tibble with locations removed because of high missing ILI data. } } \description{ @@ -45,14 +39,15 @@ Currently limited to one location only. \dontrun{ # Get data ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today())) -# Using data only from march 2020 forward -ilifor_2020 <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01") -head(ilifor_2020$ili_bound) -tail(ilifor_2020$ili_bound, 10) -ilifor_2020$ili_fit -ilifor_2020$ili_fit \%>\% focustools::extract_arima_params() -ilifor_2020$arima_params -ilifor_2020$ili_forecast +# Using data only from march 2020 forward, for US only +ilidat_us <- ilidat \%>\% dplyr::filter(location=="US") +ilifor_us <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01") +head(ilifor_us$ili_bound) +tail(ilifor_us$ili_bound, 10) +ilifor_us$ili_fit +ilifor_us$ili_fit \%>\% focustools::extract_arima_params() +ilifor_us$arima_params +ilifor_us$ili_forecast # Using all the data we have (2010-forward, in this example) ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE) head(ilifor_2010$ili_bound) @@ -65,8 +60,8 @@ ilifor_2010$ili_forecast library(dplyr) library(tidyr) library(ggplot2) -inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_weighted_ili=weighted_ili), - ilifor_2010$ili_bound \%>\% rename(unconstrained_weighted_ili=weighted_ili), +inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_unweighted_ili=unweighted_ili), + ilifor_2010$ili_bound \%>\% rename(unconstrained_unweighted_ili=unweighted_ili), by = c("location", "year", "week", "forecasted")) \%>\% gather(key, value, ends_with("ili")) \%>\% mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\% From e69998a5553d55d10d2551cdb5be64e174ecfaf5 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Tue, 21 Dec 2021 17:37:00 -0500 Subject: [PATCH 15/23] state-level forecasting --- R/forecast.R | 61 +++++++++++++++++++++++++-------------------- R/utils.R | 1 + man/forecast_ili.Rd | 54 +++++++++++++++++++++------------------ 3 files changed, 65 insertions(+), 51 deletions(-) diff --git a/R/forecast.R b/R/forecast.R index e6224fa..0ee8f12 100644 --- a/R/forecast.R +++ b/R/forecast.R @@ -23,35 +23,40 @@ #' # Using data only from march 2020 forward, for US only #' ilidat_us <- ilidat %>% dplyr::filter(location=="US") #' ilifor_us <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01") -#' head(ilifor_us$ili_bound) -#' tail(ilifor_us$ili_bound, 10) #' ilifor_us$ili_fit -#' ilifor_us$ili_fit %>% focustools::extract_arima_params() #' ilifor_us$arima_params #' ilifor_us$ili_forecast -#' # Using all the data we have (2010-forward, in this example) -#' ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE) -#' head(ilifor_2010$ili_bound) -#' tail(ilifor_2010$ili_bound, 10) -#' ilifor_2010$ili_fit -#' ilifor_2010$ili_fit %>% focustools::extract_arima_params() -#' ilifor_2010$arima_params -#' ilifor_2010$ili_forecast -#' # Plot both forecasts +#' head(ilifor_us$ili_bound) +#' tail(ilifor_us$ili_bound, 10) +#' # Plot +#' library(dplyr) +#' library(ggplot2) +#' theme_set(theme_classic()) +#' ilifor_us$ili_bound %>% +#' mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% +#' filter(date>"2021-03-01") %>% +#' ggplot(aes(date, ili)) + +#' geom_line(lwd=.3, alpha=.5) + +#' geom_point(aes(col=forecasted), size=2) +#' +#' # At the state level +#' ilifor_st <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01") +#' ilifor_st$ili_fit +#' ilifor_st$arima_params +#' ilifor_st$ili_forecast +#' head(ilifor_us$ili_bound) +#' tail(ilifor_us$ili_bound, 10) +#' # Plot #' library(dplyr) -#' library(tidyr) #' library(ggplot2) -#' inner_join(ilifor_2020$ili_bound %>% rename(nonseasonal_unweighted_ili=unweighted_ili), -#' ilifor_2010$ili_bound %>% rename(unconstrained_unweighted_ili=unweighted_ili), -#' by = c("location", "year", "week", "forecasted")) %>% -#' gather(key, value, ends_with("ili")) %>% +#' theme_set(theme_classic()) +#' ilifor_st$ili_bound %>% #' mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% -#' filter(date>"2021-07-01") %>% -#' ggplot(aes(date, value)) + -#' geom_line(alpha=.5) + -#' geom_point(aes(col=forecasted)) + -#' facet_wrap(~key) + -#' theme_bw() +#' filter(date>"2021-08-01") %>% +#' ggplot(aes(date, ili, col=forecasted)) + +#' geom_line(lwd=.3) + +#' geom_point(aes(col=forecasted), size=.7) + +#' facet_wrap(~abbreviation, scale="free_y") #' } #' @export forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { @@ -97,9 +102,9 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { if (constrained) { # Nonseasonal fit: PDQ(0, 0, 0) # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) - message("Fitting nonseasonal ARIMA model ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5)") + message("Fitting nonseasonal constrained ARIMA model...") ili_fit <- fabletools::model(ilidat_tsibble, - arima = fable::ARIMA(ili ~ PDQ(0,0,0) + pdq(0:5,0:5,0:5), + arima = fable::ARIMA(ili ~ PDQ(0,0,0) + pdq(1:2,0:2,0), stepwise=FALSE, approximation=FALSE)) } else { @@ -112,12 +117,13 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { } - # arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6]) + # # arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6]) arima_params <- ili_fit %>% dplyr::mutate(x=purrr::map(arima, ~purrr::pluck(., "fit") %>% purrr::pluck("spec"))) %>% tidyr::unnest_wider(col=x) %>% dplyr::select(-arima) + # arima_params <- ili_fit %>% extract_arima_params() # Get the forecast @@ -141,7 +147,8 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { # bind the historical data to the new data ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), ili_future %>% dplyr::mutate(forecasted=TRUE)) %>% - dplyr::arrange(location, year, week) + dplyr::arrange(location, year, week) %>% + dplyr::inner_join(locations, by="location") # Create results res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params, locstats, removed) diff --git a/R/utils.R b/R/utils.R index 901b2c1..e419f4c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -114,3 +114,4 @@ plot_forc <- function(.forecasts, .train, .test) { } + diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd index a0dd731..fff2c66 100644 --- a/man/forecast_ili.Rd +++ b/man/forecast_ili.Rd @@ -39,37 +39,43 @@ Currently limited to one location only. \dontrun{ # Get data ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today())) + # Using data only from march 2020 forward, for US only ilidat_us <- ilidat \%>\% dplyr::filter(location=="US") -ilifor_us <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01") -head(ilifor_us$ili_bound) -tail(ilifor_us$ili_bound, 10) +ilifor_us <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01") ilifor_us$ili_fit -ilifor_us$ili_fit \%>\% focustools::extract_arima_params() ilifor_us$arima_params ilifor_us$ili_forecast -# Using all the data we have (2010-forward, in this example) -ilifor_2010 <- forecast_ili(ilidat, horizon=4L, location="US", constrained=FALSE) -head(ilifor_2010$ili_bound) -tail(ilifor_2010$ili_bound, 10) -ilifor_2010$ili_fit -ilifor_2010$ili_fit \%>\% focustools::extract_arima_params() -ilifor_2010$arima_params -ilifor_2010$ili_forecast -# Plot both forecasts +head(ilifor_us$ili_bound) +tail(ilifor_us$ili_bound, 10) +# Plot +library(dplyr) +library(ggplot2) +theme_set(theme_classic()) +ilifor_us$ili_bound \%>\% + mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\% + filter(date>"2021-03-01") \%>\% + ggplot(aes(date, ili)) + + geom_line(lwd=.3, alpha=.5) + + geom_point(aes(col=forecasted), size=2) + +# At the state level +ilifor_st <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01") +ilifor_st$ili_fit +ilifor_st$arima_params +ilifor_st$ili_forecast +head(ilifor_us$ili_bound) +tail(ilifor_us$ili_bound, 10) +# Plot library(dplyr) -library(tidyr) library(ggplot2) -inner_join(ilifor_2020$ili_bound \%>\% rename(nonseasonal_unweighted_ili=unweighted_ili), - ilifor_2010$ili_bound \%>\% rename(unconstrained_unweighted_ili=unweighted_ili), - by = c("location", "year", "week", "forecasted")) \%>\% - gather(key, value, ends_with("ili")) \%>\% +theme_set(theme_classic()) +ilifor_st$ili_bound \%>\% mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\% - filter(date>"2021-07-01") \%>\% - ggplot(aes(date, value)) + - geom_line(alpha=.5) + - geom_point(aes(col=forecasted)) + - facet_wrap(~key) + - theme_bw() + filter(date>"2021-08-01") \%>\% + ggplot(aes(date, ili, col=forecasted)) + + geom_line(lwd=.3) + + geom_point(aes(col=forecasted), size=.7) + + facet_wrap(~abbreviation, scale="free_y") } } From d268541412d83280a1946946ef4526ea0411dc1e Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Wed, 22 Dec 2021 06:27:04 -0500 Subject: [PATCH 16/23] updating workflow but fails with metric colunn error I can't debug. @vpnagraj we'll have to chat on this one --- scratch/worfklow.R | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/scratch/worfklow.R b/scratch/worfklow.R index c45790b..3b7051b 100644 --- a/scratch/worfklow.R +++ b/scratch/worfklow.R @@ -1,10 +1,14 @@ +library(tidyverse) library(fiphde) # Get data ilidat <- get_cdc_ili(region="national", years=2020:lubridate::year(lubridate::today())) +# Subset to US only +ilidat_us <- ilidat %>% filter(location=="US") + # Forecast ILI -ilifor <- forecast_ili(ilidat, horizon=4L, location="US", trim_date="2020-03-01", constrained=TRUE) +ilifor <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01", constrained=TRUE) # What are the arima params? ilifor$arima_params @@ -15,6 +19,17 @@ ilifor$ili_future # Take a look at the forecasted data bound do the real data ilifor$ili_bound %>% tail(8) +# Plot actual versus forecasted values +p.ili <- + ilifor$ili_bound %>% + mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% + ggplot(aes(date, ili)) + + geom_line(alpha=.5, lwd=.2) + + geom_point(aes(col=forecasted)) + + theme_bw() + + labs(x="Date", y="Unweighted ILI") +p.ili + hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12") tmp_weekly_flu <- @@ -39,7 +54,7 @@ tmp_weekly_flu_w_lag <- lag_3 = lag(flu.admits, 3), lag_4 = lag(flu.admits, 4)) %>% filter(complete.cases(.)) %>% - dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% + dplyr::mutate(date = MMWRweek::MMWRweek2Date(year, week)) %>% filter(date >= max(date) - 7*24) train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3) @@ -62,8 +77,7 @@ models <- ) res <- glm_wrap(train_dat, - ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above - new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = ilifor$ili_future$weighted_ili), + new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), ili = ilifor$ili_future$ili), .models = models, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) From 8d85d50bb377d67ed831afd7ccf84ca0c36d82a5 Mon Sep 17 00:00:00 2001 From: VP Nagraj Date: Wed, 22 Dec 2021 09:12:20 -0500 Subject: [PATCH 17/23] fixing ILI workflow script --- scratch/worfklow.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/scratch/worfklow.R b/scratch/worfklow.R index 3b7051b..2ec0904 100644 --- a/scratch/worfklow.R +++ b/scratch/worfklow.R @@ -37,14 +37,14 @@ tmp_weekly_flu <- mutate(date = date - 1) %>% mutate(flu.admits = as.numeric(flu.admits), flu.admits.cov = as.numeric(flu.admits.cov)) %>% - mutate(week = lubridate::epiweek(date), - year = lubridate::epiyear(date)) %>% - group_by(year, week) %>% + mutate(epiweek = lubridate::epiweek(date), + epiyear = lubridate::epiyear(date)) %>% + group_by(epiyear, epiweek) %>% summarise(flu.admits = sum(flu.admits, na.rm = TRUE), flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE), .groups = "drop") %>% - mutate(location = "US", .before = "week") %>% - left_join(ilifor$ilidat) + mutate(location = "US", .before = "epiweek") %>% + left_join(rename(ilifor$ilidat, epiweek = week, epiyear = year)) ## add lag columns tmp_weekly_flu_w_lag <- @@ -54,7 +54,7 @@ tmp_weekly_flu_w_lag <- lag_3 = lag(flu.admits, 3), lag_4 = lag(flu.admits, 4)) %>% filter(complete.cases(.)) %>% - dplyr::mutate(date = MMWRweek::MMWRweek2Date(year, week)) %>% + dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% filter(date >= max(date) - 7*24) train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3) @@ -62,18 +62,18 @@ test_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() >= n() - 3) models <- list( - glm_poisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "poisson"), - glm_quasipoisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "quasipoisson"), - glm_negbin_ili = trending::glm_nb_model(flu.admits ~ weighted_ili), - glm_poisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "poisson"), - glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "quasipoisson"), - glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + offset(flu.admits.cov)), - glm_poisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"), - glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"), - glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4), - glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"), - glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"), - glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov)) + glm_poisson_ili = trending::glm_model(flu.admits ~ ili, family = "poisson"), + glm_quasipoisson_ili = trending::glm_model(flu.admits ~ ili, family = "quasipoisson"), + glm_negbin_ili = trending::glm_nb_model(flu.admits ~ ili), + glm_poisson_ili_offset = trending::glm_model(flu.admits ~ ili + offset(flu.admits.cov), family = "poisson"), + glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ ili + offset(flu.admits.cov), family = "quasipoisson"), + glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ ili + offset(flu.admits.cov)), + glm_poisson_ili_lags = trending::glm_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"), + glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"), + glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4), + glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"), + glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"), + glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov)) ) res <- glm_wrap(train_dat, From cc3c00abf96c8ecb28e0bb358d66eb5e51a24da0 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Wed, 22 Dec 2021 09:45:38 -0500 Subject: [PATCH 18/23] plots --- scratch/worfklow.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scratch/worfklow.R b/scratch/worfklow.R index 2ec0904..edf20e9 100644 --- a/scratch/worfklow.R +++ b/scratch/worfklow.R @@ -27,7 +27,7 @@ p.ili <- geom_line(alpha=.5, lwd=.2) + geom_point(aes(col=forecasted)) + theme_bw() + - labs(x="Date", y="Unweighted ILI") + labs(x="Date", y="Unweighted ILI", title="ILI forecast") p.ili hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12") @@ -84,5 +84,7 @@ res <- glm_wrap(train_dat, res$forecasts res$model -plot_forc(res$forecasts, train_dat, test_dat) +p.hosp <- plot_forc(res$forecasts, train_dat, test_dat) +library(patchwork) +p.ili / p.hosp From 558e3f9bedd9abfc9b47aaf1361029d326674e26 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Wed, 22 Dec 2021 10:00:31 -0500 Subject: [PATCH 19/23] week/year -> epiweek/epiyear --- R/forecast.R | 16 ++++++++-------- R/retrieve.R | 16 ++++++++-------- man/forecast_ili.Rd | 4 ++-- man/get_cdc_vax.Rd | 2 +- scratch/worfklow.R | 5 +++-- 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/R/forecast.R b/R/forecast.R index 0ee8f12..f51ae12 100644 --- a/R/forecast.R +++ b/R/forecast.R @@ -33,7 +33,7 @@ #' library(ggplot2) #' theme_set(theme_classic()) #' ilifor_us$ili_bound %>% -#' mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% +#' mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>% #' filter(date>"2021-03-01") %>% #' ggplot(aes(date, ili)) + #' geom_line(lwd=.3, alpha=.5) + @@ -51,7 +51,7 @@ #' library(ggplot2) #' theme_set(theme_classic()) #' ilifor_st$ili_bound %>% -#' mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% +#' mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>% #' filter(date>"2021-08-01") %>% #' ggplot(aes(date, ili, col=forecasted)) + #' geom_line(lwd=.3) + @@ -71,7 +71,7 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { # Select just the columns you care about, and call "ili" the measure you're using ilidat <- ilidat %>% - dplyr::select(location, year, week, ili=unweighted_ili) + dplyr::select(location, epiyear, epiweek, ili=unweighted_ili) # Get missing data rates locstats <- ilidat %>% @@ -96,7 +96,7 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { ## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week ilidat_tsibble <- ilidat %>% - make_tsibble(epiyear = year, epiweek = week, key=location, chop=FALSE) + make_tsibble(epiyear = epiyear, epiweek = epiweek, key=location, chop=FALSE) # Defaults to constrained, non-seasonal model. if (constrained) { @@ -140,14 +140,14 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { # Get the next #horizon weeks in a tibble ili_future <- ili_forecast %>% tibble::as_tibble() %>% - dplyr::mutate(year=lubridate::epiyear(yweek)) %>% - dplyr::mutate(week=lubridate::epiweek(yweek)) %>% - dplyr::select(location, year, week, ili=.mean) + dplyr::mutate(epiyear=lubridate::epiyear(yweek)) %>% + dplyr::mutate(epiweek=lubridate::epiweek(yweek)) %>% + dplyr::select(location, epiyear, epiweek, ili=.mean) # bind the historical data to the new data ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), ili_future %>% dplyr::mutate(forecasted=TRUE)) %>% - dplyr::arrange(location, year, week) %>% + dplyr::arrange(location, epiyear, epiweek) %>% dplyr::inner_join(locations, by="location") # Create results diff --git a/R/retrieve.R b/R/retrieve.R index f06c6db..580b2f0 100644 --- a/R/retrieve.R +++ b/R/retrieve.R @@ -106,7 +106,7 @@ get_hdgov_hosp <- function(endpoint="https://healthdata.gov/resource/g62h-syeh.j #' @references API documentation: . #' @examples #' \dontrun{ -#' d <- get_cdc_vax_data() +#' d <- get_cdc_vax() #' d #' library(ggplot2) #' d %>% @@ -183,14 +183,14 @@ get_cdc_ili <- function(region=c("national", "state"), years=NULL) { # Get only relevant columns (drop age group distributions) # Join to internal package data to get state abbreviations and FIPS codes d <- d %>% - dplyr::select(region_type, region, year, week, week_start, dplyr::contains("ili"), ilitotal:total_patients) %>% + dplyr::select(region_type, region, epiyear=year, epiweek=week, week_start, dplyr::contains("ili"), ilitotal:total_patients) %>% dplyr::mutate(region=gsub("National", "US", region)) %>% dplyr::inner_join(locations, by=c("region"="location_name")) %>% dplyr::select(location, region_type, abbreviation, region, dplyr::everything()) message(sprintf("Latest week_start / year / epiweek available:\n%s / %d / %d", max(d$week_start), - unique(d$year[d$week_start==max(d$week_start)]), - unique(d$week[d$week_start==max(d$week_start)]))) + unique(d$epiyear[d$week_start==max(d$week_start)]), + unique(d$epiweek[d$week_start==max(d$week_start)]))) return(d) } @@ -211,8 +211,8 @@ get_cdc_hosp <- function(years=NULL) { dplyr::transmute(location="US", abbreviation="US", region="US", - year, - week=year_wk_num, + epiyear=year, + epiweek=year_wk_num, week_start=wk_start, week_end=wk_end, rate, @@ -220,7 +220,7 @@ get_cdc_hosp <- function(years=NULL) { season=sea_label) message(sprintf("Latest week_start / year / epiweek available:\n%s / %d / %d", max(d$week_start), - unique(d$year[d$week_start==max(d$week_start)]), - unique(d$week[d$week_start==max(d$week_start)]))) + unique(d$epiyear[d$week_start==max(d$week_start)]), + unique(d$epiweek[d$week_start==max(d$week_start)]))) return(d) } diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd index fff2c66..1a4a294 100644 --- a/man/forecast_ili.Rd +++ b/man/forecast_ili.Rd @@ -53,7 +53,7 @@ library(dplyr) library(ggplot2) theme_set(theme_classic()) ilifor_us$ili_bound \%>\% - mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\% + mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) \%>\% filter(date>"2021-03-01") \%>\% ggplot(aes(date, ili)) + geom_line(lwd=.3, alpha=.5) + @@ -71,7 +71,7 @@ library(dplyr) library(ggplot2) theme_set(theme_classic()) ilifor_st$ili_bound \%>\% - mutate(date=cdcfluview::mmwr_week_to_date(year, week)) \%>\% + mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) \%>\% filter(date>"2021-08-01") \%>\% ggplot(aes(date, ili, col=forecasted)) + geom_line(lwd=.3) + diff --git a/man/get_cdc_vax.Rd b/man/get_cdc_vax.Rd index 2f5e080..63d8a8e 100644 --- a/man/get_cdc_vax.Rd +++ b/man/get_cdc_vax.Rd @@ -28,7 +28,7 @@ Get vaccination data from cdc.gov endpoint. } \examples{ \dontrun{ -d <- get_cdc_vax_data() +d <- get_cdc_vax() d library(ggplot2) d \%>\% diff --git a/scratch/worfklow.R b/scratch/worfklow.R index edf20e9..17f89de 100644 --- a/scratch/worfklow.R +++ b/scratch/worfklow.R @@ -22,7 +22,7 @@ ilifor$ili_bound %>% tail(8) # Plot actual versus forecasted values p.ili <- ilifor$ili_bound %>% - mutate(date=cdcfluview::mmwr_week_to_date(year, week)) %>% + mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>% ggplot(aes(date, ili)) + geom_line(alpha=.5, lwd=.2) + geom_point(aes(col=forecasted)) + @@ -44,7 +44,7 @@ tmp_weekly_flu <- flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE), .groups = "drop") %>% mutate(location = "US", .before = "epiweek") %>% - left_join(rename(ilifor$ilidat, epiweek = week, epiyear = year)) + left_join(ilifor$ilidat, by = c("epiyear", "location", "epiweek")) ## add lag columns tmp_weekly_flu_w_lag <- @@ -85,6 +85,7 @@ res$forecasts res$model p.hosp <- plot_forc(res$forecasts, train_dat, test_dat) +p.hosp library(patchwork) p.ili / p.hosp From 4683e838e480e8d1533aee3a0612de8306fe3a10 Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Wed, 22 Dec 2021 10:10:56 -0500 Subject: [PATCH 20/23] include 2019-2020 flu season so we get early 2020 data --- scratch/worfklow.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scratch/worfklow.R b/scratch/worfklow.R index 17f89de..4867f25 100644 --- a/scratch/worfklow.R +++ b/scratch/worfklow.R @@ -2,7 +2,10 @@ library(tidyverse) library(fiphde) # Get data -ilidat <- get_cdc_ili(region="national", years=2020:lubridate::year(lubridate::today())) +# the years argument for cdcfluview::ilinet gets the *season* corresponding to the year. +# so, 2019 = the 2019-2020 flu season. If you only get 2020-2021, you're getting the +# 2020-2021 and 2021-2022 flu season, meaning that you will *NOT* capture early 2020 data. +ilidat <- get_cdc_ili(region="national", years=2019:lubridate::year(lubridate::today())) # Subset to US only ilidat_us <- ilidat %>% filter(location=="US") From 366b01f00683a891a8e3a32d0077854a1f86581c Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Wed, 22 Dec 2021 10:28:11 -0500 Subject: [PATCH 21/23] sensitivity demonstration script --- scratch/sensitivity-demonstration.R | 40 +++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 scratch/sensitivity-demonstration.R diff --git a/scratch/sensitivity-demonstration.R b/scratch/sensitivity-demonstration.R new file mode 100644 index 0000000..03c0872 --- /dev/null +++ b/scratch/sensitivity-demonstration.R @@ -0,0 +1,40 @@ +library(tidyverse) +library(fiphde) + + +# Get data +# the years argument for cdcfluview::ilinet gets the *season* corresponding to the year. +# so, 2019 = the 2019-2020 flu season. If you only get 2020-2021, you're getting the +# 2020-2021 and 2021-2022 flu season, meaning that you will *NOT* capture early 2020 data. +ilidat <- get_cdc_ili(region="national", years=2010:lubridate::year(lubridate::today())) + +# Subset to US only +ilidat_us <- ilidat %>% filter(location=="US") + +# convenience function +plot_forecast <- function(forecast) { + forecast$ili_bound %>% + mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>% + filter(date>"2021-07-01") %>% + ggplot(aes(date, ili)) + + geom_line(alpha=.5, lwd=.2) + + geom_point(aes(col=forecasted)) + + theme_bw() + + labs(x="Date", y="Unweighted ILI", title="ILI forecast", + subtitle=paste0("Trim date: ", forecast$ilidat %>% mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>% pull(date) %>% min())) +} + +# Forecast ILI +p1 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01", constrained=TRUE) %>% plot_forecast() + ggtitle("Constrained") +p2 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-01-01", constrained=TRUE) %>% plot_forecast() + ggtitle("Constrained") +p3 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-05-01", constrained=TRUE) %>% plot_forecast() + ggtitle("Constrained") +p4 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2015-05-01", constrained=TRUE) %>% plot_forecast() + ggtitle("Constrained") + +p5 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01", constrained=FALSE) %>% plot_forecast() + ggtitle("Unconstrained") +p6 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-01-01", constrained=FALSE) %>% plot_forecast() + ggtitle("Unconstrained") +p7 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-05-01", constrained=FALSE) %>% plot_forecast() + ggtitle("Unconstrained") +p8 <- forecast_ili(ilidat_us, horizon=4L, trim_date="2015-05-01", constrained=FALSE) %>% plot_forecast() + ggtitle("Unconstrained") + +(p1+p5)/(p2+p6)/(p3+p7)/(p4+p8) + plot_layout(guides = "collect") + +ggsave("~/Downloads/sensitivity-demo.png", width=8, height=11.5) From b5285018ecac70c2784b9a108d0d50d6a32d76a2 Mon Sep 17 00:00:00 2001 From: VP Nagraj Date: Wed, 22 Dec 2021 17:51:07 -0500 Subject: [PATCH 22/23] adding option to forecast_ili for named param_space list --- R/forecast.R | 21 +++++---------------- man/forecast_ili.Rd | 12 ++++++++++-- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/R/forecast.R b/R/forecast.R index f51ae12..a72f55f 100644 --- a/R/forecast.R +++ b/R/forecast.R @@ -4,7 +4,8 @@ #' @param ilidat Data returned from [get_cdc_ili]. #' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4` #' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim. -#' @param constrained Should the model be constrained to a non-seasonal model? Default `TRUE` sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See [fable::ARIMA]. +#' @param constrained Should the model be constrained to a non-seasonal model? If `TRUE` the parameter space defined in "param_space" argument will be used. See [fable::ARIMA]. +#' @param param_space Named list for ARIMA parameter space constraint; only used if "constrained == `TRUE`"; default is `list(P=0,D=0,Q=0,p=1:2,d=0:2,0)`, which sets space to PDQ(0,0,0) and pdq(1:2,0:2,0). #' @return A named list containing: #' 1. `ilidat`: The data sent into the function filtered to the location and the `trim_date`. Select columns returned. #' 1. `ilidat_tsibble`: The `tsibble` class object returned by running [make_tsibble] on the data above. @@ -59,7 +60,7 @@ #' facet_wrap(~abbreviation, scale="free_y") #' } #' @export -forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { +forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE, param_space = list(P=0,D=0,Q=0,p=1:2,d=0:2,0)) { # If trim_date is not null, trim to selected trim_date if (!is.null(trim_date)) { @@ -104,7 +105,7 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { # Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) message("Fitting nonseasonal constrained ARIMA model...") ili_fit <- fabletools::model(ilidat_tsibble, - arima = fable::ARIMA(ili ~ PDQ(0,0,0) + pdq(1:2,0:2,0), + arima = fable::ARIMA(ili ~ PDQ(param_space$P,param_space$D,param_space$Q) + pdq(param_space$p,param_space$d, param_space$q), stepwise=FALSE, approximation=FALSE)) } else { @@ -116,27 +117,15 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { approximation=NULL)) } - - # # arima_params <- unlist(ili_fit$arima[[1]]$fit$spec[,1:6]) arima_params <- ili_fit %>% dplyr::mutate(x=purrr::map(arima, ~purrr::pluck(., "fit") %>% purrr::pluck("spec"))) %>% tidyr::unnest_wider(col=x) %>% dplyr::select(-arima) - # arima_params <- ili_fit %>% extract_arima_params() - # Get the forecast ili_forecast <- fabletools::forecast(ili_fit, h=horizon) - ## Look at the quantiles - # ili_forecast %>% - # fabletools::hilo() - # ili_forecast %>% - # fabletools::hilo() %>% - # fabletools::unpack_hilo(`80%`) %>% - # fabletools::unpack_hilo(`95%`) - # Get the next #horizon weeks in a tibble ili_future <- ili_forecast %>% tibble::as_tibble() %>% @@ -145,7 +134,7 @@ forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE) { dplyr::select(location, epiyear, epiweek, ili=.mean) # bind the historical data to the new data - ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), + ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), ili_future %>% dplyr::mutate(forecasted=TRUE)) %>% dplyr::arrange(location, epiyear, epiweek) %>% dplyr::inner_join(locations, by="location") diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd index 1a4a294..1896820 100644 --- a/man/forecast_ili.Rd +++ b/man/forecast_ili.Rd @@ -4,7 +4,13 @@ \alias{forecast_ili} \title{Forecast ILI} \usage{ -forecast_ili(ilidat, horizon = 4L, trim_date = NULL, constrained = TRUE) +forecast_ili( + ilidat, + horizon = 4L, + trim_date = NULL, + constrained = TRUE, + param_space = list(P = 0, D = 0, Q = 0, p = 1:2, d = 0:2, 0) +) } \arguments{ \item{ilidat}{Data returned from \link{get_cdc_ili}.} @@ -13,7 +19,9 @@ forecast_ili(ilidat, horizon = 4L, trim_date = NULL, constrained = TRUE) \item{trim_date}{Earliest start date you want to use for ILI data. Default \code{NULL} doesn't trim.} -\item{constrained}{Should the model be constrained to a non-seasonal model? Default \code{TRUE} sets PQD(0,0,0) & pdq(0:5,0:5,0:5). See \link[fable:ARIMA]{fable::ARIMA}.} +\item{constrained}{Should the model be constrained to a non-seasonal model? If \code{TRUE} the parameter space defined in "param_space" argument will be used. See \link[fable:ARIMA]{fable::ARIMA}.} + +\item{param_space}{Named list for ARIMA parameter space constraint; only used if "constrained == \code{TRUE}"; default is \code{list(P=0,D=0,Q=0,p=1:2,d=0:2,0)}, which sets space to PDQ(0,0,0) and pdq(1:2,0:2,0).} } \value{ A named list containing: From e11a52ce554ddca48382f325116413ecac97e05f Mon Sep 17 00:00:00 2001 From: Stephen Turner Date: Thu, 23 Dec 2021 11:30:47 -0500 Subject: [PATCH 23/23] specify q --- R/forecast.R | 2 +- man/forecast_ili.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/forecast.R b/R/forecast.R index a72f55f..a948ff2 100644 --- a/R/forecast.R +++ b/R/forecast.R @@ -60,7 +60,7 @@ #' facet_wrap(~abbreviation, scale="free_y") #' } #' @export -forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE, param_space = list(P=0,D=0,Q=0,p=1:2,d=0:2,0)) { +forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE, param_space = list(P=0,D=0,Q=0,p=1:2,d=0:2,q=0)) { # If trim_date is not null, trim to selected trim_date if (!is.null(trim_date)) { diff --git a/man/forecast_ili.Rd b/man/forecast_ili.Rd index 1896820..9580528 100644 --- a/man/forecast_ili.Rd +++ b/man/forecast_ili.Rd @@ -9,7 +9,7 @@ forecast_ili( horizon = 4L, trim_date = NULL, constrained = TRUE, - param_space = list(P = 0, D = 0, Q = 0, p = 1:2, d = 0:2, 0) + param_space = list(P = 0, D = 0, Q = 0, p = 1:2, d = 0:2, q = 0) ) } \arguments{