-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #23 from signaturescience/7-forcast-ili
forecast ILI
- Loading branch information
Showing
12 changed files
with
534 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,8 @@ RoxygenNote: 7.1.2 | |
Imports: | ||
cdcfluview, | ||
dplyr, | ||
fable, | ||
fabletools, | ||
lubridate, | ||
magrittr, | ||
MMWRweek, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#' @title Forecast ILI | ||
#' @description Forecasts ILI up to specified weeks in the future. Used in downstream modeling. | ||
#' @details Currently limited to one location only. | ||
#' @param ilidat Data returned from [get_cdc_ili]. | ||
#' @param horizon Optional horizon periods through which the forecasts should be generated; default is `4` | ||
#' @param trim_date Earliest start date you want to use for ILI data. Default `NULL` doesn't trim. | ||
#' @param constrained Should the model be constrained to a non-seasonal model? If `TRUE` the parameter space defined in "param_space" argument will be used. See [fable::ARIMA]. | ||
#' @param param_space Named list for ARIMA parameter space constraint; only used if "constrained == `TRUE`"; default is `list(P=0,D=0,Q=0,p=1:2,d=0:2,0)`, which sets space to PDQ(0,0,0) and pdq(1:2,0:2,0). | ||
#' @return A named list containing: | ||
#' 1. `ilidat`: The data sent into the function filtered to the location and the `trim_date`. Select columns returned. | ||
#' 1. `ilidat_tsibble`: The `tsibble` class object returned by running [make_tsibble] on the data above. | ||
#' 1. `ili_fit`: The fit from [fabletools::model]. | ||
#' 1. `ili_forecast`: The forecast from [fabletools::forecast] at the specified horizon. | ||
#' 1. `ili_future`: The `horizon`-number of weeks of ILI data forecasted into the future. | ||
#' 1. `ili_bound`: The data in 1 bound to the data in 5. | ||
#' 1. `arima_params`: A tibble with ARIMA model parameters for each location. | ||
#' 1. `locstats`: A tibble with missing data information on all locations. | ||
#' 1. `removed`: A tibble with locations removed because of high missing ILI data. | ||
#' @examples | ||
#' \dontrun{ | ||
#' # Get data | ||
#' ilidat <- get_cdc_ili(region=c("national", "state"), years=2010:lubridate::year(lubridate::today())) | ||
#' | ||
#' # Using data only from march 2020 forward, for US only | ||
#' ilidat_us <- ilidat %>% dplyr::filter(location=="US") | ||
#' ilifor_us <- forecast_ili(ilidat_us, horizon=4L, trim_date="2020-03-01") | ||
#' ilifor_us$ili_fit | ||
#' ilifor_us$arima_params | ||
#' ilifor_us$ili_forecast | ||
#' head(ilifor_us$ili_bound) | ||
#' tail(ilifor_us$ili_bound, 10) | ||
#' # Plot | ||
#' library(dplyr) | ||
#' library(ggplot2) | ||
#' theme_set(theme_classic()) | ||
#' ilifor_us$ili_bound %>% | ||
#' mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>% | ||
#' filter(date>"2021-03-01") %>% | ||
#' ggplot(aes(date, ili)) + | ||
#' geom_line(lwd=.3, alpha=.5) + | ||
#' geom_point(aes(col=forecasted), size=2) | ||
#' | ||
#' # At the state level | ||
#' ilifor_st <- forecast_ili(ilidat, horizon=4L, trim_date="2020-03-01") | ||
#' ilifor_st$ili_fit | ||
#' ilifor_st$arima_params | ||
#' ilifor_st$ili_forecast | ||
#' head(ilifor_us$ili_bound) | ||
#' tail(ilifor_us$ili_bound, 10) | ||
#' # Plot | ||
#' library(dplyr) | ||
#' library(ggplot2) | ||
#' theme_set(theme_classic()) | ||
#' ilifor_st$ili_bound %>% | ||
#' mutate(date=cdcfluview::mmwr_week_to_date(epiyear, epiweek)) %>% | ||
#' filter(date>"2021-08-01") %>% | ||
#' ggplot(aes(date, ili, col=forecasted)) + | ||
#' geom_line(lwd=.3) + | ||
#' geom_point(aes(col=forecasted), size=.7) + | ||
#' facet_wrap(~abbreviation, scale="free_y") | ||
#' } | ||
#' @export | ||
forecast_ili <- function(ilidat, horizon=4L, trim_date=NULL, constrained=TRUE, param_space = list(P=0,D=0,Q=0,p=1:2,d=0:2,q=0)) { | ||
|
||
# If trim_date is not null, trim to selected trim_date | ||
if (!is.null(trim_date)) { | ||
ilidat <- | ||
ilidat %>% | ||
dplyr::filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) | ||
} | ||
|
||
# Select just the columns you care about, and call "ili" the measure you're using | ||
ilidat <- | ||
ilidat %>% | ||
dplyr::select(location, epiyear, epiweek, ili=unweighted_ili) | ||
|
||
# Get missing data rates | ||
locstats <- ilidat %>% | ||
dplyr::group_by(location) %>% | ||
dplyr::summarize(miss=sum(is.na(ili)), total=dplyr::n()) %>% | ||
dplyr::mutate(pmiss=miss/total) %>% | ||
dplyr::arrange(dplyr::desc(pmiss)) %>% | ||
dplyr::mutate(remove=pmiss>.1) | ||
|
||
# Get locations that will be removed | ||
removed <- locstats %>% | ||
dplyr::filter(remove) %>% | ||
dplyr::inner_join(locations, by="location") | ||
if(nrow(removed)>0) message(sprintf("Removed %s row(s) because of missing data. See result$removed.", nrow(removed))) | ||
|
||
# Remove those locations | ||
ilidat <- locstats %>% | ||
dplyr::filter(!remove) %>% | ||
dplyr::distinct(location) %>% | ||
dplyr::inner_join(ilidat, by="location") | ||
|
||
## make a tsibble. do not chop the last week - because this is weekly data we won't have an incomplete final week | ||
ilidat_tsibble <- | ||
ilidat %>% | ||
make_tsibble(epiyear = epiyear, epiweek = epiweek, key=location, chop=FALSE) | ||
|
||
# Defaults to constrained, non-seasonal model. | ||
if (constrained) { | ||
# Nonseasonal fit: PDQ(0, 0, 0) | ||
# Nonseasonal components unrestricted: pdq(0:5,0:5,0:5) | ||
message("Fitting nonseasonal constrained ARIMA model...") | ||
ili_fit <- fabletools::model(ilidat_tsibble, | ||
arima = fable::ARIMA(ili ~ PDQ(param_space$P,param_space$D,param_space$Q) + pdq(param_space$p,param_space$d, param_space$q), | ||
stepwise=FALSE, | ||
approximation=FALSE)) | ||
} else { | ||
# If unconstrained, need to set stepwise=TRUE and approxmiation=NULL to speed up. | ||
message("Fitting unconstrained ARIMA model...") | ||
ili_fit <- fabletools::model(ilidat_tsibble, | ||
arima = fable::ARIMA(ili, | ||
stepwise=TRUE, | ||
approximation=NULL)) | ||
} | ||
|
||
arima_params <- | ||
ili_fit %>% | ||
dplyr::mutate(x=purrr::map(arima, ~purrr::pluck(., "fit") %>% purrr::pluck("spec"))) %>% | ||
tidyr::unnest_wider(col=x) %>% | ||
dplyr::select(-arima) | ||
|
||
# Get the forecast | ||
ili_forecast <- fabletools::forecast(ili_fit, h=horizon) | ||
|
||
# Get the next #horizon weeks in a tibble | ||
ili_future <- ili_forecast %>% | ||
tibble::as_tibble() %>% | ||
dplyr::mutate(epiyear=lubridate::epiyear(yweek)) %>% | ||
dplyr::mutate(epiweek=lubridate::epiweek(yweek)) %>% | ||
dplyr::select(location, epiyear, epiweek, ili=.mean) | ||
|
||
# bind the historical data to the new data | ||
ili_bound <- dplyr::bind_rows(ilidat %>% dplyr::mutate(forecasted=FALSE), | ||
ili_future %>% dplyr::mutate(forecasted=TRUE)) %>% | ||
dplyr::arrange(location, epiyear, epiweek) %>% | ||
dplyr::inner_join(locations, by="location") | ||
|
||
# Create results | ||
res <- tibble::lst(ilidat, ilidat_tsibble, ili_fit, ili_forecast, ili_future, ili_bound, arima_params, locstats, removed) | ||
return(res) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.