From caaed17babfb0973f7168e84bc310b48c632628e Mon Sep 17 00:00:00 2001 From: VP Nagraj Date: Tue, 21 Dec 2021 11:33:18 -0500 Subject: [PATCH 1/5] adding glm code; misc edits to DESCRIPTION info --- DESCRIPTION | 9 +- NAMESPACE | 6 ++ R/glm.R | 208 ++++++++++++++++++++++++++++++++++++++++++++ R/utils.R | 58 ++++++++++++ man/glm_fit.Rd | 25 ++++++ man/glm_forecast.Rd | 28 ++++++ man/glm_quibble.Rd | 25 ++++++ man/glm_wrap.Rd | 35 ++++++++ man/plot_forc.Rd | 21 +++++ man/wis_score.Rd | 19 ++++ 10 files changed, 432 insertions(+), 2 deletions(-) create mode 100644 R/glm.R create mode 100644 man/glm_fit.Rd create mode 100644 man/glm_forecast.Rd create mode 100644 man/glm_quibble.Rd create mode 100644 man/glm_wrap.Rd create mode 100644 man/plot_forc.Rd create mode 100644 man/wis_score.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ba36044..5645bb2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ Authors@R: comment = c(ORCID = "0000-0001-9140-9028")) ) Description: Miscellaneous functions for retrieving data, creating and evaluating time series - forecasting models for influenza-like illness (ILI) cases, deaths, and hospitalizations in + forecasting models for influenza-like illness (ILI) and influenza hospitalizations in the United States. License: GPL (>= 3) Encoding: UTF-8 @@ -39,6 +39,11 @@ Imports: RSocrata, tibble, tidyr, - tsibble + tsibble, + trending, + trendeval, + ggplot2, + evalcast, + yardstick Depends: R (>= 2.10) diff --git a/NAMESPACE b/NAMESPACE index 9ebdc39..c992771 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,13 @@ export(get_cdc_hosp) export(get_cdc_ili) export(get_cdc_vax) export(get_hdgov_hosp) +export(glm_fit) +export(glm_forecast) +export(glm_quibble) +export(glm_wrap) export(is_monday) export(make_tsibble) +export(plot_forc) export(this_monday) +export(wis_score) importFrom(magrittr,"%>%") diff --git a/R/glm.R b/R/glm.R new file mode 100644 index 0000000..7ae5169 --- /dev/null +++ b/R/glm.R @@ -0,0 +1,208 @@ +#' Fit glm models +#' +#' This helper function is used in \link[fiphde]{glm_wrap} to fit a list of models and select the best one. +#' +#' @param .data Data including all explanatory and outcome variables needed for modeling; must include column for "location" +#' @param .models List of models defined as \link[trending]{trending_model} objects +#' +#' @return A `tibble` containing characteristics from the "best" `glm` model (i.e., the model from ".models" list with lowest RMSE). The columns in this `tibble` include: +#' +#'- model_class: The "type" of model for the best fit +#'- fit: The fitted model object for the best fit +#'- location: The geographic +#'- data: Original model fit data as a `tibble` in a list column +#' +#' @export +#' +#' @md +#' +glm_fit <- function(.data, + .models) { + + dat <- .data + + message(paste0("Location ... ")) + message(unique(dat$location)) + + ## evaluate models and use metrics provided in arg + res <- + trendeval::evaluate_models( + .models, + dat, + method = evaluate_resampling, + metrics = list(yardstick::rmse) + ) + + ## pull the best model by rmse + best_by_rmse <- + res %>% + # dplyr::filter(purrr::map_lgl(warning, is.null)) %>% # remove models that gave warnings + dplyr::filter(purrr::map_lgl(error, is.null)) %>% # remove models that errored + dplyr::slice_min(rmse) %>% + dplyr::select(model) %>% + purrr::pluck(1,1) + + ## fit the model + tmp_fit <- + best_by_rmse %>% + fit(dat) + + ## construct tibble with model type, actual fit, and the location + ret <- dplyr::tibble(model_class = best_by_rmse$model_class, + fit = tmp_fit, + location = unique(dat$location), + data = tidyr::nest(dat, fit_data = everything())) + + message("Selected model ...") + message(as.character(ret$fit$fitted_model$family)[1]) + + message("Variables ...") + message(paste0(names(ret$fit$fitted_model$coefficients), collapse = " + ")) + return(ret) + +} + + +#' Get quantiles from prediction intervals +#' +#' This function runs the \link[trending]{predict.trending_model_fit} method on a fitted model at specified values of "alpha" in order to create a range of prediction intervals. The processing also includes steps to convert the alpha to corresponding quantile values at upper and lower bounds. +#' +#' @param fit Fitted model object from \link[fiphde]{glm_fit} +#' @param new_data Tibble with new data on which the \link[trending]{predict.trending_model_fit} method should run +#' @param alpha Vector specifying the threshold(s) to be used for prediction intervals; alpha of `0.05` would correspond to 95% PI; default is `c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2` to range of intervals +#' +#' @return A tibble with predicted values at each quantile (lower and upper bound for each value of "alpha") +#' @export +#' @md +#' +glm_quibble <- function(fit, new_data, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) { + + ## get the quantiles from the alpha + q_lower <- alpha/2 + q_upper <- 1 - q_lower + + ## run the predict method on the fitted model + ## use the given alpha + fit %>% + predict(new_data, alpha = alpha) %>% + ## get just the prediction interval bounds ... + ## index (time column must be named index) ... + dplyr::select(epiyear, epiweek, lower_pi, upper_pi) %>% + ## reshape so that its in long format + tidyr::gather(quantile, value, lower_pi:upper_pi) %>% + ## and subout out lower_pi/upper_pi for the appropriate quantile + dplyr::mutate(quantile = ifelse(quantile == "lower_pi", q_lower, q_upper)) +} + + +#' Forecast glm models +#' +#' This function uses fitted model object from \link[fiphde]{glm_fit} and future covariate data to create probablistic forecasts at specific quantiles derived from the "alpha" parameter. +#' +#' @param .data Data including all explanatory and outcome variables needed for modeling +#' @param new_covariates Tibble with one column per covariate, and n rows for n horizons being forecasted +#' @param fit Fitted model object from \link[fiphde]{glm_fit} +#' @param alpha Vector specifying the threshold(s) to be used for prediction intervals; alpha of `0.05` would correspond to 95% PI; default is `c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2` to range of intervals +#' +#' @return Tibble with forecasts (quantiles and point estimates) +#' @export +#' @md +#' +glm_forecast <- function(.data, new_covariates = NULL, fit, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) { + + ## get the last date from the data provided + last_date <- + .data %>% + dplyr::arrange(location, epiweek, epiyear) %>% + dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% + dplyr::pull(date) %>% + utils::tail(1) + + tmp <- .data + ## set up "new data" with the epiweek/epiyear week being forecasted ... + ## and calculation of lagged flu admissions (1 to 4 weeks back) + new_data <- + dplyr::tibble(lag_1 = utils::tail(tmp$flu.admits, 4)[4], + lag_2 = utils::tail(tmp$flu.admits, 4)[3], + lag_3 = utils::tail(tmp$flu.admits, 4)[2], + lag_4 = utils::tail(tmp$flu.admits, 4)[1], + epiweek = lubridate::epiweek(last_date + 7), + epiyear = lubridate::epiyear(last_date + 7)) + + ## this should allow for a constant + if(!is.null(new_covariates)) { + new_data <- + cbind(new_data,new_covariates) + } + # ## take the fit object provided and use predict + point_estimates <- + fit %>% + predict(new_data) %>% + dplyr::select(epiweek, epiyear, estimate) %>% + dplyr::mutate(estimate = round(estimate)) %>% + dplyr::mutate(quantile = NA) %>% + dplyr::select(epiweek, epiyear, quantile, value = estimate) + + ## map the quibble function over the alphas + quants <- map_df(alpha, .f = function(x) glm_quibble(fit = fit, new_data = new_data, alpha = x)) + + ## prep data + dplyr::bind_rows(point_estimates,quants) %>% + dplyr::arrange(epiyear,epiweek, quantile) %>% + dplyr::left_join(new_data, by = c("epiweek","epiyear")) %>% + dplyr::select(epiweek,epiyear,quantile,value) +} + +#' Run glm modeling and forecasting +#' +#' This is a wrapper function that pipelines influenza hospitalization modeling (\link[fiphde]{glm_fit}) and forecasting (\link[fiphde]{glm_forecast}). +#' +#' @param .data Data including all explanatory and outcome variables needed for modeling +#' @param .models List of models defined as \link[trending]{trending_model} objects +#' @param new_covariates Tibble with one column per covariate, and n rows for n horizons being forecasted +#' @param horizon Number of weeks ahead for forecasting +#' @param alpha Vector specifying the threshold(s) to be used for prediction intervals; alpha of `0.05` would correspond to 95% PI; default is `c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2` to range of intervals +#' +#' @return Named list with two elements: +#' +#' - model: Output from \link[fiphde]{glm_fit} with selected model fit +#' - forecasts: Output from \link[fiphde]{glm_forecast} with forecasts from each horizon combined as a single tibble +#' +#' @export +#' @md +glm_wrap <- function(.data, .models, new_covariates = NULL, horizon = 4, alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) { + + tmp_fit <- glm_fit(.data, .models = .models) + + stopifnot(nrow(new_covariates) == horizon) + message("Forecasting 1 week ahead") + tmp_forc <- + glm_forecast(.data = .data, new_covariates = new_covariates[1,], fit = tmp_fit$fit, alpha = alpha) + + forc_list <- list() + forc_list[[1]] <- tmp_forc + + if(horizon > 1) { + + for(i in 2:horizon) { + + message(sprintf("Forecasting %d week ahead",i)) + + prev_weeks <- + do.call("rbind", forc_list) %>% + ## get point estimate ... which will have NA quantile value + dplyr::filter(is.na(quantile)) %>% + dplyr::rename(flu.admits = value) %>% + dplyr::select(-quantile) %>% + dplyr::mutate(flu.admits.cov = NA, location = "US") + + forc_list[[i]] <- glm_forecast(.data = bind_rows(.data, prev_weeks), + new_covariates = new_covariates[i,], + fit = tmp_fit$fit, + alpha = alpha) + + } + } + forc_res <- do.call("rbind", forc_list) + return(list(model = tmp_fit, forecasts = forc_res)) +} diff --git a/R/utils.R b/R/utils.R index dfd6fc9..3860643 100644 --- a/R/utils.R +++ b/R/utils.R @@ -55,3 +55,61 @@ this_monday <- function() { is_monday <- function() { lubridate::wday(lubridate::today(), label=TRUE) %in% c("Mon") } + +#' Calculate WIS score +#' +#' Helper function to calculate weighted interval score (WIS) for prepped forecasts +#' +#' @param .forecasts Tibble with prepped foreacsts +#' @param .test Tibble with test data including observed value for flu admissions stored in "flu.admits" column +#' +#' @return Tibble with the WIS for each combination of epiweek and epiyear +#' @export +#' +wis_score <- function(.forecasts, .test) { + .forecasts %>% + dplyr::left_join(.test) %>% + dplyr::select(epiweek,epiyear,quantile,value,flu.admits) %>% + dplyr::group_by(epiweek, epiyear) %>% + dplyr::summarise(wis = evalcast::weighted_interval_score(quantile = quantile, value = value, actual_value = flu.admits)) +} + +#' Plot forecasts against observed data +#' +#' This helper function creates a plot to visualize the forecasted point estimates (and 95% prediction interval) alongside the observed data. +#' +#' @param .forecasts Tibble with prepped forecasts +#' @param .train Tibble with data used for modeling +#' @param .test Tibble with observed data held out from modeling +#' +#' @return A `ggplot2` plot object +#' @export +#' +#' +plot_forc <- function(.forecasts, .train, .test) { + + forc_dat <- + .forecasts %>% + dplyr::filter(quantile %in% c(NA,0.025,0.975)) %>% + tidyr::spread(quantile,value) %>% + dplyr::rename(lower = `0.025`, upper = `0.975`, mean = ``) + + .test %>% + dplyr::bind_rows(.train) %>% + dplyr::select(epiweek,epiyear, truth = flu.admits, location) %>% + dplyr::left_join(forc_dat) %>% + dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% + ggplot2::ggplot() + + ggplot2::geom_line(ggplot2::aes(date,truth), lwd = 2, col = "black") + + ggplot2::geom_line(ggplot2::aes(date,mean), lwd = 2, alpha = 0.5, lty = "solid", col = "firebrick") + + ggplot2::geom_ribbon(ggplot2::aes(date, ymin = lower, ymax = upper), alpha = 0.25, fill = "firebrick") + + ## get an upper limit from whatever the max of observed or forcasted hospitalizations is + ggplot2::scale_y_continuous(limits = c(0,max(c(.test$flu.admits, .train$flu.admits, forc_dat$upper)))) + + ggplot2::scale_x_date(date_labels = "%Y-%m", date_breaks = "month") + + ggplot2::labs(x = "Date", y = "Count", title = "Influenza hospitalizations") + + ggplot2::theme_minimal() + + ggplot2::facet_wrap(~ location) + +} + + diff --git a/man/glm_fit.Rd b/man/glm_fit.Rd new file mode 100644 index 0000000..54cff76 --- /dev/null +++ b/man/glm_fit.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/glm.R +\name{glm_fit} +\alias{glm_fit} +\title{Fit glm models} +\usage{ +glm_fit(.data, .models) +} +\arguments{ +\item{.data}{Data including all explanatory and outcome variables needed for modeling; must include column for "location"} + +\item{.models}{List of models defined as \link[trending]{trending_model} objects} +} +\value{ +A \code{tibble} containing characteristics from the "best" \code{glm} model (i.e., the model from ".models" list with lowest RMSE). The columns in this \code{tibble} include: +\itemize{ +\item model_class: The "type" of model for the best fit +\item fit: The fitted model object for the best fit +\item location: The geographic +\item data: Original model fit data as a \code{tibble} in a list column +} +} +\description{ +This helper function is used in \link[fiphde]{glm_wrap} to fit a list of models and select the best one. +} diff --git a/man/glm_forecast.Rd b/man/glm_forecast.Rd new file mode 100644 index 0000000..0a85929 --- /dev/null +++ b/man/glm_forecast.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/glm.R +\name{glm_forecast} +\alias{glm_forecast} +\title{Forecast glm models} +\usage{ +glm_forecast( + .data, + new_covariates = NULL, + fit, + alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2 +) +} +\arguments{ +\item{.data}{Data including all explanatory and outcome variables needed for modeling} + +\item{new_covariates}{Tibble with one column per covariate, and n rows for n horizons being forecasted} + +\item{fit}{Fitted model object from \link[fiphde]{glm_fit}} + +\item{alpha}{Vector specifying the threshold(s) to be used for prediction intervals; alpha of \code{0.05} would correspond to 95\% PI; default is \code{c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2} to range of intervals} +} +\value{ +Tibble with forecasts (quantiles and point estimates) +} +\description{ +This function uses fitted model object from \link[fiphde]{glm_fit} and future covariate data to create probablistic forecasts at specific quantiles derived from the "alpha" parameter. +} diff --git a/man/glm_quibble.Rd b/man/glm_quibble.Rd new file mode 100644 index 0000000..2b78956 --- /dev/null +++ b/man/glm_quibble.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/glm.R +\name{glm_quibble} +\alias{glm_quibble} +\title{Get quantiles from prediction intervals} +\usage{ +glm_quibble( + fit, + new_data, + alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2 +) +} +\arguments{ +\item{fit}{Fitted model object from \link[fiphde]{glm_fit}} + +\item{new_data}{Tibble with new data on which the \link[trending]{predict.trending_model_fit} method should run} + +\item{alpha}{Vector specifying the threshold(s) to be used for prediction intervals; alpha of \code{0.05} would correspond to 95\% PI; default is \code{c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2} to range of intervals} +} +\value{ +A tibble with predicted values at each quantile (lower and upper bound for each value of "alpha") +} +\description{ +This function runs the \link[trending]{predict.trending_model_fit} method on a fitted model at specified values of "alpha" in order to create a range of prediction intervals. The processing also includes steps to convert the alpha to corresponding quantile values at upper and lower bounds. +} diff --git a/man/glm_wrap.Rd b/man/glm_wrap.Rd new file mode 100644 index 0000000..b79161a --- /dev/null +++ b/man/glm_wrap.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/glm.R +\name{glm_wrap} +\alias{glm_wrap} +\title{Run glm modeling and forecasting} +\usage{ +glm_wrap( + .data, + .models, + new_covariates = NULL, + horizon = 4, + alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2 +) +} +\arguments{ +\item{.data}{Data including all explanatory and outcome variables needed for modeling} + +\item{.models}{List of models defined as \link[trending]{trending_model} objects} + +\item{new_covariates}{Tibble with one column per covariate, and n rows for n horizons being forecasted} + +\item{horizon}{Number of weeks ahead for forecasting} + +\item{alpha}{Vector specifying the threshold(s) to be used for prediction intervals; alpha of \code{0.05} would correspond to 95\% PI; default is \code{c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2} to range of intervals} +} +\value{ +Named list with two elements: +\itemize{ +\item model: Output from \link[fiphde]{glm_fit} with selected model fit +\item forecasts: Output from \link[fiphde]{glm_forecast} with forecasts from each horizon combined as a single tibble +} +} +\description{ +This is a wrapper function that pipelines influenza hospitalization modeling (\link[fiphde]{glm_fit}) and forecasting (\link[fiphde]{glm_forecast}). +} diff --git a/man/plot_forc.Rd b/man/plot_forc.Rd new file mode 100644 index 0000000..4be35fa --- /dev/null +++ b/man/plot_forc.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{plot_forc} +\alias{plot_forc} +\title{Plot forecasts against observed data} +\usage{ +plot_forc(.forecasts, .train, .test) +} +\arguments{ +\item{.forecasts}{Tibble with prepped forecasts} + +\item{.train}{Tibble with data used for modeling} + +\item{.test}{Tibble with observed data held out from modeling} +} +\value{ +A \code{ggplot2} plot object +} +\description{ +This helper function creates a plot to visualize the forecasted point estimates (and 95\% prediction interval) alongside the observed data. +} diff --git a/man/wis_score.Rd b/man/wis_score.Rd new file mode 100644 index 0000000..908b1e1 --- /dev/null +++ b/man/wis_score.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{wis_score} +\alias{wis_score} +\title{Calculate WIS score} +\usage{ +wis_score(.forecasts, .test) +} +\arguments{ +\item{.forecasts}{Tibble with prepped foreacsts} + +\item{.test}{Tibble with test data including observed value for flu admissions stored in "flu.admits" column} +} +\value{ +Tibble with the WIS for each combination of epiweek and epiyear +} +\description{ +Helper function to calculate weighted interval score (WIS) for prepped forecasts +} From 2c6cc0fac7c275ac209dceee88dc98773567e3d8 Mon Sep 17 00:00:00 2001 From: VP Nagraj Date: Tue, 21 Dec 2021 11:41:19 -0500 Subject: [PATCH 2/5] addressing issues with devtools::check() --- R/fiphde.R | 16 ++++++++++++++++ R/glm.R | 12 ++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/R/fiphde.R b/R/fiphde.R index ac30e0c..3beddfe 100644 --- a/R/fiphde.R +++ b/R/fiphde.R @@ -47,5 +47,21 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c(".", "sea_label", "monday", "yweek", + "0.025", + "0.975", + "", + "error", + "estimate", + "model", + "lower", + "upper", + "lower_pi", + "upper_pi", + "flu.admits", + "quantile", + "fit", + "truth", + "rmse", + "value", ".")) diff --git a/R/glm.R b/R/glm.R index 7ae5169..aa1873c 100644 --- a/R/glm.R +++ b/R/glm.R @@ -29,7 +29,7 @@ glm_fit <- function(.data, trendeval::evaluate_models( .models, dat, - method = evaluate_resampling, + method = trendeval::evaluate_resampling, metrics = list(yardstick::rmse) ) @@ -51,7 +51,7 @@ glm_fit <- function(.data, ret <- dplyr::tibble(model_class = best_by_rmse$model_class, fit = tmp_fit, location = unique(dat$location), - data = tidyr::nest(dat, fit_data = everything())) + data = tidyr::nest(dat, fit_data = dplyr::everything())) message("Selected model ...") message(as.character(ret$fit$fitted_model$family)[1]) @@ -84,7 +84,7 @@ glm_quibble <- function(fit, new_data, alpha = c(0.01, 0.025, seq(0.05, 0.45, by ## run the predict method on the fitted model ## use the given alpha fit %>% - predict(new_data, alpha = alpha) %>% + stats::predict(new_data, alpha = alpha) %>% ## get just the prediction interval bounds ... ## index (time column must be named index) ... dplyr::select(epiyear, epiweek, lower_pi, upper_pi) %>% @@ -137,14 +137,14 @@ glm_forecast <- function(.data, new_covariates = NULL, fit, alpha = c(0.01, 0.02 # ## take the fit object provided and use predict point_estimates <- fit %>% - predict(new_data) %>% + stats::predict(new_data) %>% dplyr::select(epiweek, epiyear, estimate) %>% dplyr::mutate(estimate = round(estimate)) %>% dplyr::mutate(quantile = NA) %>% dplyr::select(epiweek, epiyear, quantile, value = estimate) ## map the quibble function over the alphas - quants <- map_df(alpha, .f = function(x) glm_quibble(fit = fit, new_data = new_data, alpha = x)) + quants <- purrr::map_df(alpha, .f = function(x) glm_quibble(fit = fit, new_data = new_data, alpha = x)) ## prep data dplyr::bind_rows(point_estimates,quants) %>% @@ -196,7 +196,7 @@ glm_wrap <- function(.data, .models, new_covariates = NULL, horizon = 4, alpha = dplyr::select(-quantile) %>% dplyr::mutate(flu.admits.cov = NA, location = "US") - forc_list[[i]] <- glm_forecast(.data = bind_rows(.data, prev_weeks), + forc_list[[i]] <- glm_forecast(.data = dplyr::bind_rows(.data, prev_weeks), new_covariates = new_covariates[i,], fit = tmp_fit$fit, alpha = alpha) From efa40f8a72bf387ab7f7e79339241ef6c373de6a Mon Sep 17 00:00:00 2001 From: VP Nagraj Date: Tue, 21 Dec 2021 11:45:02 -0500 Subject: [PATCH 3/5] forgot to call trending::fit --- R/fiphde.R | 1 - R/glm.R | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R/fiphde.R b/R/fiphde.R index 3beddfe..f17e105 100644 --- a/R/fiphde.R +++ b/R/fiphde.R @@ -59,7 +59,6 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c(".", "upper_pi", "flu.admits", "quantile", - "fit", "truth", "rmse", "value", diff --git a/R/glm.R b/R/glm.R index aa1873c..c3da768 100644 --- a/R/glm.R +++ b/R/glm.R @@ -45,7 +45,7 @@ glm_fit <- function(.data, ## fit the model tmp_fit <- best_by_rmse %>% - fit(dat) + trending::fit(dat) ## construct tibble with model type, actual fit, and the location ret <- dplyr::tibble(model_class = best_by_rmse$model_class, From e89bd4404183db38571a3600feee44b2a9243fae Mon Sep 17 00:00:00 2001 From: VP Nagraj Date: Tue, 21 Dec 2021 11:45:20 -0500 Subject: [PATCH 4/5] adding scratch example of ILI to flu hosp forecasting workflow --- scratch/worfklow.R | 93 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 scratch/worfklow.R diff --git a/scratch/worfklow.R b/scratch/worfklow.R new file mode 100644 index 0000000..a980c7e --- /dev/null +++ b/scratch/worfklow.R @@ -0,0 +1,93 @@ +library(fiphde) +library(tidyverse) +library(lubridate) +library(focustools) +library(fabletools) +library(fable) + +## retreive ILI data +ilidat <- get_cdc_ili() + +## set a date from whihc the +trim_date <- "2020-03-01" + +us_ilidat <- + ilidat %>% + filter(location == "US") %>% + mutate(epiyear = epiyear(week_start), + epiweek = epiweek(week_start)) %>% + filter(week_start > as.Date(trim_date, format = "%Y-%m-%d")) %>% + select(location, epiyear, epiweek, weighted_ili) + +us_ilidat_tsibble <- + us_ilidat %>% + make_tsibble(chop=TRUE) + +us_ili_fit <- + us_ilidat_tsibble %>% + ## NOTE: parameter space here ?? + model(arima = ARIMA(weighted_ili~PDQ(0,0,0)+pdq(0:5,0:5,0:5), stepwise=FALSE, approximation=FALSE)) + +us_ili_fit %>% + extract_arima_params() + +## oddly this WORKS (even though the outcome is not icases) ... need to fix that behavior in focustools +us_ili_forecast <- + focustools::ts_forecast(us_ili_fit, outcome = "icases") + +us_ili_forecast + +hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12") + +tmp_weekly_flu <- + hosp %>% + mutate(date = date - 1) %>% + mutate(epiweek = lubridate::epiweek(date), + epiyear = lubridate::epiyear(date)) %>% + group_by(epiweek, epiyear) %>% + summarise(flu.admits = sum(flu.admits, na.rm = TRUE), + flu.admits.cov = sum(flu.admits.cov, na.rm = TRUE), + .groups = "drop") %>% + mutate(location = "US", .before = "epiweek") %>% + left_join(us_ilidat) + +## add lag columns +tmp_weekly_flu_w_lag <- + tmp_weekly_flu %>% + mutate(lag_1 = lag(flu.admits, 1), + lag_2 = lag(flu.admits, 2), + lag_3 = lag(flu.admits, 3), + lag_4 = lag(flu.admits, 4)) %>% + filter(complete.cases(.)) %>% + dplyr::mutate(date = MMWRweek::MMWRweek2Date(epiyear, epiweek)) %>% + filter(date >= max(date) - 7*24) + +train_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() < n() - 3) +test_dat <- tmp_weekly_flu_w_lag %>% filter(row_number() >= n() - 3) + +models <- + list( + glm_poisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "poisson"), + glm_quasipoisson_ili = trending::glm_model(flu.admits ~ weighted_ili, family = "quasipoisson"), + glm_negbin_ili = trending::glm_nb_model(flu.admits ~ weighted_ili), + glm_poisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "poisson"), + glm_quasipoisson_ili_offset = trending::glm_model(flu.admits ~ weighted_ili + offset(flu.admits.cov), family = "quasipoisson"), + glm_negbin_ili_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + offset(flu.admits.cov)), + glm_poisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "poisson"), + glm_quasipoisson_ili_lags = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4, family = "quasipoisson"), + glm_negbin_ili_lags = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4), + glm_poisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "poisson"), + glm_quasipoisson_ili_lags_offset = trending::glm_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov), family = "quasipoisson"), + glm_negbin_ili_lags_offset = trending::glm_nb_model(flu.admits ~ weighted_ili + lag_1 + lag_2 + lag_3 + lag_4 + offset(flu.admits.cov)) + ) + +res <- glm_wrap(train_dat, + ## NOTE: hardcoding ili covariates ... butshould actually get from forecasts above + new_covariates = tibble(flu.admits.cov = rep(tail(train_dat$flu.admits.cov,1), 4), weighted_ili = c(2.31,2.43,2.45,2.68)), + .models = models, + alpha = c(0.01, 0.025, seq(0.05, 0.45, by = 0.05)) * 2) + +res$forecasts +res$model + +plot_forc(res$forecasts, train_dat, test_dat) From 3f8565f6c56d1440a6faff539ba578974af91463 Mon Sep 17 00:00:00 2001 From: VP Nagraj Date: Tue, 21 Dec 2021 11:54:07 -0500 Subject: [PATCH 5/5] edits to hosp cols to numeric --- scratch/worfklow.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scratch/worfklow.R b/scratch/worfklow.R index a980c7e..fe54de9 100644 --- a/scratch/worfklow.R +++ b/scratch/worfklow.R @@ -42,6 +42,8 @@ hosp <- get_hdgov_hosp(mindate="2021-04-01", maxdate="2021-12-12") tmp_weekly_flu <- hosp %>% mutate(date = date - 1) %>% + mutate(flu.admits = as.numeric(flu.admits), + flu.admits.cov = as.numeric(flu.admits.cov)) %>% mutate(epiweek = lubridate::epiweek(date), epiyear = lubridate::epiyear(date)) %>% group_by(epiweek, epiyear) %>% @@ -91,3 +93,4 @@ res$forecasts res$model plot_forc(res$forecasts, train_dat, test_dat) +