-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcali_data_wrangle.R
76 lines (51 loc) · 1.95 KB
/
cali_data_wrangle.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
library(tidyverse)
library(here)
## Load the CDC data: a comma-separated text file kept under raw_data/
cali_data <- here::here("raw_data", "cali_data.txt") %>%
  read_csv()
## Percentages
# Total population across every cohort
tot_pop <- sum(cali_data$pop_size)

# The derived columns are built in a single mutate(); dplyr evaluates the
# arguments in order, so each column can use the ones defined just above it.
cali_data <- cali_data %>%
  mutate(
    # cohort share of the total population (a fraction of 1)
    pop_per = pop_size / tot_pop,
    # probability that a cohort member is hospitalized
    host_prob = C19_hospital / pop_size,
    # rate per 1,000 people -- should match CDC estimates
    per_thousand = host_prob * 1000,
    # rate per 100,000 people -- more understandable results
    per_100K = host_prob * 100000
  )
## Probabilities
# Sum of the per-cohort hospitalization probabilities (normalizing total)
prob_sum <- sum(cali_data[["host_prob"]])

# Each cohort's share of that summed probability
cali_data <- mutate(cali_data, host_per = host_prob / prob_sum)
## For Visualization
# Turn status into a factor with an explicit level order so the cohorts are
# shown in this sequence rather than alphabetically.
cali_data <- cali_data %>%
  mutate(
    status = factor(
      status,
      levels = c(
        "Vax_Prior_C19",
        "Vax_No_Prior",
        "UnVax_Prior_C19",
        "UnVax_No_Prior"
      )
    )
  )
## SAVE
# Write the wrangled cali_data object to tidy_data/cali_data.RData.
# save() takes the objects to store plus an explicit `file =` destination;
# passing only the path (as before) supplies no file and the call errors
# without writing anything.
save(cali_data, file = here::here("tidy_data", "cali_data.RData"))
## Pretty version of data
# Display-friendly copy of cali_data: every column is kept, but with
# human-readable headers. The headers previously misspelled as "Hosptial"
# are corrected to "Hospital" here and in the select() below.
cali_data_show <- cali_data %>%
  rename(
    Cohort = status,
    Size = pop_size,
    `Hospital Cases` = C19_hospital,
    `Size (%)` = pop_per,
    `Hospital Prob.` = host_prob,
    `Per 1K` = per_thousand,
    `Per 100K` = per_100K,
    `Hospital (%)` = host_per
  )

# Keep only the presentation columns, in reading order
cali_data_show2 <- cali_data_show %>%
  select(
    Cohort, Size, `Size (%)`, `Hospital Cases`,
    `Hospital (%)`, `Hospital Prob.`, `Per 1K`, `Per 100K`
  )

# Top-level expression: auto-prints the table when evaluated interactively
cali_data_show2