From 6e0874771849368320f0091e5e55be7db8b7d6c1 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Wed, 15 May 2024 19:53:39 +0100 Subject: [PATCH 1/3] Additional artefacts - Save data.frame going into figures for additional analysis - Remove EBOLA as a parameter as this is a pathogen-specific task --- src/ebola_severity/orderly.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ebola_severity/orderly.R b/src/ebola_severity/orderly.R index 9ecac64..d101100 100644 --- a/src/ebola_severity/orderly.R +++ b/src/ebola_severity/orderly.R @@ -15,8 +15,6 @@ library(doconv) orderly_strict_mode() -orderly_parameters(pathogen = "EBOLA") - orderly_artefact( "Plots and tables for severity parameters", c( @@ -39,7 +37,7 @@ orderly_artefact( # Get data from db_compilation orderly_dependency( "db_compilation", - "latest(parameter:pathogen == this:pathogen)", + "latest(parameter:pathogen == 'EBOLA')", c( "articles.csv", "parameters.csv" @@ -189,6 +187,8 @@ ordered_dat <- sev_dat %>% ) ) +saveRDS(ordered_dat, "ebola_severity.rds") +orderly_artefact("Ebola severity estimates", "ebola_severity.rds") # Pull numbers ordered_dat %>% nrow() # 166 length(unique(ordered_dat$covidence_id)) # 130 From 6597ebb9ec50a37a95a2b79be647bf78509497f0 Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Tue, 4 Jun 2024 17:40:15 +0100 Subject: [PATCH 2/3] Edits for Ebola R1 --- .../ebola_severity_additional.R | 156 ++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 src/ebola_severity_additional/ebola_severity_additional.R diff --git a/src/ebola_severity_additional/ebola_severity_additional.R b/src/ebola_severity_additional/ebola_severity_additional.R new file mode 100644 index 0000000..a152401 --- /dev/null +++ b/src/ebola_severity_additional/ebola_severity_additional.R @@ -0,0 +1,156 @@ +library(dplyr) +library(orderly2) +library(epireview) +orderly_dependency("ebola_severity", "latest", "ebola_severity.rds") +orderly_shared_resource("ebola_functions.R" = "ebola_functions.R") +orderly_shared_resource("ebola_visualisation.R" = "ebola_visualisation.R") + +source("ebola_functions.R") +source("ebola_visualisation.R") + +cfr <- readRDS("ebola_severity.rds") +## Removing this as it is a CFR across multiple outbreaks from 1976 to 2014 +idx <- cfr$covidence_id %in% 2594 & cfr$cfr_ifr_numerator %in% 526 +cfr <- cfr[!idx, ] +## Text says 96; table says 88. I can't see where the 96 comes from +## Going with 88. No CI provided for this estimate +idx <- cfr$covidence_id %in% 2594 & cfr$cfr_ifr_numerator %in% 252 +cfr$parameter_value[idx] <- 88 +cfr$parameter_uncertainty_lower_value[idx] <- NA +cfr$parameter_uncertainty_upper_value[idx] <- NA + +## Study in pregnant women; naive estimate +cfr$cfr_ifr_method[cfr$covidence_id %in% 1889] <- "Naive" + +## https://doi.org/10.1093/cid/ciz678 estimates CFR with and without +## imputation of missing data. Only number of cases without impuation +## has been extracted, even where the CFR itself is based on imputed +## data. Fixing this. +cfr$population_sample_size[cfr$covidence_id %in% 16500 & + cfr$cfr_ifr_method %in% "Adjusted"] <- 33338 +cfr$population_sample_size[cfr$covidence_id %in% 16500 & + cfr$cfr_ifr_method %in% "Naive" & cfr$parameter_value == 71.9] <- 33338 + +## Adjusted CFRs, high-quality studies +adjusted_high <- cfr[cfr$cfr_ifr_method %in% "Adjusted" & + cfr$article_qa_score > 50, ] +adjusted_high <- param_pm_uncertainty(adjusted_high) |> + mark_multiple_estimates(label_type = "letters") |> + reorder_studies() +p1 <- forest_plot( + adjusted_high, col_by = "population_country", facet_by = "ebola_species") + +labs(x = "Case fatality rate (%)", + title = "Adjusted case fatality rates in high-quality studies") + + theme(axis.text.y = element_text(size = 8)) + +ggsave("adjusted_high.png", p1, width = 10, height = 6, units = "in") + +adjusted_all <- cfr[cfr$cfr_ifr_method %in% "Adjusted", ] +adjusted_all <- param_pm_uncertainty(adjusted_all) |> + mark_multiple_estimates(label_type = "letters") |> + reorder_studies() +p2 <- forest_plot( + adjusted_all, col_by = "population_country", facet_by = "ebola_species") + +labs(x = "Case fatality rate (%)", + title = "Adjusted CFR estimates") + + theme(axis.text.y = element_text(size = 8)) + +ggsave("adjusted_all.png", p2, width = 10, height = 6, units = "in") + +naive_all <- cfr[cfr$cfr_ifr_method %in% "Naive", ] +naive_all <- param_pm_uncertainty(naive_all) |> + mark_multiple_estimates(label_type = "letters") |> + reorder_studies() + +p3 <- forest_plot( + naive_all, col_by = "population_country", facet_by = "ebola_species") + +labs(x = "Case fatality rate (%)", + title = "Naive CFR estimates") + + theme(axis.text.y = element_text(size = 6)) + +ggsave("naive_all.png", p3, width = 10, height = 6, units = "in") + +## Retain only general population estimates +pb <- cfr[cfr$population_sample_type %in% "Population based" & + cfr$article_qa_score > 50, ] +pb <- param_pm_uncertainty(pb) +pb <- mark_multiple_estimates(pb, label_type = "letters") +pb <- reorder_studies(pb) +p4 <- forest_plot( + pb, col_by = "population_country", facet_by = "ebola_species" +) + + labs(x = "Case fatality rate (%)", + title = "Population-based CFR estimates in high-quality studies") + + theme(axis.text.y = element_text(size = 8)) + +ggsave("population_based.png", p4, width = 10, height = 6, units = "in") + +pb <- cfr[cfr$population_sample_type %in% "Population based", ] +pb <- param_pm_uncertainty(pb) +pb <- mark_multiple_estimates(pb, label_type = "letters") +pb <- reorder_studies(pb) +p5 <- forest_plot( + pb, col_by = "population_country", facet_by = "ebola_species" +) + + labs(x = "Case fatality rate (%)", + title = "Population-based CFR estimates") + + theme(axis.text.y = element_text(size = 8)) + +ggsave("population_based_all.png", p5, width = 10, height = 6, units = "in") + +## Estimates where the population_sample_type is not population-based; could be +## missing or hospital-based etc +gb <- cfr[!cfr$population_sample_type %in% "Population based", ] +gb <- param_pm_uncertainty(gb) +gb <- mark_multiple_estimates(gb, label_type = "letters") +gb <- reorder_studies(gb) +p5 <- forest_plot( + gb, col_by = "population_country", facet_by = "ebola_species" +) + + labs(x = "Case fatality rate (%)", + title = "Population-based CFR estimates") + + theme(axis.text.y = element_text(size = 8)) + +ggsave("not_population_based_all.png", p5, width = 10, height = 6, units = "in") + +## Distribution of central values by denominator +wa <- cfr[cfr$outbreak %in% "West Africa 2013-2016", ] +wa$sample_size <- case_when( + wa$cfr_ifr_denominator <= 100 ~ "0-100", + wa$cfr_ifr_denominator > 100 & wa$cfr_ifr_denominator <= 1000 ~ + "101-1000", + wa$cfr_ifr_denominator > 1000 ~ " > 1000", + TRUE ~ NA_character_ +) + +ggplot(wa, aes(y = parameter_value, x = cfr_ifr_denominator)) + + geom_point(alpha = 0.5) + + facet_wrap(~sample_size, scales = "free_x") + + labs(x = "Case fatality rate (%)", + title = "Distribution of central values by denominator") + + theme(legend.position = "bottom") + + ylim(0, 100) + + +weighted_cfr <- function(x) { + sum( + (x$parameter_value * x$cfr_ifr_denominator) / + sum(x$cfr_ifr_denominator, na.rm = TRUE), + na.rm = TRUE + ) +} +w_overall <- weighted_cfr(cfr) +uw_overall <- mean(cfr$parameter_value, na.rm = TRUE) +w_by_method <- split(cfr, cfr$cfr_ifr_method) |> + map_dfr(weighted_cfr, .id = "cfr_ifr_method") +uw_by_method <- cfr |> + group_by(cfr_ifr_method) |> + summarise(uw = mean(parameter_value, na.rm = TRUE)) + +ggplot(cfr, aes(parameter_value)) + + geom_histogram(binwidth = 1, position = position_dodge()) + + facet_wrap(~cfr_ifr_method, ncol = 1) + + geom_vline(xintercept = weighted_cfr, color = "red") + + labs(x = "Case fatality rate (%)", + title = "Distribution of central values") + + theme_minimal() From dce8b5519b123a3e2a378fff37295ceec4131fad Mon Sep 17 00:00:00 2001 From: sangeetabhatia03 Date: Wed, 24 Jul 2024 17:17:36 +0100 Subject: [PATCH 3/3] Additional CFR analyses for Ebola R1 --- ebola_workflow.R | 7 +++++-- .../ebola_severity_additional.R | 11 +++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ebola_workflow.R b/ebola_workflow.R index 6fcbe46..e54c913 100644 --- a/ebola_workflow.R +++ b/ebola_workflow.R @@ -77,7 +77,8 @@ orderly_location_pull_packet() orderly_run("ebola_reproduction_number", parameters = list(pathogen = "EBOLA")) # Severity (CFR/IFR) -orderly_run("ebola_severity", parameters = list(pathogen = "EBOLA")) +orderly_run("ebola_severity") +orderly_run("ebola_severity_additional") # Human Delays orderly_run("ebola_delays", parameters = list(pathogen = "EBOLA")) @@ -117,5 +118,7 @@ orderly_run("ebola_summary", parameters = list(pathogen = "EBOLA")) orderly_run("ebola_figures", parameters = list(pathogen = "EBOLA")) # Figures and tables for supplementary material -orderly_run("ebola_supplementary_figures", parameters = list(pathogen = "EBOLA")) +# This task will not run because it pulls in pdfs from the previous tasks and +# we have commented out docx2pdf conversion. +# orderly_run("ebola_supplementary_figures", parameters = list(pathogen = "EBOLA")) diff --git a/src/ebola_severity_additional/ebola_severity_additional.R b/src/ebola_severity_additional/ebola_severity_additional.R index a152401..c1fbfcd 100644 --- a/src/ebola_severity_additional/ebola_severity_additional.R +++ b/src/ebola_severity_additional/ebola_severity_additional.R @@ -123,14 +123,15 @@ wa$sample_size <- case_when( TRUE ~ NA_character_ ) -ggplot(wa, aes(y = parameter_value, x = cfr_ifr_denominator)) + +p <- ggplot(wa, aes(y = parameter_value, x = cfr_ifr_denominator)) + geom_point(alpha = 0.5) + facet_wrap(~sample_size, scales = "free_x") + labs(x = "Case fatality rate (%)", title = "Distribution of central values by denominator") + theme(legend.position = "bottom") + ylim(0, 100) - + +ggsave("cfr_denominator.png", width = 10, height = 6, units = "in") weighted_cfr <- function(x) { sum( @@ -147,10 +148,12 @@ uw_by_method <- cfr |> group_by(cfr_ifr_method) |> summarise(uw = mean(parameter_value, na.rm = TRUE)) -ggplot(cfr, aes(parameter_value)) + +p <- ggplot(cfr, aes(parameter_value)) + geom_histogram(binwidth = 1, position = position_dodge()) + facet_wrap(~cfr_ifr_method, ncol = 1) + - geom_vline(xintercept = weighted_cfr, color = "red") + + ##geom_vline(xintercept = weighted_cfr, color = "red") + labs(x = "Case fatality rate (%)", title = "Distribution of central values") + theme_minimal() + +ggsave("cfr_hist.png", p, width = 10, height = 6, units = "in")