From 6e0874771849368320f0091e5e55be7db8b7d6c1 Mon Sep 17 00:00:00 2001
From: sangeetabhatia03 <sangeetabhatia03@gmail.com>
Date: Wed, 15 May 2024 19:53:39 +0100
Subject: [PATCH 1/3] Additional artefacts

- Save data.frame going into figures for additional analysis
- Remove EBOLA as a parameter as this is a pathogen-specific task
---
 src/ebola_severity/orderly.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/ebola_severity/orderly.R b/src/ebola_severity/orderly.R
index 9ecac64..d101100 100644
--- a/src/ebola_severity/orderly.R
+++ b/src/ebola_severity/orderly.R
@@ -15,8 +15,6 @@ library(doconv)
 
 orderly_strict_mode()
 
-orderly_parameters(pathogen = "EBOLA")
-
 orderly_artefact(
   "Plots and tables for severity parameters",
   c(
@@ -39,7 +37,7 @@ orderly_artefact(
 # Get data from db_compilation
 orderly_dependency(
   "db_compilation",
-  "latest(parameter:pathogen == this:pathogen)",
+  "latest(parameter:pathogen == 'EBOLA')",
   c(
     "articles.csv",
     "parameters.csv"
@@ -189,6 +187,8 @@ ordered_dat <- sev_dat %>%
     )
   )
 
+saveRDS(ordered_dat, "ebola_severity.rds")
+orderly_artefact("Ebola severity estimates", "ebola_severity.rds")
 # Pull numbers
 ordered_dat %>% nrow() # 166
 length(unique(ordered_dat$covidence_id)) # 130

From 6597ebb9ec50a37a95a2b79be647bf78509497f0 Mon Sep 17 00:00:00 2001
From: sangeetabhatia03 <sangeetabhatia03@gmail.com>
Date: Tue, 4 Jun 2024 17:40:15 +0100
Subject: [PATCH 2/3] Edits for Ebola R1

---
 .../ebola_severity_additional.R               | 156 ++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 src/ebola_severity_additional/ebola_severity_additional.R

diff --git a/src/ebola_severity_additional/ebola_severity_additional.R b/src/ebola_severity_additional/ebola_severity_additional.R
new file mode 100644
index 0000000..a152401
--- /dev/null
+++ b/src/ebola_severity_additional/ebola_severity_additional.R
@@ -0,0 +1,156 @@
+library(dplyr)
+library(orderly2)
+library(epireview)
+orderly_dependency("ebola_severity", "latest", "ebola_severity.rds")
+orderly_shared_resource("ebola_functions.R" = "ebola_functions.R")
+orderly_shared_resource("ebola_visualisation.R" = "ebola_visualisation.R")
+
+source("ebola_functions.R")
+source("ebola_visualisation.R")
+
+cfr <- readRDS("ebola_severity.rds")
+## Removing this as it is a CFR across multiple outbreaks from 1976 to 2014
+idx <- cfr$covidence_id %in% 2594 & cfr$cfr_ifr_numerator %in% 526
+cfr <- cfr[!idx, ]
+## Text says 96; table says 88. I can't see where the 96 comes from
+## Going with 88. No CI provided for this estimate
+idx <- cfr$covidence_id %in% 2594 & cfr$cfr_ifr_numerator %in% 252
+cfr$parameter_value[idx] <- 88
+cfr$parameter_uncertainty_lower_value[idx] <- NA
+cfr$parameter_uncertainty_upper_value[idx] <- NA
+
+## Study in pregnant women; naive estimate
+cfr$cfr_ifr_method[cfr$covidence_id %in% 1889] <- "Naive"
+
+## https://doi.org/10.1093/cid/ciz678 estimates CFR with and without
+## imputation of missing data. Only number of cases without impuation
+## has been extracted, even where the CFR itself is based on imputed
+## data. Fixing this.
+cfr$population_sample_size[cfr$covidence_id %in% 16500 & 
+  cfr$cfr_ifr_method %in% "Adjusted"] <- 33338
+cfr$population_sample_size[cfr$covidence_id %in% 16500 & 
+  cfr$cfr_ifr_method %in% "Naive" & cfr$parameter_value == 71.9] <- 33338  
+
+## Adjusted CFRs, high-quality studies
+adjusted_high <- cfr[cfr$cfr_ifr_method %in% "Adjusted" & 
+  cfr$article_qa_score > 50, ]
+adjusted_high <- param_pm_uncertainty(adjusted_high) |> 
+  mark_multiple_estimates(label_type = "letters") |>
+  reorder_studies()
+p1 <- forest_plot(
+  adjusted_high, col_by = "population_country", facet_by = "ebola_species") +
+labs(x = "Case fatality rate (%)", 
+  title = "Adjusted case fatality rates in high-quality studies") +
+  theme(axis.text.y = element_text(size = 8))
+
+ggsave("adjusted_high.png", p1, width = 10, height = 6, units = "in")
+
+adjusted_all <- cfr[cfr$cfr_ifr_method %in% "Adjusted", ]
+adjusted_all <- param_pm_uncertainty(adjusted_all) |> 
+  mark_multiple_estimates(label_type = "letters") |>
+  reorder_studies()
+p2 <- forest_plot(
+  adjusted_all, col_by = "population_country", facet_by = "ebola_species") +
+labs(x = "Case fatality rate (%)", 
+  title = "Adjusted CFR estimates") +
+  theme(axis.text.y = element_text(size = 8))
+
+ggsave("adjusted_all.png", p2, width = 10, height = 6, units = "in")
+
+naive_all <- cfr[cfr$cfr_ifr_method %in% "Naive", ]
+naive_all <- param_pm_uncertainty(naive_all) |> 
+  mark_multiple_estimates(label_type = "letters") |>
+  reorder_studies()
+
+p3 <- forest_plot(
+  naive_all, col_by = "population_country", facet_by = "ebola_species") +
+labs(x = "Case fatality rate (%)",
+  title = "Naive CFR estimates") +
+  theme(axis.text.y = element_text(size = 6))
+
+ggsave("naive_all.png", p3, width = 10, height = 6, units = "in")
+
+## Retain only general population estimates
+pb <- cfr[cfr$population_sample_type %in% "Population based" & 
+  cfr$article_qa_score > 50, ]
+pb <- param_pm_uncertainty(pb)  
+pb <- mark_multiple_estimates(pb, label_type = "letters")
+pb <- reorder_studies(pb)
+p4 <- forest_plot(
+  pb, col_by = "population_country", facet_by = "ebola_species"
+) +
+  labs(x = "Case fatality rate (%)", 
+    title = "Population-based CFR estimates in high-quality studies") +
+  theme(axis.text.y = element_text(size = 8))
+
+ggsave("population_based.png", p4, width = 10, height = 6, units = "in")
+
+pb <- cfr[cfr$population_sample_type %in% "Population based", ]
+pb <- param_pm_uncertainty(pb)  
+pb <- mark_multiple_estimates(pb, label_type = "letters")
+pb <- reorder_studies(pb)
+p5 <- forest_plot(
+  pb, col_by = "population_country", facet_by = "ebola_species"
+) +
+  labs(x = "Case fatality rate (%)", 
+    title = "Population-based CFR estimates") +
+  theme(axis.text.y = element_text(size = 8))
+
+ggsave("population_based_all.png", p5, width = 10, height = 6, units = "in")
+
+## Estimates where the population_sample_type is not population-based; could be 
+## missing or hospital-based etc
+gb <- cfr[!cfr$population_sample_type %in% "Population based", ]
+gb <- param_pm_uncertainty(gb)  
+gb <- mark_multiple_estimates(gb, label_type = "letters")
+gb <- reorder_studies(gb)
+p5 <- forest_plot(
+  gb, col_by = "population_country", facet_by = "ebola_species"
+) +
+  labs(x = "Case fatality rate (%)", 
+    title = "Population-based CFR estimates") +
+  theme(axis.text.y = element_text(size = 8))
+
+ggsave("not_population_based_all.png", p5, width = 10, height = 6, units = "in")
+
+## Distribution of central values by denominator
+wa <- cfr[cfr$outbreak %in% "West Africa 2013-2016", ]
+wa$sample_size <- case_when(
+  wa$cfr_ifr_denominator <= 100 ~ "0-100",
+  wa$cfr_ifr_denominator > 100 & wa$cfr_ifr_denominator <= 1000 ~ 
+    "101-1000",
+  wa$cfr_ifr_denominator > 1000 ~ " > 1000",
+  TRUE ~ NA_character_
+)
+
+ggplot(wa, aes(y = parameter_value, x = cfr_ifr_denominator)) +
+  geom_point(alpha = 0.5) +
+  facet_wrap(~sample_size, scales = "free_x") + 
+  labs(x = "Case fatality rate (%)", 
+    title = "Distribution of central values by denominator") +
+  theme(legend.position = "bottom") +
+  ylim(0, 100)
+  
+
+weighted_cfr <- function(x) {
+  sum(
+    (x$parameter_value * x$cfr_ifr_denominator) / 
+    sum(x$cfr_ifr_denominator, na.rm = TRUE),
+    na.rm = TRUE
+  )
+}
+w_overall <- weighted_cfr(cfr)
+uw_overall <- mean(cfr$parameter_value, na.rm = TRUE)
+w_by_method <- split(cfr, cfr$cfr_ifr_method) |>
+  map_dfr(weighted_cfr, .id = "cfr_ifr_method") 
+uw_by_method <- cfr |>
+  group_by(cfr_ifr_method) |>
+  summarise(uw = mean(parameter_value, na.rm = TRUE))
+
+ggplot(cfr, aes(parameter_value)) + 
+  geom_histogram(binwidth = 1, position = position_dodge()) +
+  facet_wrap(~cfr_ifr_method, ncol = 1) +
+  geom_vline(xintercept = weighted_cfr, color = "red") +
+  labs(x = "Case fatality rate (%)", 
+    title = "Distribution of central values") +
+    theme_minimal()

From dce8b5519b123a3e2a378fff37295ceec4131fad Mon Sep 17 00:00:00 2001
From: sangeetabhatia03 <sangeetabhatia03@gmail.com>
Date: Wed, 24 Jul 2024 17:17:36 +0100
Subject: [PATCH 3/3] Additional CFR analyses for Ebola R1

---
 ebola_workflow.R                                      |  7 +++++--
 .../ebola_severity_additional.R                       | 11 +++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/ebola_workflow.R b/ebola_workflow.R
index 6fcbe46..e54c913 100644
--- a/ebola_workflow.R
+++ b/ebola_workflow.R
@@ -77,7 +77,8 @@ orderly_location_pull_packet()
 orderly_run("ebola_reproduction_number", parameters = list(pathogen = "EBOLA"))
 
 # Severity (CFR/IFR)
-orderly_run("ebola_severity", parameters = list(pathogen = "EBOLA"))
+orderly_run("ebola_severity")
+orderly_run("ebola_severity_additional")
 
 # Human Delays
 orderly_run("ebola_delays", parameters = list(pathogen = "EBOLA"))
@@ -117,5 +118,7 @@ orderly_run("ebola_summary", parameters = list(pathogen = "EBOLA"))
 orderly_run("ebola_figures", parameters = list(pathogen = "EBOLA"))
 
 # Figures and tables for supplementary material
-orderly_run("ebola_supplementary_figures", parameters = list(pathogen = "EBOLA"))
+# This task will not run because it pulls in pdfs from the previous tasks and
+# we have commented out docx2pdf conversion.
+# orderly_run("ebola_supplementary_figures", parameters = list(pathogen = "EBOLA"))
 
diff --git a/src/ebola_severity_additional/ebola_severity_additional.R b/src/ebola_severity_additional/ebola_severity_additional.R
index a152401..c1fbfcd 100644
--- a/src/ebola_severity_additional/ebola_severity_additional.R
+++ b/src/ebola_severity_additional/ebola_severity_additional.R
@@ -123,14 +123,15 @@ wa$sample_size <- case_when(
   TRUE ~ NA_character_
 )
 
-ggplot(wa, aes(y = parameter_value, x = cfr_ifr_denominator)) +
+p <- ggplot(wa, aes(y = parameter_value, x = cfr_ifr_denominator)) +
   geom_point(alpha = 0.5) +
   facet_wrap(~sample_size, scales = "free_x") + 
   labs(x = "Case fatality rate (%)", 
     title = "Distribution of central values by denominator") +
   theme(legend.position = "bottom") +
   ylim(0, 100)
-  
+
+ggsave("cfr_denominator.png", width = 10, height = 6, units = "in")
 
 weighted_cfr <- function(x) {
   sum(
@@ -147,10 +148,12 @@ uw_by_method <- cfr |>
   group_by(cfr_ifr_method) |>
   summarise(uw = mean(parameter_value, na.rm = TRUE))
 
-ggplot(cfr, aes(parameter_value)) + 
+p <- ggplot(cfr, aes(parameter_value)) + 
   geom_histogram(binwidth = 1, position = position_dodge()) +
   facet_wrap(~cfr_ifr_method, ncol = 1) +
-  geom_vline(xintercept = weighted_cfr, color = "red") +
+  ##geom_vline(xintercept = weighted_cfr, color = "red") +
   labs(x = "Case fatality rate (%)", 
     title = "Distribution of central values") +
     theme_minimal()
+
+ggsave("cfr_hist.png", p, width = 10, height = 6, units = "in")