AlexsLemonade · jaclyn-taroni · Mar 10, 2020 · Mar 6, 2020 · Mar 6, 2020 · Mar 6, 2020
diff --git a/analyses/tmb-compare-tcga/compare-tmb.Rmd b/analyses/tmb-compare-tcga/compare-tmb.Rmd
@@ -29,9 +29,6 @@ _This assumes you are in the top directory of the repository._
 ```{r}
 # magrittr pipe
 `%>%` <- dplyr::`%>%`
-
-# Load in these functions so we can use `maf_to_granges`
-source(file.path("..", "snv-callers", "util", "tmb_functions.R"))
 ```
 
 Declare names of input and output directories.
@@ -56,58 +53,8 @@ if (!dir.exists(plots_dir)) {
 
 Custom function for plotting the TMB. 
 
-Function for making a combined CDF plot for TMB. 
-This plotting function was adapted from the [`breaks_cdf_plot` function in the 
-`chromosomal-instability`](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/b1b73fe321a97fa82d85c86d20bd85635aabba25/analyses/chromosomal-instability/util/chr-break-plot.R#L120)
-
 ```{r}
-tmb_cdf_plot <- function(tmb_df, plot_title, colour) {
-  tmb_df %>%
-    as.data.frame() %>%
-    dplyr::mutate(short_histology = tools::toTitleCase(short_histology)) %>%
-    # Only plot histologies groups with more than `min_samples` number of samples
-    dplyr::group_by(short_histology, add = TRUE) %>%
-    # Only keep groups with this amount of samples
-    dplyr::filter(dplyr::n() > 5) %>%
-    # Calculate histology group mean
-    dplyr::mutate(
-      hist_median = median(tmb),
-      hist_rank = rank(tmb, ties.method = "first") / dplyr::n(),
-      sample_size = paste0("n = ", dplyr::n())
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::mutate(short_histology = reorder(short_histology, hist_median)) %>%
-    # Now we will plot these as cummulative distribution plots
-    ggplot2::ggplot(ggplot2::aes(
-      x = hist_rank,
-      y = tmb
-    )) +
-    ggplot2::geom_point(color = colour) +
-    # Add summary line for mean
-    ggplot2::geom_segment(
-      x = 0, xend = 1, color = "grey",
-      ggplot2::aes(y = hist_median, yend = hist_median)
-    ) +
-    # Separate by histology
-    ggplot2::facet_wrap(~ short_histology + sample_size, nrow = 1, strip.position = "bottom") +
-    ggplot2::theme_classic() +
-    ggplot2::xlab("") +
-    ggplot2::ylab("Coding mutations per Mb") +
-    # Transform to log10 make non-log y-axis labels
-    ggplot2::scale_y_continuous(trans = "log1p", breaks = c(0, 1, 3, 10, 30, 100, 10000, 30000), 
-                                limits = c(0, 30000)) +
-    ggplot2::scale_x_continuous(limits = c(-0.2, 1.2), breaks = c()) +
-    # Making it pretty
-    ggplot2::theme(legend.position = "none") +
-    ggplot2::theme(
-      axis.text.x = ggplot2::element_blank(),
-      axis.ticks.x = ggplot2::element_blank(),
-      strip.placement = "outside",
-      strip.text = ggplot2::element_text(size = 10, angle = 90, hjust = 1),
-      strip.background = ggplot2::element_rect(fill = NA, color = NA)
-    ) +
-    ggplot2::ggtitle(plot_title)
-}
+source(file.path("util", "cdf-plot-function.R"))
 ```
 
 ## Set up consensus data
@@ -116,24 +63,28 @@ Read in the consensus TMB file.
 
 ```{r}
 # TODO: update if the this tmb consensus file gets updated in a future data release
-tmb_pbta <- data.table::fread(file.path("..", 
-                                        "snv-callers",
-                                        "results",
-                                        "consensus", 
-                                        "pbta-snv-mutation-tmb-coding.tsv")) %>% 
-  # This variable is weird when binding but we don't need it for the plot so we'll just remove it. 
-  dplyr::select(-genome_size) %>% 
+tmb_pbta <- data.table::fread(file.path(
+  "..",
+  "snv-callers",
+  "results",
+  "consensus",
+  "pbta-snv-mutation-tmb-coding.tsv"
+)) %>%
+  # This variable is weird when binding but we don't need it for the plot so we'll just remove it.
+  dplyr::select(-genome_size) %>%
   dplyr::filter(experimental_strategy != "Panel")
 ```
 
 
 ```{r}
 # TODO: update if this tmb consensus file gets updated in a future data release
-tmb_tcga <- data.table::fread(file.path("..", 
-                                        "snv-callers",
-                                        "results",
-                                        "consensus",
-                                        "tcga-snv-mutation-tmb-coding.tsv")) %>% 
+tmb_tcga <- data.table::fread(file.path(
+  "..",
+  "snv-callers",
+  "results",
+  "consensus",
+  "tcga-snv-mutation-tmb-coding.tsv"
+)) %>%
   dplyr::select(-genome_size)
 ```
 
@@ -142,20 +93,42 @@ tmb_tcga <- data.table::fread(file.path("..",
 Plot each dataset as its own CDF plot. 
 
 ```{r}
-pbta_plot <- tmb_cdf_plot(tmb_pbta, plot_title = "PBTA", colour = "#3BC8A2") +
-    ggplot2::theme(
+pbta_plot <- cdf_plot(
+  df = tmb_pbta,
+  plot_title = "PBTA",
+  num_col = "tmb",
+  group_col = "short_histology",
+  color = "#3BC8A2",
+  n_group = 5,
+  x_lim = c(-1.2, 1.2),
+  y_lim = c(0, 30000),
+  x_lab = "",
+  y_lab = "Coding Mutations per Mb"
+) +
+  ggplot2::theme(
     strip.text.x = ggplot2::element_text(size = 12)
-    )
+  )
 ```
 
 ```{r}
-tcga_plot <- tmb_cdf_plot(tmb_tcga, plot_title = "TCGA (Adult)", colour = "#630882") +
-    ggplot2::theme(
+tcga_plot <- cdf_plot(
+  df = tmb_tcga,
+  plot_title = "TCGA (Adult)",
+  num_col = "tmb",
+  group_col = "short_histology",
+  color = "#3BC8A2",
+  n_group = 5,
+  x_lim = c(-1.2, 1.2),
+  y_lim = c(0, 30000),
+  x_lab = "",
+  y_lab = "Coding Mutations per Mb"
+) +
+  ggplot2::theme(
     axis.title.y = ggplot2::element_blank(),
     axis.text.y = ggplot2::element_blank(),
     axis.ticks.y = ggplot2::element_blank(),
     strip.text.x = ggplot2::element_text(size = 9)
-  ) 
+  )
 ```
 
 Combine both TMB data.frames into one plot
@@ -167,15 +140,16 @@ tmb_plot <- cowplot::plot_grid(pbta_plot, tcga_plot,
   axis = "left",
   rel_widths = c(2.5, 1),
   label_size = 12
-  )
+)
 ```
 
 Save this final plot to a png file.
 
 ```{r}
 # Save the plot to a png
-cowplot::save_plot(file.path(plots_dir, "tmb-cdf-pbta-tcga.png"), 
-                   plot = tmb_plot, base_width = 35, base_height = 20, unit = "cm")
+cowplot::save_plot(file.path(plots_dir, "tmb-cdf-pbta-tcga.png"),
+  plot = tmb_plot, base_width = 35, base_height = 20, unit = "cm"
+)
 ```
 
 Print from png so rendering is smoother