Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Functionalize CDF TMB Plot #612

Merged
merged 17 commits into from
Mar 10, 2020
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 49 additions & 75 deletions analyses/tmb-compare-tcga/compare-tmb.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ _This assumes you are in the top directory of the repository._
```{r}
# magrittr pipe
`%>%` <- dplyr::`%>%`

# Load in these functions so we can use `maf_to_granges`
source(file.path("..", "snv-callers", "util", "tmb_functions.R"))
```

Declare names of input and output directories.
Expand All @@ -56,58 +53,8 @@ if (!dir.exists(plots_dir)) {

Custom function for plotting the TMB.

Function for making a combined CDF plot for TMB.
This plotting function was adapted from the [`breaks_cdf_plot` function in the
`chromosomal-instability`](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/b1b73fe321a97fa82d85c86d20bd85635aabba25/analyses/chromosomal-instability/util/chr-break-plot.R#L120)

```{r}
tmb_cdf_plot <- function(tmb_df, plot_title, colour) {
tmb_df %>%
as.data.frame() %>%
dplyr::mutate(short_histology = tools::toTitleCase(short_histology)) %>%
# Only plot histologies groups with more than `min_samples` number of samples
dplyr::group_by(short_histology, add = TRUE) %>%
# Only keep groups with this amount of samples
dplyr::filter(dplyr::n() > 5) %>%
# Calculate histology group mean
dplyr::mutate(
hist_median = median(tmb),
hist_rank = rank(tmb, ties.method = "first") / dplyr::n(),
sample_size = paste0("n = ", dplyr::n())
) %>%
dplyr::ungroup() %>%
dplyr::mutate(short_histology = reorder(short_histology, hist_median)) %>%
# Now we will plot these as cummulative distribution plots
ggplot2::ggplot(ggplot2::aes(
x = hist_rank,
y = tmb
)) +
ggplot2::geom_point(color = colour) +
# Add summary line for mean
ggplot2::geom_segment(
x = 0, xend = 1, color = "grey",
ggplot2::aes(y = hist_median, yend = hist_median)
) +
# Separate by histology
ggplot2::facet_wrap(~ short_histology + sample_size, nrow = 1, strip.position = "bottom") +
ggplot2::theme_classic() +
ggplot2::xlab("") +
ggplot2::ylab("Coding mutations per Mb") +
# Transform to log10 make non-log y-axis labels
ggplot2::scale_y_continuous(trans = "log1p", breaks = c(0, 1, 3, 10, 30, 100, 10000, 30000),
limits = c(0, 30000)) +
ggplot2::scale_x_continuous(limits = c(-0.2, 1.2), breaks = c()) +
# Making it pretty
ggplot2::theme(legend.position = "none") +
ggplot2::theme(
axis.text.x = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
strip.placement = "outside",
strip.text = ggplot2::element_text(size = 10, angle = 90, hjust = 1),
strip.background = ggplot2::element_rect(fill = NA, color = NA)
) +
ggplot2::ggtitle(plot_title)
}
source(file.path("util", "cdf-plot-function.R"))
```

## Set up consensus data
Expand All @@ -116,24 +63,28 @@ Read in the consensus TMB file.

```{r}
# TODO: update if the this tmb consensus file gets updated in a future data release
tmb_pbta <- data.table::fread(file.path("..",
"snv-callers",
"results",
"consensus",
"pbta-snv-mutation-tmb-coding.tsv")) %>%
# This variable is weird when binding but we don't need it for the plot so we'll just remove it.
dplyr::select(-genome_size) %>%
tmb_pbta <- data.table::fread(file.path(
"..",
"snv-callers",
"results",
"consensus",
"pbta-snv-mutation-tmb-coding.tsv"
)) %>%
# This variable is weird when binding but we don't need it for the plot so we'll just remove it.
dplyr::select(-genome_size) %>%
dplyr::filter(experimental_strategy != "Panel")
```


```{r}
# TODO: update if this tmb consensus file gets updated in a future data release
tmb_tcga <- data.table::fread(file.path("..",
"snv-callers",
"results",
"consensus",
"tcga-snv-mutation-tmb-coding.tsv")) %>%
tmb_tcga <- data.table::fread(file.path(
"..",
"snv-callers",
"results",
"consensus",
"tcga-snv-mutation-tmb-coding.tsv"
)) %>%
dplyr::select(-genome_size)
```

Expand All @@ -142,20 +93,42 @@ tmb_tcga <- data.table::fread(file.path("..",
Plot each dataset as its own CDF plot.

```{r}
pbta_plot <- tmb_cdf_plot(tmb_pbta, plot_title = "PBTA", colour = "#3BC8A2") +
ggplot2::theme(
pbta_plot <- cdf_plot(
df = tmb_pbta,
plot_title = "PBTA",
num_col = "tmb",
group_col = "short_histology",
color = "#3BC8A2",
n_group = 5,
x_lim = c(-1.2, 1.2),
y_lim = c(0, 30000),
x_lab = "",
y_lab = "Coding Mutations per Mb"
) +
ggplot2::theme(
strip.text.x = ggplot2::element_text(size = 12)
)
)
```

```{r}
tcga_plot <- tmb_cdf_plot(tmb_tcga, plot_title = "TCGA (Adult)", colour = "#630882") +
ggplot2::theme(
tcga_plot <- cdf_plot(
df = tmb_tcga,
plot_title = "TCGA (Adult)",
num_col = "tmb",
group_col = "short_histology",
color = "#3BC8A2",
n_group = 5,
x_lim = c(-1.2, 1.2),
y_lim = c(0, 30000),
x_lab = "",
y_lab = "Coding Mutations per Mb"
) +
ggplot2::theme(
axis.title.y = ggplot2::element_blank(),
axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
strip.text.x = ggplot2::element_text(size = 9)
)
)
```

Combine both TMB data.frames into one plot
Expand All @@ -167,15 +140,16 @@ tmb_plot <- cowplot::plot_grid(pbta_plot, tcga_plot,
axis = "left",
rel_widths = c(2.5, 1),
label_size = 12
)
)
```

Save this final plot to a png file.

```{r}
# Save the plot to a png
cowplot::save_plot(file.path(plots_dir, "tmb-cdf-pbta-tcga.png"),
plot = tmb_plot, base_width = 35, base_height = 20, unit = "cm")
cowplot::save_plot(file.path(plots_dir, "tmb-cdf-pbta-tcga.png"),
plot = tmb_plot, base_width = 35, base_height = 20, unit = "cm"
)
```

Print from png so rendering is smoother
Expand Down
Loading