AlexsLemonade · cbethell · Dec 17, 2019 · Dec 17, 2019 · Dec 17, 2019 · Dec 17, 2019
diff --git a/analyses/molecular-subtyping-ATRT/03-ATRT-molecular-subtyping-gistic-comparison.Rmd b/analyses/molecular-subtyping-ATRT/03-ATRT-molecular-subtyping-gistic-comparison.Rmd
@@ -0,0 +1,219 @@
+---
+title: "ATRT Molecular Subtyping - GISTIC Comparison"
+output: 
+  html_notebook:
+    toc: TRUE
+    toc_float: TRUE
+author: Chante Bethell for ALSF CCDL
+date: 2019
+---
+
+This notebook compares GISTIC calls for `SMARCB1` with the current calls in 
+found in the OpenPBTA repository. This notebook is related to the issue on 
+molecular subtyping ATRT samples and thus focuses on ATRT samples. 
+
+# Usage
+
+This notebook is intended to be run via the command line from the top directory
+of the repository as follows:
+
+`Rscript -e "rmarkdown::render('analyses/molecular-subtyping-ATRT/03-ATRT-molecular-subtyping-gistic-comparison.Rmd', clean = TRUE)"`
+
+# Set Up
+
+```{r}
+# Get `magrittr` pipe
+`%>%` <- dplyr::`%>%`
+```
+
+## Directories and Files
+
+```{r}
+# Set path to results and plots directory
+results_dir <- file.path("results")
+
+if (!dir.exists(results_dir)) {
+  dir.create(results_dir)
+}
+
+# Read in metadata
+metadata <-
+  readr::read_tsv(file.path("..", "..", "data", "pbta-histologies.tsv"))
+
+# Select wanted columns in metadata for merging and assign to a new object
+select_metadata <- metadata %>%
+  dplyr::select(sample_id,
+                Kids_First_Biospecimen_ID,
+                tumor_ploidy)
+
+# Read in focal CN data
+## TODO: This section will be updated to read in focal CN data derived from
+##       copy number consensus calls.
+cn_df <- readr::read_tsv(
+  file.path(
+    "..",
+    "focal-cn-file-preparation",
+    "results",
+    "controlfreec_annotated_cn_autosomes.tsv.gz"
+  )
+)
+
+# Read in GISTIC focal CN data by genes
+# TODO: Change file path to this data once GISTIC files are added to the 
+#       official data release
+gistic_focal_data <-
+  data.table::fread(file.path("data",
+                              "focal_data_by_genes.csv"), data.table = FALSE)
+
+# Read in final output from `01-ATRT-molecular-subtyping-data-prep.Rmd` which 
+# includes the `SMARCB1_focal_status` variable with the current `SMARCB1` CN 
+# calls
+final_df <-
+  data.table::fread(file.path("results",
+                              "ATRT_molecular_subtypes.tsv"),
+                    data.table = FALSE)
+```
+## Custom Function
+
+```{r}
+# Custom datatable function
+# Function code adapted from: https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/49acc98f5ffd86853fc70f220623311e13e3ca9f/analyses/collapse-rnaseq/02-analyze-drops.Rmd#L23
+viewDataTable <- function(data) {
+  DT::datatable(
+    data,
+    rownames = FALSE,
+    filter = "bottom",
+    class = "cell-border stripe",
+    options = list(
+      pageLength = 5,
+      searchHighlight = TRUE,
+      scrollX = TRUE,
+      dom = "tpi",
+      initComplete = htmlwidgets::JS(
+        "function(settings, json) {",
+        "$(this.api().table().header()).css({'background-color':
+                                            '#004467', 'color': '#fff'});",
+        "}"
+      )
+    )
+  )
+}
+```
+
+# Filter current focal CN data
+
+```{r}
+# Filter copy number data for values relevant to the `SMARCB1` gene and join
+# selected variables of the metadata
+cn_df <- cn_df %>%
+  dplyr::inner_join(select_metadata,
+                    by = c("biospecimen_id" = "Kids_First_Biospecimen_ID")) %>%
+  dplyr::filter(sample_id %in% final_df$sample_id, gene_symbol == "SMARCB1")
+```
+
+# Filter GISTIC data for ATRT samples
+
+```{r}
+# Filter and transpose GISTIC data 
+transposed_gistic <- gistic_focal_data %>%
+  # Filter to copy number data on `SMARCB1` only
+  dplyr::filter(`Gene Symbol` == "SMARCB1") %>%
+  # Transpose and coerce into a data.frame
+  t() %>%
+  as.data.frame() %>%
+  # Make the rownames a column for the samples ID's
+  tibble::rownames_to_column("Kids_First_Biospecimen_ID")
+
+# Remove the non-sample ID rows and filter for ATRT samples
+transposed_gistic <- transposed_gistic %>%
+  dplyr::filter(stringr::str_detect(Kids_First_Biospecimen_ID, "BS_")) %>%
+  # Join the `sample_id` and `tumor_ploidy` columns from the metadata
+  dplyr::inner_join(select_metadata, by = "Kids_First_Biospecimen_ID") %>%
+  # Filter this data for ATRT samples
+  dplyr::inner_join(cn_df, by = "sample_id") %>%
+  # Make the copy number values numeric
+  dplyr::mutate(GISTIC_copy_number = as.numeric(V1)) %>%
+  # Remove the non-numeric copy number variable
+  dplyr::select(-V1, -tumor_ploidy.x, -tumor_ploidy.y)
+
+# Create `gistic_SMARCB1_status` variable for comparison against the
+# `SMARCB1_focal_status` variable in the current focal copy number data
+final_gistic <-  transposed_gistic %>%
+  dplyr::mutate(GISTIC_SMARCB1_focal_status = dplyr::case_when(
+    # When the copy number is less than inferred ploidy, mark this as a loss
+    GISTIC_copy_number < ploidy ~ "loss",
+    # If copy number is higher than ploidy, mark as a gain
+    GISTIC_copy_number > ploidy ~ "gain",
+    GISTIC_copy_number == ploidy ~ "neutral"
+  ))
+```
+
+# Compare calls
+
+```{r}
+# Create a data.frame with the `SMARCB1` calls from GISTIC and the current calls.
+comparison_df <- final_gistic %>%
+  dplyr::inner_join(final_df, by = "sample_id") %>%
+  dplyr::select(sample_id,
+                Kids_First_Biospecimen_ID = Kids_First_Biospecimen_ID.x,
+                ploidy,
+                GISTIC_copy_number,
+                GISTIC_SMARCB1_focal_status,
+                copy_number,
+                SMARCB1_focal_status)
+
+# Save `comparison_df` to file
+readr::write_tsv(
+  comparison_df,
+  file.path("results", "ATRT_SMARCB1_GISTIC_comparison_results.tsv")
+)
+
+# Create a data.frame with the count of each call from GISTIC
+GISTIC_SMARCB1_focal_status <- comparison_df %>%
+  dplyr::group_by(GISTIC_SMARCB1_focal_status) %>%
+  dplyr::tally() %>%
+  tibble::column_to_rownames("GISTIC_SMARCB1_focal_status") 
+
+# Create a data.frame with the count of each current call 
+Current_SMARCB1_focal_status <- comparison_df %>%
+  dplyr::group_by(SMARCB1_focal_status) %>%
+  dplyr::tally() %>%
+  tibble::column_to_rownames("SMARCB1_focal_status")
+
+# Combine the calls into a contigency table
+contigency_df <-
+  data.frame(GISTIC_SMARCB1_focal_status, Current_SMARCB1_focal_status) %>%
+  dplyr::rename(GISTIC_SMARCB1_focal_status = n,
+         Current_SMARCB1_focal_status = n.1) %>%
+  tibble::rownames_to_column("Call") 
+
+# Reorder the calls
+contigency_df$Call <- as.factor(contigency_df$Call)
+levels(contigency_df$Call) <- c("loss", "neutral", "gain")
+
+# Display `contigency_df`
+viewDataTable(contigency_df) 
+```
+
+# Plot
+
+```{r}
+# Produce a copy number comparison plot
+copy_number_comparison_plot <-
+  ggplot2::ggplot(comparison_df,
+                  ggplot2::aes(x = copy_number, y = GISTIC_copy_number)) +
+  ggplot2::geom_point() +
+  ggplot2::labs(title = "`SMARCB1` focal copy number comparison scatterplot") +
+  ggplot2::theme_classic()
+
+# Display the copy number comparison plot
+copy_number_comparison_plot
+```
+
+# Session Info
+
+```{r}
+# Print session information
+sessionInfo()
+```
+
diff --git a/analyses/molecular-subtyping-ATRT/03-ATRT-molecular-subtyping-gistic-comparison.nb.html b/analyses/molecular-subtyping-ATRT/03-ATRT-molecular-subtyping-gistic-comparison.nb.html