diff --git a/analyses/molecular-subtyping-ATRT/00-subset-files-for-ATRT.R b/analyses/molecular-subtyping-ATRT/00-subset-files-for-ATRT.R
new file mode 100644
index 0000000000..b1476954e7
--- /dev/null
+++ b/analyses/molecular-subtyping-ATRT/00-subset-files-for-ATRT.R
@@ -0,0 +1,169 @@
+# This script subsets the focal copy number, RNA expression, tumor mutation
+# burden and histologies` files to include only ATRT samples.
+
+# Chante Bethell for CCDL 2019
+#
+# #### USAGE
+# This script is intended to be run via the command line from the top directory
+# of the repository as follows:
+#
+# Rscript 'analyses/molecular-subtyping-ATRT/00-subset-files-for-ATRT.R'
+
+
+#### Set Up --------------------------------------------------------------------
+
+# Get `magrittr` pipe
+`%>%` <- dplyr::`%>%`
+
+#### Directories and Files -----------------------------------------------------
+
+# Detect the ".git" folder -- this will in the project root directory.
+# Use this as the root directory to ensure proper sourcing of functions no
+# matter where this is called from
+root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
+
+# Set path to results and plots directory
+results_dir <-
+ file.path(root_dir, "analyses", "molecular-subtyping-ATRT", "atrt-subset")
+
+if (!dir.exists(results_dir)) {
+ dir.create(results_dir)
+}
+
+# Read in metadata
+metadata <-
+ readr::read_tsv(file.path(root_dir, "data", "pbta-histologies.tsv"))
+
+# Select wanted columns in metadata for merging and assign to a new object
+select_metadata <- metadata %>%
+ dplyr::select(sample_id,
+ Kids_First_Participant_ID,
+ Kids_First_Biospecimen_ID)
+
+# Read in ssGSEA pathway information
+ssGSEA <-
+ as.data.frame(readr::read_rds(
+ file.path(
+ root_dir,
+ "analyses",
+ "ssgsea-hallmark",
+ "results",
+ "GeneSetExpressionMatrix.RDS"
+ )
+ ))
+
+# Read in RNA expression data
+stranded_expression <-
+ readr::read_rds(
+ file.path(
+ root_dir,
+ "data",
+ "pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds"
+ )
+ )
+
+# Read in focal CN data
+## TODO: This section will be updated to read in focal CN data derived from
+## copy number consensus calls.
+cn_df <- readr::read_tsv(
+ file.path(
+ root_dir,
+ "analyses",
+ "focal-cn-file-preparation",
+ "results",
+ "controlfreec_annotated_cn_autosomes.tsv.gz"
+ )
+)
+
+# Read in consensus mutation data
+tmb_df <-
+ data.table::fread(file.path(root_dir,
+ "data",
+ "pbta-snv-consensus-mutation-tmb.tsv"))
+
+## TODO: Read in the SV data/GISTIC output to evaluate the chr22q loss
+# associated with SMARB1 deletions
+
+#### Filter metadata -----------------------------------------------------------
+
+# Filter metadata for `ATRT` and define `location_summary` based on values in
+# `primary_site`
+atrt_df <- metadata %>%
+ dplyr::filter(short_histology == "ATRT",
+ experimental_strategy == "RNA-Seq")
+
+# Write to file
+readr::write_tsv(atrt_df, file.path(results_dir, "atrt_histologies.tsv"))
+
+#### Filter expression data ----------------------------------------------------
+
+# Filter to ATRT samples only -- we can use atrt_df because it is subset to
+# RNA-seq samples
+stranded_expression <- stranded_expression %>%
+ dplyr::select(atrt_df$Kids_First_Biospecimen_ID)
+
+# Log2 transformation
+norm_expression <- log2(stranded_expression + 1)
+
+# normData mean and sd
+norm_expression_means <- rowMeans(norm_expression, na.rm = TRUE)
+norm_expression_sd <- apply(norm_expression, 1, sd, na.rm = TRUE)
+
+# Subtract mean
+expression_zscored <-
+ sweep(norm_expression, 1, norm_expression_means, FUN = "-")
+
+# Divide by SD remove NAs and Inf values from zscore for genes with 0 in normData
+expression_zscored <-
+ sweep(expression_zscored, 1, norm_expression_sd, FUN = "/") %>%
+ dplyr::na_if(Inf) %>%
+ na.omit()
+
+# Save matrix with all genes to file for downstream plotting
+readr::write_rds(expression_zscored,
+ file.path(results_dir, "atrt_zscored_expression.RDS"))
+
+#### Filter focal CN data ------------------------------------------------------
+
+# Filter focal CN to ATRT samples only
+cn_metadata <- cn_df %>%
+ dplyr::left_join(select_metadata,
+ by = c("biospecimen_id" = "Kids_First_Biospecimen_ID")) %>%
+ dplyr::select(gene_symbol,
+ sample_id,
+ Kids_First_Participant_ID,
+ biospecimen_id,
+ status) %>%
+ dplyr::filter(sample_id %in% atrt_df$sample_id) %>%
+ dplyr::distinct()
+
+# Write to file
+readr::write_tsv(cn_metadata, file.path(results_dir, "atrt_focal_cn.tsv.gz"))
+
+#### Filter ssGSEA data --------------------------------------------------------
+
+# Transpose
+transposed_ssGSEA <- t(ssGSEA) %>%
+ as.data.frame() %>%
+ tibble::rownames_to_column("Kids_First_Biospecimen_ID")
+
+# Filter for `sample_id` values found in the metadata filtered for ATRT samples
+transposed_ssGSEA <- transposed_ssGSEA %>%
+ dplyr::left_join(select_metadata, by = "Kids_First_Biospecimen_ID") %>%
+ dplyr::filter(sample_id %in% atrt_df$sample_id) %>%
+ tibble::column_to_rownames("Kids_First_Biospecimen_ID") %>%
+ dplyr::select(-c("sample_id", "Kids_First_Participant_ID"))
+
+# Write to file
+readr::write_rds(transposed_ssGSEA, file.path(results_dir, "atrt_ssgsea.RDS"))
+
+#### Filter tumor mutation burden data -----------------------------------------
+
+tmb_df <- tmb_df %>%
+ dplyr::select(Tumor_Sample_Barcode, tmb) %>%
+ dplyr::left_join(select_metadata,
+ by = c("Tumor_Sample_Barcode" = "Kids_First_Biospecimen_ID")) %>%
+ dplyr::filter(sample_id %in% atrt_df$sample_id)
+
+# Write to file
+readr::write_tsv(tmb_df, file.path(results_dir, "atrt_tmb.tsv"))
diff --git a/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.Rmd b/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.Rmd
index cd9fc9179b..ae9eeb9dd2 100644
--- a/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.Rmd
+++ b/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.Rmd
@@ -40,6 +40,10 @@ if (!("htmlwidgets" %in% installed.packages())) {
# matter where this is called from
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
+# File path to results directory
+input_dir <-
+ file.path(root_dir, "analyses", "molecular-subtyping-ATRT", "atrt-subset")
+
# File path to results directory
results_dir <-
file.path(root_dir, "analyses", "molecular-subtyping-ATRT", "results")
@@ -48,9 +52,9 @@ if (!dir.exists(results_dir)) {
dir.create(results_dir)
}
-# Read in metadata
+# Read in ATRT subset metadata
metadata <-
- readr::read_tsv(file.path(root_dir, "data", "pbta-histologies.tsv"))
+ readr::read_tsv(file.path(input_dir, "atrt_histologies.tsv"))
# Select wanted columns in metadata for merging and assign to a new object
select_metadata <- metadata %>%
@@ -58,46 +62,31 @@ select_metadata <- metadata %>%
Kids_First_Participant_ID,
Kids_First_Biospecimen_ID)
-# Read in ssGSEA pathway information
-ssGSEA <-
- as.data.frame(readr::read_rds(
- file.path(
- root_dir,
- "analyses",
- "ssgsea-hallmark",
- "results",
- "GeneSetExpressionMatrix.RDS"
- )
- ))
+# Read in ATRT subset ssGSEA pathway information
+ssGSEA_subset <-
+ as.data.frame(readr::read_rds(file.path(input_dir, "atrt_ssgsea.RDS")))
-# Read in RNA expression data
+# Read in ATRT subset z-scored RNA expression data
stranded_expression <-
readr::read_rds(
file.path(
- root_dir,
- "data",
- "pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds"
+ input_dir,
+ "atrt_zscored_expression.RDS"
)
)
-# Read in focal CN data
-## TODO: This section will be updated to read in focal CN data derived from
-## copy number consensus calls.
+# Read in ATRT subset focal CN data
cn_df <- readr::read_tsv(
file.path(
- root_dir,
- "analyses",
- "focal-cn-file-preparation",
- "results",
- "controlfreec_annotated_cn_autosomes.tsv.gz"
+ input_dir,
+ "atrt_focal_cn.tsv.gz"
)
)
-# Read in consensus mutation data
+# Read in ATRT subset consensus mutation data
tmb_df <-
- data.table::fread(file.path(root_dir,
- "data",
- "pbta-snv-consensus-mutation-tmb.tsv"))
+ data.table::fread(file.path(input_dir,
+ "atrt_tmb.tsv"))
```
## Custom Function
@@ -126,9 +115,9 @@ viewDataTable <- function(data) {
}
```
-# Filter Data
+# Prepare Data
-## Filter metadata
+## Metadata
```{r}
# Define regions of the brain (using Anatomy of the Brain figure found at
@@ -161,11 +150,7 @@ infratentorial <-
"Other locations NOS;Spinal Cord- Lumbar/Thecal Sac;Spinal Cord- Thoracic;Ventricles"
)
-# Filter metadata for `ATRT` and define `location_summary` based on values in
-# `primary_site`
-atrt_df <- metadata %>%
- dplyr::filter(short_histology == "ATRT",
- experimental_strategy == "RNA-Seq") %>%
+metadata <- metadata %>%
dplyr::mutate(
location_summary = dplyr::case_when(
primary_site %in% infratentorial ~ "infratentorial",
@@ -174,11 +159,9 @@ atrt_df <- metadata %>%
)
) %>%
dplyr::group_by(sample_id) %>%
- dplyr::summarize(
- Kids_First_Biospecimen_ID = paste(sort(unique(
- Kids_First_Biospecimen_ID
- )),
- collapse = ", "),
+ dplyr::select(
+ sample_id,
+ Kids_First_Biospecimen_ID,
Kids_First_Participant_ID,
location_summary,
age_at_diagnosis_days,
@@ -186,8 +169,8 @@ atrt_df <- metadata %>%
primary_site
)
-# Display `atrt_df`
-atrt_df %>%
+# Display metadata subsetted for ATRT samples
+metadata %>%
head(n = 15)
```
@@ -195,7 +178,7 @@ atrt_df %>%
```{r}
# Create an annotation data.frame for the relevant annotation data
-initial_annotation_df <- atrt_df %>%
+initial_annotation_df <- metadata %>%
tibble::column_to_rownames("Kids_First_Biospecimen_ID") %>%
dplyr::select(-c(
sample_id,
@@ -209,7 +192,7 @@ readr::write_rds(initial_annotation_df,
file.path(results_dir, "initial_heatmap_annotation.RDS"))
```
-## Filter RNA expression, CN, TMB, and ssGSEA data by merging with `sample_id`
+## Filter and join RNA expression, CN, TMB, and ssGSEA data
### RNA expression data
@@ -262,31 +245,10 @@ myc_genes <-
"ATF4"
)
-# Filter expression data for target overexpressed genes
-stranded_expression_df <- stranded_expression %>%
- as.data.frame() %>%
- tibble::rownames_to_column("gene_symbol") %>%
- dplyr::filter(gene_symbol %in% tyr_genes |
- gene_symbol %in% shh_genes |
- gene_symbol %in% myc_genes) %>%
- tibble::column_to_rownames("gene_symbol")
-
-# Log2 transformation
-norm_expression <- log2(stranded_expression_df + 1)
-
-# normData mean and sd
-norm_expression_means <- rowMeans(norm_expression, na.rm = TRUE)
-norm_expression_sd <- apply(norm_expression, 1, sd, na.rm = TRUE)
-
-# Subtract mean
-expression_zscored <-
- sweep(norm_expression, 1, norm_expression_means, FUN = "-")
-
-# Divide by SD remove NAs and Inf values from zscore for genes with 0 in normData
-expression_zscored <-
- sweep(expression_zscored, 1, norm_expression_sd, FUN = "/") %>%
- dplyr::na_if(Inf) %>%
- na.omit()
+# Filter to only the genes of interest
+expression_zscored <- stranded_expression[which(
+ rownames(stranded_expression) %in% c(tyr_genes, shh_genes, myc_genes)
+), ]
# Transpose
long_stranded_expression <- t(expression_zscored)
@@ -302,7 +264,7 @@ expression_metadata %>%
head(n = 15)
# Join expression data with metadata filtered for `ATRT`
-atrt_expression_df <- atrt_df %>%
+atrt_expression_df <- metadata %>%
dplyr::left_join(expression_metadata,
by = "sample_id")
```
@@ -310,15 +272,8 @@ atrt_expression_df <- atrt_df %>%
### CN data
```{r}
-# Join focal CN data with metadata
-cn_metadata <- cn_df %>%
- dplyr::left_join(metadata,
- by = c("biospecimen_id" = "Kids_First_Biospecimen_ID")) %>%
- dplyr::select(gene_symbol,
- sample_id,
- Kids_First_Participant_ID,
- biospecimen_id,
- status) %>%
+# Filter focal CN data for SMARCB1 and SMARCA4 status
+cn_df <- cn_df %>%
dplyr::filter(gene_symbol %in% c("SMARCB1", "SMARCA4")) %>%
dplyr::mutate(
SMARCB1_focal_status = dplyr::case_when(gene_symbol == "SMARCB1" ~ status,
@@ -339,23 +294,26 @@ cn_metadata <- cn_df %>%
)
#Display `cn_metadata`
-cn_metadata %>%
+cn_df %>%
head(n = 15)
# Join ATRT expression data with focal CN data
atrt_expression_cn_df <- atrt_expression_df %>%
- dplyr::left_join(cn_metadata, by = "sample_id")
+ dplyr::left_join(cn_df, by = "sample_id")
```
### ssGSEA data
```{r}
+# Transpose
+transposed_ssGSEA <- t(ssGSEA_subset)
+
# Calculate ssGSEA mean and sd
-ssGSEA_means <- rowMeans(ssGSEA, na.rm = TRUE)
-ssGSEA_sd <- apply(ssGSEA, 1, sd, na.rm = TRUE)
+ssGSEA_means <- rowMeans(transposed_ssGSEA, na.rm = TRUE)
+ssGSEA_sd <- apply(transposed_ssGSEA, 1, sd, na.rm = TRUE)
# Subtract mean
-ssGSEA_zscored <- sweep(ssGSEA, 1, ssGSEA_means, FUN = "-")
+ssGSEA_zscored <- sweep(transposed_ssGSEA, 1, ssGSEA_means, FUN = "-")
# Divide by SD remove NAs and Inf values from zscore for genes with 0
ssGSEA_zscored <-
@@ -363,11 +321,11 @@ ssGSEA_zscored <-
dplyr::na_if(Inf) %>%
na.omit()
-# Transpose
-transposed_ssGSEA <- t(ssGSEA_zscored)
+# Transpose back
+ssGSEA_zscored <- t(ssGSEA_zscored)
-# Select wanted pathways and merge metadata
-transposed_ssGSEA <- transposed_ssGSEA %>%
+# Summarise pathway values
+ssGSEA_summarised <- ssGSEA_zscored %>%
as.data.frame() %>%
tibble::rownames_to_column("Kids_First_Biospecimen_ID") %>%
dplyr::left_join(select_metadata, by = "Kids_First_Biospecimen_ID") %>%
@@ -379,24 +337,18 @@ transposed_ssGSEA <- transposed_ssGSEA %>%
)
#Display `transposed_ssGSEA`
-transposed_ssGSEA %>%
+ssGSEA_summarised %>%
head(n = 15)
# Join ATRT expression and focal CN data with transposed ssGSEA data
atrt_expression_cn_df <- atrt_expression_cn_df %>%
- dplyr::left_join(transposed_ssGSEA,
+ dplyr::left_join(ssGSEA_summarised,
by = "sample_id")
```
### Tumor mutation burden data
```{r}
-# Join tumor mutuation data with metadata
-tmb_df <- tmb_df %>%
- dplyr::select(Tumor_Sample_Barcode, tmb) %>%
- dplyr::inner_join(select_metadata,
- by = c("Tumor_Sample_Barcode" = "Kids_First_Biospecimen_ID"))
-
#Display `tmb_df`
tmb_df %>%
head(n = 15)
@@ -443,7 +395,7 @@ final_df <- atrt_expression_cn_tmb_df %>%
dplyr::ungroup()
readr::write_tsv(final_df,
- file.path(results_dir, "ATRT_molecular_subtypes.tsv.gz"))
+ file.path(results_dir, "ATRT_molecular_subtypes.tsv"))
# Display `final_df`
viewDataTable(final_df)
diff --git a/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.nb.html b/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.nb.html
index cfbdc931e0..ce18a56e1a 100644
--- a/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.nb.html
+++ b/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.nb.html
@@ -7,6 +7,7 @@
+
@@ -3878,11 +3879,27 @@
// remove the overflow: hidden attribute of the scrollHead
// (otherwise the scrolling table body obscures the filters)
+ // The workaround and the discussion from
+ // https://github.com/rstudio/DT/issues/554#issuecomment-518007347
+ // Otherwise the filter selection will not be anchored to the values
+ // when the columns number is many and scrollX is enabled.
var scrollHead = $(el).find('.dataTables_scrollHead,.dataTables_scrollFoot');
- var cssOverflow = scrollHead.css('overflow');
- if (cssOverflow === 'hidden') {
+ var cssOverflowHead = scrollHead.css('overflow');
+ var scrollBody = $(el).find('.dataTables_scrollBody');
+ var cssOverflowBody = scrollBody.css('overflow');
+ var scrollTable = $(el).find('.dataTables_scroll');
+ var cssOverflowTable = scrollTable.css('overflow');
+ if (cssOverflowHead === 'hidden') {
$x.on('show hide', function(e) {
- scrollHead.css('overflow', e.type === 'show' ? '' : cssOverflow);
+ if (e.type === 'show') {
+ scrollHead.css('overflow', 'visible');
+ scrollBody.css('overflow', 'visible');
+ scrollTable.css('overflow-x', 'scroll');
+ } else {
+ scrollHead.css('overflow', cssOverflowHead);
+ scrollBody.css('overflow', cssOverflowBody);
+ scrollTable.css('overflow-x', cssOverflowTable);
+ }
});
$x.css('z-index', 25);
}
@@ -4217,7 +4234,7 @@
$input.attr('title', 'Hit Ctrl+Enter to finish editing, or Esc to cancel');
}
$input.val(value);
- if (disableCols && inArray(_cell.index().column, disableCols)) {
+ if (inArray(_cell.index().column, disableCols)) {
$input.attr('readonly', '').css('filter', 'invert(25%)');
}
$cell.empty().append($input);
@@ -4296,7 +4313,7 @@
if (event !== 'cell_edit' && shinyData.hasOwnProperty(id) && shinyData[id] === JSON.stringify(value))
return;
shinyData[id] = JSON.stringify(value);
- if (HTMLWidgets.shinyMode) {
+ if (HTMLWidgets.shinyMode && Shiny.setInputValue) {
Shiny.setInputValue(id, value, opts);
if (!instance.clearInputs[id]) instance.clearInputs[id] = function() {
Shiny.setInputValue(id, null);
@@ -4477,7 +4494,7 @@
if (selTarget === 'row+column') {
$(table.columns().footer()).css('cursor', 'pointer');
}
- table.on('click.dt', selTarget === 'column' ? 'tbody td' : 'tfoot tr th', function() {
+ var callback = function() {
var colIdx = selTarget === 'column' ? table.cell(this).index().column :
$.inArray(this, table.columns().footer()),
thisCol = $(table.column(colIdx).nodes());
@@ -4491,7 +4508,12 @@
selected2 = selMode === 'single' ? [colIdx] : unique(selected2.concat([colIdx]));
}
changeInput('columns_selected', selected2);
- });
+ }
+ if (selTarget === 'column') {
+ $(table.table().body()).on('click.dt', 'td', callback);
+ } else {
+ $(table.table().footer()).on('click.dt', 'tr th', callback);
+ }
changeInput('columns_selected', selected2);
var selectCols = function() {
table.columns().nodes().flatten().to$().removeClass(selClass);
@@ -4743,169 +4765,171 @@
}
@@ -5638,57 +5662,12 @@
}
-
-
-
-
-
-
-