diff --git a/analyses/molecular-subtyping-ATRT/00-subset-files-for-ATRT.R b/analyses/molecular-subtyping-ATRT/00-subset-files-for-ATRT.R new file mode 100644 index 0000000000..b1476954e7 --- /dev/null +++ b/analyses/molecular-subtyping-ATRT/00-subset-files-for-ATRT.R @@ -0,0 +1,169 @@ +# This script subsets the focal copy number, RNA expression, tumor mutation +# burden and histologies` files to include only ATRT samples. + +# Chante Bethell for CCDL 2019 +# +# #### USAGE +# This script is intended to be run via the command line from the top directory +# of the repository as follows: +# +# Rscript 'analyses/molecular-subtyping-ATRT/00-subset-files-for-ATRT.R' + + +#### Set Up -------------------------------------------------------------------- + +# Get `magrittr` pipe +`%>%` <- dplyr::`%>%` + +#### Directories and Files ----------------------------------------------------- + +# Detect the ".git" folder -- this will in the project root directory. +# Use this as the root directory to ensure proper sourcing of functions no +# matter where this is called from +root_dir <- rprojroot::find_root(rprojroot::has_dir(".git")) + +# Set path to results and plots directory +results_dir <- + file.path(root_dir, "analyses", "molecular-subtyping-ATRT", "atrt-subset") + +if (!dir.exists(results_dir)) { + dir.create(results_dir) +} + +# Read in metadata +metadata <- + readr::read_tsv(file.path(root_dir, "data", "pbta-histologies.tsv")) + +# Select wanted columns in metadata for merging and assign to a new object +select_metadata <- metadata %>% + dplyr::select(sample_id, + Kids_First_Participant_ID, + Kids_First_Biospecimen_ID) + +# Read in ssGSEA pathway information +ssGSEA <- + as.data.frame(readr::read_rds( + file.path( + root_dir, + "analyses", + "ssgsea-hallmark", + "results", + "GeneSetExpressionMatrix.RDS" + ) + )) + +# Read in RNA expression data +stranded_expression <- + readr::read_rds( + file.path( + root_dir, + "data", + "pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds" + ) + ) + +# Read in focal CN data +## TODO: This section will be updated to read in focal CN data derived from +## copy number consensus calls. +cn_df <- readr::read_tsv( + file.path( + root_dir, + "analyses", + "focal-cn-file-preparation", + "results", + "controlfreec_annotated_cn_autosomes.tsv.gz" + ) +) + +# Read in consensus mutation data +tmb_df <- + data.table::fread(file.path(root_dir, + "data", + "pbta-snv-consensus-mutation-tmb.tsv")) + +## TODO: Read in the SV data/GISTIC output to evaluate the chr22q loss +# associated with SMARB1 deletions + +#### Filter metadata ----------------------------------------------------------- + +# Filter metadata for `ATRT` and define `location_summary` based on values in +# `primary_site` +atrt_df <- metadata %>% + dplyr::filter(short_histology == "ATRT", + experimental_strategy == "RNA-Seq") + +# Write to file +readr::write_tsv(atrt_df, file.path(results_dir, "atrt_histologies.tsv")) + +#### Filter expression data ---------------------------------------------------- + +# Filter to ATRT samples only -- we can use atrt_df because it is subset to +# RNA-seq samples +stranded_expression <- stranded_expression %>% + dplyr::select(atrt_df$Kids_First_Biospecimen_ID) + +# Log2 transformation +norm_expression <- log2(stranded_expression + 1) + +# normData mean and sd +norm_expression_means <- rowMeans(norm_expression, na.rm = TRUE) +norm_expression_sd <- apply(norm_expression, 1, sd, na.rm = TRUE) + +# Subtract mean +expression_zscored <- + sweep(norm_expression, 1, norm_expression_means, FUN = "-") + +# Divide by SD remove NAs and Inf values from zscore for genes with 0 in normData +expression_zscored <- + sweep(expression_zscored, 1, norm_expression_sd, FUN = "/") %>% + dplyr::na_if(Inf) %>% + na.omit() + +# Save matrix with all genes to file for downstream plotting +readr::write_rds(expression_zscored, + file.path(results_dir, "atrt_zscored_expression.RDS")) + +#### Filter focal CN data ------------------------------------------------------ + +# Filter focal CN to ATRT samples only +cn_metadata <- cn_df %>% + dplyr::left_join(select_metadata, + by = c("biospecimen_id" = "Kids_First_Biospecimen_ID")) %>% + dplyr::select(gene_symbol, + sample_id, + Kids_First_Participant_ID, + biospecimen_id, + status) %>% + dplyr::filter(sample_id %in% atrt_df$sample_id) %>% + dplyr::distinct() + +# Write to file +readr::write_tsv(cn_metadata, file.path(results_dir, "atrt_focal_cn.tsv.gz")) + +#### Filter ssGSEA data -------------------------------------------------------- + +# Transpose +transposed_ssGSEA <- t(ssGSEA) %>% + as.data.frame() %>% + tibble::rownames_to_column("Kids_First_Biospecimen_ID") + +# Filter for `sample_id` values found in the metadata filtered for ATRT samples +transposed_ssGSEA <- transposed_ssGSEA %>% + dplyr::left_join(select_metadata, by = "Kids_First_Biospecimen_ID") %>% + dplyr::filter(sample_id %in% atrt_df$sample_id) %>% + tibble::column_to_rownames("Kids_First_Biospecimen_ID") %>% + dplyr::select(-c("sample_id", "Kids_First_Participant_ID")) + +# Write to file +readr::write_rds(transposed_ssGSEA, file.path(results_dir, "atrt_ssgsea.RDS")) + +#### Filter tumor mutation burden data ----------------------------------------- + +tmb_df <- tmb_df %>% + dplyr::select(Tumor_Sample_Barcode, tmb) %>% + dplyr::left_join(select_metadata, + by = c("Tumor_Sample_Barcode" = "Kids_First_Biospecimen_ID")) %>% + dplyr::filter(sample_id %in% atrt_df$sample_id) + +# Write to file +readr::write_tsv(tmb_df, file.path(results_dir, "atrt_tmb.tsv")) diff --git a/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.Rmd b/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.Rmd index cd9fc9179b..ae9eeb9dd2 100644 --- a/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.Rmd +++ b/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.Rmd @@ -40,6 +40,10 @@ if (!("htmlwidgets" %in% installed.packages())) { # matter where this is called from root_dir <- rprojroot::find_root(rprojroot::has_dir(".git")) +# File path to results directory +input_dir <- + file.path(root_dir, "analyses", "molecular-subtyping-ATRT", "atrt-subset") + # File path to results directory results_dir <- file.path(root_dir, "analyses", "molecular-subtyping-ATRT", "results") @@ -48,9 +52,9 @@ if (!dir.exists(results_dir)) { dir.create(results_dir) } -# Read in metadata +# Read in ATRT subset metadata metadata <- - readr::read_tsv(file.path(root_dir, "data", "pbta-histologies.tsv")) + readr::read_tsv(file.path(input_dir, "atrt_histologies.tsv")) # Select wanted columns in metadata for merging and assign to a new object select_metadata <- metadata %>% @@ -58,46 +62,31 @@ select_metadata <- metadata %>% Kids_First_Participant_ID, Kids_First_Biospecimen_ID) -# Read in ssGSEA pathway information -ssGSEA <- - as.data.frame(readr::read_rds( - file.path( - root_dir, - "analyses", - "ssgsea-hallmark", - "results", - "GeneSetExpressionMatrix.RDS" - ) - )) +# Read in ATRT subset ssGSEA pathway information +ssGSEA_subset <- + as.data.frame(readr::read_rds(file.path(input_dir, "atrt_ssgsea.RDS"))) -# Read in RNA expression data +# Read in ATRT subset z-scored RNA expression data stranded_expression <- readr::read_rds( file.path( - root_dir, - "data", - "pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds" + input_dir, + "atrt_zscored_expression.RDS" ) ) -# Read in focal CN data -## TODO: This section will be updated to read in focal CN data derived from -## copy number consensus calls. +# Read in ATRT subset focal CN data cn_df <- readr::read_tsv( file.path( - root_dir, - "analyses", - "focal-cn-file-preparation", - "results", - "controlfreec_annotated_cn_autosomes.tsv.gz" + input_dir, + "atrt_focal_cn.tsv.gz" ) ) -# Read in consensus mutation data +# Read in ATRT subset consensus mutation data tmb_df <- - data.table::fread(file.path(root_dir, - "data", - "pbta-snv-consensus-mutation-tmb.tsv")) + data.table::fread(file.path(input_dir, + "atrt_tmb.tsv")) ``` ## Custom Function @@ -126,9 +115,9 @@ viewDataTable <- function(data) { } ``` -# Filter Data +# Prepare Data -## Filter metadata +## Metadata ```{r} # Define regions of the brain (using Anatomy of the Brain figure found at @@ -161,11 +150,7 @@ infratentorial <- "Other locations NOS;Spinal Cord- Lumbar/Thecal Sac;Spinal Cord- Thoracic;Ventricles" ) -# Filter metadata for `ATRT` and define `location_summary` based on values in -# `primary_site` -atrt_df <- metadata %>% - dplyr::filter(short_histology == "ATRT", - experimental_strategy == "RNA-Seq") %>% +metadata <- metadata %>% dplyr::mutate( location_summary = dplyr::case_when( primary_site %in% infratentorial ~ "infratentorial", @@ -174,11 +159,9 @@ atrt_df <- metadata %>% ) ) %>% dplyr::group_by(sample_id) %>% - dplyr::summarize( - Kids_First_Biospecimen_ID = paste(sort(unique( - Kids_First_Biospecimen_ID - )), - collapse = ", "), + dplyr::select( + sample_id, + Kids_First_Biospecimen_ID, Kids_First_Participant_ID, location_summary, age_at_diagnosis_days, @@ -186,8 +169,8 @@ atrt_df <- metadata %>% primary_site ) -# Display `atrt_df` -atrt_df %>% +# Display metadata subsetted for ATRT samples +metadata %>% head(n = 15) ``` @@ -195,7 +178,7 @@ atrt_df %>% ```{r} # Create an annotation data.frame for the relevant annotation data -initial_annotation_df <- atrt_df %>% +initial_annotation_df <- metadata %>% tibble::column_to_rownames("Kids_First_Biospecimen_ID") %>% dplyr::select(-c( sample_id, @@ -209,7 +192,7 @@ readr::write_rds(initial_annotation_df, file.path(results_dir, "initial_heatmap_annotation.RDS")) ``` -## Filter RNA expression, CN, TMB, and ssGSEA data by merging with `sample_id` +## Filter and join RNA expression, CN, TMB, and ssGSEA data ### RNA expression data @@ -262,31 +245,10 @@ myc_genes <- "ATF4" ) -# Filter expression data for target overexpressed genes -stranded_expression_df <- stranded_expression %>% - as.data.frame() %>% - tibble::rownames_to_column("gene_symbol") %>% - dplyr::filter(gene_symbol %in% tyr_genes | - gene_symbol %in% shh_genes | - gene_symbol %in% myc_genes) %>% - tibble::column_to_rownames("gene_symbol") - -# Log2 transformation -norm_expression <- log2(stranded_expression_df + 1) - -# normData mean and sd -norm_expression_means <- rowMeans(norm_expression, na.rm = TRUE) -norm_expression_sd <- apply(norm_expression, 1, sd, na.rm = TRUE) - -# Subtract mean -expression_zscored <- - sweep(norm_expression, 1, norm_expression_means, FUN = "-") - -# Divide by SD remove NAs and Inf values from zscore for genes with 0 in normData -expression_zscored <- - sweep(expression_zscored, 1, norm_expression_sd, FUN = "/") %>% - dplyr::na_if(Inf) %>% - na.omit() +# Filter to only the genes of interest +expression_zscored <- stranded_expression[which( + rownames(stranded_expression) %in% c(tyr_genes, shh_genes, myc_genes) +), ] # Transpose long_stranded_expression <- t(expression_zscored) @@ -302,7 +264,7 @@ expression_metadata %>% head(n = 15) # Join expression data with metadata filtered for `ATRT` -atrt_expression_df <- atrt_df %>% +atrt_expression_df <- metadata %>% dplyr::left_join(expression_metadata, by = "sample_id") ``` @@ -310,15 +272,8 @@ atrt_expression_df <- atrt_df %>% ### CN data ```{r} -# Join focal CN data with metadata -cn_metadata <- cn_df %>% - dplyr::left_join(metadata, - by = c("biospecimen_id" = "Kids_First_Biospecimen_ID")) %>% - dplyr::select(gene_symbol, - sample_id, - Kids_First_Participant_ID, - biospecimen_id, - status) %>% +# Filter focal CN data for SMARCB1 and SMARCA4 status +cn_df <- cn_df %>% dplyr::filter(gene_symbol %in% c("SMARCB1", "SMARCA4")) %>% dplyr::mutate( SMARCB1_focal_status = dplyr::case_when(gene_symbol == "SMARCB1" ~ status, @@ -339,23 +294,26 @@ cn_metadata <- cn_df %>% ) #Display `cn_metadata` -cn_metadata %>% +cn_df %>% head(n = 15) # Join ATRT expression data with focal CN data atrt_expression_cn_df <- atrt_expression_df %>% - dplyr::left_join(cn_metadata, by = "sample_id") + dplyr::left_join(cn_df, by = "sample_id") ``` ### ssGSEA data ```{r} +# Transpose +transposed_ssGSEA <- t(ssGSEA_subset) + # Calculate ssGSEA mean and sd -ssGSEA_means <- rowMeans(ssGSEA, na.rm = TRUE) -ssGSEA_sd <- apply(ssGSEA, 1, sd, na.rm = TRUE) +ssGSEA_means <- rowMeans(transposed_ssGSEA, na.rm = TRUE) +ssGSEA_sd <- apply(transposed_ssGSEA, 1, sd, na.rm = TRUE) # Subtract mean -ssGSEA_zscored <- sweep(ssGSEA, 1, ssGSEA_means, FUN = "-") +ssGSEA_zscored <- sweep(transposed_ssGSEA, 1, ssGSEA_means, FUN = "-") # Divide by SD remove NAs and Inf values from zscore for genes with 0 ssGSEA_zscored <- @@ -363,11 +321,11 @@ ssGSEA_zscored <- dplyr::na_if(Inf) %>% na.omit() -# Transpose -transposed_ssGSEA <- t(ssGSEA_zscored) +# Transpose back +ssGSEA_zscored <- t(ssGSEA_zscored) -# Select wanted pathways and merge metadata -transposed_ssGSEA <- transposed_ssGSEA %>% +# Summarise pathway values +ssGSEA_summarised <- ssGSEA_zscored %>% as.data.frame() %>% tibble::rownames_to_column("Kids_First_Biospecimen_ID") %>% dplyr::left_join(select_metadata, by = "Kids_First_Biospecimen_ID") %>% @@ -379,24 +337,18 @@ transposed_ssGSEA <- transposed_ssGSEA %>% ) #Display `transposed_ssGSEA` -transposed_ssGSEA %>% +ssGSEA_summarised %>% head(n = 15) # Join ATRT expression and focal CN data with transposed ssGSEA data atrt_expression_cn_df <- atrt_expression_cn_df %>% - dplyr::left_join(transposed_ssGSEA, + dplyr::left_join(ssGSEA_summarised, by = "sample_id") ``` ### Tumor mutation burden data ```{r} -# Join tumor mutuation data with metadata -tmb_df <- tmb_df %>% - dplyr::select(Tumor_Sample_Barcode, tmb) %>% - dplyr::inner_join(select_metadata, - by = c("Tumor_Sample_Barcode" = "Kids_First_Biospecimen_ID")) - #Display `tmb_df` tmb_df %>% head(n = 15) @@ -443,7 +395,7 @@ final_df <- atrt_expression_cn_tmb_df %>% dplyr::ungroup() readr::write_tsv(final_df, - file.path(results_dir, "ATRT_molecular_subtypes.tsv.gz")) + file.path(results_dir, "ATRT_molecular_subtypes.tsv")) # Display `final_df` viewDataTable(final_df) diff --git a/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.nb.html b/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.nb.html index cfbdc931e0..ce18a56e1a 100644 --- a/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.nb.html +++ b/analyses/molecular-subtyping-ATRT/01-ATRT-molecular-subtyping-data-prep.nb.html @@ -7,6 +7,7 @@ + @@ -3878,11 +3879,27 @@ // remove the overflow: hidden attribute of the scrollHead // (otherwise the scrolling table body obscures the filters) + // The workaround and the discussion from + // https://github.com/rstudio/DT/issues/554#issuecomment-518007347 + // Otherwise the filter selection will not be anchored to the values + // when the columns number is many and scrollX is enabled. var scrollHead = $(el).find('.dataTables_scrollHead,.dataTables_scrollFoot'); - var cssOverflow = scrollHead.css('overflow'); - if (cssOverflow === 'hidden') { + var cssOverflowHead = scrollHead.css('overflow'); + var scrollBody = $(el).find('.dataTables_scrollBody'); + var cssOverflowBody = scrollBody.css('overflow'); + var scrollTable = $(el).find('.dataTables_scroll'); + var cssOverflowTable = scrollTable.css('overflow'); + if (cssOverflowHead === 'hidden') { $x.on('show hide', function(e) { - scrollHead.css('overflow', e.type === 'show' ? '' : cssOverflow); + if (e.type === 'show') { + scrollHead.css('overflow', 'visible'); + scrollBody.css('overflow', 'visible'); + scrollTable.css('overflow-x', 'scroll'); + } else { + scrollHead.css('overflow', cssOverflowHead); + scrollBody.css('overflow', cssOverflowBody); + scrollTable.css('overflow-x', cssOverflowTable); + } }); $x.css('z-index', 25); } @@ -4217,7 +4234,7 @@ $input.attr('title', 'Hit Ctrl+Enter to finish editing, or Esc to cancel'); } $input.val(value); - if (disableCols && inArray(_cell.index().column, disableCols)) { + if (inArray(_cell.index().column, disableCols)) { $input.attr('readonly', '').css('filter', 'invert(25%)'); } $cell.empty().append($input); @@ -4296,7 +4313,7 @@ if (event !== 'cell_edit' && shinyData.hasOwnProperty(id) && shinyData[id] === JSON.stringify(value)) return; shinyData[id] = JSON.stringify(value); - if (HTMLWidgets.shinyMode) { + if (HTMLWidgets.shinyMode && Shiny.setInputValue) { Shiny.setInputValue(id, value, opts); if (!instance.clearInputs[id]) instance.clearInputs[id] = function() { Shiny.setInputValue(id, null); @@ -4477,7 +4494,7 @@ if (selTarget === 'row+column') { $(table.columns().footer()).css('cursor', 'pointer'); } - table.on('click.dt', selTarget === 'column' ? 'tbody td' : 'tfoot tr th', function() { + var callback = function() { var colIdx = selTarget === 'column' ? table.cell(this).index().column : $.inArray(this, table.columns().footer()), thisCol = $(table.column(colIdx).nodes()); @@ -4491,7 +4508,12 @@ selected2 = selMode === 'single' ? [colIdx] : unique(selected2.concat([colIdx])); } changeInput('columns_selected', selected2); - }); + } + if (selTarget === 'column') { + $(table.table().body()).on('click.dt', 'td', callback); + } else { + $(table.table().footer()).on('click.dt', 'tr th', callback); + } changeInput('columns_selected', selected2); var selectCols = function() { table.columns().nodes().flatten().to$().removeClass(selClass); @@ -4743,169 +4765,171 @@ } @@ -5638,57 +5662,12 @@ } - - - - - - -