From a45c5cd29213143335e699d2a52e32cde76194c7 Mon Sep 17 00:00:00 2001 From: Candace Savonen Date: Mon, 2 Mar 2020 13:40:02 -0500 Subject: [PATCH 01/11] dplyr:: were needed to differentiate `rename` --- .../01-Subtype-chordoma.Rmd | 8 +-- .../01-Subtype-chordoma.nb.html | 63 +++++++----------- .../plots/smarcb1_expression_copy_status.png | Bin 57032 -> 57109 bytes 3 files changed, 27 insertions(+), 44 deletions(-) diff --git a/analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.Rmd b/analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.Rmd index cd9b46083a..d0f8a29680 100644 --- a/analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.Rmd +++ b/analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.Rmd @@ -110,7 +110,7 @@ copy_neutral_df <- chordoma_id_df %>% # if there's no loss, let's assume status is copy neutral mutate(status = "neutral") %>% # let's get the columns to match chordoma_loss - rename(biospecimen_id = Kids_First_Biospecimen_ID) %>% + dplyr::rename(biospecimen_id = Kids_First_Biospecimen_ID) %>% select(biospecimen_id, status) copy_neutral_df ``` @@ -163,7 +163,7 @@ smarcb1_expression <- t(smarcb1_expression) %>% # we want the rownames that are biospecimen identifers as their own column called Kids_First_Biospecimen_ID tibble::rownames_to_column("Kids_First_Biospecimen_ID") %>% # give SMARCB1 column a slightly better column name - rename(SMARCB1_expression = SMARCB1) + dplyr::rename(SMARCB1_expression = SMARCB1) smarcb1_expression ``` @@ -182,7 +182,7 @@ Joining the copy number data with the expression data in this step chordoma_smarcb1_df <- smarcb1_expression %>% # any missing samples will get filled with NA when using a full join full_join(chordoma_copy, by = "sample_id") %>% - rename(Kids_First_Biospecimen_ID_DNA = Kids_First_Biospecimen_ID, + dplyr::rename(Kids_First_Biospecimen_ID_DNA = Kids_First_Biospecimen_ID, Kids_First_Biospecimen_ID_RNA = biospecimen_id) # this step adds in the participant identifier (sample_id to match between the two data.frame) @@ -204,7 +204,7 @@ chordoma_smarcb1_df <- chordoma_smarcb1_df %>% status, SMARCB1_expression) %>% # 'status' is replaced a more descriptive name - rename(focal_SMARCB1_status = status) + dplyr::rename(focal_SMARCB1_status = status) chordoma_smarcb1_df ``` diff --git a/analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.nb.html b/analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.nb.html index e83ff5467b..1bd5bcaf0c 100644 --- a/analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.nb.html +++ b/analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.nb.html @@ -1776,26 +1776,9 @@

20191121

Set up

- -
library(dplyr)
- - -
Registered S3 method overwritten by 'dplyr':
-  method           from
-  print.rowwise_df     
-
-Attaching package: ‘dplyr’
-
-The following objects are masked from ‘package:stats’:
-
-    filter, lag
-
-The following objects are masked from ‘package:base’:
-
-    intersect, setdiff, setequal, union
- - -
library(readr)
+
+
library(dplyr)
+library(readr)
 library(ggplot2)
@@ -1898,7 +1881,7 @@

Prepare the data

status == "loss") chordoma_loss
- +
-
- - - - - - -
# remove large copy number data frame
-rm(focal_cn_df)
- - - -

Modifying the output file to filter only chorodoma diagnoses and removing columns with information not relevant to the study

- - - -
chordoma_id_df <- histologies_df %>% 
-  # only rows with chordoma samples
-  filter(short_histology == "Chordoma") %>%
-  # select only these columns that we'll need later
-  select(Kids_First_Biospecimen_ID, sample_id, Kids_First_Participant_ID,
-         experimental_strategy)
-chordoma_id_df
- - -
-
@@ -1922,8 +1901,8 @@

Prepare the data

Distinguishing the chordoma samples with no copy number change chromosome 22

- -
copy_neutral_df <- chordoma_id_df %>% 
+
+
copy_neutral_df <- subset_metadata %>% 
   # the copy events can only be taken from WGS data not RNA-seq data
   # we also only want biospecimens where a loss was not recorded to avoid duplicates
   filter(experimental_strategy == "WGS",
@@ -1935,7 +1914,7 @@ 

Prepare the data

select(biospecimen_id, status) copy_neutral_df
- +