This repository has been archived by the owner on Jun 21, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 83
/
Copy path01-HGG-molecular-subtyping-defining-lesions.Rmd
158 lines (132 loc) · 4.67 KB
/
01-HGG-molecular-subtyping-defining-lesions.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
---
title: "High-Grade Glioma Molecular Subtyping - Defining Lesions"
output:
  html_notebook:
    toc: TRUE
    toc_float: TRUE
author: Chante Bethell for ALSF CCDL
date: 2019
---
This notebook looks at the defining lesions in all samples to support the
molecular subtyping of high-grade glioma samples in the OpenPBTA dataset.
# Usage
This notebook is intended to be run via the command line from the top directory
of the repository as follows:
`Rscript -e "rmarkdown::render('analyses/molecular-subtyping-HGG/01-HGG-molecular-subtyping-defining-lesions.Rmd', clean = TRUE)"`
# Set Up
```{r}
# Bring the `magrittr` pipe into scope through its `dplyr` re-export; the
# chunks below use `%>%` and call all other functions with explicit `pkg::`
# prefixes
`%>%` <- dplyr::`%>%`
```
## Directories and Files
```{r}
# The project root is the directory that contains the ".git" folder.
# Anchoring every path to it keeps file locations correct no matter which
# working directory this notebook is rendered from.
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))

# Results directory for this analysis; create it if it does not exist yet
results_dir <- file.path(
  root_dir, "analyses", "molecular-subtyping-HGG", "results"
)
if (!dir.exists(results_dir)) {
  dir.create(results_dir)
}

# Histologies metadata, restricted to solid-tissue tumor samples
metadata <- dplyr::filter(
  readr::read_tsv(file.path(root_dir, "data", "pbta-histologies.tsv")),
  sample_type == "Tumor",
  composition == "Solid Tissue"
)

# Metadata columns that are carried into the merged lesions table
select_metadata <- dplyr::select(
  metadata,
  Kids_First_Participant_ID,
  sample_id,
  Kids_First_Biospecimen_ID,
  short_histology,
  disease_type_new
)

# SNV consensus mutation data
snv_df <- data.table::fread(
  file.path(root_dir, "data", "pbta-snv-consensus-mutation.maf.tsv.gz")
)
```
# Prepare Data
## SNV consensus mutation data - defining lesions
```{r}
# Filter the snv consensus mutation data for the target lesions and flag each
# lesion with "Yes"/"No".
#
# The filtered data has one row per matching mutation call, so a sample with
# more than one matching call (two different lesions, or the same lesion
# reported twice) would otherwise occupy several rows -- each flagging a single
# lesion -- and end up duplicated in the final table. The calls are therefore
# collapsed to ONE row per sample: a lesion is "Yes" if any of the sample's
# calls matches it.
snv_lesions_df <- snv_df %>%
  dplyr::filter(Hugo_Symbol %in% c("H3F3A", "HIST1H3B") &
                  HGVSp_Short %in% c("p.K28M", "p.G35R", "p.G35V")) %>%
  dplyr::select(Tumor_Sample_Barcode, Hugo_Symbol, HGVSp_Short) %>%
  dplyr::mutate(
    H3F3A.K28M = dplyr::case_when(
      Hugo_Symbol == "H3F3A" & HGVSp_Short == "p.K28M" ~ "Yes",
      TRUE ~ "No"
    ),
    HIST1H3B.K28M = dplyr::case_when(
      Hugo_Symbol == "HIST1H3B" & HGVSp_Short == "p.K28M" ~ "Yes",
      TRUE ~ "No"
    ),
    H3F3A.G35R = dplyr::case_when(
      Hugo_Symbol == "H3F3A" & HGVSp_Short == "p.G35R" ~ "Yes",
      TRUE ~ "No"
    ),
    H3F3A.G35V = dplyr::case_when(
      Hugo_Symbol == "H3F3A" & HGVSp_Short == "p.G35V" ~ "Yes",
      TRUE ~ "No"
    )
  ) %>%
  dplyr::select(
    -HGVSp_Short,
    -Hugo_Symbol
  ) %>%
  # Collapse the per-call flags into a single row per sample
  dplyr::group_by(Tumor_Sample_Barcode) %>%
  dplyr::summarize_all(function(x) if (any(x == "Yes")) "Yes" else "No") %>%
  dplyr::ungroup()

# Add back in samples with no evidence of these specific mutations. These rows
# only carry the sample barcode, so the lesion columns come in as NA and are
# then filled with "No".
snv_lesions_df <- snv_lesions_df %>%
  dplyr::bind_rows(
    data.frame(
      Tumor_Sample_Barcode = setdiff(unique(snv_df$Tumor_Sample_Barcode),
                                     snv_lesions_df$Tumor_Sample_Barcode),
      # Keep barcodes as character (not factor) on R < 4.0
      stringsAsFactors = FALSE
    )
  ) %>%
  dplyr::mutate_all(function(x) tidyr::replace_na(x, "No"))

# Join the selected variables from the metadata with the snv consensus mutation
# and defining lesions data.frame. The inner join keeps only samples that are
# present in both the filtered metadata and the SNV data.
snv_lesions_df <- select_metadata %>%
  dplyr::inner_join(snv_lesions_df,
                    by = c("Kids_First_Biospecimen_ID" = "Tumor_Sample_Barcode")) %>%
  dplyr::select(
    dplyr::ends_with("ID"),
    dplyr::starts_with("H"),
    short_histology,
    disease_type_new
  ) %>%
  dplyr::mutate(
    # The first matching condition wins, so a K28 lesion takes precedence over
    # a G35 lesion; samples without any lesion keep their original label
    disease_type_reclassified = dplyr::case_when(
      H3F3A.K28M == "Yes" ~ "High-grade glioma, H3 K28 mutant",
      HIST1H3B.K28M == "Yes" ~ "High-grade glioma, H3 K28 mutant",
      H3F3A.G35R == "Yes" ~ "High-grade glioma, H3 G35 mutant",
      H3F3A.G35V == "Yes" ~ "High-grade glioma, H3 G35 mutant",
      TRUE ~ as.character(disease_type_new)
    )
  )

# Display `snv_lesions_df`
snv_lesions_df
```
## Save final table of results
```{r}
# Save the final data.frame (metadata identifiers, per-lesion "Yes"/"No" flags
# and the original and reclassified disease types) as a TSV file in the
# results directory
readr::write_tsv(snv_lesions_df,
file.path(results_dir, "HGG_defining_lesions.tsv"))
```
## Inconsistencies in disease classification
```{r}
# Show the samples whose reclassified disease type is a high-grade glioma but
# whose original `disease_type_new` label was neither HGG nor DIPG
dplyr::filter(
  snv_lesions_df,
  grepl("High-grade glioma", disease_type_reclassified),
  !disease_type_new %in% c(
    "High-grade glioma",
    "Brainstem glioma- Diffuse intrinsic pontine glioma"
  )
)
```
# Session Info
```{r}
# Print the session information (R version, platform and the attached/loaded
# package versions) so the rendered notebook records the environment it ran in
sessionInfo()
```