Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Update oncoprints to use histology-specific goi lists #1046

Merged
merged 22 commits into from
May 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5d0bcf0
Incorporate histology specific goi lists
cbethell Apr 29, 2021
a5e5af1
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell Apr 29, 2021
da63721
Update analyses/oncoprint-landscape/util/prepare-goi-lists.R
cbethell May 3, 2021
7e28741
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell May 3, 2021
bdc3319
Merge branch 'master' of https://github.com/AlexsLemonade/OpenPBTA-an…
cbethell May 4, 2021
9cbd240
add `top_n` argument for goi plots and re-run
cbethell May 4, 2021
d4c2e51
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell May 4, 2021
c9a1f8a
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell May 6, 2021
c51132c
Oncoprint GOI list revisions (#1053)
jaclyn-taroni May 6, 2021
b39e417
remove `tolower()` and `gsub()`
cbethell May 6, 2021
e64290f
re-run module shell script with v19 data
cbethell May 6, 2021
567effe
Update top_n logic (#1054)
jaclyn-taroni May 7, 2021
f735ceb
re-run with #1054 updates
cbethell May 7, 2021
04874a4
add coding for `intron`, `5'flank`, and `3'flank`
cbethell May 10, 2021
52ac162
add flag to handle the inclusion of introns
cbethell May 10, 2021
ede7838
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell May 11, 2021
ea6e207
fix merge conflicts and re-run
cbethell May 11, 2021
44cf639
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
jaclyn-taroni May 13, 2021
f889ad5
Try deleting all PNGs currently in the plots directory
jaclyn-taroni May 13, 2021
3ad02de
Rerun locally and add plots
jaclyn-taroni May 13, 2021
d71f76b
adjust logic around goi subsetting and re-run
cbethell May 13, 2021
34c3b16
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
jaclyn-taroni May 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions analyses/oncoprint-landscape/00-prepare-goi-lists.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Take in oncoprint-goi-lists-OpenPBTA.csv and create a goi file for each
# column with associated genes of interest for each specified broad histology
#
#
# Chante Bethell for CCDL 2021
#
# #### USAGE
# This script is intended to be run from the command line as follows:
#
# Rscript --vanilla 00-prepare-goi-lists.R


#### Set Up --------------------------------------------------------------------

library(dplyr)
library(stringr)

#### Directories and Files -----------------------------------------------------

# Detect the ".git" folder -- this will be in the project root directory.
# Use this as the root directory to ensure proper execution, no matter where
# it is called from.
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))

# Path to directory that contains the manually curated genes of interest list;
# the per-histology output files are written to this directory as well
data_dir <-
  file.path(root_dir, "analyses", "oncoprint-landscape", "data")

#### Read in the curated list --------------------------------------------------

# Each histology has its own column, and there are source columns.
# The path is built from `data_dir` (not a relative "data" path) so the file
# is found regardless of the current working directory.
all_goi_df <- readr::read_csv(
  file.path(data_dir, "oncoprint-goi-lists-OpenPBTA.csv")
)

# Drop the source columns
all_goi_df <- all_goi_df %>%
  select(-contains("Source"))

#### Write one genes of interest file per column -------------------------------

# Now each column will be a broad histology.
# `seq_len()` is used instead of `1:ncol()` so that a table with no remaining
# columns results in zero iterations rather than iterating over c(1, 0).
for (col_iter in seq_len(ncol(all_goi_df))) {

  # The broad histology is the column name, but let's make it all lowercase
  # and replace spaces with hyphens for use as part of the output file name
  broad_histology <- str_to_lower(str_replace_all(
    colnames(all_goi_df)[col_iter],
    pattern = " ",
    replacement = "-"
  ))

  # Create the output file name
  output_file <- file.path(data_dir, str_c(broad_histology, "_goi_list.tsv"))

  # Write the current column to file, removing any NA values and duplicated
  # genes; the original column name is kept as the header of the output file
  all_goi_df[, col_iter] %>%
    tidyr::drop_na() %>%
    distinct() %>%
    readr::write_tsv(output_file)

}
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,23 @@ option_list <- list(
default = NULL,
help = "optional name of `broad_histology` value to plot associated oncoprint"
),
optparse::make_option(
c("-n", "--top_n"),
type = "integer",
default = 25,
help = "`n` to display top n genes based on count of mutations, default is 25"
),
optparse::make_option(
c("-p", "--png_name"),
type = "character",
default = NULL,
help = "oncoprint output png file name"
),
optparse::make_option(
c("--include_introns"),
action = "store_true",
default = FALSE,
help = "logical statement on whether to include intronic variants in oncoprint plot"
)
)

Expand All @@ -115,22 +127,6 @@ cnv_df <- opt$cnv_file
fusion_df <- opt$fusion_file
goi_list <- opt$goi_list

#### Functions ----------------------------------------------------------------

read_genes <- function(gene_list) {
# This function takes in the file path to a gene list and pulls out
# the gene information from that list
#
# Args:
# gene_list: file path to genes of interest file
#
# Return:
# genes: a vector of genes from the genes of interest file

genes <- readr::read_tsv(gene_list) %>%
dplyr::pull("gene")
}

#### Read in data --------------------------------------------------------------

# Read in metadata
Expand All @@ -142,6 +138,11 @@ maf_df <- data.table::fread(opt$maf_file,
stringsAsFactors = FALSE,
data.table = FALSE)

# Intronic variants are only retained when the `--include_introns` flag was
# supplied; otherwise rows classified as "Intron" are removed from the MAF data
if (!opt$include_introns) {
  maf_df <- dplyr::filter(maf_df, Variant_Classification != "Intron")
}

# Read in cnv file
if (!is.null(opt$cnv_file)) {
cnv_df <- readr::read_tsv(opt$cnv_file) %>%
Expand All @@ -155,16 +156,6 @@ if (!is.null(opt$fusion_file)) {
fusion_df <- readr::read_tsv(opt$fusion_file)
}

# Read in gene information from the list of genes of interest files
if (!is.null(opt$goi_list)) {
goi_files <- unlist(stringr::str_split(goi_list, ",| "))
# Read in using the `read_genes` custom function and unlist the gene column
# data from the genes of interest file paths given
goi_list <- lapply(goi_files, read_genes)
# Include only the unique genes of interest
goi_list <- unique(unlist(goi_list))
}

#### Set up oncoprint annotation objects --------------------------------------
# Read in histology standard color palette for project
histology_label_mapping <- readr::read_tsv(
Expand Down Expand Up @@ -254,6 +245,44 @@ maf_object <- prepare_maf_object(
fusion_df = fusion_df
)

#### Subset MAF Object (Optional)----------------------------------------------

# Code here is specifically adapted from:
# https://github.com/marislab/create-pptc-pdx-oncoprints/blob/master/R/create-complexheat-oncoprint-revision.R

# When a histology-specific genes of interest (GOI) file is supplied, restrict
# the MAF object to those genes, rank the genes by the number of altered
# samples, and keep at most `opt$top_n` of them in `goi_list`.
if (!is.null(opt$goi_list)) {

  # Read in genes of interest information using the `read_tsv()` function
  goi_list <- readr::read_tsv(opt$goi_list) %>%
    as.matrix()

  # Keep only the GOI, restricted to the samples present in the metadata
  filtered_maf_object <- subsetMaf(
    maf = maf_object,
    tsb = metadata$Tumor_Sample_Barcode,
    genes = goi_list,
    mafObj = TRUE
  )

  # Get the per-gene summary for this subset object
  gene_sum <- mafSummary(filtered_maf_object)$gene.summary

  # Sort to get top altered genes rather than mutated only genes
  goi_list <- gene_sum %>%
    dplyr::arrange(dplyr::desc(AlteredSamples)) %>%
    # Filter to genes where multiple samples have an alteration
    dplyr::filter(AlteredSamples > 1) %>%
    dplyr::pull(Hugo_Symbol) %>%
    # Keep the `top_n` genes; `head()` returns the whole vector when it has
    # fewer than `top_n` entries and an empty vector when `top_n` is 0, so no
    # separate length check (or `1:n` indexing) is needed
    head(n = opt$top_n)

}

#### Plot and Save Oncoprint --------------------------------------------------

# Given a maf object, plot an oncoprint of the variants in the
Expand All @@ -265,6 +294,7 @@ png(
units = "cm",
res = 300
)

oncoplot(
maf_object,
clinicalFeatures = "display_group",
Expand All @@ -279,6 +309,8 @@ oncoplot(
colors = oncoprint_col_palette,
annotationColor = annotation_colors,
bgCol = "#F5F5F5",
top = 25
top = opt$top_n
)

dev.off()

63 changes: 63 additions & 0 deletions analyses/oncoprint-landscape/data/embryonal-tumor_goi_list.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
Embryonal tumor
DICER1
TP53
MSH2
TERT
TFPT
PARP8
CENPE
DDX11
MUTYH
CHEK2
CTNNB1
DYNC2H1
PTCH1
MAP4K4
SUFU
ROS1
KSR2
RASSF5
FOXO3
IGFN1
BCOR
TTYH1
MIR17HG
LIN28A
APC
CSNK2B
SMO
KMT2D
SMARCA4
PRDM6
KMT2C
KDM6A
CREBBP
ZMYM3
GSE1
ARID1A
MED12
GFI1B
MYCN
OTX2
TCF4
ZIC1
GFI1
TBR1
BRCA2
ATM
PTEN
PIK3CA
PRKAR1A
BAI3
EPHA7
KBTBD4
CTDNEP1
DDX3X
SYNCRIP
IDH1
CDK6
SNCAIP
FOXR2
BEND2
MN1
SMARCB1
14 changes: 14 additions & 0 deletions analyses/oncoprint-landscape/data/ependymal-tumor_goi_list.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Ependymal tumor
H3F3A
CDKN2A
RELA
YAP1
C11orf95
MAMLD1
FAM118B
MAML2
NF2
CLDN1
PTEN
ARL4D
L1CAM
82 changes: 82 additions & 0 deletions analyses/oncoprint-landscape/data/hgat_goi_list.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
HGAT
H3F3A
TP53
ATRX
PPM1D
PIK3CA
NF1
PIK3R1
PDGFRA
ACVR1
PTEN
EGFR
ATM
FGFR1
CCND2
HIST1H3B
KIT
KDR
CDKN2A
MET
IGF1R
BRAF
IDH1
MYCN
CDK4
ID2
MYC
TOP3A
CDK6
ASXL1
KRAS
MDM2
TERT
MAFK
PDGFA
PIK3C2B
PLAGL2
GAB2
AURKB
NFIB
PIK3C2G
AKT1
CCND1
ID3
APOBEC3H
AKT2
GOLPH3
FGFR2
BCOR
CDKN1B
CDKN1C
RB1
SMARCE1
CDKN2C
CIC
DIS3L2P1
CDKN2B
SETD2
KDM6B
NTRK1
NTRK2
NTRK3
HIST1H3C
HIST2H3C
DDX11
TSC2
DDR2
TOP2A
FOSB
VEGFA
NRAS
MTOR
PTPN11
FANCA
SLCO1B3
PDGFC
YES1
FYN
POLE
HIST1H2BE
KMT2C
TDRD9
28 changes: 28 additions & 0 deletions analyses/oncoprint-landscape/data/lgat_goi_list.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
LGAT
KIAA1549
BRAF
FGFR1
TACC1
MYB
MYBL1
NTRK1
NTRK2
NTRK3
IDH
H3F3A
RAF1
PTPN11
TSC1
TSC2
PIK3CA
PRKCA
FGFR2
FGFR3
MAP2K1
ALK
ROS1
QKI
KRAS
TP53
ATRX
CDKN2A
Loading