Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Update oncoprints to use histology-specific goi lists #1046

Merged
merged 22 commits into from
May 13, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5d0bcf0
Incorporate histology specific goi lists
cbethell Apr 29, 2021
a5e5af1
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell Apr 29, 2021
da63721
Update analyses/oncoprint-landscape/util/prepare-goi-lists.R
cbethell May 3, 2021
7e28741
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell May 3, 2021
bdc3319
Merge branch 'master' of https://github.com/AlexsLemonade/OpenPBTA-an…
cbethell May 4, 2021
9cbd240
add `top_n` argument for goi plots and re-run
cbethell May 4, 2021
d4c2e51
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell May 4, 2021
c9a1f8a
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell May 6, 2021
c51132c
Oncoprint GOI list revisions (#1053)
jaclyn-taroni May 6, 2021
b39e417
remove `tolower()` and `gsub()`
cbethell May 6, 2021
e64290f
re-run module shell script with v19 data
cbethell May 6, 2021
567effe
Update top_n logic (#1054)
jaclyn-taroni May 7, 2021
f735ceb
re-run with #1054 updates
cbethell May 7, 2021
04874a4
add coding for `intron`, `5'flank`, and `3'flank`
cbethell May 10, 2021
52ac162
add flag to handle the inclusion of introns
cbethell May 10, 2021
ede7838
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
cbethell May 11, 2021
ea6e207
fix merge conflicts and re-run
cbethell May 11, 2021
44cf639
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
jaclyn-taroni May 13, 2021
f889ad5
Try deleting all PNGs currently in the plots directory
jaclyn-taroni May 13, 2021
3ad02de
Rerun locally and add plots
jaclyn-taroni May 13, 2021
d71f76b
adjust logic around goi subsetting and re-run
cbethell May 13, 2021
34c3b16
Merge branch 'master' into cbethell/prep-for-histology-goi-lists
jaclyn-taroni May 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 43 additions & 23 deletions analyses/oncoprint-landscape/01-plot-oncoprint.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ option_list <- list(
default = NULL,
help = "optional name of `broad_histology` value to plot associated oncoprint"
),
optparse::make_option(
c("-n", "--top_n"),
type = "integer",
default = NULL,
help = "optional `n` to display top n genes based on count of mutations"
),
optparse::make_option(
c("-p", "--png_name"),
type = "character",
Expand All @@ -115,22 +121,6 @@ cnv_df <- opt$cnv_file
fusion_df <- opt$fusion_file
goi_list <- opt$goi_list

#### Functions ----------------------------------------------------------------

read_genes <- function(gene_list) {
  # Pull the genes out of a single genes of interest file.
  #
  # Args:
  #   gene_list: file path to a tab-separated genes of interest file that
  #              has a column named `gene`
  #
  # Return:
  #   the contents of the `gene` column as a vector (returned invisibly)

  # Read the whole table first, then extract only the `gene` column
  goi_table <- readr::read_tsv(gene_list)
  invisible(dplyr::pull(goi_table, "gene"))
}

#### Read in data --------------------------------------------------------------

# Read in metadata
Expand All @@ -154,12 +144,10 @@ if (!is.null(opt$fusion_file)) {

# Read in gene information from the list of genes of interest files
if (!is.null(opt$goi_list)) {
  # Read the histology-specific genes of interest file with `read_tsv()`.
  #
  # The path is used exactly as it was supplied on the command line:
  # lowercasing it or replacing spaces with hyphens rewrites the directory
  # components as well as the file name, which can point at a file that does
  # not exist on a case-sensitive filesystem.
  #
  # The result is converted to a matrix so that it can be handed to the
  # `genes` argument of `subsetMaf()` further down.
  goi_list <- readr::read_tsv(opt$goi_list) %>%
    as.matrix()
}

#### Set up oncoprint annotation objects --------------------------------------
Expand Down Expand Up @@ -251,6 +239,37 @@ maf_object <- prepare_maf_object(
fusion_df = fusion_df
)

#### Subset MAF Object (Optional)----------------------------------------------

# Code here is specifically adapted from:
# https://github.com/marislab/create-pptc-pdx-oncoprints/blob/master/R/create-complexheat-oncoprint-revision.R

# Subset `maf_object` for histology-specific goi list
if (!is.null(opt$goi_list)) {
  # Keep only the samples in `metadata` and the genes of interest
  maf_object <- subsetMaf(
    maf = maf_object,
    tsb = metadata$Tumor_Sample_Barcode,
    genes = goi_list,
    mafObj = TRUE
  )

  # Get the per-gene summary for this subset object
  gene_sum <- mafSummary(maf_object)$gene.summary

  # Sort to get top altered genes rather than mutated only genes
  goi_ordered <-
    gene_sum[order(gene_sum$AlteredSamples, decreasing = TRUE), ]

  if (!is.null(opt$top_n)) {
    # Select top `n` genes if the argument is provided, never asking for more
    # rows than the summary holds; `seq_len()` keeps this safe when the
    # summary is empty
    n_keep <- min(nrow(goi_ordered), opt$top_n)
    goi_ordered <- goi_ordered[seq_len(n_keep), ]
  }

  # Always hand the ordered summary table downstream: the plotting step reads
  # `goi_list$Hugo_Symbol`, which fails on the character matrix that
  # `goi_list` holds when it is only replaced in the `top_n` branch
  goi_list <- goi_ordered
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The actual code changes I'm suggesting are untested, but I believe you will only have to take these steps if someone specifies a GOI list and a top n, so you can simplify this a bit:

Suggested change
# Subset `maf_object` for histology-specific goi list
if (!is.null(opt$goi_list)) {
maf_object = subsetMaf(
maf = maf_object,
tsb = metadata$Tumor_Sample_Barcode,
genes = goi_list,
mafObj = TRUE
)
# Get top mutated genes per this subset object
gene_sum <- mafSummary(maf_object)$gene.summary
# Sort to get top altered genes rather than mutated only genes
goi_ordered <-
gene_sum[order(gene_sum$AlteredSamples, decreasing = T),]
if (!is.null(opt$top_n)) {
# Select top `n` genes if the argument is provided
top_n <- ifelse(nrow(gene_sum) < opt$top_n, nrow(gene_sum), opt$top_n)
goi_list <- goi_ordered[1:top_n,]
}
}
# Subset `maf_object` for histology-specific goi list
if (!is.null(opt$goi_list) & !is.null(opt$top_n)) {
maf_object <- subsetMaf(
maf = maf_object,
tsb = metadata$Tumor_Sample_Barcode,
genes = goi_list,
mafObj = TRUE
)
# Get top mutated genes per this subset object
gene_sum <- mafSummary(maf_object)$gene.summary
# Sort to get top altered genes rather than mutated only genes
goi_ordered <-
gene_sum[order(gene_sum$AlteredSamples, decreasing = T),]
# Select top `n` genes if the argument is provided
top_n <- ifelse(nrow(gene_sum) < opt$top_n, nrow(gene_sum), opt$top_n)
goi_list <- goi_ordered[1:top_n,]
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Upon testing, this method does not appear to obey the `top_n` argument.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you expand on that a bit?

Copy link
Contributor Author

@cbethell cbethell May 4, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not limit the number of genes displayed to `top_n`; instead, it shows all of the genes in the GOI list (the previous behavior, which we did not want).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the version with two separate `if` statements worked?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That suggests that the logic isn't quite right: the original list (`goi_list`) is still being used, which means the conditions of the `if()` are not being met. You only need to subset the MAF and run the `mafSummary()` steps when a `top_n` argument is supplied, so I would see if you can get to the bottom of why the combined condition fails.

#### Plot and Save Oncoprint --------------------------------------------------

# Given a maf object, plot an oncoprint of the variants in the
Expand All @@ -265,7 +284,7 @@ png(
oncoplot(
maf_object,
clinicalFeatures = "display_group",
genes = goi_list,
genes = goi_list$Hugo_Symbol,
logColBar = TRUE,
sortByAnnotation = TRUE,
showTumorSampleBarcodes = TRUE,
Expand All @@ -279,3 +298,4 @@ oncoplot(
top = 25
)
dev.off()

Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
HGAT
H3F3A
TP53
ATRX
PPM1D
PIK3CA
NF1
PIK3R1
PDGFRA
ACVR1
PTEN
EGFR
ATM
FGFR1
CCND2
HIST1H3B
KIT
KDR
CDKN2A
MET
IGF1R
BRAF
IDH1
MYCN
CDK4
ID2
MYC
TOP3A
CDK6
ASXL1
KRAS
MDM2
TERT
MAFK
PDGFA
PIK3C2B
PLAGL2
GAB2
AURKB
NFIB
PIK3C2G
AKT1
CCND1
ID3
APOBEC3H
AKT2
GOLPH3
FGFR2
BCOR
CDKN1B
CDKN1C
RB1
SMARCE1
CDKN2C
CIC
DIS3L2P1
CDKN2B
SETD2
KDM6B
NTRK1
NTRK2
NTRK3
HIST1H3C
HIST2H3C
DDX11
TSC2
DDR2
TOP2A
FOSB
VEGFA
NRAS
MTOR
PTPN11
FANCA
SLCO1B3
PDGFC
YES1
FYN
POLE
HIST1H2BE
KMT2C
TDRD9
63 changes: 63 additions & 0 deletions analyses/oncoprint-landscape/data/embryonal-tumor_goi_list.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
Embryonal tumor
DICER1
TP53
MSH2
TERT
TFPT
PAARP8
CENPE
DDX11
MUTYH
CHEK2
CTNNB1
DYNC2H1
PTCH1
MAP4K4
SUFU
ROS1
KSR2
RASSF5
FOXO3
IGFN1
BCOR
TTYH1
MIR17HG
LIN28A
APC
CSNK2B
SMO
KMT2D
SMARCA4
PRDM6
KMT2C
KDM6A
CREBBP
ZMYM3
GSE1
ARID1A
MED12
GFIB
MYCN
OTX2
TCF4
ZIC1
GFI1
TBR1
BRCA2
ATM
PTEN
PIK3CA
PRKAR1A
BAI3
EPHA7
KBTBD4
CTDNEP1
DDX3X
SYNCRIP
IDH1
CDK6
SNCAIP
FOXR2
BEND2
MN1
SMARCB1
14 changes: 14 additions & 0 deletions analyses/oncoprint-landscape/data/ependymal-tumor_goi_list.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Ependymal tumor
H3F3A
CDKN2A
RELA
YAP1
C11orf95
MAMLD1
FAM118B
MAML2
NF2
CLDN1
PTEN
ARL4D
L1CAM
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
LGAT
KIAA1549
BRAF
FGFR1
TACC1
MYB
MYBL
NTRK1
NTRK2
NTRK3
IDH
H3F3A
RAF1
PTPN11
TSC1
TSC2
PIK3CA
PRKCA
FGFR2
FGFR3
MAP2K1
ALK
ROS1
QKI
KRAS
TP53
ATRX
CDKN2A
82 changes: 82 additions & 0 deletions analyses/oncoprint-landscape/data/oncoprint-goi-lists-OpenPBTA.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
LGAT,Source,Embryonal tumor,Source,HGAT,Source,Ependymal tumor,Source,Other,Source
KIAA1549,RYALL 2020,DICER1,LAMBO 2020,H3F3A,MACKAY 2017,H3F3A,RYALL 2017,CTNNB1,SEKINE 2002
BRAF,RYALL 2020,TP53,LAMBO 2020,TP53,MACKAY 2017,CDKN2A,PAJTLER 2015,BRAF,QADDOUMI 2016
FGFR1,RYALL 2020,MSH2,LAMBO 2020,ATRX,MACKAY 2017,RELA,PAJTLER 2015,FGFR1,QADDOUMI 2016
TACC1,RYALL 2020,TERT,LAMBO 2020,PPM1D,MACKAY 2017,YAP1,PAJTLER 2015,MYB,QADDOUMI 2016
MYB,RYALL 2020,TFPT,LAMBO 2020,PIK3CA,MACKAY 2017,C11orf95,PAJTLER 2015,KRAS,QADDOUMI 2016
MYBL,RYALL 2020,PAARP8,LAMBO 2020,NF1,MACKAY 2017,MAMLD1,PAJTLER 2015,IDH1,QADDOUMI 2016
NTRK1,RYALL 2020,CENPE,LAMBO 2020,PIK3R1,MACKAY 2017,FAM118B,PAJTLER 2015,IDH2,QADDOUMI 2016
NTRK2,RYALL 2020,DDX11,LAMBO 2020,PDGFRA,MACKAY 2017,MAML2,PAJTLER 2015,H3F3A,QADDOUMI 2016
NTRK3,RYALL 2020,MUTYH,LAMBO 2020,ACVR1,MACKAY 2017,NF2,PAJTLER 2015,FGFR3,QADDOUMI 2016
IDH,RYALL 2020,CHEK2,LAMBO 2020,PTEN,MACKAY 2017,CLDN1,PAJTLER 2015,PDGFRA,QADDOUMI 2016
H3F3A,RYALL 2020,CTNNB1,LAMBO 2020,EGFR,MACKAY 2017,PTEN,PAJTLER 2015,NTRK2,QADDOUMI 2016
RAF1,RYALL 2020,DYNC2H1,LAMBO 2020,ATM,MACKAY 2017,ARL4D,PAJTLER 2015,MYBL1,QADDOUMI 2016
PTPN11,RYALL 2020,PTCH1,LAMBO 2020,FGFR1,MACKAY 2017,L1CAM,PARKER 2014,TACC1,QADDOUMI 2016
TSC1,RYALL 2020,MAP4K4,LAMBO 2020,CCND2,MACKAY 2017,,,MAML2,QADDOUMI 2016
TSC2,RYALL 2020,SUFU,LAMBO 2020,HIST1H3B,MACKAY 2017,,,QKI,QADDOUMI 2016
PIK3CA,RYALL 2020,ROS1,LAMBO 2020,KIT,MACKAY 2017,,,PATZ1,QADDOUMI 2016
PRKCA,RYALL 2020,KSR2,LAMBO 2020,KDR,MACKAY 2017,,,EGFR,QADDOUMI 2016
FGFR2,RYALL 2020,RASSF5,LAMBO 2020,CDKN2A,MACKAY 2017,,,EWSR1,QADDOUMI 2016
FGFR3,RYALL 2020,FOXO3,LAMBO 2020,MET,MACKAY 2017,,,NF2,BI 2017
MAP2K1,RYALL 2020,IGFN1,LAMBO 2020,IGF1R,MACKAY 2017,,,SMARCB1,BI 2017
ALK,RYALL 2020,BCOR,LAMBO 2020,BRAF,MACKAY 2017,,,AKT1,BI 2017
ROS1,RYALL 2020,TTYH1,LAMBO 2020,IDH1,MACKAY 2017,,,AKT3,BI 2017
QKI,RYALL 2020,MIR17HG,LAMBO 2020,MYCN,MACKAY 2017,,,PIK3CA,BI 2017
KRAS,RYALL 2020,LIN28A,LAMBO 2020,CDK4,MACKAY 2017,,,SMP,BI 2017
TP53,RYALL 2020,CTNNB1,NORTHCOTT 2017,ID2,MACKAY 2017,,,SUFU,BI 2017
ATRX,RYALL 2020,APC,NORTHCOTT 2017,MYC,MACKAY 2017,,,POLRA2,BI 2017
CDKN2A,RYALL 2020,CSNK2B,NORTHCOTT 2017,TOP3A,MACKAY 2017,,,BAP1,BI 2017
,,PTCH1,NORTHCOTT 2017,CDK6,MACKAY 2017,,,TERT,BI 2017
,,SUFU,NORTHCOTT 2017,ASXL1,MACKAY 2017,,,KLF4,YOUNGBLOOD 2019
,,SMO,NORTHCOTT 2017,KRAS,MACKAY 2017,,,TP53,YOUNGBLOOD 2019
,,KMT2D,NORTHCOTT 2017,MDM2,MACKAY 2017,,,LYST,YOUNGBLOOD 2019
,,SMARCA4,NORTHCOTT 2017,TERT,MACKAY 2017,,,CDKN2A,YOUNGBLOOD 2019
,,PRDM6,NORTHCOTT 2017,MAFK,MACKAY 2017,,,PIK3CA,YOUNGBLOOD 2019
,,KMT2C,NORTHCOTT 2017,PDGFA,MACKAY 2017,,,PTEN,YOUNGBLOOD 2019
,,KDM6A,NORTHCOTT 2017,PIK3C2B,MACKAY 2017,,,PIK3R1,YOUNGBLOOD 2019
,,CREBBP,NORTHCOTT 2017,PLAGL2,MACKAY 2017,,,PBRM1,YOUNGBLOOD 2019
,,ZMYM3,NORTHCOTT 2017,GAB2,MACKAY 2017,,,SETD2,YOUNGBLOOD 2019
,,GSE1,NORTHCOTT 2017,AURKB,MACKAY 2017,,,ARID1A,YOUNGBLOOD 2019
,,BCOR,NORTHCOTT 2017,NFIB,MACKAY 2017,,,TERT,YOUNGBLOOD 2019
,,ARID1A,NORTHCOTT 2017,PIK3C2G,MACKAY 2017,,,SMAD4,YOUNGBLOOD 2019
,,MED12,NORTHCOTT 2017,AKT1,MACKAY 2017,,,SDHA,YOUNGBLOOD 2019
,,ARID1A,NORTHCOTT 2017,CCND1,MACKAY 2017,,,DDB2,YOUNGBLOOD 2019
,,GFIB,NORTHCOTT 2017,ID3,MACKAY 2017,,,PRPRD,YOUNGBLOOD 2019
,,MYCN,NORTHCOTT 2017,APOBEC3H,MACKAY 2017,,,CDC27,YOUNGBLOOD 2019
,,MYCN,NORTHCOTT 2017,AKT2,MACKAY 2017,,,ARHGAP26,YOUNGBLOOD 2019
,,OTX2,NORTHCOTT 2017,GOLPH3,MACKAY 2017,,,ERBB4,YOUNGBLOOD 2019
,,TCF4,NORTHCOTT 2017,FGFR2,MACKAY 2017,,,BLM,YOUNGBLOOD 2019
,,ZIC1,NORTHCOTT 2017,BCOR,MACKAY 2017,,,USP9X,YOUNGBLOOD 2019
,,GFI1,NORTHCOTT 2017,CDKN1B,MACKAY 2017,,,TERT,THOMAS 2020
,,TBR1,NORTHCOTT 2017,CDKN1C,MACKAY 2017,,,PRKCA,THOMAS 2020
,,TERT,NORTHCOTT 2017,RB1,MACKAY 2017,,,TP53,THOMAS 2020
,,TP53,NORTHCOTT 2017,SMARCE1,MACKAY 2017,,,CCDC47,THOMAS 2020
,,BRCA2,NORTHCOTT 2017,CDKN2C,MACKAY 2017,,,BRAF,KROOKS 2018
,,ATM,NORTHCOTT 2017,CIC,MACKAY 2017,,,,
,,PTEN,NORTHCOTT 2017,DIS3L2P1,MACKAY 2017,,,,
,,PIK3CA,NORTHCOTT 2017,CDKN2B,MACKAY 2017,,,,
,,PRKAR1A,NORTHCOTT 2017,SETD2,MACKAY 2017,,,,
,,BAI3,NORTHCOTT 2017,KDM6B,MACKAY 2017,,,,
,,EPHA7,NORTHCOTT 2017,NTRK1,MACKAY 2017,,,,
,,KBTBD4,NORTHCOTT 2017,NTRK2,MACKAY 2017,,,,
,,CTDNEP1,NORTHCOTT 2017,NTRK3,MACKAY 2017,,,,
,,DDX3X,NORTHCOTT 2017,HIST1H3C,MACKAY 2017,,,,
,,SYNCRIP,NORTHCOTT 2017,HIST2H3C,MACKAY 2017,,,,
,,IDH1,NORTHCOTT 2017,DDX11,MUELLER 2019,,,,
,,CDK6,NORTHCOTT 2017,TSC2,MUELLER 2019,,,,
,,SNCAIP,NORTHCOTT 2017,DDR2,MUELLER 2019,,,,
,,FOXR2,LASTOWSKA 2020,TOP2A,MUELLER 2019,,,,
,,BCOR,LASTOWSKA 2020,FOSB,MUELLER 2019,,,,
,,BEND2,LASTOWSKA 2020,VEGFA,MUELLER 2019,,,,
,,MN1,LASTOWSKA 2020,NRAS,PNOC003 MANUSCRIPT 2021,,,,
,,SMARCB1,,MTOR,PNOC003 MANUSCRIPT 2021,,,,
,,SMARCA4,,PTPN11,PNOC003 MANUSCRIPT 2021,,,,
,,,,FANCA,PNOC003 MANUSCRIPT 2021,,,,
,,,,SLCO1B3,PNOC003 MANUSCRIPT 2021,,,,
,,,,PDGFC,PNOC003 MANUSCRIPT 2021,,,,
,,,,YES1,PNOC003 MANUSCRIPT 2021,,,,
,,,,FYN,PNOC003 MANUSCRIPT 2021,,,,
,,,,POLE,PNOC003 MANUSCRIPT 2021,,,,
,,,,HIST1H2BE,PNOC003 MANUSCRIPT 2021,,,,
,,,,KMT2C,PNOC003 MANUSCRIPT 2021,,,,
,,,,TDRD9,PNOC003 MANUSCRIPT 2021,,,,
Loading