Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Recode Oncoprints #1009

Merged
merged 19 commits into from
May 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions analyses/oncoprint-landscape/00-map-to-sample_id.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@
# --fusion_file ../../scratch/arriba.tsv \
# --metadata_file ../../data/pbta-histologies.tsv \
# --output_directory ../../scratch/oncoprint_files \
# --filename_lead "primary_only" \
# --independent_specimens ../../data/independent-specimens.wgswxs.primary.tsv

library(dplyr)
library(stringr)

#### Command line options ------------------------------------------------------

Expand Down Expand Up @@ -185,8 +186,46 @@ readr::write_tsv(maf_df, maf_output)

message("Preparing fusion file...")

# Fusions where a reciprocal fusion exists (i.e., reciprocal_exists is TRUE)
# are handled separately from all other fusions
fusion_reciprocal_df <- fusion_df %>%
  # Reciprocal fusions only
  filter(reciprocal_exists) %>%
  # BSID + Gene1--Gene2
  select(Sample, FusionName) %>%
  # Because we're only looking at presence or absence here, we can filter to
  # distinct identifier-fusion pairs
  distinct() %>%
  # Put the genes of each fusion in alphabetical order so that a fusion and
  # its reciprocal (A--B and B--A) collapse to the same name. The whole column
  # is split at once and each element is sorted on its own, so no per-row
  # grouping is required.
  mutate(FusionName = vapply(
    str_split(FusionName, "--"),
    function(genes) str_c(sort(genes), collapse = "--"),
    character(1),
    USE.NAMES = FALSE
  )) %>%
  # When fusions are in alphabetical order, this ensures each reciprocal
  # fusion is only counted once
  distinct()

# No need to put fusions where no reciprocal exists in alphabetical order,
# so we handle these separately
fusion_no_reciprocal_df <- fusion_df %>%
  filter(!reciprocal_exists) %>%
  select(Sample, FusionName) %>%
  # But we can remove duplicates to avoid them being counted as multihit
  distinct()

# Bind reciprocal and no reciprocal together, then drop the intermediate
# data frames, which are not needed once combined
fusion_filtered_df <- bind_rows(fusion_reciprocal_df, fusion_no_reciprocal_df)
rm(fusion_reciprocal_df, fusion_no_reciprocal_df)

# Separate fusion gene partners
fus_sep <- fusion_df %>%
fus_sep <- fusion_filtered_df %>%
# Separate the 5' and 3' genes
tidyr::separate(FusionName, c("Gene1", "Gene2"), sep = "--") %>%
# Use row numbers to mark unique fusions - this will help us when
Expand Down
5 changes: 4 additions & 1 deletion analyses/oncoprint-landscape/01-plot-oncoprint.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,10 @@ maf_df <- data.table::fread(opt$maf_file,

# Read in cnv file
if (!is.null(opt$cnv_file)) {
  # Read the file once and recode the copy number calls:
  # "loss" becomes "Del"; "gain" and "amplification" both become "Amp";
  # every other value is kept as-is (as character)
  cnv_df <- readr::read_tsv(opt$cnv_file) %>%
    dplyr::mutate(
      Variant_Classification = dplyr::case_when(
        Variant_Classification == "loss" ~ "Del",
        Variant_Classification %in% c("gain", "amplification") ~ "Amp",
        TRUE ~ as.character(Variant_Classification)
      )
    )
}

# Read in fusion file and join
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions analyses/oncoprint-landscape/run-oncoprint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ maf_consensus=../../data/pbta-snv-consensus-mutation.maf.tsv.gz
fusion_file=../../data/pbta-fusion-putative-oncogenic.tsv
histologies_file=../../data/pbta-histologies.tsv
intermediate_directory=../../scratch/oncoprint_files
# Filename leads for the primary-only and primary-plus outputs
# (presumably passed on as --filename_lead; confirm against the calls below)
primary_filename="primary_only"
primaryplus_filename="primary-plus"
focal_directory=../focal-cn-file-preparation/results
focal_cnv_file=${focal_directory}/consensus_seg_most_focal_cn_status.tsv.gz

Expand Down
5 changes: 2 additions & 3 deletions analyses/oncoprint-landscape/util/oncoplot-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,8 @@ prepare_maf_object <- function(maf_df,
"Multi_Hit_Fusion",
"Hom_Deletion",
"Hem_Deletion",
"amplification",
"gain",
"loss"
"Amp",
"Del"
)
)

Expand Down
5 changes: 2 additions & 3 deletions figures/palettes/oncoprint_color_palette.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ Fusion #7B68EE
Multi_Hit #CCCCCC
Hom_Deletion #313695
Hem_Deletion #abd9e9
amplification #c51b7d
loss #0072B2
gain #D55E00
Del #0072B2
Amp #D55E00
High_Level_Gain #FF0000
Multi_Hit_Fusion #CD96CD