Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Using mapping histology groups for plotting implementation (PR 5 of 5) #927

Merged
Merged
39 changes: 0 additions & 39 deletions figures/palettes/histology_color_palette.tsv

This file was deleted.

Binary file modified figures/pngs/SuppTelomerase_Activities.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figures/pngs/Telomerase_Activities.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
103 changes: 63 additions & 40 deletions figures/scripts/TelomeraseActivitites.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
library('ggpubr')
stringsAsFactors=FALSE
# Script for creating the Telomerase activities figure

library(stringr)
library(gridBase)
library(gridGraphics)
library(optparse)
library(forcats) ### for fct_reorder()
library(forcats) # for fct_reorder()
library(ggpubr) # For pvalue_stat_manual

################################ File paths and reading data for making figure ###################################################################

Expand All @@ -29,28 +30,41 @@ Histologies = file.path(root_dir, "data", "pbta-histologies.tsv") ### Variable


palette_dir <- file.path(root_dir, "figures", "palettes")
histology_palette <- readr::read_tsv(file.path(palette_dir,"histology_color_palette.tsv")) ### reading in histologies color from the define file
colnames(histology_palette)[1]='short_histology'
histology_palette = data.frame(histology_palette)
hex_codes = histology_palette$hex_codes
names(hex_codes)= histology_palette$short_histology

# Import standard color palettes for project
histology_label_mapping <- readr::read_tsv(
file.path(palette_dir, "histology_label_color_table.tsv")) %>%
# Select just the columns we will need for plotting
dplyr::select(Kids_First_Biospecimen_ID, display_group, display_order, hex_codes) %>%
# Reorder display_group based on display_order
dplyr::mutate(display_group = forcats::fct_reorder(display_group, display_order))


# Declare output directory
output_dir <- file.path(root_dir, "figures", "pngs")
telomerase_png <- file.path(output_dir, "Telomerase_Activities.png")
supplementary_telomerase_png <- file.path(output_dir, "SuppTelomerase_Activities.png")

# Read in the histologies file and join on the histology color mappings and labels
PBTA_Histology <- readr::read_tsv(Histologies) %>%
dplyr::inner_join(histology_label_mapping, by = "Kids_First_Biospecimen_ID") %>%
dplyr::rename('SampleID' = "Kids_First_Biospecimen_ID") ## Renaming "Kids_First_Biospecimen_ID" as SampleID for comparison purpose


# Get a distinct version of the color keys
histologies_color_key_df <- PBTA_Histology %>%
dplyr::select(display_group, hex_codes) %>%
dplyr::distinct()

# Make color key specific to these samples
annotation_colors <- histologies_color_key_df$hex_codes
names(annotation_colors) <- histologies_color_key_df$display_group

PTBA_Histology = read.table(Histologies,sep='\t',head=T) ## Reading the clinical data
colnames(PTBA_Histology)[colnames(PTBA_Histology)=="Kids_First_Biospecimen_ID"]='SampleID' ## Renaming "Kids_First_Biospecimen_ID" as SampleID for comparison purpose

TMScores1 = read.table(Telomerase_StdFpkm,sep='\t',head=T) ## Reading Stranded FPKM telomerase scores
colnames(TMScores1)[colnames(TMScores1)=="NormEXTENDScores"]='NormEXTENDScores_Stranded_FPKM'

PTBA_GE_Standard_Histology = merge(PTBA_Histology,TMScores1,by='SampleID') ### Merging Clinical data with the Telomerase scores
PTBA_GE_Standard_Histology = merge(PBTA_Histology,TMScores1,by='SampleID') ### Merging Clinical data with the Telomerase scores


TMScores2 = read.table(Telomerase_PolyaFpkm,sep='\t',head=T)
Expand All @@ -68,16 +82,7 @@ PBTA_Stranded_TMScores = merge(TMScores1,TMScores3,by='SampleID')

########################################## Figure A and B dataframe compilation #########################################################

Stranded_Histology = PTBA_GE_Standard_Histology
Stranded_Histology = Stranded_Histology[-which(Stranded_Histology$short_histology == "Other"),] ### Removing the tumors with catagory labelled as "Others"
Frequency = data.frame(table(Stranded_Histology$short_histology)) ### Counting the number of cases for all histologies to avoid less number further
colnames(Frequency)=c('Variables','Freq')
Frequency = Frequency[which(Frequency$Freq == 1),]
Stranded_Histology = Stranded_Histology[-which(Stranded_Histology$short_histology %in% Frequency$Variables),] ### Removing the tumors with only one case in histologies

Stranded_Histology$short_histology[Stranded_Histology$short_histology == "CNS ganglioneuroblastoma"]= "CNS neuroblastoma" #### Short hist color data has missing CNS ganglioneuroblastoma therefore renaming it
Stranded_Histology$short_histology[Stranded_Histology$short_histology == "CNS Embryonal tumor"] = "Embryonal Tumor" #### Short hist color data has missing CNS Embryonal tumor therefore renaming it

Stranded_Histology <- PTBA_GE_Standard_Histology

########################################## Figure C data compilation #########################################################

Expand All @@ -94,39 +99,51 @@ combinations = nrow(stat.test)
statistics = stat.test%>%
mutate(y.position = seq(1,by=0.04,length.out=combinations))


######################################### Saving Figure in PNG format


## Figure for main text: Boxplots
png(telomerase_png,width = 5, height = 6, units = "in", res = 1200)

theme_set(theme_classic() +
theme(plot.title = element_text(size=10, face="bold"),axis.text.x=element_text(angle=40,size=6,vjust=1,hjust=1),axis.text.y=element_text(size=7), axis.title.x = element_text(size=0), axis.title.y = element_text(size=8),
legend.position = "none",
legend.key.size= unit(0.3,"cm"),
legend.key.width = unit(0.3,"cm"),
legend.title = element_text(size=7),
legend.text =element_text(size=6)
theme(plot.title = element_text(size=10, face="bold"),
axis.text.x=element_text(angle=40,size=6,vjust=1,hjust=1),
axis.text.y=element_text(size=7),
axis.title.x = element_text(size=0),
axis.title.y = element_text(size=8),
legend.position = "none",
legend.key.size= unit(0.3,"cm"),
legend.key.width = unit(0.3,"cm"),
legend.title = element_text(size=7),
legend.text =element_text(size=6)
)
)

P1 <- ggplot(Stranded_Histology, aes(x=fct_reorder(display_group, NormEXTENDScores_Stranded_FPKM,.desc =TRUE) %>%
forcats::fct_relevel("Benign", "Other tumor", "Normal", after = Inf),
y=NormEXTENDScores_Stranded_FPKM)) +
geom_boxplot(size=0.2,notch=FALSE,outlier.size = 0,outlier.shape=NA,
aes(color=display_group,fill=display_group),alpha=0.4)+
geom_jitter(shape=16, cex=0.1,aes(color=display_group))+
scale_fill_manual(values = annotation_colors, aesthetics = c("colour", "fill"))


P1 = ggplot(Stranded_Histology, aes(x=fct_reorder(short_histology,NormEXTENDScores_Stranded_FPKM,.desc =TRUE),y=NormEXTENDScores_Stranded_FPKM))+geom_boxplot(size=0.2,notch=FALSE,outlier.size = 0,outlier.shape=NA,aes(color=short_histology,fill=short_histology),alpha=0.4)+ geom_jitter(shape=16, cex=0.1,aes(color=short_histology))
P1 = P1+scale_fill_manual(values = hex_codes)
P1 = P1+scale_color_manual(values = hex_codes)



P2 = ggplot(Stranded_Histology, aes(x=fct_reorder(broad_histology,NormEXTENDScores_Stranded_FPKM,.desc =TRUE),y=NormEXTENDScores_Stranded_FPKM))+geom_boxplot(size=0.2,notch=FALSE,outlier.size = 0,outlier.shape=NA,color="black",fill="#808080",alpha=0.4)+ geom_jitter(shape=16, cex=0.1,color="black")
P2 <- ggplot(Stranded_Histology,
aes(x=fct_reorder(broad_histology,NormEXTENDScores_Stranded_FPKM,.desc =TRUE),
y=NormEXTENDScores_Stranded_FPKM)) +
geom_boxplot(size=0.2,notch=FALSE,outlier.size = 0, outlier.shape=NA,color="black",fill="#808080",alpha=0.4) +
geom_jitter(shape=16, cex=0.1,color="black")


P3 = ggplot(Medulloblastoma_His, aes(x=fct_reorder(molecular_subtype,NormEXTENDScores_Stranded_FPKM,.desc =TRUE),y=NormEXTENDScores_Stranded_FPKM))+geom_boxplot(size=0.2,notch=FALSE,outlier.size = 0,outlier.shape=NA,color="black",fill="#808080",alpha=0.4)+ geom_jitter(shape=16, width = 0.1,size=0.2,color="black")+stat_pvalue_manual(
P3 <- ggplot(Medulloblastoma_His, aes(x=fct_reorder(molecular_subtype,NormEXTENDScores_Stranded_FPKM,.desc =TRUE),
y=NormEXTENDScores_Stranded_FPKM)) +
geom_boxplot(size=0.2,notch=FALSE,outlier.size = 0,outlier.shape=NA,color="black",fill="#808080",alpha=0.4) +
geom_jitter(shape=16, width = 0.1,size=0.2,color="black") +
stat_pvalue_manual(
data = statistics, label = "p.adj",size=1.7,
xmin = "group1", xmax = "group2",tip.length = 0.003,
y.position = "y.position"
)
)

grid.newpage()
# Create layout : nrow = 2, ncol =2
Expand All @@ -138,9 +155,15 @@ define_region <- function(row, col){



print(ggpar(P1,font.xtickslab =c(5,"black"),font.ytickslab =6,font.x = 6,font.y=6,ylab="EXTEND Scores",xlab = "Tumor Short Histology",title="A",font.title=7),vp = define_region(row = 1:3, col = 1:3))
print(ggpar(P2,font.xtickslab =c(5,"black"),font.ytickslab =6,font.x = 6,font.y=6,ylab="EXTEND Scores",xlab = "Tumor Broad Histology",title="B",font.title=7),vp = define_region(row = 4:6, col = 1:2))
print(ggpar(P3,font.xtickslab =c(5,"black"),font.ytickslab =6,font.x = 6,font.y=6,font.legend=6,xlab="Medulloblastoma Subgroups",ylab="EXTEND Scores",title="C",font.title=7),vp = define_region(row = 4:5, col = 3))
print(ggpar(P1,font.xtickslab =c(5,"black"),
font.ytickslab =6,font.x = 6,font.y=6,ylab="EXTEND Scores",
xlab = "Tumor Display Group",title="A",font.title=7),vp = define_region(row = 1:3, col = 1:3))
print(ggpar(P2,font.xtickslab =c(5,"black"),
font.ytickslab =6,font.x = 6,font.y=6,ylab="EXTEND Scores",
xlab = "Tumor Broad Histology",title="B",font.title=7),vp = define_region(row = 4:6, col = 1:2))
print(ggpar(P3,font.xtickslab =c(5,"black"),
font.ytickslab =6,font.x = 6,font.y=6,font.legend=6,
xlab="Medulloblastoma Subgroups",ylab="EXTEND Scores",title="C",font.title=7),vp = define_region(row = 4:5, col = 3))

dev.off()

Expand Down