Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Update Figure 4 single panel PDFs, add compiled figure #1194

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
0fe0890
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Jan 11, 2021
7c0997c
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Jan 19, 2021
8727fac
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Mar 31, 2021
e7f1d0a
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Apr 12, 2021
a199d3c
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Apr 13, 2021
8431b1e
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Apr 19, 2021
c29d225
Merge branch 'AlexsLemonade:master' into master
jaclyn-taroni May 7, 2021
5292b8e
Merge branch 'AlexsLemonade:master' into master
jaclyn-taroni May 10, 2021
6175d61
Merge branch 'AlexsLemonade:master' into master
jaclyn-taroni May 10, 2021
43830e8
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni May 11, 2021
ed4bb16
Merge branch 'AlexsLemonade:master' into master
jaclyn-taroni May 13, 2021
0a5093a
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Jul 19, 2021
d9e5645
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Aug 9, 2021
1ac140e
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Aug 30, 2021
1912ce2
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Sep 9, 2021
65e2778
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Sep 14, 2021
728893c
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Oct 21, 2021
e030998
Merge remote-tracking branch 'upstream/master'
jaclyn-taroni Oct 21, 2021
29ebaaa
Initial pass at histology palette overhaul
jaclyn-taroni Sep 14, 2021
905b632
Add display columns for legends
jaclyn-taroni Sep 15, 2021
1f1078e
Remove histology label palette NB from CI
jaclyn-taroni Sep 15, 2021
aaa602b
Add broad histology order
jaclyn-taroni Sep 15, 2021
f1f87ae
Add oncoprint_include
jaclyn-taroni Sep 15, 2021
0c89ac5
Add oncoprint grouping info to palette df
jaclyn-taroni Sep 15, 2021
3f7661d
Allow for alteration of alpha values in custom function
jaclyn-taroni Sep 15, 2021
d246cd9
Remove immune deconv, rework to use new palette
jaclyn-taroni Sep 15, 2021
5ceea74
Use updated palette paradigm in telomerase activity box plot
jaclyn-taroni Sep 15, 2021
1fcecca
Move up telomerase activity plotting so panel can be used downstream
jaclyn-taroni Sep 15, 2021
e7309ee
Take an approach where individual panels are saved as PDFs
jaclyn-taroni Oct 22, 2021
f2ea887
panels sub directory
jaclyn-taroni Oct 22, 2021
d69d5b8
Make cancer_group other darker; rerun
jaclyn-taroni Oct 22, 2021
51e3943
Merge branch 'jaclyn-taroni/1176-palette' into jaclyn-taroni/new-tx-o…
jaclyn-taroni Oct 22, 2021
71a28b9
Rerun telomerase activities
jaclyn-taroni Oct 22, 2021
e37884c
Compile individual panels via AI & document
jaclyn-taroni Oct 22, 2021
8cd6443
Merge remote-tracking branch 'upstream/master' into jaclyn-taroni/117…
jaclyn-taroni Oct 22, 2021
cc1495a
Missed the instance where I recode based on other hex code
jaclyn-taroni Oct 27, 2021
9f68386
Merge branch 'master' into jaclyn-taroni/1176-palette
jaclyn-taroni Oct 27, 2021
9791110
Merge branch 'jaclyn-taroni/1176-palette' into jaclyn-taroni/new-tx-o…
jaclyn-taroni Oct 27, 2021
65a8918
Change up approach to oncoprint
jaclyn-taroni Oct 29, 2021
a7d3f26
Merge branch 'jaclyn-taroni/1176-palette' into jaclyn-taroni/new-tx-o…
jaclyn-taroni Oct 29, 2021
d2edad8
Add theme_pubr() to UMAP panel
jaclyn-taroni Oct 31, 2021
2427302
Tweak the cancer group palette
jaclyn-taroni Nov 15, 2021
010fab0
Merge branch 'jaclyn-taroni/1176-palette' into jaclyn-taroni/new-tx-o…
jaclyn-taroni Nov 15, 2021
de49da7
Rerun with new cancer group palette
jaclyn-taroni Nov 15, 2021
39690c3
Remove outdated assembled figure
jaclyn-taroni Nov 15, 2021
6cae3c5
Merge remote-tracking branch 'upstream/master' into jaclyn-taroni/new…
jaclyn-taroni Nov 15, 2021
94af73d
Updated versions of compiled figure
jaclyn-taroni Nov 16, 2021
740a18d
Remove commented out code
jaclyn-taroni Nov 16, 2021
94fa212
Merge branch 'master' into jaclyn-taroni/new-tx-overview
jaclyn-taroni Nov 17, 2021
7bc6c8a
Merge branch 'master' into jaclyn-taroni/new-tx-overview
jaclyn-taroni Nov 28, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ plot_dimension_reduction <- function(aligned_scores_df,
point_size = NULL,
x_label,
y_label,
alpha_value = 0.3,
score1 = 1,
score2 = 2,
color_palette = NULL) {
Expand Down Expand Up @@ -267,7 +268,7 @@ plot_dimension_reduction <- function(aligned_scores_df,
}

dimension_reduction_plot <- dimension_reduction_plot +
ggplot2::geom_point(alpha = 0.3) +
ggplot2::geom_point(alpha = alpha_value) +
ggplot2::labs(x = x_label, y = y_label) +
ggplot2::theme_bw()

Expand Down
59 changes: 25 additions & 34 deletions figures/generate-figures.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,29 @@ for filename in "${filenames[@]}"; do

done

## Copy number status heatmap
####### Telomerase Activities

# Generate collapsed data for count files
Rscript ${analyses_dir}/collapse-rnaseq/01-summarize_matrices.R \
-i ${data_dir}/pbta-gene-counts-rsem-expected_count.stranded.rds \
-g ${data_dir}/gencode.v27.primary_assembly.annotation.gtf.gz \
-m ${analyses_dir}/collapse-rnaseq/pbta-gene-counts-rsem-expected_count-collapsed.stranded.rds \
-t ${analyses_dir}/collapse-rnaseq/pbta-gene-counts-rsem-expected_count-collapsed_table.stranded.rds

Rscript ${analyses_dir}/collapse-rnaseq/01-summarize_matrices.R \
-i ${data_dir}/pbta-gene-counts-rsem-expected_count.polya.rds \
-g ${data_dir}/gencode.v27.primary_assembly.annotation.gtf.gz \
-m ${analyses_dir}/collapse-rnaseq/pbta-gene-counts-rsem-expected_count-collapsed.polya.rds \
-t ${analyses_dir}/collapse-rnaseq/pbta-gene-counts-rsem-expected_count-collapsed_table.polya.rds

# Generate telomerase activities using gene expression data from the collapsed RNA-seq data files
Rscript --vanilla ${analyses_dir}/telomerase-activity-prediction/01-run-EXTEND.R --input ${analyses_dir}/collapse-rnaseq/results/pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds --output ${analyses_dir}/telomerase-activity-prediction/results/TelomeraseScores_PTBAStranded_FPKM.txt
Rscript --vanilla ${analyses_dir}/telomerase-activity-prediction/01-run-EXTEND.R --input ${analyses_dir}/collapse-rnaseq/results/pbta-gene-expression-rsem-fpkm-collapsed.polya.rds --output ${analyses_dir}/telomerase-activity-prediction/results/TelomeraseScores_PTBAPolya_FPKM.txt
Rscript --vanilla ${analyses_dir}/telomerase-activity-prediction/01-run-EXTEND.R --input ${analyses_dir}/collapse-rnaseq/results/pbta-gene-counts-rsem-expected_count-collapsed.stranded.rds --output ${analyses_dir}/telomerase-activity-prediction/results/TelomeraseScores_PTBAStranded_counts.txt
Rscript --vanilla ${analyses_dir}/telomerase-activity-prediction/01-run-EXTEND.R --input ${analyses_dir}/collapse-rnaseq/results/pbta-gene-counts-rsem-expected_count-collapsed.polya.rds --output ${analyses_dir}/telomerase-activity-prediction/results/TelomeraseScores_PTBAPolya_counts.txt

# Build figures of telomerase activity
Rscript --vanilla scripts/TelomeraseActivities.R

####### Transcriptomic overview

Expand All @@ -122,16 +144,8 @@ Rscript --vanilla ${analyses_dir}/gene-set-enrichment-analysis/01-conduct-gsea-a
# displayed in a heatmap
Rscript --vanilla -e "rmarkdown::render('${analyses_dir}/gene-set-enrichment-analysis/02-model-gsea.Rmd', clean = TRUE)"

# Immune deconvolution - we can't use CIBERSORT because we don't have access to it
# By not supplying an argument to --method, we are electing only to use xCell
Rscript --vanilla ${analyses_dir}/immune-deconv/01-immune-deconv.R \
--polyaexprs ${analyses_dir}/collapse-rnaseq/results/pbta-gene-expression-rsem-fpkm-collapsed.polya.rds \
--strandedexprs ${analyses_dir}/collapse-rnaseq/results/pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds \
--clin ${data_dir}/pbta-histologies.tsv \
--output ${analyses_dir}/immune-deconv/results/deconv-output-for-figures.RData

# Step that generates the transcriptomic overview figure itself
Rscript --vanilla scripts/transcriptomic-overview.R
# Step that generates the GSVA, UMAP, and legend panels
Rscript --vanilla scripts/fig4-panels-gsva-umap.R

####### CN Status Heatmap
if [ "$RUN_LOCAL" -lt "1" ]; then
Expand All @@ -143,26 +157,3 @@ fi
Rscript -e "rmarkdown::render('${analyses_dir}/cnv-chrom-plot/cn_status_heatmap.Rmd',
clean = TRUE, params = list(final_figure=TRUE))"

####### Telomerase Activities

# Generate collapsed data for count files
Rscript ${analyses_dir}/collapse-rnaseq/01-summarize_matrices.R \
-i ${data_dir}/pbta-gene-counts-rsem-expected_count.stranded.rds \
-g ${data_dir}/gencode.v27.primary_assembly.annotation.gtf.gz \
-m ${analyses_dir}/collapse-rnaseq/pbta-gene-counts-rsem-expected_count-collapsed.stranded.rds \
-t ${analyses_dir}/collapse-rnaseq/pbta-gene-counts-rsem-expected_count-collapsed_table.stranded.rds

Rscript ${analyses_dir}/collapse-rnaseq/01-summarize_matrices.R \
-i ${data_dir}/pbta-gene-counts-rsem-expected_count.polya.rds \
-g ${data_dir}/gencode.v27.primary_assembly.annotation.gtf.gz \
-m ${analyses_dir}/collapse-rnaseq/pbta-gene-counts-rsem-expected_count-collapsed.polya.rds \
-t ${analyses_dir}/collapse-rnaseq/pbta-gene-counts-rsem-expected_count-collapsed_table.polya.rds

# Generate telomerase activities using gene expression data from the collapsed RNA-seq data files
Rscript --vanilla ${analyses_dir}/telomerase-activity-prediction/01-run-EXTEND.R --input ${analyses_dir}/collapse-rnaseq/results/pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds --output ${analyses_dir}/telomerase-activity-prediction/results/TelomeraseScores_PTBAStranded_FPKM.txt
Rscript --vanilla ${analyses_dir}/telomerase-activity-prediction/01-run-EXTEND.R --input ${analyses_dir}/collapse-rnaseq/results/pbta-gene-expression-rsem-fpkm-collapsed.polya.rds --output ${analyses_dir}/telomerase-activity-prediction/results/TelomeraseScores_PTBAPolya_FPKM.txt
Rscript --vanilla ${analyses_dir}/telomerase-activity-prediction/01-run-EXTEND.R --input ${analyses_dir}/collapse-rnaseq/results/pbta-gene-counts-rsem-expected_count-collapsed.stranded.rds --output ${analyses_dir}/telomerase-activity-prediction/results/TelomeraseScores_PTBAStranded_counts.txt
Rscript --vanilla ${analyses_dir}/telomerase-activity-prediction/01-run-EXTEND.R --input ${analyses_dir}/collapse-rnaseq/results/pbta-gene-counts-rsem-expected_count-collapsed.polya.rds --output ${analyses_dir}/telomerase-activity-prediction/results/TelomeraseScores_PTBAPolya_counts.txt

# Build figures of telomerase activity
Rscript --vanilla scripts/TelomeraseActivities.R
9 changes: 9 additions & 0 deletions figures/pdfs/fig4/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
### Figure 4

Figure 4 is compiled from the output of two figure scripts:

* `figures/scripts/TelomeraseActivities.R`
* `figures/scripts/fig4-panels-gsva-umap.R`

The individual panels can be found in `panels`.
The panels are compiled into a single figure via Adobe Illustrator, saved as a PDF (`figure4.pdf`) in this directory, and then exported as a PNG file (`figure4.png`) to `figures/pngs`.
Binary file added figures/pdfs/fig4/figure4.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added figures/pdfs/fig4/panels/gsva_panel.pdf
Binary file not shown.
Binary file added figures/pdfs/fig4/panels/umap_panel.pdf
Binary file not shown.
Binary file removed figures/pngs/Telomerase_Activities.png
Binary file not shown.
Binary file added figures/pngs/figure4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed figures/pngs/transcriptomic-overview.png
Binary file not shown.
96 changes: 38 additions & 58 deletions figures/scripts/TelomeraseActivities.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,44 +39,41 @@ Histologies <- file.path(root_dir, "data", "pbta-histologies.tsv") ### Variable

palette_dir <- file.path(root_dir, "figures", "palettes")

# Import standard color palettes for project
histology_label_mapping <- readr::read_tsv(
file.path(palette_dir, "histology_label_color_table.tsv")
) %>%
# Select just the columns we will need for plotting
dplyr::select(Kids_First_Biospecimen_ID, cancer_group, cancer_group_order, cancer_group_hex_codes) %>%
# Reorder cancer_group based on cancer_group_order
dplyr::mutate(cancer_group = forcats::fct_reorder(cancer_group, cancer_group_order))


# Declare output directory
output_dir <- file.path(root_dir, "figures", "pngs")
telomerase_png <- file.path(output_dir, "Telomerase_Activities.png")
supplementary_telomerase_png <- file.path(output_dir, "SuppTelomerase_Activities.png")

# Read in the histologies file and join on the histology color mappings and labels
PBTA_Histology <- readr::read_tsv(Histologies) %>%
dplyr::inner_join(histology_label_mapping, by =
c("Kids_First_Biospecimen_ID",
"cancer_group")) %>%
## Renaming "Kids_First_Biospecimen_ID" as SampleID for comparison purpose
dplyr::rename("SampleID" = "Kids_First_Biospecimen_ID") %>%
dplyr::filter(!is.na(cancer_group))
output_dir <- file.path(root_dir, "figures", "pdfs", "fig4", "panels")
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}

# Get a distinct version of the color keys
histologies_color_key_df <- PBTA_Histology %>%
dplyr::select(cancer_group, cancer_group_hex_codes) %>%
dplyr::distinct()
telomerase_pdf <- file.path(output_dir, "Telomerase_Activities.pdf")

# Make color key specific to these samples
annotation_colors <- histologies_color_key_df$cancer_group_hex_codes
names(annotation_colors) <- histologies_color_key_df$cancer_group

supplementary_telomerase_png <- file.path(root_dir, "figures", "pngs", "SuppTelomerase_Activities.png")

# Get palette for cancer group
cancer_group_palette <- readr::read_tsv(
file.path(palette_dir, "broad_histology_cancer_group_palette.tsv")
) %>%
dplyr::select(cancer_group, cancer_group_hex) %>%
# Remove NA values -- a cancer group hex value will be NA only if the
# cancer group is NA
dplyr::filter(complete.cases(.))

# Make color palette suitable for use with ggplot
annotation_colors <- cancer_group_palette$cancer_group_hex
names(annotation_colors) <- cancer_group_palette$cancer_group

# We need to map between biospecimen ID and cancer group
cancer_group_id_df <- readr::read_tsv(Histologies) %>%
dplyr::filter(!is.na(cancer_group)) %>%
dplyr::select(Kids_First_Biospecimen_ID, cancer_group) %>%
## Renaming "Kids_First_Biospecimen_ID" as SampleID for comparison purpose
dplyr::rename("SampleID" = "Kids_First_Biospecimen_ID")

TMScores1 <- read.table(Telomerase_StdFpkm, sep = " ", head = T) ## Reading Stranded FPKM telomerase scores
colnames(TMScores1)[colnames(TMScores1) == "NormEXTENDScores"] <- "NormEXTENDScores_Stranded_FPKM"

PTBA_GE_Standard_Histology <- merge(PBTA_Histology, TMScores1, by = "SampleID") ### Merging Clinical data with the Telomerase scores
PTBA_GE_Standard_Histology <- merge(cancer_group_id_df, TMScores1, by = "SampleID") ### Merging Clinical data with the Telomerase scores

TMScores2 <- read.table(Telomerase_PolyaFpkm, sep = " ", head = T)
colnames(TMScores2)[colnames(TMScores2) == "NormEXTENDScores"] <- "NormEXTENDScores_PolyA_FPKM"
Expand All @@ -100,8 +97,6 @@ Stranded_Histology <- PTBA_GE_Standard_Histology


## Figure for main text: Boxplots
png(telomerase_png, width = 6, height = 6, units = "in", res = 1200)

theme_set(theme_classic() +
theme(
plot.title = element_text(size = 10, face = "bold"),
Expand All @@ -117,36 +112,21 @@ theme_set(theme_classic() +
))

P1 <- ggplot(Stranded_Histology , aes(
x = fct_reorder(cancer_group, NormEXTENDScores_Stranded_FPKM, .desc = TRUE) %>%
forcats::fct_relevel("Benign", "Other tumor", "Normal", after = Inf),
x = fct_reorder(cancer_group, NormEXTENDScores_Stranded_FPKM, .desc = TRUE),
y = NormEXTENDScores_Stranded_FPKM
)) +
geom_boxplot(
size = 0.2, notch = FALSE, outlier.size = 0, outlier.shape = NA,
aes(color = cancer_group, fill = cancer_group), alpha = 0.4
aes(color = cancer_group, fill = cancer_group), alpha = 0.65
) +
geom_jitter(shape = 16, cex = 0.1, aes(color = cancer_group)) +
scale_fill_manual(values = annotation_colors, aesthetics = c("colour", "fill"))

grid.newpage()
# Create layout: nrow = 6, ncol = 3
pushViewport(viewport(layout = grid.layout(nrow = 6, ncol = 3)))
# A helper function to define a region on the layout
define_region <- function(row, col) {
viewport(layout.pos.row = row, layout.pos.col = col)
}



print(ggpar(P1,
font.xtickslab = c(5, "black"),
font.ytickslab = 6, font.x = 6, font.y = 6, ylab = "EXTEND Scores",
xlab = "Tumor Cancer Group", title = "A", font.title = 7
), vp = define_region(row = 1:3, col = 1:3))

dev.off()

geom_jitter(shape = 16, cex = 0.2, aes(color = cancer_group),
alpha = 0.75) +
scale_fill_manual(values = annotation_colors, aesthetics = c("colour", "fill")) +
ylab("EXTEND Scores (Stranded FPKM)") +
xlab("Cancer Group")

ggsave(plot = P1, telomerase_pdf, dpi = 1200, units = "in",
width = 8, height = 4)

## Figure for SI: scatterplots
png(supplementary_telomerase_png, width = 4, height = 2, units = "in", res = 1200)
Expand All @@ -164,15 +144,15 @@ theme_set(theme_classic() +

P1 <- ggscatter(PBTA_PolyA_TMScores,
x = "NormEXTENDScores_PolyACounts", y = "NormEXTENDScores_PolyA_FPKM", color = "red", size = 0.2,
add = "reg.line", # Add regressin line
add = "reg.line", # Add regression line
add.params = list(color = "black", fill = "grey", size = 0.5), # Customize reg. line
conf.int = TRUE # Add confidence interval
) + stat_cor(method = "spearman", size = 2)


P2 <- ggscatter(PBTA_Stranded_TMScores,
x = "NormEXTENDScores_StrandedCounts", y = "NormEXTENDScores_Stranded_FPKM", color = "red", size = 0.2,
add = "reg.line", # Add regressin line
add = "reg.line", # Add regression line
add.params = list(color = "black", fill = "grey", size = 0.5), # Customize reg. line
conf.int = TRUE # Add confidence interval
) + stat_cor(method = "spearman", size = 2)
Expand Down
Loading