Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Using mapping histology groups for plotting implementation (PR 3 of 4) #918

Merged
merged 10 commits into from
Jan 22, 2021
13 changes: 9 additions & 4 deletions analyses/cnv-chrom-plot/cn_status_heatmap.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -396,12 +396,17 @@ histologies <-
Make a color key that's formatted for ComplexHeatmap.

```{r}
# Get a distinct version of the color keys
histologies_color_key_df <- histologies %>%
dplyr::select(display_group, hex_codes) %>%
dplyr::distinct()

# Make color key specific to these samples
histologies_color_key_filtered <- unique(histologies$hex_codes)
names(histologies_color_key_filtered) <- unique(histologies$display_group)
histologies_color_key <- histologies_color_key_df$hex_codes
names(histologies_color_key) <- histologies_color_key_df$display_group

# Get coordinate start positions
hist_start <- match(names(histologies_color_key_filtered), histologies$display_group)
hist_start <- match(names(histologies_color_key), histologies$display_group)

# Get coordinate end positions for each histology group
hist_end <- hist_start + summary(histologies$display_group)
Expand All @@ -424,7 +429,7 @@ hist_text <- ComplexHeatmap::anno_mark(
# Create the Heatmap annotation object
hist_annot <- ComplexHeatmap::HeatmapAnnotation(
df = data.frame(histologies) %>% dplyr::select(-group_n, -hex_codes),
col = list(display_group = histologies_color_key_filtered),
col = list(display_group = histologies_color_key),
which = "row",
show_annotation_name = FALSE,
show_legend = FALSE,
Expand Down
107 changes: 59 additions & 48 deletions analyses/cnv-chrom-plot/cn_status_heatmap.nb.html

Large diffs are not rendered by default.

44 changes: 24 additions & 20 deletions analyses/oncoprint-landscape/01-plot-oncoprint.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,21 @@ if (!is.null(opt$focal_file)) {
focal_df <- readr::read_tsv(file.path(opt$focal_file))
}

#### Set up oncoprint annotation objects --------------------------------------
# Read in histology standard color palette for project
histology_col_palette <-
readr::read_tsv(file.path(
root_dir,
"figures",
"palettes",
"histology_color_palette.tsv"
))
histology_label_mapping <- readr::read_tsv(
file.path(root_dir,
"figures",
"palettes",
"histology_label_color_table.tsv")) %>%
# Select just the columns we will need for plotting
dplyr::select(Kids_First_Biospecimen_ID, display_group, display_order, hex_codes)

# Join on these columns to the metadata
metadata <- metadata %>%
dplyr::inner_join(histology_label_mapping, by = "Kids_First_Biospecimen_ID") %>%
# Reorder display_group based on display_order
dplyr::mutate(display_group = forcats::fct_reorder(display_group, display_order))

# Read in the oncoprint color palette
oncoprint_col_palette <- readr::read_tsv(file.path(
Expand All @@ -180,22 +187,19 @@ oncoprint_col_palette <- readr::read_tsv(file.path(
# Use deframe so we can use it as a recoding list
tibble::deframe()

#### Set up oncoprint annotation objects --------------------------------------

# Color coding for `short_histology` classification
# Color coding for `display_group` classification
# Get unique tumor descriptor categories
short_histologies <- unique(metadata$short_histology) %>%
tidyr::replace_na("none") %>%
sort()

# Save the vector of hex codes from the short histology palette
short_histology_col_key <- histology_col_palette$hex_codes
histologies_color_key_df <- metadata %>%
dplyr::arrange(display_order) %>%
dplyr::select(display_group, hex_codes) %>%
dplyr::distinct()

# Now assign the color names
names(short_histology_col_key) <- short_histologies
# Make color key specific to these samples
histologies_color_key <- histologies_color_key_df$hex_codes
names(histologies_color_key) <- histologies_color_key_df$display_group

# Now format the color key objet into a list
annotation_colors <- list(short_histology = short_histology_col_key)
annotation_colors <- list(display_group = histologies_color_key)

#### Prepare MAF object for plotting ------------------------------------------

Expand All @@ -219,7 +223,7 @@ png(
)
oncoplot(
maf_object,
clinicalFeatures = "short_histology",
clinicalFeatures = "display_group",
genes = goi_list,
logColBar = TRUE,
sortByAnnotation = TRUE,
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
52 changes: 27 additions & 25 deletions analyses/sample-distribution-analysis/02-multilayer-plots.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This script creates a treemap and a multilayer pie chart to represent the
# broad histologies, short histologies, and molecular subtypes within the dataset.
# broad histologies, display_group, and molecular subtypes within the dataset.
#
# This script uses the packages sunburstR, d3r, and treemap to produce the
# visualizations.
Expand Down Expand Up @@ -35,57 +35,58 @@ plots_dir <- file.path(output_dir, "plots")
histologies_df <- readr::read_tsv(file.path(root_dir, "data",
"pbta-histologies.tsv"), guess_max = 10000)

# Read in histology standard color palette for project
histology_label_mapping <- readr::read_tsv(
file.path(root_dir,
"figures",
"palettes",
"histology_label_color_table.tsv")) %>%
# Select just the columns we will need for plotting
dplyr::select(Kids_First_Biospecimen_ID, display_group, display_order, hex_codes)

# Create a colorblind-friendly color vector
color <- colorblindr::palette_OkabeIto

# Create final data.frame prepped for treemap and sunburst functions
final_df <- histologies_df %>%
dplyr::filter(sample_type == "Tumor",
composition == "Solid Tissue") %>%
dplyr::distinct(Kids_First_Participant_ID, broad_histology,
short_histology, harmonized_diagnosis) %>%
# Join on the color codes
dplyr::inner_join(histology_label_mapping, by = "Kids_First_Biospecimen_ID") %>%
# Reorder display_group based on display_order
dplyr::mutate(display_group = forcats::fct_reorder(display_group, display_order)) %>%
# Get distinct based on participant IDs
dplyr::distinct(Kids_First_Participant_ID,
broad_histology,
display_group,
harmonized_diagnosis,
hex_codes) %>%
# Select our 3 columns of interest
dplyr::select(broad_histology, short_histology, harmonized_diagnosis) %>%
dplyr::select(broad_histology, display_group, harmonized_diagnosis, hex_codes) %>%
# Remove any row that has an NA
dplyr::filter(complete.cases(.)) %>%
# Group by all 3 columns in order to count
dplyr::group_by(broad_histology, short_histology, harmonized_diagnosis) %>%
dplyr::group_by(broad_histology, display_group, harmonized_diagnosis, hex_codes) %>%
# Add the count to a column named size
dplyr::add_count(name = "size") %>%
# Place the value 1 in a column named counter for treemap and sunburt plots
dplyr::mutate(counter= c(1)) %>%
# Change the column names
dplyr::rename(level1 = broad_histology,
level2 = short_histology,
level2 = display_group,
level3 = harmonized_diagnosis) %>%
# Reorder the rows according to the 3 levels
dplyr::arrange(level1, level2, level3) %>%
# tbl_df -> data.frame
as.data.frame()
as.data.frame()

# Save to tsv file
readr::write_tsv(final_df, file.path(results_dir, "plots_df.tsv"))

# Create and save treemap using ggplot2
# Read in the histology color palette
color_palette <-
readr::read_tsv(file.path(
root_dir,
"figures",
"palettes",
"histology_color_palette.tsv"
))

# Join the color palette for the colors for each short histology value --
# palette is generated in `figures/scripts/color_palettes.R`
final_df2 <- final_df %>%
dplyr::left_join(color_palette, by = c("level2" = "color_names")) %>%
dplyr::distinct() # Remove the redundant rows from prep for the `treemap` function

# Plot the treemap
treemap <-
ggplot(
final_df2,
final_df,
aes(
area = size,
fill = hex_codes,
Expand All @@ -112,7 +113,8 @@ treemap <-
colour = "#FAFAFA",
min.size = 0
) +
theme(legend.position = "none")
theme(legend.position = "none") +
scale_fill_identity()

# Save treemap
ggsave(
Expand Down

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Loading