AlexsLemonade · jaclyn-taroni · Jan 22, 2021 · Jan 20, 2021 · Jan 20, 2021 · Jan 20, 2021
diff --git a/analyses/cnv-chrom-plot/cn_status_heatmap.Rmd b/analyses/cnv-chrom-plot/cn_status_heatmap.Rmd
@@ -396,12 +396,17 @@ histologies <-
 Make a color key that's formatted for ComplexHeatmap. 
 
 ```{r}
+# Get a distinct version of the color keys
+histologies_color_key_df <- histologies %>%
+  dplyr::select(display_group, hex_codes) %>%
+  dplyr::distinct()
+
 # Make color key specific to these samples
-histologies_color_key_filtered <- unique(histologies$hex_codes)
-names(histologies_color_key_filtered) <- unique(histologies$display_group)
+histologies_color_key <- histologies_color_key_df$hex_codes
+names(histologies_color_key) <- histologies_color_key_df$display_group
 
 # Get coordinate start positions
-hist_start <- match(names(histologies_color_key_filtered), histologies$display_group)
+hist_start <- match(names(histologies_color_key), histologies$display_group)
 
 # Get coordinate end positions for each histology group
 hist_end <- hist_start + summary(histologies$display_group)
@@ -424,7 +429,7 @@ hist_text <- ComplexHeatmap::anno_mark(
 # Create the Heatmap annotation object
 hist_annot <- ComplexHeatmap::HeatmapAnnotation(
   df = data.frame(histologies) %>% dplyr::select(-group_n, -hex_codes),
-  col = list(display_group = histologies_color_key_filtered),
+  col = list(display_group = histologies_color_key),
   which = "row",
   show_annotation_name = FALSE,
   show_legend = FALSE,

diff --git a/analyses/cnv-chrom-plot/cn_status_heatmap.nb.html b/analyses/cnv-chrom-plot/cn_status_heatmap.nb.html
diff --git a/analyses/oncoprint-landscape/01-plot-oncoprint.R b/analyses/oncoprint-landscape/01-plot-oncoprint.R
@@ -161,14 +161,21 @@ if (!is.null(opt$focal_file)) {
   focal_df <- readr::read_tsv(file.path(opt$focal_file))
 }
 
+#### Set up oncoprint annotation objects --------------------------------------
 # Read in histology standard color palette for project
-histology_col_palette <-
-  readr::read_tsv(file.path(
-    root_dir,
-    "figures",
-    "palettes",
-    "histology_color_palette.tsv"
-  ))
+histology_label_mapping <- readr::read_tsv(
+  file.path(root_dir,
+            "figures",
+            "palettes", 
+            "histology_label_color_table.tsv")) %>% 
+  # Select just the columns we will need for plotting
+  dplyr::select(Kids_First_Biospecimen_ID, display_group, display_order, hex_codes)
+
+# Join on these columns to the metadata
+metadata <- metadata %>% 
+  dplyr::inner_join(histology_label_mapping, by = "Kids_First_Biospecimen_ID") %>% 
+  # Reorder display_group based on display_order
+  dplyr::mutate(display_group = forcats::fct_reorder(display_group, display_order))
 
 # Read in the oncoprint color palette
 oncoprint_col_palette <- readr::read_tsv(file.path(
@@ -180,22 +187,19 @@ oncoprint_col_palette <- readr::read_tsv(file.path(
   # Use deframe so we can use it as a recoding list
   tibble::deframe()
 
-#### Set up oncoprint annotation objects --------------------------------------
-
-# Color coding for `short_histology` classification
+# Color coding for `display_group` classification
 # Get unique tumor descriptor categories
-short_histologies <- unique(metadata$short_histology) %>%
-  tidyr::replace_na("none") %>%
-  sort()
-
-# Save the vector of hex codes from the short histology palette
-short_histology_col_key <- histology_col_palette$hex_codes
+histologies_color_key_df <- metadata %>%
+  dplyr::arrange(display_order) %>%
+  dplyr::select(display_group, hex_codes) %>%
+  dplyr::distinct()
 
-# Now assign the color names
-names(short_histology_col_key) <- short_histologies
+# Make color key specific to these samples
+histologies_color_key <- histologies_color_key_df$hex_codes
+names(histologies_color_key) <- histologies_color_key_df$display_group
 
 # Now format the color key objet into a list
-annotation_colors <- list(short_histology = short_histology_col_key)
+annotation_colors <- list(display_group = histologies_color_key)
 
 #### Prepare MAF object for plotting ------------------------------------------
 
@@ -219,7 +223,7 @@ png(
 )
 oncoplot(
   maf_object,
-  clinicalFeatures = "short_histology",
+  clinicalFeatures = "display_group",
   genes = goi_list,
   logColBar = TRUE,
   sortByAnnotation = TRUE,

diff --git a/analyses/oncoprint-landscape/plots/all_participants_primary-plus_goi_oncoprint.png b/analyses/oncoprint-landscape/plots/all_participants_primary-plus_goi_oncoprint.png
diff --git a/analyses/oncoprint-landscape/plots/all_participants_primary-plus_oncoprint.png b/analyses/oncoprint-landscape/plots/all_participants_primary-plus_oncoprint.png
diff --git a/analyses/oncoprint-landscape/plots/all_participants_primary_only_goi_oncoprint.png b/analyses/oncoprint-landscape/plots/all_participants_primary_only_goi_oncoprint.png
diff --git a/analyses/oncoprint-landscape/plots/all_participants_primary_only_oncoprint.png b/analyses/oncoprint-landscape/plots/all_participants_primary_only_oncoprint.png
diff --git a/analyses/sample-distribution-analysis/02-multilayer-plots.R b/analyses/sample-distribution-analysis/02-multilayer-plots.R
@@ -1,5 +1,5 @@
 # This script creates a treemap and a multilayer pie chart to represent the
-# broad histologies, short histologies, and molecular subtypes within the dataset.
+# broad histologies, display_group, and molecular subtypes within the dataset.
 #
 # This script uses the packages sunburstR, d3r, and treemap to produce the
 # visualizations.
@@ -35,57 +35,58 @@ plots_dir <- file.path(output_dir, "plots")
 histologies_df <- readr::read_tsv(file.path(root_dir, "data",
                                             "pbta-histologies.tsv"), guess_max = 10000)
 
+# Read in histology standard color palette for project
+histology_label_mapping <- readr::read_tsv(
+  file.path(root_dir,
+            "figures",
+            "palettes", 
+            "histology_label_color_table.tsv")) %>% 
+  # Select just the columns we will need for plotting
+  dplyr::select(Kids_First_Biospecimen_ID, display_group, display_order, hex_codes)
+
 # Create a colorblind-friendly color vector
 color <- colorblindr::palette_OkabeIto
 
 # Create final data.frame prepped for treemap and sunburst functions
 final_df <- histologies_df %>%
   dplyr::filter(sample_type == "Tumor",
                 composition == "Solid Tissue") %>%
-  dplyr::distinct(Kids_First_Participant_ID, broad_histology,
-                  short_histology, harmonized_diagnosis) %>%
+  # Join on the color codes
+  dplyr::inner_join(histology_label_mapping, by = "Kids_First_Biospecimen_ID") %>% 
+  # Reorder display_group based on display_order
+  dplyr::mutate(display_group = forcats::fct_reorder(display_group, display_order)) %>%
+  # Get distinct based on participant IDs
+  dplyr::distinct(Kids_First_Participant_ID, 
+                  broad_histology, 
+                  display_group, 
+                  harmonized_diagnosis, 
+                  hex_codes) %>% 
   # Select our 3 columns of interest
-  dplyr::select(broad_histology, short_histology, harmonized_diagnosis) %>%
+  dplyr::select(broad_histology, display_group, harmonized_diagnosis, hex_codes) %>%
   # Remove any row that has an NA
   dplyr::filter(complete.cases(.)) %>%
   # Group by all 3 columns in order to count
-  dplyr::group_by(broad_histology, short_histology, harmonized_diagnosis) %>%
+  dplyr::group_by(broad_histology, display_group, harmonized_diagnosis, hex_codes) %>%
   # Add the count to a column named size
   dplyr::add_count(name = "size") %>%
   # Place the value 1 in a column named counter for treemap and sunburt plots
   dplyr::mutate(counter= c(1)) %>%
   # Change the column names
   dplyr::rename(level1 = broad_histology,
-                level2 = short_histology,
+                level2 = display_group,
                 level3 = harmonized_diagnosis) %>%
   # Reorder the rows according to the 3 levels
   dplyr::arrange(level1, level2, level3) %>%
   # tbl_df -> data.frame
-  as.data.frame()
+  as.data.frame() 
 
 # Save to tsv file
 readr::write_tsv(final_df, file.path(results_dir, "plots_df.tsv"))
 
-# Create and save treemap using ggplot2
-# Read in the histology color palette
-color_palette <-
-  readr::read_tsv(file.path(
-    root_dir,
-    "figures",
-    "palettes",
-    "histology_color_palette.tsv"
-  ))
-
-# Join the color palette for the colors for each short histology value --
-# palette is generated in `figures/scripts/color_palettes.R`
-final_df2 <- final_df %>%
-  dplyr::left_join(color_palette, by = c("level2" = "color_names")) %>%
-  dplyr::distinct() # Remove the redundant rows from prep for the `treemap` function
-
 # Plot the treemap
 treemap <-
   ggplot(
-    final_df2,
+    final_df,
     aes(
       area = size,
       fill = hex_codes,
@@ -112,7 +113,8 @@ treemap <-
     colour = "#FAFAFA",
     min.size = 0
   ) +
-  theme(legend.position = "none")
+  theme(legend.position = "none") +
+  scale_fill_identity()
 
 # Save treemap
 ggsave(

diff --git a/analyses/sample-distribution-analysis/03-tumor-descriptor-and-assay-count.nb.html b/analyses/sample-distribution-analysis/03-tumor-descriptor-and-assay-count.nb.html
diff --git a/analyses/sample-distribution-analysis/plots/distribution_across_cancer_types.pdf b/analyses/sample-distribution-analysis/plots/distribution_across_cancer_types.pdf
diff --git a/analyses/sample-distribution-analysis/plots/distribution_across_cancer_types_treemap.pdf b/analyses/sample-distribution-analysis/plots/distribution_across_cancer_types_treemap.pdf