From 2d59523132d16d19e1767956343197246b61508e Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Wed, 8 Jan 2020 09:41:55 -0500
Subject: [PATCH 01/10] Add the functions

---
 .../util/chr-break-calculate.R                | 181 ++++++++++++++++++
 .../util/chr-break-plot.R                     | 112 +++++++++++
 2 files changed, 293 insertions(+)
 create mode 100644 analyses/chromosomal-instability/util/chr-break-calculate.R
 create mode 100644 analyses/chromosomal-instability/util/chr-break-plot.R

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
new file mode 100644
index 0000000000..6aabc8a702
--- /dev/null
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -0,0 +1,181 @@
+# Functions for chromosomal instability plots
+#
+# C. Savonen for ALSF - CCDL
+#
+# 2020
+
+make_granges <- function(break_df = NULL,
+                         sample_id = NULL,
+                         samples_col = "samples",
+                         chrom_col = "chrom",
+                         start_col = "start",
+                         end_col = "end") {
+  # For a given breaks data.frame make a GenomicRanger object from it. Can
+  # filter to a single samples' data.
+  #
+  # Args:
+  #   break_df: for a data.frame with chromosomal coordinates and sample IDs.
+  #   sample_id: sample ID for which the data needs to be extracted and made into
+  #              a GenomicRanges object. If "all" is designated, all samples will
+  #              be kept and the data combined. The original data.frame is stored
+  #              in elementMetadata@listData
+  #   samples_col: character string that indicates the column name with the
+  #                sample ID information. Default is "samples".
+  #   chrom_col: character string that indicates the column name with the
+  #              chromosome information. Default is "chrom".
+  #   start_col: character string that indicates the column name with the
+  #              start coordinate. Default is "start".
+  #   end_col: character string that indicates the column name with the
+  #            end coordinate. Default is "end".
+  #
+  # Returns:
+  # A Genomic Ranges formatted object for the particular sample that has the
+  # break points from the data.frame
+
+  # If no data.frame is provided, stop
+  if (is.null(breaks_df)) {
+    stop("No breaks data.frame has been provided. ")
+  }
+
+  # List the columns we need
+  needed_cols <- c(samples_col, chrom_col, start_col, end_col)
+
+  # Get logical vector indicating which are in metadata
+  found_cols <- (needed_cols %in% colnames(break_df))
+
+  # If not all the columns are found, stop
+  if (!all(found_cols)) {
+    stop(cat(
+      "The following column names specified for making the GenomicRanges object: \n",
+      paste(needed_cols[which(!found_cols)], collapse = "\n"),
+      "\n ...were not found in the specified metadata data.frame.",
+      "Check your `_col` arguments."
+    ))
+  }
+
+  if (sample_id != "all") {
+    # Extract samples' info from the data.frame
+    break_df <- break_df %>%
+      dplyr::filter(!!rlang::sym(samples_col) == sample_id)
+
+    # Stop if the sample doesn't exist in the data.frame
+    if (nrow(break_df) == 0) {
+      stop("No sample data was found for the given `sample_id` argument.")
+    }
+  }
+
+  # Make GRanges for CNV data
+  granges <- GenomicRanges::GRanges(
+    seqnames = dplyr::pull(break_df, !!rlang::sym(chrom_col)),
+    ranges = IRanges::IRanges(
+      start = dplyr::pull(break_df, !!rlang::sym(start_col)),
+      end = dplyr::pull(break_df, !!rlang::sym(end_col))
+    ),
+    mcols = break_df
+  )
+
+  return(granges)
+}
+
+break_density <- function(sv_breaks = NULL,
+                          cnv_breaks = NULL,
+                          sample_id = NULL,
+                          window_size = 1e6,
+                          chr_sizes_list = NULL,
+                          samples_col_cnv = "samples",
+                          chrom_col_cnv = "chrom",
+                          start_col_cnv = "start",
+                          end_col_cnv = "end",
+                          samples_col_sv = "samples",
+                          chrom_col_sv = "chrom",
+                          start_col_sv = "start",
+                          end_col_sv = "end") {
+  # For given breaks data.frame(s), calculate the breaks density for a tiled
+  # windows across the genome. Returns the data as a GenomicRanges object for
+  # easy mapping with ggbio. Where the density and counts are stored in
+  # @elementMetadata@listData.
+  #
+  # Args:
+  #   sv_breaks: a data.frame with the breaks for the SV data.
+  #   cnv_breaks: a data.frame with the breaks for the SV data.
+  #   sample_id: sample ID for which the data needs to be extracted and made into
+  #              a GenomicRanges object. If "all" is designated, all samples will
+  #              be kept.
+  #   chr_size_list: a named numeric vector of the sizes (bp) of the chromosomes.
+  #                  names of the chromosomes must match the format of the input
+  #                  break data. e.g. "chr1" or "1".
+  #   window_size: What size windows to calculate break density. Default is 1kb.
+  #   samples_col_sv/cnv: character string that indicates the column name with the
+  #                sample ID information. Default is "samples". Will be passed to
+  #                `make_granges`` function.
+  #   chrom_col_sv/cnv: character string that indicates the column name with the
+  #              chromosome information. Default is "chrom". Will be passed to
+  #              `make_granges`` function.
+  #   start_col_sv/cnv: character string that indicates the column name with the
+  #              start coordinate. Default is "start". Will be passed to
+  #              `make_granges`` function.
+  #   end_col_sv/cnv: character string that indicates the column name with the
+  #            end coordinate. Default is "end". Will be passed to
+  #            `make_granges`` function.
+  #
+  # Check that a sample ID has been specified.
+  if (is.null(sample_id)) {
+    stop("No sample ID has been specified. Use the `sample_id` argument.")
+  }
+
+  # Make CNV into GRanges
+  if (!is.null(cnv_breaks)) {
+    cnv_ranges <- make_granges(cnv_breaks,
+      sample_id = sample_id,
+      samples_col = samples_col_cnv,
+      chrom_col = chrom_col_cnv,
+      start_col = start_col_cnv,
+      end_col = end_col_cnv
+    )
+    # This will get written over if there are both datasets
+    combo_ranges <- cnv_ranges
+  }
+  # Make SV into GRanges
+  if (!is.null(sv_breaks)) {
+    sv_ranges <- make_granges(sv_breaks,
+      sample_id = sample_id,
+      samples_col = samples_col_sv,
+      chrom_col = chrom_col_sv,
+      start_col = start_col_sv,
+      end_col = end_col_sv
+    )
+    # This will get written over if there are both datasets
+    combo_ranges <- sv_ranges
+  }
+
+  # If both datasets are given, reduce them into one.
+  if (!is.null(cnv_breaks) && !is.null(sv_breaks)) {
+    combo_ranges <- GenomicRanges::union(
+      cnv_ranges,
+      sv_ranges
+    )
+  }
+
+  # Create genome bins
+  bins <- GenomicRanges::tileGenome(chr_sizes_list,
+    tilewidth = window_size
+  )
+
+  # Uncompress GRangesList
+  bins <- unlist(bins)
+
+  # Get counts for each genome bin
+  bin_counts <- GenomicRanges::countOverlaps(
+    bins,
+    combo_ranges
+  )
+
+  # Store count info
+  bins@elementMetadata@listData$counts <- bin_counts
+
+  # Calculate and store density
+  bins@elementMetadata@listData$density <- bin_counts / window_size
+
+  # Return the GRanges object for mapping purposes
+  return(bins)
+}
diff --git a/analyses/chromosomal-instability/util/chr-break-plot.R b/analyses/chromosomal-instability/util/chr-break-plot.R
new file mode 100644
index 0000000000..b516e6c467
--- /dev/null
+++ b/analyses/chromosomal-instability/util/chr-break-plot.R
@@ -0,0 +1,112 @@
+# Functions for chromosomal instability calculations
+#
+# C. Savonen for ALSF - CCDL
+#
+# 2020
+
+map_density_plot <- function(granges,
+                             y_val,
+                             y_lab,
+                             color,
+                             main_title) {
+  # Given a GRanges object, plot it y value along its chromosomal mappings using
+  # ggbio.
+  #
+  # Args:
+  #   granges: A Granges object to plot
+  #   y_val: a character string of the columnname in listData spot of the
+  #          GenomicRanges to plot on the y axis
+  #   color: a color parameter
+  #   y_lab: a character string to use for the ylabel. Will be passed to
+  #          ggplot2::ylab argument.
+  #   main_title: a character string to use for the main title.
+  #
+  # Returns:
+  #  ggplot of chromosomal mapping of the y value given.
+  #
+  # For setting the scale later, need to get y's max
+  max_y <- max(
+    dplyr::pull(
+      data.frame(granges@elementMetadata@listData),
+      !!rlang::sym(y_val)
+    )
+  )
+  # Make the density plot
+  density_plot <- ggbio::autoplot(granges, ggplot2::aes(y = !!rlang::sym(y_val)),
+    geom = "line", scales = "free_x", space = "free_x",
+    colour = color
+  ) +
+    ggplot2::theme_classic() +
+    ggplot2::theme(axis.text.x = ggplot2::element_text(size = 3, angle = 45, hjust = 1)) +
+    ggplot2::ylab(y_lab) +
+    ggplot2::ggtitle(main_title) +
+    ggplot2::scale_y_continuous(breaks = seq(0, max_y, by = 2))
+
+  # Print out plot
+  density_plot@ggplot
+}
+
+chr_break_plot <- function(granges_list) {
+  # A wrapper function to make a 3 row chromosomal map plot for a set of GRanges
+  # objects that contain common_density, cnv_density, and sv_density.
+  #
+  # Args:
+  #   granges_list: A list of Granges object to plot as a combination plot
+  #
+  # Returns:
+  #  ggplot of chromosomal mapping of the y value given.
+  #
+  # Get the sample name
+  sample_name <- common_samples[parent.frame()$i[]]
+
+  # Make combined SV and CNV plot
+  common_plot <- map_density_plot(sample_densities$common_density,
+    y_val = "counts",
+    y_lab = "Breaks per Mb",
+    color = "blue",
+    main_title = "Common Breaks"
+  )
+
+  # Make CNV plot
+  cnv_plot <- map_density_plot(sample_densities$cnv_density,
+    y_val = "counts",
+    y_lab = "Breaks per Mb",
+    color = "darkgreen",
+    main_title = "CNV Breaks"
+  )
+
+  # Make SV plot
+  sv_plot <- map_density_plot(sample_densities$sv_density,
+    y_val = "counts",
+    y_lab = "Breaks per Mb",
+    color = "orange",
+    main_title = "SV Breaks"
+  )
+
+  # Make a title
+  title <- cowplot::ggdraw() +
+    cowplot::draw_label(paste(sample_name, " - Chromosomal Break Density"),
+      fontface = "bold", x = .4, hjust = 0, size = 12
+    )
+
+  # Put all plots and title together
+  full_plot <- cowplot::plot_grid(title,
+    common_plot,
+    cnv_plot,
+    sv_plot,
+    nrow = 4,
+    axis = "b",
+    rel_heights = c(2, 5, 5, 5)
+  )
+
+  # Save plot to PNG
+  cowplot::save_plot(
+    plot = full_plot,
+    filename = file.path(plots_dir, paste0(sample_name, "_breaks.png")),
+    base_height = 7,
+    base_width = 20
+  )
+
+  # Print out the plot while we are here
+  full_plot
+}

From 6951c98714c1ce5937413625386a3082637cc0a7 Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Wed, 8 Jan 2020 09:58:49 -0500
Subject: [PATCH 02/10] Edited a messed up comment

---
 analyses/chromosomal-instability/util/chr-break-calculate.R | 2 +-
 analyses/chromosomal-instability/util/chr-break-plot.R      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
index 6aabc8a702..ebf4e85ba8 100644
--- a/analyses/chromosomal-instability/util/chr-break-calculate.R
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -1,4 +1,4 @@
-# Functions for chromosomal instability plots
+# Functions for chromosomal instability breakpoint calculations.
 #
 # C. Savonen for ALSF - CCDL
 #
diff --git a/analyses/chromosomal-instability/util/chr-break-plot.R b/analyses/chromosomal-instability/util/chr-break-plot.R
index b516e6c467..f1b556b8d1 100644
--- a/analyses/chromosomal-instability/util/chr-break-plot.R
+++ b/analyses/chromosomal-instability/util/chr-break-plot.R
@@ -1,4 +1,4 @@
-# Functions for chromosomal instability calculations
+# Functions for chromosomal instability plots
 #
 # C. Savonen for ALSF - CCDL
 #

From 9e0d321888c2ab5c99f78e86f24289a1ef6b6626 Mon Sep 17 00:00:00 2001
From: Candace Savonen <cansav09@gmail.com>
Date: Wed, 8 Jan 2020 10:39:41 -0500
Subject: [PATCH 03/10] Fixed some issues with the functions

---
 .../util/chr-break-calculate.R                | 24 +++++++++++------
 .../util/chr-break-plot.R                     | 27 ++++++++++---------
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
index ebf4e85ba8..964a3f6675 100644
--- a/analyses/chromosomal-instability/util/chr-break-calculate.R
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -1,4 +1,4 @@
-# Functions for chromosomal instability breakpoint calculations.
+# Functions for chromosomal instability plots
 #
 # C. Savonen for ALSF - CCDL
 #
@@ -33,7 +33,7 @@ make_granges <- function(break_df = NULL,
   # break points from the data.frame
 
   # If no data.frame is provided, stop
-  if (is.null(breaks_df)) {
+  if (is.null(break_df)) {
     stop("No breaks data.frame has been provided. ")
   }
 
@@ -56,7 +56,7 @@ make_granges <- function(break_df = NULL,
   if (sample_id != "all") {
     # Extract samples' info from the data.frame
     break_df <- break_df %>%
-      dplyr::filter(!!rlang::sym(samples_col) == sample_id)
+      dplyr::filter(c(sample_id) %in% !!rlang::sym(samples_col))
 
     # Stop if the sample doesn't exist in the data.frame
     if (nrow(break_df) == 0) {
@@ -100,7 +100,7 @@ break_density <- function(sv_breaks = NULL,
   #   cnv_breaks: a data.frame with the breaks for the SV data.
   #   sample_id: sample ID for which the data needs to be extracted and made into
   #              a GenomicRanges object. If "all" is designated, all samples will
-  #              be kept.
+  #              be kept. Multiple samples can be designated as a character vector.
   #   chr_size_list: a named numeric vector of the sizes (bp) of the chromosomes.
   #                  names of the chromosomes must match the format of the input
   #                  break data. e.g. "chr1" or "1".
@@ -122,10 +122,14 @@ break_density <- function(sv_breaks = NULL,
   if (is.null(sample_id)) {
     stop("No sample ID has been specified. Use the `sample_id` argument.")
   }
-
+  
+  # Determine how many samples are in the group
+  n_samples <- length(sample_id)
+  
   # Make CNV into GRanges
   if (!is.null(cnv_breaks)) {
-    cnv_ranges <- make_granges(cnv_breaks,
+    cnv_ranges <- make_granges(
+      break_df = cnv_breaks,
       sample_id = sample_id,
       samples_col = samples_col_cnv,
       chrom_col = chrom_col_cnv,
@@ -137,7 +141,8 @@ break_density <- function(sv_breaks = NULL,
   }
   # Make SV into GRanges
   if (!is.null(sv_breaks)) {
-    sv_ranges <- make_granges(sv_breaks,
+    sv_ranges <- make_granges(
+      break_df = sv_breaks,
       sample_id = sample_id,
       samples_col = samples_col_sv,
       chrom_col = chrom_col_sv,
@@ -171,8 +176,11 @@ break_density <- function(sv_breaks = NULL,
   )
 
   # Store count info
-  bins@elementMetadata@listData$counts <- bin_counts
+  bins@elementMetadata@listData$total_counts <- bin_counts
 
+  # Store average count info
+  bins@elementMetadata@listData$avg_counts <- bin_counts / n_samples
+  
   # Calculate and store density
   bins@elementMetadata@listData$density <- bin_counts / window_size
 
diff --git a/analyses/chromosomal-instability/util/chr-break-plot.R b/analyses/chromosomal-instability/util/chr-break-plot.R
index f1b556b8d1..2c1063d976 100644
--- a/analyses/chromosomal-instability/util/chr-break-plot.R
+++ b/analyses/chromosomal-instability/util/chr-break-plot.R
@@ -1,4 +1,4 @@
-# Functions for chromosomal instability plots
+# Functions for chromosomal instability calculations
 #
 # C. Savonen for ALSF - CCDL
 #
@@ -46,38 +46,39 @@ map_density_plot <- function(granges,
   density_plot@ggplot
 }
 
-chr_break_plot <- function(granges_list) {
+chr_break_plot <- function(granges_list,
+                           plot_name,
+                           y_val) {
   # A wrapper function to make a 3 row chromosomal map plot for a set of GRanges
   # objects that contain common_density, cnv_density, and sv_density.
   #
   # Args:
   #   granges_list: A list of Granges object to plot as a combination plot
+  #   plot_name: a character string specifying the plot
+  #   y_val: to be passed to map_density plot for mapping. 
   #
   # Returns:
   #  ggplot of chromosomal mapping of the y value given.
   #
-  # Get the sample name
-  sample_name <- common_samples[parent.frame()$i[]]
-
   # Make combined SV and CNV plot
-  common_plot <- map_density_plot(sample_densities$common_density,
-    y_val = "counts",
+  common_plot <- map_density_plot(granges_list$common_density,
+    y_val = y_val,
     y_lab = "Breaks per Mb",
     color = "blue",
     main_title = "Common Breaks"
   )
 
   # Make CNV plot
-  cnv_plot <- map_density_plot(sample_densities$cnv_density,
-    y_val = "counts",
+  cnv_plot <- map_density_plot(granges_list$cnv_density,
+    y_val = y_val,
     y_lab = "Breaks per Mb",
     color = "darkgreen",
     main_title = "CNV Breaks"
   )
 
   # Make SV plot
-  sv_plot <- map_density_plot(sample_densities$sv_density,
-    y_val = "counts",
+  sv_plot <- map_density_plot(granges_list$sv_density,
+    y_val = y_val,
     y_lab = "Breaks per Mb",
     color = "orange",
     main_title = "SV Breaks"
@@ -85,7 +86,7 @@ chr_break_plot <- function(granges_list) {
 
   # Make a title
   title <- cowplot::ggdraw() +
-    cowplot::draw_label(paste(sample_name, " - Chromosomal Break Density"),
+    cowplot::draw_label(paste(plot_name, " - Chromosomal Break Density"),
       fontface = "bold", x = .4, hjust = 0, size = 12
     )
 
@@ -102,7 +103,7 @@ chr_break_plot <- function(granges_list) {
   # Save plot to PNG
   cowplot::save_plot(
     plot = full_plot,
-    filename = file.path(plots_dir, paste0(sample_name, "_breaks.png")),
+    filename = file.path(plots_dir, paste0(plot_name, "_breaks.png")),
     base_height = 7,
     base_width = 20
   )

From 5a34518d471013d7fe22b7105ad9968fb59f3378 Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Wed, 8 Jan 2020 14:21:15 -0500
Subject: [PATCH 04/10] The functions handle grouped data

---
 .../util/chr-break-calculate.R                | 64 ++++++++++++++++---
 .../util/chr-break-plot.R                     | 28 +++++---
 2 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
index 964a3f6675..114acd19e8 100644
--- a/analyses/chromosomal-instability/util/chr-break-calculate.R
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -53,10 +53,10 @@ make_granges <- function(break_df = NULL,
     ))
   }
 
-  if (sample_id != "all") {
+  if (sample_id[1] != "all") {
     # Extract samples' info from the data.frame
     break_df <- break_df %>%
-      dplyr::filter(c(sample_id) %in% !!rlang::sym(samples_col))
+      dplyr::filter(!!rlang::sym(samples_col) %in% c(sample_id))
 
     # Stop if the sample doesn't exist in the data.frame
     if (nrow(break_df) == 0) {
@@ -71,7 +71,7 @@ make_granges <- function(break_df = NULL,
       start = dplyr::pull(break_df, !!rlang::sym(start_col)),
       end = dplyr::pull(break_df, !!rlang::sym(end_col))
     ),
-    mcols = break_df
+    mcols = dplyr::pull(break_df, !!rlang::sym(samples_col))
   )
 
   return(granges)
@@ -122,10 +122,10 @@ break_density <- function(sv_breaks = NULL,
   if (is.null(sample_id)) {
     stop("No sample ID has been specified. Use the `sample_id` argument.")
   }
-  
+
   # Determine how many samples are in the group
   n_samples <- length(sample_id)
-  
+
   # Make CNV into GRanges
   if (!is.null(cnv_breaks)) {
     cnv_ranges <- make_granges(
@@ -159,7 +159,24 @@ break_density <- function(sv_breaks = NULL,
       cnv_ranges,
       sv_ranges
     )
-  }
+    
+    # Carry over list data from sv_ranges
+    sv_overlaps <- GenomicRanges::findOverlaps(sv_ranges, combo_ranges)
+
+    # Carry over list data from cnv_ranges
+    cnv_overlaps <- GenomicRanges::findOverlaps(cnv_ranges, combo_ranges)
+
+    # Set up an empty list where we can store what sample each sequence came from
+    combo_ranges@elementMetadata@listData$samples <- rep(NA, length(combo_ranges))
+
+    # Bring over CNV samples
+    combo_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
+      cnv_ranges@elementMetadata@listData$mcols[cnv_overlaps@to]
+
+    # Bring over SV samples
+    combo_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
+      cnv_ranges@elementMetadata@listData$mcols[cnv_overlaps@to]
+    }
 
   # Create genome bins
   bins <- GenomicRanges::tileGenome(chr_sizes_list,
@@ -174,13 +191,40 @@ break_density <- function(sv_breaks = NULL,
     bins,
     combo_ranges
   )
-
+  ########################### Calculate summary stats ##########################
+  # Get a per sample break down if there is more than one sample
+  if (n_samples > 1) {
+    
+    # Get counts for each genome bin
+    bin_indices <- GenomicRanges::findOverlaps(
+      bins,
+      combo_ranges
+    )
+    
+    # Get list of samples 
+    bin_samples <- combo_ranges@elementMetadata@listData$mcols[bin_indices@to] 
+    
+    # Make a matrix that has the number of breaks per sample for each bin
+    freq_per_bin <- table(bin_indices@from, bin_samples) %>% 
+      data.frame() %>%
+      tidyr::spread(Var1, Freq) %>% 
+      tibble::column_to_rownames("bin_samples") %>%
+      t()
+    
+    # Calculate the median breaks per bin
+    median_counts <- apply(freq_per_bin, 1, median)
+    
+    # Store the median break counts, some bins data may be dropped off so we need 
+    # to start with NAs and then fill in the data based on the indices.
+    bins@elementMetadata@listData$median_counts <- rep(NA, length(bins))
+    bins@elementMetadata@listData$median_counts[as.numeric(names(median_counts))] <- median_counts
+    
+    # Store average count info
+    bins@elementMetadata@listData$avg_counts <- bin_counts / n_samples
+  }
   # Store count info
   bins@elementMetadata@listData$total_counts <- bin_counts
 
-  # Store average count info
-  bins@elementMetadata@listData$avg_counts <- bin_counts / n_samples
-  
   # Calculate and store density
   bins@elementMetadata@listData$density <- bin_counts / window_size
 
diff --git a/analyses/chromosomal-instability/util/chr-break-plot.R b/analyses/chromosomal-instability/util/chr-break-plot.R
index 2c1063d976..8b3a7b93bf 100644
--- a/analyses/chromosomal-instability/util/chr-break-plot.R
+++ b/analyses/chromosomal-instability/util/chr-break-plot.R
@@ -26,10 +26,11 @@ map_density_plot <- function(granges,
   #
   # For setting the scale later, need to get y's max
   max_y <- max(
-    dplyr::pull(
-      data.frame(granges@elementMetadata@listData),
+    data.frame(granges@elementMetadata@listData) %>% 
+      dplyr::pull(
       !!rlang::sym(y_val)
-    )
+    ), 
+    na.rm = TRUE
   )
   # Make the density plot
   density_plot <- ggbio::autoplot(granges, ggplot2::aes(y = !!rlang::sym(y_val)),
@@ -48,7 +49,9 @@ map_density_plot <- function(granges,
 
 chr_break_plot <- function(granges_list,
                            plot_name,
-                           y_val) {
+                           y_val, 
+                           y_lab, 
+                           plot_dir) {
   # A wrapper function to make a 3 row chromosomal map plot for a set of GRanges
   # objects that contain common_density, cnv_density, and sv_density.
   #
@@ -56,6 +59,8 @@ chr_break_plot <- function(granges_list,
   #   granges_list: A list of Granges object to plot as a combination plot
   #   plot_name: a character string specifying the plot
   #   y_val: to be passed to map_density plot for mapping. 
+  #   y_lab: to be passed to map_density plot for y axis label
+  #   plot_dir: a file path where you would like the plot PNG to be saved.
   #
   # Returns:
   #  ggplot of chromosomal mapping of the y value given.
@@ -63,7 +68,7 @@ chr_break_plot <- function(granges_list,
   # Make combined SV and CNV plot
   common_plot <- map_density_plot(granges_list$common_density,
     y_val = y_val,
-    y_lab = "Breaks per Mb",
+    y_lab = y_lab,
     color = "blue",
     main_title = "Common Breaks"
   )
@@ -71,7 +76,7 @@ chr_break_plot <- function(granges_list,
   # Make CNV plot
   cnv_plot <- map_density_plot(granges_list$cnv_density,
     y_val = y_val,
-    y_lab = "Breaks per Mb",
+    y_lab = y_lab,
     color = "darkgreen",
     main_title = "CNV Breaks"
   )
@@ -79,7 +84,7 @@ chr_break_plot <- function(granges_list,
   # Make SV plot
   sv_plot <- map_density_plot(granges_list$sv_density,
     y_val = y_val,
-    y_lab = "Breaks per Mb",
+    y_lab = y_lab,
     color = "orange",
     main_title = "SV Breaks"
   )
@@ -103,7 +108,7 @@ chr_break_plot <- function(granges_list,
   # Save plot to PNG
   cowplot::save_plot(
     plot = full_plot,
-    filename = file.path(plots_dir, paste0(plot_name, "_breaks.png")),
+    filename = file.path(plot_dir, paste0(plot_name, "_breaks.png")),
     base_height = 7,
     base_width = 20
   )
@@ -111,3 +116,10 @@ chr_break_plot <- function(granges_list,
   # Print out the plot while we are here
   full_plot
 }
+
+# Make the combo plot
+chr_break_plot(granges_list = granges_list,
+               plot_name = group_name,
+               y_val = "median_counts", 
+               y_lab = "Avg Breaks per Mb", 
+               plot_dir = hist_plots_dir)

From 0e7cbbb7ec510e684ffe11c20483723c93d8a89e Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Wed, 8 Jan 2020 14:47:05 -0500
Subject: [PATCH 05/10] Got rid of a development remnant I found

---
 analyses/chromosomal-instability/util/chr-break-plot.R | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/analyses/chromosomal-instability/util/chr-break-plot.R b/analyses/chromosomal-instability/util/chr-break-plot.R
index 8b3a7b93bf..f83cf7f790 100644
--- a/analyses/chromosomal-instability/util/chr-break-plot.R
+++ b/analyses/chromosomal-instability/util/chr-break-plot.R
@@ -116,10 +116,3 @@ chr_break_plot <- function(granges_list,
   # Print out the plot while we are here
   full_plot
 }
-
-# Make the combo plot
-chr_break_plot(granges_list = granges_list,
-               plot_name = group_name,
-               y_val = "median_counts", 
-               y_lab = "Avg Breaks per Mb", 
-               plot_dir = hist_plots_dir)

From 94560dad4f313fb69c2b4e07b602cf84680959de Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Wed, 8 Jan 2020 14:47:50 -0500
Subject: [PATCH 06/10] minor typo fixes

---
 .../util/chr-break-calculate.R                | 30 +++++++++----------
 .../util/chr-break-plot.R                     | 12 ++++----
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
index 114acd19e8..1708dab5a8 100644
--- a/analyses/chromosomal-instability/util/chr-break-calculate.R
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -1,4 +1,4 @@
-# Functions for chromosomal instability plots
+# Functions for chromosomal instability calculations
 #
 # C. Savonen for ALSF - CCDL
 #
@@ -10,7 +10,7 @@ make_granges <- function(break_df = NULL,
                          chrom_col = "chrom",
                          start_col = "start",
                          end_col = "end") {
-  # For a given breaks data.frame make a GenomicRanger object from it. Can
+  # For a given breaks data.frame make a GenomicRanges object from it. Can
   # filter to a single samples' data.
   #
   # Args:
@@ -92,7 +92,7 @@ break_density <- function(sv_breaks = NULL,
                           end_col_sv = "end") {
   # For given breaks data.frame(s), calculate the breaks density for a tiled
   # windows across the genome. Returns the data as a GenomicRanges object for
-  # easy mapping with ggbio. Where the density and counts are stored in
+  # easy mapping with `ggbio`. Where the density and counts are stored in
   # @elementMetadata@listData.
   #
   # Args:
@@ -159,7 +159,7 @@ break_density <- function(sv_breaks = NULL,
       cnv_ranges,
       sv_ranges
     )
-    
+
     # Carry over list data from sv_ranges
     sv_overlaps <- GenomicRanges::findOverlaps(sv_ranges, combo_ranges)
 
@@ -194,31 +194,31 @@ break_density <- function(sv_breaks = NULL,
   ########################### Calculate summary stats ##########################
   # Get a per sample break down if there is more than one sample
   if (n_samples > 1) {
-    
+
     # Get counts for each genome bin
     bin_indices <- GenomicRanges::findOverlaps(
       bins,
       combo_ranges
     )
-    
-    # Get list of samples 
-    bin_samples <- combo_ranges@elementMetadata@listData$mcols[bin_indices@to] 
-    
+
+    # Get list of samples
+    bin_samples <- combo_ranges@elementMetadata@listData$mcols[bin_indices@to]
+
     # Make a matrix that has the number of breaks per sample for each bin
-    freq_per_bin <- table(bin_indices@from, bin_samples) %>% 
+    freq_per_bin <- table(bin_indices@from, bin_samples) %>%
       data.frame() %>%
-      tidyr::spread(Var1, Freq) %>% 
+      tidyr::spread(Var1, Freq) %>%
       tibble::column_to_rownames("bin_samples") %>%
       t()
-    
+
     # Calculate the median breaks per bin
     median_counts <- apply(freq_per_bin, 1, median)
-    
-    # Store the median break counts, some bins data may be dropped off so we need 
+
+    # Store the median break counts, some bins data may be dropped off so we need
     # to start with NAs and then fill in the data based on the indices.
     bins@elementMetadata@listData$median_counts <- rep(NA, length(bins))
     bins@elementMetadata@listData$median_counts[as.numeric(names(median_counts))] <- median_counts
-    
+
     # Store average count info
     bins@elementMetadata@listData$avg_counts <- bin_counts / n_samples
   }
diff --git a/analyses/chromosomal-instability/util/chr-break-plot.R b/analyses/chromosomal-instability/util/chr-break-plot.R
index f83cf7f790..96145ed9f9 100644
--- a/analyses/chromosomal-instability/util/chr-break-plot.R
+++ b/analyses/chromosomal-instability/util/chr-break-plot.R
@@ -1,4 +1,4 @@
-# Functions for chromosomal instability calculations
+# Functions for chromosomal instability plots
 #
 # C. Savonen for ALSF - CCDL
 #
@@ -26,10 +26,10 @@ map_density_plot <- function(granges,
   #
   # For setting the scale later, need to get y's max
   max_y <- max(
-    data.frame(granges@elementMetadata@listData) %>% 
+    data.frame(granges@elementMetadata@listData) %>%
       dplyr::pull(
       !!rlang::sym(y_val)
-    ), 
+    ),
     na.rm = TRUE
   )
   # Make the density plot
@@ -49,8 +49,8 @@ map_density_plot <- function(granges,
 
 chr_break_plot <- function(granges_list,
                            plot_name,
-                           y_val, 
-                           y_lab, 
+                           y_val,
+                           y_lab,
                            plot_dir) {
   # A wrapper function to make a 3 row chromosomal map plot for a set of GRanges
   # objects that contain common_density, cnv_density, and sv_density.
@@ -58,7 +58,7 @@ chr_break_plot <- function(granges_list,
   # Args:
   #   granges_list: A list of Granges object to plot as a combination plot
   #   plot_name: a character string specifying the plot
-  #   y_val: to be passed to map_density plot for mapping. 
+  #   y_val: to be passed to map_density plot for mapping.
   #   y_lab: to be passed to map_density plot for y axis label
   #   plot_dir: a file path where you would like the plot PNG to be saved.
   #

From 252884f63fdf04165fa6a0b4955a245ddda714d1 Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Thu, 9 Jan 2020 08:34:03 -0500
Subject: [PATCH 07/10] Incorporate @sjspielman suggestions

---
 .../util/chr-break-calculate.R                | 104 +++++++++++-------
 .../util/chr-break-plot.R                     |  25 +++--
 2 files changed, 76 insertions(+), 53 deletions(-)

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
index 1708dab5a8..b06c0ef008 100644
--- a/analyses/chromosomal-instability/util/chr-break-calculate.R
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -1,4 +1,4 @@
-# Functions for chromosomal instability calculations
+# Functions for chromosomal instability plots
 #
 # C. Savonen for ALSF - CCDL
 #
@@ -10,17 +10,20 @@ make_granges <- function(break_df = NULL,
                          chrom_col = "chrom",
                          start_col = "start",
                          end_col = "end") {
-  # For a given breaks data.frame make a GenomicRanges object from it. Can
+  # For a given breaks data.frame make a GenomicRanger object from it. Can
   # filter to a single samples' data.
   #
   # Args:
   #   break_df: for a data.frame with chromosomal coordinates and sample IDs.
-  #   sample_id: sample ID for which the data needs to be extracted and made into
-  #              a GenomicRanges object. If "all" is designated, all samples will
-  #              be kept and the data combined. The original data.frame is stored
-  #              in elementMetadata@listData
+  #   sample_id: a character string that designates which data needs to be
+  #              extracted and made intoa GenomicRanges object by matching the
+  #              information in the designated sample_col. If "all" is designated,
+  #              all samples will be kept. Multiple samples can be designated
+  #              as a character vector.
   #   samples_col: character string that indicates the column name with the
-  #                sample ID information. Default is "samples".
+  #                sample ID information. Default is "samples". This information
+  #                will be store in the GenomicRanges object in
+  #                `elementData@listData$samples`
   #   chrom_col: character string that indicates the column name with the
   #              chromosome information. Default is "chrom".
   #   start_col: character string that indicates the column name with the
@@ -92,22 +95,24 @@ break_density <- function(sv_breaks = NULL,
                           end_col_sv = "end") {
   # For given breaks data.frame(s), calculate the breaks density for a tiled
   # windows across the genome. Returns the data as a GenomicRanges object for
-  # easy mapping with `ggbio`. Where the density and counts are stored in
+  # easy mapping with ggbio. Where the density and counts are stored in
   # @elementMetadata@listData.
   #
   # Args:
   #   sv_breaks: a data.frame with the breaks for the SV data.
-  #   cnv_breaks: a data.frame with the breaks for the SV data.
-  #   sample_id: sample ID for which the data needs to be extracted and made into
-  #              a GenomicRanges object. If "all" is designated, all samples will
-  #              be kept. Multiple samples can be designated as a character vector.
+  #   cnv_breaks: a data.frame with the breaks for the CNV data.
+  #   sample_id: a character string that designates which data needs to be
+  #              extracted and made intoa GenomicRanges object by matching the
+  #              information in the designated sample_col. If "all" is designated,
+  #              all samples will be kept. Multiple samples can be designated
+  #              as a character vector.
   #   chr_size_list: a named numeric vector of the sizes (bp) of the chromosomes.
   #                  names of the chromosomes must match the format of the input
   #                  break data. e.g. "chr1" or "1".
-  #   window_size: What size windows to calculate break density. Default is 1kb.
+  #   window_size: What size windows to calculate break density. Default is 1Mb.
   #   samples_col_sv/cnv: character string that indicates the column name with the
   #                sample ID information. Default is "samples". Will be passed to
-  #                `make_granges`` function.
+  #                `make_granges` function.
   #   chrom_col_sv/cnv: character string that indicates the column name with the
   #              chromosome information. Default is "chrom". Will be passed to
   #              `make_granges`` function.
@@ -116,18 +121,20 @@ break_density <- function(sv_breaks = NULL,
   #              `make_granges`` function.
   #   end_col_sv/cnv: character string that indicates the column name with the
   #            end coordinate. Default is "end". Will be passed to
-  #            `make_granges`` function.
+  #            `make_granges` function.
   #
   # Check that a sample ID has been specified.
   if (is.null(sample_id)) {
-    stop("No sample ID has been specified. Use the `sample_id` argument.")
+    stop("No sample ID(s) have been specified. Use the `sample_id` argument.")
   }
 
   # Determine how many samples are in the group
   n_samples <- length(sample_id)
 
-  # Make CNV into GRanges
-  if (!is.null(cnv_breaks)) {
+  # If both datasets are given, make them into one and use this
+  if (!is.null(cnv_breaks) & !is.null(sv_breaks)) {
+
+    # Read in CNV data
     cnv_ranges <- make_granges(
       break_df = cnv_breaks,
       sample_id = sample_id,
@@ -135,12 +142,8 @@ break_density <- function(sv_breaks = NULL,
       chrom_col = chrom_col_cnv,
       start_col = start_col_cnv,
       end_col = end_col_cnv
-    )
-    # This will get written over if there are both datasets
-    combo_ranges <- cnv_ranges
-  }
-  # Make SV into GRanges
-  if (!is.null(sv_breaks)) {
+
+    # Read in SV data
     sv_ranges <- make_granges(
       break_df = sv_breaks,
       sample_id = sample_id,
@@ -148,37 +151,54 @@ break_density <- function(sv_breaks = NULL,
       chrom_col = chrom_col_sv,
       start_col = start_col_sv,
       end_col = end_col_sv
-    )
-    # This will get written over if there are both datasets
-    combo_ranges <- sv_ranges
-  }
-
-  # If both datasets are given, reduce them into one.
-  if (!is.null(cnv_breaks) && !is.null(sv_breaks)) {
-    combo_ranges <- GenomicRanges::union(
+      )
+    # Combine datasets
+    break_ranges <- GenomicRanges::union(
       cnv_ranges,
       sv_ranges
     )
 
     # Carry over list data from sv_ranges
-    sv_overlaps <- GenomicRanges::findOverlaps(sv_ranges, combo_ranges)
+    sv_overlaps <- GenomicRanges::findOverlaps(sv_ranges, break_ranges)
 
     # Carry over list data from cnv_ranges
-    cnv_overlaps <- GenomicRanges::findOverlaps(cnv_ranges, combo_ranges)
+    cnv_overlaps <- GenomicRanges::findOverlaps(cnv_ranges, break_ranges)
 
     # Set up an empty list where we can store what sample each sequence came from
-    combo_ranges@elementMetadata@listData$samples <- rep(NA, length(combo_ranges))
+    break_ranges@elementMetadata@listData$samples <- rep(NA, length(break_ranges))
 
     # Bring over CNV samples
-    combo_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
+    break_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
       cnv_ranges@elementMetadata@listData$mcols[cnv_overlaps@to]
 
     # Bring over SV samples
-    combo_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
+    break_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
       cnv_ranges@elementMetadata@listData$mcols[cnv_overlaps@to]
-    }
 
-  # Create genome bins
+  } else if (!is.null(cnv_breaks) & is.null(sv_breaks)) {
+    # If only the CNV data is given, use this data only
+    break_ranges <- make_granges(
+      break_df = cnv_breaks,
+      sample_id = sample_id,
+      samples_col = samples_col_cnv,
+      chrom_col = chrom_col_cnv,
+      start_col = start_col_cnv,
+      end_col = end_col_cnv
+    )
+  } else if (!is.null(sv_breaks) & is.null(cnv_breaks)) {
+    break_ranges <- make_granges(
+      break_df = sv_breaks,
+      sample_id = sample_id,
+      samples_col = samples_col_sv,
+      chrom_col = chrom_col_sv,
+      start_col = start_col_sv,
+      end_col = end_col_sv
+    )
+  } else if (is.null(sv_breaks) & is.null(cnv_breaks)) {
+    stop("No data has been provided in either the `sv_break` or `cnv_break` arguments.")
+  }
+
+  ######################### Tally breaks by genome bins ########################
   bins <- GenomicRanges::tileGenome(chr_sizes_list,
     tilewidth = window_size
   )
@@ -189,7 +209,7 @@ break_density <- function(sv_breaks = NULL,
   # Get counts for each genome bin
   bin_counts <- GenomicRanges::countOverlaps(
     bins,
-    combo_ranges
+    break_ranges
   )
   ########################### Calculate summary stats ##########################
   # Get a per sample break down if there is more than one sample
@@ -198,11 +218,11 @@ break_density <- function(sv_breaks = NULL,
     # Get counts for each genome bin
     bin_indices <- GenomicRanges::findOverlaps(
       bins,
-      combo_ranges
+      break_ranges
     )
 
     # Get list of samples
-    bin_samples <- combo_ranges@elementMetadata@listData$mcols[bin_indices@to]
+    bin_samples <- break_ranges@elementMetadata@listData$mcols[bin_indices@to]
 
     # Make a matrix that has the number of breaks per sample for each bin
     freq_per_bin <- table(bin_indices@from, bin_samples) %>%
diff --git a/analyses/chromosomal-instability/util/chr-break-plot.R b/analyses/chromosomal-instability/util/chr-break-plot.R
index 96145ed9f9..67f658fdc5 100644
--- a/analyses/chromosomal-instability/util/chr-break-plot.R
+++ b/analyses/chromosomal-instability/util/chr-break-plot.R
@@ -1,4 +1,4 @@
-# Functions for chromosomal instability plots
+# Functions for chromosomal instability calculations
 #
 # C. Savonen for ALSF - CCDL
 #
@@ -23,13 +23,16 @@ map_density_plot <- function(granges,
   #
   # Returns:
   #  ggplot of chromosomal mapping of the y value given.
-  #
+  
+  # For the y-axis ticks, default is to print every two 
+  by_interval <- 2
+  
   # For setting the scale later, need to get y's max
   max_y <- max(
-    data.frame(granges@elementMetadata@listData) %>%
+    data.frame(granges@elementMetadata@listData) %>% 
       dplyr::pull(
       !!rlang::sym(y_val)
-    ),
+    ), 
     na.rm = TRUE
   )
   # Make the density plot
@@ -41,24 +44,24 @@ map_density_plot <- function(granges,
     ggplot2::theme(axis.text.x = ggplot2::element_text(size = 3, angle = 45, hjust = 1)) +
     ggplot2::ylab(y_lab) +
     ggplot2::ggtitle(main_title) +
-    ggplot2::scale_y_continuous(breaks = seq(0, max_y, by = 2))
+    ggplot2::scale_y_continuous(breaks = seq(0, max_y, by = by_interval))
 
   # Print out plot
   density_plot@ggplot
 }
 
-chr_break_plot <- function(granges_list,
-                           plot_name,
-                           y_val,
-                           y_lab,
-                           plot_dir) {
+multipanel_break_plot <- function(granges_list,
+                                  plot_name,
+                                  y_val, 
+                                  y_lab, 
+                                  plot_dir) {
   # A wrapper function to make a 3 row chromosomal map plot for a set of GRanges
   # objects that contain common_density, cnv_density, and sv_density.
   #
   # Args:
   #   granges_list: A list of Granges object to plot as a combination plot
   #   plot_name: a character string specifying the plot
-  #   y_val: to be passed to map_density plot for mapping.
+  #   y_val: to be passed to map_density plot for mapping. 
   #   y_lab: to be passed to map_density plot for y axis label
   #   plot_dir: a file path where you would like the plot PNG to be saved.
   #

From 1a95792d0d1f734ec9ddf06de964ed51892ea2e2 Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Thu, 9 Jan 2020 08:34:59 -0500
Subject: [PATCH 08/10] Missed a paranthesis

---
 analyses/chromosomal-instability/util/chr-break-calculate.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
index b06c0ef008..5435b988e4 100644
--- a/analyses/chromosomal-instability/util/chr-break-calculate.R
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -142,6 +142,7 @@ break_density <- function(sv_breaks = NULL,
       chrom_col = chrom_col_cnv,
       start_col = start_col_cnv,
       end_col = end_col_cnv
+    )
 
     # Read in SV data
     sv_ranges <- make_granges(

From b0f9cb951dbfb5857f6a778d8ce84d2099012e71 Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Thu, 9 Jan 2020 08:36:49 -0500
Subject: [PATCH 09/10] re-linter

---
 .../util/chr-break-calculate.R                 |  3 +--
 .../util/chr-break-plot.R                      | 18 +++++++++---------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
index 5435b988e4..c87020c7d4 100644
--- a/analyses/chromosomal-instability/util/chr-break-calculate.R
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -152,7 +152,7 @@ break_density <- function(sv_breaks = NULL,
       chrom_col = chrom_col_sv,
       start_col = start_col_sv,
       end_col = end_col_sv
-      )
+    )
     # Combine datasets
     break_ranges <- GenomicRanges::union(
       cnv_ranges,
@@ -175,7 +175,6 @@ break_density <- function(sv_breaks = NULL,
     # Bring over SV samples
     break_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
       cnv_ranges@elementMetadata@listData$mcols[cnv_overlaps@to]
-
   } else if (!is.null(cnv_breaks) & is.null(sv_breaks)) {
     # If only the CNV data is given, use this data only
     break_ranges <- make_granges(
diff --git a/analyses/chromosomal-instability/util/chr-break-plot.R b/analyses/chromosomal-instability/util/chr-break-plot.R
index 67f658fdc5..be2ef1bf91 100644
--- a/analyses/chromosomal-instability/util/chr-break-plot.R
+++ b/analyses/chromosomal-instability/util/chr-break-plot.R
@@ -23,16 +23,16 @@ map_density_plot <- function(granges,
   #
   # Returns:
   #  ggplot of chromosomal mapping of the y value given.
-  
-  # For the y-axis ticks, default is to print every two 
+
+  # For the y-axis ticks, default is to print every two
   by_interval <- 2
-  
+
   # For setting the scale later, need to get y's max
   max_y <- max(
-    data.frame(granges@elementMetadata@listData) %>% 
+    data.frame(granges@elementMetadata@listData) %>%
       dplyr::pull(
-      !!rlang::sym(y_val)
-    ), 
+        !!rlang::sym(y_val)
+      ),
     na.rm = TRUE
   )
   # Make the density plot
@@ -52,8 +52,8 @@ map_density_plot <- function(granges,
 
 multipanel_break_plot <- function(granges_list,
                                   plot_name,
-                                  y_val, 
-                                  y_lab, 
+                                  y_val,
+                                  y_lab,
                                   plot_dir) {
   # A wrapper function to make a 3 row chromosomal map plot for a set of GRanges
   # objects that contain common_density, cnv_density, and sv_density.
@@ -61,7 +61,7 @@ multipanel_break_plot <- function(granges_list,
   # Args:
   #   granges_list: A list of Granges object to plot as a combination plot
   #   plot_name: a character string specifying the plot
-  #   y_val: to be passed to map_density plot for mapping. 
+  #   y_val: to be passed to map_density plot for mapping.
   #   y_lab: to be passed to map_density plot for y axis label
   #   plot_dir: a file path where you would like the plot PNG to be saved.
   #

From f595d31f051ab8f0cc06d7fbbc2cbfc13d655a3a Mon Sep 17 00:00:00 2001
From: Candace Savonen <savonenc@msu.edu>
Date: Fri, 10 Jan 2020 12:29:06 -0500
Subject: [PATCH 10/10] Fix typos spotted by @jaclyn-taroni

---
 .../util/chr-break-calculate.R                | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/analyses/chromosomal-instability/util/chr-break-calculate.R b/analyses/chromosomal-instability/util/chr-break-calculate.R
index c87020c7d4..88a649a048 100644
--- a/analyses/chromosomal-instability/util/chr-break-calculate.R
+++ b/analyses/chromosomal-instability/util/chr-break-calculate.R
@@ -10,13 +10,13 @@ make_granges <- function(break_df = NULL,
                          chrom_col = "chrom",
                          start_col = "start",
                          end_col = "end") {
-  # For a given breaks data.frame make a GenomicRanger object from it. Can
+  # For a given breaks data.frame make a GenomicRanges object from it. Can
   # filter to a single samples' data.
   #
   # Args:
   #   break_df: for a data.frame with chromosomal coordinates and sample IDs.
   #   sample_id: a character string that designates which data needs to be
-  #              extracted and made intoa GenomicRanges object by matching the
+  #              extracted and made into a GenomicRanges object by matching the
   #              information in the designated sample_col. If "all" is designated,
   #              all samples will be kept. Multiple samples can be designated
   #              as a character vector.
@@ -32,8 +32,8 @@ make_granges <- function(break_df = NULL,
   #            end coordinate. Default is "end".
   #
   # Returns:
-  # A Genomic Ranges formatted object for the particular sample that has the
-  # break points from the data.frame
+  # A GenomicRanges object for the particular sample that has the breakpoints
+  # from the data.frame
 
   # If no data.frame is provided, stop
   if (is.null(break_df)) {
@@ -160,21 +160,24 @@ break_density <- function(sv_breaks = NULL,
     )
 
     # Carry over list data from sv_ranges
-    sv_overlaps <- GenomicRanges::findOverlaps(sv_ranges, break_ranges)
+    sv_overlaps <- suppressWarnings(GenomicRanges::findOverlaps(sv_ranges,
+                                                                break_ranges))
 
     # Carry over list data from cnv_ranges
-    cnv_overlaps <- GenomicRanges::findOverlaps(cnv_ranges, break_ranges)
+    cnv_overlaps <- suppressWarnings(GenomicRanges::findOverlaps(cnv_ranges,
+                                                                 break_ranges))
 
     # Set up an empty list where we can store what sample each sequence came from
-    break_ranges@elementMetadata@listData$samples <- rep(NA, length(break_ranges))
+    break_ranges@elementMetadata@listData$mcols <- rep(NA, length(break_ranges))
 
     # Bring over CNV samples
-    break_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
+    break_ranges@elementMetadata@listData$mcols[cnv_overlaps@from] <-
       cnv_ranges@elementMetadata@listData$mcols[cnv_overlaps@to]
 
     # Bring over SV samples
-    break_ranges@elementMetadata@listData$samples[cnv_overlaps@from] <-
+    break_ranges@elementMetadata@listData$mcols[cnv_overlaps@from] <-
       cnv_ranges@elementMetadata@listData$mcols[cnv_overlaps@to]
+
   } else if (!is.null(cnv_breaks) & is.null(sv_breaks)) {
     # If only the CNV data is given, use this data only
     break_ranges <- make_granges(
@@ -207,19 +210,19 @@ break_density <- function(sv_breaks = NULL,
   bins <- unlist(bins)
 
   # Get counts for each genome bin
-  bin_counts <- GenomicRanges::countOverlaps(
+  bin_counts <- suppressWarnings(GenomicRanges::countOverlaps(
     bins,
     break_ranges
-  )
+  ))
   ########################### Calculate summary stats ##########################
   # Get a per sample break down if there is more than one sample
   if (n_samples > 1) {
 
     # Get counts for each genome bin
-    bin_indices <- GenomicRanges::findOverlaps(
+    bin_indices <- suppressWarnings(GenomicRanges::findOverlaps(
       bins,
       break_ranges
-    )
+    ))
 
     # Get list of samples
     bin_samples <- break_ranges@elementMetadata@listData$mcols[bin_indices@to]