Reverting median BAF per segment back to mean as it affects the purity estimates
Wedge-lab · Dec 15, 2016 · 5870830 · 5870830
1 parent 267f9b7
commit 5870830
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 8 deletions.
diff --git a/R/fitcopynumber.R b/R/fitcopynumber.R
@@ -557,8 +557,11 @@ merge_segments = function(subclones, bafsegmented, logR, rho, psi, platform_gamm
         # MERGE
         new_entry = data.frame(subclones[i-1,])
         new_entry$endpos = subclones[i,]$endpos
-        new_entry$BAF = median(c(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i-1]], 
-                               bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]]), na.rm=T)
+        # Adjusting the BAFseg values is commented out for now as it is introducing a bias in the purity
+        # new_entry$BAF = median(c(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i-1]], 
+        #                        bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]]), na.rm=T)
+        new_entry$BAF = mean(c(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i-1]], 
+                                 bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]]), na.rm=T)
         new_entry$LogR = mean(c(logR[logR$Chromosome==subclones$chr[i-1] & logR$Position>=subclones$startpos[i-1] & logR$Position<=subclones$endpos[i-1], 3], 
                                 logR[logR$Chromosome==subclones$chr[i] & logR$Position>=subclones$startpos[i] & logR$Position<=subclones$endpos[i], 3]), na.rm=T)
         subclones_cleaned = rbind(subclones_cleaned, new_entry)

diff --git a/R/segmentation.R b/R/segmentation.R
@@ -264,11 +264,12 @@ segment.baf.phased.sv = function(samplename, inputfile, outputfile, svs, gamma=1
       res = Battenberg:::selectFastPcf(BAFphased,kmin,gamma*sdev,T)
       BAFphseg = res$yhat
     }
-
-    if (length(BAF) > 0) {
-      # Adjust the segment BAF to not take the mean as that is sensitive to improperly phased segments
-      BAFphseg = adjustSegmValues(data.frame(BAFphased=BAFphased, BAFseg=BAFphseg))$BAFseg
-    }
+
+    # Adjusting the BAFseg values is commented out for now as it is introducing a bias in the purity
+    # if (length(BAF) > 0) {
+    #   # Adjust the segment BAF to not take the mean as that is sensitive to improperly phased segments
+    #   BAFphseg = adjustSegmValues(data.frame(BAFphased=BAFphased, BAFseg=BAFphseg))$BAFseg
+    # }
 
     return(data.frame(Chromosome=rep(chr, length(row.indices)), 
                       Position=BAFrawchr[row.indices,1], 

diff --git a/R/util.R b/R/util.R
@@ -9,7 +9,7 @@
 #' @param sep Column separator (Default: \t)
 #' @param chrom_col The column number that contains chromosome denominations. This column will automatically be cast as a character. Should be counted including the row.names (Default: 1)
 #' @param skip The number of rows to skip before reading (Default: 0)
-#' @return: A data frame with contents of the file 
+#' @return A data frame with contents of the file 
 #' @export
 read_table_generic = function(file, header=T, row.names=F, stringsAsFactor=F, sep="\t", chrom_col=1, skip=0) {
   # stringsAsFactor is not needed here, but kept for legacy purposes