Skip to content

Commit

Permalink
Adding uption to allow selection of how BAFsegm is calculated, either…
Browse files Browse the repository at this point in the history
… median or mean
  • Loading branch information
sdentro committed Mar 4, 2017
1 parent dee00a7 commit 297046b
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 20 deletions.
24 changes: 18 additions & 6 deletions R/fitcopynumber.R
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,10 @@ fit.copy.number = function(samplename, outputfile.prefix, inputfile.baf.segmente
#' @param siglevel Threshold under which a p-value becomes significant. When it is significant a second copy number state will be fitted (Default 0.05)
#' @param maxdist Slack in BAF space to allow a segment to be off it's optimum before becoming significant. A segment becomes significant very quickly when a breakpoint is missed, this parameter alleviates the effect (Default 0.01)
#' @param noperms The number of permutations to be run when bootstrapping the confidence intervals on the copy number state of each segment (Default 1000)
#' @param calc_seg_baf_option Various options to recalculate the BAF of a segment. Options are: 1 - median, 2 - mean. (Default: 1)
#' @author dw9, sd11
#' @export
callSubclones = function(sample.name, baf.segmented.file, logr.file, rho.psi.file, output.file, output.figures.prefix, output.gw.figures.prefix, chr_names, masking_output_file, max_allowed_state=250, sv_breakpoints_file=NULL, gamma=1, segmentation.gamma=NA, siglevel=0.05, maxdist=0.01, noperms=1000, seed=as.integer(Sys.time())) {
callSubclones = function(sample.name, baf.segmented.file, logr.file, rho.psi.file, output.file, output.figures.prefix, output.gw.figures.prefix, chr_names, masking_output_file, max_allowed_state=250, sv_breakpoints_file=NULL, gamma=1, segmentation.gamma=NA, siglevel=0.05, maxdist=0.01, noperms=1000, seed=as.integer(Sys.time()), calc_seg_baf_option=1) {

set.seed(seed)

Expand Down Expand Up @@ -242,7 +243,7 @@ callSubclones = function(sample.name, baf.segmented.file, logr.file, rho.psi.fil
write.table(subcloneres, gsub(".txt", "_1.txt", output.file), quote=F, col.names=T, row.names=F, sep="\t")

# Scan the segments for cases that should be merged
res = merge_segments(subcloneres, BAFvals, LogRvals, rho, psi, gamma)
res = merge_segments(subcloneres, BAFvals, LogRvals, rho, psi, gamma, calc_seg_baf_option)
BAFvals = res$bafsegmented

res = determine_copynumber(BAFvals, LogRvals, rho, psi, gamma, ctrans, ctrans.logR, maxdist, siglevel, noperms)
Expand Down Expand Up @@ -505,12 +506,13 @@ determine_copynumber = function(BAFvals, LogRvals, rho, psi, gamma, ctrans, ctra
#' @param rho The rho estimate that the profile was fit with
#' @param psi the psi estimate that the profile was fit with
#' @param platform_gamma The gamma parameter for this platform
#' @param calc_seg_baf_option Various options to recalculate the BAF of a segment. Options are: 1 - median, 2 - mean. (Default: 1)
#' @return A list with two fields: bafsegmented and subclones. The subclones field contains a data.frame in
#' Battenberg output format with the merged segments. The bafsegmented field contains the BAFsegmented data
#' corresponding to the provided subclones data.frame.
#' @author sd11
#' @noRd
merge_segments = function(subclones, bafsegmented, logR, rho, psi, platform_gamma) {
merge_segments = function(subclones, bafsegmented, logR, rho, psi, platform_gamma, calc_seg_baf_option=1) {
subclones_cleaned = data.frame()
merged = T
counter = 1
Expand Down Expand Up @@ -557,15 +559,25 @@ merge_segments = function(subclones, bafsegmented, logR, rho, psi, platform_gamm
# MERGE
new_entry = data.frame(subclones[i-1,])
new_entry$endpos = subclones[i,]$endpos
new_entry$BAF = median(c(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i-1]],
bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]]), na.rm=T)

if (calc_seg_baf_option==1) {
new_entry$BAF = median(c(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i-1]],
bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]]), na.rm=T)
} else if (calc_seg_baf_option==2 | ! calc_seg_baf_option %in% c(1,2)) {
new_entry$BAF = mean(c(bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i-1]],
bafsegmented$BAFphased[bafsegmented$Chromosome==subclones$chr[i] & bafsegmented$Position>=subclones$startpos[i] & bafsegmented$Position<=subclones$endpos[i]]), na.rm=T)
if (! calc_seg_baf_option %in% c(1,2)) {
warning("Supplied calc_seg_baf_option to callSubclones not valid, using mean BAF by default")
}
}

new_entry$LogR = mean(c(logR[logR$Chromosome==subclones$chr[i-1] & logR$Position>=subclones$startpos[i-1] & logR$Position<=subclones$endpos[i-1], 3],
logR[logR$Chromosome==subclones$chr[i] & logR$Position>=subclones$startpos[i] & logR$Position<=subclones$endpos[i], 3]), na.rm=T)
subclones_cleaned = rbind(subclones_cleaned, new_entry)

# Set BAFseg to reflect the current segment
bafsegmented$BAFseg[bafsegmented$Chromosome==subclones$chr[i-1] & bafsegmented$Position>=subclones$startpos[i-1] & bafsegmented$Position<=subclones$endpos[i]] = new_entry$BAF
# TODO Update the logRseg as well
# TODO The logRseg is currently not updated

previous_merged = T
merged = T
Expand Down
36 changes: 26 additions & 10 deletions R/segmentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,14 @@ adjustSegmValues = function(baf_chrom) {
#' @param samplename Name of the sample, which is used to name output figures
#' @param inputfile String that points to the output from the \code{combine.baf.files} function. This contains the phased SNPs with their BAF values
#' @param outputfile String where the segmentation output will be written
#' @param gamma The gamma parameter controls the size of the penalty of starting a new segment during segmentation. It is therefore the key parameter for controlling the number of segments (Default 10)
#' @param kmin Kmin represents the minimum number of probes/SNPs that a segment should consist of (Default 3)
#' @param phasegamma Gamma parameter used when correcting phasing mistakes (Default 3)
#' @param phasekmin Kmin parameter used when correcting phasing mistakes (Default 3)
#' @param gamma The gamma parameter controls the size of the penalty of starting a new segment during segmentation. It is therefore the key parameter for controlling the number of segments (Default: 10)
#' @param kmin Kmin represents the minimum number of probes/SNPs that a segment should consist of (Default: 3)
#' @param phasegamma Gamma parameter used when correcting phasing mistakes (Default: 3)
#' @param phasekmin Kmin parameter used when correcting phasing mistakes (Default: 3)
#' @param calc_seg_baf_option Various options to recalculate the BAF of a segment. Options are: 1 - median, 2 - mean. (Default: 1)
#' @author dw9
#' @export
segment.baf.phased = function(samplename, inputfile, outputfile, gamma=10, phasegamma=3, kmin=3, phasekmin=3) {
segment.baf.phased = function(samplename, inputfile, outputfile, gamma=10, phasegamma=3, kmin=3, phasekmin=3, calc_seg_baf_option=1) {
BAFraw = read.table(inputfile,sep="\t",header=T, stringsAsFactors=F)

BAFoutput = NULL
Expand Down Expand Up @@ -94,8 +95,15 @@ segment.baf.phased = function(samplename, inputfile, outputfile, gamma=10, phase
BAFphseg = res$yhat
}

# Adjust the segment BAF to not take the mean as that is sensitive to improperly phased segments
BAFphseg = adjustSegmValues(data.frame(BAFphased=BAFphased, BAFseg=BAFphseg))$BAFseg
#' Recalculate the BAF of each segment, if required
if (calc_seg_baf_option==1) {
# Adjust the segment BAF to not take the mean as that is sensitive to improperly phased segments
BAFphseg = adjustSegmValues(data.frame(BAFphased=BAFphased, BAFseg=BAFphseg))$BAFseg
} else if (calc_seg_baf_option==2) {
# Don't do anything, the BAF is already the mean
} else {
warning("Supplied calc_seg_baf_option to segment.baf.phased not valid, using mean BAF by default")
}

png(filename = paste(samplename,"_segment_chr",chr,".png",sep=""), width = 2000, height = 1000, res = 200)
create.baf.plot(chrom.position=pos/1000000,
Expand Down Expand Up @@ -132,9 +140,10 @@ segment.baf.phased = function(samplename, inputfile, outputfile, gamma=10, phase
#' @param phasegamma Gamma parameter used when correcting phasing mistakes (Default 3)
#' @param phasekmin Kmin parameter used when correcting phasing mistakes (Default 3)
#' @param no_segmentation Do not perform segmentation. This step will switch the haplotype blocks, but then just takes the mean BAFphased as BAFsegm
#' @param calc_seg_baf_option Various options to recalculate the BAF of a segment. Options are: 1 - median, 2 - mean. (Default: 1)
#' @author sd11, dw9
#' @export
segment.baf.phased.sv = function(samplename, inputfile, outputfile, svs, gamma=10, phasegamma=3, kmin=3, phasekmin=3, no_segmentation=F) {
segment.baf.phased.sv = function(samplename, inputfile, outputfile, svs, gamma=10, phasegamma=3, kmin=3, phasekmin=3, no_segmentation=F, calc_seg_baf_option=1) {

#' Function that takes SNPs that belong to a single segment and looks for big holes between
#' each pair of SNPs. If there is a big hole it will add another breakpoint to the breakpoints data.frame
Expand Down Expand Up @@ -266,8 +275,15 @@ segment.baf.phased.sv = function(samplename, inputfile, outputfile, svs, gamma=1
}

if (length(BAF) > 0) {
# Adjust the segment BAF to not take the mean as that is sensitive to improperly phased segments
BAFphseg = adjustSegmValues(data.frame(BAFphased=BAFphased, BAFseg=BAFphseg))$BAFseg
#' Recalculate the BAF of each segment, if required
if (calc_seg_baf_option==1) {
# Adjust the segment BAF to not take the mean as that is sensitive to improperly phased segments
BAFphseg = adjustSegmValues(data.frame(BAFphased=BAFphased, BAFseg=BAFphseg))$BAFseg
} else if (calc_seg_baf_option==2) {
# Don't do anything, the BAF is already the mean
} else {
warning("Supplied calc_seg_baf_option to segment.baf.phased.sv not valid, using mean BAF by default")
}
}

return(data.frame(Chromosome=rep(chr, length(row.indices)),
Expand Down
7 changes: 5 additions & 2 deletions inst/example/battenberg_snp6.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ MAX_RHO = 1.02 #1
MIN_GOODNESS_OF_FIT = 0.63
BALANCED_THRESHOLD = 0.51
MIN_NORMAL_DEPTH = 10
CALC_SEG_BAF_OPTION = 1

# Change to work directory and load the chromosome information
setwd(RUN_DIR)
Expand Down Expand Up @@ -148,7 +149,8 @@ segment.baf.phased(samplename=TUMOURNAME,
gamma=SEGMENTATION_GAMMA,
phasegamma=PHASING_GAMMA,
kmin=3,
phasekmin=1)
phasekmin=1,
calc_seg_baf_option=CALC_SEG_BAF_OPTION)

# Fit a clonal copy number profile
fit.copy.number(samplename=TUMOURNAME,
Expand Down Expand Up @@ -186,4 +188,5 @@ callSubclones(sample.name=TUMOURNAME,
maxdist=0.01,
noperms=1000,
max_allowed_state=250,
sv_breakpoints_file="NA")
sv_breakpoints_file="NA",
calc_seg_baf_option=CALC_SEG_BAF_OPTION)
7 changes: 5 additions & 2 deletions inst/example/battenberg_wgs.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ BALANCED_THRESHOLD = 0.51
MIN_NORMAL_DEPTH = 10
MIN_BASE_QUAL = 20
MIN_MAP_QUAL = 35
CALC_SEG_BAF_OPTION = 1

# WGS specific static
ALLELECOUNTER = "alleleCounter"
Expand Down Expand Up @@ -164,7 +165,8 @@ segment.baf.phased(samplename=TUMOURNAME,
gamma=SEGMENTATION_GAMMA,
phasegamma=PHASING_GAMMA,
kmin=3,
phasekmin=1)
phasekmin=1,
calc_seg_baf_option=CALC_SEG_BAF_OPTION)

# Fit a clonal copy number profile
fit.copy.number(samplename=TUMOURNAME,
Expand Down Expand Up @@ -200,4 +202,5 @@ callSubclones(sample.name=TUMOURNAME,
segmentation.gamma=NA,
siglevel=0.05,
maxdist=0.01,
noperms=1000)
noperms=1000,
calc_seg_baf_option=CALC_SEG_BAF_OPTION)

0 comments on commit 297046b

Please sign in to comment.