diff --git a/adam-apis/src/main/scala/org/bdgenomics/adam/apis/java/JavaAlignmentRecordRDD.scala b/adam-apis/src/main/scala/org/bdgenomics/adam/apis/java/JavaAlignmentRecordRDD.scala index e8f2c43541..ce6ccf1f6f 100644 --- a/adam-apis/src/main/scala/org/bdgenomics/adam/apis/java/JavaAlignmentRecordRDD.scala +++ b/adam-apis/src/main/scala/org/bdgenomics/adam/apis/java/JavaAlignmentRecordRDD.scala @@ -33,11 +33,13 @@ class JavaAlignmentRecordRDD(val jrdd: JavaRDD[AlignmentRecord]) extends Seriali * @param compressCodec Name of the compression codec to use. * @param disableDictionaryEncoding Whether or not to disable bit-packing. */ - def adamSave(filePath: java.lang.String, - blockSize: java.lang.Integer, - pageSize: java.lang.Integer, - compressCodec: CompressionCodecName, - disableDictionaryEncoding: java.lang.Boolean) { + def adamSave( + filePath: java.lang.String, + blockSize: java.lang.Integer, + pageSize: java.lang.Integer, + compressCodec: CompressionCodecName, + disableDictionaryEncoding: java.lang.Boolean + ) { jrdd.rdd.adamParquetSave( filePath, blockSize, @@ -62,8 +64,10 @@ class JavaAlignmentRecordRDD(val jrdd: JavaRDD[AlignmentRecord]) extends Seriali * @param filePath Path to save the file at. * @param asSam If true, saves as SAM. If false, saves as BAM. */ - def adamSAMSave(filePath: java.lang.String, - asSam: java.lang.Boolean) { + def adamSAMSave( + filePath: java.lang.String, + asSam: java.lang.Boolean + ) { jrdd.rdd.adamSAMSave(filePath, asSam) } diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala index 4e2d333928..8f93b75479 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala @@ -53,12 +53,14 @@ object AlleleCountHelper extends Serializable { } def countAlleles(adamVariants: RDD[Genotype], args: AlleleCountArgs) { - val usefulData = adamVariants.map(p => (p.getVariant.getContig.getContigName, + val usefulData = adamVariants.map(p => ( + p.getVariant.getContig.getContigName, p.getVariant.getStart, p.getVariant.getReferenceAllele, p.getVariant.getAlternateAllele, p.getAlleles.get(0), - p.getAlleles.get(1))) + p.getAlleles.get(1) + )) val reduced_Variants = usefulData.flatMap(p => Seq((p._1, p._2, p._3, p._4, p._5), (p._1, p._2, p._3, p._4, p._6))) val alleles = reduced_Variants.flatMap(chooseAllele) alleles.groupBy(identity).map { case (a, b) => "%s\t%s\t%s\t%d".format(a._1, a._2, a._3, b.size) } diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala index 0e46e461bf..69c8abadec 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala @@ -108,7 +108,8 @@ class CalculateDepth(protected val args: CalculateDepthArgs) extends BDGSparkCom println("%20s\t%15s\t% 5d".format( "%s:%d".format(region.referenceName, region.start), variantNames(region), - count)) + count + )) } } diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala index 234f094b35..6e035abfcb 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala @@ -61,7 +61,8 @@ class CountReadKmers(protected val args: CountReadKmersArgs) extends 
BDGSparkCom // read from disk var adamRecords: RDD[AlignmentRecord] = sc.loadAlignments( args.inputPath, - projection = Some(Projection(AlignmentRecordField.sequence))) + projection = Some(Projection(AlignmentRecordField.sequence)) + ) if (args.repartition != -1) { log.info("Repartitioning reads to '%d' partitions".format(args.repartition)) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala index cebd7c5c3b..b74fa46b14 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala @@ -62,7 +62,8 @@ class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagSta AlignmentRecordField.secondOfPair, AlignmentRecordField.properPair, AlignmentRecordField.mapq, - AlignmentRecordField.failedVendorQualityChecks) + AlignmentRecordField.failedVendorQualityChecks + ) val adamFile: RDD[AlignmentRecord] = sc.loadAlignments(args.inputPath, projection = Some(projection)) @@ -113,7 +114,8 @@ class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagSta percent(passedVendorQuality.singleton, passedVendorQuality.total), percent(failedVendorQuality.singleton, failedVendorQuality.total), passedVendorQuality.withMateMappedToDiffChromosome, failedVendorQuality.withMateMappedToDiffChromosome, - passedVendorQuality.withMateMappedToDiffChromosomeMapQ5, failedVendorQuality.withMateMappedToDiffChromosomeMapQ5) + passedVendorQuality.withMateMappedToDiffChromosomeMapQ5, failedVendorQuality.withMateMappedToDiffChromosomeMapQ5 + ) Option(args.outputPath) match { case Some(outputPath) => diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala index 052f2be71a..0dece18f8f 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala @@ -75,6 +75,7 @@ class Flatten(val args: FlattenArgs) extends BDGSparkCommand[FlattenArgs] with L args.pageSize, args.compressionCodec, args.disableDictionaryEncoding, - Some(flatSchema)) + Some(flatSchema) + ) } } diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala index 61c288ced9..2779aaa05d 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala @@ -64,6 +64,7 @@ class PrintGenes(protected val args: PrintGenesArgs) transcript.region.referenceName, transcript.region.start, transcript.region.end, if (transcript.strand) "+" else "-", - transcript.exons.size) + transcript.exons.size + ) } } diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala index 1be8e96f38..32e93e5a63 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala @@ -145,7 +145,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans log.info("Locally realigning indels.") val consensusGenerator = Option(args.knownIndelsFile) .fold(new ConsensusGeneratorFromReads().asInstanceOf[ConsensusGenerator])( - new ConsensusGeneratorFromKnowns(_, sc).asInstanceOf[ConsensusGenerator]) + new ConsensusGeneratorFromKnowns(_, sc).asInstanceOf[ConsensusGenerator] + ) adamRecords = 
oldRdd.adamRealignIndels(
        consensusGenerator,
@@ -227,7 +228,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
     if ((args.useAlignedReadPredicate || args.limitProjection) &&
       (args.forceLoadBam || args.forceLoadFastq || args.forceLoadIFastq)) {
       throw new IllegalArgumentException(
-        "-aligned_read_predicate and -limit_projection only apply to Parquet files, but a non-Parquet force load flag was passed.")
+        "-aligned_read_predicate and -limit_projection only apply to Parquet files, but a non-Parquet force load flag was passed."
+      )
     }
 
     val rdd =
@@ -246,7 +248,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
           None
         }
         val proj = if (args.limitProjection) {
-          Some(Projection(AlignmentRecordField.contig,
+          Some(Projection(
+            AlignmentRecordField.contig,
             AlignmentRecordField.start,
             AlignmentRecordField.end,
             AlignmentRecordField.mapq,
@@ -265,13 +268,16 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
             AlignmentRecordField.duplicateRead,
             AlignmentRecordField.mismatchingPositions,
             AlignmentRecordField.secondaryAlignment,
-            AlignmentRecordField.supplementaryAlignment))
+            AlignmentRecordField.supplementaryAlignment
+          ))
         } else {
           None
         }
-        sc.loadParquetAlignments(args.inputPath,
+        sc.loadParquetAlignments(
+          args.inputPath,
           predicate = pred,
-          projection = proj)
+          projection = proj
+        )
       } else {
         sc.loadAlignments(
           args.inputPath,
@@ -297,8 +303,7 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
           concatFilename,
           recordGroupOpt = Option(args.fastqRecordGroup)
         )
-      }
-    )
+      })
 
     this.apply(concatRddOpt match {
       case Some(concatRdd) => rdd ++ concatRdd
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
index 4b022ba26f..d7a7ff7d54 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
@@ -36,41 +36,47 @@ class ViewArgs extends Args4jBase with ParquetArgs with ADAMSaveAnyArgs {
     required = false,
     name = "-f",
     metaVar = "N",
-    usage = "Restrict to reads that match all of the bits in <N>")
+    usage = "Restrict to reads that match all of the bits in <N>"
+  )
   var matchAllBits: Int = 0
 
   @Args4jOption(
     required = false,
     name = "-F",
     metaVar = "N",
-    usage = "Restrict to reads that match none of the bits in <N>")
+    usage = "Restrict to reads that match none of the bits in <N>"
+  )
   var mismatchAllBits: Int = 0
 
   @Args4jOption(
     required = false,
     name = "-g",
     metaVar = "N",
-    usage = "Restrict to reads that match any of the bits in <N>")
+    usage = "Restrict to reads that match any of the bits in <N>"
+  )
   var matchSomeBits: Int = 0
 
   @Args4jOption(
     required = false,
     name = "-G",
     metaVar = "N",
-    usage = "Restrict to reads that mismatch at least one of the bits in <N>")
+    usage = "Restrict to reads that mismatch at least one of the bits in <N>"
+  )
   var mismatchSomeBits: Int = 0
 
   @Args4jOption(
     required = false,
     name = "-c",
-    usage = "Print count of matching records, instead of the records themselves")
+    usage = "Print count of matching records, instead of the records themselves"
+  )
   var printCount: Boolean = false
 
   @Args4jOption(
     required = false,
     name = "-o",
     metaVar = "<FILE>",
-    usage = "Output to <FILE>; can also pass <FILE> as the second argument")
+    usage = "Output to <FILE>; can also pass <FILE> as the second argument"
+  )
   var outputPathArg: String = null
 
   @Args4jOption(required = false, name = "-sort_fastq_output", usage = "Sets whether to sort the FASTQ output, if saving as FASTQ.
False by default. Ignored if not saving as FASTQ.") @@ -148,8 +154,7 @@ class View(val args: ViewArgs) extends BDGSparkCommand[ViewArgs] { reads.filter(read => allFilters.forall(_(read)) && (matchSomeFilters.isEmpty || matchSomeFilters.exists(_(read))) && - (mismatchSomeFilters.isEmpty || mismatchSomeFilters.exists(_(read))) - ) + (mismatchSomeFilters.isEmpty || mismatchSomeFilters.exists(_(read)))) } else reads } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGenerator.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGenerator.scala index 7727c75eed..befeeadb59 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGenerator.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGenerator.scala @@ -39,9 +39,11 @@ abstract class ConsensusGenerator extends Serializable { * @param reads Reads to preprocess. * @return Preprocessed reads. */ - def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord], - reference: String, - region: ReferenceRegion): Iterable[RichAlignmentRecord] + def preprocessReadsForRealignment( + reads: Iterable[RichAlignmentRecord], + reference: String, + region: ReferenceRegion + ): Iterable[RichAlignmentRecord] /** * For all reads in this region, generates the list of consensus sequences for realignment. diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromKnowns.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromKnowns.scala index 9e283d7567..7529203bc8 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromKnowns.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromKnowns.scala @@ -51,9 +51,11 @@ class ConsensusGeneratorFromKnowns(file: String, @transient sc: SparkContext) ex * @param reads Reads to preprocess. * @return Preprocessed reads. */ - def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord], - reference: String, - region: ReferenceRegion): Iterable[RichAlignmentRecord] = { + def preprocessReadsForRealignment( + reads: Iterable[RichAlignmentRecord], + reference: String, + region: ReferenceRegion + ): Iterable[RichAlignmentRecord] = { reads } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromReads.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromReads.scala index 936d203aa1..5caae2cc62 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromReads.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromReads.scala @@ -44,9 +44,11 @@ class ConsensusGeneratorFromReads extends ConsensusGenerator { * @param reads Reads to process. * @return Reads with indels normalized if they contain a single indel. 
*/ - def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord], - reference: String, - region: ReferenceRegion): Iterable[RichAlignmentRecord] = { + def preprocessReadsForRealignment( + reads: Iterable[RichAlignmentRecord], + reference: String, + region: ReferenceRegion + ): Iterable[RichAlignmentRecord] = { reads.map(r => { // if there are two alignment blocks (sequence matches) then there is a single indel in the read if (r.samtoolsCigar.numAlignmentBlocks == 2) { @@ -74,10 +76,14 @@ class ConsensusGeneratorFromReads extends ConsensusGenerator { .flatMap(r => { // try to generate a consensus alignment - if a consensus exists, add it to our // list of consensuses to test - Consensus.generateAlternateConsensus(r.getSequence, - ReferencePosition(r.getContig.getContigName, - r.getStart), - r.samtoolsCigar) + Consensus.generateAlternateConsensus( + r.getSequence, + ReferencePosition( + r.getContig.getContigName, + r.getStart + ), + r.samtoolsCigar + ) }) .toSeq .distinct diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromSmithWaterman.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromSmithWaterman.scala index 1f2c6d6643..a7c4a9ee70 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromSmithWaterman.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromSmithWaterman.scala @@ -25,10 +25,12 @@ import org.bdgenomics.adam.rich.RichCigar._ import org.bdgenomics.adam.util.MdTag import org.bdgenomics.formats.avro.AlignmentRecord -class ConsensusGeneratorFromSmithWaterman(wMatch: Double, - wMismatch: Double, - wInsert: Double, - wDelete: Double) extends ConsensusGeneratorFromReads { +class ConsensusGeneratorFromSmithWaterman( + wMatch: Double, + wMismatch: Double, + wInsert: Double, + wDelete: Double +) extends ConsensusGeneratorFromReads { /** * Attempts realignment of all reads using Smith-Waterman. Accepts all realignments that have one @@ -37,25 +39,31 @@ class ConsensusGeneratorFromSmithWaterman(wMatch: Double, * @param reads Reads to process. * @return Reads with indels normalized if they contain a single indel. 
*/ - override def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord], - reference: String, - region: ReferenceRegion): Iterable[RichAlignmentRecord] = { + override def preprocessReadsForRealignment( + reads: Iterable[RichAlignmentRecord], + reference: String, + region: ReferenceRegion + ): Iterable[RichAlignmentRecord] = { val rds: Iterable[RichAlignmentRecord] = reads.map(r => { - val sw = new SmithWatermanConstantGapScoring(r.record.getSequence, + val sw = new SmithWatermanConstantGapScoring( + r.record.getSequence, reference, wMatch, wMismatch, wInsert, - wDelete) + wDelete + ) println("for " + r.record.getReadName + " sw to " + sw.xStart + " with " + sw.cigarX) // if we realign with fewer than three alignment blocks, then take the new alignment if (sw.cigarX.numAlignmentBlocks <= 2) { - val mdTag = MdTag(r.record.getSequence, + val mdTag = MdTag( + r.record.getSequence, reference.drop(sw.xStart), sw.cigarX, - region.start) + region.start + ) val newRead: RichAlignmentRecord = AlignmentRecord.newBuilder(r) .setStart(sw.xStart + region.start) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala index 4e2ab4af9b..c3cb346911 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala @@ -130,11 +130,13 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria * * @see buildScoringMatrix */ - @tailrec private[smithwaterman] final def move(matrix: Array[Array[Char]], - i: Int, - j: Int, - cX: String, - cY: String): (String, String, Int, Int) = { + @tailrec private[smithwaterman] final def move( + matrix: Array[Array[Char]], + i: Int, + j: Int, + cX: String, + cY: String + ): (String, String, Int, Int) = { if (matrix(i)(j) == 'T') { // return if told to terminate (cigarFromRNNCigar(cX), cigarFromRNNCigar(cY), i, j) @@ -160,8 +162,10 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria * @param moveMatrix Move matrix to track back on. * @return Tuple of Cigar for X, Y. 
*/ - private[smithwaterman] def trackback(scoreMatrix: Array[Array[Double]], - moveMatrix: Array[Array[Char]]): (Cigar, Cigar, Int, Int) = { + private[smithwaterman] def trackback( + scoreMatrix: Array[Array[Double]], + moveMatrix: Array[Array[Char]] + ): (Cigar, Cigar, Int, Int) = { assert(scoreMatrix.length == xSequence.length + 1) assert(scoreMatrix.forall(_.length == ySequence.length + 1)) assert(moveMatrix.length == xSequence.length + 1) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanConstantGapScoring.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanConstantGapScoring.scala index 1b8d2c7302..d91ca1df13 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanConstantGapScoring.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanConstantGapScoring.scala @@ -33,11 +33,13 @@ object SmithWatermanConstantGapScoring { } -class SmithWatermanConstantGapScoring(xSequence: String, - ySequence: String, - wMatch: Double, - wMismatch: Double, - wInsert: Double, - wDelete: Double) +class SmithWatermanConstantGapScoring( + xSequence: String, + ySequence: String, + wMatch: Double, + wMismatch: Double, + wInsert: Double, + wDelete: Double +) extends SmithWatermanGapScoringFromFn(xSequence, ySequence, SmithWatermanConstantGapScoring.constantGapFn(wMatch, wInsert, wDelete, wMismatch)) { } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanGapScoringFromFn.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanGapScoringFromFn.scala index 946acd3dae..1fe2b1ffd8 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanGapScoringFromFn.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanGapScoringFromFn.scala @@ -17,9 +17,11 @@ */ package org.bdgenomics.adam.algorithms.smithwaterman -abstract class SmithWatermanGapScoringFromFn(xSequence: String, - ySequence: String, - scoreFn: (Int, Int, Char, Char) => Double) +abstract class SmithWatermanGapScoringFromFn( + xSequence: String, + ySequence: String, + scoreFn: (Int, Int, Char, Char) => Double +) extends SmithWaterman(xSequence, ySequence) { def buildScoringMatrix(): (Array[Array[Double]], Array[Array[Char]]) = { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/AlignmentRecordConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/AlignmentRecordConverter.scala index dd35627846..22e576e5f2 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/AlignmentRecordConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/AlignmentRecordConverter.scala @@ -39,9 +39,11 @@ class AlignmentRecordConverter extends Serializable { * @param adamRecord Read to convert to FASTQ. * @return Returns this read in string form. 
*/ - def convertToFastq(adamRecord: AlignmentRecord, - maybeAddSuffix: Boolean = false, - outputOriginalBaseQualities: Boolean = false): String = { + def convertToFastq( + adamRecord: AlignmentRecord, + maybeAddSuffix: Boolean = false, + outputOriginalBaseQualities: Boolean = false + ): String = { val readNameSuffix = if (maybeAddSuffix && !AlignmentRecordConverter.readNameHasPairedSuffix(adamRecord) && diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/FastaConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/FastaConverter.scala index 5cfd8a7027..20e778cec6 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/FastaConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/FastaConverter.scala @@ -70,8 +70,10 @@ private[adam] object FastaConverter { * @param maxFragmentLength The maximum length of fragments in the contig. * @return An RDD of ADAM FASTA data. */ - def apply(rdd: RDD[(Long, String)], - maxFragmentLength: Long = 10000L): RDD[NucleotideContigFragment] = { + def apply( + rdd: RDD[(Long, String)], + maxFragmentLength: Long = 10000L + ): RDD[NucleotideContigFragment] = { val filtered = rdd.map(kv => (kv._1, kv._2.trim())) .filter((kv: (Long, String)) => !kv._2.startsWith(";")) @@ -96,10 +98,12 @@ private[adam] object FastaConverter { assert(lines.size != 0, "Sequence " + descriptionLine.seqId + " has no sequence data.") val sequence: Seq[String] = lines.toSeq.sortBy(_._1).map(kv => cleanSequence(kv._2)) - converter.convert(descriptionLine.contigName, + converter.convert( + descriptionLine.contigName, descriptionLine.seqId, sequence, - descriptionLine.contigDescription) + descriptionLine.contigDescription + ) } } @@ -182,10 +186,12 @@ private[converters] class FastaConverter(fragmentLength: Long) extends Serializa * @param description Optional description of the sequence. * @return The converted ADAM FASTA contig. */ - def convert(name: Option[String], - id: Int, - sequence: Seq[String], - description: Option[String]): Seq[NucleotideContigFragment] = { + def convert( + name: Option[String], + id: Int, + sequence: Seq[String], + description: Option[String] + ): Seq[NucleotideContigFragment] = { // get sequence length val sequenceLength = sequence.map(_.length).sum diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/FastqRecordConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/FastqRecordConverter.scala index 3ecec9d784..2f6cab9362 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/FastqRecordConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/FastqRecordConverter.scala @@ -38,19 +38,24 @@ class FastqRecordConverter extends Serializable with Logging { val firstReadSequence = lines(1) val firstReadQualities = lines(3) - require(firstReadSequence.length == firstReadQualities.length, - "Read " + firstReadName + " has different sequence and qual length.") + require( + firstReadSequence.length == firstReadQualities.length, + "Read " + firstReadName + " has different sequence and qual length." + ) // get fields for second read in pair val secondReadName = lines(4).drop(1) val secondReadSequence = lines(5) val secondReadQualities = lines(7) - require(secondReadSequence.length == secondReadQualities.length, - "Read " + secondReadName + " has different sequence and qual length.") + require( + secondReadSequence.length == secondReadQualities.length, + "Read " + secondReadName + " has different sequence and qual length." 
+ ) // build and return iterators - Iterable(AlignmentRecord.newBuilder() + Iterable( + AlignmentRecord.newBuilder() .setReadName(firstReadName) .setSequence(firstReadSequence) .setQual(firstReadQualities) @@ -75,7 +80,8 @@ class FastqRecordConverter extends Serializable with Logging { .setPrimaryAlignment(null) .setSecondaryAlignment(null) .setSupplementaryAlignment(null) - .build()) + .build() + ) } def convertFragment(element: (Void, Text)): Fragment = { @@ -87,19 +93,27 @@ class FastqRecordConverter extends Serializable with Logging { val firstReadSequence = lines(1) val firstReadQualities = lines(3) - require(firstReadSequence.length == firstReadQualities.length, - "Read " + firstReadName + " has different sequence and qual length.") + require( + firstReadSequence.length == firstReadQualities.length, + "Read " + firstReadName + " has different sequence and qual length." + ) // get fields for second read in pair val secondReadName = lines(4).drop(1) val secondReadSequence = lines(5) val secondReadQualities = lines(7) - require(secondReadSequence.length == secondReadQualities.length, - "Read " + secondReadName + " has different sequence and qual length.") - require(firstReadName == secondReadName, - "Reads %s and %s in Fragment have different names.".format(firstReadName, - secondReadName)) + require( + secondReadSequence.length == secondReadQualities.length, + "Read " + secondReadName + " has different sequence and qual length." + ) + require( + firstReadName == secondReadName, + "Reads %s and %s in Fragment have different names.".format( + firstReadName, + secondReadName + ) + ) // build and return record Fragment.newBuilder() @@ -114,11 +128,13 @@ class FastqRecordConverter extends Serializable with Logging { .build() } - def convertRead(element: (Void, Text), - recordGroupOpt: Option[String] = None, - setFirstOfPair: Boolean = false, - setSecondOfPair: Boolean = false, - stringency: ValidationStringency = ValidationStringency.STRICT): AlignmentRecord = { + def convertRead( + element: (Void, Text), + recordGroupOpt: Option[String] = None, + setFirstOfPair: Boolean = false, + setSecondOfPair: Boolean = false, + stringency: ValidationStringency = ValidationStringency.STRICT + ): AlignmentRecord = { val lines = element._2.toString.split('\n') require(lines.length == 4, "Record has wrong format:\n" + element._2.toString) @@ -162,9 +178,11 @@ class FastqRecordConverter extends Serializable with Logging { else lines(3) - require(readSequence.length == readQualities.length, + require( + readSequence.length == readQualities.length, "Read " + readName + " has different sequence and qual length: " + - "\n\tsequence=" + readSequence + "\n\tqual=" + readQualities) + "\n\tsequence=" + readSequence + "\n\tqual=" + readQualities + ) val builder = AlignmentRecord.newBuilder() .setReadName(readName) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala index 8ee7d80e16..0ad07ab979 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala @@ -25,8 +25,10 @@ import scala.annotation.tailrec private[converters] object FragmentCollector extends Serializable { def apply(fragment: NucleotideContigFragment): (Contig, FragmentCollector) = { - (fragment.getContig, - FragmentCollector(Seq((ReferenceRegion(fragment).get, fragment.getFragmentSequence)))) + ( + 
fragment.getContig, + FragmentCollector(Seq((ReferenceRegion(fragment).get, fragment.getFragmentSequence))) + ) } } @@ -35,8 +37,10 @@ private[converters] case class FragmentCollector(fragments: Seq[(ReferenceRegion object FragmentConverter extends Serializable { - private def mergeFragments(f1: FragmentCollector, - f2: FragmentCollector): FragmentCollector = { + private def mergeFragments( + f1: FragmentCollector, + f2: FragmentCollector + ): FragmentCollector = { assert(!(f1.fragments.isEmpty || f2.fragments.isEmpty)) // join fragments from each and sort @@ -44,8 +48,10 @@ object FragmentConverter extends Serializable { var fragmentList = List[(ReferenceRegion, String)]() - @tailrec def fragmentCombiner(lastFragment: (ReferenceRegion, String), - iter: Iterator[(ReferenceRegion, String)]) { + @tailrec def fragmentCombiner( + lastFragment: (ReferenceRegion, String), + iter: Iterator[(ReferenceRegion, String)] + ) { if (!iter.hasNext) { // prepend fragment to list fragmentList = lastFragment :: fragmentList diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/GenotypesToVariantsConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/GenotypesToVariantsConverter.scala index 62ecdf9411..6b9b7ff09c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/GenotypesToVariantsConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/GenotypesToVariantsConverter.scala @@ -20,8 +20,10 @@ package org.bdgenomics.adam.converters import org.bdgenomics.adam.util._ import scala.math.{ pow, sqrt } -private[adam] class GenotypesToVariantsConverter(validateSamples: Boolean = false, - failOnValidationError: Boolean = false) extends Serializable { +private[adam] class GenotypesToVariantsConverter( + validateSamples: Boolean = false, + failOnValidationError: Boolean = false +) extends Serializable { /** * Computes root mean squared (RMS) values for a series of doubles. 
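As a minimal usage sketch for the FastqRecordConverter.convertRead signature reformatted above: the FASTQ text and record-group name below are made-up sample values, and the standalone invocation (outside a Hadoop input format) is assumed purely for illustration.

    import htsjdk.samtools.ValidationStringency
    import org.apache.hadoop.io.Text
    import org.bdgenomics.adam.converters.FastqRecordConverter

    // A made-up 4-line FASTQ record; convertRead requires exactly four
    // newline-separated lines and equal-length sequence/quality strings.
    val converter = new FastqRecordConverter()
    val fastq = new Text("@read1\nACGT\n+\nIIII")
    val read = converter.convertRead(
      (null, fastq),                    // (Void, Text) pair, as a Hadoop input format would supply
      recordGroupOpt = Some("sample1"), // hypothetical record group name
      setFirstOfPair = true,
      stringency = ValidationStringency.STRICT
    )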
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala index d71832ed5d..55aebf8b7b 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala @@ -36,9 +36,11 @@ import org.bdgenomics.formats.avro.AlignmentRecord import scala.collection.JavaConverters._ class SAMRecordConverter extends Serializable with Logging { - def convert(samRecord: SAMRecord, - dict: SequenceDictionary, - readGroups: RecordGroupDictionary): AlignmentRecord = { + def convert( + samRecord: SAMRecord, + dict: SequenceDictionary, + readGroups: RecordGroupDictionary + ): AlignmentRecord = { try { val cigar: String = samRecord.getCigarString val startTrim = if (cigar == "*") { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantAnnotationConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantAnnotationConverter.scala index d7f06cedfb..861bf2c7c6 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantAnnotationConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantAnnotationConverter.scala @@ -63,20 +63,24 @@ object VariantAnnotationConverter extends Serializable { AttrKey("geneSymbol", attrAsString _, new VCFInfoHeaderLine("GENE,", 1, VCFHeaderLineType.String, "Gene name")), AttrKey("strand", attrAsString _, new VCFInfoHeaderLine("STRAND,", 1, VCFHeaderLineType.String, "Gene strand")), AttrKey("cds", attrAsString _, new VCFInfoHeaderLine("CDS,", 1, VCFHeaderLineType.String, "CDS annotation")), - AttrKey("cnt", attrAsString _, new VCFInfoHeaderLine("CNT,", 1, VCFHeaderLineType.Integer, "How many samples have this mutation"))) + AttrKey("cnt", attrAsString _, new VCFInfoHeaderLine("CNT,", 1, VCFHeaderLineType.Integer, "How many samples have this mutation")) + ) val DBNSFP_KEYS: List[AttrKey] = List( AttrKey("phylop", attrAsFloat _, new VCFInfoHeaderLine("PHYLOP", 1, VCFHeaderLineType.Float, "PhyloP score. The larger the score, the more conserved the site.")), AttrKey("siftPred", attrAsString _, new VCFInfoHeaderLine("SIFT_PRED", 1, VCFHeaderLineType.Character, "SIFT Prediction: D (damaging), T (tolerated)")), AttrKey("siftScore", attrAsFloat _, new VCFInfoHeaderLine("SIFT_SCORE", 1, VCFHeaderLineType.Float, "SIFT Score")), - AttrKey("ancestralAllele", attrAsString _, new VCFInfoHeaderLine("AA", 1, VCFHeaderLineType.String, "Ancestral allele"))) + AttrKey("ancestralAllele", attrAsString _, new VCFInfoHeaderLine("AA", 1, VCFHeaderLineType.String, "Ancestral allele")) + ) val CLINVAR_KEYS: List[AttrKey] = List( AttrKey("dbSnpId", attrAsInt _, new VCFInfoHeaderLine("dbSNP ID", 1, VCFHeaderLineType.Integer, "dbSNP ID")), - AttrKey("geneSymbol", attrAsString _, new VCFInfoHeaderLine("GENEINFO", 1, VCFHeaderLineType.String, "Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar"))) + AttrKey("geneSymbol", attrAsString _, new VCFInfoHeaderLine("GENEINFO", 1, VCFHeaderLineType.String, "Pairs each of gene symbol:gene id. 
The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar")) + ) val OMIM_KEYS: List[AttrKey] = List( - AttrKey("omimId", attrAsString _, new VCFInfoHeaderLine("VAR", 1, VCFHeaderLineType.String, "MIM entry with variant mapped to rsID"))) + AttrKey("omimId", attrAsString _, new VCFInfoHeaderLine("VAR", 1, VCFHeaderLineType.String, "MIM entry with variant mapped to rsID")) + ) val INFO_KEYS: Seq[AttrKey] = Seq( AttrKey("fisherStrandBiasPValue", attrAsFloat _, VCFStandardHeaderLines.getInfoLine(VCFConstants.STRAND_BIAS_KEY)), @@ -85,7 +89,8 @@ object VariantAnnotationConverter extends Serializable { AttrKey("mqRankSum", attrAsFloat _, new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities")), AttrKey("readPositionRankSum", attrAsFloat _, new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias")), AttrKey("vqslod", attrAsFloat _, new VCFInfoHeaderLine("VQSLOD", 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model")), - AttrKey("culprit", attrAsString _, new VCFInfoHeaderLine("culprit", 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"))) + AttrKey("culprit", attrAsString _, new VCFInfoHeaderLine("culprit", 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out")) + ) val FORMAT_KEYS: Seq[AttrKey] = Seq( AttrKey("alleles", VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY)), @@ -97,7 +102,8 @@ object VariantAnnotationConverter extends Serializable { AttrKey("phaseQuality", attrAsInt _, new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality")), AttrKey("phaseSetId", attrAsInt _, new VCFFormatHeaderLine(VCFConstants.PHASE_SET_KEY, 1, VCFHeaderLineType.Integer, "Phase set")), AttrKey("minReadDepth", attrAsInt _, new VCFFormatHeaderLine("MIN_DP", 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block")), - AttrKey("strandBiasComponents", attrAsInt _, new VCFFormatHeaderLine("SB", 4, VCFHeaderLineType.Integer, "Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."))) + AttrKey("strandBiasComponents", attrAsInt _, new VCFFormatHeaderLine("SB", 4, VCFHeaderLineType.Integer, "Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.")) + ) lazy val infoHeaderLines: Seq[VCFCompoundHeaderLine] = INFO_KEYS.map(_.hdrLine) lazy val formatHeaderLines: Seq[VCFCompoundHeaderLine] = FORMAT_KEYS.map(_.hdrLine) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala index 5150ee28ef..cce578965f 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala @@ -110,15 +110,19 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S return Seq(ADAMVariantContext(variant, genotypes, None)) } case List(allele) => { - assert(allele.isNonReference, - "Assertion failed when converting: " + 
vc.toString) + assert( + allele.isNonReference, + "Assertion failed when converting: " + vc.toString + ) val variant = createADAMVariant(vc, Some(allele.getDisplayString)) val genotypes = extractReferenceModelGenotypes(vc, variant, calling_annotations) return Seq(ADAMVariantContext(variant, genotypes, None)) } case List(allele, NON_REF_ALLELE) => { - assert(allele.isNonReference, - "Assertion failed when converting: " + vc.toString) + assert( + allele.isNonReference, + "Assertion failed when converting: " + vc.toString + ) val variant = createADAMVariant(vc, Some(allele.getDisplayString)) val genotypes = extractReferenceModelGenotypes(vc, variant, calling_annotations) return Seq(ADAMVariantContext(variant, genotypes, None)) @@ -189,13 +193,17 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S createADAMVariant(vc, None /* No alternate allele */ ) } case List(allele) => { - assert(allele.isNonReference, - "Assertion failed when converting: " + vc.toString) + assert( + allele.isNonReference, + "Assertion failed when converting: " + vc.toString + ) createADAMVariant(vc, Some(allele.getDisplayString)) } case List(allele, NON_REF_ALLELE) => { - assert(allele.isNonReference, - "Assertion failed when converting: " + vc.toString) + assert( + allele.isNonReference, + "Assertion failed when converting: " + vc.toString + ) createADAMVariant(vc, Some(allele.getDisplayString)) } case alleles :+ NON_REF_ALLELE => { @@ -245,7 +253,8 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S vc: BroadVariantContext, variant: Variant, annotations: VariantCallingAnnotations, - setPL: (htsjdk.variant.variantcontext.Genotype, Genotype.Builder) => Unit): Seq[Genotype] = { + setPL: (htsjdk.variant.variantcontext.Genotype, Genotype.Builder) => Unit + ): Seq[Genotype] = { val genotypes: Seq[Genotype] = vc.getGenotypes.map( (g: htsjdk.variant.variantcontext.Genotype) => { @@ -266,7 +275,8 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S setPL(g, genotype) VariantAnnotationConverter.convert(g, genotype.build) - }).toSeq + } + ).toSeq genotypes } @@ -322,8 +332,10 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S def convert(vc: ADAMVariantContext): BroadVariantContext = { val variant: Variant = vc.variant val vcb = new VariantContextBuilder() - .chr(refSeqToContig.getOrElse(variant.getContig.getContigName, - variant.getContig.getContigName)) + .chr(refSeqToContig.getOrElse( + variant.getContig.getContigName, + variant.getContig.getContigName + )) .start(variant.getStart + 1 /* Recall ADAM is 0-indexed */ ) .stop(variant.getStart + variant.getReferenceAllele.length) .alleles(VariantContextConverter.convertAlleles(variant)) @@ -334,7 +346,8 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S try { vcb.genotypes(vc.genotypes.map(g => { val gb = new htsjdk.variant.variantcontext.GenotypeBuilder( - g.getSampleId, VariantContextConverter.convertAlleles(g)) + g.getSampleId, VariantContextConverter.convertAlleles(g) + ) Option(g.getIsPhased).foreach(gb.phased(_)) Option(g.getGenotypeQuality).foreach(gb.GQ(_)) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/Alphabet.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/Alphabet.scala index 391809da75..63d80619a0 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/Alphabet.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/Alphabet.scala @@ -52,7 +52,8 @@ trait Alphabet 
{ * @throws IllegalArgumentException if the string contains a symbol which is not in the alphabet */ def reverseComplementExact(s: String): String = { - reverseComplement(s, + reverseComplement( + s, (symbol: Char) => throw new IllegalArgumentException("Character %s not found in alphabet.".format(symbol)) ) } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/Gene.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/Gene.scala index 5d7d46be22..115edce9cf 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/Gene.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/Gene.scala @@ -73,13 +73,15 @@ case class Gene(id: String, names: Seq[String], strand: Boolean, transcripts: It * transcript * @param utrs */ -case class Transcript(id: String, - names: Seq[String], - geneId: String, - strand: Boolean, - exons: Iterable[Exon], - cds: Iterable[CDS], - utrs: Iterable[UTR]) { +case class Transcript( + id: String, + names: Seq[String], + geneId: String, + strand: Boolean, + exons: Iterable[Exon], + cds: Iterable[CDS], + utrs: Iterable[UTR] +) { lazy val region = exons.map(_.region).reduceLeft[ReferenceRegion]((acc, ex) => acc.hull(ex)) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/IndelTable.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/IndelTable.scala index bbf46217cc..8b5dade283 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/IndelTable.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/IndelTable.scala @@ -24,8 +24,10 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.Variant class IndelTable(private val table: Map[String, Iterable[Consensus]]) extends Serializable with Logging { - log.info("Indel table has %s contigs and %s entries".format(table.size, - table.values.map(_.size).sum)) + log.info("Indel table has %s contigs and %s entries".format( + table.size, + table.values.map(_.size).sum + )) /** * Returns all known indels within the given reference region. If none are known, returns an empty Seq. diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/NonoverlappingRegions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/NonoverlappingRegions.scala index ef0c06ad2e..4c3eff0d4f 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/NonoverlappingRegions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/NonoverlappingRegions.scala @@ -74,7 +74,8 @@ class NonoverlappingRegions(regions: Iterable[ReferenceRegion]) extends Serializ def mergeRegions(regs: Seq[(ReferenceRegion)]): List[ReferenceRegion] = regs.aggregate(List[ReferenceRegion]())( (lst: List[ReferenceRegion], p: (ReferenceRegion)) => updateListWithRegion(lst, p), - (a, b) => a ++ b) + (a, b) => a ++ b + ) def binaryPointSearch(pos: Long, lessThan: Boolean): Int = { var i = 0 @@ -186,8 +187,10 @@ object NonoverlappingRegions { * dictionary. 
*/ class MultiContigNonoverlappingRegions(regions: Seq[(String, Iterable[ReferenceRegion])]) extends Serializable { - assert(regions != null, - "Regions was set to null") + assert( + regions != null, + "Regions was set to null" + ) val regionMap: Map[String, NonoverlappingRegions] = Map(regions.map(r => (r._1, new NonoverlappingRegions(r._2))): _*) @@ -203,9 +206,10 @@ object MultiContigNonoverlappingRegions { def apply[T](values: Seq[(ReferenceRegion, T)]): MultiContigNonoverlappingRegions = { new MultiContigNonoverlappingRegions( values.map(kv => (kv._1.referenceName, kv._1)) - .groupBy(t => t._1) - .map(t => (t._1, t._2.map(k => k._2))) - .toSeq) + .groupBy(t => t._1) + .map(t => (t._1, t._2.map(k => k._2))) + .toSeq + ) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ProgramRecord.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ProgramRecord.scala index 6b6ece8892..ac7f372619 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ProgramRecord.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ProgramRecord.scala @@ -36,11 +36,13 @@ object ProgramRecord { } } -case class ProgramRecord(id: String, - commandLine: Option[String], - name: Option[String], - version: Option[String], - previousID: Option[String]) { +case class ProgramRecord( + id: String, + commandLine: Option[String], + name: Option[String], + version: Option[String], + previousID: Option[String] +) { def toSAMProgramRecord(): SAMProgramRecord = { val pr = new SAMProgramRecord(id) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReadBucket.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReadBucket.scala index 9f60044d83..0f01e546df 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReadBucket.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReadBucket.scala @@ -30,13 +30,15 @@ import org.bdgenomics.formats.avro.AlignmentRecord * * This is useful as this will usually map a single read in any of the sequences. 
*/ -case class ReadBucket(unpairedPrimaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, - pairedFirstPrimaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, - pairedSecondPrimaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, - unpairedSecondaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, - pairedFirstSecondaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, - pairedSecondSecondaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, - unmappedReads: Iterable[AlignmentRecord] = Seq.empty) { +case class ReadBucket( + unpairedPrimaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, + pairedFirstPrimaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, + pairedSecondPrimaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, + unpairedSecondaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, + pairedFirstSecondaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, + pairedSecondSecondaryMappedReads: Iterable[AlignmentRecord] = Seq.empty, + unmappedReads: Iterable[AlignmentRecord] = Seq.empty +) { def allReads(): Iterable[AlignmentRecord] = unpairedPrimaryMappedReads ++ pairedFirstPrimaryMappedReads ++ @@ -89,31 +91,40 @@ class ReadBucketSerializer extends Serializer[ReadBucket] { unpairedSecondaryReads, pairedFirstSecondaryMappedReads, pairedSecondSecondaryMappedReads, - unmappedReads) + unmappedReads + ) } } object ReadBucket { implicit def singleReadBucketToReadBucket(bucket: SingleReadBucket): ReadBucket = { // check that reads are either first or second read from fragment - bucket.primaryMapped.foreach(r => require(r.getReadNum >= 0 && r.getReadNum <= 1, - "Read %s is not first or second read from pair (num = %d).".format(r, r.getReadNum))) - bucket.secondaryMapped.foreach(r => require(r.getReadNum >= 0 && r.getReadNum <= 1, - "Read %s is not first or second read from pair (num = %d).".format(r, r.getReadNum))) - bucket.unmapped.foreach(r => require(r.getReadNum >= 0 && r.getReadNum <= 1, - "Read %s is not first or second read from pair (num = %d).".format(r, r.getReadNum))) + bucket.primaryMapped.foreach(r => require( + r.getReadNum >= 0 && r.getReadNum <= 1, + "Read %s is not first or second read from pair (num = %d).".format(r, r.getReadNum) + )) + bucket.secondaryMapped.foreach(r => require( + r.getReadNum >= 0 && r.getReadNum <= 1, + "Read %s is not first or second read from pair (num = %d).".format(r, r.getReadNum) + )) + bucket.unmapped.foreach(r => require( + r.getReadNum >= 0 && r.getReadNum <= 1, + "Read %s is not first or second read from pair (num = %d).".format(r, r.getReadNum) + )) val (pairedPrimary, unpairedPrimary) = bucket.primaryMapped.partition(_.getReadPaired) val (pairedFirstPrimary, pairedSecondPrimary) = pairedPrimary.partition(_.getReadNum == 0) val (pairedSecondary, unpairedSecondary) = bucket.secondaryMapped.partition(_.getReadPaired) val (pairedFirstSecondary, pairedSecondSecondary) = pairedSecondary.partition(_.getReadNum == 0) - new ReadBucket(unpairedPrimary, + new ReadBucket( + unpairedPrimary, pairedFirstPrimary, pairedSecondPrimary, unpairedSecondary, pairedFirstSecondary, pairedSecondSecondary, - bucket.unmapped) + bucket.unmapped + ) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/RecordGroupDictionary.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/RecordGroupDictionary.scala index df25dbd969..9bcab3aefe 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/RecordGroupDictionary.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/RecordGroupDictionary.scala @@ -68,8 +68,10 
@@ class RecordGroupDictionary(val recordGroups: Seq[RecordGroup]) extends Serializ (name, (group, index)) }).toMap - assert(recordGroupMap.size == recordGroups.length, - "Read group dictionary contains multiple samples with identical read group names.") + assert( + recordGroupMap.size == recordGroups.length, + "Read group dictionary contains multiple samples with identical read group names." + ) def ++(that: RecordGroupDictionary): RecordGroupDictionary = { new RecordGroupDictionary(recordGroups ++ that.recordGroups) @@ -133,9 +135,12 @@ object RecordGroup { * @return Returns an equivalent ADAM format record group. */ def apply(samRGR: SAMReadGroupRecord): RecordGroup = { - assert(samRGR.getSample != null, - "Sample ID is not set for read group " + samRGR.getReadGroupId) - new RecordGroup(samRGR.getSample, + assert( + samRGR.getSample != null, + "Sample ID is not set for read group " + samRGR.getReadGroupId + ) + new RecordGroup( + samRGR.getSample, samRGR.getReadGroupId, Option(samRGR.getSequencingCenter).map(_.toString), Option(samRGR.getDescription).map(_.toString), @@ -149,21 +154,24 @@ object RecordGroup { i }).map(_.toInt), Option(samRGR.getPlatform).map(_.toString), - Option(samRGR.getPlatformUnit).map(_.toString)) + Option(samRGR.getPlatformUnit).map(_.toString) + ) } } -class RecordGroup(val sample: String, - val recordGroupName: String, - val sequencingCenter: Option[String] = None, - val description: Option[String] = None, - val runDateEpoch: Option[Long] = None, - val flowOrder: Option[String] = None, - val keySequence: Option[String] = None, - val library: Option[String] = None, - val predictedMedianInsertSize: Option[Int] = None, - val platform: Option[String] = None, - val platformUnit: Option[String] = None) extends Serializable { +class RecordGroup( + val sample: String, + val recordGroupName: String, + val sequencingCenter: Option[String] = None, + val description: Option[String] = None, + val runDateEpoch: Option[Long] = None, + val flowOrder: Option[String] = None, + val keySequence: Option[String] = None, + val library: Option[String] = None, + val predictedMedianInsertSize: Option[Int] = None, + val platform: Option[String] = None, + val platformUnit: Option[String] = None +) extends Serializable { /** * Compares equality to another object. 
Only checks equality via the sample and diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala index 9c5b22409f..73c57f9c9c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala @@ -83,9 +83,11 @@ object ReferencePosition extends Serializable { } } -class ReferencePosition(override val referenceName: String, - val pos: Long, - override val orientation: Strand = Strand.Independent) +class ReferencePosition( + override val referenceName: String, + val pos: Long, + override val orientation: Strand = Strand.Independent +) extends ReferenceRegion(referenceName, pos, pos + 1, orientation) class ReferencePositionSerializer extends Serializer[ReferencePosition] { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePositionPair.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePositionPair.scala index 64e47bdd74..b76bdc831d 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePositionPair.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePositionPair.scala @@ -42,18 +42,24 @@ object ReferencePositionPair extends Logging { } if (firstOfPair.size + secondOfPair.size > 0) { - new ReferencePositionPair(firstOfPair.lift(0).map(getPos), - secondOfPair.lift(0).map(getPos)) + new ReferencePositionPair( + firstOfPair.lift(0).map(getPos), + secondOfPair.lift(0).map(getPos) + ) } else { - new ReferencePositionPair((singleReadBucket.primaryMapped ++ + new ReferencePositionPair( + (singleReadBucket.primaryMapped ++ singleReadBucket.unmapped).toSeq.lift(0).map(getPos), - None) + None + ) } } } -case class ReferencePositionPair(read1refPos: Option[ReferencePosition], - read2refPos: Option[ReferencePosition]) +case class ReferencePositionPair( + read1refPos: Option[ReferencePosition], + read2refPos: Option[ReferencePosition] +) class ReferencePositionPairSerializer extends Serializer[ReferencePositionPair] { val rps = new ReferencePositionSerializer() diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala index 959769b02a..436c6ab22a 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala @@ -24,8 +24,10 @@ import org.bdgenomics.formats.avro._ import scala.math.{ max, min } trait ReferenceOrdering[T <: ReferenceRegion] extends Ordering[T] { - private def regionCompare(a: T, - b: T): Int = { + private def regionCompare( + a: T, + b: T + ): Int = { if (a.referenceName != b.referenceName) { a.referenceName.compareTo(b.referenceName) } else if (a.start != b.start) { @@ -35,8 +37,10 @@ trait ReferenceOrdering[T <: ReferenceRegion] extends Ordering[T] { } } - def compare(a: T, - b: T): Int = { + def compare( + a: T, + b: T + ): Int = { val rc = regionCompare(a, b) if (rc == 0) { a.orientation.ordinal compare b.orientation.ordinal @@ -137,10 +141,12 @@ object ReferenceRegion { * which is not in the region -- i.e. [start, end) define a 0-based * half-open interval. 
*/ -case class ReferenceRegion(referenceName: String, - start: Long, - end: Long, - orientation: Strand = Strand.Independent) +case class ReferenceRegion( + referenceName: String, + start: Long, + end: Long, + orientation: Strand = Strand.Independent +) extends Comparable[ReferenceRegion] with Interval { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala index f3225d970d..cf169872be 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala @@ -125,15 +125,19 @@ class SequenceDictionary(val records: Vector[SequenceRecord]) extends Serializab } object SequenceOrderingByName extends Ordering[SequenceRecord] { - def compare(a: SequenceRecord, - b: SequenceRecord): Int = { + def compare( + a: SequenceRecord, + b: SequenceRecord + ): Int = { a.name.compareTo(b.name) } } object SequenceOrderingByRefIdx extends Ordering[SequenceRecord] { - def compare(a: SequenceRecord, - b: SequenceRecord): Int = { + def compare( + a: SequenceRecord, + b: SequenceRecord + ): Int = { (for { aRefIdx <- a.referenceIndex bRefIdx <- b.referenceIndex @@ -158,7 +162,8 @@ case class SequenceRecord( genbank: Option[String], assembly: Option[String], species: Option[String], - referenceIndex: Option[Int]) extends Serializable { + referenceIndex: Option[Int] +) extends Serializable { assert(name != null && !name.isEmpty, "SequenceRecord.name is null or empty") assert(length > 0, "SequenceRecord.length <= 0") @@ -214,15 +219,17 @@ object SequenceRecord { val REFSEQ_TAG = "REFSEQ" val GENBANK_TAG = "GENBANK" - def apply(name: String, - length: Long, - md5: String = null, - url: String = null, - refseq: String = null, - genbank: String = null, - assembly: String = null, - species: String = null, - referenceIndex: Option[Int] = None): SequenceRecord = { + def apply( + name: String, + length: Long, + md5: String = null, + url: String = null, + refseq: String = null, + genbank: String = null, + assembly: String = null, + species: String = null, + referenceIndex: Option[Int] = None + ): SequenceRecord = { new SequenceRecord( name, length, diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/SingleReadBucket.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/SingleReadBucket.scala index 0b07b6e9f6..48675ef66d 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/SingleReadBucket.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/SingleReadBucket.scala @@ -46,9 +46,11 @@ object SingleReadBucket extends Logging { } } -case class SingleReadBucket(primaryMapped: Iterable[AlignmentRecord] = Seq.empty, - secondaryMapped: Iterable[AlignmentRecord] = Seq.empty, - unmapped: Iterable[AlignmentRecord] = Seq.empty) { +case class SingleReadBucket( + primaryMapped: Iterable[AlignmentRecord] = Seq.empty, + secondaryMapped: Iterable[AlignmentRecord] = Seq.empty, + unmapped: Iterable[AlignmentRecord] = Seq.empty +) { // Note: not a val in order to save serialization/memory cost def allReads = { primaryMapped ++ secondaryMapped ++ unmapped diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala index ffb4c7192e..163875a7ce 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala +++ 
b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala @@ -72,8 +72,10 @@ object VariantContext { */ def buildFromGenotypes(genotypes: Seq[Genotype]): VariantContext = { val position = ReferencePosition(genotypes.head) - assert(genotypes.map(ReferencePosition(_)).forall(_ == position), - "Genotypes do not all have the same position.") + assert( + genotypes.map(ReferencePosition(_)).forall(_ == position), + "Genotypes do not all have the same position." + ) val variant = genotypes.head.getVariant @@ -85,6 +87,7 @@ class VariantContext( val position: ReferencePosition, val variant: RichVariant, val genotypes: Iterable[Genotype], - val databases: Option[DatabaseVariantAnnotation] = None) { + val databases: Option[DatabaseVariantAnnotation] = None +) { } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala index ad4a5a70ee..2287496010 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala @@ -132,9 +132,11 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log * @tparam T The type of records to return * @return An RDD with records of the specified type */ - def loadParquet[T](filePath: String, - predicate: Option[FilterPredicate] = None, - projection: Option[Schema] = None)(implicit ev1: T => SpecificRecord, ev2: Manifest[T]): RDD[T] = { + def loadParquet[T]( + filePath: String, + predicate: Option[FilterPredicate] = None, + projection: Option[Schema] = None + )(implicit ev1: T => SpecificRecord, ev2: Manifest[T]): RDD[T] = { //make sure a type was specified //not using require as to make the message clearer if (manifest[T] == manifest[scala.Nothing]) @@ -202,7 +204,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log AlignmentRecordField.readPaired, AlignmentRecordField.firstOfPair, AlignmentRecordField.readMapped, - AlignmentRecordField.mateMapped) + AlignmentRecordField.mateMapped + ) } else if (isADAMContig) { Projection(NucleotideContigFragmentField.contig) } else { @@ -229,7 +232,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log val dict = recs.aggregate(SequenceDictionary())( (dict: SequenceDictionary, rec: SequenceRecord) => dict + rec, - (dict1: SequenceDictionary, dict2: SequenceDictionary) => dict1 ++ dict2) + (dict1: SequenceDictionary, dict2: SequenceDictionary) => dict1 ++ dict2 + ) dict } @@ -297,7 +301,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log * @param viewRegion The ReferenceRegion we are filtering on */ def loadIndexedBam( - filePath: String, viewRegion: ReferenceRegion): RDD[AlignmentRecord] = { + filePath: String, viewRegion: ReferenceRegion + ): RDD[AlignmentRecord] = { val path = new Path(filePath) val fs = FileSystem.get(path.toUri, sc.hadoopConfiguration) assert(!fs.isDirectory(path)) @@ -327,10 +332,12 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log }) val samDict = SAMHeaderReader.readSAMHeaderFrom(path, sc.hadoopConfiguration).getSequenceDictionary - IndexedBamInputFormat.setVars(new Path(filePath), + IndexedBamInputFormat.setVars( + new Path(filePath), new Path(filePath + ".bai"), viewRegion, - samDict) + samDict + ) val job = HadoopUtil.newJob(sc) @@ -344,12 +351,14 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log def loadParquetAlignments( 
filePath: String, predicate: Option[FilterPredicate] = None, - projection: Option[Schema] = None): RDD[AlignmentRecord] = { + projection: Option[Schema] = None + ): RDD[AlignmentRecord] = { loadParquet[AlignmentRecord](filePath, predicate, projection) } def loadInterleavedFastq( - filePath: String): RDD[AlignmentRecord] = { + filePath: String + ): RDD[AlignmentRecord] = { val job = HadoopUtil.newJob(sc) val records = sc.newAPIHadoopFile( @@ -366,20 +375,24 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log records.flatMap(fastqRecordConverter.convertPair) } - def loadFastq(filePath1: String, - filePath2Opt: Option[String], - recordGroupOpt: Option[String] = None, - stringency: ValidationStringency = ValidationStringency.STRICT): RDD[AlignmentRecord] = { + def loadFastq( + filePath1: String, + filePath2Opt: Option[String], + recordGroupOpt: Option[String] = None, + stringency: ValidationStringency = ValidationStringency.STRICT + ): RDD[AlignmentRecord] = { filePath2Opt match { case Some(filePath2) => loadPairedFastq(filePath1, filePath2, recordGroupOpt, stringency) - case None => loadUnpairedFastq(filePath1, stringency = stringency) + case None => loadUnpairedFastq(filePath1, stringency = stringency) } } - def loadPairedFastq(filePath1: String, - filePath2: String, - recordGroupOpt: Option[String], - stringency: ValidationStringency): RDD[AlignmentRecord] = { + def loadPairedFastq( + filePath1: String, + filePath2: String, + recordGroupOpt: Option[String], + stringency: ValidationStringency + ): RDD[AlignmentRecord] = { val reads1 = loadUnpairedFastq(filePath1, setFirstOfPair = true, stringency = stringency) val reads2 = loadUnpairedFastq(filePath2, setSecondOfPair = true, stringency = stringency) @@ -403,11 +416,13 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log reads1 ++ reads2 } - def loadUnpairedFastq(filePath: String, - recordGroupOpt: Option[String] = None, - setFirstOfPair: Boolean = false, - setSecondOfPair: Boolean = false, - stringency: ValidationStringency = ValidationStringency.STRICT): RDD[AlignmentRecord] = { + def loadUnpairedFastq( + filePath: String, + recordGroupOpt: Option[String] = None, + setFirstOfPair: Boolean = false, + setSecondOfPair: Boolean = false, + stringency: ValidationStringency = ValidationStringency.STRICT + ): RDD[AlignmentRecord] = { val job = HadoopUtil.newJob(sc) val records = sc.newAPIHadoopFile( @@ -428,8 +443,7 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log if (recordGroup.isEmpty) filePath.substring(filePath.lastIndexOf("/") + 1) else - recordGroup - ), + recordGroup), setFirstOfPair, setSecondOfPair, stringency @@ -443,7 +457,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log val records = sc.newAPIHadoopFile( filePath, classOf[VCFInputFormat], classOf[LongWritable], classOf[VariantContextWritable], - ContextUtil.getConfiguration(job)) + ContextUtil.getConfiguration(job) + ) if (Metrics.isRecording) records.instrument() else records records.flatMap(p => vcc.convert(p._2.get)) @@ -452,24 +467,29 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log def loadParquetGenotypes( filePath: String, predicate: Option[FilterPredicate] = None, - projection: Option[Schema] = None): RDD[Genotype] = { + projection: Option[Schema] = None + ): RDD[Genotype] = { loadParquet[Genotype](filePath, predicate, projection) } def loadParquetVariants( filePath: String, predicate: Option[FilterPredicate] = 
None, - projection: Option[Schema] = None): RDD[Variant] = { + projection: Option[Schema] = None + ): RDD[Variant] = { loadParquet[Variant](filePath, predicate, projection) } def loadFasta( filePath: String, - fragmentLength: Long): RDD[NucleotideContigFragment] = { - val fastaData: RDD[(LongWritable, Text)] = sc.newAPIHadoopFile(filePath, + fragmentLength: Long + ): RDD[NucleotideContigFragment] = { + val fastaData: RDD[(LongWritable, Text)] = sc.newAPIHadoopFile( + filePath, classOf[TextInputFormat], classOf[LongWritable], - classOf[Text]) + classOf[Text] + ) if (Metrics.isRecording) fastaData.instrument() else fastaData val remapData = fastaData.map(kv => (kv._1.get, kv._2.toString)) @@ -478,7 +498,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log } def loadInterleavedFastqAsFragments( - filePath: String): RDD[Fragment] = { + filePath: String + ): RDD[Fragment] = { val job = HadoopUtil.newJob(sc) val records = sc.newAPIHadoopFile( @@ -533,34 +554,39 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log def loadParquetFeatures( filePath: String, predicate: Option[FilterPredicate] = None, - projection: Option[Schema] = None): RDD[Feature] = { + projection: Option[Schema] = None + ): RDD[Feature] = { loadParquet[Feature](filePath, predicate, projection) } def loadParquetContigFragments( filePath: String, predicate: Option[FilterPredicate] = None, - projection: Option[Schema] = None): RDD[NucleotideContigFragment] = { + projection: Option[Schema] = None + ): RDD[NucleotideContigFragment] = { loadParquet[NucleotideContigFragment](filePath, predicate, projection) } def loadParquetFragments( filePath: String, predicate: Option[FilterPredicate] = None, - projection: Option[Schema] = None): RDD[Fragment] = { + projection: Option[Schema] = None + ): RDD[Fragment] = { loadParquet[Fragment](filePath, predicate, projection) } def loadVcfAnnotations( filePath: String, - sd: Option[SequenceDictionary] = None): RDD[DatabaseVariantAnnotation] = { + sd: Option[SequenceDictionary] = None + ): RDD[DatabaseVariantAnnotation] = { val job = HadoopUtil.newJob(sc) val vcc = new VariantContextConverter(sd) val records = sc.newAPIHadoopFile( filePath, classOf[VCFInputFormat], classOf[LongWritable], classOf[VariantContextWritable], - ContextUtil.getConfiguration(job)) + ContextUtil.getConfiguration(job) + ) if (Metrics.isRecording) records.instrument() else records records.map(p => vcc.convertToAnnotation(p._2.get)) @@ -569,14 +595,16 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log def loadParquetVariantAnnotations( filePath: String, predicate: Option[FilterPredicate] = None, - projection: Option[Schema] = None): RDD[DatabaseVariantAnnotation] = { + projection: Option[Schema] = None + ): RDD[DatabaseVariantAnnotation] = { loadParquet[DatabaseVariantAnnotation](filePath, predicate, projection) } def loadVariantAnnotations( filePath: String, projection: Option[Schema] = None, - sd: Option[SequenceDictionary] = None): RDD[DatabaseVariantAnnotation] = { + sd: Option[SequenceDictionary] = None + ): RDD[DatabaseVariantAnnotation] = { if (filePath.endsWith(".vcf")) { log.info("Loading " + filePath + " as VCF, and converting to variant annotations. 
Projection is ignored.") loadVcfAnnotations(filePath, sd) @@ -589,7 +617,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log def loadFeatures( filePath: String, - projection: Option[Schema] = None): RDD[Feature] = { + projection: Option[Schema] = None + ): RDD[Feature] = { if (filePath.endsWith(".bed")) { log.info(s"Loading $filePath as BED and converting to features. Projection is ignored.") @@ -611,8 +640,10 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log } } - def loadGenes(filePath: String, - projection: Option[Schema] = None): RDD[Gene] = { + def loadGenes( + filePath: String, + projection: Option[Schema] = None + ): RDD[Gene] = { import ADAMContext._ loadFeatures(filePath, projection).asGenes() } @@ -629,12 +660,15 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log def loadSequence( filePath: String, projection: Option[Schema] = None, - fragmentLength: Long = 10000): RDD[NucleotideContigFragment] = { + fragmentLength: Long = 10000 + ): RDD[NucleotideContigFragment] = { if (filePath.endsWith(".fa") || filePath.endsWith(".fasta")) { log.info("Loading " + filePath + " as FASTA and converting to NucleotideContigFragment. Projection is ignored.") - loadFasta(filePath, - fragmentLength) + loadFasta( + filePath, + fragmentLength + ) } else { log.info("Loading " + filePath + " as Parquet containing NucleotideContigFragments.") loadParquetContigFragments(filePath, None, projection) @@ -644,7 +678,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log def loadGenotypes( filePath: String, projection: Option[Schema] = None, - sd: Option[SequenceDictionary] = None): RDD[Genotype] = { + sd: Option[SequenceDictionary] = None + ): RDD[Genotype] = { if (filePath.endsWith(".vcf")) { log.info("Loading " + filePath + " as VCF, and converting to Genotypes. Projection is ignored.") loadVcf(filePath, sd).flatMap(_.genotypes) @@ -657,7 +692,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log def loadVariants( filePath: String, projection: Option[Schema] = None, - sd: Option[SequenceDictionary] = None): RDD[Variant] = { + sd: Option[SequenceDictionary] = None + ): RDD[Variant] = { if (filePath.endsWith(".vcf")) { log.info("Loading " + filePath + " as VCF, and converting to Variants. 
Projection is ignored.") loadVcf(filePath, sd).map(_.variant.variant) @@ -672,7 +708,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log projection: Option[Schema] = None, filePath2Opt: Option[String] = None, recordGroupOpt: Option[String] = None, - stringency: ValidationStringency = ValidationStringency.STRICT): RDD[AlignmentRecord] = LoadAlignmentRecords.time { + stringency: ValidationStringency = ValidationStringency.STRICT + ): RDD[AlignmentRecord] = LoadAlignmentRecords.time { if (filePath.endsWith(".sam") || filePath.endsWith(".bam")) { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala index 821ac45069..2913f52f04 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala @@ -52,12 +52,14 @@ class ADAMRDDFunctions[T <% IndexedRecord: Manifest](rdd: RDD[T]) extends Serial ) } - def adamParquetSave(filePath: String, - blockSize: Int = 128 * 1024 * 1024, - pageSize: Int = 1 * 1024 * 1024, - compressCodec: CompressionCodecName = CompressionCodecName.GZIP, - disableDictionaryEncoding: Boolean = false, - schema: Option[Schema] = None): Unit = SaveAsADAM.time { + def adamParquetSave( + filePath: String, + blockSize: Int = 128 * 1024 * 1024, + pageSize: Int = 1 * 1024 * 1024, + compressCodec: CompressionCodecName = CompressionCodecName.GZIP, + disableDictionaryEncoding: Boolean = false, + schema: Option[Schema] = None + ): Unit = SaveAsADAM.time { log.info("Saving data in ADAM format") val job = HadoopUtil.newJob(rdd.context) @@ -66,14 +68,18 @@ class ADAMRDDFunctions[T <% IndexedRecord: Manifest](rdd: RDD[T]) extends Serial ParquetOutputFormat.setEnableDictionary(job, !disableDictionaryEncoding) ParquetOutputFormat.setBlockSize(job, blockSize) ParquetOutputFormat.setPageSize(job, pageSize) - AvroParquetOutputFormat.setSchema(job, - schema.getOrElse(manifest[T].runtimeClass.asInstanceOf[Class[T]].newInstance().getSchema)) + AvroParquetOutputFormat.setSchema( + job, + schema.getOrElse(manifest[T].runtimeClass.asInstanceOf[Class[T]].newInstance().getSchema) + ) // Add the Void Key val recordToSave = rdd.map(p => (null, p)) // Save the values to the ADAM/Parquet file - recordToSave.saveAsNewAPIHadoopFile(filePath, + recordToSave.saveAsNewAPIHadoopFile( + filePath, classOf[java.lang.Void], manifest[T].runtimeClass.asInstanceOf[Class[T]], classOf[InstrumentedADAMAvroParquetOutputFormat], - ContextUtil.getConfiguration(job)) + ContextUtil.getConfiguration(job) + ) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoin.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoin.scala index 5daa9fe487..2a35b74b3c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoin.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoin.scala @@ -60,9 +60,12 @@ object BroadcastRegionJoin extends RegionJoin { * @return An RDD of pairs (x, y), where x is from baseRDD, y is from joinedRDD, and the region * corresponding to x overlaps the region corresponding to y. 
*/ - def partitionAndJoin[T, U](baseRDD: RDD[(ReferenceRegion, T)], - joinedRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], - uManifest: ClassTag[U]): RDD[(T, U)] = { + def partitionAndJoin[T, U]( + baseRDD: RDD[(ReferenceRegion, T)], + joinedRDD: RDD[(ReferenceRegion, U)] + )(implicit + tManifest: ClassTag[T], + uManifest: ClassTag[U]): RDD[(T, U)] = { val sc = baseRDD.context @@ -139,9 +142,12 @@ object BroadcastRegionJoin extends RegionJoin { * realistic sized sets. * */ - def cartesianFilter[T, U](baseRDD: RDD[(ReferenceRegion, T)], - joinedRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], - uManifest: ClassTag[U]): RDD[(T, U)] = { + def cartesianFilter[T, U]( + baseRDD: RDD[(ReferenceRegion, T)], + joinedRDD: RDD[(ReferenceRegion, U)] + )(implicit + tManifest: ClassTag[T], + uManifest: ClassTag[U]): RDD[(T, U)] = { baseRDD.cartesian(joinedRDD).filter({ case (t: (ReferenceRegion, T), u: (ReferenceRegion, U)) => t._1.overlaps(u._1) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala index f90e0adde5..91cbbb51e1 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala @@ -145,7 +145,8 @@ class Coverage(val window: Long) extends Serializable { } else { Seq( OrientedPoint(r1.referenceName, r1.end, false), - OrientedPoint(r2.referenceName, r2.start, true)) + OrientedPoint(r2.referenceName, r2.start, true) + ) } case _ => Seq() } @@ -164,7 +165,8 @@ class Coverage(val window: Long) extends Serializable { dict.records.toSeq.map { case seqRecord => ReferenceRegion(seqRecord.name, 0, seqRecord.length) - }) + } + ) val windowRegions: RDD[ReferenceRegion] = chromRegions.flatMap { case chromRegion => diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicPartitioners.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicPartitioners.scala index 421e25d94f..21b53b44f4 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicPartitioners.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicPartitioners.scala @@ -49,7 +49,8 @@ case class GenomicPositionPartitioner(numParts: Int, seqLengths: Map[String, Lon // referenceName -> cumulative length before this sequence (using seqDict.records as the implicit ordering) val cumulativeLengths: Map[String, Long] = Map( - names.zip(cumuls): _*) + names.zip(cumuls): _* + ) /** * 'parts' is the total number of partitions for non-UNMAPPED ReferencePositions -- @@ -78,9 +79,13 @@ case class GenomicPositionPartitioner(numParts: Int, seqLengths: Map[String, Lon // everything else gets assigned normally. case refpos: ReferencePosition => { - require(seqLengths.contains(refpos.referenceName), - "Received key (%s) that did not map to a known contig. Contigs are:\n%s".format(refpos, - seqLengths.keys.mkString("\n"))) + require( + seqLengths.contains(refpos.referenceName), + "Received key (%s) that did not map to a known contig. Contigs are:\n%s".format( + refpos, + seqLengths.keys.mkString("\n") + ) + ) getPart(refpos.referenceName, refpos.pos) } @@ -120,9 +125,13 @@ case class GenomicRegionPartitioner(partitionSize: Long, seqLengths: Map[String, override def getPartition(key: Any): Int = { key match { case region: ReferenceRegion => { - require(seqLengths.contains(region.referenceName), - "Received key (%s) that did not map to a known contig. 
Contigs are:\n%s".format(region, - seqLengths.keys.mkString("\n"))) + require( + seqLengths.contains(region.referenceName), + "Received key (%s) that did not map to a known contig. Contigs are:\n%s".format( + region, + seqLengths.keys.mkString("\n") + ) + ) computePartition(region) } case _ => throw new IllegalArgumentException("Only ReferenceMappable values can be partitioned by GenomicRegionPartitioner") diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/RegionJoin.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/RegionJoin.scala index 27e1aa3d2e..a0d033be58 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/RegionJoin.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/RegionJoin.scala @@ -37,7 +37,10 @@ trait RegionJoin { * @return An RDD of pairs (x, y), where x is from baseRDD, y is from joinedRDD, and the region * corresponding to x overlaps the region corresponding to y. */ - def partitionAndJoin[T, U](baseRDD: RDD[(ReferenceRegion, T)], - joinedRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], - uManifest: ClassTag[U]): RDD[(T, U)] + def partitionAndJoin[T, U]( + baseRDD: RDD[(ReferenceRegion, T)], + joinedRDD: RDD[(ReferenceRegion, U)] + )(implicit + tManifest: ClassTag[T], + uManifest: ClassTag[U]): RDD[(T, U)] } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala index e344605365..341b97e0ac 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala @@ -47,9 +47,12 @@ case class ShuffleRegionJoin(sd: SequenceDictionary, partitionSize: Long) extend * @return An RDD of pairs (x, y), where x is from leftRDD, y is from rightRDD, and the region * corresponding to x overlaps the region corresponding to y. 
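The shuffle-based implementation documented above is constructed with a sequence dictionary and a bin size, then invoked through the same trait method (sketch; sd: SequenceDictionary plus region-keyed readsByRegion and featuresByRegion are assumed inputs):

  val joined: RDD[(AlignmentRecord, Feature)] =
    ShuffleRegionJoin(sd, partitionSize = 1000000L)
      .partitionAndJoin(readsByRegion, featuresByRegion)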
*/ - def partitionAndJoin[T, U](leftRDD: RDD[(ReferenceRegion, T)], - rightRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], - uManifest: ClassTag[U]): RDD[(T, U)] = { + def partitionAndJoin[T, U]( + leftRDD: RDD[(ReferenceRegion, T)], + rightRDD: RDD[(ReferenceRegion, U)] + )(implicit + tManifest: ClassTag[T], + uManifest: ClassTag[U]): RDD[(T, U)] = { val sc = leftRDD.context // Create the set of bins across the genome for parallel processing @@ -196,9 +199,11 @@ private case class ManualRegionPartitioner(partitions: Int) extends Partitioner * @tparam T type of leftIter * @tparam U type of rightIter */ -private case class SortedIntervalPartitionJoin[T, U](binRegion: ReferenceRegion, - leftIter: Iterator[((ReferenceRegion, Int), T)], - rightIter: Iterator[((ReferenceRegion, Int), U)]) +private case class SortedIntervalPartitionJoin[T, U]( + binRegion: ReferenceRegion, + leftIter: Iterator[((ReferenceRegion, Int), T)], + rightIter: Iterator[((ReferenceRegion, Int), U)] +) extends Iterator[(T, U)] with Serializable { // inspired by bedtools2 chromsweep private val left: BufferedIterator[((ReferenceRegion, Int), T)] = leftIter.buffered diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/FlankReferenceFragments.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/FlankReferenceFragments.scala index 2c4f5287a5..4290f0cd17 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/FlankReferenceFragments.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/FlankReferenceFragments.scala @@ -25,16 +25,20 @@ import org.bdgenomics.formats.avro.NucleotideContigFragment private[contig] object FlankReferenceFragments extends Serializable { - def apply(rdd: RDD[NucleotideContigFragment], - sd: SequenceDictionary, - flankSize: Int): RDD[NucleotideContigFragment] = { + def apply( + rdd: RDD[NucleotideContigFragment], + sd: SequenceDictionary, + flankSize: Int + ): RDD[NucleotideContigFragment] = { rdd.keyBy(ctg => ReferenceRegion(ctg).get) .repartitionAndSortWithinPartitions(ReferencePartitioner(sd)) .mapPartitions(flank(_, flankSize)) } - def flank(iter: Iterator[(ReferenceRegion, NucleotideContigFragment)], - flankSize: Int): Iterator[NucleotideContigFragment] = { + def flank( + iter: Iterator[(ReferenceRegion, NucleotideContigFragment)], + flankSize: Int + ): Iterator[NucleotideContigFragment] = { // we need to have at least one element in the iterator if (iter.hasNext) { // now, we apply a window and flank adjacent segments diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala index 8f32070287..1c12ed7356 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala @@ -120,14 +120,18 @@ class NucleotideContigFragmentRDDFunctions(rdd: RDD[NucleotideContigFragment]) e val str = fragmentSequence.drop(trimStart) .dropRight(trimEnd) - val reg = new ReferenceRegion(fragment._1.referenceName, + val reg = new ReferenceRegion( + fragment._1.referenceName, fragment._1.start + trimStart, - fragment._1.end - trimEnd) + fragment._1.end - trimEnd + ) (reg, str) } - def reducePairs(kv1: (ReferenceRegion, String), - kv2: (ReferenceRegion, String)): (ReferenceRegion, String) = { + def reducePairs( + kv1: (ReferenceRegion, String), + kv2: 
(ReferenceRegion, String) + ): (ReferenceRegion, String) = { assert(kv1._1.isAdjacent(kv2._1), "Regions being joined must be adjacent. For: " + kv1 + ", " + kv2) @@ -147,8 +151,10 @@ class NucleotideContigFragmentRDDFunctions(rdd: RDD[NucleotideContigFragment]) e .map(kv => getString(kv)) .reduce(reducePairs) - assert(pair._1.compareTo(region) == 0, - "Merging fragments returned a different region than requested.") + assert( + pair._1.compareTo(region) == 0, + "Merging fragments returned a different region than requested." + ) pair._2 } catch { @@ -171,8 +177,10 @@ class NucleotideContigFragmentRDDFunctions(rdd: RDD[NucleotideContigFragment]) e * sequence dictionary on the fly. Default is None. * @return Returns the RDD, with all adjacent fragments extended with flanking sequence. */ - def flankAdjacentFragments(flankLength: Int, - optSd: Option[SequenceDictionary] = None): RDD[NucleotideContigFragment] = { + def flankAdjacentFragments( + flankLength: Int, + optSd: Option[SequenceDictionary] = None + ): RDD[NucleotideContigFragment] = { FlankReferenceFragments(rdd, optSd.getOrElse(adamGetSequenceDictionary(performLexSort = false)), flankLength) } @@ -184,8 +192,10 @@ class NucleotideContigFragmentRDDFunctions(rdd: RDD[NucleotideContigFragment]) e * sequence dictionary on the fly. Default is None. * @return Returns an RDD containing k-mer/count pairs. */ - def countKmers(kmerLength: Int, - optSd: Option[SequenceDictionary] = None): RDD[(String, Long)] = { + def countKmers( + kmerLength: Int, + optSd: Option[SequenceDictionary] = None + ): RDD[(String, Long)] = { flankAdjacentFragments(kmerLength, optSd).flatMap(r => { // cut each read into k-mers, and attach a count of 1L r.getFragmentSequence diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureRDDFunctions.scala index d1eb95cc2b..4a7ea7fc7c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureRDDFunctions.scala @@ -71,24 +71,30 @@ class FeatureRDDFunctions(featureRDD: RDD[Feature]) extends Serializable with Lo // getParentIds is modeled as returning a List[], we'll write it this way. 
case ("exon", ftr: Feature) => val ids: Seq[String] = ftr.getParentIds - ids.map(transcriptId => (transcriptId, - Exon(ftr.getFeatureId, transcriptId, strand(ftr.getStrand), ReferenceRegion(ftr)))) + ids.map(transcriptId => ( + transcriptId, + Exon(ftr.getFeatureId, transcriptId, strand(ftr.getStrand), ReferenceRegion(ftr)) + )) }.groupByKey() val cdsByTranscript: RDD[(String, Iterable[CDS])] = typePartitioned.filter(_._1 == "CDS").flatMap { case ("CDS", ftr: Feature) => val ids: Seq[String] = ftr.getParentIds - ids.map(transcriptId => (transcriptId, - CDS(transcriptId, strand(ftr.getStrand), ReferenceRegion(ftr)))) + ids.map(transcriptId => ( + transcriptId, + CDS(transcriptId, strand(ftr.getStrand), ReferenceRegion(ftr)) + )) }.groupByKey() val utrsByTranscript: RDD[(String, Iterable[UTR])] = typePartitioned.filter(_._1 == "UTR").flatMap { case ("UTR", ftr: Feature) => val ids: Seq[String] = ftr.getParentIds - ids.map(transcriptId => (transcriptId, - UTR(transcriptId, strand(ftr.getStrand), ReferenceRegion(ftr)))) + ids.map(transcriptId => ( + transcriptId, + UTR(transcriptId, strand(ftr.getStrand), ReferenceRegion(ftr)) + )) }.groupByKey() // Step #3 @@ -106,10 +112,12 @@ class FeatureRDDFunctions(featureRDD: RDD[Feature]) extends Serializable with Lo utrs: Option[Iterable[UTR]]), cds: Option[Iterable[CDS]])) => val geneIds: Seq[String] = tgtf.getParentIds // should be length 1 - geneIds.map(geneId => (geneId, + geneIds.map(geneId => ( + geneId, Transcript(transcriptId, Seq(transcriptId), geneId, strand(tgtf.getStrand), - exons, cds.getOrElse(Seq()), utrs.getOrElse(Seq())))) + exons, cds.getOrElse(Seq()), utrs.getOrElse(Seq())) + )) }.groupByKey() // Step #4 diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala index 4c9690cae4..2be339e5c6 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala @@ -65,8 +65,10 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) rdd.filter(overlapsQuery) } - def maybeSaveBam(args: ADAMSaveAnyArgs, - isSorted: Boolean = false): Boolean = { + def maybeSaveBam( + args: ADAMSaveAnyArgs, + isSorted: Boolean = false + ): Boolean = { if (args.outputPath.endsWith(".sam")) { log.info("Saving data in SAM format") rdd.adamSAMSave(args.outputPath, asSingleFile = args.asSingleFile, isSorted = isSorted) @@ -92,8 +94,10 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) maybeSaveBam(args) || { rdd.adamParquetSave(args); true } } - def adamSave(args: ADAMSaveAnyArgs, - isSorted: Boolean = false) = { + def adamSave( + args: ADAMSaveAnyArgs, + isSorted: Boolean = false + ) = { maybeSaveBam(args, isSorted) || maybeSaveFastq(args) || { rdd.adamParquetSave(args); true } } @@ -124,10 +128,12 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * @param asSam Selects whether to save as SAM or BAM. The default value is true (save in SAM format). * @param isSorted If the output is sorted, this will modify the header. 
*/ - def adamSAMSave(filePath: String, - asSam: Boolean = true, - asSingleFile: Boolean = false, - isSorted: Boolean = false) = SAMSave.time { + def adamSAMSave( + filePath: String, + asSam: Boolean = true, + asSingleFile: Boolean = false, + isSorted: Boolean = false + ) = SAMSave.time { // convert the records val (convertRecords: RDD[SAMRecordWritable], header: SAMFileHeader) = rdd.adamConvertToSAM(isSorted) @@ -203,12 +209,16 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) log.info(s"Writing single ${if (asSam) "SAM" else "BAM"} file (not Hadoop-style directory)") val (outputFormat, headerLessOutputFormat) = asSam match { case true => - (classOf[InstrumentedADAMSAMOutputFormat[LongWritable]], - classOf[InstrumentedADAMSAMOutputFormatHeaderLess[LongWritable]]) + ( + classOf[InstrumentedADAMSAMOutputFormat[LongWritable]], + classOf[InstrumentedADAMSAMOutputFormatHeaderLess[LongWritable]] + ) case false => - (classOf[InstrumentedADAMBAMOutputFormat[LongWritable]], - classOf[InstrumentedADAMBAMOutputFormatHeaderLess[LongWritable]]) + ( + classOf[InstrumentedADAMBAMOutputFormat[LongWritable]], + classOf[InstrumentedADAMBAMOutputFormatHeaderLess[LongWritable]] + ) } val headPath = filePath + "_head" @@ -343,9 +353,11 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * observations to. * @return Returns an RDD of recalibrated reads. */ - def adamBQSR(knownSnps: Broadcast[SnpTable], - observationDumpFile: Option[String] = None, - validationStringency: ValidationStringency = ValidationStringency.LENIENT): RDD[AlignmentRecord] = BQSRInDriver.time { + def adamBQSR( + knownSnps: Broadcast[SnpTable], + observationDumpFile: Option[String] = None, + validationStringency: ValidationStringency = ValidationStringency.LENIENT + ): RDD[AlignmentRecord] = BQSRInDriver.time { BaseQualityRecalibration(rdd, knownSnps, observationDumpFile, validationStringency) } @@ -364,12 +376,14 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * * @return Returns an RDD of mapped reads which have been realigned. */ - def adamRealignIndels(consensusModel: ConsensusGenerator = new ConsensusGeneratorFromReads, - isSorted: Boolean = false, - maxIndelSize: Int = 500, - maxConsensusNumber: Int = 30, - lodThreshold: Double = 5.0, - maxTargetSize: Int = 3000): RDD[AlignmentRecord] = RealignIndelsInDriver.time { + def adamRealignIndels( + consensusModel: ConsensusGenerator = new ConsensusGeneratorFromReads, + isSorted: Boolean = false, + maxIndelSize: Int = 500, + maxConsensusNumber: Int = 30, + lodThreshold: Double = 5.0, + maxTargetSize: Int = 3000 + ): RDD[AlignmentRecord] = RealignIndelsInDriver.time { RealignIndels(rdd, consensusModel, isSorted, maxIndelSize, maxConsensusNumber, lodThreshold) } @@ -406,7 +420,8 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) */ def adamCharacterizeTagValues(tag: String): Map[Any, Long] = { adamFilterRecordsWithTag(tag).flatMap(RichAlignmentRecord(_).tags.find(_.tag == tag)).map( - attr => Map(attr.value -> 1L)).reduce { + attr => Map(attr.value -> 1L) + ).reduce { (map1: Map[Any, Long], map2: Map[Any, Long]) => MapTools.add(map1, map2) } @@ -418,8 +433,10 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * @return An RDD[Read] containing the subset of records with a tag that matches the given name. 
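A pipeline sketch tying together the entry points reformatted in this file (not part of this patch; reads is an RDD[AlignmentRecord], knownSnps a Broadcast[SnpTable], with the implicits from import org.bdgenomics.adam.rdd.ADAMContext._ in scope):

  val realigned = reads.adamRealignIndels()        // all defaults as shown above
  val recalibrated = realigned.adamBQSR(knownSnps) // LENIENT stringency by default
  recalibrated.adamSAMSave("/out/reads.bam", asSam = false, asSingleFile = true)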
*/ def adamFilterRecordsWithTag(tagName: String): RDD[AlignmentRecord] = { - assert(tagName.length == 2, - "withAttribute takes a tagName argument of length 2; tagName=\"%s\"".format(tagName)) + assert( + tagName.length == 2, + "withAttribute takes a tagName argument of length 2; tagName=\"%s\"".format(tagName) + ) rdd.filter(RichAlignmentRecord(_).tags.exists(_.tag == tagName)) } @@ -430,11 +447,13 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * @param fileName2 Path at which to save a FASTQ file containing the second mate of each pair. * @param validationStringency Iff strict, throw an exception if any read in this RDD is not accompanied by its mate. */ - def adamSaveAsPairedFastq(fileName1: String, - fileName2: String, - outputOriginalBaseQualities: Boolean = false, - validationStringency: ValidationStringency = ValidationStringency.LENIENT, - persistLevel: Option[StorageLevel] = None): Unit = { + def adamSaveAsPairedFastq( + fileName1: String, + fileName2: String, + outputOriginalBaseQualities: Boolean = false, + validationStringency: ValidationStringency = ValidationStringency.LENIENT, + persistLevel: Option[StorageLevel] = None + ): Unit = { def maybePersist[T](r: RDD[T]): Unit = { persistLevel.foreach(r.persist(_)) } @@ -515,12 +534,10 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) if (validationStringency == ValidationStringency.STRICT) { firstInPairRecords.foreach(read => if (read.getReadNum == 1) - throw new Exception("Read %s found with first- and second-of-pair set".format(read.getReadName)) - ) + throw new Exception("Read %s found with first- and second-of-pair set".format(read.getReadName))) secondInPairRecords.foreach(read => if (read.getReadNum == 0) - throw new Exception("Read %s found with first- and second-of-pair set".format(read.getReadName)) - ) + throw new Exception("Read %s found with first- and second-of-pair set".format(read.getReadName))) } assert( @@ -552,12 +569,14 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * @param sort Whether to sort the FASTQ files by read name or not. Defaults * to false. Sorting the output will recover pair order, if desired. */ - def adamSaveAsFastq(fileName: String, - fileName2Opt: Option[String] = None, - outputOriginalBaseQualities: Boolean = false, - sort: Boolean = false, - validationStringency: ValidationStringency = ValidationStringency.LENIENT, - persistLevel: Option[StorageLevel] = None) { + def adamSaveAsFastq( + fileName: String, + fileName2Opt: Option[String] = None, + outputOriginalBaseQualities: Boolean = false, + sort: Boolean = false, + validationStringency: ValidationStringency = ValidationStringency.LENIENT, + persistLevel: Option[StorageLevel] = None + ) { log.info("Saving data in FASTQ format.") fileName2Opt match { case Some(fileName2) => @@ -595,8 +614,10 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * @param validationStringency How stringently to validate the reads. * @return Returns an RDD with the pair information recomputed. 
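For the FASTQ writers reformatted in this hunk, a save sketch (hypothetical paths; per the doc above, sort = true recovers pair order in the output):

  reads.adamSaveAsFastq(
    "/out/reads_1.fq",
    fileName2Opt = Some("/out/reads_2.fq"),
    sort = true
  )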
*/ - def adamRePairReads(secondPairRdd: RDD[AlignmentRecord], - validationStringency: ValidationStringency = ValidationStringency.LENIENT): RDD[AlignmentRecord] = { + def adamRePairReads( + secondPairRdd: RDD[AlignmentRecord], + validationStringency: ValidationStringency = ValidationStringency.LENIENT + ): RDD[AlignmentRecord] = { // cache rdds val firstPairRdd = rdd.cache() secondPairRdd.cache() diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/FlagStat.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/FlagStat.scala index f271a40445..187ff0be9f 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/FlagStat.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/FlagStat.scala @@ -40,10 +40,12 @@ object DuplicateMetrics { } def duplicateMetrics(f: (AlignmentRecord) => Boolean) = { - new DuplicateMetrics(b2i(f(record)), + new DuplicateMetrics( + b2i(f(record)), b2i(f(record) && record.getReadMapped && record.getMateMapped), b2i(f(record) && record.getReadMapped && !record.getMateMapped), - b2i(f(record) && (!isSameContig(record.getContig, record.getMateContig)))) + b2i(f(record) && (!isSameContig(record.getContig, record.getMateContig))) + ) } (duplicateMetrics(isPrimary), duplicateMetrics(isSecondary)) } @@ -51,21 +53,24 @@ object DuplicateMetrics { case class DuplicateMetrics(total: Long, bothMapped: Long, onlyReadMapped: Long, crossChromosome: Long) { def +(that: DuplicateMetrics): DuplicateMetrics = { - new DuplicateMetrics(total + that.total, + new DuplicateMetrics( + total + that.total, bothMapped + that.bothMapped, onlyReadMapped + that.onlyReadMapped, - crossChromosome + that.crossChromosome) + crossChromosome + that.crossChromosome + ) } } case class FlagStatMetrics(total: Long, duplicatesPrimary: DuplicateMetrics, duplicatesSecondary: DuplicateMetrics, - mapped: Long, pairedInSequencing: Long, - read1: Long, read2: Long, properlyPaired: Long, withSelfAndMateMapped: Long, - singleton: Long, withMateMappedToDiffChromosome: Long, - withMateMappedToDiffChromosomeMapQ5: Long, failedQuality: Boolean) { + mapped: Long, pairedInSequencing: Long, + read1: Long, read2: Long, properlyPaired: Long, withSelfAndMateMapped: Long, + singleton: Long, withMateMappedToDiffChromosome: Long, + withMateMappedToDiffChromosomeMapQ5: Long, failedQuality: Boolean) { def +(that: FlagStatMetrics): FlagStatMetrics = { assert(failedQuality == that.failedQuality, "Can't reduce passedVendorQuality with different failedQuality values") - new FlagStatMetrics(total + that.total, + new FlagStatMetrics( + total + that.total, duplicatesPrimary + that.duplicatesPrimary, duplicatesSecondary + that.duplicatesSecondary, mapped + that.mapped, @@ -77,7 +82,8 @@ case class FlagStatMetrics(total: Long, duplicatesPrimary: DuplicateMetrics, dup singleton + that.singleton, withMateMappedToDiffChromosome + that.withMateMappedToDiffChromosome, withMateMappedToDiffChromosomeMapQ5 + that.withMateMappedToDiffChromosomeMapQ5, - failedQuality) + failedQuality + ) } } @@ -93,7 +99,8 @@ object FlagStat { val mateMappedToDiffChromosome = p.getReadPaired && p.getReadMapped && p.getMateMapped && !isSameContig(p.getContig, p.getMateContig) val (primaryDuplicates, secondaryDuplicates) = DuplicateMetrics(p) - new FlagStatMetrics(1, + new FlagStatMetrics( + 1, primaryDuplicates, secondaryDuplicates, b2i(b(p.getReadMapped)), b2i(b(p.getReadPaired)), @@ -104,19 +111,21 @@ object FlagStat { b2i(b(p.getReadPaired) && b(p.getReadMapped) && b(!p.getMateMapped)), b2i(b(mateMappedToDiffChromosome)), 
b2i(b(mateMappedToDiffChromosome && i(p.getMapq) >= 5)), - p.getFailedVendorQualityChecks) + p.getFailedVendorQualityChecks + ) }.aggregate((FlagStatMetrics.emptyFailedQuality, FlagStatMetrics.emptyPassedQuality))( seqOp = { - (a, b) => - if (b.failedQuality) { - (a._1 + b, a._2) - } else { - (a._1, a._2 + b) - } - }, + (a, b) => + if (b.failedQuality) { + (a._1 + b, a._2) + } else { + (a._1, a._2 + b) + } + }, combOp = { - (a, b) => - (a._1 + b._1, a._2 + b._2) - }) + (a, b) => + (a._1 + b._1, a._2 + b._2) + } + ) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala index d80b8d2645..a825e3d8dd 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala @@ -27,11 +27,13 @@ import org.bdgenomics.adam.models.ReferenceRegion import org.bdgenomics.adam.util.{ ReferenceFile, MdTag } import org.bdgenomics.formats.avro.AlignmentRecord -case class MDTagging(reads: RDD[AlignmentRecord], - @transient referenceFile: ReferenceFile, - partitionSize: Long = 1000000, - overwriteExistingTags: Boolean = false, - validationStringency: ValidationStringency = ValidationStringency.STRICT) extends Logging { +case class MDTagging( + reads: RDD[AlignmentRecord], + @transient referenceFile: ReferenceFile, + partitionSize: Long = 1000000, + overwriteExistingTags: Boolean = false, + validationStringency: ValidationStringency = ValidationStringency.STRICT +) extends Logging { @transient val sc = reads.sparkContext val mdTagsAdded = sc.accumulator(0L, "MDTags Added") @@ -85,11 +87,13 @@ case class MDTagging(reads: RDD[AlignmentRecord], } object MDTagging { - def apply(reads: RDD[AlignmentRecord], - referenceFile: String, - fragmentLength: Long, - overwriteExistingTags: Boolean, - validationStringency: ValidationStringency): RDD[AlignmentRecord] = { + def apply( + reads: RDD[AlignmentRecord], + referenceFile: String, + fragmentLength: Long, + overwriteExistingTags: Boolean, + validationStringency: ValidationStringency + ): RDD[AlignmentRecord] = { val sc = reads.sparkContext new MDTagging( reads, diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MarkDuplicates.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MarkDuplicates.scala index 93af1d4734..53fd510fa4 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MarkDuplicates.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MarkDuplicates.scala @@ -55,7 +55,7 @@ private[rdd] object MarkDuplicates extends Serializable { } private def markReads(reads: Iterable[(ReferencePositionPair, SingleReadBucket)], primaryAreDups: Boolean, secondaryAreDups: Boolean, - ignore: Option[(ReferencePositionPair, SingleReadBucket)] = None) = MarkReads.time { + ignore: Option[(ReferencePositionPair, SingleReadBucket)] = None) = MarkReads.time { reads.foreach(read => { if (ignore.forall(_ != read)) markReadsInBucket(read._2, primaryAreDups, secondaryAreDups) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala index b0dd378807..fe33fbc7e1 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala @@ -97,8 +97,10 @@ object 
IndelRealignmentTarget { * @param maxIndelSize Maximum allowable size of an indel. * @return Set of generated realignment targets. */ - def apply(read: RichAlignmentRecord, - maxIndelSize: Int): Seq[IndelRealignmentTarget] = CreateIndelRealignmentTargets.time { + def apply( + read: RichAlignmentRecord, + maxIndelSize: Int + ): Seq[IndelRealignmentTarget] = CreateIndelRealignmentTargets.time { val region = ReferenceRegion(read.record) val refId = read.record.getContig.getContigName @@ -135,8 +137,10 @@ object IndelRealignmentTarget { } } -class IndelRealignmentTarget(val variation: Option[ReferenceRegion], - val readRange: ReferenceRegion) extends Logging { +class IndelRealignmentTarget( + val variation: Option[ReferenceRegion], + val readRange: ReferenceRegion +) extends Logging { override def toString(): String = { variation + " over " + readRange diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala index 6bfd6aad1f..ed385ca899 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala @@ -43,19 +43,23 @@ private[rdd] object RealignIndels extends Serializable with Logging { * @param rdd RDD of reads to realign. * @return RDD of realigned reads. */ - def apply(rdd: RDD[AlignmentRecord], - consensusModel: ConsensusGenerator = new ConsensusGeneratorFromReads, - dataIsSorted: Boolean = false, - maxIndelSize: Int = 500, - maxConsensusNumber: Int = 30, - lodThreshold: Double = 5.0, - maxTargetSize: Int = 3000): RDD[AlignmentRecord] = { - new RealignIndels(consensusModel, + def apply( + rdd: RDD[AlignmentRecord], + consensusModel: ConsensusGenerator = new ConsensusGeneratorFromReads, + dataIsSorted: Boolean = false, + maxIndelSize: Int = 500, + maxConsensusNumber: Int = 30, + lodThreshold: Double = 5.0, + maxTargetSize: Int = 3000 + ): RDD[AlignmentRecord] = { + new RealignIndels( + consensusModel, dataIsSorted, maxIndelSize, maxConsensusNumber, lodThreshold, - maxTargetSize).realignIndels(rdd) + maxTargetSize + ).realignIndels(rdd) } /** @@ -69,8 +73,10 @@ private[rdd] object RealignIndels extends Serializable with Logging { * * @see mapTargets */ - @tailrec final def mapToTarget(read: RichAlignmentRecord, - targets: TreeSet[(IndelRealignmentTarget, Int)]): Int = { + @tailrec final def mapToTarget( + read: RichAlignmentRecord, + targets: TreeSet[(IndelRealignmentTarget, Int)] + ): Int = { // Perform tail call recursive binary search if (targets.size == 1) { if (TargetOrdering.contains(targets.head._1, read)) { @@ -103,8 +109,10 @@ private[rdd] object RealignIndels extends Serializable with Logging { * * @see mapTargets */ - def mapToTarget(read: RichAlignmentRecord, - targets: ZippedTargetSet): Int = { + def mapToTarget( + read: RichAlignmentRecord, + targets: ZippedTargetSet + ): Int = { mapToTarget(read, targets.set) } @@ -121,8 +129,10 @@ private[rdd] object RealignIndels extends Serializable with Logging { * * @see mapTargets */ - def mapToTargetUnpacked(targetIndex: Int, - targets: TreeSet[(IndelRealignmentTarget, Int)]): Option[IndelRealignmentTarget] = { + def mapToTargetUnpacked( + targetIndex: Int, + targets: TreeSet[(IndelRealignmentTarget, Int)] + ): Option[IndelRealignmentTarget] = { if (targetIndex < 0) { None } else { @@ -217,12 +227,14 @@ private[rdd] object RealignIndels extends Serializable with Logging { import 
org.bdgenomics.adam.rdd.read.realignment.RealignIndels._ -private[rdd] class RealignIndels(val consensusModel: ConsensusGenerator = new ConsensusGeneratorFromReads, - val dataIsSorted: Boolean = false, - val maxIndelSize: Int = 500, - val maxConsensusNumber: Int = 30, - val lodThreshold: Double = 5.0, - val maxTargetSize: Int = 3000) extends Serializable with Logging { +private[rdd] class RealignIndels( + val consensusModel: ConsensusGenerator = new ConsensusGeneratorFromReads, + val dataIsSorted: Boolean = false, + val maxIndelSize: Int = 500, + val maxConsensusNumber: Int = 30, + val lodThreshold: Double = 5.0, + val maxTargetSize: Int = 3000 +) extends Serializable with Logging { /** * Given a target group with an indel realignment target and a group of reads to realign, this method @@ -247,9 +259,11 @@ private[rdd] class RealignIndels(val consensusModel: ConsensusGenerator = new Co val refRegion = ReferenceRegion(reads.head.record.getContig.getContigName, refStart, refEnd) // preprocess reads and get consensus - val readsToClean = consensusModel.preprocessReadsForRealignment(reads.filter(r => r.mdTag.forall(_.hasMismatches)), + val readsToClean = consensusModel.preprocessReadsForRealignment( + reads.filter(r => r.mdTag.forall(_.hasMismatches)), reference, - refRegion) + refRegion + ) var consensus = consensusModel.findConsensus(readsToClean) // reduce count of consensus sequences @@ -347,9 +361,11 @@ private[rdd] class RealignIndels(val consensusModel: ConsensusGenerator = new Co // compensate the end builder.setEnd(refStart + remapping + r.getSequence.length + endPenalty) - val cigarElements = List[CigarElement](new CigarElement((bestConsensus.index.start - (refStart + remapping)).toInt, CigarOperator.M), + val cigarElements = List[CigarElement]( + new CigarElement((bestConsensus.index.start - (refStart + remapping)).toInt, CigarOperator.M), idElement, - new CigarElement(endLength.toInt, CigarOperator.M)) + new CigarElement(endLength.toInt, CigarOperator.M) + ) new Cigar(cigarElements) } @@ -435,9 +451,11 @@ private[rdd] class RealignIndels(val consensusModel: ConsensusGenerator = new Co * @return Mismatch quality of read for current alignment. */ def sumMismatchQuality(read: AlignmentRecord): Int = { - sumMismatchQualityIgnoreCigar(read.getSequence, + sumMismatchQualityIgnoreCigar( + read.getSequence, read.mdTag.get.getReference(read), - read.qualityScores) + read.qualityScores + ) } /** @@ -463,9 +481,11 @@ private[rdd] class RealignIndels(val consensusModel: ConsensusGenerator = new Co // find realignment targets log.info("Generating realignment targets...") - val targets: TreeSet[IndelRealignmentTarget] = RealignmentTargetFinder(richRdd, + val targets: TreeSet[IndelRealignmentTarget] = RealignmentTargetFinder( + richRdd, maxIndelSize, - maxTargetSize) + maxTargetSize + ) // we should only attempt realignment if the target set isn't empty if (targets.isEmpty) { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignmentTargetFinder.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignmentTargetFinder.scala index 7b8fdc83fb..5ad851571a 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignmentTargetFinder.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignmentTargetFinder.scala @@ -32,9 +32,11 @@ object RealignmentTargetFinder { * @param rdd RDD of reads to use in generating realignment targets. * @return Sorted set of realignment targets. 
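Target generation can also be driven directly through the companion apply documented above (sketch; richReads: RDD[RichAlignmentRecord] is assumed):

  import scala.collection.immutable.TreeSet

  val targets: TreeSet[IndelRealignmentTarget] =
    RealignmentTargetFinder(richReads, maxIndelSize = 500, maxTargetSize = 3000)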
*/ - def apply(rdd: RDD[RichAlignmentRecord], - maxIndelSize: Int = 500, - maxTargetSize: Int = 3000): TreeSet[IndelRealignmentTarget] = { + def apply( + rdd: RDD[RichAlignmentRecord], + maxIndelSize: Int = 500, + maxTargetSize: Int = 3000 + ): TreeSet[IndelRealignmentTarget] = { new RealignmentTargetFinder().findTargets(rdd, maxIndelSize, maxTargetSize).set } } @@ -54,7 +56,8 @@ class RealignmentTargetFinder extends Serializable with Logging { */ @tailrec protected final def joinTargets( first: TreeSet[IndelRealignmentTarget], - second: TreeSet[IndelRealignmentTarget]): TreeSet[IndelRealignmentTarget] = { + second: TreeSet[IndelRealignmentTarget] + ): TreeSet[IndelRealignmentTarget] = { if (first.isEmpty && second.isEmpty) { TreeSet[IndelRealignmentTarget]()(TargetOrdering) @@ -83,8 +86,10 @@ class RealignmentTargetFinder extends Serializable with Logging { * @param second A sorted set of realignment targets. * @return A merged set of targets. */ - def joinTargets(first: TargetSet, - second: TargetSet): TargetSet = JoinTargets.time { + def joinTargets( + first: TargetSet, + second: TargetSet + ): TargetSet = JoinTargets.time { new TargetSet(joinTargets(first.set, second.set)) } @@ -94,9 +99,11 @@ class RealignmentTargetFinder extends Serializable with Logging { * @param reads An RDD containing reads to generate indel realignment targets from. * @return An ordered set of indel realignment targets. */ - def findTargets(reads: RDD[RichAlignmentRecord], - maxIndelSize: Int = 500, - maxTargetSize: Int = 3000): TargetSet = FindTargets.time { + def findTargets( + reads: RDD[RichAlignmentRecord], + maxIndelSize: Int = 500, + maxTargetSize: Int = 3000 + ): TargetSet = FindTargets.time { def createTargetSet(target: IndelRealignmentTarget): TargetSet = { val tmp = new TreeSet()(TargetOrdering) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibration.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibration.scala index 3b38256fe5..fd071e80cd 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibration.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibration.scala @@ -38,7 +38,8 @@ import org.bdgenomics.formats.avro.AlignmentRecord class BaseQualityRecalibration( val input: RDD[(Option[DecadentRead], Option[AlignmentRecord])], val knownSnps: Broadcast[SnpTable], - val dumpObservationTableFile: Option[String] = None) + val dumpObservationTableFile: Option[String] = None +) extends Serializable with Logging { // Additional covariates to use when computing the correction @@ -124,9 +125,11 @@ class BaseQualityRecalibration( } object BaseQualityRecalibration { - def apply(rdd: RDD[AlignmentRecord], - knownSnps: Broadcast[SnpTable], - observationDumpFile: Option[String] = None, - validationStringency: ValidationStringency = ValidationStringency.STRICT): RDD[AlignmentRecord] = + def apply( + rdd: RDD[AlignmentRecord], + knownSnps: Broadcast[SnpTable], + observationDumpFile: Option[String] = None, + validationStringency: ValidationStringency = ValidationStringency.STRICT + ): RDD[AlignmentRecord] = new BaseQualityRecalibration(cloy(rdd, validationStringency), knownSnps).result } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/Covariate.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/Covariate.scala index 2a4bc8d509..48986bf506 100644 --- 
a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/Covariate.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/Covariate.scala @@ -68,7 +68,8 @@ abstract class AbstractCovariate[ValueT] extends Covariate with Serializable { class CovariateKey( val readGroup: String, val quality: QualityScore, - val extras: Seq[Option[Covariate#Value]]) extends Serializable { + val extras: Seq[Option[Covariate#Value]] +) extends Serializable { def containsNone: Boolean = extras.exists(_.isEmpty) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/ObservationTable.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/ObservationTable.scala index 15fadd4bff..2c6cc74119 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/ObservationTable.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/ObservationTable.scala @@ -87,7 +87,7 @@ class Aggregate private ( total: Long, // number of total observations mismatches: Long, // number of mismatches observed val expectedMismatches: Double // expected number of mismatches based on reported quality scores - ) extends Observation(total, mismatches) { +) extends Observation(total, mismatches) { require(expectedMismatches <= total) @@ -97,7 +97,8 @@ class Aggregate private ( new Aggregate( this.total + that.total, this.mismatches + that.mismatches, - this.expectedMismatches + that.expectedMismatches) + this.expectedMismatches + that.expectedMismatches + ) } object Aggregate { @@ -112,7 +113,8 @@ object Aggregate { */ class ObservationTable( val space: CovariateSpace, - val entries: Map[CovariateKey, Observation]) extends Serializable { + val entries: Map[CovariateKey, Observation] +) extends Serializable { override def toString = entries.map { case (k, v) => "%s\t%s".format(k, v) }.mkString("\n") diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/Recalibrator.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/Recalibrator.scala index 52fe525702..b1965658c1 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/Recalibrator.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/Recalibrator.scala @@ -63,7 +63,8 @@ class RecalibrationTable( // covariates for this recalibration val covariates: CovariateSpace, // marginal and quality scores by read group, - val globalTable: Map[String, (Aggregate, QualityTable)]) + val globalTable: Map[String, (Aggregate, QualityTable)] +) extends (DecadentRead => Seq[QualityScore]) with Serializable { // TODO: parameterize? 
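Since RecalibrationTable extends (DecadentRead => Seq[QualityScore]) as declared above, a fitted table is applied by plain function call (sketch; table and decadent are assumed values of those types):

  val newQuals: Seq[QualityScore] = table(decadent) // recalibrated quality per residue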
@@ -79,7 +80,7 @@ class RecalibrationTable( } def lookup(residueWithIndex: (Residue, Int), globalEntry: Option[(Aggregate, QualityTable)], globalDelta: Double, - extraValues: IndexedSeq[Seq[Option[Covariate#Value]]]): QualityScore = { + extraValues: IndexedSeq[Seq[Option[Covariate#Value]]]): QualityScore = { val (residue, index) = residueWithIndex val residueLogP = log(residue.quality.errorProbability) val qualityEntry: Option[(Aggregate, ExtrasTables)] = getQualityEntry(residue.quality, globalEntry) @@ -99,8 +100,10 @@ class RecalibrationTable( getOrElse(0.0) } - def getQualityEntry(quality: QualityScore, - globalEntry: Option[(Aggregate, QualityTable)]): Option[(Aggregate, ExtrasTables)] = { + def getQualityEntry( + quality: QualityScore, + globalEntry: Option[(Aggregate, QualityTable)] + ): Option[(Aggregate, ExtrasTables)] = { globalEntry.flatMap(_._2.table.get(quality)) } @@ -110,7 +113,7 @@ class RecalibrationTable( } def computeExtrasDelta(maybeQualityEntry: Option[(Aggregate, ExtrasTables)], residueIndex: Int, - extraValues: IndexedSeq[Seq[Option[Covariate#Value]]], offset: Double): Double = { + extraValues: IndexedSeq[Seq[Option[Covariate#Value]]], offset: Double): Double = { // Returns sum(delta for each extra covariate) maybeQualityEntry.map(qualityEntry => { val extrasTables = qualityEntry._2.extrasTables @@ -144,15 +147,19 @@ object RecalibrationTable { new RecalibrationTable(observed.space, globalTable) } - def computeQualityTable(globalEntry: (String, Map[CovariateKey, Observation]), - space: CovariateSpace): Map[QualityScore, (Aggregate, ExtrasTables)] = { + def computeQualityTable( + globalEntry: (String, Map[CovariateKey, Observation]), + space: CovariateSpace + ): Map[QualityScore, (Aggregate, ExtrasTables)] = { globalEntry._2.groupBy(_._1.quality).map(qualityEntry => { (qualityEntry._1, (aggregateObservations(qualityEntry._2), new ExtrasTables(computeExtrasTables(qualityEntry._2, space)))) }).map(identity) } - def computeExtrasTables(table: Map[CovariateKey, Observation], - space: CovariateSpace): IndexedSeq[Map[Option[Covariate#Value], Aggregate]] = { + def computeExtrasTables( + table: Map[CovariateKey, Observation], + space: CovariateSpace + ): IndexedSeq[Map[Option[Covariate#Value], Aggregate]] = { Range(0, space.extras.length).map(index => { table.groupBy(_._1.extras(index)).map(extraEntry => { (extraEntry._1, aggregateObservations(extraEntry._2)) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/ADAMVCFOutputFormat.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/ADAMVCFOutputFormat.scala index 49ac62112b..00d59c81c7 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/ADAMVCFOutputFormat.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/ADAMVCFOutputFormat.scala @@ -37,7 +37,8 @@ object ADAMVCFOutputFormat extends Serializable { def setHeader(samples: Seq[String]): VCFHeader = { header = Some(new VCFHeader( (VariantAnnotationConverter.infoHeaderLines ++ VariantAnnotationConverter.formatHeaderLines).toSet: Set[VCFHeaderLine], - samples)) + samples + )) header.get } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala index 7b7a6fdd90..80e7f53f90 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala @@ -77,10 
+77,12 @@ class VariantContextRDDFunctions(rdd: RDD[VariantContext]) extends ADAMSequenceD * Default is false (no sort). * @param coalesceTo Optionally coalesces the RDD down to _n_ partitions. Default is none. */ - def saveAsVcf(filePath: String, - dict: Option[SequenceDictionary] = None, - sortOnSave: Boolean = false, - coalesceTo: Option[Int] = None) = { + def saveAsVcf( + filePath: String, + dict: Option[SequenceDictionary] = None, + sortOnSave: Boolean = false, + coalesceTo: Option[Int] = None + ) = { val vcfFormat = VCFFormat.inferFromFilePath(filePath) assert(vcfFormat == VCFFormat.VCF, "BCF not yet supported") // TODO: Add BCF support @@ -136,9 +138,11 @@ class VariantContextRDDFunctions(rdd: RDD[VariantContext]) extends ADAMSequenceD // save to disk val conf = rdd.context.hadoopConfiguration conf.set(VCFOutputFormat.OUTPUT_VCF_FORMAT_PROPERTY, vcfFormat.toString) - withKey.saveAsNewAPIHadoopFile(filePath, + withKey.saveAsNewAPIHadoopFile( + filePath, classOf[LongWritable], classOf[VariantContextWritable], classOf[ADAMVCFOutputFormat[LongWritable]], - conf) + conf + ) log.info("Write %d records".format(gatkVCs.count())) rdd.unpersist() diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rich/DecadentRead.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rich/DecadentRead.scala index de8f5585bb..aed40c70c9 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rich/DecadentRead.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rich/DecadentRead.scala @@ -49,8 +49,10 @@ private[adam] object DecadentRead extends Logging with Serializable { * 2. To clog, to glut, or satisfy, as the appetite; to satiate. * 3. To fill up or choke up; to stop up. */ - def cloy(rdd: RDD[AlignmentRecord], - strictness: ValidationStringency = ValidationStringency.STRICT): RDD[(Option[DecadentRead], Option[AlignmentRecord])] = { + def cloy( + rdd: RDD[AlignmentRecord], + strictness: ValidationStringency = ValidationStringency.STRICT + ): RDD[(Option[DecadentRead], Option[AlignmentRecord])] = { rdd.map(r => { try { val dr = DecadentRead.apply(r) @@ -61,7 +63,8 @@ private[adam] object DecadentRead extends Logging with Serializable { throw e } else { log.warn("Converting read %s to decadent read failed with %s. Skipping...".format( - r, e)) + r, e + )) (None, Some(r)) } } @@ -127,14 +130,16 @@ private[adam] class DecadentRead(val record: RichAlignmentRecord) extends Loggin def referencePositionOption: Option[ReferencePosition] = assumingAligned( - record.readOffsetToReferencePosition(offset)) + record.readOffsetToReferencePosition(offset) + ) def referenceSequenceContext: Option[ReferenceSequenceContext] = assumingAligned(record.readOffsetToReferenceSequenceContext(offset)) def referencePosition: ReferencePosition = referencePositionOption.getOrElse( - throw new IllegalArgumentException("Residue has no reference location (may be an insertion)")) + throw new IllegalArgumentException("Residue has no reference location (may be an insertion)") + ) } lazy val readGroup: String = record.getRecordGroupName diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichCigar.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichCigar.scala index 2e187f133c..3f9f3c9afb 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichCigar.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichCigar.scala @@ -63,9 +63,11 @@ class RichCigar(cigar: Cigar) { * @param cigarElements List of cigar elements to move. * @return List of cigar elements with single element moved. 
*/ - @tailrec def moveCigarLeft(head: List[CigarElement], - index: Int, - cigarElements: List[CigarElement]): List[CigarElement] = { + @tailrec def moveCigarLeft( + head: List[CigarElement], + index: Int, + cigarElements: List[CigarElement] + ): List[CigarElement] = { if (index == 1) { val elementToTrim = cigarElements.headOption val elementToMove: Option[CigarElement] = PartialFunction.condOpt(cigarElements) { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/AttributeUtils.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/AttributeUtils.scala index efe17aa7af..af2812e4f4 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/AttributeUtils.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/AttributeUtils.scala @@ -74,7 +74,8 @@ object AttributeUtils { case Some(m) => createAttribute((m.group(1), m.group(2), m.group(3), m.group(4))) case None => throw new IllegalArgumentException( - "attribute string \"%s\" doesn't match format attrTuple:type:value".format(encoded)) + "attribute string \"%s\" doesn't match format attrTuple:type:value".format(encoded) + ) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/Flattener.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/Flattener.scala index 720baeb3f9..66d32532d2 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/Flattener.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/Flattener.scala @@ -39,8 +39,8 @@ object Flattener { } private def flatten(schema: Schema, prefix: String, - accumulator: ListBuffer[Schema.Field], - makeOptional: Boolean = false): ListBuffer[Schema.Field] = { + accumulator: ListBuffer[Schema.Field], + makeOptional: Boolean = false): ListBuffer[Schema.Field] = { for (f: Schema.Field <- schema.getFields) { f.schema.getType match { case NULL | BOOLEAN | INT | LONG | FLOAT | DOUBLE | BYTES | STRING | @@ -87,7 +87,8 @@ object Flattener { if (schema.getType ne Schema.Type.UNION) { return Schema.createUnion( - ListBuffer[Schema](Schema.create(Schema.Type.NULL), schema).asJava) + ListBuffer[Schema](Schema.create(Schema.Type.NULL), schema).asJava + ) } schema // TODO: what about unions that don't contain null? 
@@ -100,7 +101,7 @@ object Flattener { } private def flatten(schema: Schema, record: IndexedRecord, flatRecord: IndexedRecord, - offset: Int): Int = { + offset: Int): Int = { if (record == null) return offset + schema.getFields.size var off: Int = offset diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/IntervalListReader.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/IntervalListReader.scala index 4007ec6c78..c87f9ab55c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/IntervalListReader.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/IntervalListReader.scala @@ -48,6 +48,7 @@ class IntervalListReader(file: File) extends Traversable[(ReferenceRegion, Strin def foreach[U](f: ((ReferenceRegion, String)) => U) { IntervalList.fromFile(file).asScala.foreach( - i => f((ReferenceRegion(i.getSequence, i.getStart, i.getEnd), i.getName))) + i => f((ReferenceRegion(i.getSequence, i.getStart, i.getEnd), i.getName)) + ) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/MapTools.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/MapTools.scala index 31a4b70667..4d6100ca7a 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/MapTools.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/MapTools.scala @@ -40,8 +40,10 @@ object MapTools { * @tparam NumberType * @return */ - def add[KeyType, NumberType](map1: Map[KeyType, NumberType], - map2: Map[KeyType, NumberType])(implicit ops: Numeric[NumberType]): Map[KeyType, NumberType] = { + def add[KeyType, NumberType]( + map1: Map[KeyType, NumberType], + map2: Map[KeyType, NumberType] + )(implicit ops: Numeric[NumberType]): Map[KeyType, NumberType] = { (map1.keys ++ map2.keys.filter(!map1.contains(_))).map { (key: KeyType) => diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/MdTag.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/MdTag.scala index d0756f60ec..717ab8c423 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/MdTag.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/MdTag.scala @@ -45,9 +45,11 @@ object MdTag { * @param cigar Cigar operators for the read * @return Returns a populated MD tag. */ - def apply(mdTagInput: String, - referenceStart: Long, - cigar: Cigar): MdTag = { + def apply( + mdTagInput: String, + referenceStart: Long, + cigar: Cigar + ): MdTag = { var matches = List[NumericRange[Long]]() var mismatches = Map[Long, Char]() @@ -376,7 +378,8 @@ class MdTag( val start: Long, val matches: immutable.List[NumericRange[Long]], val mismatches: immutable.Map[Long, Char], - val deletions: immutable.Map[Long, Char]) { + val deletions: immutable.Map[Long, Char] +) { /** * Returns whether a base is a match against the reference. 
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/ReferenceContigMap.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/ReferenceContigMap.scala
index f216f05be5..f81a65c0e0 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/util/ReferenceContigMap.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/ReferenceContigMap.scala
@@ -67,9 +67,9 @@ object ReferenceContigMap {
   def apply(fragments: RDD[NucleotideContigFragment]): ReferenceContigMap =
     ReferenceContigMap(
       fragments
-        .groupBy(_.getContig.getContigName)
-        .mapValues(_.toSeq.sortBy(_.getFragmentStartPosition))
-        .collectAsMap
-        .toMap
+      .groupBy(_.getContig.getContigName)
+      .mapValues(_.toSeq.sortBy(_.getFragmentStartPosition))
+      .collectAsMap
+      .toMap
     )
 }
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/VcfHeaderUtils.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/VcfHeaderUtils.scala
index 7598704d58..dcee8e118e 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/util/VcfHeaderUtils.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/VcfHeaderUtils.scala
@@ -72,14 +72,20 @@ private[util] class VcfHeaderBuilder(samples: List[String]) {
 
   val formatLines: java.util.Set[VCFHeaderLine] = new java.util.HashSet[VCFHeaderLine]()
   val infoLines: java.util.Set[VCFHeaderLine] = new java.util.HashSet[VCFHeaderLine]()
-  val otherLines: Set[VCFHeaderLine] = Set(new VCFInfoHeaderLine(VCFConstants.RMS_BASE_QUALITY_KEY,
+  val otherLines: Set[VCFHeaderLine] = Set(
+    new VCFInfoHeaderLine(
+      VCFConstants.RMS_BASE_QUALITY_KEY,
       1,
       VCFHeaderLineType.Float,
-      "RMS Base Quality"),
-    new VCFInfoHeaderLine(VCFConstants.SAMPLE_NUMBER_KEY,
+      "RMS Base Quality"
+    ),
+    new VCFInfoHeaderLine(
+      VCFConstants.SAMPLE_NUMBER_KEY,
       VCFHeaderLineCount.INTEGER,
       VCFHeaderLineType.Integer,
-      "RMS Mapping Quality"))
+      "RMS Mapping Quality"
+    )
+  )
 
   /**
    * Creates VCF contig lines from a sequence dictionary.
@@ -97,16 +103,20 @@ private[util] class VcfHeaderBuilder(samples: List[String]) {
    * Adds standard VCF header lines to header.
    */
   private def addStandardLines() {
-    val formatKeys = List(VCFConstants.GENOTYPE_KEY,
+    val formatKeys = List(
+      VCFConstants.GENOTYPE_KEY,
       VCFConstants.GENOTYPE_QUALITY_KEY,
-      VCFConstants.GENOTYPE_PL_KEY)
-    val infoKeys = List(VCFConstants.ALLELE_FREQUENCY_KEY,
+      VCFConstants.GENOTYPE_PL_KEY
+    )
+    val infoKeys = List(
+      VCFConstants.ALLELE_FREQUENCY_KEY,
       VCFConstants.ALLELE_COUNT_KEY,
       VCFConstants.ALLELE_NUMBER_KEY,
       VCFConstants.STRAND_BIAS_KEY,
       VCFConstants.RMS_MAPPING_QUALITY_KEY,
      VCFConstants.MAPPING_QUALITY_ZERO_KEY,
-      VCFConstants.DEPTH_KEY)
+      VCFConstants.DEPTH_KEY
+    )
 
     VCFStandardHeaderLines.addStandardFormatLines(formatLines, false, formatKeys)
     VCFStandardHeaderLines.addStandardInfoLines(infoLines, false, infoKeys)
diff --git a/pom.xml b/pom.xml
index 3a0c20d615..69bc471cd1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
 7
-
+
 4.0.0
 org.bdgenomics.adam
 adam-parent_2.10
@@ -29,13 +29,13 @@
 0.2.3
 1.139
-
+
 adam-core
 adam-apis
 adam-cli
-
+
 Apache License